├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── ddl_compare.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── images
│   ├── cli_app_terminal.png
│   ├── compare_result.png
│   └── logo.png
├── requirement.txt
├── setup.py
├── sondesh
│   ├── __init__.py
│   ├── apps
│   │   ├── __init__.py
│   │   └── cli_app.py
│   ├── compare.py
│   ├── ddl_parser.py
│   ├── dialects
│   │   ├── __init__.py
│   │   ├── bigquery.py
│   │   ├── hql.py
│   │   ├── mssql.py
│   │   ├── mysql.py
│   │   ├── oracle.py
│   │   ├── redshift.py
│   │   ├── snowflake.py
│   │   ├── spark_sql.py
│   │   └── sql.py
│   ├── output
│   │   ├── __init__.py
│   │   ├── common.py
│   │   └── dialects.py
│   ├── parser.py
│   ├── parsetab.py
│   ├── tokens.py
│   └── utils.py
└── test
    ├── read_from_file.py
    ├── sql_files
    │   ├── one.sql
    │   ├── test_sql.sql
    │   └── two.sql
    ├── test_oracle.py
    └── test_redshift.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | app/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/.idea/ddl_compare.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Project : Sondesh
2 |
3 | [![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
4 |
5 | [![PyPI license](https://img.shields.io/pypi/l/ansicolortags.svg)](https://pypi.python.org/pypi/ansicolortags/)
6 | [![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/)
7 |
8 |
9 | ![logo](images/logo.png)
10 |
11 | ## Description
12 |
13 | Sondesh is the name of my cat. I love him very much,
14 | so I've decided to name this project after him.
15 |
16 | This project is all about a DDL parser and comparator.
17 | What does it parse?
18 | - It parses SQL statements, but only DDL statements.
19 | - It supports many SQL dialects, for example Oracle, PostgreSQL, Spark SQL, and Hive.
20 | - There is a CLI app (cli_app.py) that compares two DDL statements and shows you the differences in the terminal (see the sketch below the screenshot).
21 |
22 | ![compare result](images/compare_result.png)
23 |
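Under the hood, the CLI wires the file parser to the comparator. A minimal sketch of that flow, based on how `sondesh/apps/cli_app.py` calls `compare.compare_df` (the `.sql` paths below are hypothetical):

```python
from sondesh.ddl_parser import parse_from_file
from sondesh import compare

# hypothetical input files; parse_from_file returns a list of per-table dicts
first = parse_from_file('first.sql')
second = parse_from_file('second.sql')

if first and second:
    # prints rich tables of table-level and column-level differences
    compare.compare_df(first[0], second[0], 'first.sql', 'second.sql')
```
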
24 | ### Dependencies
25 |
26 | * Windows 10, Debian, and BSD are the supported platforms
27 | * Python version >= 3.8
28 |
29 | ### Installing
30 |
31 | * Sondesh can be installed using pip
32 |
33 | ```
34 | pip install sondesh
35 | ```
36 |
37 | ### Usage
38 |
39 | ```python
40 | from sondesh import ddl_parser
41 | import pprint
42 |
43 | result = ddl_parser.parse_from_file('/home/koushik/sample_ddl.sql')
44 | pprint.pprint(result)
45 | ```
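The parse result is a list with one dictionary per table statement. A rough, illustrative sketch of its shape, inferred from the keys read by `sondesh/compare.py` and the CLI table printer (the values here are hypothetical):

```python
[
    {
        'table_name': 'orders',        # hypothetical example values
        'schema': 'public',
        'primary_key': ['order_id'],
        'columns': [
            {'name': 'order_id', 'type': 'INT', 'size': None,
             'unique': True, 'nullable': False, 'default': None,
             'check': None, 'references': None},
            # ... one dict per column
        ],
        # dialect-specific keys such as 'diststyle', 'distkey', 'sortkey',
        # 'partitioned_by' and 'tablespace' may also appear
    }
]
```
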
46 |
47 | Using the CLI app:
48 |
49 | 1. Open the terminal
50 | 2. Type `sondesh`
51 | 3. Voilà!!!
52 |
53 | ![cli app terminal](images/cli_app_terminal.png)
54 |
55 | ## What Next
56 |
57 | 1. Integration with remote file systems, to load .sql files from there and parse them
58 | 2. Integration with data catalogues such as the Spark catalog or Hive metastore, to compare DDL
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/images/cli_app_terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/cli_app_terminal.png
--------------------------------------------------------------------------------
/images/compare_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/compare_result.png
--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/logo.png
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | colorama==0.4.6
2 | commonmark==0.9.1
3 | ply==3.11
4 | pyfiglet==0.8.post1
5 | Pygments==2.14.0
6 | rich==13.0.1
7 | tqdm==4.64.1
8 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 |
4 | # Note: To use the 'upload' functionality of this file, you must:
5 | # $ pipenv install twine --dev
6 |
7 | import io
8 | import os
9 | import sys
10 | from shutil import rmtree
11 |
12 | from setuptools import find_packages, setup, Command
13 |
14 | # Package meta-data.
15 | NAME = 'sondesh'
16 | DESCRIPTION = 'parse SQL, compare two .sql files, generate optimization hints for your SQL, and various other utilities'
17 | URL = 'https://github.com/koustreak/dot.parser'
18 | EMAIL = 'dot.py@yahoo.com'
19 | AUTHOR = 'Koushik Dutta'
20 | REQUIRES_PYTHON = '>=3.9.0'
21 | VERSION = '1.0'
22 |
23 | def parse_requirements(requirements):
24 | with open(requirements) as f:
25 | return [l.strip('\n') for l in f if l.strip('\n') and not l.startswith('#')]
26 |
27 | # What packages are required for this module to be executed?
28 | REQUIRED = parse_requirements('requirement.txt')
29 |
30 |
31 | # The rest you shouldn't have to touch too much :)
32 | # ------------------------------------------------
33 | # Except, perhaps the License and Trove Classifiers!
34 | # If you do change the License, remember to change the Trove Classifier for that!
35 |
36 | here = os.path.abspath(os.path.dirname(__file__))
37 |
38 | # Import the README and use it as the long-description.
39 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file!
40 | try:
41 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
42 | long_description = '\n' + f.read()
43 | except FileNotFoundError:
44 | long_description = DESCRIPTION
45 |
46 | # Load the package's __version__.py module as a dictionary.
47 | about = {}
48 | if not VERSION:
49 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
50 | with open(os.path.join(here, project_slug, '__version__.py')) as f:
51 | exec(f.read(), about)
52 | else:
53 | about['__version__'] = VERSION
54 |
55 |
56 | class UploadCommand(Command):
57 | """Support setup.py upload."""
58 |
59 | description = 'Build and publish the package.'
60 | user_options = []
61 |
62 | @staticmethod
63 | def status(s):
64 | """Prints things in bold."""
65 | print('\033[1m{0}\033[0m'.format(s))
66 |
67 | def initialize_options(self):
68 | pass
69 |
70 | def finalize_options(self):
71 | pass
72 |
73 | def run(self):
74 | try:
75 | self.status('Removing previous builds…')
76 | rmtree(os.path.join(here, 'app'))
77 | except OSError:
78 | pass
79 |
80 | self.status('Building Source and Wheel (universal) distribution…')
81 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
82 |
83 | self.status('Uploading the package to PyPI via Twine…')
84 | os.system('twine upload app/*')
85 |
86 | self.status('Pushing git tags…')
87 | os.system('git tag v{0}'.format(about['__version__']))
88 | os.system('git push --tags')
89 |
90 | sys.exit()
91 |
92 |
93 | # Where the magic happens:
94 | setup(
95 | name=NAME,
96 | version=about['__version__'],
97 | description=DESCRIPTION,
98 | long_description=long_description,
99 | long_description_content_type='text/markdown',
100 | author=AUTHOR,
101 | entry_points = {
102 | 'console_scripts': ['sondesh=sondesh.apps.cli_app:main_app'],
103 | },
104 | author_email=EMAIL,
105 | python_requires=REQUIRES_PYTHON,
106 | url=URL,
107 | packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*","test","test.*","*.test.*","*.test","images","images.*"]),
108 | # If your package is a single module, use this instead of 'packages':
109 | # py_modules=['mypackage'],
110 |
111 | # entry_points={
112 | # 'console_scripts': ['mycli=mymodule:cli'],
113 | # },
114 | install_requires=REQUIRED,
115 | include_package_data=True,
116 | license='MIT',
117 | classifiers=[
118 | # Trove classifiers
119 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
120 | 'License :: OSI Approved :: MIT License',
121 | 'Programming Language :: Python',
122 | 'Programming Language :: Python :: 3',
123 | 'Programming Language :: Python :: 3.9',
124 | 'Programming Language :: Python :: Implementation :: CPython',
125 | 'Programming Language :: Python :: Implementation :: PyPy'
126 | ],
127 | # $ setup.py publish support.
128 | cmdclass={
129 | 'upload': UploadCommand,
130 | },
131 | )
--------------------------------------------------------------------------------
/sondesh/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/__init__.py
--------------------------------------------------------------------------------
/sondesh/apps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/apps/__init__.py
--------------------------------------------------------------------------------
/sondesh/apps/cli_app.py:
--------------------------------------------------------------------------------
1 | #koushik dutta
2 | import pyfiglet
3 | from colorama import Fore,Back, init,Style
4 | from time import sleep
5 | from tqdm import tqdm
6 | import sys
7 | import os
8 | import json
9 | from rich.console import Console
10 | from rich.table import Table
11 | from collections import defaultdict
12 |
13 | init()
14 |
15 | print()
16 | print()
17 |
18 |
19 | def print_cli_table(df,context_name=None):
20 | if df:
21 | print(Fore.CYAN + 'visualizing ' + context_name + ' parse result ' + Style.RESET_ALL)
22 | for i in df:
23 | table = Table(title='column details for '+i['table_name'])
24 | columns = ["column_name", "column_type", "size", "foreign_key", "refers_to",
25 | "on_delete", "on_update", "unique", "nullable", "default", "check"]
26 | data = list()
27 | if i['columns']:
28 | for j in i['columns']:
29 | refers_to, on_delete, on_update, is_foreign_key = None, None, None, None
30 |
31 | if j.get('references'):
32 | refers_to = str(j.get('references').get('table'))
33 | if j.get('on_delete'):
34 | on_delete = str(j.get('references').get('on_delete'))
35 | if j.get('on_update'):
36 | on_update = str(j.get('references').get('on_update'))
37 | if j.get('references'):
38 | is_foreign_key = 'yes'
39 |
40 | data.append([str(j.get('name')), str(j.get('type')), str(j.get('size')),
41 | is_foreign_key, refers_to, on_delete, on_update,
42 | str(j.get('unique')), str(j.get('nullable')), str(j.get('default')),
43 | str(j.get('check'))])
44 | else:
45 | print(Fore.YELLOW + 'warning!! no column could be found in ' + context_name + Style.RESET_ALL)
46 |
47 | for col in columns:
48 | table.add_column(col)
49 | for row in data:
50 | table.add_row(*row, style='bright_green')
51 |
52 | console = Console()
53 | print(Fore.BLUE + '*****************************************************************************************'+Style.RESET_ALL)
54 | console.print(table)
55 | print()
56 |
57 | for i in df :
58 | table = Table(title='table properties for ' + i['table_name'])
59 | columns = ['table property name','property value']
60 | data = [
61 | ['index', str(i.get('index'))],
62 | ['diststyle', str(i.get('diststyle'))],
63 | ['distkey', str(i.get('distkey'))],
64 | ['primary key', str(i.get('primary_key'))],
65 | ['sort key', str(i.get('sortkey'))],
66 | ['schema', str(i.get('schema'))],
67 | ['table space', str(i.get('tablespace'))]
68 | ]
69 |
70 | for col in columns:
71 | table.add_column(col)
72 | for row in data:
73 | table.add_row(*row, style='bright_green')
74 |
75 | console = Console()
76 | console.print(table)
77 | print(Fore.BLUE +'*****************************************************************************************'+Style.RESET_ALL)
78 | print()
79 |
80 | else:
81 | print(Fore.RED + 'Error occurred while parsing ' + context_name + ' aborting ' + Style.RESET_ALL)
82 |
83 | def main_app():
84 | '''
85 | This will be exposed as CLI app in setup.py
86 | :return: Object
87 | '''
88 | f = pyfiglet.Figlet(font='big')
89 | print(Fore.CYAN + f.renderText('Compare DDL') + Style.RESET_ALL)
90 | sleep(0.5)
91 | print(Fore.BLUE + '> author : koushik dutta ')
92 | sleep(0.5)
93 | print(Fore.BLUE + '> date : 28-Dec-2022 ')
94 | sleep(0.5)
95 | print(Fore.BLUE + '> purpose : compare two DDL ')
96 | sleep(0.5)
97 | print(Fore.BLUE + '> version : 1.0.0 ')
98 | sleep(0.5)
99 | print(Fore.BLUE + '> OS : ubuntu 18.04 ')
100 | sleep(0.5)
101 | print(Fore.BLUE + '> python version : 3.8 ')
102 | sleep(0.5)
103 | print(Fore.BLUE + '> help : please give me a star in github ')
104 | sleep(0.6)
105 | print(Fore.BLUE + '> docs : read-the-docs documentation is in progress ')
106 | sleep(0.6)
107 | print(Fore.BLUE + '> unit test : check the test directory ')
108 | sleep(0.6)
109 | print(Fore.BLUE + '> powered by : lex and yacc in Python (PLY) ')
110 | sleep(0.6)
111 | print(Fore.BLUE + '> Supported DDL : Redshift , Oracle , Mysql , sparkSQL ( tested ) ' + Style.RESET_ALL)
112 | print()
113 | print(Fore.BLUE)
114 |
115 | with tqdm(total=100) as pbar:
116 | pbar.set_description('initiating process')
117 | pbar.update(3)
118 | sleep(0.5)
119 | try:
120 | from sondesh.dialects import redshift
121 | pbar.update(10)
122 | pbar.set_description('Loading Redshift Dialect')
123 | except:
124 | print(Fore.RED + 'No Redshift Dialect detected , aborting . To fix it contact koushik')
125 | sys.exit()
126 |
127 | try:
128 | sleep(0.5)
129 | from sondesh.dialects import oracle
130 | pbar.update(10)
131 | pbar.set_description('Loading Oracle Dialect')
132 | except:
133 | print(Fore.RED + 'No Oracle Dialect detected , aborting . To fix it contact koushik')
134 | sys.exit()
135 |
136 | try:
137 | sleep(0.5)
138 | from sondesh.dialects import spark_sql
139 | pbar.update(12)
140 | pbar.set_description('Loading spark sql Dialect')
141 | except:
142 | print(Fore.RED + 'No spark sql Dialect detected , aborting . To fix it contact koushik')
143 | sys.exit()
144 |
145 | try:
146 | sleep(0.5)
147 | from sondesh.dialects import sql
148 | pbar.update(25)
149 | pbar.set_description('Loading ansi sql Dialect')
150 | except:
151 | print(Fore.RED + 'No Ansi sql Dialect detected , aborting . To fix it contact koushik')
152 | sys.exit()
153 |
154 | try:
155 | sleep(0.5)
156 | from sondesh.dialects import mysql
157 | pbar.update(8)
158 | pbar.set_description('Loading mysql Dialect')
159 | except:
160 | print(Fore.RED + 'No mysql Dialect detected , aborting . To fix it contact koushik')
161 | sys.exit()
162 |
163 | try:
164 | sleep(0.5)
165 | from sondesh.dialects import hql
166 | pbar.update(7)
167 | pbar.set_description('Loading HiveQL Dialect')
168 | except:
169 | print(Fore.RED + 'No hiveQL Dialect detected , aborting . To fix it contact koushik')
170 | sys.exit()
171 |
172 | try:
173 | sleep(0.5)
174 | from sondesh.ddl_parser import parse_from_file
175 | pbar.update(10)
176 | pbar.set_description('Loading SQL file parser')
177 | except:
178 | print(Fore.RED + 'No .sql file parser detected , aborting . To fix it contact koushik')
179 | sys.exit()
180 |
181 | try:
182 | sleep(0.5)
183 | from sondesh.ddl_parser import parse_the_ddl
184 | pbar.update(7)
185 | pbar.set_description('Loading raw sql parser')
186 | except:
187 | print(Fore.RED + 'No raw sql parser detected , aborting . It is required to parse from user input .'
188 | ' To fix it contact koushik')
189 | sys.exit()
190 |
191 | try:
192 | sleep(0.5)
193 | from sondesh import compare
194 | pbar.update(8)
195 | pbar.set_description('Loading comparator')
196 | except:
197 | print(Fore.RED + 'No comparator detected , aborting . It is required to compare the two parse results .'
198 | ' To fix it contact koushik')
199 | sys.exit()
200 |
201 | pbar.set_description('Everything is loaded')
202 |
203 | print()
204 | print(Fore.GREEN + "All dialects and parser have been loaded successfully"+ Style.RESET_ALL)
205 | print()
206 | print()
207 |
208 | if os.path.exists('profile.json') and os.path.getsize('profile.json'):
209 | print(Fore.BLUE + 'Profile already exists , proceeding with that , if you want to reset remove profile.json')
210 | print()
211 | else:
212 | while True:
213 |
214 | print(Fore.BLUE + 'There is no profile of you yet , let me set one up , don\'t worry I\'m not a spy and this is one time only \n' + Style.RESET_ALL)
215 | name = input(Fore.BLUE + '> what should i call you : '+Style.RESET_ALL)
216 | print(Fore.BLUE + ' >> hey '+name+' welcome to DDL Comparator '+'\n')
217 | favourite_db = input(Fore.BLUE + '> which DB you like most : '+Style.RESET_ALL)
218 | purpose = input(Fore.BLUE + '> are you going to use it for commercial purpose : '+Style.RESET_ALL)
219 | what_you_do = input(Fore.BLUE + '> what is your job role : '+Style.RESET_ALL)
220 | default_outdir = input(Fore.BLUE + '> default output dir for report (leave blank for current directory) : ' + Style.RESET_ALL)
221 | cloud_platform = input(Fore.BLUE + '> which cloud platform you are going to use : ' + Style.RESET_ALL)
222 | reporting_style = input(Fore.BLUE + '> Reporting style \n1.excel\n2.html (leave blank for excel): ' + Style.RESET_ALL)
223 | print()
224 | profile = {'name': name, 'favourite_db': favourite_db,
225 | 'purpose': purpose, 'what_you_do': what_you_do,
226 | 'default_outdir': default_outdir,
227 | 'cloud_platform': cloud_platform, 'reporting_style': reporting_style}
228 | with open('profile.json', 'w') as fp:
229 | json.dump(profile,fp)
230 | print(Fore.CYAN + 'profile has been set up successfully \n'+ Style.RESET_ALL)
231 | break
232 |
233 | if os.path.exists('validator.json') and os.path.getsize('validator.json'):
234 | print(Fore.BLUE + 'DDL Validator already exists , proceeding with that , if you want to reset remove validator.json')
235 | print()
236 | else:
237 | validator_err_ct = 0
238 | validator_payload = dict()
239 | while True and validator_err_ct < 2:
240 |
241 | print(Fore.BLUE + 'There is no DDL Validator set up , let me set one up , this is for the first time only \n' + Style.RESET_ALL)
242 |
243 | string_vs_varchar = input(Fore.BLUE + '> Should i highlight STRING vs VARCHAR diff (regardless of size) (Y/N): '+Style.RESET_ALL)
244 | if string_vs_varchar.upper() not in ('Y','N'):
245 | print(Fore.RED + '\n Please enter either y/n'+Style.RESET_ALL)
246 | validator_err_ct+=1
247 | continue
248 | elif validator_err_ct == 2:
249 | print(Fore.RED + '\n Maximum limit reached . aborting'+Style.RESET_ALL)
250 | sys.exit()
251 | else:
252 | validator_payload['string_vs_varchar'] = string_vs_varchar
253 | validator_err_ct = 0
254 |
255 | timezone_diff = input(Fore.BLUE + '> Should i highlight timezone diff (Y/N): ' + Style.RESET_ALL)
256 | if timezone_diff.upper() not in ('Y', 'N'):
257 | print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
258 | validator_err_ct += 1
259 | elif validator_err_ct == 2:
260 | print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
261 | sys.exit()
262 | else:
263 | validator_payload['timezone_diff'] = timezone_diff
264 | validator_err_ct = 0
265 |
266 | encoding_diff = input(Fore.BLUE + '> Should i highlight encoding diff (Y/N): ' + Style.RESET_ALL)
267 | if encoding_diff.upper() not in ('Y', 'N'):
268 | print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
269 | validator_err_ct += 1
270 | elif validator_err_ct == 2:
271 | print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
272 | sys.exit()
273 | else:
274 | validator_payload['encoding_diff'] = encoding_diff
275 | validator_err_ct = 0
276 |
277 | distyle_diff = input(Fore.BLUE + '> Should i highlight distyle diff (Y/N): ' + Style.RESET_ALL)
278 | if distyle_diff.upper() not in ('Y', 'N'):
279 | print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
280 | validator_err_ct += 1
281 | elif validator_err_ct == 2:
282 | print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
283 | sys.exit()
284 | else:
285 | validator_payload['distyle_diff'] = distyle_diff
286 | validator_err_ct = 0
287 |
288 | with open('validator.json', 'w') as fp:
289 | json.dump(validator_payload,fp)
290 | print(Fore.CYAN + 'validator has been set up successfully \n'+ Style.RESET_ALL)
291 |
292 | break
293 |
294 | # Validation profiler will be setup accordingly
295 | # if os.path.exists('validation.json') and os.path.getsize('profile.json'):
296 | error_ct = 0
297 | choice = 'none'
298 | while True and error_ct<2:
299 | choice = input(Fore.CYAN + 'Do you want to compare file or provide SQL as user input (please type either file or raw) \n'+Style.RESET_ALL)
300 | if choice.upper() not in ('FILE','RAW'):
301 | print(Fore.RED + '\n Wrong input given , answer should be either file or raw '+Style.RESET_ALL)
302 | error_ct+=1
303 | continue
304 | elif error_ct == 2:
305 | print(Fore.RED + '\n You have crossed maximum limit of choice , aborting '+Style.RESET_ALL)
306 | sys.exit()
307 | else:
308 | print(Fore.CYAN + '\n You have entered '+choice+' for this session '+Style.RESET_ALL)
309 | break
310 |
311 | print()
312 |
313 | err_dialect = 0
314 | while True and err_dialect < 2:
315 | dialect = input(Fore.BLUE + '> Which dialect you want to use now , \n'
316 | '1.redshift\n2.oracle\n3.hql\n4.snowflake\n5.mysql\n'+Style.RESET_ALL)
317 | if dialect.upper() not in ['REDSHIFT','ORACLE','SNOWFLAKE','MYSQL','HQL']:
318 | print(Fore.RED + '\n Please enter a valid value '+Style.RESET_ALL)
319 | err_dialect+=1
320 | continue
321 | elif err_dialect == 2:
322 | print(Fore.RED + '\n Exceeded maximum limit of providing input'+Style.RESET_ALL)
323 | sys.exit()
324 | else:
325 | break
326 |
327 | print()
328 |
329 | while True:
330 | if choice.upper() == 'FILE':
331 | first_file = input(Fore.BLUE + '> Your first .sql file ? '+Style.RESET_ALL)
332 | second_file = input(Fore.BLUE + '> Your second .sql file ? ' + Style.RESET_ALL)
333 | print()
334 |
335 | if os.path.exists(first_file) and os.path.getsize(first_file) :
336 | if os.path.splitext(first_file)[1].upper() != '.SQL':
337 | print(Fore.YELLOW + ' WARNING !! your first input '+first_file+' is not a .sql file '+Style.RESET_ALL)
338 | print()
339 | else:
340 | print(Fore.RED + ' file not found '+first_file+Style.RESET_ALL)
341 | print()
342 | sys.exit()
343 |
344 | if os.path.exists(second_file) and os.path.getsize(second_file):
345 | if os.path.splitext(second_file)[1].upper() != '.SQL':
346 | print(Fore.YELLOW + ' WARNING !! your second input '+second_file+' is not a .sql file '+Style.RESET_ALL)
347 | print()
348 | else:
349 | print(Fore.RED + ' file not found '+second_file+Style.RESET_ALL)
350 | print()
351 | sys.exit()
352 |
353 | print(Fore.CYAN + 'parsing '+first_file+Style.RESET_ALL)
354 | first_file_parse_result = parse_from_file(first_file)
355 | print(Fore.CYAN + 'done!!'+first_file+Style.RESET_ALL)
356 |
357 | print()
358 |
359 | print(Fore.CYAN + 'parsing '+second_file+Style.RESET_ALL)
360 | second_file_parse_result = parse_from_file(second_file)
361 | print(Fore.CYAN + 'done!!'+second_file+Style.RESET_ALL)
362 |
363 | print()
364 |
365 | print(Fore.CYAN + 'comparison engine initiated ' + Style.RESET_ALL)
366 | if first_file_parse_result and second_file_parse_result:
367 | compare.compare_df(first_file_parse_result[0], second_file_parse_result[0], first_file, second_file)
368 |
369 | print ()
370 |
371 | question_ = input(Fore.CYAN + 'Do you want to see the table parse result ? leave blank for NO else YES '+Style.RESET_ALL)
372 | if question_.upper() in ('YES', 'Y'):
373 | print_cli_table(first_file_parse_result,'first_file_parse_result')
374 | print()
375 | print_cli_table(second_file_parse_result, 'second_file_parse_result')
376 | print()
377 |
378 | else:
379 | print(Fore.YELLOW+'RAW Input Comparator has not been developed yet'+Style.RESET_ALL)
380 | '''
381 | first_sql_input = input(Fore.BLUE + 'Please enter your first sql '+Style.RESET_ALL)
382 | second_sql_input = input(Fore.BLUE + 'Please enter your second sql ' + Style.RESET_ALL)
383 |
384 | if first_sql_input is None or second_sql_input is None:
385 | print(Fore.RED + 'Please provide both of the mandatory input'+Style.RESET_ALL)
386 |
387 | print(Fore.CYAN + 'parsing first_sql_input '+ Style.RESET_ALL)
388 | first_sql_parse_result = parse_from_file(first_sql_input)
389 | print(Fore.CYAN + 'done!!' + Style.RESET_ALL)
390 |
391 | print()
392 |
393 | print(Fore.CYAN + 'parsing second_sql_input ' + Style.RESET_ALL)
394 | second_sql_parse_result = parse_from_file(second_sql_input)
395 | print(Fore.CYAN + 'done!!' + Style.RESET_ALL)
396 |
397 | print()
398 |
399 | question_ = input('Do you want to see the table parse result leave blank for NO else YES ')
400 | if question_:
401 | print_cli_table(first_sql_parse_result, 'first_sql_parse_result')
402 | print()
403 | print_cli_table(second_sql_parse_result, 'second_sql_parse_result')
404 | print()
405 |
406 | print(Fore.CYAN + 'comparison engine initiated '+Style.RESET_ALL)
407 | if first_sql_parse_result and second_sql_parse_result:
408 | compare.compare_df(first_sql_parse_result,second_sql_parse_result,first_sql_input,second_sql_input)
409 | '''
410 |
411 |
412 | redo_choice = input(Fore.CYAN + '> Do you want to use the tool again ? N for No , press anything else for Yes '+Style.RESET_ALL)
413 | if redo_choice.upper() == 'N':
414 | print(Fore.CYAN + 'Good Bye , have a good day\n'+ Style.RESET_ALL)
415 | break
416 | else:
417 | continue
--------------------------------------------------------------------------------
/sondesh/compare.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | from colorama import Fore,Back, init,Style
4 | from rich.console import Console
5 | from rich.table import Table
6 |
7 | init()
8 |
9 | def compare_df(query_one_df,query_two_df,context_one,context_two):
10 | validator = None
11 | string_vs_varchar = None
12 | timezone_diff = None
13 | encoding_diff = None
14 | distyle_diff = None
15 |
16 | if os.path.exists('validator.json') and os.path.getsize('validator.json'):
17 | with open('validator.json') as fp:
18 | validator = json.load(fp)
19 | if validator :
20 | string_vs_varchar = validator['string_vs_varchar']
21 | timezone_diff = validator['timezone_diff']
22 | encoding_diff = validator['encoding_diff']
23 | distyle_diff = validator['distyle_diff']
24 |
25 | if validator is None:
26 | print(Fore.YELLOW + ' WARNING !! User validator profile is Blank ')
27 |
28 | if query_one_df and query_two_df :
29 | print(Fore.CYAN + 'visualizing compare result ' + Style.RESET_ALL)
30 | table = Table(title='comparing '+context_one+' vs '+context_two+' column level ')
31 | table_tab = Table(title='comparing ' + context_one + ' vs ' + context_two+' table level ')
32 |
33 | difference_tab = []
34 | columns_tab = ["property name", "value in " + context_one, "value in " + context_two]
35 |
36 | if query_one_df.get('table_name')!=query_two_df.get('table_name'):
37 | difference_tab.append(['table name found in sql',query_one_df.get('table_name'),query_two_df.get('table_name')])
38 |
39 | if query_one_df.get('tablespace')!=query_two_df.get('tablespace'):
40 | difference_tab.append(['tablespace',query_one_df.get('tablespace'),query_two_df.get('tablespace')])
41 |
42 | if query_one_df.get('schema')!=query_two_df.get('schema'):
43 | difference_tab.append(['schema',query_one_df.get('schema'),query_two_df.get('schema')])
44 |
45 | keys_one = None
46 | keys_two = None
47 | type_one = None
48 | type_two = None
49 |
50 |
51 | if query_one_df.get('sortkey'):
52 | keys_one = ','.join(query_one_df.get('sortkey').get('keys'))
53 | type_one = query_one_df.get('sortkey').get('type')
54 |
55 | if query_two_df.get('sortkey'):
56 | keys_two = ','.join(query_two_df.get('sortkey').get('keys'))
57 | type_two = query_two_df.get('sortkey').get('type')
58 |
59 | if (keys_one != keys_two) and (keys_one or keys_two):
60 | difference_tab.append(['sort keys',keys_one,keys_two])
61 |
62 | if (type_one != type_two) and (type_one or type_two):
63 | difference_tab.append(['sort type',type_one,type_two])
64 |
65 | if query_two_df.get('index') != query_one_df.get('index'):
66 | difference_tab.append(['index',','.join(query_one_df.get('index')),','.join(query_two_df.get('index'))])
67 |
68 | if query_two_df.get('partitioned_by') != query_one_df.get('partitioned_by'):
69 | difference_tab.append(['partition',','.join(query_one_df.get('partitioned_by')),
70 | ','.join(query_two_df.get('partitioned_by'))])
71 |
72 | if query_two_df.get('diststyle') != query_one_df.get('diststyle'):
73 | difference_tab.append(['distribution style',query_one_df.get('diststyle'),query_two_df.get('diststyle')])
74 |
75 | if query_two_df.get('checks') != query_one_df.get('checks'):
76 | difference_tab.append(['checks constraints', ','.join(query_one_df.get('checks')),
77 | ','.join(query_two_df.get('checks'))])
78 |
79 | if difference_tab:
80 | for col in columns_tab:
81 | table_tab.add_column(col)
82 | for row in difference_tab:
83 | table_tab.add_row(*row, style='bright_green')
84 | # table level difference
85 |
86 | console = Console()
87 | print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
88 | console.print(table_tab)
89 | print()
90 | else:
91 | print()
92 | print(Fore.GREEN + 'No Table Level Difference could be found '+Style.RESET_ALL)
93 |
94 | columns = ["column name", "property" , "value in "+context_one, "value in "+context_two]
95 |
96 | query_one_cols = query_one_df['columns']
97 | query_two_cols = query_two_df['columns']
98 |
99 | difference = []
100 |
101 | for j in query_one_cols:
102 | refers_to_one, on_delete_one, on_update_one, is_foreign_key_one = None, None, None, None
103 |
104 | if j.get('references'):
105 | refers_to_one = str(j.get('references').get('table'))
106 | if j.get('on_delete'):
107 | on_delete_one = str(j.get('references').get('on_delete'))
108 | if j.get('on_update'):
109 | on_update_one = str(j.get('references').get('on_update'))
110 | if j.get('references'):
111 | is_foreign_key_one = 'yes'
112 |
113 | col_name_one = str(j.get('name'))
114 | col_type_one = str(j.get('type'))
115 | col_size_one = str(j.get('size'))
116 | isunique_one = str(j.get('unique'))
117 | isnull_one = str(j.get('nullable'))
118 | default_val_one = str(j.get('default'))
119 | check_val_one = str(j.get('check'))
120 | encode_one = str(j.get('encode'))
121 |
122 | temp_two = list(filter(lambda x:x['name']==col_name_one,query_two_cols))
123 | col_name_two = None
124 | if temp_two:
125 | temp_two = temp_two[0]
126 | col_name_two = str(temp_two.get('name'))
127 | col_type_two = str(temp_two.get('type'))
128 | col_size_two = str(temp_two.get('size'))
129 | isunique_two = str(temp_two.get('unique'))
130 | isnull_two = str(temp_two.get('nullable'))
131 | default_val_two = str(temp_two.get('default'))
132 | check_val_two = str(temp_two.get('check'))
133 | encode_two = str(temp_two.get('encode'))
134 |
135 | if col_type_one != col_type_two:
136 | difference.append([col_name_two,'datatype',col_type_one,col_type_two])
137 |
138 | if col_size_two != col_size_one:
139 | difference.append([col_name_two,'size',col_size_one,col_size_two])
140 |
141 | if isunique_two != isunique_one:
142 | difference.append([col_name_two,'is unique',isunique_one,isunique_two])
143 |
144 | if isnull_one != isnull_two:
145 | difference.append([col_name_two,'nullable',isnull_one,isnull_two])
146 |
147 | if default_val_two != default_val_one:
148 | difference.append([col_name_two,'default value',default_val_one,default_val_two])
149 |
150 | if check_val_two != check_val_one:
151 | difference.append([col_name_two,'check constraint',check_val_one,check_val_two])
152 |
153 | if encode_one != encode_two:
154 | difference.append([col_name_two,'encode',encode_one,encode_two])
155 |
156 | refers_to_two, on_delete_two, on_update_two, is_foreign_key_two = None, None, None, None
157 |
158 | if temp_two.get('references'):
159 | refers_to_two = str(temp_two.get('references').get('table'))
160 | if temp_two.get('on_delete'):
161 | on_delete_two = str(temp_two.get('references').get('on_delete'))
162 | if temp_two.get('on_update'):
163 | on_update_two = str(temp_two.get('references').get('on_update'))
164 | if temp_two.get('references'):
165 | is_foreign_key_two = 'yes'
166 |
167 | if is_foreign_key_two != is_foreign_key_one:
168 | difference.append([col_name_two,'foreign key',is_foreign_key_one,is_foreign_key_two])
169 |
170 | if refers_to_two != refers_to_one:
171 | difference.append([col_name_two,'foreign key reference',refers_to_one,refers_to_two])
172 |
173 | if on_delete_two != on_delete_one:
174 | difference.append([col_name_two,'on delete clause',on_delete_one,on_delete_two])
175 |
176 | if on_update_two != on_update_one:
177 | difference.append([col_name_two,'on update clause',on_update_one,on_update_two])
178 |
179 | query_two_cols = list(filter(lambda g:g['name']!=col_name_one,query_two_cols))
180 |
181 | else:
182 | difference.append([col_name_one,'is_found','yes','no'])
183 |
184 | if query_two_cols:
185 | for k in query_two_cols:
186 | difference.append([k['name'],'is_found','no','yes'])
187 |
188 | if difference:
189 | for col in columns:
190 | table.add_column(col)
191 | for row in difference:
192 | table.add_row(*row, style='bright_green')
193 | # table level difference
194 |
195 | console = Console()
196 | print(
197 | Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
198 | console.print(table)
199 | print()
200 | else:
201 | print()
202 | print(Fore.GREEN + 'No Column Level Difference could be found '+Style.RESET_ALL)
203 |
--------------------------------------------------------------------------------
/sondesh/ddl_parser.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Optional
2 |
3 | from ply.lex import LexToken
4 |
5 | from sondesh import tokens as tok
6 | from sondesh.dialects.bigquery import BigQuery
7 | from sondesh.dialects.hql import HQL
8 | from sondesh.dialects.mssql import MSSQL
9 | from sondesh.dialects.mysql import MySQL
10 | from sondesh.dialects.oracle import Oracle
11 | from sondesh.dialects.redshift import Redshift
12 | from sondesh.dialects.snowflake import Snowflake
13 | from sondesh.dialects.spark_sql import SparkSQL
14 | from sondesh.dialects.sql import BaseSQL
15 | from sondesh.parser import Parser
16 |
17 |
18 | class parse_the_ddl_error(Exception):
19 | pass
20 |
21 |
22 | class parse_the_ddl(
23 | Parser, SparkSQL, Snowflake, BaseSQL, HQL, MySQL, MSSQL, Oracle, Redshift, BigQuery
24 | ):
25 |
26 | tokens = tok.tokens
27 | t_ignore = "\t \r"
28 |
29 | def get_tag_symbol_value_and_increment(self, t: LexToken) -> LexToken:
30 | # todo: need to find less hacky way to parse HQL structure types
31 | if "<" in t.value:
32 | t.type = "LT"
33 | self.lexer.lt_open += t.value.count("<")
34 | if ">" in t.value and not self.lexer.check:
35 | t.type = "RT"
36 | self.lexer.lt_open -= t.value.count(">")
37 | return t
38 |
39 | def after_columns_tokens(self, t: LexToken) -> LexToken:
40 | t.type = tok.after_columns_tokens.get(t.value.upper(), t.type)
41 | if t.type != "ID":
42 | self.lexer.after_columns = True
43 | elif self.lexer.columns_def:
44 | t.type = tok.columns_defenition.get(t.value.upper(), t.type)
45 | return t
46 |
47 | def process_body_tokens(self, t: LexToken) -> LexToken:
48 | if (
49 | self.lexer.last_par == "RP" and not self.lexer.lp_open
50 | ) or self.lexer.after_columns:
51 | t = self.after_columns_tokens(t)
52 | elif self.lexer.columns_def:
53 | t.type = tok.columns_defenition.get(t.value.upper(), t.type)
54 | elif self.lexer.sequence:
55 | t.type = tok.sequence_reserved.get(t.value.upper(), "ID")
56 | return t
57 |
58 | def parse_tags_symbols(self, t) -> Optional[LexToken]:
59 | """like symbols < >"""
60 | if not self.lexer.check:
61 | for key in tok.symbol_tokens_no_check:
62 | if key in t.value:
63 | return self.get_tag_symbol_value_and_increment(t)
64 |
65 | def tokens_not_columns_names(self, t: LexToken) -> LexToken:
66 |
67 | t_tag = self.parse_tags_symbols(t)
68 | if t_tag:
69 | return t_tag
70 |
71 | if "ARRAY" in t.value:
72 | t.type = "ARRAY"
73 | return t
74 | elif self.lexer.is_like:
75 | t.type = tok.after_columns_tokens.get(t.value.upper(), t.type)
76 | elif not self.lexer.is_table:
77 | # if is_table means we already met an INDEX or TABLE statement and
78 | # the definition is already done and this is a string
79 | t.type = tok.defenition_statements.get(
80 | t.value.upper(), t.type
81 | ) # Check for reserved word
82 | elif self.lexer.last_token != "COMMA":
83 | t.type = tok.common_statements.get(t.value.upper(), t.type)
84 | else:
85 | t.type = tok.first_liners.get(t.value.upper(), t.type)
86 |
87 | # get tokens from other token dicts
88 | t = self.process_body_tokens(t)
89 |
90 | self.set_lexer_tags(t)
91 |
92 | return t
93 |
94 | def set_lexer_tags(self, t: LexToken) -> None:
95 | if t.type == "SEQUENCE":
96 | self.lexer.sequence = True
97 | elif t.type == "CHECK":
98 | self.lexer.check = True
99 |
100 | def t_DOT(self, t: LexToken) -> LexToken:
101 | r"\."
102 | t.type = "DOT"
103 | return self.set_last_token(t)
104 |
105 | def t_STRING(self, t: LexToken) -> LexToken:
106 | r"((\')([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}\[\]\/\\\"\#\*&^|?;±§@~]*)(\')){1}"
107 | t.type = "STRING"
108 | return self.set_last_token(t)
109 |
110 | def t_DQ_STRING(self, t: LexToken) -> LexToken:
111 | r"((\")([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}'\[\]\/\\\\#\*&^|?;±§@~]*)(\")){1}"
112 | t.type = "DQ_STRING"
113 | return self.set_last_token(t)
114 |
115 | def is_token_column_name(self, t: LexToken) -> bool:
116 | """many reserved words can be used as a column name;
117 | to decide whether it is a column name or not we need to do some checks"""
118 | skip_id_tokens = ["(", ")", ","]
119 | return (
120 | t.value not in skip_id_tokens
121 | and self.lexer.is_table
122 | and self.lexer.lp_open
123 | and not self.lexer.is_like
124 | and (self.lexer.last_token == "COMMA" or self.lexer.last_token == "LP")
125 | and t.value.upper() not in tok.first_liners
126 | )
127 |
128 | def is_creation_name(self, t: LexToken) -> bool:
129 | """many reserved words can be used as a creation name;
130 | to decide whether it is a creation name or not we need to do some checks"""
131 | skip_id_tokens = ["(", ")", ","]
132 | exceptional_keys = [
133 | "SCHEMA",
134 | "TABLE",
135 | "DATABASE",
136 | "TYPE",
137 | "DOMAIN",
138 | "TABLESPACE",
139 | "INDEX",
140 | "CONSTRAINT",
141 | "EXISTS",
142 | ]
143 | return (
144 | t.value not in skip_id_tokens
145 | and t.value.upper() not in ["IF"]
146 | and self.lexer.last_token in exceptional_keys
147 | and not self.exceptional_cases(t.value.upper())
148 | )
149 |
150 | def exceptional_cases(self, value: str) -> bool:
151 | if value == "TABLESPACE" and self.lexer.last_token == "INDEX":
152 | return True
153 | return False
154 |
155 | def t_AUTOINCREMENT(self, t: LexToken):
156 | r"(AUTO_INCREMENT|AUTOINCREMENT)(?i)\b"
157 | t.type = "AUTOINCREMENT"
158 | return self.set_last_token(t)
159 |
160 | def t_ID(self, t: LexToken):
161 | r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)"
162 | t.type = tok.symbol_tokens.get(t.value, "ID")
163 |
164 | if t.type == "LP":
165 | self.lexer.lp_open += 1
166 | self.lexer.columns_def = True
167 | self.lexer.last_token = "LP"
168 | return t
169 | elif self.is_token_column_name(t) or self.lexer.last_token == "DOT":
170 | t.type = "ID"
171 | elif t.type != "DQ_STRING" and self.is_creation_name(t):
172 | t.type = "ID"
173 | else:
174 | t = self.tokens_not_columns_names(t)
175 |
176 | self.capitalize_tokens(t)
177 | self.commat_type(t)
178 |
179 | self.set_lexx_tags(t)
180 |
181 | return self.set_last_token(t)
182 |
183 | def commat_type(self, t: LexToken):
184 | if t.type == "COMMA" and self.lexer.lt_open:
185 | t.type = "COMMAT"
186 |
187 | def capitalize_tokens(self, t: LexToken):
188 | if t.type != "ID" and t.type not in ["LT", "RT"]:
189 | t.value = t.value.upper()
190 |
191 | def set_parathesis_tokens(self, t: LexToken):
192 | if t.type in ["RP", "LP"]:
193 | if t.type == "RP" and self.lexer.lp_open:
194 | self.lexer.lp_open -= 1
195 | self.lexer.last_par = t.type
196 |
197 | def set_lexx_tags(self, t: LexToken):
198 | self.set_parathesis_tokens(t)
199 |
200 | if t.type == "ALTER":
201 | self.lexer.is_alter = True
202 | if t.type == "LIKE":
203 | self.lexer.is_like = True
204 | elif t.type in ["TYPE", "DOMAIN", "TABLESPACE"]:
205 | self.lexer.is_table = False
206 | elif t.type in ["TABLE", "INDEX"] and not self.lexer.is_alter:
207 | self.lexer.is_table = True
208 |
209 | def set_last_token(self, t: LexToken):
210 | self.lexer.last_token = t.type
211 | return t
212 |
213 | def p_id(self, p):
214 | """id : ID
215 | | DQ_STRING"""
216 | delimeters_to_start = ["`", '"', "["]
217 | delimeters_to_end = ["`", '"', "]"]
218 | p[0] = p[1]
219 |
220 | if self.normalize_names:
221 | for num, symbol in enumerate(delimeters_to_start):
222 | if p[0].startswith(symbol) and p[0].endswith(delimeters_to_end[num]):
223 | p[0] = p[0][1:-1]
224 |
225 | def p_id_or_string(self, p):
226 | """id_or_string : id
227 | | STRING"""
228 | p[0] = p[1]
229 |
230 | def t_error(self, t: LexToken):
231 | raise parse_the_ddl_error("Unknown symbol %r" % (t.value[0],))
232 |
233 | def p_error(self, p):
234 | if not self.silent:
235 | raise parse_the_ddl_error(f"Unknown statement at {p}")
236 |
237 |
238 | def parse_from_file(file_path: str, parser_settings: Optional[dict] = None, **kwargs) -> List[Dict]:
239 | """get useful data from ddl"""
240 | with open(file_path, "r") as df:
241 | return parse_the_ddl(df.read(), **(parser_settings or {})).run(file_path=file_path, **kwargs)
242 |
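
A minimal usage sketch for the entry points above. The import path is assumed from the repository layout, and the call pattern mirrors `parse_from_file`; extra keyword arguments accepted by `run()` are not shown here.

    # Hypothetical usage sketch; assumes the package is installed and that
    # parse_from_file / parse_the_ddl are importable from this module.
    from sondesh.ddl_parser import parse_from_file, parse_the_ddl

    ddl = "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(255));"

    # Parse an in-memory DDL string; run() returns the parsed statements.
    statements = parse_the_ddl(ddl).run()

    # Or parse every statement found in a .sql file (returns a list of dicts).
    file_statements = parse_from_file("schema.sql")  # hypothetical path
    print(statements, file_statements)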
--------------------------------------------------------------------------------
/sondesh/dialects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/dialects/__init__.py
--------------------------------------------------------------------------------
/sondesh/dialects/bigquery.py:
--------------------------------------------------------------------------------
1 | class BigQuery:
2 | def p_expression_options(self, p):
3 | """expr : expr multiple_options"""
4 | p[0] = p[1]
5 | p[1].update(p[2])
6 |
7 | def p_multiple_options(self, p):
8 | """multiple_options : options
9 | | multiple_options options
10 | """
11 | if len(p) > 2:
12 | p[1]["options"].extend(p[2]["options"])
13 | p[0] = p[1]
14 | else:
15 | p[0] = p[1]
16 |
17 | def p_options(self, p):
18 | """options : OPTIONS LP id_equals RP"""
19 | p_list = list(p)
20 | if not isinstance(p[1], dict):
21 | p[0] = {"options": p[3]}
22 | else:
23 | p[0] = p[1]
24 | if len(p) == 4:
25 | p[0]["options"].append(p_list[-1][0])
26 |
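
For reference, a sketch of the BigQuery DDL shape these rules target; the key/value pairs inside OPTIONS(...) are collected into an "options" list by p_options above. The statement itself is illustrative.

    # Illustrative BigQuery DDL exercising table- and column-level OPTIONS(...).
    bigquery_ddl = """
    CREATE TABLE dataset.events (
        id INT64,
        name STRING OPTIONS(description="customer name")
    )
    OPTIONS(description="raw events table");
    """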
--------------------------------------------------------------------------------
/sondesh/dialects/hql.py:
--------------------------------------------------------------------------------
1 | from sondesh.utils import check_spec, remove_par
2 |
3 |
4 | class HQL:
5 | def p_expression_location(self, p):
6 | """expr : expr LOCATION STRING
7 | | expr LOCATION DQ_STRING"""
8 | p[0] = p[1]
9 | p_list = list(p)
10 | p[0]["location"] = p_list[-1]
11 |
12 | def p_expression_clustered(self, p):
13 | """expr : expr ID ON LP pid RP
14 | | expr ID BY LP pid RP"""
15 | p[0] = p[1]
16 | p_list = list(p)
17 | p[0][f"{p_list[2].lower()}_{p_list[3].lower()}"] = p_list[-2]
18 |
19 | def p_expression_into_buckets(self, p):
20 | """expr : expr INTO ID ID"""
21 | p[0] = p[1]
22 | p_list = list(p)
23 | p[0][f"{p_list[2].lower()}_{p_list[-1].lower()}"] = p_list[-2]
24 |
25 | def p_row_format(self, p):
26 | """row_format : ROW FORMAT SERDE
27 | | ROW FORMAT
28 | """
29 | p_list = list(p)
30 | p[0] = {"serde": p_list[-1] == "SERDE"}
31 |
32 | def p_expression_row_format(self, p):
33 | """expr : expr row_format id
34 | | expr row_format STRING
35 | """
36 | p[0] = p[1]
37 | p_list = list(p)
38 | if p[2]["serde"]:
39 | format = {"serde": True, "java_class": p_list[-1]}
40 | else:
41 | format = check_spec(p_list[-1])
42 |
43 | p[0]["row_format"] = format
44 |
45 | def p_expression_with_serde(self, p):
46 | """expr : expr WITH SERDEPROPERTIES multi_assigments"""
47 | p[0] = p[1]
48 | p_list = list(p)
49 |
50 | row_format = p[0]["row_format"]
51 | row_format["properties"] = p_list[-1]
52 | p[0]["row_format"] = row_format
53 |
54 | def p_expression_tblproperties(self, p):
55 | """expr : expr TBLPROPERTIES multi_assigments"""
56 | p[0] = p[1]
57 | p[0]["tblproperties"] = list(p)[-1]
58 |
59 | def p_multi_assigments(self, p):
60 | """multi_assigments : LP assigment
61 | | multi_assigments RP
62 | | multi_assigments COMMA assigment"""
63 | p_list = remove_par(list(p))
64 | p[0] = p_list[1]
65 | p[0].update(p_list[-1])
66 |
67 | def p_assigment(self, p):
68 | """assigment : id id id
69 | | STRING id STRING
70 | | id id STRING
71 | | STRING id id
72 | | STRING id"""
73 | p_list = remove_par(list(p))
74 | if "state" in self.lexer.__dict__:
75 | p[0] = {p[1]: self.lexer.state.get(p_list[-1])}
76 | else:
77 | if "=" in p_list[-1]:
78 | p_list[-1] = p_list[-1].split("=")[-1]
79 | p[0] = {p_list[1]: p_list[-1]}
80 |
81 | def p_expression_comment(self, p):
82 | """expr : expr COMMENT STRING"""
83 | p[0] = p[1]
84 | p_list = list(p)
85 | p[0]["comment"] = check_spec(p_list[-1])
86 |
87 | def p_expression_terminated_by(self, p):
88 | """expr : expr id TERMINATED BY id
89 | | expr id TERMINATED BY STRING
90 | """
91 | p[0] = p[1]
92 | p_list = list(p)
93 | p[0][f"{p[2].lower()}_terminated_by"] = check_spec(p_list[-1])
94 |
95 | def p_expression_map_keys_terminated_by(self, p):
96 | """expr : expr MAP KEYS TERMINATED BY id
97 | | expr MAP KEYS TERMINATED BY STRING
98 | """
99 | p[0] = p[1]
100 | p_list = list(p)
101 | p[0]["map_keys_terminated_by"] = check_spec(p_list[-1])
102 |
103 | def p_expression_skewed_by(self, p):
104 | """expr : expr SKEWED BY LP id RP ON LP pid RP"""
105 | p[0] = p[1]
106 | p_list = remove_par(list(p))
107 | p[0]["skewed_by"] = {"key": p_list[4], "on": p_list[-1]}
108 |
109 | def p_expression_collection_terminated_by(self, p):
110 | """expr : expr COLLECTION ITEMS TERMINATED BY id
111 | | expr COLLECTION ITEMS TERMINATED BY STRING
112 | """
113 | p[0] = p[1]
114 | p_list = list(p)
115 | p[0]["collection_items_terminated_by"] = check_spec(p_list[-1])
116 |
117 | def p_expression_stored_as(self, p):
118 | """expr : expr STORED AS id
119 | | expr STORED AS id STRING
120 | | expr STORED AS id STRING id STRING
121 | """
122 | p[0] = p[1]
123 | p_list = list(p)
124 | if len(p_list) >= 6:
125 | # only input or output format
126 | p[0]["stored_as"] = {p_list[-2].lower(): p_list[-1]}
127 | if len(p_list) == 8:
128 | # both input & output
129 | p[0]["stored_as"].update({p_list[-4].lower(): p_list[-3]})
130 | else:
131 | p[0]["stored_as"] = p_list[-1]
132 |
133 | def p_expression_partitioned_by_hql(self, p):
134 | """expr : expr PARTITIONED BY pid_with_type
135 | | expr PARTITIONED BY LP pid RP
136 | | expr PARTITIONED BY LP multiple_funct RP
137 | """
138 | p[0] = p[1]
139 | p_list = remove_par(list(p))
140 | p[0]["partitioned_by"] = p_list[-1]
141 |
142 | def p_pid_with_type(self, p):
143 | """pid_with_type : LP column
144 | | pid_with_type COMMA column
145 | | pid_with_type RP
146 | """
147 | p_list = remove_par(list(p))
148 | if not isinstance(p_list[1], list):
149 | p[0] = [p_list[1]]
150 | else:
151 | p[0] = p_list[1]
152 | if len(p_list) > 2:
153 | p[0].append(p_list[-1])
154 |
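
A sketch of a Hive-style statement touching the rules above; per the grammar actions, the clauses map onto keys such as "partitioned_by", "row_format", "fields_terminated_by", "stored_as", "location" and "tblproperties". The DDL itself is illustrative.

    # Illustrative HiveQL DDL covering several of the HQL grammar rules above.
    hql_ddl = """
    CREATE EXTERNAL TABLE IF NOT EXISTS events (
        id INT,
        payload STRING
    )
    PARTITIONED BY (ds STRING)
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    COLLECTION ITEMS TERMINATED BY '|'
    MAP KEYS TERMINATED BY ':'
    STORED AS TEXTFILE
    LOCATION 's3://bucket/events/'
    TBLPROPERTIES ('classification'='csv');
    """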
--------------------------------------------------------------------------------
/sondesh/dialects/mssql.py:
--------------------------------------------------------------------------------
1 | import sondesh # noqa: F401 weird issue with failed tests
2 |
3 |
4 | class MSSQL:
5 | def p_pkey_constraint(self, p):
6 | """pkey_constraint : constraint pkey_statement id LP index_pid RP
7 | | constraint pkey_statement LP index_pid RP
8 | | pkey_constraint with
9 | | pkey_constraint with ON id
10 | """
11 | p_list = list(p)
12 | p[0] = p[1]
13 | if isinstance(p[2], dict) and "with" in p[2]:
14 | data = p_list[2]
15 | if "ON" in p_list:
16 | data["with"]["on"] = p_list[-1]
17 | elif len(p_list) == 7:
18 | data = {"primary_key": True, "columns": p_list[-2], p[3]: True}
19 | else:
20 | data = {"primary_key": True, "columns": p_list[-2]}
21 |
22 | p[0]["constraint"].update(data)
23 |
24 | def p_with(self, p):
25 | """with : WITH with_args"""
26 | p_list = list(p)
27 | p[0] = {"with": {"properties": [], "on": None}}
28 | if ")" not in p_list:
29 | p[0]["with"]["properties"] = p_list[-1]["properties"]
30 |
31 | def p_equals(self, p):
32 | """equals : id id id
33 | | id id ON
34 | | id id id DOT id
35 | """
36 | p_list = list(p)
37 | if "." in p_list:
38 | p[0] = {"name": p_list[1], "value": f"{p_list[3]}.{p_list[5]}"}
39 | else:
40 | p[0] = {"name": p_list[-3], "value": p_list[-1]}
41 |
42 | def p_with_args(self, p):
43 | """with_args : LP equals
44 | | with_args COMMA equals
45 | | with_args with_args
46 | | with_args RP
47 | """
48 | p_list = list(p)
49 | if isinstance(p[1], dict):
50 | p[0] = p[1]
51 | else:
52 | p[0] = {"properties": []}
53 | if ")" != p_list[2]:
54 | if ")" == p_list[-1]:
55 | p[0]["properties"].append(p_list[-1])
56 | else:
57 | p[0]["properties"].append(p_list[-1])
58 |
59 | def p_period_for(self, p):
60 | """period_for : id FOR id LP pid RP"""
61 | p[0] = {"period_for_system_time": p[5]}
62 |
63 | def p_expression_on_primary(self, p):
64 | """expr : expr ON id"""
65 | p[0] = p[1]
66 | p[0]["on"] = p[3]
67 |
68 | def p_expression_with(self, p):
69 | """expr : expr with"""
70 | p[0] = p[1]
71 | p[0].update(p[2])
72 |
73 | def p_expression_text_image_on(self, p):
74 | """expr : expr TEXTIMAGE_ON id"""
75 | p[0] = p[1]
76 | p[0].update({"textimage_on": p[3]})
77 |
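
A sketch of the T-SQL constructs these rules handle: a named PRIMARY KEY constraint with a WITH (...) options list, the ON filegroup clause and TEXTIMAGE_ON. The statement is illustrative.

    # Illustrative SQL Server DDL touching the MSSQL-specific rules above.
    mssql_ddl = """
    CREATE TABLE dbo.users (
        id INT NOT NULL,
        bio VARCHAR(max),
        CONSTRAINT PK_users PRIMARY KEY CLUSTERED (id ASC)
            WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF) ON [PRIMARY]
    ) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY];
    """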
--------------------------------------------------------------------------------
/sondesh/dialects/mysql.py:
--------------------------------------------------------------------------------
1 | import sondesh # noqa: F401 weird issue with failed tests
2 |
3 |
4 | class MySQL:
5 | def p_on_update(self, p):
6 | """on_update : ON UPDATE id
7 | | ON UPDATE STRING
8 | | ON UPDATE f_call
9 | """
10 | p_list = list(p)
11 | if not ")" == p_list[-1]:
12 | p[0] = {"on_update": p_list[-1]}
13 | else:
14 | p[0] = {"on_update": p_list[-2]}
15 |
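
For reference, the column clause p_on_update targets (illustrative statement):

    # Illustrative MySQL DDL; the ON UPDATE clause is captured as {"on_update": ...}.
    mysql_ddl = """
    CREATE TABLE users (
        id INT AUTO_INCREMENT PRIMARY KEY,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
    );
    """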
--------------------------------------------------------------------------------
/sondesh/dialects/oracle.py:
--------------------------------------------------------------------------------
1 | from sondesh.utils import remove_par
2 |
3 |
4 | class Oracle:
5 | def p_encrypt(self, p):
6 | """encrypt : ENCRYPT
7 | | encrypt NO SALT
8 | | encrypt SALT
9 | | encrypt USING STRING
10 | | encrypt STRING
11 | """
12 | p_list = list(p)
13 | if isinstance(p[1], dict):
14 | p[0] = p[1]
15 | if "NO" in p_list:
16 | p[0]["encrypt"]["salt"] = False
17 | elif "USING" in p_list:
18 | p[0]["encrypt"]["encryption_algorithm"] = p_list[-1]
19 | elif "SALT" not in p_list:
20 | p[0]["encrypt"]["integrity_algorithm"] = p_list[-1]
21 |
22 | else:
23 | p[0] = {
24 | "encrypt": {
25 | "salt": True,
26 | "encryption_algorithm": "'AES192'",
27 | "integrity_algorithm": "SHA-1",
28 | }
29 | }
30 |
31 | def p_storage(self, p):
32 | """storage : STORAGE LP
33 | | storage id id
34 | | storage id id RP
35 | """
36 | # Initial 5m Next 5m Maxextents Unlimited
37 | p_list = remove_par(list(p))
38 | param = {}
39 | if len(p_list) == 4:
40 | param = {p_list[2].lower(): p_list[3]}
41 | if isinstance(p_list[1], dict):
42 | p[0] = p[1]
43 | else:
44 | p[0] = {}
45 | p[0].update(param)
46 |
47 | def p_expr_storage(self, p):
48 | """expr : expr storage"""
49 | p_list = list(p)
50 | p[0] = p[1]
51 | p[0]["storage"] = p_list[-1]
52 |
53 | def p_expr_index(self, p):
54 | """expr : expr ID INDEX"""
55 | p[0] = p[1]
56 | p[0][f"{p[2].lower()}_index"] = True
57 |
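
A sketch of the Oracle clauses covered above: ENCRYPT on a column and a STORAGE clause on the table (illustrative statement).

    # Illustrative Oracle DDL exercising the encrypt / storage rules above.
    oracle_ddl = """
    CREATE TABLE employees (
        id NUMBER,
        ssn VARCHAR2(11) ENCRYPT USING 'AES256' NO SALT
    )
    STORAGE (INITIAL 5M NEXT 5M MAXEXTENTS UNLIMITED);
    """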
--------------------------------------------------------------------------------
/sondesh/dialects/redshift.py:
--------------------------------------------------------------------------------
1 | class Redshift:
2 | def p_expression_distkey(self, p):
3 | """expr : expr id LP id RP"""
4 | p_list = list(p)
5 | p[1].update({"distkey": p_list[-2]})
6 | p[0] = p[1]
7 |
8 | def p_encode(self, p):
9 | """encode : ENCODE id"""
10 | p_list = list(p)
11 | p[0] = {"encode": p_list[-1]}
12 |
13 | def p_expression_diststyle(self, p):
14 | """expr : expr id id
15 | | expr id KEY
16 | """
17 | p_list = list(p)
18 | p[1].update({p_list[-2]: p_list[-1]})
19 | p[0] = p[1]
20 |
21 | def p_expression_sortkey(self, p):
22 | """expr : expr id id LP pid RP"""
23 | p_list = list(p)
24 | p[1].update({"sortkey": {"type": p_list[2], "keys": p_list[-2]}})
25 | p[0] = p[1]
26 |
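
A sketch of the Redshift table attributes handled above; per the grammar actions they surface as a per-column "encode", a diststyle key/value pair, "distkey" and a "sortkey" entry (illustrative statement).

    # Illustrative Redshift DDL touching the rules above.
    redshift_ddl = """
    CREATE TABLE sales (
        id INTEGER ENCODE az64,
        sold_at DATE
    )
    DISTSTYLE KEY
    DISTKEY (id)
    COMPOUND SORTKEY (sold_at, id);
    """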
--------------------------------------------------------------------------------
/sondesh/dialects/snowflake.py:
--------------------------------------------------------------------------------
1 | from sondesh.utils import remove_par
2 |
3 |
4 | class Snowflake:
5 | def p_clone(self, p):
6 | """clone : CLONE id"""
7 | p_list = list(p)
8 | p[0] = {"clone": {"from": p_list[-1]}}
9 |
10 | def p_expression_cluster_by(self, p):
11 | """expr : expr CLUSTER BY LP pid RP
12 | | expr CLUSTER BY pid
13 | """
14 | p[0] = p[1]
15 | p_list = remove_par(list(p))
16 | p[0]["cluster_by"] = p_list[-1]
17 |
18 | def p_table_comment(self, p):
19 | """expr : expr option_comment
20 | """
21 | p[0] = p[1]
22 | if p[2]:
23 | p[0].update(p[2])
24 |
25 | def p_option_comment(self, p):
26 | """option_comment : ID STRING
27 | | ID DQ_STRING
28 | | COMMENT ID STRING
29 | | COMMENT ID DQ_STRING
30 | """
31 | p_list = remove_par(list(p))
32 | if "comment" in p[1].lower():
33 | p[0] = {"comment": p_list[-1]}
34 |
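
A sketch of the Snowflake constructs above: CLUSTER BY, a table-level COMMENT option and CLONE (illustrative statements).

    # Illustrative Snowflake DDL touching the rules above.
    snowflake_ddl = """
    CREATE TABLE analytics.events (
        id INT,
        ts TIMESTAMP
    )
    CLUSTER BY (ts)
    COMMENT = 'raw event stream';

    CREATE TABLE analytics.events_backup CLONE analytics.events;
    """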
--------------------------------------------------------------------------------
/sondesh/dialects/spark_sql.py:
--------------------------------------------------------------------------------
1 | class SparkSQL:
2 | def p_expression_using(self, p):
3 | """expr : expr using"""
4 | p[0] = p[1]
5 | p[1].update(p[2])
6 |
7 | def p_using(self, p):
8 | """using : USING id"""
9 | p_list = list(p)
10 | p[0] = {"using": p_list[-1]}
11 |
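
For reference, the clause p_using targets; the USING value is captured as {"using": "delta"} (illustrative statement).

    # Illustrative Spark SQL DDL exercising the USING rule above.
    spark_ddl = """
    CREATE TABLE events (
        id INT,
        payload STRING
    ) USING delta;
    """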
--------------------------------------------------------------------------------
/sondesh/dialects/sql.py:
--------------------------------------------------------------------------------
1 | import re
2 | from collections import defaultdict
3 | from copy import deepcopy
4 | from typing import Any, Dict, List, Optional, Tuple, Union
5 |
6 | from sondesh.utils import check_spec, remove_par
7 |
8 | auth = "AUTHORIZATION"
9 |
10 |
11 | class AfterColumns:
12 | def p_expression_partition_by(self, p: List) -> None:
13 | """expr : expr PARTITION BY LP pid RP
14 | | expr PARTITION BY id LP pid RP
15 | | expr PARTITION BY pid
16 | | expr PARTITION BY id pid"""
17 | p[0] = p[1]
18 | p_list = list(p)
19 | _type = None
20 | if isinstance(p[4], list):
21 | columns = p[4]
22 | else:
23 | columns = p_list[-2]
24 | if isinstance(p[4], str) and p[4].lower() != "(":
25 | _type = p[4]
26 | p[0]["partition_by"] = {"columns": columns, "type": _type}
27 |
28 |
29 | class Database:
30 | def p_expression_create_database(self, p: List) -> None:
31 | """expr : expr database_base"""
32 | p[0] = p[1]
33 | p_list = list(p)
34 | p[0].update(p_list[-1])
35 |
36 | def p_database_base(self, p: List) -> None:
37 | """database_base : CREATE DATABASE id
38 | | CREATE ID DATABASE id
39 | | database_base clone
40 | """
41 | if isinstance(p[1], dict):
42 | p[0] = p[1]
43 | else:
44 | p[0] = {}
45 | p_list = list(p)
46 | if isinstance(p_list[-1], dict):
47 | p[0].update(p_list[-1])
48 | else:
49 | p[0]["database_name"] = p_list[-1]
50 | if len(p_list) == 5:
51 | p[0][p[2].lower()] = True
52 |
53 |
54 | class TableSpaces:
55 | @staticmethod
56 | def get_tablespace_data(p_list):
57 | if p_list[1] == "TABLESPACE":
58 | _type = None
59 | temp = False
60 | else:
61 | if p_list[1].upper() == "TEMPORARY":
62 | _type = None
63 | temp = True
64 | else:
65 | _type = p_list[1]
66 | if p_list[2].upper() == "TEMPORARY":
67 | temp = True
68 | else:
69 | temp = False
70 | if isinstance(p_list[-1], dict):
71 | properties = p_list[-1]
72 | tablespace_name = p_list[-2]
73 | else:
74 | properties = None
75 | tablespace_name = p_list[-1]
76 | result = {
77 | "tablespace_name": tablespace_name,
78 | "properties": properties,
79 | "type": _type,
80 | "temporary": temp,
81 | }
82 | return result
83 |
84 | def p_expression_create_tablespace(self, p: List) -> None:
85 | """expr : CREATE TABLESPACE id properties
86 | | CREATE id TABLESPACE id properties
87 | | CREATE id TABLESPACE id
88 | | CREATE TABLESPACE id
89 | | CREATE id id TABLESPACE id
90 | | CREATE id id TABLESPACE id properties
91 | """
92 | p_list = list(p)
93 | p[0] = self.get_tablespace_data(p_list[1:])
94 |
95 | def p_properties(self, p: List) -> None:
96 | """properties : property
97 | | properties property"""
98 | p_list = list(p)
99 | if len(p_list) == 3:
100 | p[0] = p[1]
101 | p[0].update(p[2])
102 | else:
103 | p[0] = p[1]
104 |
105 | def p_property(self, p: List) -> None:
106 | """property : id id
107 | | id STRING
108 | | id ON
109 | | id STORAGE
110 | | id ROW
111 | """
112 | p[0] = {p[1]: p[2]}
113 |
114 |
115 | class Table:
116 | @staticmethod
117 | def add_if_not_exists(data: Dict, p_list: List):
118 | if "EXISTS" in p_list:
119 | data["if_not_exists"] = True
120 | return data
121 |
122 | def p_create_table(self, p: List):
123 | """create_table : CREATE TABLE IF NOT EXISTS
124 | | CREATE TABLE
125 | | CREATE OR REPLACE TABLE IF NOT EXISTS
126 | | CREATE OR REPLACE TABLE
127 | | CREATE id TABLE IF NOT EXISTS
128 | | CREATE id TABLE
129 | | CREATE OR REPLACE id TABLE IF NOT EXISTS
130 | | CREATE OR REPLACE id TABLE
131 |
132 | """
133 | # id - for EXTERNAL, TRANSIENT, TEMPORARY
134 | # get schema & table name
135 | p[0] = {}
136 | p_list = list(p)
137 | self.add_if_not_exists(p[0], p_list)
138 |
139 | if 'REPLACE' in p_list:
140 | p[0]["replace"] = True
141 |
142 | id_key = p_list[4] if 'REPLACE' in p_list else p_list[2]
143 | id_key = id_key.upper()
144 |
145 | if id_key in ["EXTERNAL", "TRANSIENT"]:
146 | p[0][id_key.lower()] = True
147 | elif id_key in ["TEMP", "TEMPORARY"]:
148 | p[0]["temp"] = True
149 |
150 |
151 | class Column:
152 | def p_column_property(self, p: List):
153 | """c_property : id id"""
154 | p_list = list(p)
155 | if p[1].lower() == "auto":
156 | p[0] = {"increment": True}
157 | else:
158 | p[0] = {"property": {p_list[1]: p_list[-1]}}
159 |
160 | def set_base_column_propery(self, p: List) -> Dict:
161 |
162 | if "." in list(p):
163 | type_str = f"{p[2]}.{p[4]}"
164 | else:
165 | type_str = p[2]
166 | if isinstance(p[1], dict):
167 | p[0] = p[1]
168 | else:
169 | size = None
170 | p[0] = {"name": p[1], "type": type_str, "size": size}
171 | return p[0]
172 |
173 | @staticmethod
174 | def parse_complex_type(p_list: List[str]) -> str:
175 | # for complex <> types
176 | start_index = 1
177 | _type = ""
178 | if isinstance(p_list[1], dict):
179 | _type = p_list[1]["type"]
180 | start_index = 2
181 | for elem in p_list[start_index:]:
182 | if isinstance(elem, list):
183 | for _elem in elem:
184 | _type += f" {_elem.rstrip()}"
185 | elif "ARRAY" in elem and elem != "ARRAY":
186 | _type += elem
187 | else:
188 | _type += f" {elem}"
189 | return _type
190 |
191 | def p_c_type(self, p: List) -> None:
192 | """c_type : id
193 | | id id
194 | | id id id id
195 | | id id id
196 | | id DOT id
197 | | tid
198 | | ARRAY
199 | | c_type ARRAY
200 | | c_type tid
201 | """
202 | p[0] = {}
203 | p_list = remove_par(list(p))
204 | _type = None
205 |
206 | if len(p_list) == 2:
207 | _type = p_list[-1]
208 | elif isinstance(p[1], str) and p[1].lower() == "encode":
209 | p[0] = {"property": {"encode": p[2]}}
210 | else:
211 | _type = self.parse_complex_type(p_list)
212 | if _type:
213 | _type = self.process_type(_type, p_list, p)
214 | p[0]["type"] = _type
215 |
216 | def process_type(self, _type: Union[str, List], p_list: List, p: List) -> str:
217 |
218 | if isinstance(_type, list):
219 | _type = _type[0]
220 |
221 | elif isinstance(p_list[-1], str) and p_list[-1].lower() == "distkey":
222 | p[0] = {"property": {"distkey": True}}
223 | _type = _type.split("distkey")[0]
224 |
225 | _type = _type.strip().replace('" . "', '"."')
226 |
227 | _type = self.process_array_types(_type, p_list)
228 | return _type
229 |
230 | @staticmethod
231 | def process_array_types(_type: str, p_list: List) -> str:
232 | if "<" not in _type and "ARRAY" in _type:
233 | if "[" not in p_list[-1]:
234 | _type = _type.replace(" ARRAY", "[]").replace("ARRAY", "[]")
235 | else:
236 | _type = _type.replace("ARRAY", "")
237 | elif "<" in _type and "[]" in _type:
238 | _type = _type.replace("[]", "ARRAY")
239 | return _type
240 |
241 | @staticmethod
242 | def get_size(p_list: List):
243 | if p_list[-1].isnumeric():
244 | size = int(p_list[-1])
245 | else:
246 | size = p_list[-1]
247 | if len(p_list) != 3:
248 | if p_list[-3] != "*":
249 | # oracle can contain * in column size
250 | try:
251 | value_0 = int(p_list[-3])
252 | except ValueError:
253 |                 # we may have a column like: p Geometry(MultiPolygon, 26918)
254 | value_0 = p_list[-3]
255 | else:
256 | value_0 = p_list[-3]
257 | size = (value_0, int(p_list[-1]))
258 | return size
259 |
260 | @staticmethod
261 | def get_column_details(p_list: List, p: List):
262 | if p_list[-1].get("type"):
263 | p[0]["type"] += f"{p_list[-1]['type'].strip()}"
264 | elif p_list[-1].get("comment"):
265 | p[0].update(p_list[-1])
266 | elif p_list[-1].get("property"):
267 | for key, value in p_list[-1]["property"].items():
268 | p[0][key] = value
269 | p_list.pop(-1)
270 |
271 | @staticmethod
272 | def check_type_parameter(size: Union[tuple, int]) -> bool:
273 | if isinstance(size, tuple) and not (
274 | isinstance(size[0], str) and size[0].strip() == '*') and not (
275 | isinstance(size[0], int) or isinstance(size[0], float)):
276 | return True
277 | return False
278 |
279 | @staticmethod
280 | def process_oracle_type_size(p_list):
281 | if p_list[-1] == ')' and p_list[-4] == '(':
282 | # for Oracle sizes like 30 CHAR
283 | p_list[-3] += f" {p_list[-2]}"
284 | del p_list[-2]
285 | return p_list
286 |
287 | def p_column(self, p: List) -> None:
288 | """column : id c_type
289 | | column comment
290 | | column LP id RP
291 | | column LP id id RP
292 | | column LP id RP c_type
293 | | column LP id COMMA id RP
294 | | column LP id COMMA id RP c_type
295 | """
296 | p[0] = self.set_base_column_propery(p)
297 | p_list = list(p)
298 |
299 | p_list = self.process_oracle_type_size(p_list)
300 |
301 | p_list = remove_par(p_list)
302 |
303 | if isinstance(p_list[-1], dict) and "type" in p_list[-1] and len(p_list) <= 3:
304 | p[0]["type"] = p_list[-1]["type"]
305 | if p_list[-1].get("property"):
306 | for key, value in p_list[-1]["property"].items():
307 | p[0][key] = value
308 | elif isinstance(p_list[-1], dict):
309 | self.get_column_details(p_list, p)
310 | self.set_column_size(p_list, p)
311 |
312 | def set_column_size(self, p_list: List, p: List):
313 | if (
314 | not isinstance(p_list[-1], dict)
315 | and bool(re.match(r"[0-9]+", p_list[-1]))
316 | or p_list[-1] == "max"
317 | ):
318 | size = self.get_size(p_list)
319 | if self.check_type_parameter(size):
320 | p[0]["type_parameters"] = size
321 | else:
322 | p[0]["size"] = size
323 |
324 | @staticmethod
325 | def set_property(p: List) -> List:
326 | for item in p[1:]:
327 | if isinstance(item, dict):
328 | if "property" in item:
329 | for key, value in item["property"].items():
330 | p[0][key] = value
331 | del item["property"]
332 | p[0].update(item)
333 | return p
334 |
335 | @staticmethod
336 | def get_column_properties(p_list: List) -> Tuple:
337 | pk = False
338 | nullable = True
339 | default = None
340 | unique = False
341 | references = None
342 | if isinstance(p_list[-1], str):
343 | if p_list[-1].upper() == "KEY":
344 | pk = True
345 | nullable = False
346 | elif p_list[-1].upper() == "UNIQUE":
347 | unique = True
348 | elif isinstance(p_list[-1], dict) and "references" in p_list[-1]:
349 | p_list[-1]["references"]["column"] = p_list[-1]["references"]["columns"][0]
350 | del p_list[-1]["references"]["columns"]
351 | references = p_list[-1]["references"]
352 | return pk, default, unique, references, nullable
353 |
354 | def p_autoincrement(self, p: List) -> None:
355 | """ autoincrement : AUTOINCREMENT"""
356 | p[0] = {"autoincrement": True}
357 |
358 | def p_defcolumn(self, p: List) -> None:
359 | """defcolumn : column
360 | | defcolumn comment
361 | | defcolumn null
362 | | defcolumn encode
363 | | defcolumn PRIMARY KEY
364 | | defcolumn UNIQUE KEY
365 | | defcolumn UNIQUE
366 | | defcolumn check_ex
367 | | defcolumn default
368 | | defcolumn collate
369 | | defcolumn enforced
370 | | defcolumn ref
371 | | defcolumn foreign ref
372 | | defcolumn encrypt
373 | | defcolumn generated
374 | | defcolumn c_property
375 | | defcolumn on_update
376 | | defcolumn options
377 | | defcolumn autoincrement
378 | """
379 | p[0] = p[1]
380 | p_list = list(p)
381 |
382 | pk, default, unique, references, nullable = self.get_column_properties(p_list)
383 |
384 | self.set_property(p)
385 |
386 | p[0]["references"] = p[0].get("references", references)
387 | p[0]["unique"] = unique or p[0].get("unique", unique)
388 | p[0]["primary_key"] = pk or p[0].get("primary_key", pk)
389 | p[0]["nullable"] = (
390 | nullable if nullable is not True else p[0].get("nullable", nullable)
391 | )
392 | p[0]["default"] = p[0].get("default", default)
393 | p[0]["check"] = p[0].get("check", None)
394 | if isinstance(p_list[-1], dict) and p_list[-1].get("encode"):
395 | p[0]["encode"] = p[0].get("encode", p_list[-1]["encode"])
396 | p[0]["check"] = self.set_check_in_columm(p[0].get("check"))
397 |
398 | @staticmethod
399 | def set_check_in_columm(check: Optional[List]) -> Optional[str]:
400 | if check:
401 | check_statement = ""
402 | for n, item in enumerate(check):
403 | if isinstance(item, list):
404 | in_clause = ", ".join(item)
405 | check_statement += f" ({in_clause})"
406 | else:
407 | check_statement += f" {item}" if n > 0 else f"{item}"
408 |
409 | return check_statement
410 |
411 | def p_check_ex(self, p: List) -> None:
412 | """check_ex : check_st
413 | | constraint check_st
414 | """
415 | name = None
416 | if isinstance(p[1], dict):
417 | if "constraint" in p[1]:
418 | p[0] = {
419 | "check": {
420 | "constraint_name": p[1]["constraint"]["name"],
421 | "statement": " ".join(p[2]["check"]),
422 | }
423 | }
424 | elif "check" in p[1]:
425 | p[0] = p[1]
426 | if isinstance(p[1], list):
427 | p[0] = {
428 | "check": {"constraint_name": name, "statement": p[1]["check"]}
429 | }
430 | if len(p) >= 3:
431 | for item in list(p)[2:]:
432 | p[0]["check"]["statement"].append(item)
433 | else:
434 | p[0] = {"check": {"statement": [p[2]], "constraint_name": name}}
435 |
436 |
437 | class Schema:
438 | def p_expression_schema(self, p: List) -> None:
439 | """expr : create_schema
440 | | create_database
441 | | expr id
442 | | expr clone
443 | """
444 | p[0] = p[1]
445 | p_list = list(p)
446 |
447 | if isinstance(p_list[-1], dict):
448 | p[0].update(p_list[-1])
449 | elif len(p) > 2:
450 | p[0]["authorization"] = p[2]
451 |
452 | def set_properties_for_schema_and_database(self, p: List, p_list: List) -> None:
453 | if not p[0].get("properties"):
454 | if len(p_list) == 3:
455 | properties = p_list[-1]
456 | elif len(p_list) > 3:
457 | properties = {p_list[-3]: p_list[-1]}
458 | else:
459 | properties = {}
460 | if properties:
461 | p[0]["properties"] = properties
462 | else:
463 | p[0]["properties"].update({p_list[-3]: p_list[-1]})
464 |
465 | def set_auth_property_in_schema(self, p: List, p_list: List) -> None:
466 | if p_list[2] == auth:
467 | p[0] = {"schema_name": p_list[3], auth.lower(): p_list[3]}
468 | else:
469 | p[0] = {"schema_name": p_list[2], auth.lower(): p_list[-1]}
470 |
471 | def p_c_schema(self, p: List) -> None:
472 | """c_schema : CREATE SCHEMA
473 | | CREATE ID SCHEMA"""
474 |
475 | if len(p) == 4:
476 | p[0] = {"remote": True}
477 |
478 | def p_create_schema(self, p: List) -> None:
479 | """create_schema : c_schema id id
480 | | c_schema id id id
481 | | c_schema id
482 | | c_schema id DOT id
483 | | c_schema id option_comment
484 | | c_schema id DOT id option_comment
485 | | c_schema IF NOT EXISTS id
486 | | c_schema IF NOT EXISTS id DOT id
487 | | create_schema id id id
488 | | create_schema id id STRING
489 | | create_schema options
490 | """
491 | p_list = list(p)
492 |
493 | p[0] = {}
494 | auth_index = None
495 |
496 | if "comment" in p_list[-1]:
497 | p[0].update(p_list[-1])
498 | del p_list[-1]
499 |
500 | self.add_if_not_exists(p[0], p_list)
501 | if isinstance(p_list[1], dict):
502 | p[0] = p_list[1]
503 | self.set_properties_for_schema_and_database(p, p_list)
504 | elif auth in p_list:
505 | auth_index = p_list.index(auth)
506 | self.set_auth_property_in_schema(p, p_list)
507 |
508 | if isinstance(p_list[-1], str):
509 | if auth_index:
510 | schema_name = p_list[auth_index - 1]
511 | if schema_name is None:
512 | schema_name = p_list[auth_index + 1]
513 | else:
514 | schema_name = p_list[-1]
515 | p[0]["schema_name"] = schema_name.replace("`", "")
516 |
517 | p[0] = self.set_project_in_schema(p[0], p_list, auth_index)
518 |
519 | @staticmethod
520 | def set_project_in_schema(data: Dict, p_list: List, auth_index: int) -> Dict:
521 | if len(p_list) > 4 and not auth_index and "." in p_list:
522 | data["project"] = p_list[-3].replace("`", "")
523 | return data
524 |
525 | def p_create_database(self, p: List) -> None:
526 | """create_database : database_base
527 | | create_database id id id
528 | | create_database id id STRING
529 | | create_database options
530 | """
531 | p_list = list(p)
532 |
533 | if isinstance(p_list[1], dict):
534 | p[0] = p_list[1]
535 | self.set_properties_for_schema_and_database(p, p_list)
536 | else:
537 | p[0] = {f"{p[2].lower()}_name": p_list[-1]}
538 |
539 |
540 | class Drop:
541 | def p_expression_drop_table(self, p: List) -> None:
542 | """expr : DROP TABLE id
543 | | DROP TABLE id DOT id
544 | """
545 | # get schema & table name
546 | p_list = list(p)
547 | schema = None
548 | if len(p) > 4:
549 | if "." in p:
550 | schema = p_list[-3]
551 | table_name = p_list[-1]
552 | else:
553 | table_name = p_list[-1]
554 | p[0] = {"schema": schema, "table_name": table_name}
555 |
556 |
557 | class Type:
558 | def p_multiple_column_names(self, p: List) -> None:
559 | """multiple_column_names : column
560 | | multiple_column_names COMMA
561 | | multiple_column_names column
562 | """
563 | p_list = list(p)
564 | if isinstance(p[1], dict):
565 | p[0] = [p[1]]
566 | else:
567 | p[0] = p[1]
568 | if p_list[-1] != ",":
569 | p[0].append(p_list[-1])
570 |
571 | @staticmethod
572 | def add_columns_property_for_type(data: Dict, p_list: List) -> Dict:
573 | if "TABLE" in p_list or isinstance(p_list[-1], dict) and p_list[-1].get("name"):
574 | if not data["properties"].get("columns"):
575 | data["properties"]["columns"] = []
576 | data["properties"]["columns"].append(p_list[-1])
577 | return data
578 |
579 | @staticmethod
580 | def set_base_type(data: Dict, p_list: List) -> Dict:
581 | if len(p_list) > 3:
582 | data["base_type"] = p_list[2]
583 | else:
584 | data["base_type"] = None
585 | return data
586 |
587 | @staticmethod
588 | def process_str_base_type(data: Dict, p_list: List) -> Dict:
589 | base_type = data["base_type"].upper()
590 | if base_type == "ENUM":
591 | data["properties"]["values"] = p_list[3]
592 | elif data["base_type"] == "OBJECT":
593 | if "type" in p_list[3][0]:
594 | data["properties"]["attributes"] = p_list[3]
595 | return data
596 |
597 | def p_type_definition(self, p: List) -> None: # noqa: C901
598 | """type_definition : type_name id LP pid RP
599 | | type_name id LP multiple_column_names RP
600 | | type_name LP id_equals RP
601 | | type_name TABLE LP defcolumn
602 | | type_definition COMMA defcolumn
603 | | type_definition RP
604 | """
605 | p_list = remove_par(list(p))
606 | p[0] = p[1]
607 | if not p[0].get("properties"):
608 | p[0]["properties"] = {}
609 |
610 | p[0] = self.add_columns_property_for_type(p[0], p_list)
611 |
612 | p[0] = self.set_base_type(p[0], p_list)
613 |
614 | if isinstance(p[0]["base_type"], str):
615 | p[0] = self.process_str_base_type(p[0], p_list)
616 | elif isinstance(p_list[-1], list):
617 | for item in p_list[-1]:
618 | p[0]["properties"].update(item)
619 |
620 | def p_expression_type_as(self, p: List) -> None:
621 | """expr : type_definition"""
622 | p[0] = p[1]
623 |
624 | def p_type_name(self, p: List) -> None:
625 | """type_name : type_create id AS
626 | | type_create id DOT id AS
627 | | type_create id DOT id
628 | | type_create id
629 | """
630 | p_list = list(p)
631 | p[0] = {}
632 | if "." not in p_list:
633 | p[0]["schema"] = None
634 | p[0]["type_name"] = p_list[2]
635 | else:
636 | p[0]["schema"] = p[2]
637 | p[0]["type_name"] = p_list[4]
638 |
639 | def p_type_create(self, p: List) -> None:
640 | """type_create : CREATE TYPE
641 | | CREATE OR REPLACE TYPE
642 | """
643 | p[0] = None
644 |
645 |
646 | class Domain:
647 | def p_expression_domain_as(self, p: List) -> None:
648 | """expr : domain_name id LP pid RP"""
649 | p_list = list(p)
650 | p[0] = p[1]
651 | p[0]["base_type"] = p[2]
652 | p[0]["properties"] = {}
653 | if p[0]["base_type"] == "ENUM":
654 | p[0]["properties"]["values"] = p_list[4]
655 |
656 | def p_domain_name(self, p: List) -> None:
657 | """domain_name : CREATE DOMAIN id AS
658 | | CREATE DOMAIN id DOT id AS
659 | | CREATE DOMAIN id DOT id
660 | | CREATE DOMAIN id
661 | """
662 | p_list = list(p)
663 | p[0] = {}
664 | if "." not in p_list:
665 | p[0]["schema"] = None
666 | else:
667 | p[0]["schema"] = p[3]
668 | p[0]["domain_name"] = p_list[-2]
669 |
670 |
671 | class BaseSQL(
672 | Database, Table, Drop, Domain, Column, AfterColumns, Type, Schema, TableSpaces
673 | ):
674 | def clean_up_id_list_in_equal(self, p_list: List) -> List: # noqa R701
675 | if isinstance(p_list[1], str) and p_list[1].endswith("="):
676 | p_list[1] = p_list[1][:-1]
677 | elif "," in p_list:
678 | if len(p_list) == 4:
679 | p_list = p_list[-1].split("=")
680 | elif len(p_list) == 5 and p_list[-2].endswith("="):
681 | p_list[-2] = p_list[-2][:-1]
682 | elif "=" == p_list[-2]:
683 | p_list.pop(-2)
684 | return p_list
685 |
686 | def get_property(self, p_list: List) -> Dict:
687 | _property = None
688 | if not isinstance(p_list[-2], list):
689 | _value = True
690 | value = None
691 | if p_list[-2]:
692 | if not p_list[-2] == "=":
693 | key = p_list[-2]
694 | else:
695 | key = p_list[-3]
696 |
697 | else:
698 | _value = False
699 | key = p_list[-1]
700 | if "=" in key:
701 | key = key.split("=")
702 | if _value:
703 | value = f"{key[1]} {p_list[-1]}"
704 | key = key[0]
705 | else:
706 | value = p_list[-1]
707 | _property = {key: value}
708 | else:
709 | _property = p_list[-2][0]
710 | return _property
711 |
712 | def p_id_equals(self, p: List) -> None:
713 | """id_equals : id id id_or_string
714 | | id id_or_string
715 | | id_equals COMMA
716 | | id_equals COMMA id id id_or_string
717 | | id
718 | | id_equals LP pid RP
719 | | id_equals LP pid RP id
720 | | id_equals COMMA id id
721 | | id_equals COMMA id
722 | """
723 | p_list = remove_par(list(p))
724 | if p_list[-1] == "]":
725 | p_list = p_list[:-1]
726 | if isinstance(p_list[-1], list):
727 | p[0] = p[1]
728 | p[0][-1][list(p[0][-1].keys())[0]] = p_list[-1]
729 | else:
730 | p_list = self.clean_up_id_list_in_equal(p_list)
731 | _property = self.get_property(p_list)
732 |
733 | if _property:
734 | if not isinstance(p[1], list):
735 | p[0] = [_property]
736 | else:
737 | p[0] = p[1]
738 | if not p_list[-1] == ",":
739 | p[0].append(_property)
740 |
741 | def p_expression_index(self, p: List) -> None:
742 | """expr : index_table_name LP index_pid RP"""
743 | p_list = remove_par(list(p))
744 | p[0] = p[1]
745 | for item in ["detailed_columns", "columns"]:
746 | if item not in p[0]:
747 | p[0][item] = p_list[-1][item]
748 | else:
749 | p[0][item].extend(p_list[-1][item])
750 |
751 | def p_index_table_name(self, p: List) -> None:
752 | """index_table_name : create_index ON id
753 | | create_index ON id DOT id
754 | """
755 | p[0] = p[1]
756 | p_list = list(p)
757 | schema = None
758 | if "." in p_list:
759 | schema = p_list[-3]
760 | table_name = p_list[-1]
761 | else:
762 | table_name = p_list[-1]
763 | p[0].update({"schema": schema, "table_name": table_name})
764 |
765 | def p_create_index(self, p: List) -> None:
766 | """create_index : CREATE INDEX id
767 | | CREATE UNIQUE INDEX id
768 | | create_index ON id
769 | | CREATE CLUSTERED INDEX id
770 | """
771 | p_list = list(p)
772 | if "CLUSTERED" in p_list:
773 | clustered = True
774 | else:
775 | clustered = False
776 | if isinstance(p[1], dict):
777 | p[0] = p[1]
778 | else:
779 | p[0] = {
780 | "schema": None,
781 | "index_name": p_list[-1],
782 | "unique": "UNIQUE" in p_list,
783 | "clustered": clustered,
784 | }
785 |
786 | def extract_check_data(self, p, p_list):
787 | if isinstance(p_list[-1]["check"], list):
788 | check = " ".join(p_list[-1]["check"])
789 | if isinstance(check, str):
790 | check = {"constraint_name": None, "statement": check}
791 | else:
792 | check = p_list[-1]["check"]
793 | p[0] = self.set_constraint(p[0], "checks", check, check["constraint_name"])
794 | if not p[0].get("checks"):
795 | p[0]["checks"] = []
796 | p[0]["checks"].append(check)
797 | return p[0]
798 |
799 | def p_expression_table(self, p: List) -> None: # noqa R701
800 | """expr : table_name defcolumn
801 | | table_name LP defcolumn
802 | | table_name
803 | | expr COMMA defcolumn
804 | | expr COMMA
805 | | expr COMMA constraint
806 | | expr COMMA check_ex
807 | | expr COMMA foreign
808 | | expr COMMA pkey
809 | | expr COMMA uniq
810 | | expr COMMA statem_by_id
811 | | expr COMMA constraint uniq
812 | | expr COMMA period_for
813 | | expr COMMA pkey_constraint
814 | | expr COMMA constraint pkey
815 | | expr COMMA constraint pkey enforced
816 | | expr COMMA constraint foreign ref
817 | | expr COMMA foreign ref
818 | | expr encode
819 | | expr DEFAULT id id id
820 | | expr RP
821 | """
822 | p[0] = p[1] or defaultdict(list)
823 | p_list = remove_par(list(p))
824 | if p_list[-1] != "," and p_list[-1] is not None:
825 | if "type" in p_list[-1] and "name" in p_list[-1]:
826 | if not p[0].get("columns"):
827 | p[0]["columns"] = []
828 | p[0]["columns"].append(p_list[-1])
829 | elif "check" in p_list[-1]:
830 | p[0] = self.extract_check_data(p, p_list)
831 | elif "enforced" in p_list[-1]:
832 | p_list[-2].update(p_list[-1])
833 | p[0].update({"primary_key_enforced": p_list[-1]["enforced"]})
834 | elif 'DEFAULT' in p_list:
835 | p[0].update({"default_charset": p_list[-1]})
836 | elif isinstance(p_list[-1], dict):
837 | p[0].update(p_list[-1])
838 |
839 | if isinstance(p_list[-1], dict):
840 | p[0] = self.process_constraints_and_refs(p[0], p_list)
841 |
842 | def process_unique_and_primary_constraint(self, data: Dict, p_list: List) -> Dict:
843 | if p_list[-1].get("unique_statement"):
844 | data = self.set_constraint(
845 | data,
846 | "uniques",
847 | {"columns": p_list[-1]["unique_statement"]},
848 | p_list[-2]["constraint"]["name"],
849 | )
850 | else:
851 | data = self.set_constraint(
852 | data,
853 | "primary_keys",
854 | {"columns": p_list[-1]["primary_key"]},
855 | p_list[-2]["constraint"]["name"],
856 | )
857 | return data
858 |
859 | def process_constraints_and_refs(self, data: Dict, p_list: List) -> Dict:
860 |
861 | if "constraint" in p_list[-2]:
862 | data = self.process_unique_and_primary_constraint(data, p_list)
863 | elif (
864 | len(p_list) >= 4
865 | and isinstance(p_list[3], dict)
866 | and p_list[3].get("constraint")
867 | and p_list[3]["constraint"].get("primary_key")
868 | ):
869 | del p_list[3]["constraint"]["primary_key"]
870 | data = self.set_constraint(
871 | target_dict=data,
872 | _type="primary_keys",
873 | constraint=p_list[3]["constraint"],
874 | constraint_name=p_list[3]["constraint"]["name"],
875 | )
876 | del data["constraint"]
877 | elif p_list[-1].get("references"):
878 | data = self.add_ref_information_to_table(data, p_list)
879 | return data
880 |
881 | def add_ref_information_to_table(self, data, p_list):
882 | if len(p_list) > 4 and "constraint" in p_list[3]:
883 | data = self.set_constraint(
884 | data,
885 | "references",
886 | p_list[-1]["references"],
887 | p_list[3]["constraint"]["name"],
888 | )
889 | elif isinstance(p_list[-2], list):
890 | if "ref_columns" not in data:
891 | data["ref_columns"] = []
892 |
893 | for num, column in enumerate(p_list[-2]):
894 | ref = deepcopy(p_list[-1]["references"])
895 | ref["column"] = ref["columns"][num]
896 | del ref["columns"]
897 | ref["name"] = column
898 | data["ref_columns"].append(ref)
899 | return data
900 |
901 | @staticmethod
902 | def set_constraint(
903 | target_dict: Dict, _type: str, constraint: Dict, constraint_name: str
904 | ) -> Dict:
905 | if not target_dict.get("constraints"):
906 | target_dict["constraints"] = {}
907 | if not target_dict["constraints"].get(_type):
908 | target_dict["constraints"][_type] = []
909 | if "name" in constraint:
910 | del constraint["name"]
911 | constraint.update({"constraint_name": constraint_name})
912 | target_dict["constraints"][_type].append(constraint)
913 | return target_dict
914 |
915 | def p_likke(self, p: List) -> None:
916 | """likke : LIKE
917 | | CLONE
918 | """
919 | p[0] = None
920 |
921 | def p_expression_like_table(self, p: List) -> None:
922 | """expr : table_name likke id
923 | | table_name likke id DOT id
924 | | table_name LP likke id DOT id RP
925 | | table_name LP likke id RP
926 | """
927 | # get schema & table name
928 | p_list = remove_par(list(p))
929 | if len(p_list) > 4:
930 | if "." in p:
931 | schema = p_list[-3]
932 | table_name = p_list[-1]
933 | else:
934 | table_name = p_list[-1]
935 | schema = None
936 | p[0] = p[1]
937 | p[0].update({"like": {"schema": schema, "table_name": table_name}})
938 |
939 | def p_t_name(self, p: List) -> None:
940 | """t_name : id DOT id
941 | | id
942 | | id DOT id DOT id
943 | """
944 | p_list = list(p)
945 |
946 | project = None
947 |
948 | if len(p) > 3:
949 | if "." in p:
950 | schema = p_list[-3]
951 | table_name = p_list[-1]
952 | if len(p) == 6:
953 | project = p_list[1]
954 | else:
955 | table_name = p_list[-1]
956 | schema = None
957 |
958 | p[0] = {"schema": schema, "table_name": table_name, "columns": [], "checks": []}
959 |
960 | if project:
961 | p[0]["project"] = project
962 |
963 | def p_table_name(self, p: List) -> None:
964 | """table_name : create_table t_name
965 | | table_name likke id
966 | """
967 |         # can contain additional properties, like 'external' for HQL
968 | p[0] = p[1]
969 |
970 | p[0].update(list(p)[-1])
971 |
972 | def p_expression_seq(self, p: List) -> None:
973 | """expr : seq_name
974 | | expr INCREMENT id
975 | | expr INCREMENT id id
976 | | expr START id
977 | | expr START id id
978 | | expr MINVALUE id
979 | | expr NO MINVALUE
980 | | expr NO MAXVALUE
981 | | expr MAXVALUE id
982 | | expr CACHE id
983 | | expr CACHE
984 | """
985 | # get schema & table name
986 | p_list = list(p)
987 | p[0] = p[1]
988 | value = None
989 | if len(p) == 4:
990 | if p[2] == "NO":
991 | value = {p_list[-1].lower(): False}
992 | else:
993 | value = {p[2].lower(): int(p_list[-1])}
994 | elif len(p) == 3:
995 | value = {p[2].lower(): True}
996 | elif len(p) == 5:
997 | value = {f"{p[2].lower()}_{p[3].lower()}": int(p_list[-1])}
998 | if value:
999 | p[0].update(value)
1000 |
1001 | def p_seq_name(self, p: List) -> None:
1002 | """seq_name : create_seq id DOT id
1003 | | create_seq id
1004 | """
1005 | # get schema & table name
1006 | p_list = list(p)
1007 | schema = None
1008 | if len(p) > 4:
1009 | if "." in p:
1010 | schema = p_list[-3]
1011 | seq_name = p_list[-1]
1012 | else:
1013 | seq_name = p_list[-1]
1014 | p[0] = {"schema": schema, "sequence_name": seq_name}
1015 |
1016 | def p_create_seq(self, p: List) -> None:
1017 | """create_seq : CREATE SEQUENCE IF NOT EXISTS
1018 | | CREATE SEQUENCE
1019 |
1020 | """
1021 | # get schema & table name
1022 |
1023 |         p[0] = self.add_if_not_exists({}, list(p))
1024 |
1025 | def p_tid(self, p: List) -> None:
1026 | """tid : LT id
1027 | | LT
1028 | | tid LT
1029 | | tid id
1030 | | tid COMMAT
1031 | | tid RT
1032 | """
1033 | if not isinstance(p[1], list):
1034 | p[0] = [p[1]]
1035 | else:
1036 | p[0] = p[1]
1037 |
1038 | for i in list(p)[2:]:
1039 | if not i == "[]" and not i == ",":
1040 | p[0][0] += f" {i}"
1041 | else:
1042 | p[0][0] += f"{i}"
1043 |
1044 | @staticmethod
1045 | def get_complex_type(p, p_list):
1046 | if len(p_list) == 4:
1047 | p[0]["type"] = f"{p[2]} {p[3][0]}"
1048 | elif p[0]["type"]:
1049 | if len(p[0]["type"]) == 1 and isinstance(p[0]["type"], list):
1050 | p[0]["type"] = p[0]["type"][0]
1051 | p[0]["type"] = f'{p[0]["type"]} {p_list[-1][0]}'
1052 | else:
1053 | p[0]["type"] = p_list[-1][0]
1054 | return p[0]
1055 |
1056 | def extract_references(self, table_data: Dict):
1057 | ref = {
1058 | "table": table_data["table_name"],
1059 | "columns": [None],
1060 | "schema": table_data["schema"],
1061 | "on_delete": None,
1062 | "on_update": None,
1063 | "deferrable_initially": None,
1064 | }
1065 |
1066 | if table_data.get("project"):
1067 | ref["project"] = table_data["project"]
1068 |
1069 | return ref
1070 |
1071 | def p_null(self, p: List) -> None:
1072 | """null : NULL
1073 | | NOT NULL
1074 | """
1075 | nullable = True
1076 | if "NULL" in p or "null" in p:
1077 | if "NOT" in p or "not" in p:
1078 | nullable = False
1079 | p[0] = {"nullable": nullable}
1080 |
1081 | def p_f_call(self, p: List) -> None:
1082 | """f_call : id LP RP
1083 | | id LP f_call RP
1084 | | id LP multi_id RP
1085 | | id LP pid RP
1086 | """
1087 | p_list = list(p)
1088 | if isinstance(p[1], list):
1089 | p[0] = p[1]
1090 | p[0].append(p_list[-1])
1091 | else:
1092 | value = ""
1093 | for elem in p_list[1:]:
1094 | if isinstance(elem, list):
1095 | elem = ",".join(elem)
1096 | value += elem
1097 | p[0] = value
1098 |
1099 | def p_multi_id(self, p: List) -> None:
1100 | """multi_id : id
1101 | | multi_id id
1102 | | f_call
1103 | | multi_id f_call
1104 | """
1105 | p_list = list(p)
1106 | if isinstance(p[1], list):
1107 | p[0] = p[1]
1108 | p[0].append(p_list[-1])
1109 | else:
1110 | value = " ".join(p_list[1:])
1111 | p[0] = value
1112 |
1113 | def p_funct_args(self, p: List) -> None:
1114 | """funct_args : LP multi_id RP"""
1115 | p[0] = {"args": f"({p[2]})"}
1116 |
1117 | def p_funct(self, p: List) -> None:
1118 | """funct : id LP multi_id RP"""
1119 | p[0] = {"func_name": p[1], "args": f"({p[3]})"}
1120 |
1121 | def p_multiple_funct(self, p: List) -> None:
1122 | """multiple_funct : funct
1123 | | multiple_funct COMMA funct
1124 | | multiple_funct COMMA
1125 | """
1126 | if not isinstance(p[1], list):
1127 | p[0] = [p[1]]
1128 | else:
1129 | p[0] = p[1]
1130 | p[0].append(p[-1])
1131 |
1132 | def p_funct_expr(self, p: List) -> None:
1133 | """funct_expr : LP multi_id RP
1134 | | multi_id
1135 | """
1136 | if len(p) > 2:
1137 | p[0] = p[2]
1138 | else:
1139 | p[0] = p[1]
1140 |
1141 | def p_dot_id(self, p: List) -> None:
1142 | """dot_id : id DOT id"""
1143 | p[0] = f"{p[1]}.{p[3]}"
1144 |
1145 | def p_default(self, p: List) -> None:
1146 | """default : DEFAULT id
1147 | | DEFAULT STRING
1148 | | DEFAULT NULL
1149 | | default FOR dot_id
1150 | | DEFAULT funct_expr
1151 | | DEFAULT LP pid RP
1152 | | DEFAULT LP funct_expr pid RP
1153 | | default id
1154 | | default LP RP
1155 | """
1156 | p_list = remove_par(list(p))
1157 |
1158 | default = self.pre_process_default(p_list)
1159 |
1160 | if isinstance(p_list[-1], list):
1161 | p_list[-1] = " ".join(p_list[-1])
1162 | default = " ".join(p_list[1:])
1163 | elif not isinstance(default, dict) and default.isnumeric():
1164 | default = int(default)
1165 |
1166 | if isinstance(p[1], dict):
1167 | p[0] = self.process_dict_default_value(p_list, default)
1168 | else:
1169 | p[0] = {"default": default}
1170 |
1171 | @staticmethod
1172 | def pre_process_default(p_list: List) -> Any:
1173 | if len(p_list) == 5 and isinstance(p_list[3], list):
1174 | default = p_list[3][0]
1175 | elif "DEFAULT" in p_list and len(p_list) == 4:
1176 | default = f"{p_list[2]} {p_list[3]}"
1177 | else:
1178 | default = p_list[2]
1179 | return default
1180 |
1181 | @staticmethod
1182 | def process_dict_default_value(p_list: List, default: Any) -> Dict:
1183 | data = p_list[1]
1184 | if "FOR" in default:
1185 | data["default"] = {"next_value_for": p_list[-1]}
1186 | else:
1187 | for i in p_list[2:]:
1188 | if isinstance(p_list[2], str):
1189 | p_list[2] = p_list[2].replace("\\'", "'")
1190 | if i == ")" or i == "(":
1191 | data["default"] = str(data["default"]) + f"{i}"
1192 | else:
1193 | data["default"] = str(data["default"]) + f" {i}"
1194 | data["default"] = data["default"].replace("))", ")")
1195 | return data
1196 |
1197 | def p_enforced(self, p: List) -> None:
1198 | """enforced : ENFORCED
1199 | | NOT ENFORCED
1200 | """
1201 | p_list = list(p)
1202 | p[0] = {"enforced": len(p_list) == 1}
1203 |
1204 | def p_collate(self, p: List) -> None:
1205 | """collate : COLLATE id
1206 | | COLLATE STRING
1207 | """
1208 | p_list = list(p)
1209 | p[0] = {"collate": p_list[-1]}
1210 |
1211 | def p_constraint(self, p: List) -> None:
1212 | """
1213 | constraint : CONSTRAINT id
1214 | """
1215 |
1216 | p_list = list(p)
1217 |
1218 | p[0] = {"constraint": {"name": p_list[-1]}}
1219 |
1220 | def p_generated(self, p: List) -> None:
1221 | """
1222 | generated : gen_always funct_expr
1223 | | gen_always funct_expr id
1224 | | gen_always LP multi_id RP
1225 | | gen_always f_call
1226 | """
1227 | p_list = list(p)
1228 | stored = False
1229 | if len(p) > 3 and p_list[-1].lower() == "stored":
1230 | stored = True
1231 | _as = p[2]
1232 | p[0] = {"generated": {"always": True, "as": _as, "stored": stored}}
1233 |
1234 | def p_gen_always(self, p: List) -> None:
1235 | """
1236 | gen_always : GENERATED id AS
1237 | """
1238 | p[0] = {"generated": {"always": True}}
1239 |
1240 | def p_check_st(self, p: List) -> None:
1241 | """check_st : CHECK LP id
1242 | | check_st id
1243 | | check_st STRING
1244 | | check_st id STRING
1245 | | check_st id RP
1246 | | check_st STRING RP
1247 | | check_st funct_args
1248 | | check_st LP pid RP
1249 | """
1250 | p_list = remove_par(list(p))
1251 | if isinstance(p[1], dict):
1252 | p[0] = p[1]
1253 | else:
1254 | p[0] = {"check": []}
1255 | for item in p_list[2:]:
1256 | if isinstance(p_list[-1], dict) and p_list[-1].get("args"):
1257 | p[0]["check"][-1] += p_list[-1]["args"]
1258 | elif isinstance(item, list):
1259 | p[0]["check"].append(f"({','.join(item)})")
1260 | else:
1261 | p[0]["check"].append(item)
1262 |
1263 | def p_using_tablespace(self, p: List) -> None:
1264 | """using_tablespace : USING INDEX tablespace"""
1265 | p_list = list(p)
1266 | p[0] = {"using": {"tablespace": p_list[-1], "index": True}}
1267 |
1268 | def p_expression_alter(self, p: List) -> None:
1269 | """expr : alter_foreign ref
1270 | | alter_check
1271 | | alter_unique
1272 | | alter_default
1273 | | alter_primary_key
1274 | | alter_primary_key using_tablespace
1275 | """
1276 | p[0] = p[1]
1277 | if len(p) == 3:
1278 | p[0].update(p[2])
1279 |
1280 | def p_alter_primary_key(self, p: List) -> None:
1281 | """alter_primary_key : alt_table PRIMARY KEY LP pid RP
1282 | | alt_table constraint PRIMARY KEY LP pid RP
1283 | """
1284 |
1285 | p_list = remove_par(list(p))
1286 | p[0] = p[1]
1287 | p[0]["primary_key"] = {"constraint_name": None, "columns": p_list[-1]}
1288 | if "constraint" in p[2]:
1289 | p[0]["primary_key"]["constraint_name"] = p[2]["constraint"]["name"]
1290 |
1291 | def p_alter_unique(self, p: List) -> None:
1292 | """alter_unique : alt_table UNIQUE LP pid RP
1293 | | alt_table constraint UNIQUE LP pid RP
1294 | """
1295 |
1296 | p_list = remove_par(list(p))
1297 | p[0] = p[1]
1298 | p[0]["unique"] = {"constraint_name": None, "columns": p_list[-1]}
1299 | if "constraint" in p[2]:
1300 | p[0]["unique"]["constraint_name"] = p[2]["constraint"]["name"]
1301 |
1302 | @staticmethod
1303 | def get_column_and_value_from_alter(p: List) -> Tuple:
1304 |
1305 | p_list = remove_par(list(p))
1306 |
1307 | column = None
1308 | value = None
1309 |
1310 | if isinstance(p_list[2], str) and "FOR" == p_list[2].upper():
1311 | column = p_list[-1]
1312 | elif p[0].get("default") and p[0]["default"].get("value"):
1313 | value = p[0]["default"]["value"] + " " + p_list[-1]
1314 | else:
1315 | value = p_list[-1]
1316 | return column, value
1317 |
1318 | def p_alter_default(self, p: List) -> None:
1319 | """alter_default : alt_table id id
1320 | | alt_table constraint id id
1321 | | alt_table id STRING
1322 | | alt_table constraint id STRING
1323 | | alter_default id
1324 | | alter_default FOR pid
1325 | """
1326 |
1327 | p[0] = p[1]
1328 | column, value = self.get_column_and_value_from_alter(p)
1329 |
1330 | if "default" not in p[0]:
1331 |
1332 | p[0]["default"] = {
1333 | "constraint_name": None,
1334 | "columns": column,
1335 | "value": value,
1336 | }
1337 | else:
1338 | p[0]["default"].update(
1339 | {
1340 | "columns": p[0]["default"].get("column") or column,
1341 | "value": value or p[0]["default"].get("value"),
1342 | }
1343 | )
1344 | if "constraint" in p[2]:
1345 | p[0]["default"]["constraint_name"] = p[2]["constraint"]["name"]
1346 |
1347 | def p_pid(self, p: List) -> None:
1348 | """pid : id
1349 | | STRING
1350 | | pid id
1351 | | pid STRING
1352 | | STRING LP RP
1353 | | id LP RP
1354 | | pid COMMA id
1355 | | pid COMMA STRING
1356 | """
1357 | p_list = list(p)
1358 |
1359 | if len(p_list) == 4 and isinstance(p[1], str):
1360 | p[0] = ["".join(p[1:])]
1361 | elif not isinstance(p_list[1], list):
1362 | p[0] = [p_list[1]]
1363 | else:
1364 | p[0] = p_list[1]
1365 | p[0].append(p_list[-1])
1366 |
1367 | def p_alter_check(self, p: List) -> None:
1368 | """alter_check : alt_table check_st
1369 | | alt_table constraint check_st
1370 | """
1371 | p_list = remove_par(list(p))
1372 | p[0] = p[1]
1373 | if isinstance(p[1], dict):
1374 | p[0] = p[1]
1375 | if not p[0].get("check"):
1376 | p[0]["check"] = {"constraint_name": None, "statement": []}
1377 | if isinstance(p[2], dict) and "constraint" in p[2]:
1378 | p[0]["check"]["constraint_name"] = p[2]["constraint"]["name"]
1379 | p[0]["check"]["statement"] = p_list[-1]["check"]
1380 |
1381 | def p_index_pid(self, p: List) -> None:
1382 | """index_pid : id
1383 | | index_pid id
1384 | | index_pid COMMA index_pid
1385 | """
1386 | p_list = list(p)
1387 | if len(p_list) == 2:
1388 | detailed_column = {"name": p_list[1], "order": "ASC", "nulls": "LAST"}
1389 | column = p_list[1]
1390 | p[0] = {"detailed_columns": [detailed_column], "columns": [column]}
1391 | else:
1392 | p[0] = p[1]
1393 | if len(p) == 3:
1394 | if p_list[-1] in ["DESC", "ASC"]:
1395 | p[0]["detailed_columns"][0]["order"] = p_list[-1]
1396 | else:
1397 | p[0]["detailed_columns"][0]["nulls"] = p_list[-1]
1398 |
1399 | column = p_list[2]
1400 | elif isinstance(p_list[-1], dict):
1401 | for i in p_list[-1]["columns"]:
1402 | p[0]["columns"].append(i)
1403 | for i in p_list[-1]["detailed_columns"]:
1404 | p[0]["detailed_columns"].append(i)
1405 |
1406 | def p_alter_foreign(self, p: List) -> None:
1407 | """alter_foreign : alt_table foreign
1408 | | alt_table constraint foreign
1409 | """
1410 |
1411 | p_list = list(p)
1412 |
1413 | p[0] = p[1]
1414 | if isinstance(p_list[-1], list):
1415 | p[0]["columns"] = [{"name": i} for i in p_list[-1]]
1416 | else:
1417 | column = p_list[-1]
1418 |
1419 | if not p[0].get("columns"):
1420 | p[0]["columns"] = []
1421 | p[0]["columns"].append(column)
1422 |
1423 | for column in p[0]["columns"]:
1424 | if isinstance(p_list[2], dict) and "constraint" in p_list[2]:
1425 | column.update({"constraint_name": p_list[2]["constraint"]["name"]})
1426 |
1427 | def p_alt_table_name(self, p: List) -> None:
1428 | """alt_table : ALTER TABLE t_name ADD
1429 | | ALTER TABLE IF EXISTS t_name ADD
1430 | | ALTER TABLE ID t_name ADD"""
1431 | p_list = list(p)
1432 | table_data = p_list[-2]
1433 | p[0] = {
1434 | "alter_table_name": table_data["table_name"],
1435 | "schema": table_data["schema"],
1436 | }
1437 | if "IF" in p_list:
1438 | p[0]["if_exists"] = True
1439 | if len(p_list) == 6:
1440 | p[0]["only"] = True
1441 | if table_data.get("project"):
1442 | p[0]["project"] = table_data["project"]
1443 |
1444 | def p_foreign(self, p):
1445 |         # todo: id lists handling needs to be redone
1446 | """foreign : FOREIGN KEY LP pid RP
1447 | | FOREIGN KEY"""
1448 | p_list = remove_par(list(p))
1449 | if len(p_list) == 4:
1450 | columns = p_list[-1]
1451 | p[0] = columns
1452 |
1453 | def p_ref(self, p: List) -> None:
1454 | """ref : REFERENCES t_name
1455 | | ref LP pid RP
1456 | | ref ON DELETE id
1457 | | ref ON UPDATE id
1458 | | ref DEFERRABLE INITIALLY id
1459 | | ref NOT DEFERRABLE
1460 | """
1461 | p_list = remove_par(list(p))
1462 | if isinstance(p[1], dict):
1463 | p[0] = p[1]
1464 | if "ON" not in p_list and "DEFERRABLE" not in p_list:
1465 | p[0]["references"]["columns"] = p_list[-1]
1466 | else:
1467 | p[0]["references"]["columns"] = p[0]["references"].get(
1468 | "columns", [None]
1469 | )
1470 | else:
1471 | data = {"references": self.extract_references(p_list[-1])}
1472 | p[0] = data
1473 | p[0] = self.process_references_with_properties(p[0], p_list)
1474 |
1475 | @staticmethod
1476 | def process_references_with_properties(data: Dict, p_list: List) -> Dict:
1477 | if "ON" in p_list:
1478 | if "DELETE" in p_list:
1479 | data["references"]["on_delete"] = p_list[-1]
1480 | elif "UPDATE" in p_list:
1481 | data["references"]["on_update"] = p_list[-1]
1482 | elif "DEFERRABLE" in p_list:
1483 | if "NOT" not in p_list:
1484 | data["references"]["deferrable_initially"] = p_list[-1]
1485 | else:
1486 | data["references"]["deferrable_initially"] = "NOT"
1487 | return data
1488 |
1489 | def p_expression_primary_key(self, p):
1490 | "expr : pkey"
1491 | p[0] = p[1]
1492 |
1493 | def p_uniq(self, p: List) -> None:
1494 | """uniq : UNIQUE LP pid RP"""
1495 | p_list = remove_par(list(p))
1496 | p[0] = {"unique_statement": p_list[-1]}
1497 |
1498 | def p_statem_by_id(self, p: List) -> None:
1499 | """statem_by_id : id LP pid RP
1500 | | id KEY LP pid RP
1501 | """
1502 | p_list = remove_par(list(p))
1503 | if p[1].upper() == "UNIQUE":
1504 | p[0] = {"unique_statement": p_list[-1]}
1505 | elif p[1].upper() == "CHECK":
1506 | p[0] = {"check": p_list[-1]}
1507 | elif p[1].upper() == "PRIMARY":
1508 | p[0] = {"primary_key": p_list[-1]}
1509 |
1510 | def p_pkey(self, p: List) -> None:
1511 | """pkey : pkey_statement LP pid RP
1512 | | pkey_statement ID LP pid RP
1513 | """
1514 | p_list = remove_par(list(p))
1515 |
1516 | columns = []
1517 |
1518 | p[0] = {}
1519 |
1520 | if isinstance(p_list[2], str) and "CLUSTERED" == p_list[2]:
1521 | order = None
1522 | column = None
1523 | for item in p_list[-1]:
1524 | if item not in ["ASC", "DESC"]:
1525 | column = item
1526 | else:
1527 | order = item
1528 | if column and order:
1529 | columns.append({"column": column, "order": order})
1530 | column = None
1531 | order = None
1532 | p[0]["clustered_primary_key"] = columns
1533 |
1534 | p[0] = self.process_order_in_pk(p[0], p_list)
1535 |
1536 | @staticmethod
1537 | def process_order_in_pk(data: Dict, p_list: List) -> Dict:
1538 | columns = []
1539 | for item in p_list[-1]:
1540 | if item not in ["ASC", "DESC"]:
1541 | columns.append(item)
1542 | data["primary_key"] = columns
1543 | return data
1544 |
1545 | def p_pkey_statement(self, p: List) -> None:
1546 | """pkey_statement : PRIMARY KEY"""
1547 | p[0] = {"primary_key": None}
1548 |
1549 | def p_comment(self, p: List) -> None:
1550 | """comment : COMMENT STRING"""
1551 | p_list = remove_par(list(p))
1552 | p[0] = {"comment": check_spec(p_list[-1])}
1553 |
1554 | def p_tablespace(self, p: List) -> None:
1555 | """tablespace : TABLESPACE id
1556 | | TABLESPACE id properties
1557 | """
1558 | # Initial 5m Next 5m Maxextents Unlimited
1559 | p[0] = self.get_tablespace_data(list(p))
1560 |
1561 | def p_expr_tablespace(self, p: List) -> None:
1562 | """expr : expr tablespace"""
1563 | p_list = list(p)
1564 | p[0] = p[1]
1565 | p[0]["tablespace"] = p_list[-1]
1566 |
--------------------------------------------------------------------------------
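Note: the grammar rules above (p_alt_table_name, p_foreign, p_ref) are what turn an ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ... REFERENCES ... statement into the 'alter' metadata that sondesh/output/common.py later attaches to the target table. A minimal sketch of that round trip, assuming the same parse_the_ddl entry point the tests use; the DDL text itself is made up for illustration:

import pprint

from sondesh.ddl_parser import parse_the_ddl

# the ALTER statement must follow the CREATE of the table it alters, otherwise
# add_alter_to_table() in output/common.py raises "Found ALTER statement ..."
ddl = """
create table users (user_id integer not null);
create table orders (order_id integer not null, user_id integer);
alter table orders add constraint fk_user foreign key (user_id) references users (user_id);
"""

result = parse_the_ddl(ddl).run(group_by_type=True)

# the foreign key should end up in the 'alter' section of the 'orders' table,
# as a column entry with 'constraint_name' and a per-column 'references' block
for table in result["tables"]:
    pprint.pprint({table["table_name"]: table["alter"]})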
/sondesh/output/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/output/__init__.py
--------------------------------------------------------------------------------
/sondesh/output/common.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | from copy import deepcopy
5 | from typing import Dict, List, Tuple
6 |
7 | from sondesh.output import dialects as d
8 |
9 | output_modes = [
10 | "mssql",
11 | "mysql",
12 | "oracle",
13 | "hql",
14 | "sql",
15 | "snowflake",
16 | "redshift",
17 | "bigquery",
18 | ]
19 |
20 |
21 | logger = logging.getLogger('sondesh')
22 |
23 |
24 | def get_table_from_tables_data(tables_dict: Dict, table_id: Tuple[str, str]) -> Dict:
25 |     """get table by name and schema or raise an exception"""
26 | target_table = tables_dict.get(table_id)
27 | if target_table is None:
28 |
29 | raise ValueError(
30 |             f"Found ALTER statement for a non-existent TABLE {table_id[0]} with SCHEMA {table_id[1]}"
31 | )
32 | return target_table
33 |
34 |
35 | def add_index_to_table(tables_dict: Dict, statement: Dict, output_mode: str) -> Dict:
36 | """populate 'index' key in output data"""
37 | table_id = (statement["table_name"], statement["schema"])
38 | target_table = get_table_from_tables_data(tables_dict, table_id)
39 |
40 | del statement["schema"]
41 | del statement["table_name"]
42 |
43 | if output_mode != "mssql":
44 | del statement["clustered"]
45 |
46 | target_table["index"].append(statement)
47 |
48 | return tables_dict
49 |
50 |
51 | def create_alter_column(index: int, column: Dict, ref_statement: Dict) -> Dict:
52 | """create alter column metadata"""
53 | column_reference = ref_statement["columns"][index]
54 | alter_column = {
55 | "name": column["name"],
56 | "constraint_name": column.get("constraint_name"),
57 | }
58 | alter_column["references"] = deepcopy(ref_statement)
59 | alter_column["references"]["column"] = column_reference
60 | del alter_column["references"]["columns"]
61 | return alter_column
62 |
63 |
64 | def prepare_alter_columns(target_table: Dict, statement: Dict) -> Dict:
65 |     """prepare alter-column metadata"""
66 | alter_columns = []
67 | for num, column in enumerate(statement["columns"]):
68 | alter_columns.append(create_alter_column(num, column, statement["references"]))
69 | if not target_table["alter"].get("columns"):
70 | target_table["alter"]["columns"] = alter_columns
71 | else:
72 | target_table["alter"]["columns"].extend(alter_columns)
73 | return target_table
74 |
75 |
76 | def add_alter_to_table(tables_dict: Dict, statement: Dict) -> Dict:
77 | """add 'alter' statement to the table"""
78 | table_id = (statement["alter_table_name"], statement["schema"])
79 |
80 | target_table = get_table_from_tables_data(tables_dict, table_id)
81 |
82 | if "columns" in statement:
83 | prepare_alter_columns(target_table, statement)
84 | elif "check" in statement:
85 | if not target_table["alter"].get("checks"):
86 | target_table["alter"]["checks"] = []
87 | statement["check"]["statement"] = " ".join(statement["check"]["statement"])
88 | target_table["alter"]["checks"].append(statement["check"])
89 | elif "unique" in statement:
90 | target_table = set_alter_to_table_data("unique", statement, target_table)
91 | target_table = set_unique_columns_from_alter(statement, target_table)
92 | elif "default" in statement:
93 | target_table = set_alter_to_table_data("default", statement, target_table)
94 | target_table = set_default_columns_from_alter(statement, target_table)
95 | elif "primary_key" in statement:
96 | target_table = set_alter_to_table_data("primary_key", statement, target_table)
97 | return tables_dict
98 |
99 |
100 | def set_default_columns_from_alter(statement: Dict, target_table: Dict) -> Dict:
101 | for column in target_table["columns"]:
102 | if statement["default"]["columns"]:
103 | for column_name in statement["default"]["columns"]:
104 | if column["name"] == column_name:
105 | column["default"] = statement["default"]["value"]
106 | return target_table
107 |
108 |
109 | def set_unique_columns_from_alter(statement: Dict, target_table: Dict) -> Dict:
110 | for column in target_table["columns"]:
111 | for column_name in statement["unique"]["columns"]:
112 | if column["name"] == column_name:
113 | column["unique"] = True
114 | return target_table
115 |
116 |
117 | def set_alter_to_table_data(key: str, statement: Dict, target_table: Dict) -> Dict:
118 | if not target_table["alter"].get(key + "s"):
119 | target_table["alter"][key + "s"] = []
120 | if "using" in statement:
121 | statement[key]["using"] = statement["using"]
122 | target_table["alter"][key + "s"].append(statement[key])
123 | return target_table
124 |
125 |
126 | def init_table_data() -> Dict:
127 | return {
128 | "columns": [],
129 | "primary_key": None,
130 | "alter": {},
131 | "checks": [],
132 | "index": [],
133 | "partitioned_by": [],
134 | "tablespace": None,
135 | }
136 |
137 |
138 | def process_alter_and_index_result(
139 | tables_dict: Dict, table: Dict, output_mode: str
140 | ) -> Dict:
141 | if table.get("index_name"):
142 | tables_dict = add_index_to_table(tables_dict, table, output_mode)
143 |
144 | elif table.get("alter_table_name"):
145 | tables_dict = add_alter_to_table(tables_dict, table)
146 |
147 | return tables_dict
148 |
149 |
150 | def process_entities(tables_dict: Dict, table: Dict, output_mode: str) -> Dict:
151 |     """process tables, types, sequences, etc."""
152 | is_it_table = True
153 |
154 | if table.get("table_name"):
155 | table_data = init_table_data()
156 | table_data = d.populate_dialects_table_data(output_mode, table_data)
157 | table_data.update(table)
158 | table_data = set_unique_columns(table_data)
159 | else:
160 | table_data = table
161 | is_it_table = False
162 |
163 | if is_it_table:
164 | table_data = process_is_it_table_item(table_data, tables_dict)
165 |
166 | table_data = normalize_ref_columns_in_final_output(table_data)
167 |
168 | d.dialects_clean_up(output_mode, table_data)
169 | return table_data
170 |
171 |
172 | def result_format(
173 | result: List[Dict], output_mode: str, group_by_type: bool
174 | ) -> List[Dict]:
175 |     """format the final output produced by the parser"""
176 | final_result = []
177 | tables_dict = {}
178 | for table in result:
179 | # process each item in parser output
180 | if "index_name" in table or "alter_table_name" in table:
181 | tables_dict = process_alter_and_index_result(
182 | tables_dict, table, output_mode
183 | )
184 | else:
185 |             # process tables, types, sequences, etc.
186 | table_data = process_entities(tables_dict, table, output_mode)
187 | final_result.append(table_data)
188 | if group_by_type:
189 | final_result = group_by_type_result(final_result)
190 | return final_result
191 |
192 |
193 | def process_is_it_table_item(table_data: Dict, tables_dict: Dict) -> Dict:
194 | if table_data.get("table_name"):
195 | tables_dict[(table_data["table_name"], table_data["schema"])] = table_data
196 | else:
197 | logger.error(
198 |             "\n Something went wrong. Possibly you are trying to parse an unsupported statement \n "
199 | )
200 | if not table_data.get("primary_key"):
201 | table_data = check_pk_in_columns_and_constraints(table_data)
202 | else:
203 | table_data = remove_pk_from_columns(table_data)
204 |
205 | if table_data.get("unique"):
206 | table_data = add_unique_columns(table_data)
207 |
208 | for column in table_data["columns"]:
209 | if column["name"] in table_data["primary_key"]:
210 | column["nullable"] = False
211 | return table_data
212 |
213 |
214 | def normalize_ref_columns_in_final_output(table_data: Dict) -> Dict:
215 |     # todo: this is a hack, need to remove it
216 | if "references" in table_data:
217 | del table_data["references"]
218 | if "ref_columns" in table_data:
219 | for col_ref in table_data["ref_columns"]:
220 | name = col_ref["name"]
221 | for column in table_data["columns"]:
222 | if name == column["name"]:
223 | del col_ref["name"]
224 | column["references"] = col_ref
225 | del table_data["ref_columns"]
226 | return table_data
227 |
228 |
229 | def set_column_unique_param(table_data: Dict, key: str) -> Dict:
230 | for column in table_data["columns"]:
231 | if key == "constraints":
232 | unique = table_data[key].get("unique", [])
233 | if unique:
234 | check_in = unique["columns"]
235 | else:
236 | check_in = []
237 | else:
238 | check_in = table_data[key]
239 | if column["name"] in check_in:
240 | column["unique"] = True
241 | return table_data
242 |
243 |
244 | def set_unique_columns(table_data: Dict) -> Dict:
245 |
246 | unique_keys = ["unique_statement", "constraints"]
247 |
248 | for key in unique_keys:
249 | if table_data.get(key, None):
250 | # get column names from unique constraints & statements
251 | table_data = set_column_unique_param(table_data, key)
252 | if "unique_statement" in table_data:
253 | del table_data["unique_statement"]
254 | return table_data
255 |
256 |
257 | def group_by_type_result(final_result: List[Dict]) -> Dict[str, List]:
258 | result_as_dict = {
259 | "tables": [],
260 | "types": [],
261 | "sequences": [],
262 | "domains": [],
263 | "schemas": [],
264 | "ddl_properties": [],
265 | "comments": [],
266 | }
267 | keys_map = {
268 | "table_name": "tables",
269 | "sequence_name": "sequences",
270 | "type_name": "types",
271 | "domain_name": "domains",
272 | "schema_name": "schemas",
273 | "tablespace_name": "tablespaces",
274 | "database_name": "databases",
275 | "value": "ddl_properties",
276 | "comments": "comments",
277 | }
278 | for item in final_result:
279 | for key in keys_map:
280 | if key in item:
281 | _type = result_as_dict.get(keys_map.get(key))
282 | if _type is None:
283 | result_as_dict[keys_map.get(key)] = []
284 | _type = result_as_dict[keys_map.get(key)]
285 | if key != "comments":
286 | _type.append(item)
287 | else:
288 | _type.extend(item["comments"])
289 | break
290 | if result_as_dict["comments"] == []:
291 | del result_as_dict["comments"]
292 | return result_as_dict
293 |
294 |
295 | def add_unique_columns(table_data: Dict) -> Dict:
296 | for column in table_data["columns"]:
297 | if column["name"] in table_data["unique"]:
298 | column["unique"] = True
299 | del table_data["unique"]
300 | return table_data
301 |
302 |
303 | def remove_pk_from_columns(table_data: Dict) -> Dict:
304 | for column in table_data["columns"]:
305 | del column["primary_key"]
306 | return table_data
307 |
308 |
309 | def check_pk_in_columns_and_constraints(table_data: Dict) -> Dict:
310 | pk = []
311 | for column in table_data["columns"]:
312 | if column["primary_key"]:
313 | pk.append(column["name"])
314 | del column["primary_key"]
315 | if table_data.get("constraints") and table_data["constraints"].get("primary_keys"):
316 | for key_constraints in table_data["constraints"]["primary_keys"]:
317 | pk.extend(key_constraints["columns"])
318 | table_data["primary_key"] = pk
319 | return table_data
320 |
321 |
322 | def dump_data_to_file(table_name: str, dump_path: str, data: List[Dict]) -> None:
323 |     """dump the parsed schema to a JSON file"""
324 | if not os.path.isdir(dump_path):
325 | os.makedirs(dump_path, exist_ok=True)
326 | with open("{}/{}_schema.json".format(dump_path, table_name), "w+") as schema_file:
327 | json.dump(data, schema_file, indent=1)
328 |
--------------------------------------------------------------------------------
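For orientation, a minimal sketch of how result_format ties the helpers in common.py together. The input dict here is hand-built for illustration; real input comes from Parser.parse_data():

from sondesh.output.common import result_format

# a hand-built parser item: one table with a single primary-key column
parsed = [{"table_name": "sales", "schema": None,
           "columns": [{"name": "salesid", "primary_key": True}]}]

grouped = result_format(parsed, output_mode="sql", group_by_type=True)

# group_by_type=True buckets entities by kind ('tables', 'types', 'sequences', ...)
print(sorted(grouped))
# the per-column primary_key flag is lifted into the table-level 'primary_key' list
print(grouped["tables"][0]["primary_key"])   # ['salesid']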
/sondesh/output/dialects.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, List, Tuple
2 |
3 | hql_clean_up_list = ["deferrable_initially"]
4 |
5 |
6 | sql_clean_up_list = [
7 | "external",
8 | "external",
9 | "stored_as",
10 | "row_format",
11 | "lines_terminated_by",
12 | "fields_terminated_by",
13 | "collection_items_terminated_by",
14 | "map_keys_terminated_by",
15 | ]
16 |
17 |
18 | def add_additional_hql_keys(table_data: Dict) -> Dict:
19 | table_data.update(
20 | {
21 | "stored_as": None,
22 | "location": None,
23 | "comment": None,
24 | "row_format": None,
25 | "fields_terminated_by": None,
26 | "lines_terminated_by": None,
27 | "fields_terminated_by": None,
28 | "map_keys_terminated_by": None,
29 | "collection_items_terminated_by": None,
30 | "external": table_data.get("external", False),
31 | }
32 | )
33 | return table_data
34 |
35 |
36 | def add_additional_oracle_keys(table_data: Dict) -> Dict:
37 | table_data.update(
38 | {
39 | "constraints": {"uniques": None, "checks": None, "references": None},
40 | "storage": None,
41 | }
42 | )
43 | return table_data
44 |
45 |
46 | def update_bigquery_output(table_data: Dict) -> Dict:
47 | if table_data.get("schema"):
48 | table_data["dataset"] = table_data["schema"]
49 | del table_data["schema"]
50 | return table_data
51 |
52 |
53 | def add_additional_redshift_keys(table_data: Dict) -> Dict:
54 | table_data.update(
55 | {
56 | "diststyle": None,
57 | "distkey": None,
58 | "sortkey": {"type": None, "keys": []},
59 | "encode": None,
60 | "temp": False,
61 | }
62 | )
63 | return table_data
64 |
65 |
66 | def add_additional_snowflake_keys(table_data: Dict) -> Dict:
67 | table_data.update({"clone": None, "primary_key_enforced": None})
68 | return table_data
69 |
70 |
71 | def add_additional_oracle_keys_in_column(column_data: Dict) -> Dict:
72 | column_data.update({"encrypt": None})
73 | return column_data
74 |
75 |
76 | def add_additional_snowflake_keys_in_column(column_data: Dict) -> Dict:
77 | return column_data
78 |
79 |
80 | def add_additional_redshift_keys_in_column(column_data: Dict, table_data: Dict) -> Tuple[Dict, Dict]:
81 | column_data["encode"] = column_data.get("encode", None)
82 | if column_data.get("distkey"):
83 | table_data["distkey"] = column_data["name"]
84 | del column_data["distkey"]
85 | return column_data, table_data
86 |
87 |
88 | def add_additional_mssql_keys(table_data: Dict) -> Dict:
89 | table_data.update(
90 | {
91 | "constraints": {"uniques": None, "checks": None, "references": None},
92 | }
93 | )
94 | return table_data
95 |
96 |
97 | def clean_up_output(table_data: Dict, key_list: List[str]) -> Dict:
98 | for key in key_list:
99 | if key in table_data:
100 | del table_data[key]
101 | return table_data
102 |
103 |
104 | def populate_dialects_table_data(output_mode: str, table_data: Dict) -> Dict:
105 |
106 |     method_mapper = {
107 | "hql": add_additional_hql_keys,
108 | "mssql": add_additional_mssql_keys,
109 | "mysql": add_additional_mssql_keys,
110 | "oracle": add_additional_oracle_keys,
111 | "redshift": add_additional_redshift_keys,
112 | "snowflake": add_additional_snowflake_keys,
113 | }
114 |
115 |     method = method_mapper.get(output_mode)
116 |
117 | if method:
118 | table_data = method(table_data)
119 |
120 | return table_data
121 |
122 |
123 | def key_cleaning(table_data: Dict, output_mode: str) -> Dict:
124 | if output_mode != "hql":
125 | table_data = clean_up_output(table_data, sql_clean_up_list)
126 | else:
127 | table_data = clean_up_output(table_data, hql_clean_up_list)
128 | # todo: need to figure out how workaround it normally
129 | if "_ddl_parser_comma_only_str" == table_data.get("fields_terminated_by"):
130 | table_data["fields_terminated_by"] = "','"
131 | return table_data
132 |
133 |
134 | def process_redshift_dialect(table_data: Dict) -> Dict:
135 | for column in table_data.get("columns", []):
136 | column, table_data = add_additional_redshift_keys_in_column(column, table_data)
137 | if table_data.get("encode"):
138 | column["encode"] = column["encode"] or table_data.get("encode")
139 | return table_data
140 |
141 |
142 | def dialects_clean_up(output_mode: str, table_data: Dict) -> Dict:
143 | key_cleaning(table_data, output_mode)
144 | update_mappers_for_table_properties = {"bigquery": update_bigquery_output}
145 | update_table_prop = update_mappers_for_table_properties.get(output_mode)
146 | if update_table_prop:
147 | table_data = update_table_prop(table_data)
148 |
149 | if output_mode == "oracle":
150 | for column in table_data.get("columns", []):
151 | column = add_additional_oracle_keys_in_column(column)
152 | elif output_mode == "snowflake":
153 | # can be no columns if it is a create database or create schema
154 | for column in table_data.get("columns", []):
155 | column = add_additional_snowflake_keys_in_column(column)
156 |
157 | elif output_mode == "redshift":
158 | table_data = process_redshift_dialect(table_data)
159 | return table_data
160 |
--------------------------------------------------------------------------------
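A small sketch of the dispatch in populate_dialects_table_data: each supported output_mode adds its own default keys to the table skeleton, while plain 'sql' is a no-op:

from sondesh.output import dialects

redshift_table = dialects.populate_dialects_table_data("redshift", {})
print(sorted(redshift_table))   # ['distkey', 'diststyle', 'encode', 'sortkey', 'temp']

# 'sql' has no entry in the mapper, so the table data passes through unchanged
print(dialects.populate_dialects_table_data("sql", {}))   # {}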
/sondesh/parser.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | import re
5 | from typing import Dict, List, Optional, Tuple, Union
6 |
7 | from ply import lex, yacc
8 |
9 | from sondesh.output.common import dump_data_to_file, result_format
10 | from sondesh.utils import find_first_unpair_closed_par
11 |
12 | # open comment
13 | OP_COM = "/*"
14 | # close comment
15 | CL_COM = "*/"
16 |
17 | IN_COM = "--"
18 | MYSQL_COM = "#"
19 |
20 |
21 | def set_logging_config(
22 | log_level: Union[str, int],
23 | log_file: Optional[str] = None) -> None:
24 |
25 | if log_file:
26 | logging.basicConfig(
27 | level=log_level,
28 | filename=log_file,
29 | filemode="w",
30 | format="%(filename)10s:%(lineno)4d:%(message)s",
31 | )
32 | else:
33 | logging.basicConfig(
34 | level=log_level,
35 | format="%(filename)10s:%(lineno)4d:%(message)s",
36 | )
37 |
38 |
39 | class Parser:
40 | """
41 | Base class for a lexer/parser that has the rules defined as methods
42 |
43 |     It cannot be loaded or called without a subclass,
44 |
45 | for example: DDLParser
46 |
47 |     The subclass must include the tokens and rules for the parser
48 |
49 |     This class contains logic for pre-processing lines before passing them to the lex & yacc parser:
50 |
51 | - clean up
52 | - catch comments
53 |     - catch statements like 'SET' (they are not parsed by the parser)
54 | - etc
55 | """
56 |
57 | def __init__(
58 | self,
59 | content: str,
60 | silent: bool = True,
61 | debug: bool = False,
62 | normalize_names: bool = False,
63 | log_file: Optional[str] = None,
64 | log_level: Union[str, int] = logging.DEBUG,
65 | ) -> None:
66 | """
67 |         content: the file content for processing
68 |         silent: if True - will not raise errors, just return empty output
69 |         debug: if True - the parser will produce a huge token tree & a parser.out file; normally you don't want this enabled
70 |         normalize_names: if the flag is True (default 'False') then all identifiers will be returned without
71 |             '[', '"' and other delimiters that are used in different SQL dialects to separate custom names
72 |             from reserved words & statements.
73 |             For example, if the flag is set to 'True' and you pass this input:
74 |
75 | CREATE TABLE [dbo].[TO_Requests](
76 | [Request_ID] [int] IDENTITY(1,1) NOT NULL,
77 | [user_id] [int]
78 |
79 |         In the output you will have names like 'dbo' and 'TO_Requests', not '[dbo]' and '[TO_Requests]'.
80 | log_file: path to file for logging
81 | log_level: set logging level for parser
82 | """
83 | self.tables = []
84 | self.silent = not debug if debug else silent
85 | self.data = content.encode("unicode_escape")
86 | self.paren_count = 0
87 | self.normalize_names = normalize_names
88 | set_logging_config(log_level, log_file)
89 | log = logging.getLogger()
90 | self.lexer = lex.lex(object=self, debug=False, debuglog=log)
91 | self.yacc = yacc.yacc(module=self, debug=False, debuglog=log)
92 | self.columns_closed = False
93 | self.statement = None
94 | self.block_comments = []
95 | self.comments = []
96 |
97 | def catch_comment_or_process_line(self, code_line: str) -> str:
98 | if self.multi_line_comment:
99 | self.comments.append(self.line)
100 | if CL_COM in self.line:
101 | self.multi_line_comment = False
102 | return ''
103 |
104 | elif not (
105 | self.line.strip().startswith(MYSQL_COM)
106 | or self.line.strip().startswith(IN_COM)
107 | ):
108 | return self.process_inline_comments(code_line)
109 | return code_line
110 |
111 |     def pre_process_line(self) -> None:
112 | code_line = ""
113 | comma_only_str = r"((\')|(' ))+(,)((\')|( '))+\B"
114 | self.line = re.sub(comma_only_str, "_ddl_parser_comma_only_str", self.line)
115 | code_line = self.catch_comment_or_process_line(code_line)
116 | if self.line.startswith(OP_COM) and CL_COM not in self.line:
117 | self.multi_line_comment = True
118 | elif self.line.startswith(CL_COM):
119 | self.multi_line_comment = False
120 | self.line = code_line
121 |
122 | def process_in_comment(self, line: str) -> str:
123 | if re.search(r"((\")|(\'))+(.)*(--)+(.)*((\")|(\'))+", line):
124 | code_line = line
125 | else:
126 | splitted_line = line.split(IN_COM)
127 | code_line = splitted_line[0]
128 | self.comments.append(splitted_line[1])
129 | return code_line
130 |
131 | def process_line_before_comment(self) -> str:
132 |         """ get the useful code line - remove the comment """
133 | code_line = ""
134 | if IN_COM in self.line:
135 | code_line = self.process_in_comment(self.line)
136 | elif CL_COM not in self.line and OP_COM not in self.line:
137 | code_line = self.line
138 | return code_line
139 |
140 |     def process_inline_comments(self, code_line: str) -> str:
141 |         """ this method catches inline comments like "create table ( # some comment" inside a statement """
142 | comment = None
143 | code_line = self.process_line_before_comment()
144 | if OP_COM in self.line:
145 | splitted_line = self.line.split(OP_COM)
146 | code_line += splitted_line[0]
147 | comment = splitted_line[1]
148 | self.block_comments.append(OP_COM)
149 | if CL_COM in code_line and self.block_comments:
150 | splitted_line = self.line.split(CL_COM)
151 | self.block_comments.pop(-1)
152 | code_line += splitted_line[1]
153 | comment = splitted_line[0]
154 |
155 | if comment:
156 | self.comments.append(comment)
157 | return code_line
158 |
159 | def process_regex_input(self, data):
160 | regex = data.split('"input.regex"')[1].split("=")[1]
161 | index = find_first_unpair_closed_par(regex)
162 | regex = regex[:index]
163 | data = data.replace(regex, " lexer_state_regex ")
164 | data = data.replace('"input.regex"', "parse_m_input_regex")
165 | self.lexer.state = {"lexer_state_regex": regex}
166 | return data
167 |
168 | def pre_process_data(self, data):
169 | data = data.decode("utf-8")
170 |         # todo: not sure how to work around ',' in a normal way
171 | if "input.regex" in data:
172 | data = self.process_regex_input(data)
173 |
174 | data = (
175 | data.replace(",", " , ")
176 | .replace("(", " ( ")
177 | .replace(")", " ) ")
178 | .replace("\\x", "\\0")
179 | .replace("‘", "'")
180 | .replace("’", "'")
181 | .replace("\\u2018", "'")
182 | .replace("\\u2019", "'")
183 | .replace("'\\t'", "'pars_m_t'")
184 | .replace("'\\n'", "'pars_m_n'")
185 | .replace("\\'", "pars_m_single")
186 | .replace("\\t", " ")
187 | )
188 | return data
189 |
190 | def process_set(self) -> None:
191 | self.set_line = self.set_line.split()
192 | if self.set_line[-2] == "=":
193 | name = self.set_line[1]
194 | else:
195 | name = self.set_line[-2]
196 | value = self.set_line[-1].replace(";", "")
197 | self.tables.append({"name": name, "value": value})
198 |
199 | def parse_set_statement(self):
200 | if re.match(r"SET ", self.line.upper()):
201 | self.set_was_in_line = True
202 | if not self.set_line:
203 | self.set_line = self.line
204 | else:
205 | self.process_set()
206 | self.set_line = self.line
207 | elif (self.set_line and len(self.set_line.split()) == 3) or (
208 | self.set_line and self.set_was_in_line
209 | ):
210 | self.process_set()
211 | self.set_line = None
212 | self.set_was_in_line = False
213 |
214 | def check_new_statement_start(self, line: str) -> bool:
215 | self.new_statement = False
216 | if self.statement and self.statement.count("(") == self.statement.count(")"):
217 | new_statements_tokens = ["ALTER ", "CREATE ", "DROP ", "SET "]
218 | for key in new_statements_tokens:
219 | if line.upper().startswith(key):
220 | self.new_statement = True
221 | return self.new_statement
222 |
223 | def check_line_on_skip_words(self) -> bool:
224 | skip_regex = r"^(GO|USE|INSERT)\b"
225 |
226 | self.skip = False
227 |
228 | if re.match(skip_regex, self.line.upper()):
229 | self.skip = True
230 | return self.skip
231 |
232 |     def add_line_to_statement(self) -> None:
233 |
234 | if (
235 | self.line
236 | and not self.skip
237 | and not self.set_was_in_line
238 | and not self.new_statement
239 | ):
240 | if self.statement is None:
241 | self.statement = self.line
242 | else:
243 | self.statement += f" {self.line}"
244 |
245 | def parse_data(self) -> List[Dict]:
246 | self.tables: List[Dict] = []
247 | data = self.pre_process_data(self.data)
248 | lines = data.replace("\\t", "").split("\\n")
249 |
250 | self.set_line: Optional[str] = None
251 |
252 | self.set_was_in_line: bool = False
253 |
254 | self.multi_line_comment = False
255 |
256 | for num, self.line in enumerate(lines):
257 | self.process_line(num != len(lines) - 1)
258 | if self.comments:
259 | self.tables.append({"comments": self.comments})
260 | return self.tables
261 |
262 | def process_line(
263 | self,
264 | last_line: bool,
265 |     ) -> None:
266 | self.pre_process_line()
267 |
268 | self.line = self.line.strip().replace("\n", "").replace("\t", "")
269 | self.skip = self.check_line_on_skip_words()
270 |
271 | self.parse_set_statement()
272 |         # to avoid issues when a comma or parenthesis is glued to a column name
273 | self.check_new_statement_start(self.line)
274 |
275 | final_line = self.line.endswith(";") and not self.set_was_in_line
276 | self.add_line_to_statement()
277 |
278 | if (final_line or self.new_statement) and self.statement:
279 | # end of sql operation, remove ; from end of line
280 | self.statement = self.statement[:-1]
281 | elif last_line and not self.skip:
282 |             # continue combining lines into one statement
283 | return
284 |
285 | self.set_default_flags_in_lexer()
286 |
287 | self.process_statement()
288 |
289 | def process_statement(self) -> None:
290 |
291 | if not self.set_line and self.statement:
292 | self.parse_statement()
293 | if self.new_statement:
294 | self.statement = self.line
295 | else:
296 | self.statement = None
297 |
298 | def parse_statement(self) -> None:
299 |
300 | _parse_result = yacc.parse(self.statement)
301 | if _parse_result:
302 | self.tables.append(_parse_result)
303 |
304 | def set_default_flags_in_lexer(self) -> None:
305 | attrs = [
306 | "is_table",
307 | "sequence",
308 | "last_token",
309 | "columns_def",
310 | "after_columns",
311 | "check",
312 | "is_table",
313 | "last_par",
314 | "lp_open",
315 | "is_alter",
316 | "is_like",
317 | ]
318 | for attr in attrs:
319 | setattr(self.lexer, attr, False)
320 | self.lexer.lt_open = 0
321 |
322 | def run(
323 | self,
324 | *,
325 | dump: bool = False,
326 | dump_path="schemas",
327 | file_path: Optional[str] = None,
328 | output_mode: str = "sql",
329 | group_by_type: bool = False,
330 | json_dump=False,
331 | ) -> List[Dict]:
332 | """
333 |         dump: provide 'True' if you need to dump the output to a file
334 | dump_path: folder where you want to store result dump files
335 | file_path: pass full path to ddl file if you want to use this
336 | file name as name for the target output file
337 |         output_mode: change the output mode to get information relevant to a specific dialect,
338 |         for example, with output_mode='hql' you will also see in self.tables such information as
339 |         'external', 'stored_as', etc. Possible variants: ["mssql", "mysql", "oracle", "hql", "sql", "snowflake", "redshift", "bigquery"]
340 |         group_by_type: if you set True, the output will be formed as a Dict with keys ['tables',
341 |         'sequences', 'types', 'domains']
342 |         and each key will contain a list of parsed entities. Without it the output is a List of Dicts where each
343 |         Dict == one entity from the ddl - one table, sequence or type.
344 | """
345 | self.tables = self.parse_data()
346 | self.tables = result_format(self.tables, output_mode, group_by_type)
347 | if dump:
348 | if file_path:
349 | # if we run parse from one file - save same way to one file
350 | dump_data_to_file(
351 | os.path.basename(file_path).split(".")[0], dump_path, self.tables
352 | )
353 | else:
354 | for table in self.tables:
355 | dump_data_to_file(table["table_name"], dump_path, table)
356 | if json_dump:
357 | self.tables = json.dumps(self.tables)
358 | return self.tables
359 |
--------------------------------------------------------------------------------
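As a quick reference for the run() options documented above, a minimal sketch using the parse_the_ddl entry point from the tests; the DDL is made up for illustration:

from sondesh.ddl_parser import parse_the_ddl

ddl = "create table sales (salesid integer not null, primary key (salesid));"

# output_mode adds dialect-specific keys (e.g. 'encode', 'distkey' for redshift),
# group_by_type=True returns a dict keyed by entity kind instead of a flat list;
# dump=True would additionally write <table_name>_schema.json files into dump_path
result = parse_the_ddl(ddl).run(output_mode="redshift", group_by_type=True)
print(result["tables"][0]["table_name"])   # sales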
/sondesh/tokens.py:
--------------------------------------------------------------------------------
1 | # statements that are used at the start of a definition or in statements without columns
2 | defenition_statements = {
3 | "DROP": "DROP",
4 | "CREATE": "CREATE",
5 | "TABLE": "TABLE",
6 | "DATABASE": "DATABASE",
7 | "SCHEMA": "SCHEMA",
8 | "ALTER": "ALTER",
9 | "TYPE": "TYPE",
10 | "DOMAIN": "DOMAIN",
11 | "REPLACE": "REPLACE",
12 | "OR": "OR",
13 | "CLUSTERED": "CLUSTERED",
14 | "SEQUENCE": "SEQUENCE",
15 | "TABLESPACE": "TABLESPACE",
16 | }
17 | common_statements = {
18 | "INDEX": "INDEX",
19 | "REFERENCES": "REFERENCES",
20 | "KEY": "KEY",
21 | "ADD": "ADD",
22 | "AS": "AS",
23 | "CLONE": "CLONE",
24 | "DEFERRABLE": "DEFERRABLE",
25 | "INITIALLY": "INITIALLY",
26 | "IF": "IF",
27 | "NOT": "NOT",
28 | "EXISTS": "EXISTS",
29 | "ON": "ON",
30 | "FOR": "FOR",
31 | "ENCRYPT": "ENCRYPT",
32 | "SALT": "SALT",
33 | "NO": "NO",
34 | "USING": "USING",
35 | # bigquery
36 | "OPTIONS": "OPTIONS",
37 | }
38 |
39 | columns_defenition = {
40 | "DELETE": "DELETE",
41 | "UPDATE": "UPDATE",
42 | "NULL": "NULL",
43 | "ARRAY": "ARRAY",
44 | ",": "COMMA",
45 | "DEFAULT": "DEFAULT",
46 | "COLLATE": "COLLATE",
47 | "ENFORCED": "ENFORCED",
48 | "ENCODE": "ENCODE",
49 | "GENERATED": "GENERATED",
50 | "COMMENT": "COMMENT"
51 | }
52 | first_liners = {
53 | "LIKE": "LIKE",
54 | "CONSTRAINT": "CONSTRAINT",
55 | "FOREIGN": "FOREIGN",
56 | "PRIMARY": "PRIMARY",
57 | "UNIQUE": "UNIQUE",
58 | "CHECK": "CHECK",
59 | "WITH": "WITH",
60 | }
61 |
62 | common_statements.update(first_liners)
63 | defenition_statements.update(common_statements)
64 | after_columns_tokens = {
65 | "PARTITIONED": "PARTITIONED",
66 | "PARTITION": "PARTITION",
67 | "BY": "BY",
68 | # hql
69 | "INTO": "INTO",
70 | "STORED": "STORED",
71 | "LOCATION": "LOCATION",
72 | "ROW": "ROW",
73 | "FORMAT": "FORMAT",
74 | "TERMINATED": "TERMINATED",
75 | "COLLECTION": "COLLECTION",
76 | "ITEMS": "ITEMS",
77 | "MAP": "MAP",
78 | "KEYS": "KEYS",
79 | "SERDE": "SERDE",
80 | "CLUSTER": "CLUSTER",
81 | "SERDEPROPERTIES": "SERDEPROPERTIES",
82 | "TBLPROPERTIES": "TBLPROPERTIES",
83 | "USING": "USING",
84 | "SKEWED": "SKEWED",
85 | # oracle
86 | "STORAGE": "STORAGE",
87 | "TABLESPACE": "TABLESPACE",
88 | # mssql
89 | "TEXTIMAGE_ON": "TEXTIMAGE_ON",
90 | }
91 | sequence_reserved = {
92 | "INCREMENT": "INCREMENT",
93 | "START": "START",
94 | "MINVALUE": "MINVALUE",
95 | "MAXVALUE": "MAXVALUE",
96 | "CACHE": "CACHE",
97 | "NO": "NO",
98 | }
99 |
100 |
101 | tokens = tuple(
102 | set(
103 | ["ID", "DOT", "STRING", "DQ_STRING", "LP", "RP", "LT", "RT", "COMMAT", "AUTOINCREMENT"]
104 | + list(defenition_statements.values())
105 | + list(common_statements.values())
106 | + list(columns_defenition.values())
107 | + list(sequence_reserved.values())
108 | + list(after_columns_tokens.values())
109 | )
110 | )
111 |
112 | symbol_tokens = {
113 | ")": "RP",
114 | "(": "LP",
115 | }
116 |
117 | symbol_tokens_no_check = {"<": "LT", ">": "RT"}
118 |
--------------------------------------------------------------------------------
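The dicts above exist mainly to feed ply: every keyword string maps to a token name, and all of them are merged into the 'tokens' tuple that lex/yacc expect. A tiny illustration:

from sondesh import tokens

# every keyword declared above ends up as a ply token name
assert "CREATE" in tokens.tokens
assert "TABLESPACE" in tokens.tokens

# keyword-to-token lookups, the way the lexer resolves words and symbols
print(tokens.defenition_statements.get("TABLE"))   # TABLE
print(tokens.symbol_tokens.get("("))               # LP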
/sondesh/utils.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 |
4 | def remove_par(p_list: List[str]) -> List[str]:
5 | remove_list = ["(", ")"]
6 | for symbol in remove_list:
7 | while symbol in p_list:
8 | p_list.remove(symbol)
9 | return p_list
10 |
11 |
12 | spec_mapper = {
13 | "'pars_m_t'": "'\t'",
14 | "'pars_m_n'": "'\n'",
15 | "'pars_m_dq'": '"',
16 | "pars_m_single": "'",
17 | }
18 |
19 |
20 | def check_spec(value: str) -> str:
21 | replace_value = spec_mapper.get(value)
22 | if not replace_value:
23 | for item in spec_mapper:
24 | if item in value:
25 | replace_value = value.replace(item, spec_mapper[item])
26 | break
27 | else:
28 | replace_value = value
29 | return replace_value
30 |
31 |
32 | def find_first_unpair_closed_par(str_: str) -> int:
33 | stack = []
34 | n = -1
35 | for i in str_:
36 | n += 1
37 | if i == ")":
38 | if not stack:
39 | return n
40 | else:
41 | stack.pop(-1)
42 | elif i == "(":
43 | stack.append(i)
44 |
--------------------------------------------------------------------------------
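A few concrete calls showing what these helpers do, with values checked against the implementations above:

from sondesh.utils import check_spec, find_first_unpair_closed_par, remove_par

# remove_par strips the literal '(' / ')' tokens left over in a grammar production
print(remove_par([None, "FOREIGN", "KEY", "(", ["listid"], ")"]))
# -> [None, 'FOREIGN', 'KEY', ['listid']]

# check_spec maps the parser's placeholder strings back to the real characters
print(repr(check_spec("'pars_m_t'")))   # "'\t'" (a quoted tab)

# index of the first ')' that has no matching '(' before it
print(find_first_unpair_closed_par("(a) b) c"))   # 5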
/test/read_from_file.py:
--------------------------------------------------------------------------------
1 | import pprint
2 | from sondesh.ddl_parser import parse_from_file
3 |
4 | result = parse_from_file('sql_files/one.sql')
5 | pprint.pprint(result)
6 |
--------------------------------------------------------------------------------
/test/sql_files/one.sql:
--------------------------------------------------------------------------------
1 | create table sales(
2 | salesid integer not null,
3 | listid integer not null,
4 | sellerid integer not null,
5 | buyerid integer not null encode auto,
6 | eventid integer not null encode mostly16,
7 | dateid smallint,
8 | qtysold smallint not null encode mostly8,
9 | pricepaid decimal(8,2) encode delta32k,
10 | commission decimal(8,2) encode delta32k,
11 | saletime timestamp,
12 | test_col varchar(160),
13 | test_col2 varchar(130),
14 | primary key(salesid),
15 | foreign key(listid) references listing(listid),
16 | foreign key(sellerid) references users(userid),
17 | foreign key(buyerid) references users(userid),
18 | foreign key(dateid) references date(dateid)
19 | )
20 | diststyle auto1
21 | compound sortkey(salesid,sellerid);
--------------------------------------------------------------------------------
/test/sql_files/test_sql.sql:
--------------------------------------------------------------------------------
1 | create table sales(
2 | salesid integer not null,
3 | listid integer not null,
4 | sellerid integer not null,
5 | buyerid integer not null encode auto,
6 | eventid integer not null encode mostly16,
7 | dateid smallint,
8 | qtysold smallint not null encode mostly8,
9 | pricepaid decimal(8,2) encode delta32k,
10 | commission decimal(8,2) encode delta32k,
11 | saletime timestamp without time zone encode az64,
12 | test_col varchar(100),
13 | primary key(salesid),
14 | foreign key(listid) references listing(listid),
15 | foreign key(sellerid) references users(userid),
16 | foreign key(buyerid) references users(userid),
17 | foreign key(dateid) references date(dateid)
18 | )
19 | diststyle auto1
20 | compound sortkey(salesid,sellerid);
--------------------------------------------------------------------------------
/test/sql_files/two.sql:
--------------------------------------------------------------------------------
1 | create table sales(
2 | salesid integer not null,
3 | listid integer not null,
4 | sellerid varchar not null,
5 | buyerid integer not null encode auto,
6 | eventid integer not null encode mostly16,
7 | dateid smallint not null,
8 | qtysold smallint not null encode mostly8,
9 | pricepaid decimal(8,2) encode delta32k,
10 | commission decimal(8,2) encode delta32k,
11 | saletime timestamp without time zone encode az64,
12 | test_col varchar(120),
13 | primary key(salesid),
14 | foreign key(listid) references listing(listid),
15 | foreign key(sellerid) references users(userid),
16 | foreign key(buyerid) references users(userid),
17 | foreign key(dateid) references date(dateid)
18 | )
19 | diststyle auto
20 | compound sortkey(listid,sellerid);
--------------------------------------------------------------------------------
/test/test_oracle.py:
--------------------------------------------------------------------------------
1 | import pprint
2 |
3 | from sondesh.ddl_parser import parse_the_ddl
4 |
5 | def test_oracle_ddl():
6 |
7 | ddl = '''
8 | CREATE TABLE employee (
9 | employee_id number(100),
10 | first_name VARCHAR2(128) NOT NULL,
11 | last_name VARCHAR2(128) NOT NULL,
12 | salary NUMBER(6) ENCRYPT USING 'SHA256',
13 | emp_photo Blob,
14 | dept_id NUMBER(10),
15 | car_vin_no NUMBER(*,10),
16 | include_exclude_ind CHAR(1) DEFAULT 'Y',
17 | TEXT2_ NVARCHAR2(2000),
18 | CONSTRAINT check_employee_name CHECK (first_name = upper(first_name)),
19 | CONSTRAINT dept_fk FOREIGN KEY(dept_id) REFERENCES department(dept_id),
20 | CONSTRAINT employees_pk PRIMARY KEY (employee_id)
21 | )
22 | PARTITION BY REFERENCE(dept_fk)
23 | Storage ( Initial 5m Next 5m Maxextents Unlimited )
24 | ;
25 | '''
26 |
27 | result = parse_the_ddl(ddl).run(group_by_type=True)
28 | pprint.pprint(result)
29 |
30 | expected = '''
31 | {'ddl_properties': [],
32 | 'domains': [],
33 | 'schemas': [],
34 | 'sequences': [],
35 | 'tables': [{'alter': {},
36 | 'checks': [{'constraint_name': 'check_employee_name',
37 | 'statement': 'first_name = upper(first_name)'}],
38 | 'columns': [{'check': None,
39 | 'default': None,
40 | 'name': 'employee_id',
41 | 'nullable': False,
42 | 'references': None,
43 | 'size': 100,
44 | 'type': 'number',
45 | 'unique': False},
46 | {'check': None,
47 | 'default': None,
48 | 'name': 'first_name',
49 | 'nullable': False,
50 | 'references': None,
51 | 'size': 128,
52 | 'type': 'VARCHAR2',
53 | 'unique': False},
54 | {'check': None,
55 | 'default': None,
56 | 'name': 'last_name',
57 | 'nullable': False,
58 | 'references': None,
59 | 'size': 128,
60 | 'type': 'VARCHAR2',
61 | 'unique': False},
62 | {'check': None,
63 | 'default': None,
64 | 'encrypt': {'encryption_algorithm': "'SHA256'",
65 | 'integrity_algorithm': 'SHA-1',
66 | 'salt': True},
67 | 'name': 'salary',
68 | 'nullable': True,
69 | 'references': None,
70 | 'size': 6,
71 | 'type': 'NUMBER',
72 | 'unique': False},
73 | {'check': None,
74 | 'default': None,
75 | 'name': 'emp_photo',
76 | 'nullable': True,
77 | 'references': None,
78 | 'size': None,
79 | 'type': 'Blob',
80 | 'unique': False},
81 | {'check': None,
82 | 'default': None,
83 | 'name': 'dept_id',
84 | 'nullable': True,
85 | 'references': None,
86 | 'size': 10,
87 | 'type': 'NUMBER',
88 | 'unique': False},
89 | {'check': None,
90 | 'default': None,
91 | 'name': 'car_vin_no',
92 | 'nullable': True,
93 | 'references': None,
94 | 'size': ('*', 10),
95 | 'type': 'NUMBER',
96 | 'unique': False},
97 | {'check': None,
98 | 'default': "'Y'",
99 | 'name': 'include_exclude_ind',
100 | 'nullable': True,
101 | 'references': None,
102 | 'size': 1,
103 | 'type': 'CHAR',
104 | 'unique': False},
105 | {'check': None,
106 | 'default': None,
107 | 'name': 'TEXT2_',
108 | 'nullable': True,
109 | 'references': None,
110 | 'size': 2000,
111 | 'type': 'NVARCHAR2',
112 | 'unique': False}],
113 | 'constraints': {'checks': [{'constraint_name': 'check_employee_name',
114 | 'statement': 'first_name = '
115 | 'upper(first_name)'}],
116 | 'primary_keys': [{'columns': ['employee_id'],
117 | 'constraint_name': 'employees_pk'}],
118 | 'references': [{'columns': ['dept_id'],
119 | 'constraint_name': 'dept_fk',
120 | 'deferrable_initially': None,
121 | 'on_delete': None,
122 | 'on_update': None,
123 | 'schema': None,
124 | 'table': 'department'}]},
125 | 'index': [],
126 | 'partition_by': {'columns': ['dept_fk'], 'type': 'REFERENCE'},
127 | 'partitioned_by': [],
128 | 'primary_key': ['employee_id'],
129 | 'schema': None,
130 | 'storage': {'initial': '5m',
131 | 'maxextents': 'Unlimited',
132 | 'next': '5m'},
133 | 'table_name': 'employee',
134 | 'tablespace': None}],
135 | 'types': []}
136 | '''
137 | #assert expected == result
138 | pprint.pprint(result['tables'][0]['checks'])
139 |
140 | test_oracle_ddl()
--------------------------------------------------------------------------------
/test/test_redshift.py:
--------------------------------------------------------------------------------
1 | import pprint
2 |
3 | from sondesh.ddl_parser import parse_the_ddl
4 |
5 | def test_redshift():
6 |
7 | ddl = '''
8 | create table sales(
9 | salesid integer not null,
10 | listid integer not null,
11 | sellerid integer not null,
12 | buyerid integer not null encode auto,
13 | eventid integer not null encode mostly16,
14 | dateid smallint not null,
15 | qtysold smallint not null encode mostly8,
16 | pricepaid decimal(8,2) encode delta32k,
17 | commission decimal(8,2) encode delta32k,
18 | saletime timestamp,
19 | primary key(salesid),
20 | foreign key(listid) references listing(listid),
21 | foreign key(sellerid) references users(userid),
22 | foreign key(buyerid) references users(userid),
23 | foreign key(dateid) references date(dateid)
24 | )
25 | distkey(listid)
26 | compound sortkey(listid,sellerid)
27 | '''
28 | result = parse_the_ddl(ddl).run(group_by_type=True, output_mode="redshift")
29 | pprint.pprint(result)
30 |
31 | test_redshift()
--------------------------------------------------------------------------------