├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── ddl_compare.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── images
│   ├── cli_app_terminal.png
│   ├── compare_result.png
│   └── logo.png
├── requirement.txt
├── setup.py
├── sondesh
│   ├── __init__.py
│   ├── apps
│   │   ├── __init__.py
│   │   └── cli_app.py
│   ├── compare.py
│   ├── ddl_parser.py
│   ├── dialects
│   │   ├── __init__.py
│   │   ├── bigquery.py
│   │   ├── hql.py
│   │   ├── mssql.py
│   │   ├── mysql.py
│   │   ├── oracle.py
│   │   ├── redshift.py
│   │   ├── snowflake.py
│   │   ├── spark_sql.py
│   │   └── sql.py
│   ├── output
│   │   ├── __init__.py
│   │   ├── common.py
│   │   └── dialects.py
│   ├── parser.py
│   ├── parsetab.py
│   ├── tokens.py
│   └── utils.py
└── test
    ├── read_from_file.py
    ├── sql_files
    │   ├── one.sql
    │   ├── test_sql.sql
    │   └── two.sql
    ├── test_oracle.py
    └── test_redshift.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
app/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml

--------------------------------------------------------------------------------
/.idea/ddl_compare.iml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Project: Sondesh

[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
![Maintainer](https://img.shields.io/badge/maintainer-Koushik-blue)
[![PyPI license](https://img.shields.io/pypi/l/ansicolortags.svg)](https://pypi.python.org/pypi/ansicolortags/)
[![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/)
[![Generic badge](https://img.shields.io/badge/release-1.0-green.svg)](https://shields.io/)

![logo.png](https://i.ibb.co/x596NHL/logo.png)

## Description

Sondesh is the name of my cat. I love him very much, so I've decided to name this project after him.

This project is all about a parser and a comparator. The question is: what does it parse?
- It parses SQL statements, but only DDL statements
- It supports many SQL dialects, for example Oracle, PostgreSQL, Spark SQL, and Hive
- There is a CLI app (`cli_app.py`) that compares two DDL statements and shows you the differences in the terminal

![compare_result.png](https://i.ibb.co/94VWWTy/compare-result.png)

### Dependencies

* Windows 10, Debian, and BSD are the supported platforms
* Python version >= 3.8

### Installing

* sondesh can be installed using pip

```
pip install sondesh
```

### Usage

```python
from sondesh import ddl_parser
import pprint

result = ddl_parser.parse_from_file('/home/koushik/sample_ddl.sql')
pprint.pprint(result)
```
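You can also drive the comparison from Python the same way the CLI app does. A minimal sketch, assuming two DDL files at hypothetical paths; `compare_df` takes the first parsed table from each file plus two display labels, exactly as `sondesh/apps/cli_app.py` calls it:

```python
from sondesh import compare
from sondesh.ddl_parser import parse_from_file

# parse_from_file returns a list with one dict per parsed table
first = parse_from_file('/home/koushik/one.sql')   # hypothetical path
second = parse_from_file('/home/koushik/two.sql')  # hypothetical path

# prints the table-level and column-level differences to the terminal
if first and second:
    compare.compare_df(first[0], second[0], 'one.sql', 'two.sql')
```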
Using the CLI app:

1. Just open the terminal
2. Type `sondesh`
3. Voilà!!!

![logo_terminal.png](https://i.ibb.co/F67hnjf/cli-app-terminal.png)

## What Next:

1. Integration with remote file systems, to load .sql files from there and parse them
2. Integration with data catalogues like the Spark catalogue or the Hive metastore, to compare DDLs

--------------------------------------------------------------------------------
/images/cli_app_terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/cli_app_terminal.png

--------------------------------------------------------------------------------
/images/compare_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/compare_result.png

--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/logo.png

--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
colorama==0.4.6
commonmark==0.9.1
ply==3.11
pyfiglet==0.8.post1
Pygments==2.14.0
rich==13.0.1
tqdm==4.64.1

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Note: To use the 'upload' functionality of this file, you must:
#   $ pipenv install twine --dev

import io
import os
import sys
from shutil import rmtree

from setuptools import find_packages, setup, Command

# Package meta-data.
NAME = 'sondesh'
DESCRIPTION = 'parse SQL, compare two .sql files, generate optimization hints for your SQL, and various other utilities'
URL = 'https://github.com/koustreak/Sondesh'
EMAIL = 'dot.py@yahoo.com'
AUTHOR = 'Koushik Dutta'
REQUIRES_PYTHON = '>=3.9.0'
VERSION = '1.0'

def parse_requirements(requirements):
    with open(requirements) as f:
        return [l.strip('\n') for l in f if l.strip('\n') and not l.startswith('#')]

# What packages are required for this module to be executed?
REQUIRED = parse_requirements('requirement.txt')


# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

here = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(here, project_slug, '__version__.py')) as f:
        exec(f.read(), about)
else:
    about['__version__'] = VERSION


class UploadCommand(Command):
    """Support setup.py upload."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))

        self.status('Uploading the package to PyPI via Twine…')
        os.system('twine upload dist/*')

        self.status('Pushing git tags…')
        os.system('git tag v{0}'.format(about['__version__']))
        os.system('git push --tags')

        sys.exit()


# Where the magic happens:
setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    entry_points={
        'console_scripts': ['sondesh=sondesh.apps.cli_app:main_app'],
    },
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*", "test", "test.*", "*.test.*", "*.test", "images", "images.*"]),
    # If your package is a single module, use this instead of 'packages':
    # py_modules=['mypackage'],

    # entry_points={
    #     'console_scripts': ['mycli=mymodule:cli'],
    # },
    install_requires=REQUIRED,
    include_package_data=True,
    license='Apache-2.0',
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    # $ setup.py publish support.
    cmdclass={
        'upload': UploadCommand,
    },
)

--------------------------------------------------------------------------------
/sondesh/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/__init__.py

--------------------------------------------------------------------------------
/sondesh/apps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/apps/__init__.py

--------------------------------------------------------------------------------
/sondesh/apps/cli_app.py:
--------------------------------------------------------------------------------
# koushik dutta
import pyfiglet
from colorama import Fore, init, Style
from time import sleep
from tqdm import tqdm
import sys
import os
import json
from rich.console import Console
from rich.table import Table

init()

print()
print()


def print_cli_table(df, context_name=None):
    if df:
        print(Fore.CYAN + 'visualizing ' + context_name + ' parse result ' + Style.RESET_ALL)
        for i in df:
            table = Table(title='column details for ' + i['table_name'])
            columns = ["column_name", "column_type", "size", "foreign_key", "refers_to",
                       "on_delete", "on_update", "unique", "nullable", "default", "check"]
            data = list()
            if i['columns']:
                for j in i['columns']:
                    refers_to, on_delete, on_update, is_foreign_key = None, None, None, None

                    if j.get('references'):
                        refers_to = str(j.get('references').get('table'))
                    if j.get('on_delete'):
                        on_delete = str(j.get('references').get('on_delete'))
                    if j.get('on_update'):
                        on_update = str(j.get('references').get('on_update'))
                    if j.get('references'):
                        is_foreign_key = 'yes'

                    data.append([str(j.get('name')), str(j.get('type')), str(j.get('size')),
                                 is_foreign_key, refers_to, on_delete, on_update,
                                 str(j.get('unique')), str(j.get('nullable')), str(j.get('default')),
                                 str(j.get('check'))])
            else:
                print(Fore.YELLOW + 'warning!! no columns could be found in ' + context_name + Style.RESET_ALL)

            for col in columns:
                table.add_column(col)
            for row in data:
                table.add_row(*row, style='bright_green')

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table)
            print()

        for i in df:
            table = Table(title='table properties for ' + i['table_name'])
            columns = ['table property name', 'property value']
            data = [
                ['index', str(i.get('index'))],
                ['diststyle', str(i.get('diststyle'))],
                ['distkey', str(i.get('distkey'))],
                ['primary key', str(i.get('primary_key'))],
                ['sort key', str(i.get('sortkey'))],
                ['schema', str(i.get('schema'))],
                ['table space', str(i.get('tablespace'))]
            ]

            for col in columns:
                table.add_column(col)
            for row in data:
                table.add_row(*row, style='bright_green')

            console = Console()
            console.print(table)
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            print()

    else:
        print(Fore.RED + 'Error occurred while parsing ' + context_name + ' , aborting ' + Style.RESET_ALL)


def main_app():
    '''
    This will be exposed as the CLI app in setup.py
    :return: None
    '''
    f = pyfiglet.Figlet(font='big')
    print(Fore.CYAN + f.renderText('Compare DDL') + Style.RESET_ALL)
    sleep(0.5)
    print(Fore.BLUE + '> author : koushik dutta ')
    sleep(0.5)
    print(Fore.BLUE + '> date : 28-Dec-2022 ')
    sleep(0.5)
    print(Fore.BLUE + '> purpose : compare two DDLs ')
    sleep(0.5)
    print(Fore.BLUE + '> version : 1.0.0 ')
    sleep(0.5)
    print(Fore.BLUE + '> OS : ubuntu 18.04 ')
    sleep(0.5)
    print(Fore.BLUE + '> python version : 3.8 ')
    sleep(0.5)
    print(Fore.BLUE + '> help : please give me a star on github ')
    sleep(0.6)
    print(Fore.BLUE + '> docs : the read-the-docs page is in progress ')
    sleep(0.6)
    print(Fore.BLUE + '> unit test : check the test directory ')
    sleep(0.6)
    print(Fore.BLUE + '> powered by : Lex and Yacc in python (PLY) ')
    sleep(0.6)
    print(Fore.BLUE + '> Supported DDL : Redshift , Oracle , MySQL , Spark SQL ( tested ) ' + Style.RESET_ALL)
    print()
    print(Fore.BLUE)

    with tqdm(total=100) as pbar:
        pbar.set_description('initiating process')
        pbar.update(3)
        sleep(0.5)
        try:
            from sondesh.dialects import redshift
            pbar.update(10)
            pbar.set_description('Loading Redshift Dialect')
        except ImportError:
            print(Fore.RED + 'No Redshift Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import oracle
            pbar.update(10)
            pbar.set_description('Loading Oracle Dialect')
        except ImportError:
            print(Fore.RED + 'No Oracle Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import spark_sql
            pbar.update(12)
            pbar.set_description('Loading spark sql Dialect')
        except ImportError:
            print(Fore.RED + 'No spark sql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import sql
            pbar.update(25)
            pbar.set_description('Loading ansi sql Dialect')
        except ImportError:
            print(Fore.RED + 'No Ansi sql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import mysql
            pbar.update(8)
            pbar.set_description('Loading mysql Dialect')
        except ImportError:
            print(Fore.RED + 'No mysql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import hql
            pbar.update(7)
            pbar.set_description('Loading HiveQL Dialect')
        except ImportError:
            print(Fore.RED + 'No hiveQL Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.ddl_parser import parse_from_file
            pbar.update(10)
            pbar.set_description('Loading SQL file parser')
        except ImportError:
            print(Fore.RED + 'No .sql file parser detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.ddl_parser import parse_the_ddl
            pbar.update(7)
            pbar.set_description('Loading raw sql parser')
        except ImportError:
            print(Fore.RED + 'No raw sql parser detected , aborting . It is required to parse from user input .'
                             ' To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh import compare
            pbar.update(8)
            pbar.set_description('Loading comparator')
        except ImportError:
            print(Fore.RED + 'No comparator detected , aborting . It is required to compare the parse results .'
                             ' To fix it contact koushik')
            sys.exit()

        pbar.set_description('Everything is loaded')

    print()
    print(Fore.GREEN + "All dialects and parsers have been loaded successfully" + Style.RESET_ALL)
    print()
    print()

    if os.path.exists('profile.json') and os.path.getsize('profile.json'):
        print(Fore.BLUE + 'Profile already exists , proceeding with that . If you want to reset it , remove profile.json')
        print()
    else:
        while True:

            print(Fore.BLUE + 'There is no profile for you , let me set one up . Don\'t worry , I am not a spy , and this is one time only \n' + Style.RESET_ALL)
            name = input(Fore.BLUE + '> what should i call you : ' + Style.RESET_ALL)
            print(Fore.BLUE + ' >> hey ' + name + ' welcome to DDL Comparator ' + '\n')
            favourite_db = input(Fore.BLUE + '> which DB do you like most : ' + Style.RESET_ALL)
            purpose = input(Fore.BLUE + '> are you going to use it for commercial purposes : ' + Style.RESET_ALL)
            what_you_do = input(Fore.BLUE + '> what is your job role : ' + Style.RESET_ALL)
            default_outdir = input(Fore.BLUE + '> default output dir for reports (leave blank for current directory) : ' + Style.RESET_ALL)
            cloud_platform = input(Fore.BLUE + '> which cloud platform are you going to use : ' + Style.RESET_ALL)
            reporting_style = input(Fore.BLUE + '> Reporting style \n1.excel\n2.html (leave blank for excel): ' + Style.RESET_ALL)
            print()
            # use string keys so the profile can be re-read reliably
            profile = {'name': name, 'favourite_db': favourite_db,
                       'purpose': purpose, 'what_you_do': what_you_do,
                       'default_outdir': default_outdir,
                       'cloud_platform': cloud_platform, 'reporting_style': reporting_style}
            with open('profile.json', 'w') as fp:
                json.dump(profile, fp)
            print(Fore.CYAN + 'profile has been set up successfully \n' + Style.RESET_ALL)
            break

    if os.path.exists('validator.json') and os.path.getsize('validator.json'):
        print(Fore.BLUE + 'DDL Validator already exists , proceeding with that . If you want to reset it , remove validator.json')
        print()
    else:
        validator_err_ct = 0
        validator_payload = dict()
        while True:

            print(Fore.BLUE + 'There is no DDL Validator setup , let me set one , this is for the first time only \n' + Style.RESET_ALL)

            string_vs_varchar = input(Fore.BLUE + '> Should i highlight STRING vs VARCHAR diff (regardless of size) (Y/N): ' + Style.RESET_ALL)
            if string_vs_varchar.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['string_vs_varchar'] = string_vs_varchar
            validator_err_ct = 0

            timezone_diff = input(Fore.BLUE + '> Should i highlight timezone diff (Y/N): ' + Style.RESET_ALL)
            if timezone_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['timezone_diff'] = timezone_diff
            validator_err_ct = 0

            encoding_diff = input(Fore.BLUE + '> Should i highlight encoding diff (Y/N): ' + Style.RESET_ALL)
            if encoding_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['encoding_diff'] = encoding_diff
            validator_err_ct = 0

            distyle_diff = input(Fore.BLUE + '> Should i highlight distyle diff (Y/N): ' + Style.RESET_ALL)
            if distyle_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['distyle_diff'] = distyle_diff
            validator_err_ct = 0

            with open('validator.json', 'w') as fp:
                json.dump(validator_payload, fp)
            print(Fore.CYAN + 'validator has been set up successfully \n' + Style.RESET_ALL)

            break

    # Validation profiler will be set up accordingly
    # if os.path.exists('validation.json') and os.path.getsize('profile.json'):
    error_ct = 0
    choice = 'none'
    while True:
        choice = input(Fore.CYAN + 'Do you want to compare files or provide SQL as user input (please type either file or raw) \n' + Style.RESET_ALL)
        if choice.upper() not in ('FILE', 'RAW'):
            print(Fore.RED + '\n Wrong input given , the answer should be either file or raw ' + Style.RESET_ALL)
            error_ct += 1
            if error_ct == 2:
                print(Fore.RED + '\n You have crossed the maximum limit of choices , aborting ' + Style.RESET_ALL)
                sys.exit()
            continue
        print(Fore.CYAN + '\n You have entered ' + choice + ' for this session ' + Style.RESET_ALL)
        break

    print()

    err_dialect = 0
    while True:
        dialect = input(Fore.BLUE + '> Which dialect do you want to use now , \n'
                                    '1.redshift\n2.oracle\n3.hql\n4.snowflake\n5.mysql\n' + Style.RESET_ALL)
        if dialect.upper() not in ['REDSHIFT', 'ORACLE', 'SNOWFLAKE', 'MYSQL', 'HQL']:
            print(Fore.RED + '\n Please enter a valid value ' + Style.RESET_ALL)
            err_dialect += 1
            if err_dialect == 2:
                print(Fore.RED + '\n Exceeded the maximum limit of providing input' + Style.RESET_ALL)
                sys.exit()
            continue
        break

    print()

    while True:
        if choice.upper() == 'FILE':
            first_file = input(Fore.BLUE + '> Your first .sql file ? ' + Style.RESET_ALL)
            second_file = input(Fore.BLUE + '> Your second .sql file ? ' + Style.RESET_ALL)
            print()

            if os.path.exists(first_file) and os.path.getsize(first_file):
                if os.path.splitext(first_file)[1].upper() != '.SQL':
                    print(Fore.YELLOW + ' WARNING !! your first input ' + first_file + ' is not a .sql file ' + Style.RESET_ALL)
                    print()
            else:
                print(Fore.RED + ' file not found ' + first_file + Style.RESET_ALL)
                print()
                sys.exit()

            if os.path.exists(second_file) and os.path.getsize(second_file):
                if os.path.splitext(second_file)[1].upper() != '.SQL':
                    print(Fore.YELLOW + ' WARNING !! your second input ' + second_file + ' is not a .sql file ' + Style.RESET_ALL)
                    print()
            else:
                print(Fore.RED + ' file not found ' + second_file + Style.RESET_ALL)
                print()
                sys.exit()

            print(Fore.CYAN + 'parsing ' + first_file + Style.RESET_ALL)
            first_file_parse_result = parse_from_file(first_file)
            print(Fore.CYAN + 'done!! ' + first_file + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'parsing ' + second_file + Style.RESET_ALL)
            second_file_parse_result = parse_from_file(second_file)
            print(Fore.CYAN + 'done!! ' + second_file + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'comparison engine initiated ' + Style.RESET_ALL)
            if first_file_parse_result and second_file_parse_result:
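                # each parse result is a list of table dicts; hand the first
                # table from each file to the comparator, labelled with the
                # originating file names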
                compare.compare_df(first_file_parse_result[0], second_file_parse_result[0], first_file, second_file)

            print()

            question_ = input(Fore.CYAN + 'Do you want to see the table parse result ? leave blank for NO , type YES otherwise ' + Style.RESET_ALL)
            if question_.upper() in ('YES', 'Y'):
                print_cli_table(first_file_parse_result, 'first_file_parse_result')
                print()
                print_cli_table(second_file_parse_result, 'second_file_parse_result')
                print()

        else:
            print(Fore.YELLOW + 'RAW Input Comparator has not been developed yet' + Style.RESET_ALL)
            '''
            first_sql_input = input(Fore.BLUE + 'Please enter your first sql ' + Style.RESET_ALL)
            second_sql_input = input(Fore.BLUE + 'Please enter your second sql ' + Style.RESET_ALL)

            if first_sql_input is None or second_sql_input is None:
                print(Fore.RED + 'Please provide both of the mandatory inputs' + Style.RESET_ALL)

            print(Fore.CYAN + 'parsing first_sql_input ' + Style.RESET_ALL)
            first_sql_parse_result = parse_from_file(first_sql_input)
            print(Fore.CYAN + 'done!!' + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'parsing second_sql_input ' + Style.RESET_ALL)
            second_sql_parse_result = parse_from_file(second_sql_input)
            print(Fore.CYAN + 'done!!' + Style.RESET_ALL)

            print()

            question_ = input('Do you want to see the table parse result ? leave blank for NO , type YES otherwise ')
            if question_:
                print_cli_table(first_sql_parse_result, 'first_sql_parse_result')
                print()
                print_cli_table(second_sql_parse_result, 'second_sql_parse_result')
                print()

            print(Fore.CYAN + 'comparison engine initiated ' + Style.RESET_ALL)
            if first_sql_parse_result and second_sql_parse_result:
                compare.compare_df(first_sql_parse_result, second_sql_parse_result, first_sql_input, second_sql_input)
            '''

        redo_choice = input(Fore.CYAN + '> Do you want to use the tool again ? N for No , press anything else for Yes ' + Style.RESET_ALL)
        if redo_choice.upper() == 'N':
            print(Fore.CYAN + 'Good Bye , have a good day\n' + Style.RESET_ALL)
            break
        else:
            continue

--------------------------------------------------------------------------------
/sondesh/compare.py:
--------------------------------------------------------------------------------
import json
import os
from colorama import Fore, init, Style
from rich.console import Console
from rich.table import Table

init()


def compare_df(query_one_df, query_two_df, context_one, context_two):
    validator = None
    string_vs_varchar = None
    timezone_diff = None
    encoding_diff = None
    distyle_diff = None

    if os.path.exists('validator.json') and os.path.getsize('validator.json'):
        with open('validator.json') as fp:
            validator = json.load(fp)
        if validator:
            string_vs_varchar = validator['string_vs_varchar']
            timezone_diff = validator['timezone_diff']
            encoding_diff = validator['encoding_diff']
            distyle_diff = validator['distyle_diff']

    if validator is None:
        print(Fore.YELLOW + ' WARNING !! User validator profile is blank ' + Style.RESET_ALL)
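    # validator.json is written by the CLI app (sondesh/apps/cli_app.py) and,
    # when present, looks like:
    #   {"string_vs_varchar": "Y", "timezone_diff": "N",
    #    "encoding_diff": "Y", "distyle_diff": "N"}
    # note: the flags are loaded above but not yet consulted by the diff logic below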

    if query_one_df and query_two_df:
        print(Fore.CYAN + 'visualizing compare result ' + Style.RESET_ALL)
        table = Table(title='comparing ' + context_one + ' vs ' + context_two + ' column level ')
        table_tab = Table(title='comparing ' + context_one + ' vs ' + context_two + ' table level ')

        difference_tab = []
        columns_tab = ["property name", "value in " + context_one, "value in " + context_two]

        if query_one_df.get('table_name') != query_two_df.get('table_name'):
            difference_tab.append(['table name found in sql', query_one_df.get('table_name'), query_two_df.get('table_name')])

        if query_one_df.get('tablespace') != query_two_df.get('tablespace'):
            difference_tab.append(['tablespace', query_one_df.get('tablespace'), query_two_df.get('tablespace')])

        if query_one_df.get('schema') != query_two_df.get('schema'):
            difference_tab.append(['schema', query_one_df.get('schema'), query_two_df.get('schema')])

        keys_one = None
        keys_two = None
        type_one = None
        type_two = None

        if query_one_df.get('sortkey'):
            keys_one = ','.join(query_one_df.get('sortkey').get('keys'))
            type_one = query_one_df.get('sortkey').get('type')

        if query_two_df.get('sortkey'):
            keys_two = ','.join(query_two_df.get('sortkey').get('keys'))
            type_two = query_two_df.get('sortkey').get('type')

        if (keys_one != keys_two) and (keys_one or keys_two):
            difference_tab.append(['sort keys', keys_one, keys_two])

        if (type_one != type_two) and (type_one or type_two):
            difference_tab.append(['sort type', type_one, type_two])

        # guard the joins with `or []` so a missing entry on one side
        # does not raise a TypeError
        if query_two_df.get('index') != query_one_df.get('index'):
            difference_tab.append(['index', ','.join(query_one_df.get('index') or []),
                                   ','.join(query_two_df.get('index') or [])])

        if query_two_df.get('partitioned_by') != query_one_df.get('partitioned_by'):
            difference_tab.append(['partition', ','.join(query_one_df.get('partitioned_by') or []),
                                   ','.join(query_two_df.get('partitioned_by') or [])])

        if query_two_df.get('diststyle') != query_one_df.get('diststyle'):
            difference_tab.append(['distribution style', query_one_df.get('diststyle'), query_two_df.get('diststyle')])

        if query_two_df.get('checks') != query_one_df.get('checks'):
            difference_tab.append(['checks constraints', ','.join(query_one_df.get('checks') or []),
                                   ','.join(query_two_df.get('checks') or [])])

        if difference_tab:
            for col in columns_tab:
                table_tab.add_column(col)
            for row in difference_tab:
                table_tab.add_row(*row, style='bright_green')
            # table level difference

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table_tab)
            print()
        else:
            print()
            print(Fore.GREEN + 'No Table Level Difference could be found ' + Style.RESET_ALL)

        columns = ["column name", "property", "value in " + context_one, "value in " + context_two]

        query_one_cols = query_one_df['columns']
        query_two_cols = query_two_df['columns']

        difference = []

        for j in query_one_cols:
            refers_to_one, on_delete_one, on_update_one, is_foreign_key_one = None, None, None, None

            if j.get('references'):
                refers_to_one = str(j.get('references').get('table'))
            if j.get('on_delete'):
                on_delete_one = str(j.get('references').get('on_delete'))
            if j.get('on_update'):
                on_update_one = str(j.get('references').get('on_update'))
            if j.get('references'):
                is_foreign_key_one = 'yes'

            col_name_one = str(j.get('name'))
            col_type_one = str(j.get('type'))
            col_size_one = str(j.get('size'))
            isunique_one = str(j.get('unique'))
            isnull_one = str(j.get('nullable'))
            default_val_one = str(j.get('default'))
            check_val_one = str(j.get('check'))
            encode_one = str(j.get('encode'))

            temp_two = list(filter(lambda x: x['name'] == col_name_one, query_two_cols))
            col_name_two = None
            if temp_two:
                temp_two = temp_two[0]
                col_name_two = str(temp_two.get('name'))
                col_type_two = str(temp_two.get('type'))
                col_size_two = str(temp_two.get('size'))
                isunique_two = str(temp_two.get('unique'))
                isnull_two = str(temp_two.get('nullable'))
                default_val_two = str(temp_two.get('default'))
                check_val_two = str(temp_two.get('check'))
                encode_two = str(temp_two.get('encode'))

                if col_type_one != col_type_two:
                    difference.append([col_name_two, 'datatype', col_type_one, col_type_two])

                if col_size_two != col_size_one:
                    difference.append([col_name_two, 'size', col_size_one, col_size_two])

                if isunique_two != isunique_one:
                    difference.append([col_name_two, 'is unique', isunique_one, isunique_two])

                if isnull_one != isnull_two:
                    difference.append([col_name_two, 'nullable', isnull_one, isnull_two])

                if default_val_two != default_val_one:
                    difference.append([col_name_two, 'default value', default_val_one, default_val_two])

                if check_val_two != check_val_one:
                    difference.append([col_name_two, 'check constraint', check_val_one, check_val_two])

                if encode_one != encode_two:
                    difference.append([col_name_two, 'encode', encode_one, encode_two])

                refers_to_two, on_delete_two, on_update_two, is_foreign_key_two = None, None, None, None

                # read the second column's foreign-key details from temp_two,
                # not from j (the first file's column)
                if temp_two.get('references'):
                    refers_to_two = str(temp_two.get('references').get('table'))
                if temp_two.get('on_delete'):
                    on_delete_two = str(temp_two.get('references').get('on_delete'))
                if temp_two.get('on_update'):
                    on_update_two = str(temp_two.get('references').get('on_update'))
                if temp_two.get('references'):
                    is_foreign_key_two = 'yes'

                if is_foreign_key_two != is_foreign_key_one:
                    difference.append([col_name_two, 'foreign key', is_foreign_key_one, is_foreign_key_two])

                if refers_to_two != refers_to_one:
                    difference.append([col_name_two, 'foreign key reference', refers_to_one, refers_to_two])

                if on_delete_two != on_delete_one:
                    difference.append([col_name_two, 'on delete clause', on_delete_one, on_delete_two])

                if on_update_two != on_update_one:
                    difference.append([col_name_two, 'on update clause', on_update_one, on_update_two])

                query_two_cols = list(filter(lambda g: g['name'] != col_name_one, query_two_cols))

            else:
                difference.append([col_name_one, 'is_found', 'yes', 'no'])

        if query_two_cols:
            for k in query_two_cols:
                difference.append([k['name'], 'is_found', 'no', 'yes'])

        if difference:
            for col in columns:
                table.add_column(col)
            for row in difference:
                table.add_row(*row, style='bright_green')
            # column level difference

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table)
            print()
        else:
            print()
            print(Fore.GREEN + 'No Column Level Difference could be found ' + Style.RESET_ALL)

--------------------------------------------------------------------------------
/sondesh/ddl_parser.py:
--------------------------------------------------------------------------------
from typing import Dict, List, Optional

from ply.lex import LexToken

from sondesh import tokens as tok
from sondesh.dialects.bigquery import BigQuery
from sondesh.dialects.hql import HQL
from sondesh.dialects.mssql import MSSQL
from sondesh.dialects.mysql import MySQL
from sondesh.dialects.oracle import Oracle
from sondesh.dialects.redshift import Redshift
from sondesh.dialects.snowflake import Snowflake
from sondesh.dialects.spark_sql import SparkSQL
from sondesh.dialects.sql import BaseSQL
from sondesh.parser import Parser


class parse_the_ddl_error(Exception):
    pass


class parse_the_ddl(
    Parser, SparkSQL, Snowflake, BaseSQL, HQL, MySQL, MSSQL, Oracle, Redshift, BigQuery
):

    tokens = tok.tokens
    t_ignore = "\t \r"

    def get_tag_symbol_value_and_increment(self, t: LexToken) -> LexToken:
        # todo: need to find a less hacky way to parse HQL structure types
        if "<" in t.value:
            t.type = "LT"
            self.lexer.lt_open += t.value.count("<")
        if ">" in t.value and not self.lexer.check:
            t.type = "RT"
            self.lexer.lt_open -= t.value.count(">")
        return t

    def after_columns_tokens(self, t: LexToken) -> LexToken:
        t.type = tok.after_columns_tokens.get(t.value.upper(), t.type)
        if t.type != "ID":
            self.lexer.after_columns = True
        elif self.lexer.columns_def:
            t.type = tok.columns_defenition.get(t.value.upper(), t.type)
        return t

    def process_body_tokens(self, t: LexToken) -> LexToken:
        if (
            self.lexer.last_par == "RP" and not self.lexer.lp_open
        ) or self.lexer.after_columns:
            t = self.after_columns_tokens(t)
        elif self.lexer.columns_def:
            t.type = tok.columns_defenition.get(t.value.upper(), t.type)
        elif self.lexer.sequence:
            t.type = tok.sequence_reserved.get(t.value.upper(), "ID")
        return t

    def parse_tags_symbols(self, t) -> Optional[LexToken]:
"""like symbols < >""" 60 | if not self.lexer.check: 61 | for key in tok.symbol_tokens_no_check: 62 | if key in t.value: 63 | return self.get_tag_symbol_value_and_increment(t) 64 | 65 | def tokens_not_columns_names(self, t: LexToken) -> LexToken: 66 | 67 | t_tag = self.parse_tags_symbols(t) 68 | if t_tag: 69 | return t_tag 70 | 71 | if "ARRAY" in t.value: 72 | t.type = "ARRAY" 73 | return t 74 | elif self.lexer.is_like: 75 | t.type = tok.after_columns_tokens.get(t.value.upper(), t.type) 76 | elif not self.lexer.is_table: 77 | # if is_table mean wi already met INDEX or TABLE statement and 78 | # the definition already done and this is a string 79 | t.type = tok.defenition_statements.get( 80 | t.value.upper(), t.type 81 | ) # Check for reserved word 82 | elif self.lexer.last_token != "COMMA": 83 | t.type = tok.common_statements.get(t.value.upper(), t.type) 84 | else: 85 | t.type = tok.first_liners.get(t.value.upper(), t.type) 86 | 87 | # get tokens from other token dicts 88 | t = self.process_body_tokens(t) 89 | 90 | self.set_lexer_tags(t) 91 | 92 | return t 93 | 94 | def set_lexer_tags(self, t: LexToken) -> None: 95 | if t.type == "SEQUENCE": 96 | self.lexer.sequence = True 97 | elif t.type == "CHECK": 98 | self.lexer.check = True 99 | 100 | def t_DOT(self, t: LexToken) -> LexToken: 101 | r"\." 102 | t.type = "DOT" 103 | return self.set_last_token(t) 104 | 105 | def t_STRING(self, t: LexToken) -> LexToken: 106 | r"((\')([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}\[\]\/\\\"\#\*&^|?;±§@~]*)(\')){1}" 107 | t.type = "STRING" 108 | return self.set_last_token(t) 109 | 110 | def t_DQ_STRING(self, t: LexToken) -> LexToken: 111 | r"((\")([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}'\[\]\/\\\\#\*&^|?;±§@~]*)(\")){1}" 112 | t.type = "DQ_STRING" 113 | return self.set_last_token(t) 114 | 115 | def is_token_column_name(self, t: LexToken) -> bool: 116 | """many of reserved words can be used as column name, 117 | to decide is it a column name or not we need do some checks""" 118 | skip_id_tokens = ["(", ")", ","] 119 | return ( 120 | t.value not in skip_id_tokens 121 | and self.lexer.is_table 122 | and self.lexer.lp_open 123 | and not self.lexer.is_like 124 | and (self.lexer.last_token == "COMMA" or self.lexer.last_token == "LP") 125 | and t.value.upper() not in tok.first_liners 126 | ) 127 | 128 | def is_creation_name(self, t: LexToken) -> bool: 129 | """many of reserved words can be used as column name, 130 | to decide is it a column name or not we need do some checks""" 131 | skip_id_tokens = ["(", ")", ","] 132 | exceptional_keys = [ 133 | "SCHEMA", 134 | "TABLE", 135 | "DATABASE", 136 | "TYPE", 137 | "DOMAIN", 138 | "TABLESPACE", 139 | "INDEX", 140 | "CONSTRAINT", 141 | "EXISTS", 142 | ] 143 | return ( 144 | t.value not in skip_id_tokens 145 | and t.value.upper() not in ["IF"] 146 | and self.lexer.last_token in exceptional_keys 147 | and not self.exceptional_cases(t.value.upper()) 148 | ) 149 | 150 | def exceptional_cases(self, value: str) -> bool: 151 | if value == "TABLESPACE" and self.lexer.last_token == "INDEX": 152 | return True 153 | return False 154 | 155 | def t_AUTOINCREMENT(self, t: LexToken): 156 | r"(AUTO_INCREMENT|AUTOINCREMENT)(?i)\b" 157 | t.type = "AUTOINCREMENT" 158 | return self.set_last_token(t) 159 | 160 | def t_ID(self, t: LexToken): 161 | r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)" 162 | t.type = tok.symbol_tokens.get(t.value, "ID") 163 | 164 | if t.type == "LP": 165 | self.lexer.lp_open += 1 166 | self.lexer.columns_def = True 
            self.lexer.last_token = "LP"
            return t
        elif self.is_token_column_name(t) or self.lexer.last_token == "DOT":
            t.type = "ID"
        elif t.type != "DQ_STRING" and self.is_creation_name(t):
            t.type = "ID"
        else:
            t = self.tokens_not_columns_names(t)

        self.capitalize_tokens(t)
        self.commat_type(t)

        self.set_lexx_tags(t)

        return self.set_last_token(t)

    def commat_type(self, t: LexToken):
        if t.type == "COMMA" and self.lexer.lt_open:
            t.type = "COMMAT"

    def capitalize_tokens(self, t: LexToken):
        if t.type != "ID" and t.type not in ["LT", "RT"]:
            t.value = t.value.upper()

    def set_parathesis_tokens(self, t: LexToken):
        if t.type in ["RP", "LP"]:
            if t.type == "RP" and self.lexer.lp_open:
                self.lexer.lp_open -= 1
            self.lexer.last_par = t.type

    def set_lexx_tags(self, t: LexToken):
        self.set_parathesis_tokens(t)

        if t.type == "ALTER":
            self.lexer.is_alter = True
        if t.type == "LIKE":
            self.lexer.is_like = True
        elif t.type in ["TYPE", "DOMAIN", "TABLESPACE"]:
            self.lexer.is_table = False
        elif t.type in ["TABLE", "INDEX"] and not self.lexer.is_alter:
            self.lexer.is_table = True

    def set_last_token(self, t: LexToken):
        self.lexer.last_token = t.type
        return t

    def p_id(self, p):
        """id : ID
        | DQ_STRING"""
        delimiters_to_start = ["`", '"', "["]
        delimiters_to_end = ["`", '"', "]"]
        p[0] = p[1]

        if self.normalize_names:
            for num, symbol in enumerate(delimiters_to_start):
                if p[0].startswith(symbol) and p[0].endswith(delimiters_to_end[num]):
                    p[0] = p[0][1:-1]

    def p_id_or_string(self, p):
        """id_or_string : id
        | STRING"""
        p[0] = p[1]

    def t_error(self, t: LexToken):
        raise parse_the_ddl_error("Unknown symbol %r" % (t.value[0],))

    def p_error(self, p):
        if not self.silent:
            raise parse_the_ddl_error(f"Unknown statement at {p}")


def parse_from_file(file_path: str, parser_settings: Optional[dict] = None, **kwargs) -> List[Dict]:
    """get useful data from ddl"""
    with open(file_path, "r") as df:
        return parse_the_ddl(df.read(), **(parser_settings or {})).run(file_path=file_path, **kwargs)

--------------------------------------------------------------------------------
/sondesh/dialects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/dialects/__init__.py

--------------------------------------------------------------------------------
/sondesh/dialects/bigquery.py:
--------------------------------------------------------------------------------
class BigQuery:
    def p_expression_options(self, p):
        """expr : expr multiple_options"""
        p[0] = p[1]
        p[1].update(p[2])

    def p_multiple_options(self, p):
        """multiple_options : options
        | multiple_options options
        """
        if len(p) > 2:
            p[1]["options"].extend(p[2]["options"])
            p[0] = p[1]
        else:
            p[0] = p[1]

    def p_options(self, p):
        """options : OPTIONS LP id_equals RP"""
        p_list = list(p)
        if not isinstance(p[1], dict):
            p[0] = {"options": p[3]}
        else:
            p[0] = p[1]
            if len(p) == 4:
                p[0]["options"].append(p_list[-1][0])

--------------------------------------------------------------------------------
/sondesh/dialects/hql.py:
--------------------------------------------------------------------------------
from sondesh.utils import check_spec, remove_par
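
# NOTE: with PLY, the docstring of each p_* method *is* the grammar production
# it implements, and productions refer to each other by those exact names.
# Rule names such as `multi_assigments` / `assigment` below are therefore kept
# as originally spelled (misspellings included); renaming one would require
# updating every production that references it.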
-------------------------------------------------------------------------------- /sondesh/dialects/hql.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import check_spec, remove_par 2 | 3 | 4 | class HQL: 5 | def p_expression_location(self, p): 6 | """expr : expr LOCATION STRING 7 | | expr LOCATION DQ_STRING""" 8 | p[0] = p[1] 9 | p_list = list(p) 10 | p[0]["location"] = p_list[-1] 11 | 12 | def p_expression_clustered(self, p): 13 | """expr : expr ID ON LP pid RP 14 | | expr ID BY LP pid RP""" 15 | p[0] = p[1] 16 | p_list = list(p) 17 | p[0][f"{p_list[2].lower()}_{p_list[3].lower()}"] = p_list[-2] 18 | 19 | def p_expression_into_buckets(self, p): 20 | """expr : expr INTO ID ID""" 21 | p[0] = p[1] 22 | p_list = list(p) 23 | p[0][f"{p_list[2].lower()}_{p_list[-1].lower()}"] = p_list[-2] 24 | 25 | def p_row_format(self, p): 26 | """row_format : ROW FORMAT SERDE 27 | | ROW FORMAT 28 | """ 29 | p_list = list(p) 30 | p[0] = {"serde": p_list[-1] == "SERDE"} 31 | 32 | def p_expression_row_format(self, p): 33 | """expr : expr row_format id 34 | | expr row_format STRING 35 | """ 36 | p[0] = p[1] 37 | p_list = list(p) 38 | if p[2]["serde"]: 39 | format = {"serde": True, "java_class": p_list[-1]} 40 | else: 41 | format = check_spec(p_list[-1]) 42 | 43 | p[0]["row_format"] = format 44 | 45 | def p_expression_with_serde(self, p): 46 | """expr : expr WITH SERDEPROPERTIES multi_assigments""" 47 | p[0] = p[1] 48 | p_list = list(p) 49 | 50 | row_format = p[0]["row_format"] 51 | row_format["properties"] = p_list[-1] 52 | p[0]["row_format"] = row_format 53 | 54 | def p_expression_tblproperties(self, p): 55 | """expr : expr TBLPROPERTIES multi_assigments""" 56 | p[0] = p[1] 57 | p[0]["tblproperties"] = list(p)[-1] 58 | 59 | def p_multi_assigments(self, p): 60 | """multi_assigments : LP assigment 61 | | multi_assigments RP 62 | | multi_assigments COMMA assigment""" 63 | p_list = remove_par(list(p)) 64 | p[0] = p_list[1] 65 | p[0].update(p_list[-1]) 66 | 67 | def p_assigment(self, p): 68 | """assigment : id id id 69 | | STRING id STRING 70 | | id id STRING 71 | | STRING id id 72 | | STRING id""" 73 | p_list = remove_par(list(p)) 74 | if "state" in self.lexer.__dict__: 75 | p[0] = {p[1]: self.lexer.state.get(p_list[-1])} 76 | else: 77 | if "=" in p_list[-1]: 78 | p_list[-1] = p_list[-1].split("=")[-1] 79 | p[0] = {p_list[1]: p_list[-1]} 80 | 81 | def p_expression_comment(self, p): 82 | """expr : expr COMMENT STRING""" 83 | p[0] = p[1] 84 | p_list = list(p) 85 | p[0]["comment"] = check_spec(p_list[-1]) 86 | 87 | def p_expression_terminated_by(self, p): 88 | """expr : expr id TERMINATED BY id 89 | | expr id TERMINATED BY STRING 90 | """ 91 | p[0] = p[1] 92 | p_list = list(p) 93 | p[0][f"{p[2].lower()}_terminated_by"] = check_spec(p_list[-1]) 94 | 95 | def p_expression_map_keys_terminated_by(self, p): 96 | """expr : expr MAP KEYS TERMINATED BY id 97 | | expr MAP KEYS TERMINATED BY STRING 98 | """ 99 | p[0] = p[1] 100 | p_list = list(p) 101 | p[0]["map_keys_terminated_by"] = check_spec(p_list[-1]) 102 | 103 | def p_expression_skewed_by(self, p): 104 | """expr : expr SKEWED BY LP id RP ON LP pid RP""" 105 | p[0] = p[1] 106 | p_list = remove_par(list(p)) 107 | p[0]["skewed_by"] = {"key": p_list[4], "on": p_list[-1]} 108 | 109 | def p_expression_collection_terminated_by(self, p): 110 | """expr : expr COLLECTION ITEMS TERMINATED BY id 111 | | expr COLLECTION ITEMS TERMINATED BY STRING 112 | """ 113 | p[0] = p[1] 114 | p_list = list(p) 115 | 
p[0]["collection_items_terminated_by"] = check_spec(p_list[-1]) 116 | 117 | def p_expression_stored_as(self, p): 118 | """expr : expr STORED AS id 119 | | expr STORED AS id STRING 120 | | expr STORED AS id STRING id STRING 121 | """ 122 | p[0] = p[1] 123 | p_list = list(p) 124 | if len(p_list) >= 6: 125 | # only input or output format 126 | p[0]["stored_as"] = {p_list[-2].lower(): p_list[-1]} 127 | if len(p_list) == 8: 128 | # both input & output 129 | p[0]["stored_as"].update({p_list[-4].lower(): p_list[-3]}) 130 | else: 131 | p[0]["stored_as"] = p_list[-1] 132 | 133 | def p_expression_partitioned_by_hql(self, p): 134 | """expr : expr PARTITIONED BY pid_with_type 135 | | expr PARTITIONED BY LP pid RP 136 | | expr PARTITIONED BY LP multiple_funct RP 137 | """ 138 | p[0] = p[1] 139 | p_list = remove_par(list(p)) 140 | p[0]["partitioned_by"] = p_list[-1] 141 | 142 | def p_pid_with_type(self, p): 143 | """pid_with_type : LP column 144 | | pid_with_type COMMA column 145 | | pid_with_type RP 146 | """ 147 | p_list = remove_par(list(p)) 148 | if not isinstance(p_list[1], list): 149 | p[0] = [p_list[1]] 150 | else: 151 | p[0] = p_list[1] 152 | if len(p_list) > 2: 153 | p[0].append(p_list[-1]) 154 | -------------------------------------------------------------------------------- /sondesh/dialects/mssql.py: -------------------------------------------------------------------------------- 1 | import sondesh # noqa: F401 weird issue with failed tests 2 | 3 | 4 | class MSSQL: 5 | def p_pkey_constraint(self, p): 6 | """pkey_constraint : constraint pkey_statement id LP index_pid RP 7 | | constraint pkey_statement LP index_pid RP 8 | | pkey_constraint with 9 | | pkey_constraint with ON id 10 | """ 11 | p_list = list(p) 12 | p[0] = p[1] 13 | if isinstance(p[2], dict) and "with" in p[2]: 14 | data = p_list[2] 15 | if "ON" in p_list: 16 | data["with"]["on"] = p_list[-1] 17 | elif len(p_list) == 7: 18 | data = {"primary_key": True, "columns": p_list[-2], p[3]: True} 19 | else: 20 | data = {"primary_key": True, "columns": p_list[-2]} 21 | 22 | p[0]["constraint"].update(data) 23 | 24 | def p_with(self, p): 25 | """with : WITH with_args""" 26 | p_list = list(p) 27 | p[0] = {"with": {"properties": [], "on": None}} 28 | if ")" not in p_list: 29 | p[0]["with"]["properties"] = p_list[-1]["properties"] 30 | 31 | def p_equals(self, p): 32 | """equals : id id id 33 | | id id ON 34 | | id id id DOT id 35 | """ 36 | p_list = list(p) 37 | if "." 
in p_list: 38 | p[0] = {"name": p_list[1], "value": f"{p_list[3]}.{p_list[5]}"} 39 | else: 40 | p[0] = {"name": p_list[-3], "value": p_list[-1]} 41 | 42 | def p_with_args(self, p): 43 | """with_args : LP equals 44 | | with_args COMMA equals 45 | | with_args with_args 46 | | with_args RP 47 | """ 48 | p_list = list(p) 49 | if isinstance(p[1], dict): 50 | p[0] = p[1] 51 | else: 52 | p[0] = {"properties": []} 53 | if ")" != p_list[2]: 54 | if ")" == p_list[-1]: 55 | p[0]["properties"].append(p_list[-1]) 56 | else: 57 | p[0]["properties"].append(p_list[-1]) 58 | 59 | def p_period_for(self, p): 60 | """period_for : id FOR id LP pid RP""" 61 | p[0] = {"period_for_system_time": p[5]} 62 | 63 | def p_expression_on_primary(self, p): 64 | """expr : expr ON id""" 65 | p[0] = p[1] 66 | p[0]["on"] = p[3] 67 | 68 | def p_expression_with(self, p): 69 | """expr : expr with""" 70 | p[0] = p[1] 71 | p[0].update(p[2]) 72 | 73 | def p_expression_text_image_on(self, p): 74 | """expr : expr TEXTIMAGE_ON id""" 75 | p[0] = p[1] 76 | p[0].update({"textimage_on": p[3]}) 77 | -------------------------------------------------------------------------------- /sondesh/dialects/mysql.py: -------------------------------------------------------------------------------- 1 | import sondesh # noqa: F401 weird issue with failed tests 2 | 3 | 4 | class MySQL: 5 | def p_on_update(self, p): 6 | """on_update : ON UPDATE id 7 | | ON UPDATE STRING 8 | | ON UPDATE f_call 9 | """ 10 | p_list = list(p) 11 | if not ")" == p_list[-1]: 12 | p[0] = {"on_update": p_list[-1]} 13 | else: 14 | p[0] = {"on_update": p_list[-2]} 15 | -------------------------------------------------------------------------------- /sondesh/dialects/oracle.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import remove_par 2 | 3 | 4 | class Oracle: 5 | def p_encrypt(self, p): 6 | """encrypt : ENCRYPT 7 | | encrypt NO SALT 8 | | encrypt SALT 9 | | encrypt USING STRING 10 | | encrypt STRING 11 | """ 12 | p_list = list(p) 13 | if isinstance(p[1], dict): 14 | p[0] = p[1] 15 | if "NO" in p_list: 16 | p[0]["encrypt"]["salt"] = False 17 | elif "USING" in p_list: 18 | p[0]["encrypt"]["encryption_algorithm"] = p_list[-1] 19 | elif "SALT" not in p_list: 20 | p[0]["encrypt"]["integrity_algorithm"] = p_list[-1] 21 | 22 | else: 23 | p[0] = { 24 | "encrypt": { 25 | "salt": True, 26 | "encryption_algorithm": "'AES192'", 27 | "integrity_algorithm": "SHA-1", 28 | } 29 | } 30 | 31 | def p_storage(self, p): 32 | """storage : STORAGE LP 33 | | storage id id 34 | | storage id id RP 35 | """ 36 | # Initial 5m Next 5m Maxextents Unlimited 37 | p_list = remove_par(list(p)) 38 | param = {} 39 | if len(p_list) == 4: 40 | param = {p_list[2].lower(): p_list[3]} 41 | if isinstance(p_list[1], dict): 42 | p[0] = p[1] 43 | else: 44 | p[0] = {} 45 | p[0].update(param) 46 | 47 | def p_expr_storage(self, p): 48 | """expr : expr storage""" 49 | p_list = list(p) 50 | p[0] = p[1] 51 | p[0]["storage"] = p_list[-1] 52 | 53 | def p_expr_index(self, p): 54 | """expr : expr ID INDEX""" 55 | p[0] = p[1] 56 | p[0][f"{p[2].lower()}_index"] = True 57 | -------------------------------------------------------------------------------- /sondesh/dialects/redshift.py: -------------------------------------------------------------------------------- 1 | class Redshift: 2 | def p_expression_distkey(self, p): 3 | """expr : expr id LP id RP""" 4 | p_list = list(p) 5 | p[1].update({"distkey": p_list[-2]}) 6 | p[0] = p[1] 7 | 8 | def p_encode(self, p): 9 | """encode 
: ENCODE id""" 10 | p_list = list(p) 11 | p[0] = {"encode": p_list[-1]} 12 | 13 | def p_expression_diststyle(self, p): 14 | """expr : expr id id 15 | | expr id KEY 16 | """ 17 | p_list = list(p) 18 | p[1].update({p_list[-2]: p_list[-1]}) 19 | p[0] = p[1] 20 | 21 | def p_expression_sortkey(self, p): 22 | """expr : expr id id LP pid RP""" 23 | p_list = list(p) 24 | p[1].update({"sortkey": {"type": p_list[2], "keys": p_list[-2]}}) 25 | p[0] = p[1] 26 | -------------------------------------------------------------------------------- /sondesh/dialects/snowflake.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import remove_par 2 | 3 | 4 | class Snowflake: 5 | def p_clone(self, p): 6 | """clone : CLONE id""" 7 | p_list = list(p) 8 | p[0] = {"clone": {"from": p_list[-1]}} 9 | 10 | def p_expression_cluster_by(self, p): 11 | """expr : expr CLUSTER BY LP pid RP 12 | | expr CLUSTER BY pid 13 | """ 14 | p[0] = p[1] 15 | p_list = remove_par(list(p)) 16 | p[0]["cluster_by"] = p_list[-1] 17 | 18 | def p_table_comment(self, p): 19 | """expr : expr option_comment 20 | """ 21 | p[0] = p[1] 22 | if p[2]: 23 | p[0].update(p[2]) 24 | 25 | def p_option_comment(self, p): 26 | """option_comment : ID STRING 27 | | ID DQ_STRING 28 | | COMMENT ID STRING 29 | | COMMENT ID DQ_STRING 30 | """ 31 | p_list = remove_par(list(p)) 32 | if "comment" in p[1].lower(): 33 | p[0] = {"comment": p_list[-1]} 34 | -------------------------------------------------------------------------------- /sondesh/dialects/spark_sql.py: -------------------------------------------------------------------------------- 1 | class SparkSQL: 2 | def p_expression_using(self, p): 3 | """expr : expr using""" 4 | p[0] = p[1] 5 | p[1].update(p[2]) 6 | 7 | def p_using(self, p): 8 | """using : USING id""" 9 | p_list = list(p) 10 | p[0] = {"using": p_list[-1]} 11 | -------------------------------------------------------------------------------- /sondesh/dialects/sql.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | from copy import deepcopy 4 | from typing import Any, Dict, List, Optional, Tuple, Union 5 | 6 | from sondesh.utils import check_spec, remove_par 7 | 8 | auth = "AUTHORIZATION" 9 | 10 | 11 | class AfterColumns: 12 | def p_expression_partition_by(self, p: List) -> None: 13 | """expr : expr PARTITION BY LP pid RP 14 | | expr PARTITION BY id LP pid RP 15 | | expr PARTITION BY pid 16 | | expr PARTITION BY id pid""" 17 | p[0] = p[1] 18 | p_list = list(p) 19 | _type = None 20 | if isinstance(p[4], list): 21 | columns = p[4] 22 | else: 23 | columns = p_list[-2] 24 | if isinstance(p[4], str) and p[4].lower() != "(": 25 | _type = p[4] 26 | p[0]["partition_by"] = {"columns": columns, "type": _type} 27 | 28 | 29 | class Database: 30 | def p_expression_create_database(self, p: List) -> None: 31 | """expr : expr database_base""" 32 | p[0] = p[1] 33 | p_list = list(p) 34 | p[0].update(p_list[-1]) 35 | 36 | def p_database_base(self, p: List) -> None: 37 | """database_base : CREATE DATABASE id 38 | | CREATE ID DATABASE id 39 | | database_base clone 40 | """ 41 | if isinstance(p[1], dict): 42 | p[0] = p[1] 43 | else: 44 | p[0] = {} 45 | p_list = list(p) 46 | if isinstance(p_list[-1], dict): 47 | p[0].update(p_list[-1]) 48 | else: 49 | p[0]["database_name"] = p_list[-1] 50 | if len(p_list) == 5: 51 | p[0][p[2].lower()] = True 52 | 53 | 54 | class TableSpaces: 55 | @staticmethod 56 | def get_tablespace_data(p_list): 
57 | if p_list[1] == "TABLESPACE": 58 | _type = None 59 | temp = False 60 | else: 61 | if p_list[1].upper() == "TEMPORARY": 62 | _type = None 63 | temp = True 64 | else: 65 | _type = p_list[1] 66 | if p_list[2].upper() == "TEMPORARY": 67 | temp = True 68 | else: 69 | temp = False 70 | if isinstance(p_list[-1], dict): 71 | properties = p_list[-1] 72 | tablespace_name = p_list[-2] 73 | else: 74 | properties = None 75 | tablespace_name = p_list[-1] 76 | result = { 77 | "tablespace_name": tablespace_name, 78 | "properties": properties, 79 | "type": _type, 80 | "temporary": temp, 81 | } 82 | return result 83 | 84 | def p_expression_create_tablespace(self, p: List) -> None: 85 | """expr : CREATE TABLESPACE id properties 86 | | CREATE id TABLESPACE id properties 87 | | CREATE id TABLESPACE id 88 | | CREATE TABLESPACE id 89 | | CREATE id id TABLESPACE id 90 | | CREATE id id TABLESPACE id properties 91 | """ 92 | p_list = list(p) 93 | p[0] = self.get_tablespace_data(p_list[1:]) 94 | 95 | def p_properties(self, p: List) -> None: 96 | """properties : property 97 | | properties property""" 98 | p_list = list(p) 99 | if len(p_list) == 3: 100 | p[0] = p[1] 101 | p[0].update(p[2]) 102 | else: 103 | p[0] = p[1] 104 | 105 | def p_property(self, p: List) -> None: 106 | """property : id id 107 | | id STRING 108 | | id ON 109 | | id STORAGE 110 | | id ROW 111 | """ 112 | p[0] = {p[1]: p[2]} 113 | 114 | 115 | class Table: 116 | @staticmethod 117 | def add_if_not_exists(data: Dict, p_list: List): 118 | if "EXISTS" in p_list: 119 | data["if_not_exists"] = True 120 | return data 121 | 122 | def p_create_table(self, p: List): 123 | """create_table : CREATE TABLE IF NOT EXISTS 124 | | CREATE TABLE 125 | | CREATE OR REPLACE TABLE IF NOT EXISTS 126 | | CREATE OR REPLACE TABLE 127 | | CREATE id TABLE IF NOT EXISTS 128 | | CREATE id TABLE 129 | | CREATE OR REPLACE id TABLE IF NOT EXISTS 130 | | CREATE OR REPLACE id TABLE 131 | 132 | """ 133 | # id - for EXTERNAL, TRANSIENT, TEMPORARY 134 | # get schema & table name 135 | p[0] = {} 136 | p_list = list(p) 137 | self.add_if_not_exists(p[0], p_list) 138 | 139 | if 'REPLACE' in p_list: 140 | p[0]["replace"] = True 141 | 142 | id_key = p_list[4] if 'REPLACE' in p_list else p_list[2] 143 | id_key = id_key.upper() 144 | 145 | if id_key in ["EXTERNAL", "TRANSIENT"]: 146 | p[0][id_key.lower()] = True 147 | elif id_key in ["TEMP", "TEMPORARY"]: 148 | p[0]["temp"] = True 149 | 150 | 151 | class Column: 152 | def p_column_property(self, p: List): 153 | """c_property : id id""" 154 | p_list = list(p) 155 | if p[1].lower() == "auto": 156 | p[0] = {"increment": True} 157 | else: 158 | p[0] = {"property": {p_list[1]: p_list[-1]}} 159 | 160 | def set_base_column_propery(self, p: List) -> Dict: 161 | 162 | if "." 
in list(p): 163 | type_str = f"{p[2]}.{p[4]}" 164 | else: 165 | type_str = p[2] 166 | if isinstance(p[1], dict): 167 | p[0] = p[1] 168 | else: 169 | size = None 170 | p[0] = {"name": p[1], "type": type_str, "size": size} 171 | return p[0] 172 | 173 | @staticmethod 174 | def parse_complex_type(p_list: List[str]) -> str: 175 | # for complex <> types 176 | start_index = 1 177 | _type = "" 178 | if isinstance(p_list[1], dict): 179 | _type = p_list[1]["type"] 180 | start_index = 2 181 | for elem in p_list[start_index:]: 182 | if isinstance(elem, list): 183 | for _elem in elem: 184 | _type += f" {_elem.rstrip()}" 185 | elif "ARRAY" in elem and elem != "ARRAY": 186 | _type += elem 187 | else: 188 | _type += f" {elem}" 189 | return _type 190 | 191 | def p_c_type(self, p: List) -> None: 192 | """c_type : id 193 | | id id 194 | | id id id id 195 | | id id id 196 | | id DOT id 197 | | tid 198 | | ARRAY 199 | | c_type ARRAY 200 | | c_type tid 201 | """ 202 | p[0] = {} 203 | p_list = remove_par(list(p)) 204 | _type = None 205 | 206 | if len(p_list) == 2: 207 | _type = p_list[-1] 208 | elif isinstance(p[1], str) and p[1].lower() == "encode": 209 | p[0] = {"property": {"encode": p[2]}} 210 | else: 211 | _type = self.parse_complex_type(p_list) 212 | if _type: 213 | _type = self.process_type(_type, p_list, p) 214 | p[0]["type"] = _type 215 | 216 | def process_type(self, _type: Union[str, List], p_list: List, p: List) -> str: 217 | 218 | if isinstance(_type, list): 219 | _type = _type[0] 220 | 221 | elif isinstance(p_list[-1], str) and p_list[-1].lower() == "distkey": 222 | p[0] = {"property": {"distkey": True}} 223 | _type = _type.split("distkey")[0] 224 | 225 | _type = _type.strip().replace('" . "', '"."') 226 | 227 | _type = self.process_array_types(_type, p_list) 228 | return _type 229 | 230 | @staticmethod 231 | def process_array_types(_type: str, p_list: List) -> str: 232 | if "<" not in _type and "ARRAY" in _type: 233 | if "[" not in p_list[-1]: 234 | _type = _type.replace(" ARRAY", "[]").replace("ARRAY", "[]") 235 | else: 236 | _type = _type.replace("ARRAY", "") 237 | elif "<" in _type and "[]" in _type: 238 | _type = _type.replace("[]", "ARRAY") 239 | return _type 240 | 241 | @staticmethod 242 | def get_size(p_list: List): 243 | if p_list[-1].isnumeric(): 244 | size = int(p_list[-1]) 245 | else: 246 | size = p_list[-1] 247 | if len(p_list) != 3: 248 | if p_list[-3] != "*": 249 | # oracle can contain * in column size 250 | try: 251 | value_0 = int(p_list[-3]) 252 | except ValueError: 253 | # we have column like p Geometry(MultiPolygon, 26918) 254 | value_0 = p_list[-3] 255 | else: 256 | value_0 = p_list[-3] 257 | size = (value_0, int(p_list[-1])) 258 | return size 259 | 260 | @staticmethod 261 | def get_column_details(p_list: List, p: List): 262 | if p_list[-1].get("type"): 263 | p[0]["type"] += f"{p_list[-1]['type'].strip()}" 264 | elif p_list[-1].get("comment"): 265 | p[0].update(p_list[-1]) 266 | elif p_list[-1].get("property"): 267 | for key, value in p_list[-1]["property"].items(): 268 | p[0][key] = value 269 | p_list.pop(-1) 270 | 271 | @staticmethod 272 | def check_type_parameter(size: Union[tuple, int]) -> bool: 273 | if isinstance(size, tuple) and not ( 274 | isinstance(size[0], str) and size[0].strip() == '*') and not ( 275 | isinstance(size[0], int) or isinstance(size[0], float)): 276 | return True 277 | return False 278 | 279 | @staticmethod 280 | def process_oracle_type_size(p_list): 281 | if p_list[-1] == ')' and p_list[-4] == '(': 282 | # for Oracle sizes like 30 CHAR 283 | p_list[-3] 
+= f" {p_list[-2]}" 284 | del p_list[-2] 285 | return p_list 286 | 287 | def p_column(self, p: List) -> None: 288 | """column : id c_type 289 | | column comment 290 | | column LP id RP 291 | | column LP id id RP 292 | | column LP id RP c_type 293 | | column LP id COMMA id RP 294 | | column LP id COMMA id RP c_type 295 | """ 296 | p[0] = self.set_base_column_propery(p) 297 | p_list = list(p) 298 | 299 | p_list = self.process_oracle_type_size(p_list) 300 | 301 | p_list = remove_par(p_list) 302 | 303 | if isinstance(p_list[-1], dict) and "type" in p_list[-1] and len(p_list) <= 3: 304 | p[0]["type"] = p_list[-1]["type"] 305 | if p_list[-1].get("property"): 306 | for key, value in p_list[-1]["property"].items(): 307 | p[0][key] = value 308 | elif isinstance(p_list[-1], dict): 309 | self.get_column_details(p_list, p) 310 | self.set_column_size(p_list, p) 311 | 312 | def set_column_size(self, p_list: List, p: List): 313 | if ( 314 | not isinstance(p_list[-1], dict) 315 | and bool(re.match(r"[0-9]+", p_list[-1])) 316 | or p_list[-1] == "max" 317 | ): 318 | size = self.get_size(p_list) 319 | if self.check_type_parameter(size): 320 | p[0]["type_parameters"] = size 321 | else: 322 | p[0]["size"] = size 323 | 324 | @staticmethod 325 | def set_property(p: List) -> List: 326 | for item in p[1:]: 327 | if isinstance(item, dict): 328 | if "property" in item: 329 | for key, value in item["property"].items(): 330 | p[0][key] = value 331 | del item["property"] 332 | p[0].update(item) 333 | return p 334 | 335 | @staticmethod 336 | def get_column_properties(p_list: List) -> Tuple: 337 | pk = False 338 | nullable = True 339 | default = None 340 | unique = False 341 | references = None 342 | if isinstance(p_list[-1], str): 343 | if p_list[-1].upper() == "KEY": 344 | pk = True 345 | nullable = False 346 | elif p_list[-1].upper() == "UNIQUE": 347 | unique = True 348 | elif isinstance(p_list[-1], dict) and "references" in p_list[-1]: 349 | p_list[-1]["references"]["column"] = p_list[-1]["references"]["columns"][0] 350 | del p_list[-1]["references"]["columns"] 351 | references = p_list[-1]["references"] 352 | return pk, default, unique, references, nullable 353 | 354 | def p_autoincrement(self, p: List) -> None: 355 | """ autoincrement : AUTOINCREMENT""" 356 | p[0] = {"autoincrement": True} 357 | 358 | def p_defcolumn(self, p: List) -> None: 359 | """defcolumn : column 360 | | defcolumn comment 361 | | defcolumn null 362 | | defcolumn encode 363 | | defcolumn PRIMARY KEY 364 | | defcolumn UNIQUE KEY 365 | | defcolumn UNIQUE 366 | | defcolumn check_ex 367 | | defcolumn default 368 | | defcolumn collate 369 | | defcolumn enforced 370 | | defcolumn ref 371 | | defcolumn foreign ref 372 | | defcolumn encrypt 373 | | defcolumn generated 374 | | defcolumn c_property 375 | | defcolumn on_update 376 | | defcolumn options 377 | | defcolumn autoincrement 378 | """ 379 | p[0] = p[1] 380 | p_list = list(p) 381 | 382 | pk, default, unique, references, nullable = self.get_column_properties(p_list) 383 | 384 | self.set_property(p) 385 | 386 | p[0]["references"] = p[0].get("references", references) 387 | p[0]["unique"] = unique or p[0].get("unique", unique) 388 | p[0]["primary_key"] = pk or p[0].get("primary_key", pk) 389 | p[0]["nullable"] = ( 390 | nullable if nullable is not True else p[0].get("nullable", nullable) 391 | ) 392 | p[0]["default"] = p[0].get("default", default) 393 | p[0]["check"] = p[0].get("check", None) 394 | if isinstance(p_list[-1], dict) and p_list[-1].get("encode"): 395 | p[0]["encode"] = p[0].get("encode", 
p_list[-1]["encode"]) 396 | p[0]["check"] = self.set_check_in_columm(p[0].get("check")) 397 | 398 | @staticmethod 399 | def set_check_in_columm(check: Optional[List]) -> Optional[str]: 400 | if check: 401 | check_statement = "" 402 | for n, item in enumerate(check): 403 | if isinstance(item, list): 404 | in_clause = ", ".join(item) 405 | check_statement += f" ({in_clause})" 406 | else: 407 | check_statement += f" {item}" if n > 0 else f"{item}" 408 | 409 | return check_statement 410 | 411 | def p_check_ex(self, p: List) -> None: 412 | """check_ex : check_st 413 | | constraint check_st 414 | """ 415 | name = None 416 | if isinstance(p[1], dict): 417 | if "constraint" in p[1]: 418 | p[0] = { 419 | "check": { 420 | "constraint_name": p[1]["constraint"]["name"], 421 | "statement": " ".join(p[2]["check"]), 422 | } 423 | } 424 | elif "check" in p[1]: 425 | p[0] = p[1] 426 | if isinstance(p[1], list): 427 | p[0] = { 428 | "check": {"constraint_name": name, "statement": p[1]["check"]} 429 | } 430 | if len(p) >= 3: 431 | for item in list(p)[2:]: 432 | p[0]["check"]["statement"].append(item) 433 | else: 434 | p[0] = {"check": {"statement": [p[2]], "constraint_name": name}} 435 | 436 | 437 | class Schema: 438 | def p_expression_schema(self, p: List) -> None: 439 | """expr : create_schema 440 | | create_database 441 | | expr id 442 | | expr clone 443 | """ 444 | p[0] = p[1] 445 | p_list = list(p) 446 | 447 | if isinstance(p_list[-1], dict): 448 | p[0].update(p_list[-1]) 449 | elif len(p) > 2: 450 | p[0]["authorization"] = p[2] 451 | 452 | def set_properties_for_schema_and_database(self, p: List, p_list: List) -> None: 453 | if not p[0].get("properties"): 454 | if len(p_list) == 3: 455 | properties = p_list[-1] 456 | elif len(p_list) > 3: 457 | properties = {p_list[-3]: p_list[-1]} 458 | else: 459 | properties = {} 460 | if properties: 461 | p[0]["properties"] = properties 462 | else: 463 | p[0]["properties"].update({p_list[-3]: p_list[-1]}) 464 | 465 | def set_auth_property_in_schema(self, p: List, p_list: List) -> None: 466 | if p_list[2] == auth: 467 | p[0] = {"schema_name": p_list[3], auth.lower(): p_list[3]} 468 | else: 469 | p[0] = {"schema_name": p_list[2], auth.lower(): p_list[-1]} 470 | 471 | def p_c_schema(self, p: List) -> None: 472 | """c_schema : CREATE SCHEMA 473 | | CREATE ID SCHEMA""" 474 | 475 | if len(p) == 4: 476 | p[0] = {"remote": True} 477 | 478 | def p_create_schema(self, p: List) -> None: 479 | """create_schema : c_schema id id 480 | | c_schema id id id 481 | | c_schema id 482 | | c_schema id DOT id 483 | | c_schema id option_comment 484 | | c_schema id DOT id option_comment 485 | | c_schema IF NOT EXISTS id 486 | | c_schema IF NOT EXISTS id DOT id 487 | | create_schema id id id 488 | | create_schema id id STRING 489 | | create_schema options 490 | """ 491 | p_list = list(p) 492 | 493 | p[0] = {} 494 | auth_index = None 495 | 496 | if "comment" in p_list[-1]: 497 | p[0].update(p_list[-1]) 498 | del p_list[-1] 499 | 500 | self.add_if_not_exists(p[0], p_list) 501 | if isinstance(p_list[1], dict): 502 | p[0] = p_list[1] 503 | self.set_properties_for_schema_and_database(p, p_list) 504 | elif auth in p_list: 505 | auth_index = p_list.index(auth) 506 | self.set_auth_property_in_schema(p, p_list) 507 | 508 | if isinstance(p_list[-1], str): 509 | if auth_index: 510 | schema_name = p_list[auth_index - 1] 511 | if schema_name is None: 512 | schema_name = p_list[auth_index + 1] 513 | else: 514 | schema_name = p_list[-1] 515 | p[0]["schema_name"] = schema_name.replace("`", "") 516 | 517 | 
p[0] = self.set_project_in_schema(p[0], p_list, auth_index) 518 | 519 | @staticmethod 520 | def set_project_in_schema(data: Dict, p_list: List, auth_index: int) -> Dict: 521 | if len(p_list) > 4 and not auth_index and "." in p_list: 522 | data["project"] = p_list[-3].replace("`", "") 523 | return data 524 | 525 | def p_create_database(self, p: List) -> None: 526 | """create_database : database_base 527 | | create_database id id id 528 | | create_database id id STRING 529 | | create_database options 530 | """ 531 | p_list = list(p) 532 | 533 | if isinstance(p_list[1], dict): 534 | p[0] = p_list[1] 535 | self.set_properties_for_schema_and_database(p, p_list) 536 | else: 537 | p[0] = {f"{p[2].lower()}_name": p_list[-1]} 538 | 539 | 540 | class Drop: 541 | def p_expression_drop_table(self, p: List) -> None: 542 | """expr : DROP TABLE id 543 | | DROP TABLE id DOT id 544 | """ 545 | # get schema & table name 546 | p_list = list(p) 547 | schema = None 548 | if len(p) > 4: 549 | if "." in p: 550 | schema = p_list[-3] 551 | table_name = p_list[-1] 552 | else: 553 | table_name = p_list[-1] 554 | p[0] = {"schema": schema, "table_name": table_name} 555 | 556 | 557 | class Type: 558 | def p_multiple_column_names(self, p: List) -> None: 559 | """multiple_column_names : column 560 | | multiple_column_names COMMA 561 | | multiple_column_names column 562 | """ 563 | p_list = list(p) 564 | if isinstance(p[1], dict): 565 | p[0] = [p[1]] 566 | else: 567 | p[0] = p[1] 568 | if p_list[-1] != ",": 569 | p[0].append(p_list[-1]) 570 | 571 | @staticmethod 572 | def add_columns_property_for_type(data: Dict, p_list: List) -> Dict: 573 | if "TABLE" in p_list or isinstance(p_list[-1], dict) and p_list[-1].get("name"): 574 | if not data["properties"].get("columns"): 575 | data["properties"]["columns"] = [] 576 | data["properties"]["columns"].append(p_list[-1]) 577 | return data 578 | 579 | @staticmethod 580 | def set_base_type(data: Dict, p_list: List) -> Dict: 581 | if len(p_list) > 3: 582 | data["base_type"] = p_list[2] 583 | else: 584 | data["base_type"] = None 585 | return data 586 | 587 | @staticmethod 588 | def process_str_base_type(data: Dict, p_list: List) -> Dict: 589 | base_type = data["base_type"].upper() 590 | if base_type == "ENUM": 591 | data["properties"]["values"] = p_list[3] 592 | elif data["base_type"] == "OBJECT": 593 | if "type" in p_list[3][0]: 594 | data["properties"]["attributes"] = p_list[3] 595 | return data 596 | 597 | def p_type_definition(self, p: List) -> None: # noqa: C901 598 | """type_definition : type_name id LP pid RP 599 | | type_name id LP multiple_column_names RP 600 | | type_name LP id_equals RP 601 | | type_name TABLE LP defcolumn 602 | | type_definition COMMA defcolumn 603 | | type_definition RP 604 | """ 605 | p_list = remove_par(list(p)) 606 | p[0] = p[1] 607 | if not p[0].get("properties"): 608 | p[0]["properties"] = {} 609 | 610 | p[0] = self.add_columns_property_for_type(p[0], p_list) 611 | 612 | p[0] = self.set_base_type(p[0], p_list) 613 | 614 | if isinstance(p[0]["base_type"], str): 615 | p[0] = self.process_str_base_type(p[0], p_list) 616 | elif isinstance(p_list[-1], list): 617 | for item in p_list[-1]: 618 | p[0]["properties"].update(item) 619 | 620 | def p_expression_type_as(self, p: List) -> None: 621 | """expr : type_definition""" 622 | p[0] = p[1] 623 | 624 | def p_type_name(self, p: List) -> None: 625 | """type_name : type_create id AS 626 | | type_create id DOT id AS 627 | | type_create id DOT id 628 | | type_create id 629 | """ 630 | p_list = list(p) 631 | p[0] = 
{} 632 | if "." not in p_list: 633 | p[0]["schema"] = None 634 | p[0]["type_name"] = p_list[2] 635 | else: 636 | p[0]["schema"] = p[2] 637 | p[0]["type_name"] = p_list[4] 638 | 639 | def p_type_create(self, p: List) -> None: 640 | """type_create : CREATE TYPE 641 | | CREATE OR REPLACE TYPE 642 | """ 643 | p[0] = None 644 | 645 | 646 | class Domain: 647 | def p_expression_domain_as(self, p: List) -> None: 648 | """expr : domain_name id LP pid RP""" 649 | p_list = list(p) 650 | p[0] = p[1] 651 | p[0]["base_type"] = p[2] 652 | p[0]["properties"] = {} 653 | if p[0]["base_type"] == "ENUM": 654 | p[0]["properties"]["values"] = p_list[4] 655 | 656 | def p_domain_name(self, p: List) -> None: 657 | """domain_name : CREATE DOMAIN id AS 658 | | CREATE DOMAIN id DOT id AS 659 | | CREATE DOMAIN id DOT id 660 | | CREATE DOMAIN id 661 | """ 662 | p_list = list(p) 663 | p[0] = {} 664 | if "." not in p_list: 665 | p[0]["schema"] = None 666 | else: 667 | p[0]["schema"] = p[3] 668 | p[0]["domain_name"] = p_list[-2] 669 | 670 | 671 | class BaseSQL( 672 | Database, Table, Drop, Domain, Column, AfterColumns, Type, Schema, TableSpaces 673 | ): 674 | def clean_up_id_list_in_equal(self, p_list: List) -> List: # noqa R701 675 | if isinstance(p_list[1], str) and p_list[1].endswith("="): 676 | p_list[1] = p_list[1][:-1] 677 | elif "," in p_list: 678 | if len(p_list) == 4: 679 | p_list = p_list[-1].split("=") 680 | elif len(p_list) == 5 and p_list[-2].endswith("="): 681 | p_list[-2] = p_list[-2][:-1] 682 | elif "=" == p_list[-2]: 683 | p_list.pop(-2) 684 | return p_list 685 | 686 | def get_property(self, p_list: List) -> Dict: 687 | _property = None 688 | if not isinstance(p_list[-2], list): 689 | _value = True 690 | value = None 691 | if p_list[-2]: 692 | if not p_list[-2] == "=": 693 | key = p_list[-2] 694 | else: 695 | key = p_list[-3] 696 | 697 | else: 698 | _value = False 699 | key = p_list[-1] 700 | if "=" in key: 701 | key = key.split("=") 702 | if _value: 703 | value = f"{key[1]} {p_list[-1]}" 704 | key = key[0] 705 | else: 706 | value = p_list[-1] 707 | _property = {key: value} 708 | else: 709 | _property = p_list[-2][0] 710 | return _property 711 | 712 | def p_id_equals(self, p: List) -> None: 713 | """id_equals : id id id_or_string 714 | | id id_or_string 715 | | id_equals COMMA 716 | | id_equals COMMA id id id_or_string 717 | | id 718 | | id_equals LP pid RP 719 | | id_equals LP pid RP id 720 | | id_equals COMMA id id 721 | | id_equals COMMA id 722 | """ 723 | p_list = remove_par(list(p)) 724 | if p_list[-1] == "]": 725 | p_list = p_list[:-1] 726 | if isinstance(p_list[-1], list): 727 | p[0] = p[1] 728 | p[0][-1][list(p[0][-1].keys())[0]] = p_list[-1] 729 | else: 730 | p_list = self.clean_up_id_list_in_equal(p_list) 731 | _property = self.get_property(p_list) 732 | 733 | if _property: 734 | if not isinstance(p[1], list): 735 | p[0] = [_property] 736 | else: 737 | p[0] = p[1] 738 | if not p_list[-1] == ",": 739 | p[0].append(_property) 740 | 741 | def p_expression_index(self, p: List) -> None: 742 | """expr : index_table_name LP index_pid RP""" 743 | p_list = remove_par(list(p)) 744 | p[0] = p[1] 745 | for item in ["detailed_columns", "columns"]: 746 | if item not in p[0]: 747 | p[0][item] = p_list[-1][item] 748 | else: 749 | p[0][item].extend(p_list[-1][item]) 750 | 751 | def p_index_table_name(self, p: List) -> None: 752 | """index_table_name : create_index ON id 753 | | create_index ON id DOT id 754 | """ 755 | p[0] = p[1] 756 | p_list = list(p) 757 | schema = None 758 | if "." 
in p_list: 759 | schema = p_list[-3] 760 | table_name = p_list[-1] 761 | else: 762 | table_name = p_list[-1] 763 | p[0].update({"schema": schema, "table_name": table_name}) 764 | 765 | def p_create_index(self, p: List) -> None: 766 | """create_index : CREATE INDEX id 767 | | CREATE UNIQUE INDEX id 768 | | create_index ON id 769 | | CREATE CLUSTERED INDEX id 770 | """ 771 | p_list = list(p) 772 | if "CLUSTERED" in p_list: 773 | clustered = True 774 | else: 775 | clustered = False 776 | if isinstance(p[1], dict): 777 | p[0] = p[1] 778 | else: 779 | p[0] = { 780 | "schema": None, 781 | "index_name": p_list[-1], 782 | "unique": "UNIQUE" in p_list, 783 | "clustered": clustered, 784 | } 785 | 786 | def extract_check_data(self, p, p_list): 787 | if isinstance(p_list[-1]["check"], list): 788 | check = " ".join(p_list[-1]["check"]) 789 | if isinstance(check, str): 790 | check = {"constraint_name": None, "statement": check} 791 | else: 792 | check = p_list[-1]["check"] 793 | p[0] = self.set_constraint(p[0], "checks", check, check["constraint_name"]) 794 | if not p[0].get("checks"): 795 | p[0]["checks"] = [] 796 | p[0]["checks"].append(check) 797 | return p[0] 798 | 799 | def p_expression_table(self, p: List) -> None: # noqa R701 800 | """expr : table_name defcolumn 801 | | table_name LP defcolumn 802 | | table_name 803 | | expr COMMA defcolumn 804 | | expr COMMA 805 | | expr COMMA constraint 806 | | expr COMMA check_ex 807 | | expr COMMA foreign 808 | | expr COMMA pkey 809 | | expr COMMA uniq 810 | | expr COMMA statem_by_id 811 | | expr COMMA constraint uniq 812 | | expr COMMA period_for 813 | | expr COMMA pkey_constraint 814 | | expr COMMA constraint pkey 815 | | expr COMMA constraint pkey enforced 816 | | expr COMMA constraint foreign ref 817 | | expr COMMA foreign ref 818 | | expr encode 819 | | expr DEFAULT id id id 820 | | expr RP 821 | """ 822 | p[0] = p[1] or defaultdict(list) 823 | p_list = remove_par(list(p)) 824 | if p_list[-1] != "," and p_list[-1] is not None: 825 | if "type" in p_list[-1] and "name" in p_list[-1]: 826 | if not p[0].get("columns"): 827 | p[0]["columns"] = [] 828 | p[0]["columns"].append(p_list[-1]) 829 | elif "check" in p_list[-1]: 830 | p[0] = self.extract_check_data(p, p_list) 831 | elif "enforced" in p_list[-1]: 832 | p_list[-2].update(p_list[-1]) 833 | p[0].update({"primary_key_enforced": p_list[-1]["enforced"]}) 834 | elif 'DEFAULT' in p_list: 835 | p[0].update({"default_charset": p_list[-1]}) 836 | elif isinstance(p_list[-1], dict): 837 | p[0].update(p_list[-1]) 838 | 839 | if isinstance(p_list[-1], dict): 840 | p[0] = self.process_constraints_and_refs(p[0], p_list) 841 | 842 | def process_unique_and_primary_constraint(self, data: Dict, p_list: List) -> Dict: 843 | if p_list[-1].get("unique_statement"): 844 | data = self.set_constraint( 845 | data, 846 | "uniques", 847 | {"columns": p_list[-1]["unique_statement"]}, 848 | p_list[-2]["constraint"]["name"], 849 | ) 850 | else: 851 | data = self.set_constraint( 852 | data, 853 | "primary_keys", 854 | {"columns": p_list[-1]["primary_key"]}, 855 | p_list[-2]["constraint"]["name"], 856 | ) 857 | return data 858 | 859 | def process_constraints_and_refs(self, data: Dict, p_list: List) -> Dict: 860 | 861 | if "constraint" in p_list[-2]: 862 | data = self.process_unique_and_primary_constraint(data, p_list) 863 | elif ( 864 | len(p_list) >= 4 865 | and isinstance(p_list[3], dict) 866 | and p_list[3].get("constraint") 867 | and p_list[3]["constraint"].get("primary_key") 868 | ): 869 | del p_list[3]["constraint"]["primary_key"] 
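                # the "primary_key" marker has served its purpose; the remaining
                # constraint data is filed under constraints["primary_keys"]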
870 | data = self.set_constraint( 871 | target_dict=data, 872 | _type="primary_keys", 873 | constraint=p_list[3]["constraint"], 874 | constraint_name=p_list[3]["constraint"]["name"], 875 | ) 876 | del data["constraint"] 877 | elif p_list[-1].get("references"): 878 | data = self.add_ref_information_to_table(data, p_list) 879 | return data 880 | 881 | def add_ref_information_to_table(self, data, p_list): 882 | if len(p_list) > 4 and "constraint" in p_list[3]: 883 | data = self.set_constraint( 884 | data, 885 | "references", 886 | p_list[-1]["references"], 887 | p_list[3]["constraint"]["name"], 888 | ) 889 | elif isinstance(p_list[-2], list): 890 | if "ref_columns" not in data: 891 | data["ref_columns"] = [] 892 | 893 | for num, column in enumerate(p_list[-2]): 894 | ref = deepcopy(p_list[-1]["references"]) 895 | ref["column"] = ref["columns"][num] 896 | del ref["columns"] 897 | ref["name"] = column 898 | data["ref_columns"].append(ref) 899 | return data 900 | 901 | @staticmethod 902 | def set_constraint( 903 | target_dict: Dict, _type: str, constraint: Dict, constraint_name: str 904 | ) -> Dict: 905 | if not target_dict.get("constraints"): 906 | target_dict["constraints"] = {} 907 | if not target_dict["constraints"].get(_type): 908 | target_dict["constraints"][_type] = [] 909 | if "name" in constraint: 910 | del constraint["name"] 911 | constraint.update({"constraint_name": constraint_name}) 912 | target_dict["constraints"][_type].append(constraint) 913 | return target_dict 914 | 915 | def p_likke(self, p: List) -> None: 916 | """likke : LIKE 917 | | CLONE 918 | """ 919 | p[0] = None 920 | 921 | def p_expression_like_table(self, p: List) -> None: 922 | """expr : table_name likke id 923 | | table_name likke id DOT id 924 | | table_name LP likke id DOT id RP 925 | | table_name LP likke id RP 926 | """ 927 | # get schema & table name 928 | p_list = remove_par(list(p)) 929 | if len(p_list) > 4: 930 | if "." in p: 931 | schema = p_list[-3] 932 | table_name = p_list[-1] 933 | else: 934 | table_name = p_list[-1] 935 | schema = None 936 | p[0] = p[1] 937 | p[0].update({"like": {"schema": schema, "table_name": table_name}}) 938 | 939 | def p_t_name(self, p: List) -> None: 940 | """t_name : id DOT id 941 | | id 942 | | id DOT id DOT id 943 | """ 944 | p_list = list(p) 945 | 946 | project = None 947 | 948 | if len(p) > 3: 949 | if "." 
in p: 950 | schema = p_list[-3] 951 | table_name = p_list[-1] 952 | if len(p) == 6: 953 | project = p_list[1] 954 | else: 955 | table_name = p_list[-1] 956 | schema = None 957 | 958 | p[0] = {"schema": schema, "table_name": table_name, "columns": [], "checks": []} 959 | 960 | if project: 961 | p[0]["project"] = project 962 | 963 | def p_table_name(self, p: List) -> None: 964 | """table_name : create_table t_name 965 | | table_name likke id 966 | """ 967 | # can contain additional properties like 'external for HQL 968 | p[0] = p[1] 969 | 970 | p[0].update(list(p)[-1]) 971 | 972 | def p_expression_seq(self, p: List) -> None: 973 | """expr : seq_name 974 | | expr INCREMENT id 975 | | expr INCREMENT id id 976 | | expr START id 977 | | expr START id id 978 | | expr MINVALUE id 979 | | expr NO MINVALUE 980 | | expr NO MAXVALUE 981 | | expr MAXVALUE id 982 | | expr CACHE id 983 | | expr CACHE 984 | """ 985 | # get schema & table name 986 | p_list = list(p) 987 | p[0] = p[1] 988 | value = None 989 | if len(p) == 4: 990 | if p[2] == "NO": 991 | value = {p_list[-1].lower(): False} 992 | else: 993 | value = {p[2].lower(): int(p_list[-1])} 994 | elif len(p) == 3: 995 | value = {p[2].lower(): True} 996 | elif len(p) == 5: 997 | value = {f"{p[2].lower()}_{p[3].lower()}": int(p_list[-1])} 998 | if value: 999 | p[0].update(value) 1000 | 1001 | def p_seq_name(self, p: List) -> None: 1002 | """seq_name : create_seq id DOT id 1003 | | create_seq id 1004 | """ 1005 | # get schema & table name 1006 | p_list = list(p) 1007 | schema = None 1008 | if len(p) > 4: 1009 | if "." in p: 1010 | schema = p_list[-3] 1011 | seq_name = p_list[-1] 1012 | else: 1013 | seq_name = p_list[-1] 1014 | p[0] = {"schema": schema, "sequence_name": seq_name} 1015 | 1016 | def p_create_seq(self, p: List) -> None: 1017 | """create_seq : CREATE SEQUENCE IF NOT EXISTS 1018 | | CREATE SEQUENCE 1019 | 1020 | """ 1021 | # get schema & table name 1022 | 1023 | self.add_if_not_exists(p[0], list(p)) 1024 | 1025 | def p_tid(self, p: List) -> None: 1026 | """tid : LT id 1027 | | LT 1028 | | tid LT 1029 | | tid id 1030 | | tid COMMAT 1031 | | tid RT 1032 | """ 1033 | if not isinstance(p[1], list): 1034 | p[0] = [p[1]] 1035 | else: 1036 | p[0] = p[1] 1037 | 1038 | for i in list(p)[2:]: 1039 | if not i == "[]" and not i == ",": 1040 | p[0][0] += f" {i}" 1041 | else: 1042 | p[0][0] += f"{i}" 1043 | 1044 | @staticmethod 1045 | def get_complex_type(p, p_list): 1046 | if len(p_list) == 4: 1047 | p[0]["type"] = f"{p[2]} {p[3][0]}" 1048 | elif p[0]["type"]: 1049 | if len(p[0]["type"]) == 1 and isinstance(p[0]["type"], list): 1050 | p[0]["type"] = p[0]["type"][0] 1051 | p[0]["type"] = f'{p[0]["type"]} {p_list[-1][0]}' 1052 | else: 1053 | p[0]["type"] = p_list[-1][0] 1054 | return p[0] 1055 | 1056 | def extract_references(self, table_data: Dict): 1057 | ref = { 1058 | "table": table_data["table_name"], 1059 | "columns": [None], 1060 | "schema": table_data["schema"], 1061 | "on_delete": None, 1062 | "on_update": None, 1063 | "deferrable_initially": None, 1064 | } 1065 | 1066 | if table_data.get("project"): 1067 | ref["project"] = table_data["project"] 1068 | 1069 | return ref 1070 | 1071 | def p_null(self, p: List) -> None: 1072 | """null : NULL 1073 | | NOT NULL 1074 | """ 1075 | nullable = True 1076 | if "NULL" in p or "null" in p: 1077 | if "NOT" in p or "not" in p: 1078 | nullable = False 1079 | p[0] = {"nullable": nullable} 1080 | 1081 | def p_f_call(self, p: List) -> None: 1082 | """f_call : id LP RP 1083 | | id LP f_call RP 1084 | | id LP multi_id RP 
1085 | | id LP pid RP 1086 | """ 1087 | p_list = list(p) 1088 | if isinstance(p[1], list): 1089 | p[0] = p[1] 1090 | p[0].append(p_list[-1]) 1091 | else: 1092 | value = "" 1093 | for elem in p_list[1:]: 1094 | if isinstance(elem, list): 1095 | elem = ",".join(elem) 1096 | value += elem 1097 | p[0] = value 1098 | 1099 | def p_multi_id(self, p: List) -> None: 1100 | """multi_id : id 1101 | | multi_id id 1102 | | f_call 1103 | | multi_id f_call 1104 | """ 1105 | p_list = list(p) 1106 | if isinstance(p[1], list): 1107 | p[0] = p[1] 1108 | p[0].append(p_list[-1]) 1109 | else: 1110 | value = " ".join(p_list[1:]) 1111 | p[0] = value 1112 | 1113 | def p_funct_args(self, p: List) -> None: 1114 | """funct_args : LP multi_id RP""" 1115 | p[0] = {"args": f"({p[2]})"} 1116 | 1117 | def p_funct(self, p: List) -> None: 1118 | """funct : id LP multi_id RP""" 1119 | p[0] = {"func_name": p[1], "args": f"({p[3]})"} 1120 | 1121 | def p_multiple_funct(self, p: List) -> None: 1122 | """multiple_funct : funct 1123 | | multiple_funct COMMA funct 1124 | | multiple_funct COMMA 1125 | """ 1126 | if not isinstance(p[1], list): 1127 | p[0] = [p[1]] 1128 | else: 1129 | p[0] = p[1] 1130 | p[0].append(p[-1]) 1131 | 1132 | def p_funct_expr(self, p: List) -> None: 1133 | """funct_expr : LP multi_id RP 1134 | | multi_id 1135 | """ 1136 | if len(p) > 2: 1137 | p[0] = p[2] 1138 | else: 1139 | p[0] = p[1] 1140 | 1141 | def p_dot_id(self, p: List) -> None: 1142 | """dot_id : id DOT id""" 1143 | p[0] = f"{p[1]}.{p[3]}" 1144 | 1145 | def p_default(self, p: List) -> None: 1146 | """default : DEFAULT id 1147 | | DEFAULT STRING 1148 | | DEFAULT NULL 1149 | | default FOR dot_id 1150 | | DEFAULT funct_expr 1151 | | DEFAULT LP pid RP 1152 | | DEFAULT LP funct_expr pid RP 1153 | | default id 1154 | | default LP RP 1155 | """ 1156 | p_list = remove_par(list(p)) 1157 | 1158 | default = self.pre_process_default(p_list) 1159 | 1160 | if isinstance(p_list[-1], list): 1161 | p_list[-1] = " ".join(p_list[-1]) 1162 | default = " ".join(p_list[1:]) 1163 | elif not isinstance(default, dict) and default.isnumeric(): 1164 | default = int(default) 1165 | 1166 | if isinstance(p[1], dict): 1167 | p[0] = self.process_dict_default_value(p_list, default) 1168 | else: 1169 | p[0] = {"default": default} 1170 | 1171 | @staticmethod 1172 | def pre_process_default(p_list: List) -> Any: 1173 | if len(p_list) == 5 and isinstance(p_list[3], list): 1174 | default = p_list[3][0] 1175 | elif "DEFAULT" in p_list and len(p_list) == 4: 1176 | default = f"{p_list[2]} {p_list[3]}" 1177 | else: 1178 | default = p_list[2] 1179 | return default 1180 | 1181 | @staticmethod 1182 | def process_dict_default_value(p_list: List, default: Any) -> Dict: 1183 | data = p_list[1] 1184 | if "FOR" in default: 1185 | data["default"] = {"next_value_for": p_list[-1]} 1186 | else: 1187 | for i in p_list[2:]: 1188 | if isinstance(p_list[2], str): 1189 | p_list[2] = p_list[2].replace("\\'", "'") 1190 | if i == ")" or i == "(": 1191 | data["default"] = str(data["default"]) + f"{i}" 1192 | else: 1193 | data["default"] = str(data["default"]) + f" {i}" 1194 | data["default"] = data["default"].replace("))", ")") 1195 | return data 1196 | 1197 | def p_enforced(self, p: List) -> None: 1198 | """enforced : ENFORCED 1199 | | NOT ENFORCED 1200 | """ 1201 | p_list = list(p) 1202 | p[0] = {"enforced": len(p_list) == 1} 1203 | 1204 | def p_collate(self, p: List) -> None: 1205 | """collate : COLLATE id 1206 | | COLLATE STRING 1207 | """ 1208 | p_list = list(p) 1209 | p[0] = {"collate": p_list[-1]} 1210 | 
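    # illustrative outputs of the adjacent rules (sample clauses are hypothetical,
    # not taken from the repository's test files):
    #   "... COLLATE utf8_bin"    -> {"collate": "utf8_bin"}
    #   "CONSTRAINT pk_users ..." -> {"constraint": {"name": "pk_users"}}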
1211 | def p_constraint(self, p: List) -> None: 1212 | """ 1213 | constraint : CONSTRAINT id 1214 | """ 1215 | 1216 | p_list = list(p) 1217 | 1218 | p[0] = {"constraint": {"name": p_list[-1]}} 1219 | 1220 | def p_generated(self, p: List) -> None: 1221 | """ 1222 | generated : gen_always funct_expr 1223 | | gen_always funct_expr id 1224 | | gen_always LP multi_id RP 1225 | | gen_always f_call 1226 | """ 1227 | p_list = list(p) 1228 | stored = False 1229 | if len(p) > 3 and p_list[-1].lower() == "stored": 1230 | stored = True 1231 | _as = p[2] 1232 | p[0] = {"generated": {"always": True, "as": _as, "stored": stored}} 1233 | 1234 | def p_gen_always(self, p: List) -> None: 1235 | """ 1236 | gen_always : GENERATED id AS 1237 | """ 1238 | p[0] = {"generated": {"always": True}} 1239 | 1240 | def p_check_st(self, p: List) -> None: 1241 | """check_st : CHECK LP id 1242 | | check_st id 1243 | | check_st STRING 1244 | | check_st id STRING 1245 | | check_st id RP 1246 | | check_st STRING RP 1247 | | check_st funct_args 1248 | | check_st LP pid RP 1249 | """ 1250 | p_list = remove_par(list(p)) 1251 | if isinstance(p[1], dict): 1252 | p[0] = p[1] 1253 | else: 1254 | p[0] = {"check": []} 1255 | for item in p_list[2:]: 1256 | if isinstance(p_list[-1], dict) and p_list[-1].get("args"): 1257 | p[0]["check"][-1] += p_list[-1]["args"] 1258 | elif isinstance(item, list): 1259 | p[0]["check"].append(f"({','.join(item)})") 1260 | else: 1261 | p[0]["check"].append(item) 1262 | 1263 | def p_using_tablespace(self, p: List) -> None: 1264 | """using_tablespace : USING INDEX tablespace""" 1265 | p_list = list(p) 1266 | p[0] = {"using": {"tablespace": p_list[-1], "index": True}} 1267 | 1268 | def p_expression_alter(self, p: List) -> None: 1269 | """expr : alter_foreign ref 1270 | | alter_check 1271 | | alter_unique 1272 | | alter_default 1273 | | alter_primary_key 1274 | | alter_primary_key using_tablespace 1275 | """ 1276 | p[0] = p[1] 1277 | if len(p) == 3: 1278 | p[0].update(p[2]) 1279 | 1280 | def p_alter_primary_key(self, p: List) -> None: 1281 | """alter_primary_key : alt_table PRIMARY KEY LP pid RP 1282 | | alt_table constraint PRIMARY KEY LP pid RP 1283 | """ 1284 | 1285 | p_list = remove_par(list(p)) 1286 | p[0] = p[1] 1287 | p[0]["primary_key"] = {"constraint_name": None, "columns": p_list[-1]} 1288 | if "constraint" in p[2]: 1289 | p[0]["primary_key"]["constraint_name"] = p[2]["constraint"]["name"] 1290 | 1291 | def p_alter_unique(self, p: List) -> None: 1292 | """alter_unique : alt_table UNIQUE LP pid RP 1293 | | alt_table constraint UNIQUE LP pid RP 1294 | """ 1295 | 1296 | p_list = remove_par(list(p)) 1297 | p[0] = p[1] 1298 | p[0]["unique"] = {"constraint_name": None, "columns": p_list[-1]} 1299 | if "constraint" in p[2]: 1300 | p[0]["unique"]["constraint_name"] = p[2]["constraint"]["name"] 1301 | 1302 | @staticmethod 1303 | def get_column_and_value_from_alter(p: List) -> Tuple: 1304 | 1305 | p_list = remove_par(list(p)) 1306 | 1307 | column = None 1308 | value = None 1309 | 1310 | if isinstance(p_list[2], str) and "FOR" == p_list[2].upper(): 1311 | column = p_list[-1] 1312 | elif p[0].get("default") and p[0]["default"].get("value"): 1313 | value = p[0]["default"]["value"] + " " + p_list[-1] 1314 | else: 1315 | value = p_list[-1] 1316 | return column, value 1317 | 1318 | def p_alter_default(self, p: List) -> None: 1319 | """alter_default : alt_table id id 1320 | | alt_table constraint id id 1321 | | alt_table id STRING 1322 | | alt_table constraint id STRING 1323 | | alter_default id 1324 | | 
alter_default FOR pid 1325 | """ 1326 | 1327 | p[0] = p[1] 1328 | column, value = self.get_column_and_value_from_alter(p) 1329 | 1330 | if "default" not in p[0]: 1331 | 1332 | p[0]["default"] = { 1333 | "constraint_name": None, 1334 | "columns": column, 1335 | "value": value, 1336 | } 1337 | else: 1338 | p[0]["default"].update( 1339 | { 1340 | "columns": p[0]["default"].get("column") or column, 1341 | "value": value or p[0]["default"].get("value"), 1342 | } 1343 | ) 1344 | if "constraint" in p[2]: 1345 | p[0]["default"]["constraint_name"] = p[2]["constraint"]["name"] 1346 | 1347 | def p_pid(self, p: List) -> None: 1348 | """pid : id 1349 | | STRING 1350 | | pid id 1351 | | pid STRING 1352 | | STRING LP RP 1353 | | id LP RP 1354 | | pid COMMA id 1355 | | pid COMMA STRING 1356 | """ 1357 | p_list = list(p) 1358 | 1359 | if len(p_list) == 4 and isinstance(p[1], str): 1360 | p[0] = ["".join(p[1:])] 1361 | elif not isinstance(p_list[1], list): 1362 | p[0] = [p_list[1]] 1363 | else: 1364 | p[0] = p_list[1] 1365 | p[0].append(p_list[-1]) 1366 | 1367 | def p_alter_check(self, p: List) -> None: 1368 | """alter_check : alt_table check_st 1369 | | alt_table constraint check_st 1370 | """ 1371 | p_list = remove_par(list(p)) 1372 | p[0] = p[1] 1373 | if isinstance(p[1], dict): 1374 | p[0] = p[1] 1375 | if not p[0].get("check"): 1376 | p[0]["check"] = {"constraint_name": None, "statement": []} 1377 | if isinstance(p[2], dict) and "constraint" in p[2]: 1378 | p[0]["check"]["constraint_name"] = p[2]["constraint"]["name"] 1379 | p[0]["check"]["statement"] = p_list[-1]["check"] 1380 | 1381 | def p_index_pid(self, p: List) -> None: 1382 | """index_pid : id 1383 | | index_pid id 1384 | | index_pid COMMA index_pid 1385 | """ 1386 | p_list = list(p) 1387 | if len(p_list) == 2: 1388 | detailed_column = {"name": p_list[1], "order": "ASC", "nulls": "LAST"} 1389 | column = p_list[1] 1390 | p[0] = {"detailed_columns": [detailed_column], "columns": [column]} 1391 | else: 1392 | p[0] = p[1] 1393 | if len(p) == 3: 1394 | if p_list[-1] in ["DESC", "ASC"]: 1395 | p[0]["detailed_columns"][0]["order"] = p_list[-1] 1396 | else: 1397 | p[0]["detailed_columns"][0]["nulls"] = p_list[-1] 1398 | 1399 | column = p_list[2] 1400 | elif isinstance(p_list[-1], dict): 1401 | for i in p_list[-1]["columns"]: 1402 | p[0]["columns"].append(i) 1403 | for i in p_list[-1]["detailed_columns"]: 1404 | p[0]["detailed_columns"].append(i) 1405 | 1406 | def p_alter_foreign(self, p: List) -> None: 1407 | """alter_foreign : alt_table foreign 1408 | | alt_table constraint foreign 1409 | """ 1410 | 1411 | p_list = list(p) 1412 | 1413 | p[0] = p[1] 1414 | if isinstance(p_list[-1], list): 1415 | p[0]["columns"] = [{"name": i} for i in p_list[-1]] 1416 | else: 1417 | column = p_list[-1] 1418 | 1419 | if not p[0].get("columns"): 1420 | p[0]["columns"] = [] 1421 | p[0]["columns"].append(column) 1422 | 1423 | for column in p[0]["columns"]: 1424 | if isinstance(p_list[2], dict) and "constraint" in p_list[2]: 1425 | column.update({"constraint_name": p_list[2]["constraint"]["name"]}) 1426 | 1427 | def p_alt_table_name(self, p: List) -> None: 1428 | """alt_table : ALTER TABLE t_name ADD 1429 | | ALTER TABLE IF EXISTS t_name ADD 1430 | | ALTER TABLE ID t_name ADD""" 1431 | p_list = list(p) 1432 | table_data = p_list[-2] 1433 | p[0] = { 1434 | "alter_table_name": table_data["table_name"], 1435 | "schema": table_data["schema"], 1436 | } 1437 | if "IF" in p_list: 1438 | p[0]["if_exists"] = True 1439 | if len(p_list) == 6: 1440 | p[0]["only"] = True 1441 | if 
table_data.get("project"): 1442 | p[0]["project"] = table_data["project"] 1443 | 1444 | def p_foreign(self, p): 1445 | # todo: need to redone id lists 1446 | """foreign : FOREIGN KEY LP pid RP 1447 | | FOREIGN KEY""" 1448 | p_list = remove_par(list(p)) 1449 | if len(p_list) == 4: 1450 | columns = p_list[-1] 1451 | p[0] = columns 1452 | 1453 | def p_ref(self, p: List) -> None: 1454 | """ref : REFERENCES t_name 1455 | | ref LP pid RP 1456 | | ref ON DELETE id 1457 | | ref ON UPDATE id 1458 | | ref DEFERRABLE INITIALLY id 1459 | | ref NOT DEFERRABLE 1460 | """ 1461 | p_list = remove_par(list(p)) 1462 | if isinstance(p[1], dict): 1463 | p[0] = p[1] 1464 | if "ON" not in p_list and "DEFERRABLE" not in p_list: 1465 | p[0]["references"]["columns"] = p_list[-1] 1466 | else: 1467 | p[0]["references"]["columns"] = p[0]["references"].get( 1468 | "columns", [None] 1469 | ) 1470 | else: 1471 | data = {"references": self.extract_references(p_list[-1])} 1472 | p[0] = data 1473 | p[0] = self.process_references_with_properties(p[0], p_list) 1474 | 1475 | @staticmethod 1476 | def process_references_with_properties(data: Dict, p_list: List) -> Dict: 1477 | if "ON" in p_list: 1478 | if "DELETE" in p_list: 1479 | data["references"]["on_delete"] = p_list[-1] 1480 | elif "UPDATE" in p_list: 1481 | data["references"]["on_update"] = p_list[-1] 1482 | elif "DEFERRABLE" in p_list: 1483 | if "NOT" not in p_list: 1484 | data["references"]["deferrable_initially"] = p_list[-1] 1485 | else: 1486 | data["references"]["deferrable_initially"] = "NOT" 1487 | return data 1488 | 1489 | def p_expression_primary_key(self, p): 1490 | "expr : pkey" 1491 | p[0] = p[1] 1492 | 1493 | def p_uniq(self, p: List) -> None: 1494 | """uniq : UNIQUE LP pid RP""" 1495 | p_list = remove_par(list(p)) 1496 | p[0] = {"unique_statement": p_list[-1]} 1497 | 1498 | def p_statem_by_id(self, p: List) -> None: 1499 | """statem_by_id : id LP pid RP 1500 | | id KEY LP pid RP 1501 | """ 1502 | p_list = remove_par(list(p)) 1503 | if p[1].upper() == "UNIQUE": 1504 | p[0] = {"unique_statement": p_list[-1]} 1505 | elif p[1].upper() == "CHECK": 1506 | p[0] = {"check": p_list[-1]} 1507 | elif p[1].upper() == "PRIMARY": 1508 | p[0] = {"primary_key": p_list[-1]} 1509 | 1510 | def p_pkey(self, p: List) -> None: 1511 | """pkey : pkey_statement LP pid RP 1512 | | pkey_statement ID LP pid RP 1513 | """ 1514 | p_list = remove_par(list(p)) 1515 | 1516 | columns = [] 1517 | 1518 | p[0] = {} 1519 | 1520 | if isinstance(p_list[2], str) and "CLUSTERED" == p_list[2]: 1521 | order = None 1522 | column = None 1523 | for item in p_list[-1]: 1524 | if item not in ["ASC", "DESC"]: 1525 | column = item 1526 | else: 1527 | order = item 1528 | if column and order: 1529 | columns.append({"column": column, "order": order}) 1530 | column = None 1531 | order = None 1532 | p[0]["clustered_primary_key"] = columns 1533 | 1534 | p[0] = self.process_order_in_pk(p[0], p_list) 1535 | 1536 | @staticmethod 1537 | def process_order_in_pk(data: Dict, p_list: List) -> Dict: 1538 | columns = [] 1539 | for item in p_list[-1]: 1540 | if item not in ["ASC", "DESC"]: 1541 | columns.append(item) 1542 | data["primary_key"] = columns 1543 | return data 1544 | 1545 | def p_pkey_statement(self, p: List) -> None: 1546 | """pkey_statement : PRIMARY KEY""" 1547 | p[0] = {"primary_key": None} 1548 | 1549 | def p_comment(self, p: List) -> None: 1550 | """comment : COMMENT STRING""" 1551 | p_list = remove_par(list(p)) 1552 | p[0] = {"comment": check_spec(p_list[-1])} 1553 | 1554 | def p_tablespace(self, p: List) 
1554 | def p_tablespace(self, p: List) -> None: 1555 | """tablespace : TABLESPACE id 1556 | | TABLESPACE id properties 1557 | """ 1558 | # Initial 5m Next 5m Maxextents Unlimited 1559 | p[0] = self.get_tablespace_data(list(p)) 1560 | 1561 | def p_expr_tablespace(self, p: List) -> None: 1562 | """expr : expr tablespace""" 1563 | p_list = list(p) 1564 | p[0] = p[1] 1565 | p[0]["tablespace"] = p_list[-1] 1566 |
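Editorial aside: the two rules above close out the grammar - p_tablespace reads a TABLESPACE clause and p_expr_tablespace attaches it to the table dict under the "tablespace" key (init_table_data in sondesh/output/common.py defaults it to None). A minimal usage sketch, assuming the parse_the_ddl entry point that the bundled tests import from sondesh.ddl_parser:

import pprint

from sondesh.ddl_parser import parse_the_ddl

# the TABLESPACE clause should be picked up by the p_tablespace rule and
# surface as the table's "tablespace" value in the parsed output
ddl = "CREATE TABLE employee (employee_id number(10)) TABLESPACE users;"
pprint.pprint(parse_the_ddl(ddl).run(group_by_type=True))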
target_table["alter"].get("checks"): 86 | target_table["alter"]["checks"] = [] 87 | statement["check"]["statement"] = " ".join(statement["check"]["statement"]) 88 | target_table["alter"]["checks"].append(statement["check"]) 89 | elif "unique" in statement: 90 | target_table = set_alter_to_table_data("unique", statement, target_table) 91 | target_table = set_unique_columns_from_alter(statement, target_table) 92 | elif "default" in statement: 93 | target_table = set_alter_to_table_data("default", statement, target_table) 94 | target_table = set_default_columns_from_alter(statement, target_table) 95 | elif "primary_key" in statement: 96 | target_table = set_alter_to_table_data("primary_key", statement, target_table) 97 | return tables_dict 98 | 99 | 100 | def set_default_columns_from_alter(statement: Dict, target_table: Dict) -> Dict: 101 | for column in target_table["columns"]: 102 | if statement["default"]["columns"]: 103 | for column_name in statement["default"]["columns"]: 104 | if column["name"] == column_name: 105 | column["default"] = statement["default"]["value"] 106 | return target_table 107 | 108 | 109 | def set_unique_columns_from_alter(statement: Dict, target_table: Dict) -> Dict: 110 | for column in target_table["columns"]: 111 | for column_name in statement["unique"]["columns"]: 112 | if column["name"] == column_name: 113 | column["unique"] = True 114 | return target_table 115 | 116 | 117 | def set_alter_to_table_data(key: str, statement: Dict, target_table: Dict) -> Dict: 118 | if not target_table["alter"].get(key + "s"): 119 | target_table["alter"][key + "s"] = [] 120 | if "using" in statement: 121 | statement[key]["using"] = statement["using"] 122 | target_table["alter"][key + "s"].append(statement[key]) 123 | return target_table 124 | 125 | 126 | def init_table_data() -> Dict: 127 | return { 128 | "columns": [], 129 | "primary_key": None, 130 | "alter": {}, 131 | "checks": [], 132 | "index": [], 133 | "partitioned_by": [], 134 | "tablespace": None, 135 | } 136 | 137 | 138 | def process_alter_and_index_result( 139 | tables_dict: Dict, table: Dict, output_mode: str 140 | ) -> Dict: 141 | if table.get("index_name"): 142 | tables_dict = add_index_to_table(tables_dict, table, output_mode) 143 | 144 | elif table.get("alter_table_name"): 145 | tables_dict = add_alter_to_table(tables_dict, table) 146 | 147 | return tables_dict 148 | 149 | 150 | def process_entities(tables_dict: Dict, table: Dict, output_mode: str) -> Dict: 151 | """process tables, types, sequence and etc. 
data""" 152 | is_it_table = True 153 | 154 | if table.get("table_name"): 155 | table_data = init_table_data() 156 | table_data = d.populate_dialects_table_data(output_mode, table_data) 157 | table_data.update(table) 158 | table_data = set_unique_columns(table_data) 159 | else: 160 | table_data = table 161 | is_it_table = False 162 | 163 | if is_it_table: 164 | table_data = process_is_it_table_item(table_data, tables_dict) 165 | 166 | table_data = normalize_ref_columns_in_final_output(table_data) 167 | 168 | d.dialects_clean_up(output_mode, table_data) 169 | return table_data 170 | 171 | 172 | def result_format( 173 | result: List[Dict], output_mode: str, group_by_type: bool 174 | ) -> List[Dict]: 175 | """method to format final output after parser""" 176 | final_result = [] 177 | tables_dict = {} 178 | for table in result: 179 | # process each item in parser output 180 | if "index_name" in table or "alter_table_name" in table: 181 | tables_dict = process_alter_and_index_result( 182 | tables_dict, table, output_mode 183 | ) 184 | else: 185 | # process tables, types, sequence and etc. data 186 | table_data = process_entities(tables_dict, table, output_mode) 187 | final_result.append(table_data) 188 | if group_by_type: 189 | final_result = group_by_type_result(final_result) 190 | return final_result 191 | 192 | 193 | def process_is_it_table_item(table_data: Dict, tables_dict: Dict) -> Dict: 194 | if table_data.get("table_name"): 195 | tables_dict[(table_data["table_name"], table_data["schema"])] = table_data 196 | else: 197 | logger.error( 198 | "\n Something goes wrong. Possible you try to parse unsupported statement \n " 199 | ) 200 | if not table_data.get("primary_key"): 201 | table_data = check_pk_in_columns_and_constraints(table_data) 202 | else: 203 | table_data = remove_pk_from_columns(table_data) 204 | 205 | if table_data.get("unique"): 206 | table_data = add_unique_columns(table_data) 207 | 208 | for column in table_data["columns"]: 209 | if column["name"] in table_data["primary_key"]: 210 | column["nullable"] = False 211 | return table_data 212 | 213 | 214 | def normalize_ref_columns_in_final_output(table_data: Dict) -> Dict: 215 | # todo: this is hack, need to remove it 216 | if "references" in table_data: 217 | del table_data["references"] 218 | if "ref_columns" in table_data: 219 | for col_ref in table_data["ref_columns"]: 220 | name = col_ref["name"] 221 | for column in table_data["columns"]: 222 | if name == column["name"]: 223 | del col_ref["name"] 224 | column["references"] = col_ref 225 | del table_data["ref_columns"] 226 | return table_data 227 | 228 | 229 | def set_column_unique_param(table_data: Dict, key: str) -> Dict: 230 | for column in table_data["columns"]: 231 | if key == "constraints": 232 | unique = table_data[key].get("unique", []) 233 | if unique: 234 | check_in = unique["columns"] 235 | else: 236 | check_in = [] 237 | else: 238 | check_in = table_data[key] 239 | if column["name"] in check_in: 240 | column["unique"] = True 241 | return table_data 242 | 243 | 244 | def set_unique_columns(table_data: Dict) -> Dict: 245 | 246 | unique_keys = ["unique_statement", "constraints"] 247 | 248 | for key in unique_keys: 249 | if table_data.get(key, None): 250 | # get column names from unique constraints & statements 251 | table_data = set_column_unique_param(table_data, key) 252 | if "unique_statement" in table_data: 253 | del table_data["unique_statement"] 254 | return table_data 255 | 256 | 257 | def group_by_type_result(final_result: List[Dict]) -> Dict[str, List]: 258 
| result_as_dict = { 259 | "tables": [], 260 | "types": [], 261 | "sequences": [], 262 | "domains": [], 263 | "schemas": [], 264 | "ddl_properties": [], 265 | "comments": [], 266 | } 267 | keys_map = { 268 | "table_name": "tables", 269 | "sequence_name": "sequences", 270 | "type_name": "types", 271 | "domain_name": "domains", 272 | "schema_name": "schemas", 273 | "tablespace_name": "tablespaces", 274 | "database_name": "databases", 275 | "value": "ddl_properties", 276 | "comments": "comments", 277 | } 278 | for item in final_result: 279 | for key in keys_map: 280 | if key in item: 281 | _type = result_as_dict.get(keys_map.get(key)) 282 | if _type is None: 283 | result_as_dict[keys_map.get(key)] = [] 284 | _type = result_as_dict[keys_map.get(key)] 285 | if key != "comments": 286 | _type.append(item) 287 | else: 288 | _type.extend(item["comments"]) 289 | break 290 | if result_as_dict["comments"] == []: 291 | del result_as_dict["comments"] 292 | return result_as_dict 293 | 294 | 295 | def add_unique_columns(table_data: Dict) -> Dict: 296 | for column in table_data["columns"]: 297 | if column["name"] in table_data["unique"]: 298 | column["unique"] = True 299 | del table_data["unique"] 300 | return table_data 301 | 302 | 303 | def remove_pk_from_columns(table_data: Dict) -> Dict: 304 | for column in table_data["columns"]: 305 | del column["primary_key"] 306 | return table_data 307 | 308 | 309 | def check_pk_in_columns_and_constraints(table_data: Dict) -> Dict: 310 | pk = [] 311 | for column in table_data["columns"]: 312 | if column["primary_key"]: 313 | pk.append(column["name"]) 314 | del column["primary_key"] 315 | if table_data.get("constraints") and table_data["constraints"].get("primary_keys"): 316 | for key_constraints in table_data["constraints"]["primary_keys"]: 317 | pk.extend(key_constraints["columns"]) 318 | table_data["primary_key"] = pk 319 | return table_data 320 | 321 | 322 | def dump_data_to_file(table_name: str, dump_path: str, data: List[Dict]) -> None: 323 | """dump the parsed schema to a json file""" 324 | if not os.path.isdir(dump_path): 325 | os.makedirs(dump_path, exist_ok=True) 326 | with open("{}/{}_schema.json".format(dump_path, table_name), "w+") as schema_file: 327 | json.dump(data, schema_file, indent=1) 328 | -------------------------------------------------------------------------------- /sondesh/output/dialects.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | hql_clean_up_list = ["deferrable_initially"] 4 | 5 | 6 | sql_clean_up_list = [ 7 | "external", 8 | 9 | "stored_as", 10 | "row_format", 11 | "lines_terminated_by", 12 | "fields_terminated_by", 13 | "collection_items_terminated_by", 14 | "map_keys_terminated_by", 15 | ] 16 | 17 | 18 | def add_additional_hql_keys(table_data: Dict) -> Dict: 19 | table_data.update( 20 | { 21 | "stored_as": None, 22 | "location": None, 23 | "comment": None, 24 | "row_format": None, 25 | "fields_terminated_by": None, 26 | "lines_terminated_by": None, 27 | 28 | "map_keys_terminated_by": None, 29 | "collection_items_terminated_by": None, 30 | "external": table_data.get("external", False), 31 | } 32 | ) 33 | return table_data 34 | 35 | 36 | def add_additional_oracle_keys(table_data: Dict) -> Dict: 37 | table_data.update( 38 | { 39 | "constraints": {"uniques": None, "checks": None, "references": None}, 40 | "storage": None, 41 | } 42 | ) 43 | return table_data 44 | 45 |
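# Editorial sketch (not part of the original source): each add_additional_*_keys
# helper above only merges dialect-specific defaults into the table dict, e.g.
#     table = {"table_name": "employee", "columns": []}
#     add_additional_oracle_keys(table)
# leaves table["storage"] is None and
# table["constraints"] == {"uniques": None, "checks": None, "references": None}.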
46 | def update_bigquery_output(table_data: Dict) -> Dict: 47 | if table_data.get("schema"): 48 | table_data["dataset"] = table_data["schema"] 49 | del table_data["schema"] 50 | return table_data 51 | 52 | 53 | def add_additional_redshift_keys(table_data: Dict) -> Dict: 54 | table_data.update( 55 | { 56 | "diststyle": None, 57 | "distkey": None, 58 | "sortkey": {"type": None, "keys": []}, 59 | "encode": None, 60 | "temp": False, 61 | } 62 | ) 63 | return table_data 64 | 65 | 66 | def add_additional_snowflake_keys(table_data: Dict) -> Dict: 67 | table_data.update({"clone": None, "primary_key_enforced": None}) 68 | return table_data 69 | 70 | 71 | def add_additional_oracle_keys_in_column(column_data: Dict) -> Dict: 72 | column_data.update({"encrypt": None}) 73 | return column_data 74 | 75 | 76 | def add_additional_snowflake_keys_in_column(column_data: Dict) -> Dict: 77 | return column_data 78 | 79 | 80 | def add_additional_redshift_keys_in_column(column_data: Dict, table_data: Dict) -> Tuple[Dict, Dict]: 81 | column_data["encode"] = column_data.get("encode", None) 82 | if column_data.get("distkey"): 83 | table_data["distkey"] = column_data["name"] 84 | del column_data["distkey"] 85 | return column_data, table_data 86 | 87 | 88 | def add_additional_mssql_keys(table_data: Dict) -> Dict: 89 | table_data.update( 90 | { 91 | "constraints": {"uniques": None, "checks": None, "references": None}, 92 | } 93 | ) 94 | return table_data 95 | 96 | 97 | def clean_up_output(table_data: Dict, key_list: List[str]) -> Dict: 98 | for key in key_list: 99 | if key in table_data: 100 | del table_data[key] 101 | return table_data 102 | 103 | 104 | def populate_dialects_table_data(output_mode: str, table_data: Dict) -> Dict: 105 | 106 | method_mapper = { 107 | "hql": add_additional_hql_keys, 108 | "mssql": add_additional_mssql_keys, 109 | "mysql": add_additional_mssql_keys, 110 | "oracle": add_additional_oracle_keys, 111 | "redshift": add_additional_redshift_keys, 112 | "snowflake": add_additional_snowflake_keys, 113 | } 114 | 115 | method = method_mapper.get(output_mode) 116 | 117 | if method: 118 | table_data = method(table_data) 119 | 120 | return table_data 121 | 122 | 123 | def key_cleaning(table_data: Dict, output_mode: str) -> Dict: 124 | if output_mode != "hql": 125 | table_data = clean_up_output(table_data, sql_clean_up_list) 126 | else: 127 | table_data = clean_up_output(table_data, hql_clean_up_list) 128 | # todo: need to figure out how to work around this properly 129 | if "_ddl_parser_comma_only_str" == table_data.get("fields_terminated_by"): 130 | table_data["fields_terminated_by"] = "','" 131 | return table_data 132 | 133 | 134 | def process_redshift_dialect(table_data: Dict) -> Dict: 135 | for column in table_data.get("columns", []): 136 | column, table_data = add_additional_redshift_keys_in_column(column, table_data) 137 | if table_data.get("encode"): 138 | column["encode"] = column["encode"] or table_data.get("encode") 139 | return table_data 140 | 141 |
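# Editorial sketch (not part of the original source): process_redshift_dialect
# hoists a column-level distkey up to the table and backfills a table-wide
# encode onto each column, e.g.
#     table = {"columns": [{"name": "listid", "distkey": True}], "encode": "az64"}
#     process_redshift_dialect(table)
# leaves table["distkey"] == "listid" and table["columns"][0]["encode"] == "az64".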
142 | def dialects_clean_up(output_mode: str, table_data: Dict) -> Dict: 143 | key_cleaning(table_data, output_mode) 144 | update_mappers_for_table_properties = {"bigquery": update_bigquery_output} 145 | update_table_prop = update_mappers_for_table_properties.get(output_mode) 146 | if update_table_prop: 147 | table_data = update_table_prop(table_data) 148 | 149 | if output_mode == "oracle": 150 | for column in table_data.get("columns", []): 151 | column = add_additional_oracle_keys_in_column(column) 152 | elif output_mode == "snowflake": 153 | # there may be no columns if it is a create database or create schema statement 154 | for column in table_data.get("columns", []): 155 | column = add_additional_snowflake_keys_in_column(column) 156 | 157 | elif output_mode == "redshift": 158 | table_data = process_redshift_dialect(table_data) 159 | return table_data 160 | -------------------------------------------------------------------------------- /sondesh/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import re 5 | from typing import Dict, List, Optional, Tuple, Union 6 | 7 | from ply import lex, yacc 8 | 9 | from sondesh.output.common import dump_data_to_file, result_format 10 | from sondesh.utils import find_first_unpair_closed_par 11 | 12 | # open comment 13 | OP_COM = "/*" 14 | # close comment 15 | CL_COM = "*/" 16 | 17 | IN_COM = "--" 18 | MYSQL_COM = "#" 19 | 20 | 21 | def set_logging_config( 22 | log_level: Union[str, int], 23 | log_file: Optional[str] = None) -> None: 24 | 25 | if log_file: 26 | logging.basicConfig( 27 | level=log_level, 28 | filename=log_file, 29 | filemode="w", 30 | format="%(filename)10s:%(lineno)4d:%(message)s", 31 | ) 32 | else: 33 | logging.basicConfig( 34 | level=log_level, 35 | format="%(filename)10s:%(lineno)4d:%(message)s", 36 | ) 37 | 38 | 39 | class Parser: 40 | """ 41 | Base class for a lexer/parser that has the rules defined as methods 42 | 43 | It cannot be loaded or called without a subclass, 44 | 45 | for example: DDLParser 46 | 47 | The subclass must include the tokens and rules for the parser 48 | 49 | This class contains the logic for pre-processing lines before passing them to the lex & yacc parser: 50 | 51 | - clean up 52 | - catch comments 53 | - catch statements like 'SET' (they are not parsed by the parser) 54 | - etc 55 | """ 56 | 57 | def __init__( 58 | self, 59 | content: str, 60 | silent: bool = True, 61 | debug: bool = False, 62 | normalize_names: bool = False, 63 | log_file: Optional[str] = None, 64 | log_level: Union[str, int] = logging.DEBUG, 65 | ) -> None: 66 | """ 67 | content: the file content to process 68 | silent: if True - will not raise errors, just return empty output 69 | debug: if True - the parser will produce a huge token tree & a parser.out file, normally you don't want this enabled 70 | normalize_names: if the flag is True (default 'False') then all identifiers will be returned without 71 | '[', '"' and other delimiters that are used in different SQL dialects to separate custom names 72 | from reserved words & statements. 73 | For example, if the flag is set to 'True' and you pass this input: 74 | 75 | CREATE TABLE [dbo].[TO_Requests]( 76 | [Request_ID] [int] IDENTITY(1,1) NOT NULL, 77 | [user_id] [int] 78 | 79 | In the output you will have names like 'dbo' and 'TO_Requests', not '[dbo]' and '[TO_Requests]'.
80 | log_file: path to a file for logging 81 | log_level: set the logging level for the parser 82 | """ 83 | self.tables = [] 84 | self.silent = not debug if debug else silent 85 | self.data = content.encode("unicode_escape") 86 | self.paren_count = 0 87 | self.normalize_names = normalize_names 88 | set_logging_config(log_level, log_file) 89 | log = logging.getLogger() 90 | self.lexer = lex.lex(object=self, debug=False, debuglog=log) 91 | self.yacc = yacc.yacc(module=self, debug=False, debuglog=log) 92 | self.columns_closed = False 93 | self.statement = None 94 | self.block_comments = [] 95 | self.comments = [] 96 | 97 | def catch_comment_or_process_line(self, code_line: str) -> str: 98 | if self.multi_line_comment: 99 | self.comments.append(self.line) 100 | if CL_COM in self.line: 101 | self.multi_line_comment = False 102 | return '' 103 | 104 | elif not ( 105 | self.line.strip().startswith(MYSQL_COM) 106 | or self.line.strip().startswith(IN_COM) 107 | ): 108 | return self.process_inline_comments(code_line) 109 | return code_line 110 | 111 | def pre_process_line(self) -> None: 112 | code_line = "" 113 | comma_only_str = r"((\')|(' ))+(,)((\')|( '))+\B" 114 | self.line = re.sub(comma_only_str, "_ddl_parser_comma_only_str", self.line) 115 | code_line = self.catch_comment_or_process_line(code_line) 116 | if self.line.startswith(OP_COM) and CL_COM not in self.line: 117 | self.multi_line_comment = True 118 | elif self.line.startswith(CL_COM): 119 | self.multi_line_comment = False 120 | self.line = code_line 121 | 122 | def process_in_comment(self, line: str) -> str: 123 | if re.search(r"((\")|(\'))+(.)*(--)+(.)*((\")|(\'))+", line): 124 | code_line = line 125 | else: 126 | splitted_line = line.split(IN_COM) 127 | code_line = splitted_line[0] 128 | self.comments.append(splitted_line[1]) 129 | return code_line 130 | 131 | def process_line_before_comment(self) -> str: 132 | """get the useful code line - remove the comment""" 133 | code_line = "" 134 | if IN_COM in self.line: 135 | code_line = self.process_in_comment(self.line) 136 | elif CL_COM not in self.line and OP_COM not in self.line: 137 | code_line = self.line 138 | return code_line 139 | 140 | def process_inline_comments(self, code_line: str) -> str: 141 | """this method catches comments like "create table ( # some comment" that are inlined in a statement""" 142 | comment = None 143 | code_line = self.process_line_before_comment() 144 | if OP_COM in self.line: 145 | splitted_line = self.line.split(OP_COM) 146 | code_line += splitted_line[0] 147 | comment = splitted_line[1] 148 | self.block_comments.append(OP_COM) 149 | if CL_COM in code_line and self.block_comments: 150 | splitted_line = self.line.split(CL_COM) 151 | self.block_comments.pop(-1) 152 | code_line += splitted_line[1] 153 | comment = splitted_line[0] 154 | 155 | if comment: 156 | self.comments.append(comment) 157 | return code_line 158 | 159 | def process_regex_input(self, data): 160 | regex = data.split('"input.regex"')[1].split("=")[1] 161 | index = find_first_unpair_closed_par(regex) 162 | regex = regex[:index] 163 | data = data.replace(regex, " lexer_state_regex ") 164 | data = data.replace('"input.regex"', "parse_m_input_regex") 165 | self.lexer.state = {"lexer_state_regex": regex} 166 | return data 167 |
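# Editorial sketch (not part of the original source): pre_process_data below
# pads commas and parentheses with spaces so LP/RP/COMMA tokenize cleanly, e.g.
#     "create table t(a decimal(8,2))"
# becomes, up to extra whitespace,
#     "create table t ( a decimal ( 8 , 2 ) )"
# before the line ever reaches the lexer.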
.replace("(", " ( ") 177 | .replace(")", " ) ") 178 | .replace("\\x", "\\0") 179 | .replace("‘", "'") 180 | .replace("’", "'") 181 | .replace("\\u2018", "'") 182 | .replace("\\u2019", "'") 183 | .replace("'\\t'", "'pars_m_t'") 184 | .replace("'\\n'", "'pars_m_n'") 185 | .replace("\\'", "pars_m_single") 186 | .replace("\\t", " ") 187 | ) 188 | return data 189 | 190 | def process_set(self) -> None: 191 | self.set_line = self.set_line.split() 192 | if self.set_line[-2] == "=": 193 | name = self.set_line[1] 194 | else: 195 | name = self.set_line[-2] 196 | value = self.set_line[-1].replace(";", "") 197 | self.tables.append({"name": name, "value": value}) 198 | 199 | def parse_set_statement(self): 200 | if re.match(r"SET ", self.line.upper()): 201 | self.set_was_in_line = True 202 | if not self.set_line: 203 | self.set_line = self.line 204 | else: 205 | self.process_set() 206 | self.set_line = self.line 207 | elif (self.set_line and len(self.set_line.split()) == 3) or ( 208 | self.set_line and self.set_was_in_line 209 | ): 210 | self.process_set() 211 | self.set_line = None 212 | self.set_was_in_line = False 213 | 214 | def check_new_statement_start(self, line: str) -> bool: 215 | self.new_statement = False 216 | if self.statement and self.statement.count("(") == self.statement.count(")"): 217 | new_statements_tokens = ["ALTER ", "CREATE ", "DROP ", "SET "] 218 | for key in new_statements_tokens: 219 | if line.upper().startswith(key): 220 | self.new_statement = True 221 | return self.new_statement 222 | 223 | def check_line_on_skip_words(self) -> bool: 224 | skip_regex = r"^(GO|USE|INSERT)\b" 225 | 226 | self.skip = False 227 | 228 | if re.match(skip_regex, self.line.upper()): 229 | self.skip = True 230 | return self.skip 231 | 232 | def add_line_to_statement(self) -> str: 233 | 234 | if ( 235 | self.line 236 | and not self.skip 237 | and not self.set_was_in_line 238 | and not self.new_statement 239 | ): 240 | if self.statement is None: 241 | self.statement = self.line 242 | else: 243 | self.statement += f" {self.line}" 244 | 245 | def parse_data(self) -> List[Dict]: 246 | self.tables: List[Dict] = [] 247 | data = self.pre_process_data(self.data) 248 | lines = data.replace("\\t", "").split("\\n") 249 | 250 | self.set_line: Optional[str] = None 251 | 252 | self.set_was_in_line: bool = False 253 | 254 | self.multi_line_comment = False 255 | 256 | for num, self.line in enumerate(lines): 257 | self.process_line(num != len(lines) - 1) 258 | if self.comments: 259 | self.tables.append({"comments": self.comments}) 260 | return self.tables 261 | 262 | def process_line( 263 | self, 264 | last_line: bool, 265 | ) -> Tuple[Optional[str], bool]: 266 | self.pre_process_line() 267 | 268 | self.line = self.line.strip().replace("\n", "").replace("\t", "") 269 | self.skip = self.check_line_on_skip_words() 270 | 271 | self.parse_set_statement() 272 | # to avoid issues when comma or parath are glued to column name 273 | self.check_new_statement_start(self.line) 274 | 275 | final_line = self.line.endswith(";") and not self.set_was_in_line 276 | self.add_line_to_statement() 277 | 278 | if (final_line or self.new_statement) and self.statement: 279 | # end of sql operation, remove ; from end of line 280 | self.statement = self.statement[:-1] 281 | elif last_line and not self.skip: 282 | # continue combine lines in one massive 283 | return 284 | 285 | self.set_default_flags_in_lexer() 286 | 287 | self.process_statement() 288 | 289 | def process_statement(self) -> None: 290 | 291 | if not self.set_line and 
self.statement: 292 | self.parse_statement() 293 | if self.new_statement: 294 | self.statement = self.line 295 | else: 296 | self.statement = None 297 | 298 | def parse_statement(self) -> None: 299 | 300 | _parse_result = yacc.parse(self.statement) 301 | if _parse_result: 302 | self.tables.append(_parse_result) 303 | 304 | def set_default_flags_in_lexer(self) -> None: 305 | attrs = [ 306 | "is_table", 307 | "sequence", 308 | "last_token", 309 | "columns_def", 310 | "after_columns", 311 | "check", 312 | 313 | "last_par", 314 | "lp_open", 315 | "is_alter", 316 | "is_like", 317 | ] 318 | for attr in attrs: 319 | setattr(self.lexer, attr, False) 320 | self.lexer.lt_open = 0 321 | 322 | def run( 323 | self, 324 | *, 325 | dump: bool = False, 326 | dump_path="schemas", 327 | file_path: Optional[str] = None, 328 | output_mode: str = "sql", 329 | group_by_type: bool = False, 330 | json_dump=False, 331 | ) -> List[Dict]: 332 | """ 333 | dump: provide 'True' if you need to dump the output into a file 334 | dump_path: folder where you want to store the result dump files 335 | file_path: pass the full path to the ddl file if you want to use this 336 | file name as the name for the target output file 337 | output_mode: change the output mode to get information relative to a specific dialect, 338 | for example, with output_mode='hql' you will also see in self.tables such information as 339 | 'external', 'stored_as', etc. Possible variants: ["mssql", "mysql", "oracle", "hql", "sql", "snowflake", "redshift", "bigquery"] 340 | group_by_type: if you set True, the output will be formed as a Dict with keys ['tables', 341 | 'sequences', 'types', 'domains'] 342 | and each key will contain a list of parsed entities. Without it the output is a List of Dicts where each 343 | Dict == one entity from the ddl - one table, sequence or type.
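json_dump: if set to True, the output is returned as a JSON string (json.dumps is applied to the parsed tables) instead of Python objects.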
344 | """ 345 | self.tables = self.parse_data() 346 | self.tables = result_format(self.tables, output_mode, group_by_type) 347 | if dump: 348 | if file_path: 349 | # if we run parse from one file - save same way to one file 350 | dump_data_to_file( 351 | os.path.basename(file_path).split(".")[0], dump_path, self.tables 352 | ) 353 | else: 354 | for table in self.tables: 355 | dump_data_to_file(table["table_name"], dump_path, table) 356 | if json_dump: 357 | self.tables = json.dumps(self.tables) 358 | return self.tables 359 | -------------------------------------------------------------------------------- /sondesh/tokens.py: -------------------------------------------------------------------------------- 1 | # statements that used at the start of defenition or in statements without columns 2 | defenition_statements = { 3 | "DROP": "DROP", 4 | "CREATE": "CREATE", 5 | "TABLE": "TABLE", 6 | "DATABASE": "DATABASE", 7 | "SCHEMA": "SCHEMA", 8 | "ALTER": "ALTER", 9 | "TYPE": "TYPE", 10 | "DOMAIN": "DOMAIN", 11 | "REPLACE": "REPLACE", 12 | "OR": "OR", 13 | "CLUSTERED": "CLUSTERED", 14 | "SEQUENCE": "SEQUENCE", 15 | "TABLESPACE": "TABLESPACE", 16 | } 17 | common_statements = { 18 | "INDEX": "INDEX", 19 | "REFERENCES": "REFERENCES", 20 | "KEY": "KEY", 21 | "ADD": "ADD", 22 | "AS": "AS", 23 | "CLONE": "CLONE", 24 | "DEFERRABLE": "DEFERRABLE", 25 | "INITIALLY": "INITIALLY", 26 | "IF": "IF", 27 | "NOT": "NOT", 28 | "EXISTS": "EXISTS", 29 | "ON": "ON", 30 | "FOR": "FOR", 31 | "ENCRYPT": "ENCRYPT", 32 | "SALT": "SALT", 33 | "NO": "NO", 34 | "USING": "USING", 35 | # bigquery 36 | "OPTIONS": "OPTIONS", 37 | } 38 | 39 | columns_defenition = { 40 | "DELETE": "DELETE", 41 | "UPDATE": "UPDATE", 42 | "NULL": "NULL", 43 | "ARRAY": "ARRAY", 44 | ",": "COMMA", 45 | "DEFAULT": "DEFAULT", 46 | "COLLATE": "COLLATE", 47 | "ENFORCED": "ENFORCED", 48 | "ENCODE": "ENCODE", 49 | "GENERATED": "GENERATED", 50 | "COMMENT": "COMMENT" 51 | } 52 | first_liners = { 53 | "LIKE": "LIKE", 54 | "CONSTRAINT": "CONSTRAINT", 55 | "FOREIGN": "FOREIGN", 56 | "PRIMARY": "PRIMARY", 57 | "UNIQUE": "UNIQUE", 58 | "CHECK": "CHECK", 59 | "WITH": "WITH", 60 | } 61 | 62 | common_statements.update(first_liners) 63 | defenition_statements.update(common_statements) 64 | after_columns_tokens = { 65 | "PARTITIONED": "PARTITIONED", 66 | "PARTITION": "PARTITION", 67 | "BY": "BY", 68 | # hql 69 | "INTO": "INTO", 70 | "STORED": "STORED", 71 | "LOCATION": "LOCATION", 72 | "ROW": "ROW", 73 | "FORMAT": "FORMAT", 74 | "TERMINATED": "TERMINATED", 75 | "COLLECTION": "COLLECTION", 76 | "ITEMS": "ITEMS", 77 | "MAP": "MAP", 78 | "KEYS": "KEYS", 79 | "SERDE": "SERDE", 80 | "CLUSTER": "CLUSTER", 81 | "SERDEPROPERTIES": "SERDEPROPERTIES", 82 | "TBLPROPERTIES": "TBLPROPERTIES", 83 | "USING": "USING", 84 | "SKEWED": "SKEWED", 85 | # oracle 86 | "STORAGE": "STORAGE", 87 | "TABLESPACE": "TABLESPACE", 88 | # mssql 89 | "TEXTIMAGE_ON": "TEXTIMAGE_ON", 90 | } 91 | sequence_reserved = { 92 | "INCREMENT": "INCREMENT", 93 | "START": "START", 94 | "MINVALUE": "MINVALUE", 95 | "MAXVALUE": "MAXVALUE", 96 | "CACHE": "CACHE", 97 | "NO": "NO", 98 | } 99 | 100 | 101 | tokens = tuple( 102 | set( 103 | ["ID", "DOT", "STRING", "DQ_STRING", "LP", "RP", "LT", "RT", "COMMAT", "AUTOINCREMENT"] 104 | + list(defenition_statements.values()) 105 | + list(common_statements.values()) 106 | + list(columns_defenition.values()) 107 | + list(sequence_reserved.values()) 108 | + list(after_columns_tokens.values()) 109 | ) 110 | ) 111 | 112 | symbol_tokens = { 113 | ")": "RP", 114 | "(": "LP", 115 | } 
116 | 117 | symbol_tokens_no_check = {"<": "LT", ">": "RT"} 118 | -------------------------------------------------------------------------------- /sondesh/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | def remove_par(p_list: List[str]) -> List[str]: 5 | remove_list = ["(", ")"] 6 | for symbol in remove_list: 7 | while symbol in p_list: 8 | p_list.remove(symbol) 9 | return p_list 10 | 11 | 12 | spec_mapper = { 13 | "'pars_m_t'": "'\t'", 14 | "'pars_m_n'": "'\n'", 15 | "'pars_m_dq'": '"', 16 | "pars_m_single": "'", 17 | } 18 | 19 | 20 | def check_spec(value: str) -> str: 21 | replace_value = spec_mapper.get(value) 22 | if not replace_value: 23 | for item in spec_mapper: 24 | if item in value: 25 | replace_value = value.replace(item, spec_mapper[item]) 26 | break 27 | else: 28 | replace_value = value 29 | return replace_value 30 | 31 | 32 | def find_first_unpair_closed_par(str_: str) -> int: 33 | stack = [] 34 | n = -1 35 | for i in str_: 36 | n += 1 37 | if i == ")": 38 | if not stack: 39 | return n 40 | else: 41 | stack.pop(-1) 42 | elif i == "(": 43 | stack.append(i) 44 | -------------------------------------------------------------------------------- /test/read_from_file.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from sondesh.ddl_parser import parse_from_file 3 | 4 | result = parse_from_file('sql_files/one.sql') 5 | pprint.pprint(result) 6 | -------------------------------------------------------------------------------- /test/sql_files/one.sql: -------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid integer not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp, 12 | test_col varchar(160), 13 | test_col2 varchar(130), 14 | primary key(salesid), 15 | foreign key(listid) references listing(listid), 16 | foreign key(sellerid) references users(userid), 17 | foreign key(buyerid) references users(userid), 18 | foreign key(dateid) references date(dateid) 19 | ) 20 | diststyle auto1 21 | compound sortkey(salesid,sellerid); -------------------------------------------------------------------------------- /test/sql_files/test_sql.sql: -------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid integer not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp without time zone encode az64, 12 | test_col varchar(100), 13 | primary key(salesid), 14 | foreign key(listid) references listing(listid), 15 | foreign key(sellerid) references users(userid), 16 | foreign key(buyerid) references users(userid), 17 | foreign key(dateid) references date(dateid) 18 | ) 19 | diststyle auto1 20 | compound sortkey(salesid,sellerid); -------------------------------------------------------------------------------- /test/sql_files/two.sql: 
-------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid varchar not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint not null, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp without time zone encode az64, 12 | test_col varchar(120), 13 | primary key(salesid), 14 | foreign key(listid) references listing(listid), 15 | foreign key(sellerid) references users(userid), 16 | foreign key(buyerid) references users(userid), 17 | foreign key(dateid) references date(dateid) 18 | ) 19 | diststyle auto 20 | compound sortkey(listid,sellerid); -------------------------------------------------------------------------------- /test/test_oracle.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | from sondesh.ddl_parser import parse_the_ddl 4 | 5 | def test_oracle_ddl(): 6 | 7 | ddl = ''' 8 | CREATE TABLE employee ( 9 | employee_id number(100), 10 | first_name VARCHAR2(128) NOT NULL, 11 | last_name VARCHAR2(128) NOT NULL, 12 | salary NUMBER(6) ENCRYPT USING 'SHA256', 13 | emp_photo Blob, 14 | dept_id NUMBER(10), 15 | car_vin_no NUMBER(*,10), 16 | include_exclude_ind CHAR(1) DEFAULT 'Y', 17 | TEXT2_ NVARCHAR2(2000), 18 | CONSTRAINT check_employee_name CHECK (first_name = upper(first_name)), 19 | CONSTRAINT dept_fk FOREIGN KEY(dept_id) REFERENCES department(dept_id), 20 | CONSTRAINT employees_pk PRIMARY KEY (employee_id) 21 | ) 22 | PARTITION BY REFERENCE(dept_fk) 23 | Storage ( Initial 5m Next 5m Maxextents Unlimited ) 24 | ; 25 | ''' 26 | 27 | result = parse_the_ddl(ddl).run(group_by_type=True) 28 | pprint.pprint(result) 29 | 30 | expected = ''' 31 | {'ddl_properties': [], 32 | 'domains': [], 33 | 'schemas': [], 34 | 'sequences': [], 35 | 'tables': [{'alter': {}, 36 | 'checks': [{'constraint_name': 'check_employee_name', 37 | 'statement': 'first_name = upper(first_name)'}], 38 | 'columns': [{'check': None, 39 | 'default': None, 40 | 'name': 'employee_id', 41 | 'nullable': False, 42 | 'references': None, 43 | 'size': 100, 44 | 'type': 'number', 45 | 'unique': False}, 46 | {'check': None, 47 | 'default': None, 48 | 'name': 'first_name', 49 | 'nullable': False, 50 | 'references': None, 51 | 'size': 128, 52 | 'type': 'VARCHAR2', 53 | 'unique': False}, 54 | {'check': None, 55 | 'default': None, 56 | 'name': 'last_name', 57 | 'nullable': False, 58 | 'references': None, 59 | 'size': 128, 60 | 'type': 'VARCHAR2', 61 | 'unique': False}, 62 | {'check': None, 63 | 'default': None, 64 | 'encrypt': {'encryption_algorithm': "'SHA256'", 65 | 'integrity_algorithm': 'SHA-1', 66 | 'salt': True}, 67 | 'name': 'salary', 68 | 'nullable': True, 69 | 'references': None, 70 | 'size': 6, 71 | 'type': 'NUMBER', 72 | 'unique': False}, 73 | {'check': None, 74 | 'default': None, 75 | 'name': 'emp_photo', 76 | 'nullable': True, 77 | 'references': None, 78 | 'size': None, 79 | 'type': 'Blob', 80 | 'unique': False}, 81 | {'check': None, 82 | 'default': None, 83 | 'name': 'dept_id', 84 | 'nullable': True, 85 | 'references': None, 86 | 'size': 10, 87 | 'type': 'NUMBER', 88 | 'unique': False}, 89 | {'check': None, 90 | 'default': None, 91 | 'name': 'car_vin_no', 92 | 'nullable': True, 93 | 'references': None, 94 | 'size': ('*', 10), 95 | 'type': 'NUMBER', 96 | 'unique': False}, 97 | {'check': 
None, 98 | 'default': "'Y'", 99 | 'name': 'include_exclude_ind', 100 | 'nullable': True, 101 | 'references': None, 102 | 'size': 1, 103 | 'type': 'CHAR', 104 | 'unique': False}, 105 | {'check': None, 106 | 'default': None, 107 | 'name': 'TEXT2_', 108 | 'nullable': True, 109 | 'references': None, 110 | 'size': 2000, 111 | 'type': 'NVARCHAR2', 112 | 'unique': False}], 113 | 'constraints': {'checks': [{'constraint_name': 'check_employee_name', 114 | 'statement': 'first_name = ' 115 | 'upper(first_name)'}], 116 | 'primary_keys': [{'columns': ['employee_id'], 117 | 'constraint_name': 'employees_pk'}], 118 | 'references': [{'columns': ['dept_id'], 119 | 'constraint_name': 'dept_fk', 120 | 'deferrable_initially': None, 121 | 'on_delete': None, 122 | 'on_update': None, 123 | 'schema': None, 124 | 'table': 'department'}]}, 125 | 'index': [], 126 | 'partition_by': {'columns': ['dept_fk'], 'type': 'REFERENCE'}, 127 | 'partitioned_by': [], 128 | 'primary_key': ['employee_id'], 129 | 'schema': None, 130 | 'storage': {'initial': '5m', 131 | 'maxextents': 'Unlimited', 132 | 'next': '5m'}, 133 | 'table_name': 'employee', 134 | 'tablespace': None}], 135 | 'types': []} 136 | ''' 137 | #assert expected == result 138 | pprint.pprint(result['tables'][0]['checks']) 139 | 140 | test_oracle_ddl() -------------------------------------------------------------------------------- /test/test_redshift.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | from sondesh.ddl_parser import parse_the_ddl 4 | 5 | def test_redshift(): 6 | 7 | ddl = ''' 8 | create table sales( 9 | salesid integer not null, 10 | listid integer not null, 11 | sellerid integer not null, 12 | buyerid integer not null encode auto, 13 | eventid integer not null encode mostly16, 14 | dateid smallint not null, 15 | qtysold smallint not null encode mostly8, 16 | pricepaid decimal(8,2) encode delta32k, 17 | commission decimal(8,2) encode delta32k, 18 | saletime timestamp, 19 | primary key(salesid), 20 | foreign key(listid) references listing(listid), 21 | foreign key(sellerid) references users(userid), 22 | foreign key(buyerid) references users(userid), 23 | foreign key(dateid) references date(dateid) 24 | ) 25 | distkey(listid) 26 | compound sortkey(listid,sellerid) 27 | ''' 28 | result = parse_the_ddl(ddl).run(group_by_type=True, output_mode="redshift") 29 | pprint.pprint(result) 30 | 31 | test_redshift() --------------------------------------------------------------------------------
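Editorial closing note: the two test modules above show the intended entry point end to end - build a parser with parse_the_ddl and call run() with an output_mode and group_by_type (parse_from_file in test/read_from_file.py does the same for files). A minimal usage sketch along the same lines; the DDL text is illustrative:

import pprint

from sondesh.ddl_parser import parse_the_ddl

ddl = '''
create table sales(
    salesid integer not null,
    pricepaid decimal(8,2) encode delta32k,
    primary key(salesid)
)
'''

# group_by_type=True returns a dict keyed by entity kind ('tables', 'types', ...);
# output_mode="redshift" adds keys such as 'diststyle', 'distkey' and 'encode'
result = parse_the_ddl(ddl).run(group_by_type=True, output_mode="redshift")
pprint.pprint(result)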