#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Small Flask front-end for DDLj.

Presents a form (templates/index.html) that collects the JSON schema
path, target database, optional glossary CSV and output path, then
returns the generated DDL as a downloadable .sql attachment.
"""
from flask import Flask, Response, render_template, request

from DDLj import genddl

app = Flask(__name__)


@app.route('/')
def nthome():
    """Serve the input form."""
    return render_template('index.html')


@app.route('/getfile', methods=['POST', 'GET'])
def getfile():
    """Run genddl on the submitted form values and stream the result.

    On GET (nothing submitted yet) the form is shown again; the old code
    fell through and returned None, which Flask rejects with a 500.
    """
    if request.method != 'POST':
        return render_template('index.html')
    jsonschema = request.form['Schempath']   # path of the JSON schema file
    gloss = request.form['Gloss']            # optional glossary CSV path
    domain = request.form['domain']          # target database name
    sqlpath = request.form['SQL']            # output .sql path
    try:
        ddl = genddl(jsonschema, domain, gloss, sqlpath)
    except Exception as e:
        # Bug fix: the old handler printed the error and then used the
        # never-assigned result, raising UnboundLocalError on top of it.
        return Response(str(e), status=500, mimetype='text/plain')
    if ddl is None:
        # genddl catches and prints its own errors, returning None; the
        # old code passed that None straight into Response.
        return Response('DDL generation failed; see server log.',
                        status=500, mimetype='text/plain')
    return Response(
        ddl,
        # NOTE(review): kept the original 'application/json' mimetype even
        # though the payload is SQL text -- confirm before changing.
        mimetype='application/json',
        headers={'Content-Disposition':
                 'attachment;filename=' + domain + '.sql'})


if __name__ == '__main__':
    app.run(debug=True)
9 | 10 | 11 | 12 | 15 | 18 | 19 | 20 | 23 | 35 | 36 | 37 | 40 | 43 | 44 | 45 | 48 | 51 | 52 | 53 |
13 | 14 | 16 | 17 |
21 | 22 | 24 | 34 |
38 | 39 | 41 | 42 |
46 | 47 | 49 | 50 |
54 | 55 |
56 |

# This Source Code Form is subject to the terms of the MIT Public
# License, v. 2.0. If a copy of the MIT was not distributed with this
# file, You can obtain one at MIT License :: OSI Approved :: MIT License
# Author : Arghadeep Chaudhury,Siddhartha Bhattacharya
# author_email: siddhbhatt@gmail.com,arghadeep.chaudhury@gmail.com
# Created on Thu Jun 27 15:55:51 2019
# -*- coding: utf-8 -*-
"""DDLj: generate SQL DDL scripts from a JSON Schema file.

The schema is flattened with flatten_json ('_'-joined keys); the
``*_type`` / ``*_format`` entries at a given nesting depth become table
columns.  An optional glossary CSV (columns: Name, Abbreviation) maps
camel-case logical name parts to physical abbreviations.
"""

import json
import re
import sys

import pandas as pd
from flatten_json import flatten

# Databases that get ANSI-ish types instead of the Oracle defaults.
_ANSI_DATABASES = ('PostgreSQL', 'MYSQL', 'DB2', 'MariaDB')


def _load_glossary(Glossarypath):
    """Return the glossary as a DataFrame with Name/Abbreviation columns.

    Falls back to an empty frame (so no abbreviation happens) when the
    path is missing/empty or the CSV cannot be read.
    """
    if Glossarypath:
        try:
            return pd.read_csv(Glossarypath)
        except Exception:
            pass
    return pd.DataFrame(columns=['Name', 'Abbreviation'])


def L2P_builder(char, Glossarypath):
    """Convert a camel-case logical name to a physical '_'-joined name.

    Each camel-case word is looked up in the glossary; words without a
    (unique) abbreviation are kept as-is.  A trailing 'ID' on the final
    word is split off and re-attached as '_ID'.
    """
    df = _load_glossary(Glossarypath)
    words = re.sub('(?!^)([A-Z][a-z]+)', r' \1', char).split()
    parts = []
    for idx, word in enumerate(words):
        is_last = idx == len(words) - 1
        if is_last and word.endswith('ID'):
            stem = word[:-2]
            try:
                stem = df.loc[df.Name == stem, 'Abbreviation'].item()
            except Exception:
                pass  # no (unique) glossary entry -> keep the raw stem
            parts.append(stem + '_ID')
        else:
            try:
                word = df.loc[df.Name == word, 'Abbreviation'].item()
            except Exception:
                pass  # no (unique) glossary entry -> keep the raw word
            parts.append(word)
    return '_'.join(parts)


def StringEnds(char):
    """Return the last camel-case word of *char* (None for empty input)."""
    words = re.sub('(?!^)([A-Z][a-z]+)', r' \1', char).split()
    return words[-1] if words else None


def sqlStrinNthLevel(json_schema_flat, n, ValStr, Glossarypath):
    """Collect column-definition strings for keys at nesting depth *n*.

    json_schema_flat: schema flattened with flatten_json ('_' separator).
    n:       number of '_' characters in the keys to consider (2 = top).
    ValStr:  when not None, only keys whose second segment equals it are
             used (restricts the scan to one array's items).
    Returns a list of strings like 'COL_NAM Varchar2(10),'.
    """
    cols = []
    for key, val in json_schema_flat.items():
        if key.count('_') != n:
            continue
        # Only the *_type / *_format entries drive column generation.
        if not (key.endswith('_type') or key.endswith('_format')):
            continue
        if ValStr is not None:
            segments = key.split('_')
            if len(segments) < 2 or segments[1] != ValStr:
                continue
        col_name = L2P_builder(key.split('_')[n - 1], Glossarypath)
        if val == 'string' and key.endswith('_type'):
            base = key[:-len('_type')]
            # Bug fix: the old code fetched maxLength via eval() on a
            # string-built expression; a plain dict lookup is equivalent.
            max_length = json_schema_flat.get(base + '_maxLength')
            if max_length is not None:
                cols.append(col_name + ' Varchar2(' + str(max_length) + '),')
            elif base.endswith('Date'):
                cols.append(col_name + ' Date,')
            elif base.endswith('Timestamp') or base.endswith('DateTime'):
                cols.append(col_name + ' Timestamp(6),')
            # other unbounded strings are (as before) silently skipped
        elif val == 'number':
            cols.append(col_name + ' Number(38,10),')
        elif val == 'date' and key.endswith('_format'):
            cols.append(col_name + ' Date,')
        elif val == 'date-time' and key.endswith('_format'):
            cols.append(col_name + ' Timestamp(6),')
        elif val == 'boolean':
            # Bug fix: booleans appear as "type": "boolean", but the old
            # code only accepted them on *_format keys, so they were lost.
            cols.append(col_name + ' Char(1),')
        elif val == 'object':
            # Bug fix: the old code recursed but dropped the result (and
            # in the array branch even passed the function itself instead
            # of the flattened schema).  Nested objects now contribute.
            cols.extend(sqlStrinNthLevel(json_schema_flat, n + 2, ValStr,
                                         Glossarypath))
    return cols


def _adapt_column(col, Database):
    """Map the Oracle default types to ANSI types for non-Oracle targets."""
    if Database in _ANSI_DATABASES:
        col = (col.replace(' Varchar2', ' Varchar')
                  .replace(' Number(38,10)', ' NUMERIC(38,10)')
                  .replace(' Timestamp(6)', ' Timestamp')
                  .replace(' Char(1)', ' boolean'))
    return col


def _build_table(table_name, cols, Database, prefix=''):
    """Assemble one CREATE TABLE statement from column strings.

    Consistency fix: the DB-type mapping is now applied here for every
    table; the old code applied it to the parent table only.
    """
    out = prefix + 'Create Table ' + table_name + ' ('
    for col in cols:
        out = out + _adapt_column(col, Database) + '\n'
    # Drop the trailing ',\n' of the last column before closing.
    return out[:len(out) - 2] + ');'


def callMain(file_path, Database, Glossarypath, outputfilePath):
    """Generate the DDL for *file_path* and write it to *outputfilePath*.

    Returns the full SQL text.  Exceptions propagate to the caller
    (genddl) instead of being silently swallowed as before, and the
    output file is managed with ``with`` so it is closed on error too.
    """
    with open(file_path) as json_data:
        json_schema = json.load(json_data)
    json_schema_flat = flatten(json_schema)

    parent_cols = list(dict.fromkeys(
        sqlStrinNthLevel(json_schema_flat, 2, None, Glossarypath)))
    table_name = json_schema_flat.get('title', '')
    full_sql = _build_table(table_name, parent_cols, Database)

    # Each top-level array becomes its own child table.  Bug fix: the old
    # code accumulated the array names in a module-level list, so repeated
    # calls re-emitted child tables from earlier schemas.
    array_names = [key.split('_')[1]
                   for key, val in json_schema_flat.items() if val == 'array']
    for array_name in array_names:
        child_cols = list(dict.fromkeys(
            sqlStrinNthLevel(json_schema_flat, 6, array_name, Glossarypath)))
        # NOTE(review): the original strips the last 5 characters of the
        # array field name (presumably an 'Array' suffix) -- kept as-is,
        # TODO confirm against real schemas.
        full_sql = full_sql + _build_table(
            array_name[:len(array_name) - 5], child_cols, Database,
            prefix='\n')

    with open(outputfilePath, 'w+') as out_file:
        out_file.write(full_sql)
    return full_sql


def genddl(file_path, Database, Glossarypath, outputfilePath):
    """Public entry point: JSON schema file -> DDL script.

    file_path:      JSON schema file to convert.
    Database:       target database (Oracle types by default; PostgreSQL,
                    MYSQL, DB2 and MariaDB get ANSI types).
    Glossarypath:   optional CSV (Name,Abbreviation) for name shortening.
    outputfilePath: where the generated .sql script is written.
    Returns the SQL text, or None if generation failed (the error is
    printed, matching the historical behaviour).
    """
    try:
        # The old code parsed and flattened the schema here a second time
        # just to throw the result away; callMain does all the work.
        return callMain(file_path, Database, Glossarypath, outputfilePath)
    except Exception as e:
        print(e)
        return None


if __name__ == '__main__':
    # Bug fix: the old guard called genddl() with four undefined names and
    # always crashed with NameError.  Take the parameters from argv.
    if len(sys.argv) != 5:
        print('usage: DDLj.py <schema.json> <database> <glossary.csv> '
              '<output.sql>')
    else:
        genddl(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JSON Utils Package (DDLj) 2 | This is a python package having multiple utilities for handling JSON Files. 3 | 4 | Module1 - DDLj : Converts JSON Schema Files into ANSI SQL DDLs 5 | Supports foll databases: 6 | A.PostgreSQL 7 | B.MYSQL 8 | C.DB2 9 | D.MariaDB 10 | E.Oracle 11 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 12 | Usage: 13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 14 | pip install DDLJ 15 | 16 | python 17 | 18 | >>> from DDLj import genddl 19 | 20 | >>> genddl(*param1,param2,*param3,*param4) 21 | 22 | Where 23 | 24 | param1= JSON Schema File 25 | 26 | param2=Database (Default Oracle) 27 | 28 | Param3= Glossary file 29 | 30 | Param4= DDL output script 31 | 32 | Note : * indicates mandatory parameters 33 | 34 | It also includes a Flask module for front-end if used as a standalone tool. Refer to App directory. 
35 | ******************************************* 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 37 | Example: 38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 39 | Input JSON schema as: 40 | { 41 | "schema": "Http://Json-Schema.Org/Draft-07/Schema#", 42 | "type": "object", 43 | "title": "TableNameTest", 44 | "additionalProperties": false, 45 | "properties": { 46 | "ColumnNameOne": { 47 | "type": "string", 48 | "maxLength": 10 49 | }, 50 | "ColumnNameTwo": { 51 | "type": "string", 52 | "format": "date-time" 53 | }, 54 | "ColumnNameThree": { 55 | "type": "string", 56 | "maxLength": 200 57 | }, 58 | "ColumnNameFour": { 59 | "type": "string", 60 | "maxLength": 300 61 | }, 62 | "ColumnNameFive": { 63 | "type": "string", 64 | "format": "date" 65 | }, 66 | "ColumnNameSix": { 67 | "type": "number" 68 | }, 69 | "ColumnNameSeven": { 70 | "type": "number" 71 | }, 72 | "ColumnNameEight": { 73 | "type": "string", 74 | "maxLength": 1000 75 | }, 76 | "ColumnNameNine": { 77 | "type": "string", 78 | "maxLength": 2000 79 | }, 80 | "ColumnNameTen": { 81 | "type": "number" 82 | } 83 | } 84 | } 85 | 86 | Code Usage: 87 | >>> from DDLj import genddl 88 | >>> genddl('TestJsonSchema.json','Oracle','GlossaryTestFile.csv','GenDDLGlossary.sql') 89 | 90 | Output: 91 | Create Table TableNameTest (COL_NAM_One Varchar2(10), 92 | COL_NAM_Two Timestamp(6), 93 | COL_NAM_Three Varchar2(200), 94 | COL_NAM_Four Varchar2(300), 95 | COL_NAM_Five Date, 96 | COL_NAM_Six Number(38,10), 97 | COL_NAM_Seven Number(38,10), 98 | COL_NAM_Eight Varchar2(1000), 99 | COL_NAM_Nine Varchar2(2000), 100 | COL_NAM_Ten Number(38,10)); 101 | 102 | Please see the Test Folder for JSON schema, glossary file and output. 103 | **************************** 104 | 105 | Note: Other modules to come soon. 
106 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup, find_packages 4 | 5 | with open("README.md", "r") as fh: 6 | long_description = fh.read() 7 | 8 | setup( 9 | name="DDLJ", 10 | version="0.0.15", 11 | author="Arghadeep Chaudhury,Siddhartha Bhattacharya", 12 | author_email="siddhbhatt@gmail.com,arghadeep.chaudhury@gmail.com", 13 | description="JSON Utils for generating DDL from JSON Schema", 14 | long_description=long_description, 15 | long_description_content_type="text/markdown", 16 | url="https://github.com/deepstartup/jsonutils", 17 | packages=find_packages(), 18 | install_requires=['pandas', 'flatten_json'], 19 | classifiers=[ 20 | "Programming Language :: Python :: 3", 21 | "License :: OSI Approved :: MIT License", 22 | "Operating System :: OS Independent", 23 | ], 24 | ) -------------------------------------------------------------------------------- /test/GenDDL.sql: -------------------------------------------------------------------------------- 1 | Create Table TableNameTest (Column_Name_One Varchar2(10), 2 | Column_Name_Two Timestamp(6), 3 | Column_Name_Three Varchar2(200), 4 | Column_Name_Four Varchar2(300), 5 | Column_Name_Five Date, 6 | Column_Name_Six Number(38,10), 7 | Column_Name_Seven Number(38,10), 8 | Column_Name_Eight Varchar2(1000), 9 | Column_Name_Nine Varchar2(2000), 10 | Column_Name_Ten Number(38,10)); -------------------------------------------------------------------------------- /test/GenDDLGlossary.sql: -------------------------------------------------------------------------------- 1 | Create Table TableNameTest (COL_NAM_One Varchar2(10), 2 | COL_NAM_Two Timestamp(6), 3 | COL_NAM_Three Varchar2(200), 4 | COL_NAM_Four Varchar2(300), 5 | COL_NAM_Five Date, 6 | COL_NAM_Six Number(38,10), 7 | COL_NAM_Seven Number(38,10), 8 | COL_NAM_Eight 
Varchar2(1000), 9 | COL_NAM_Nine Varchar2(2000), 10 | COL_NAM_Ten Number(38,10)); -------------------------------------------------------------------------------- /test/GlossaryEmptyTestFile.csv: -------------------------------------------------------------------------------- 1 | GlossaryValues,GlossaryAbbreviations -------------------------------------------------------------------------------- /test/GlossaryTestFile.csv: -------------------------------------------------------------------------------- 1 | Name,Abbreviation 2 | First,FST 3 | Secoand,SEC 4 | Third,TRD 5 | Name,NAM 6 | Column,COL -------------------------------------------------------------------------------- /test/ReadMe.txt.txt: -------------------------------------------------------------------------------- 1 | 1.TestJsonSchema.json : Sample JSON Schema 2 | 3 | 4 | ##Glossary File: 5 | Glossary file is the unique feature for this API which allow the user to generate Abbreviated colums if the 6 | JSON key values are too big to fit in the database metadata 7 | If user wanted to generate the column name as is then use dummy glossay *(GlossaryEmptyTestFile.csv) as attached. 8 | 9 | 2.GlossaryEmptyTestFile.csv : DDL file generated after puting Dummy glossary file. 
10 | 11 | 3.GlossaryTestFile.csv : Sameple Glossary File 12 | 13 | 4.GenDDL.sql: DDL file generated after puting Dummy glossary file.(GlossaryEmptyTestFile.csv ) 14 | 15 | 5.GenDDLGlossary.sql : DDL file generated after puting glossary file.(GlossaryTestFile.csv) -------------------------------------------------------------------------------- /test/TestJsonSchema.json: -------------------------------------------------------------------------------- 1 | { 2 | "schema": "Http://Json-Schema.Org/Draft-07/Schema#", 3 | "type": "object", 4 | "title": "TableNameTest", 5 | "additionalProperties": false, 6 | "properties": { 7 | "ColumnNameOne": { 8 | "type": "string", 9 | "maxLength": 10 10 | }, 11 | "ColumnNameTwo": { 12 | "type": "string", 13 | "format": "date-time" 14 | }, 15 | "ColumnNameThree": { 16 | "type": "string", 17 | "maxLength": 200 18 | }, 19 | "ColumnNameFour": { 20 | "type": "string", 21 | "maxLength": 300 22 | }, 23 | "ColumnNameFive": { 24 | "type": "string", 25 | "format": "date" 26 | }, 27 | "ColumnNameSix": { 28 | "type": "number" 29 | }, 30 | "ColumnNameSeven": { 31 | "type": "number" 32 | }, 33 | "ColumnNameEight": { 34 | "type": "string", 35 | "maxLength": 1000 36 | }, 37 | "ColumnNameNine": { 38 | "type": "string", 39 | "maxLength": 2000 40 | }, 41 | "ColumnNameTen": { 42 | "type": "number" 43 | } 44 | } 45 | } --------------------------------------------------------------------------------