├── APP
├── App.JPG
├── DDLj_App.py
└── templates
│ └── index.html
├── DDLj
├── DDLj.py
└── __init__.py
├── LICENSE.txt
├── README.md
├── setup.py
└── test
├── GenDDL.sql
├── GenDDLGlossary.sql
├── GlossaryEmptyTestFile.csv
├── GlossaryTestFile.csv
├── ReadMe.txt.txt
└── TestJsonSchema.json
/APP/App.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deepstartup/jsonutils/987a0a407d0212e8374e5e8ce35088fa5a6b05cf/APP/App.JPG
--------------------------------------------------------------------------------
/APP/DDLj_App.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Flask front-end for the DDLj package: serves an upload form (index.html)
# and streams the generated DDL back to the browser as a .sql attachment.
import cgi
# NOTE(review): the three separate `from flask import` lines below are
# redundant -- the first line already imports Flask and render_template.
from flask import Flask, render_template, request
from flask import render_template
from flask import Flask
from flask import Response
app = Flask(__name__)
# NOTE(review): `form` is never read by the routes below; the `cgi` module is
# deprecated (removed in Python 3.13) -- candidate for removal.
form = cgi.FieldStorage()
from DDLj import genddl
@app.route('/')
def nthome():
    # Landing page: render the upload form (templates/index.html).
    return render_template('index.html')
@app.route('/getfile', methods=['POST', 'GET'])
def getfile():
    """Generate DDL from the posted form fields and return it as a
    downloadable .sql attachment.

    Expected form fields: ``Schempath`` (JSON schema path), ``Gloss``
    (glossary csv path), ``domain`` (target database name) and ``SQL``
    (output script path) -- forwarded to ``DDLj.genddl``.
    """
    if request.method == 'POST':
        jsonschema = request.form['Schempath']
        gloss = request.form['Gloss']
        domain = request.form['domain']
        sqlpath = request.form['SQL']
        try:
            f = genddl(jsonschema, domain, gloss, sqlpath)
        except Exception as e:
            # Bug fix: the original only printed the error and then fell
            # through to use the undefined name `f`, raising NameError.
            # Surface the failure to the client instead.
            return Response(str(e), status=500, mimetype='text/plain')
        return Response(
            f,
            mimetype='application/json',
            headers={'Content-Disposition':
                     'attachment;filename=' + domain + '.sql'})
    # Bug fix: a GET request previously reached the Response with `f`
    # undefined; show the form again instead.
    return render_template('index.html')
if __name__ == '__main__':
    app.run(debug=True)
28 |
--------------------------------------------------------------------------------
/APP/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | DDLj
6 |
7 |
8 |
56 |
 57 | *Glossary file should be in .csv format.
58 |
59 |
60 |
--------------------------------------------------------------------------------
/DDLj/DDLj.py:
--------------------------------------------------------------------------------
# This Source Code Form is subject to the terms of the MIT License.
# If a copy of the license was not distributed with this file, see
# LICENSE.txt at the repository root.
# Author: Arghadeep Chaudhury, Siddhartha Bhattacharya
# author_email: siddhbhatt@gmail.com, arghadeep.chaudhury@gmail.com
# Created on Thu Jun 27 15:55:51 2019
# -*- coding: utf-8 -*-
#
# DDLj: converts a JSON Schema file into ANSI SQL DDL statements
# (see genddl() at the bottom for the public entry point).

import json
import pandas as pd
import re
from flatten_json import flatten,unflatten

# Module-level scratch state.
# NOTE(review): most of these names appear unused or are shadowed by locals
# in the functions below (e.g. ColList, sqlstring); ArrayList is appended to
# by callMain and therefore accumulates across calls -- verify before
# removing or relying on any of them.
temp=[]
DDF_list=[]
ColList=[]
Domain_Com=[]
MinMaxList=[]
colBuild=''
sqlstring=''
colName=''
ArrayList=[]
Empty_Df=[]
23 |
24 | #L2P_builder function convert the logical name to physical name using glossary words
25 | #separated by '_', if No glossary input has been provided, this has return as is logical name
def L2P_builder(char, Glossarypath):
    """Convert a camelCase logical name into a physical (column) name.

    Splits *char* on camelCase word boundaries, replaces each word with its
    abbreviation from the glossary csv (columns ``Name`` / ``Abbreviation``)
    when one exists, and joins the words with ``_``.  A trailing ``ID``
    suffix on the last word is split off and re-attached as a literal
    ``_ID``.  With no usable glossary the original words are kept as-is.

    char         : logical camelCase name, e.g. ``'ColumnNameOne'``
    Glossarypath : path to the glossary csv, or None/'' for no glossary
    Returns the underscore-joined physical name (``''`` for empty input).
    """
    # Load the glossary; any failure falls back to an empty frame, which
    # makes every lookup miss and therefore keeps words unchanged.
    if Glossarypath in (None, ''):
        df = pd.DataFrame(columns=['Name', 'Abbreviation'])
    else:
        try:
            df = pd.read_csv(Glossarypath)
        except Exception:
            df = pd.DataFrame(columns=['Name', 'Abbreviation'])

    def abbreviate(word):
        # Glossary abbreviation for *word*; the word itself when it is
        # missing (or duplicated -- .item() requires exactly one match).
        try:
            return df.loc[df.Name == word, 'Abbreviation'].item()
        except (ValueError, KeyError, AttributeError):
            return word

    words = re.sub('(?!^)([A-Z][a-z]+)', r' \1', char).split()
    parts = []
    for i, val in enumerate(words):
        if i == len(words) - 1 and val.endswith('ID'):
            # Strip the ID suffix, abbreviate the stem, re-attach as '_ID'.
            parts.append(abbreviate(val[:-2]) + '_ID')
        else:
            parts.append(abbreviate(val))
    return '_'.join(parts)
67 | #StringEnds: return the last character value from a String with camelcase
def StringEnds(char):
    """Return the last camelCase word of *char* (None when there is none)."""
    words = re.sub('(?!^)([A-Z][a-z]+)', r' \1', char).split()
    if not words:
        return None
    return words[-1]
76 | #Recursive function to generate the DDL string array using the flatten json
#Recursive function to generate the DDL string array using the flatten json
def sqlStrinNthLevel(json_schema_flat, n, ValStr, Glossarypath):
    """Collect ``'COLUMN TYPE,'`` DDL fragments from a flattened schema.

    json_schema_flat : dict produced by flatten_json.flatten()
    n                : underscore-count of the keys scanned at this level
                      (2 for the root table, 6 for array children, ...)
    ValStr           : None for the root table; otherwise the field name
                      whose nested columns are being collected
    Glossarypath     : glossary csv path, forwarded to L2P_builder
    Returns the list of column fragments found at depth n.
    """
    ColList = []

    def column_sql(keys, vals):
        # Map one flattened schema entry to a DDL fragment, or None when
        # the entry does not describe a column type we handle.
        name = L2P_builder(keys.split('_')[n - 1], Glossarypath)
        if vals == 'string' and keys.endswith('_type'):
            base = keys[:len(keys) - 5]      # key without the '_type' suffix
            try:
                # Bug fix: plain dict lookup replaces the original
                # eval() on a built string, which broke on keys that
                # contain quote characters.
                maxLength = json_schema_flat[base + '_maxLength']
                return name + ' Varchar2(' + str(maxLength) + '),'
            except KeyError:
                # No maxLength: fall back to name-based date detection.
                if base.endswith('Date'):
                    return name + ' Date,'
                if base.endswith('Timestamp') or base.endswith('DateTime'):
                    return name + ' Timestamp(6),'
                return None
        if vals == 'number':
            return name + ' Number(38,10),'
        if vals == 'date' and keys.endswith('_format'):
            return name + ' Date,'
        if vals == 'date-time' and keys.endswith('_format'):
            return name + ' Timestamp(6),'
        if vals == 'boolean' and keys.endswith('_format'):
            return name + ' Char(1),'
        return None

    for keys, vals in json_schema_flat.items():
        try:
            arrayField = keys.split('_')[1]
        except IndexError:
            arrayField = None
        if keys.count('_') != n:
            continue
        if ValStr is not None and arrayField != ValStr:
            continue
        # Bug fix: the nested (ValStr) branch previously accepted only
        # '_type' keys, so '_format'-driven date/timestamp columns of
        # array children were silently dropped.
        if not (keys.endswith('_type') or keys.endswith('_format')):
            continue
        if vals == 'object':
            # Descend into the nested object.  Bug fix: the nested branch
            # used to pass the function itself instead of the flattened
            # schema.  The recursion's return value is discarded, as in
            # the original design -- child tables are emitted by a
            # separate pass in callMain.
            sqlStrinNthLevel(json_schema_flat, n + 2, ValStr, Glossarypath)
            continue
        sql = column_sql(keys, vals)
        if sql is not None:
            ColList.append(sql)
    return ColList
148 |
149 | #Writing to the outputfile path
#Writing to the outputfile path
def callMain(file_path, Database, Glossarypath, outputfilePath):
    """Generate the DDL script for a JSON schema and write it to disk.

    file_path      : JSON schema file to convert
    Database       : target database; 'PostgreSQL', 'MYSQL', 'DB2' and
                     'MariaDB' get ANSI type names, anything else keeps the
                     Oracle defaults
    Glossarypath   : glossary csv path (None/'' for no abbreviation)
    outputfilePath : path the generated script is written to
    Returns the full generated SQL as one string.  Errors now propagate to
    the caller (genddl logs them) instead of being silently swallowed.
    """
    with open(file_path) as json_data:
        json_schema = json.load(json_data)
    json_schema_flat = flatten(json_schema)
    # Table name comes from the schema 'title'.  Bug fix: a missing title
    # used to raise NameError which the bare except silently swallowed.
    Table_Name = json_schema_flat.get('title', '')
    # De-duplicate while preserving order.
    columns = list(dict.fromkeys(
        sqlStrinNthLevel(json_schema_flat, 2, None, Glossarypath)))
    DDLout = 'Create Table ' + Table_Name + ' ('
    for ddlTxt in columns:
        # Default database is Oracle; translate type names for the others.
        if Database in ('PostgreSQL', 'MYSQL', 'DB2', 'MariaDB'):
            ddlTxt = ddlTxt.replace(' Varchar2', ' Varchar')
            ddlTxt = ddlTxt.replace(' Number(38,10)', ' NUMERIC(38,10)')
            ddlTxt = ddlTxt.replace(' Timestamp(6)', ' Timestamp')
            ddlTxt = ddlTxt.replace(' Char(1)', ' boolean')
        DDLout = DDLout + ddlTxt + '\n'
    # Drop the trailing ',\n' left by the last column and close the DDL.
    DDLout = DDLout[:len(DDLout) - 2] + ');'
    full_sql_str = DDLout
    # One child table per array field.  Bug fix: this list used to be a
    # module-level global, so it accumulated entries across calls and
    # emitted duplicate child tables on reuse.
    array_fields = [p.split('_')[1] for p, q in json_schema_flat.items()
                    if q == 'array' and '_' in p]
    for Array_DDLs in array_fields:
        First = list(dict.fromkeys(
            sqlStrinNthLevel(json_schema_flat, 6, Array_DDLs, Glossarypath)))
        DDLoutChild = '\nCreate Table ' + Array_DDLs[:len(Array_DDLs) - 5] + ' ('
        for listFst in First:
            DDLoutChild = DDLoutChild + listFst + '\n'
        DDLoutChild = DDLoutChild[:len(DDLoutChild) - 2] + ');'
        full_sql_str = full_sql_str + DDLoutChild
    # 'with' guarantees the handle is closed on every path (the original
    # leaked the handle when any exception fired).
    with open(outputfilePath, 'w+') as FileOpen:
        FileOpen.write(full_sql_str)
    return full_sql_str
def genddl(file_path, Database, Glossarypath, outputfilePath):
    """Public entry point: generate SQL DDL from a JSON schema file.

    file_path      : JSON schema file (mandatory)
    Database       : target database name (default behavior is Oracle)
    Glossarypath   : glossary csv path, or None/'' for none
    outputfilePath : path the generated .sql script is written to
    Returns the generated SQL string, or None (after printing the error)
    when generation fails.
    """
    try:
        # Validate that the schema file exists and parses before handing
        # off to callMain (which re-reads it for the real work).
        with open(file_path) as json_data:
            json.load(json_data)
        return callMain(file_path, Database, Glossarypath, outputfilePath)
    except Exception as e:
        print(e)
        return None
if __name__ == '__main__':
    # Bug fix: the original guard referenced undefined globals
    # (file_path, Database, ...) and always crashed with NameError.
    import sys
    if len(sys.argv) != 5:
        print('Usage: DDLj.py <schema.json> <database> <glossary.csv> <out.sql>')
    else:
        genddl(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])
212 |
--------------------------------------------------------------------------------
/DDLj/__init__.py:
--------------------------------------------------------------------------------
1 | from .DDLj import *
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) [2019] [Arghadeep Chaudhury,Siddhartha Bhattacharya,{DDLJ}]
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JSON Utils Package (DDLj)
2 | This is a python package having multiple utilities for handling JSON Files.
3 |
4 | Module1 - DDLj : Converts JSON Schema Files into ANSI SQL DDLs
 5 | Supports the following databases:
6 | A.PostgreSQL
7 | B.MYSQL
8 | C.DB2
9 | D.MariaDB
10 | E.Oracle
11 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12 | Usage:
13 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14 | pip install DDLJ
15 |
16 | python
17 |
18 | >>> from DDLj import genddl
19 |
20 | >>> genddl(*param1,param2,*param3,*param4)
21 |
22 | Where
23 |
24 | param1= JSON Schema File
25 |
26 | param2=Database (Default Oracle)
27 |
28 | Param3= Glossary file
29 |
30 | Param4= DDL output script
31 |
32 | Note : * indicates mandatory parameters
33 |
34 | It also includes a Flask module for front-end if used as a standalone tool. Refer to App directory.
35 | *******************************************
36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~
37 | Example:
38 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
39 | Input JSON schema as:
40 | {
41 | "schema": "Http://Json-Schema.Org/Draft-07/Schema#",
42 | "type": "object",
43 | "title": "TableNameTest",
44 | "additionalProperties": false,
45 | "properties": {
46 | "ColumnNameOne": {
47 | "type": "string",
48 | "maxLength": 10
49 | },
50 | "ColumnNameTwo": {
51 | "type": "string",
52 | "format": "date-time"
53 | },
54 | "ColumnNameThree": {
55 | "type": "string",
56 | "maxLength": 200
57 | },
58 | "ColumnNameFour": {
59 | "type": "string",
60 | "maxLength": 300
61 | },
62 | "ColumnNameFive": {
63 | "type": "string",
64 | "format": "date"
65 | },
66 | "ColumnNameSix": {
67 | "type": "number"
68 | },
69 | "ColumnNameSeven": {
70 | "type": "number"
71 | },
72 | "ColumnNameEight": {
73 | "type": "string",
74 | "maxLength": 1000
75 | },
76 | "ColumnNameNine": {
77 | "type": "string",
78 | "maxLength": 2000
79 | },
80 | "ColumnNameTen": {
81 | "type": "number"
82 | }
83 | }
84 | }
85 |
86 | Code Usage:
87 | >>> from DDLj import genddl
88 | >>> genddl('TestJsonSchema.json','Oracle','GlossaryTestFile.csv','GenDDLGlossary.sql')
89 |
90 | Output:
91 | Create Table TableNameTest (COL_NAM_One Varchar2(10),
92 | COL_NAM_Two Timestamp(6),
93 | COL_NAM_Three Varchar2(200),
94 | COL_NAM_Four Varchar2(300),
95 | COL_NAM_Five Date,
96 | COL_NAM_Six Number(38,10),
97 | COL_NAM_Seven Number(38,10),
98 | COL_NAM_Eight Varchar2(1000),
99 | COL_NAM_Nine Varchar2(2000),
100 | COL_NAM_Ten Number(38,10));
101 |
102 | Please see the Test Folder for JSON schema, glossary file and output.
103 | ****************************
104 |
105 | Note: Other modules to come soon.
106 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
"""Packaging script for the DDLJ package."""

from setuptools import setup, find_packages

# Read the long description with an explicit encoding so the build does not
# depend on the platform default (e.g. cp1252 on Windows).
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="DDLJ",
    version="0.0.15",
    author="Arghadeep Chaudhury,Siddhartha Bhattacharya",
    author_email="siddhbhatt@gmail.com,arghadeep.chaudhury@gmail.com",
    description="JSON Utils for generating DDL from JSON Schema",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/deepstartup/jsonutils",
    packages=find_packages(),
    install_requires=['pandas', 'flatten_json'],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
--------------------------------------------------------------------------------
/test/GenDDL.sql:
--------------------------------------------------------------------------------
1 | Create Table TableNameTest (Column_Name_One Varchar2(10),
2 | Column_Name_Two Timestamp(6),
3 | Column_Name_Three Varchar2(200),
4 | Column_Name_Four Varchar2(300),
5 | Column_Name_Five Date,
6 | Column_Name_Six Number(38,10),
7 | Column_Name_Seven Number(38,10),
8 | Column_Name_Eight Varchar2(1000),
9 | Column_Name_Nine Varchar2(2000),
10 | Column_Name_Ten Number(38,10));
--------------------------------------------------------------------------------
/test/GenDDLGlossary.sql:
--------------------------------------------------------------------------------
1 | Create Table TableNameTest (COL_NAM_One Varchar2(10),
2 | COL_NAM_Two Timestamp(6),
3 | COL_NAM_Three Varchar2(200),
4 | COL_NAM_Four Varchar2(300),
5 | COL_NAM_Five Date,
6 | COL_NAM_Six Number(38,10),
7 | COL_NAM_Seven Number(38,10),
8 | COL_NAM_Eight Varchar2(1000),
9 | COL_NAM_Nine Varchar2(2000),
10 | COL_NAM_Ten Number(38,10));
--------------------------------------------------------------------------------
/test/GlossaryEmptyTestFile.csv:
--------------------------------------------------------------------------------
1 | GlossaryValues,GlossaryAbbreviations
--------------------------------------------------------------------------------
/test/GlossaryTestFile.csv:
--------------------------------------------------------------------------------
1 | Name,Abbreviation
2 | First,FST
3 | Secoand,SEC
4 | Third,TRD
5 | Name,NAM
6 | Column,COL
--------------------------------------------------------------------------------
/test/ReadMe.txt.txt:
--------------------------------------------------------------------------------
1 | 1.TestJsonSchema.json : Sample JSON Schema
2 |
3 |
4 | ##Glossary File:
 5 | Glossary file is the unique feature of this API which allows the user to generate abbreviated columns if the
 6 | JSON key values are too big to fit in the database metadata.
 7 | If the user wants to generate the column names as-is, then use the dummy glossary (GlossaryEmptyTestFile.csv) as attached.
8 |
 9 | 2.GlossaryEmptyTestFile.csv : Dummy (empty) glossary file containing only the header row.
10 |
 11 | 3.GlossaryTestFile.csv : Sample glossary file
12 |
 13 | 4.GenDDL.sql: DDL file generated using the dummy glossary file (GlossaryEmptyTestFile.csv).
14 |
 15 | 5.GenDDLGlossary.sql : DDL file generated using the glossary file (GlossaryTestFile.csv).
--------------------------------------------------------------------------------
/test/TestJsonSchema.json:
--------------------------------------------------------------------------------
1 | {
2 | "schema": "Http://Json-Schema.Org/Draft-07/Schema#",
3 | "type": "object",
4 | "title": "TableNameTest",
5 | "additionalProperties": false,
6 | "properties": {
7 | "ColumnNameOne": {
8 | "type": "string",
9 | "maxLength": 10
10 | },
11 | "ColumnNameTwo": {
12 | "type": "string",
13 | "format": "date-time"
14 | },
15 | "ColumnNameThree": {
16 | "type": "string",
17 | "maxLength": 200
18 | },
19 | "ColumnNameFour": {
20 | "type": "string",
21 | "maxLength": 300
22 | },
23 | "ColumnNameFive": {
24 | "type": "string",
25 | "format": "date"
26 | },
27 | "ColumnNameSix": {
28 | "type": "number"
29 | },
30 | "ColumnNameSeven": {
31 | "type": "number"
32 | },
33 | "ColumnNameEight": {
34 | "type": "string",
35 | "maxLength": 1000
36 | },
37 | "ColumnNameNine": {
38 | "type": "string",
39 | "maxLength": 2000
40 | },
41 | "ColumnNameTen": {
42 | "type": "number"
43 | }
44 | }
45 | }
--------------------------------------------------------------------------------