├── DataWizard.py ├── DataWizard.sublime-commands ├── Default (Linux).sublime-keymap ├── Default (OSX).sublime-keymap ├── Default (Windows).sublime-keymap ├── Default.sublime-commands ├── LICENSE ├── Main.sublime-menu ├── README.md └── context.sublime-menu /DataWizard.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sublime 3 | import sublime_plugin 4 | import csv 5 | import re 6 | import pprint 7 | 8 | import json 9 | from random import shuffle 10 | from random import randrange 11 | from collections import OrderedDict 12 | import webbrowser 13 | from csv import DictReader 14 | import io 15 | import itertools 16 | # import datetime 17 | import os 18 | # from dateutil.tz import tzutc 19 | import time 20 | 21 | def getDelimiter(text): 22 | dct={'|':0} 23 | for i in set(text.splitlines()[0]): 24 | if i not in ('abcdefghijklmnopqrstuvwxqyzABCDEFGHIJKLMNOPQRSTUVWXQYZ0123456789''=:_- "().[]{}/\\'): 25 | dct[i]=text.splitlines()[0].count(i) 26 | print((max(dct,key=dct.get))) 27 | return (max(dct,key=dct.get)) 28 | 29 | def to_list(ob): 30 | return ob if isinstance(ob,list) else [ob] 31 | 32 | def maske(element): 33 | if isinstance(element,list): 34 | return [maske(el) for el in element] 35 | elif isinstance(element,str): 36 | # newElement=re.sub('[0-9]', '9',re.sub('[a-z]', 'a',re.sub('[A-Z]', 'A', element))) 37 | newElement='' 38 | for char in element: 39 | if char in ('abcdefghijklmnopqrstuvwxqyz'): 40 | newElement+='a' 41 | elif char in ('ABCDEFGHIJKLMNOPQRSTUVWXQYZ'): 42 | newElement+='A' 43 | elif char in ('0123456789'): 44 | newElement+='9' 45 | else: 46 | newElement+=char 47 | return newElement 48 | 49 | def runEdit(self, edit): 50 | # st = time.time() 51 | 52 | text='' 53 | for sel in self.view.sel(): 54 | text=text+self.view.substr(sel) 55 | if text=='': 56 | self.view.run_command("select_all") 57 | for region in self.view.sel(): 58 | inData=self.view.substr(region) 59 | outData=self.format(inData) 60 | self.view.replace(edit, region, outData) 61 | 62 | # et = time.time() 63 | # elapsed_time = et - st 64 | # print('Execution time:', elapsed_time, 'seconds') 65 | 66 | 67 | 68 | def splitSpecial(line,delimiter,quotechar): 69 | 70 | maxPosition=len(line)-1 71 | outList=[] 72 | cell='' 73 | qouted=False 74 | 75 | def deqoute(cell,delimiter,qouted): 76 | if len(cell)>0 and cell[0]=='"' and cell[-1]=='"' and delimiter not in cell: 77 | cell=cell[1:-1] 78 | return cell 79 | 80 | 81 | for i,c in enumerate(line): 82 | if c!=delimiter and c!=quotechar and i !=maxPosition: 83 | cell+=c 84 | continue 85 | if i==maxPosition and c!=delimiter: 86 | cell+=c 87 | cell=deqoute(cell,delimiter,qouted) 88 | outList.append(cell) 89 | cell='' 90 | continue 91 | if i==maxPosition and c==delimiter: 92 | cell=deqoute(cell,delimiter,qouted) 93 | outList.append(cell) 94 | cell='' 95 | outList.append('') 96 | continue 97 | if c==quotechar and qouted==False and cell=='': 98 | qouted=True 99 | cell+=c 100 | continue 101 | if c==quotechar and i==maxPosition: 102 | cell+=c 103 | cell=deqoute(cell,delimiter,qouted) 104 | outList.append(cell) 105 | cell='' 106 | break 107 | if c==quotechar and qouted==True and i!=maxPosition and line[i+1]==delimiter: 108 | cell+=c 109 | qouted=False 110 | continue 111 | if c==delimiter and qouted==False: 112 | cell=deqoute(cell,delimiter,qouted) 113 | outList.append(cell) 114 | cell='' 115 | continue 116 | cell+=c 117 | 118 | return [cell.strip() for cell in outList] 119 | 120 | def flatten_json(hunk): 121 | ''' 122 | flattens json and returns json as string 123 | or 124 | flattens py-dictionary and returns pydict 125 | ''' 126 | 127 | def _flatten(x, name=''): 128 | if type(x) is dict: 129 | for a in x: 130 | _flatten(x[a], name + a + '__') 131 | elif type(x) is list: 132 | i = 0 133 | for a in x: 134 | _flatten(a, name + str(i) + '__') 135 | i += 1 136 | else: 137 | rst[name[:-2]] = x 138 | 139 | 140 | hunk_type='unknown' 141 | if type(hunk) is str: 142 | hunk_type='json' 143 | hunk=json.loads(hunk) 144 | 145 | rst = {} 146 | _flatten(hunk) 147 | 148 | return json.dumps(rst) if hunk_type=='json' else rst 149 | 150 | 151 | class dataGrid: 152 | 153 | text='' 154 | delimiter='' 155 | grid=[] 156 | sampleGrid=[] 157 | maxColWidth=[] 158 | sampleMaxColWidth=[] 159 | 160 | def __init__(self, text): 161 | self.delimiter = self.getDelimiter(text) 162 | self.text = text if len(text.splitlines()[0].split(self.delimiter))==1 else re.sub(" +", "", text.strip('\n')) 163 | self.grid = self.getGrid(self.text,self.delimiter,'"') 164 | self.len = len(self.grid) 165 | self.headers = self.grid[0] 166 | # self.body = self.grid[1:] 167 | 168 | for i in range(len(self.grid)): 169 | if len(self.grid[i])!=len(self.grid[0]): 170 | print('\n\n{1} UNEQUAL NUMBER OF COLUMNS ON LINE {0} IN FILE PLEASE CORRECT AND RE-RUN {1}\n'.format(i,'*'*50)) 171 | 172 | def getDelimiter(self,text): 173 | dct={'|':0} 174 | print(text.splitlines()[0]) 175 | for i in set(text.splitlines()[0]): 176 | if i not in ('abcdefghijklmnopqrstuvwxqyzABCDEFGHIJKLMNOPQRSTUVWXQYZ0123456789\'=:_- "().[]{}/\\'): 177 | dct[i]=text.splitlines()[0].count(i) 178 | print((max(dct,key=dct.get))) 179 | return (max(dct,key=dct.get)) 180 | 181 | def getGrid(self,text,delimiter,quotechar): 182 | # grid=[[col.strip().replace('"','""') if delimiter not in col.strip() else '"'+col.strip().replace('"','""')+'"' for col in row] for row in csv.reader(text.splitlines(), delimiter=delimiter, quotechar=quotechar)] 183 | grid=[[col.strip() if delimiter not in col.strip() else '"'+col.strip().replace('"','""')+'"' for col in row] for row in csv.reader(text.splitlines(), delimiter=delimiter, quotechar=quotechar)] 184 | # grid=[[col if delimiter not in col else '"'+col.strip().replace('"','""')+'"' for col in row] for row in csv.reader(text.splitlines(), delimiter=delimiter, quotechar=quotechar)] 185 | # maxColWidth=self.getMaxColumnWidth(grid)################################ 186 | return grid 187 | 188 | def formatGrid(self): 189 | for row in self.grid: 190 | for i,col in enumerate(row): 191 | if col and self.delimiter in col and col[-1]!='"': 192 | row[i]='"'+col+'"' 193 | 194 | 195 | def getMaxColumnWidth(self,grid): 196 | maxColWidth=[] 197 | temp=[list(x) for x in zip(*self.grid)] 198 | for i in temp: 199 | maxColWidth.append(max([len(r) for r in i])) 200 | return maxColWidth 201 | 202 | def getSampleGrid(self,masked=False): 203 | data=[list(set(x)) for x in zip(*self.grid[1:])] 204 | if masked==True: 205 | data=maske(data) 206 | data=[set(i) for i in data] 207 | data=[[header]+sorted(data[i]) for i,header in enumerate(self.headers)] 208 | data=list(map(list, itertools.zip_longest(*data, fillvalue=''))) 209 | self.sampleGrid=data 210 | # self.sampleMaxColWidth=self.getMaxColumnWidth(self.sampleGrid) 211 | 212 | def constructTextFromGrid(self,grid,delimiter,maxColWidth=None):#list of list ie grid 213 | if maxColWidth==None: 214 | rst='\n'.join([delimiter.join(i) for i in grid]) 215 | else: 216 | maxColWidth=self.getMaxColumnWidth(grid) 217 | rst= '\n'.join([delimiter.join("{:<{width}}".format(col, width=maxColWidth[index]) for index, col in enumerate(row)) for row in grid]) 218 | return rst 219 | 220 | def pivotGrid(self): 221 | self.grid=[list(x) for x in zip(*self.grid)] 222 | # self.maxColWidth=self.getMaxColumnWidth(self.grid) 223 | 224 | def popGrid(self,direction='left'): 225 | for i in range(len(self.grid)): 226 | if direction=='left': 227 | temp=self.grid[i].pop(1) 228 | self.grid[i].append(temp) 229 | else: 230 | temp=self.grid[i].pop(-1) 231 | self.grid[i].insert(1,temp) 232 | # self.maxColWidth=self.getMaxColumnWidth(self.grid) 233 | 234 | 235 | class datawizardjustifycolumnsCommand(sublime_plugin.TextCommand): 236 | def format(self,text): 237 | a=dataGrid(text) 238 | rst=a.constructTextFromGrid(a.grid,a.delimiter,True) 239 | return rst 240 | 241 | def run(self, edit): 242 | runEdit(self, edit) 243 | 244 | class datawizardsqlwhereclauseCommand(sublime_plugin.TextCommand): 245 | def format(self,text): 246 | a=dataGrid(text) 247 | a.pivotGrid() 248 | sqlcmd='where 1=1' 249 | for r in a.grid: 250 | col=r.pop(0) 251 | r=list(set(r)) 252 | l=','.join("'"+v+"'" for v in r) 253 | sqlcmd+='\nand '+col+' in ('+l+')' 254 | return sqlcmd 255 | 256 | def run(self, edit): 257 | runEdit(self, edit) 258 | 259 | class datawizardcollapsecolumnsCommand(sublime_plugin.TextCommand): 260 | def format(self,text): 261 | a=dataGrid(text) 262 | rst=a.constructTextFromGrid(a.grid,a.delimiter) 263 | return rst 264 | 265 | def run(self, edit): 266 | runEdit(self, edit) 267 | 268 | 269 | class datawizardpivotCommand(sublime_plugin.TextCommand): 270 | def format(self,text): 271 | a=dataGrid(text) 272 | a.pivotGrid() 273 | rst=a.constructTextFromGrid(a.grid,a.delimiter) 274 | return rst 275 | 276 | def run(self, edit): 277 | runEdit(self, edit) 278 | 279 | 280 | class datawizardpivotjustifyCommand(sublime_plugin.TextCommand): 281 | def format(self,text): 282 | a=dataGrid(text) 283 | a.pivotGrid() 284 | rst=a.constructTextFromGrid(a.grid,a.delimiter,True) 285 | return rst 286 | 287 | def run(self, edit): 288 | runEdit(self, edit) 289 | 290 | 291 | class datawizardpopleftCommand(sublime_plugin.TextCommand): 292 | def format(self,text): 293 | a=dataGrid(text) 294 | a.popGrid() 295 | rst=a.constructTextFromGrid(a.grid,a.delimiter,True) 296 | return rst 297 | 298 | def run(self, edit): 299 | runEdit(self, edit) 300 | 301 | 302 | class datawizardpoprightCommand(sublime_plugin.TextCommand): 303 | def format(self,text): 304 | a=dataGrid(text) 305 | a.popGrid(direction='right') 306 | rst=a.constructTextFromGrid(a.grid,a.delimiter,True) 307 | return rst 308 | 309 | def run(self, edit): 310 | runEdit(self, edit) 311 | 312 | 313 | class datawizarddistinctcharsCommand(sublime_plugin.TextCommand): 314 | def format(self,text): 315 | text='\n'.join(sorted(list(set(text)))) 316 | return text 317 | 318 | def run(self, edit): 319 | runEdit(self, edit) 320 | 321 | 322 | class datawizardkeepdelimitersCommand(sublime_plugin.TextCommand): 323 | def format(self,text): 324 | a=dataGrid(text) 325 | newtext=''.join([c for c in text if c in ('\n',a.delimiter)]) 326 | return newtext 327 | 328 | def run(self, edit): 329 | runEdit(self, edit) 330 | 331 | 332 | class datawizardleadingzerosaddCommand(sublime_plugin.TextCommand): 333 | def run(self,text): 334 | 335 | lines=[] 336 | for region in self.view.sel(): 337 | lines.append(self.view.substr(region).strip(' ')) 338 | 339 | maxlen = max(map(len, lines)) 340 | new=[] 341 | for line in lines: 342 | new.append(line.zfill(maxlen)) 343 | outData= new 344 | 345 | for region in self.view.sel(): 346 | self.view.replace(text, region, outData.pop(0)) 347 | 348 | 349 | 350 | 351 | 352 | 353 | class datawizardleadingzerosremoveCommand(sublime_plugin.TextCommand): 354 | def format(self,text): 355 | 356 | lines=text.split('\n') 357 | 358 | def replace_str_index(text,index=0,replacement=''): 359 | return '%s%s%s'%(text[:index],replacement,text[index+1:]) 360 | 361 | for y in range(0,len(lines)): 362 | for x in range(0,len(lines[y])): 363 | if lines[y][x]=='0': 364 | lines[y]=replace_str_index(lines[y],x,' ') 365 | else: 366 | break 367 | 368 | return '\n'.join(lines) 369 | 370 | def run(self, edit): 371 | runEdit(self, edit) 372 | 373 | 374 | class datawizardlowercasesqlkeywordsCommand(sublime_plugin.TextCommand): 375 | def format(self,text): 376 | lines=text.splitlines() 377 | #sql server 378 | # words = ['if','cascade','serial','integer','owner','to','grant','serial','integer','while','deallocate','row_number','else','then','string_agg','returns','bit','nolock','use','go','clustered','after','nocount','on','raiserror','instead','of','enable','trigger','upper','isnull','lower','rank','over','partition','when','datediff','cast','convert','add','constraint','alter','column','table','all','and','any','as','asc','backup','database','between','case','check','create','index','or','replace','view','procedure','unique','default','delete','desc','distinct','drop','exec','exists','foreign','key','from','full','outer','join','group','by','having','in','inner','insert','into','select','is','null','not','left','like','limit','order','primary','right','rownum','top','set','truncate','union','update','values','where','cross','date','datetime','execute','max','concat','for','fetch','next','close','open','varchar','int','object','declare','end','try','print','catch','with','begin','proc'] 379 | #postgres 380 | words=['abort','abs','absent','absolute','access','according','acos','action','ada','add','admin','after','aggregate','all','allocate','also','alter','always','analyse','analyze','and','any','are','array','array_agg','array_max_cardinality','as','asc','asensitive','asin','assertion','assignment','asymmetric','at','atan','atomic','attach','attribute','attributes','authorization','avg','backward','base64','before','begin','begin_frame','begin_partition','bernoulli','between','bigint','binary','bit','bit_length','blob','blocked','bom','boolean','both','breadth','by','cache','call','called','cardinality','cascade','cascaded','case','cast','catalog','catalog_name','ceil','ceiling','chain','chaining','char','char_length','character','character_length','character_set_catalog','character_set_name','character_set_schema','characteristics','characters','check','checkpoint','class','class_origin','classifier','clob','close','cluster','coalesce','cobol','collate','collation','collation_catalog','collation_name','collation_schema','collect','column','column_name','columns','command_function','command_function_code','comment','comments','commit','committed','compression','concurrently','condition','condition_number','conditional','configuration','conflict','connect','connection','connection_name','constraint','constraint_catalog','constraint_name','constraint_schema','constraints','constructor','contains','content','continue','control','conversion','convert','copy','corr','corresponding','cos','cosh','cost','count','covar_pop','covar_samp','create','cross','cube','cume_dist','current','current_catalog','current_date','current_default_transform_group','current_path','current_role','current_row','current_schema','current_time','current_timestamp','current_transform_group_for_type','current_user','cursor','cursor_name','cycle','data','database','datalink','date','datetime_interval_code','datetime_interval_precision','day','db','deallocate','dec','decfloat','decimal','declare','default','defaults','deferrable','deferred','define','defined','definer','degree','delete','delimiter','delimiters','dense_rank','depends','depth','deref','derived','desc','describe','descriptor','detach','deterministic','diagnostics','dictionary','disable','discard','disconnect','dispatch','distinct','dlnewcopy','dlpreviouscopy','dlurlcomplete','dlurlcompleteonly','dlurlcompletewrite','dlurlpath','dlurlpathonly','dlurlpathwrite','dlurlscheme','dlurlserver','dlvalue','do','document','domain','double','drop','dynamic','dynamic_function','dynamic_function_code','each','element','else','empty','enable','encoding','encrypted','end','end_frame','end_partition','enforced','enum','equals','error','escape','event','every','except','exception','exclude','excluding','exclusive','exec','execute','exists','exp','explain','expression','extension','external','extract','false','family','fetch','file','filter','final','finalize','finish','first','first_value','flag','float','floor','following','for','force','foreign','format','fortran','forward','found','frame_row','free','freeze','from','fs','fulfill','full','function','functions','fusion','general','generated','get','global','go','goto','grant','granted','greatest','group','grouping','groups','handler','having','header','hex','hierarchy','hold','hour','id','identity','if','ignore','ilike','immediate','immediately','immutable','implementation','implicit','import','in','include','including','increment','indent','index','indexes','indicator','inherit','inherits','initial','initially','inline','inner','inout','input','insensitive','insert','instance','instantiable','instead','int','integer','integrity','intersect','intersection','interval','into','invoker','is','isnull','isolation','join','json','json_array','json_arrayagg','json_exists','json_object','json_objectagg','json_query','json_table','json_table_primitive','json_value','keep','key','key_member','key_type','keys','label','lag','language','large','last','last_value','lateral','lead','leading','leakproof','least','left','length','level','library','like','like_regex','limit','link','listagg','listen','ln','load','local','localtime','localtimestamp','location','locator','lock','locked','log','log10','logged','lower','map','mapping','match','match_number','match_recognize','matched','matches','materialized','max','maxvalue','measures','member','merge','message_length','message_octet_length','message_text','method','min','minute','minvalue','mod','mode','modifies','module','month','more','move','multiset','mumps','name','names','namespace','national','natural','nchar','nclob','nested','nesting','new','next','nfc','nfd','nfkc','nfkd','nil','no','none','normalize','normalized','not','nothing','notify','notnull','nowait','nth_value','ntile','nullif','nulls','number','numeric','object','occurrences_regex','octet_length','octets','of','off','offset','oids','old','omit','on','one','only','open','operator','option','options','or','order','ordering','ordinality','others','out','outer','output','over','overflow','overlaps','overlay','overriding','owned','owner','pad','parallel','parameter','parameter_mode','parameter_name','parameter_ordinal_position','parameter_specific_catalog','parameter_specific_name','parameter_specific_schema','parser','partial','partition','pascal','pass','passing','passthrough','password','past','path','pattern','per','percent','percent_rank','percentile_cont','percentile_disc','period','permission','permute','placing','plan','plans','pli','policy','portion','position','position_regex','power','precedes','preceding','precision','prepare','prepared','preserve','primary','prior','private','privileges','procedural','procedure','procedures','program','prune','ptf','public','publication','quote','quotes','range','rank','read','reads','real','reassign','recheck','recovery','recursive','ref','references','referencing','refresh','regr_avgx','regr_avgy','regr_count','regr_intercept','regr_r2','regr_slope','regr_sxx','regr_sxy','regr_syy','reindex','relative','release','rename','repeatable','replace','replica','requiring','reset','respect','restart','restore','restrict','result','return','returned_cardinality','returned_length','returned_octet_length','returned_sqlstate','returning','returns','revoke','right','role','rollback','rollup','routine','routine_catalog','routine_name','routine_schema','routines','row','row_count','row_number','rows','rule','running','savepoint','scalar','scale','schema','schema_name','schemas','scope','scope_catalog','scope_name','scope_schema','scroll','search','second','section','security','seek','select','selective','self','sensitive','sequence','sequences','serializable','server','server_name','session','session_user','set','setof','sets','share','show','similar','simple','sin','sinh','size','skip','smallint','snapshot','some','source','space','specific','specific_name','specifictype','sql','sqlcode','sqlerror','sqlexception','sqlstate','sqlwarning','sqrt','stable','standalone','start','state','statement','static','statistics','stddev_pop','stddev_samp','stdin','stdout','storage','stored','strict','string','strip','structure','style','subclass_origin','submultiset','subscription','subset','substring','substring_regex','succeeds','sum','support','symmetric','sysid','system','system_time','system_user','table','table_name','tables','tablesample','tablespace','tan','tanh','temp','template','temporary','text','then','through','ties','time','timestamp','timezone_hour','timezone_minute','to','token','top_level_count','trailing','transaction','transaction_active','transactions_committed','transactions_rolled_back','transform','transforms','translate','translate_regex','translation','treat','trigger','trigger_catalog','trigger_name','trigger_schema','trim','trim_array','true','truncate','trusted','type','types','uescape','unbounded','uncommitted','unconditional','under','unencrypted','union','unique','unknown','unlink','unlisten','unlogged','unmatched','unnamed','unnest','until','untyped','update','upper','uri','usage','user','user_defined_type_catalog','user_defined_type_code','user_defined_type_name','user_defined_type_schema','using','utf16','utf32','utf8','vacuum','valid','validate','validator','value','value_of','values','var_pop','var_samp','varbinary','varchar','variadic','varying','verbose','version','versioning','view','views','volatile','when','whenever','where','whitespace','width_bucket','window','with','within','without','work','wrapper','write','xml','xmlagg','xmlattributes','xmlbinary','xmlcast','xmlcomment','xmlconcat','xmldeclaration','xmldocument','xmlelement','xmlexists','xmlforest','xmliterate','xmlnamespaces','xmlparse','xmlpi','xmlquery','xmlroot','xmlschema','xmlserialize','xmltable','xmltext','xmlvalidate','year','yes','zone'] 381 | 382 | lowercase = lambda x: x.group(1).lower() 383 | test = '\b({})\b'.format('|'.join(words)) 384 | re_replace = re.compile(r'\b({})\b'.format('|'.join(words)),re.IGNORECASE) 385 | cflag=0 386 | for i in range(0,len(lines)): 387 | if '/*' in lines[i] and '*/' not in lines[i]: 388 | cflag=1 389 | if '*/' in lines[i]: 390 | cflag=0 391 | if cflag==1: 392 | continue 393 | temp=lines[i].split('--') 394 | temp[0]=re_replace.sub(lowercase, temp[0]) 395 | lines[i]='--'.join(temp) 396 | return '\n'.join(lines) 397 | 398 | def run(self, edit): 399 | runEdit(self, edit) 400 | 401 | 402 | 403 | class datawizarduppercasesqlkeywordsCommand(sublime_plugin.TextCommand): 404 | def format(self,text): 405 | lines=text.splitlines() 406 | #sql server 407 | # words = ['if','cascade','serial','integer','owner','to','grant','serial','integer','while','deallocate','row_number','else','then','string_agg','returns','bit','nolock','use','go','clustered','after','nocount','on','raiserror','instead','of','enable','trigger','upper','isnull','lower','rank','over','partition','when','datediff','cast','convert','add','constraint','alter','column','table','all','and','any','as','asc','backup','database','between','case','check','create','index','or','replace','view','procedure','unique','default','delete','desc','distinct','drop','exec','exists','foreign','key','from','full','outer','join','group','by','having','in','inner','insert','into','select','is','null','not','left','like','limit','order','primary','right','rownum','top','set','truncate','union','update','values','where','cross','date','datetime','execute','max','concat','for','fetch','next','close','open','varchar','int','object','declare','end','try','print','catch','with','begin','proc'] 408 | #postgres 409 | words=['abort','abs','absent','absolute','access','according','acos','action','ada','add','admin','after','aggregate','all','allocate','also','alter','always','analyse','analyze','and','any','are','array','array_agg','array_max_cardinality','as','asc','asensitive','asin','assertion','assignment','asymmetric','at','atan','atomic','attach','attribute','attributes','authorization','avg','backward','base64','before','begin','begin_frame','begin_partition','bernoulli','between','bigint','binary','bit','bit_length','blob','blocked','bom','boolean','both','breadth','by','cache','call','called','cardinality','cascade','cascaded','case','cast','catalog','catalog_name','ceil','ceiling','chain','chaining','char','char_length','character','character_length','character_set_catalog','character_set_name','character_set_schema','characteristics','characters','check','checkpoint','class','class_origin','classifier','clob','close','cluster','coalesce','cobol','collate','collation','collation_catalog','collation_name','collation_schema','collect','column','column_name','columns','command_function','command_function_code','comment','comments','commit','committed','compression','concurrently','condition','condition_number','conditional','configuration','conflict','connect','connection','connection_name','constraint','constraint_catalog','constraint_name','constraint_schema','constraints','constructor','contains','content','continue','control','conversion','convert','copy','corr','corresponding','cos','cosh','cost','count','covar_pop','covar_samp','create','cross','cube','cume_dist','current','current_catalog','current_date','current_default_transform_group','current_path','current_role','current_row','current_schema','current_time','current_timestamp','current_transform_group_for_type','current_user','cursor','cursor_name','cycle','data','database','datalink','date','datetime_interval_code','datetime_interval_precision','day','db','deallocate','dec','decfloat','decimal','declare','default','defaults','deferrable','deferred','define','defined','definer','degree','delete','delimiter','delimiters','dense_rank','depends','depth','deref','derived','desc','describe','descriptor','detach','deterministic','diagnostics','dictionary','disable','discard','disconnect','dispatch','distinct','dlnewcopy','dlpreviouscopy','dlurlcomplete','dlurlcompleteonly','dlurlcompletewrite','dlurlpath','dlurlpathonly','dlurlpathwrite','dlurlscheme','dlurlserver','dlvalue','do','document','domain','double','drop','dynamic','dynamic_function','dynamic_function_code','each','element','else','empty','enable','encoding','encrypted','end','end_frame','end_partition','enforced','enum','equals','error','escape','event','every','except','exception','exclude','excluding','exclusive','exec','execute','exists','exp','explain','expression','extension','external','extract','false','family','fetch','file','filter','final','finalize','finish','first','first_value','flag','float','floor','following','for','force','foreign','format','fortran','forward','found','frame_row','free','freeze','from','fs','fulfill','full','function','functions','fusion','general','generated','get','global','go','goto','grant','granted','greatest','group','grouping','groups','handler','having','header','hex','hierarchy','hold','hour','id','identity','if','ignore','ilike','immediate','immediately','immutable','implementation','implicit','import','in','include','including','increment','indent','index','indexes','indicator','inherit','inherits','initial','initially','inline','inner','inout','input','insensitive','insert','instance','instantiable','instead','int','integer','integrity','intersect','intersection','interval','into','invoker','is','isnull','isolation','join','json','json_array','json_arrayagg','json_exists','json_object','json_objectagg','json_query','json_table','json_table_primitive','json_value','keep','key','key_member','key_type','keys','label','lag','language','large','last','last_value','lateral','lead','leading','leakproof','least','left','length','level','library','like','like_regex','limit','link','listagg','listen','ln','load','local','localtime','localtimestamp','location','locator','lock','locked','log','log10','logged','lower','map','mapping','match','match_number','match_recognize','matched','matches','materialized','max','maxvalue','measures','member','merge','message_length','message_octet_length','message_text','method','min','minute','minvalue','mod','mode','modifies','module','month','more','move','multiset','mumps','name','names','namespace','national','natural','nchar','nclob','nested','nesting','new','next','nfc','nfd','nfkc','nfkd','nil','no','none','normalize','normalized','not','nothing','notify','notnull','nowait','nth_value','ntile','nullif','nulls','number','numeric','object','occurrences_regex','octet_length','octets','of','off','offset','oids','old','omit','on','one','only','open','operator','option','options','or','order','ordering','ordinality','others','out','outer','output','over','overflow','overlaps','overlay','overriding','owned','owner','pad','parallel','parameter','parameter_mode','parameter_name','parameter_ordinal_position','parameter_specific_catalog','parameter_specific_name','parameter_specific_schema','parser','partial','partition','pascal','pass','passing','passthrough','password','past','path','pattern','per','percent','percent_rank','percentile_cont','percentile_disc','period','permission','permute','placing','plan','plans','pli','policy','portion','position','position_regex','power','precedes','preceding','precision','prepare','prepared','preserve','primary','prior','private','privileges','procedural','procedure','procedures','program','prune','ptf','public','publication','quote','quotes','range','rank','read','reads','real','reassign','recheck','recovery','recursive','ref','references','referencing','refresh','regr_avgx','regr_avgy','regr_count','regr_intercept','regr_r2','regr_slope','regr_sxx','regr_sxy','regr_syy','reindex','relative','release','rename','repeatable','replace','replica','requiring','reset','respect','restart','restore','restrict','result','return','returned_cardinality','returned_length','returned_octet_length','returned_sqlstate','returning','returns','revoke','right','role','rollback','rollup','routine','routine_catalog','routine_name','routine_schema','routines','row','row_count','row_number','rows','rule','running','savepoint','scalar','scale','schema','schema_name','schemas','scope','scope_catalog','scope_name','scope_schema','scroll','search','second','section','security','seek','select','selective','self','sensitive','sequence','sequences','serializable','server','server_name','session','session_user','set','setof','sets','share','show','similar','simple','sin','sinh','size','skip','smallint','snapshot','some','source','space','specific','specific_name','specifictype','sql','sqlcode','sqlerror','sqlexception','sqlstate','sqlwarning','sqrt','stable','standalone','start','state','statement','static','statistics','stddev_pop','stddev_samp','stdin','stdout','storage','stored','strict','string','strip','structure','style','subclass_origin','submultiset','subscription','subset','substring','substring_regex','succeeds','sum','support','symmetric','sysid','system','system_time','system_user','table','table_name','tables','tablesample','tablespace','tan','tanh','temp','template','temporary','text','then','through','ties','time','timestamp','timezone_hour','timezone_minute','to','token','top_level_count','trailing','transaction','transaction_active','transactions_committed','transactions_rolled_back','transform','transforms','translate','translate_regex','translation','treat','trigger','trigger_catalog','trigger_name','trigger_schema','trim','trim_array','true','truncate','trusted','type','types','uescape','unbounded','uncommitted','unconditional','under','unencrypted','union','unique','unknown','unlink','unlisten','unlogged','unmatched','unnamed','unnest','until','untyped','update','upper','uri','usage','user','user_defined_type_catalog','user_defined_type_code','user_defined_type_name','user_defined_type_schema','using','utf16','utf32','utf8','vacuum','valid','validate','validator','value','value_of','values','var_pop','var_samp','varbinary','varchar','variadic','varying','verbose','version','versioning','view','views','volatile','when','whenever','where','whitespace','width_bucket','window','with','within','without','work','wrapper','write','xml','xmlagg','xmlattributes','xmlbinary','xmlcast','xmlcomment','xmlconcat','xmldeclaration','xmldocument','xmlelement','xmlexists','xmlforest','xmliterate','xmlnamespaces','xmlparse','xmlpi','xmlquery','xmlroot','xmlschema','xmlserialize','xmltable','xmltext','xmlvalidate','year','yes','zone'] 410 | 411 | uppercase = lambda x: x.group(1).upper() 412 | test = '\b({})\b'.format('|'.join(words)) 413 | re_replace = re.compile(r'\b({})\b'.format('|'.join(words)),re.IGNORECASE) 414 | cflag=0 415 | for i in range(0,len(lines)): 416 | if '/*' in lines[i] and '*/' not in lines[i]: 417 | cflag=1 418 | if '*/' in lines[i]: 419 | cflag=0 420 | if cflag==1: 421 | continue 422 | temp=lines[i].split('--') 423 | temp[0]=re_replace.sub(uppercase, temp[0]) 424 | lines[i]='--'.join(temp) 425 | return '\n'.join(lines) 426 | 427 | def run(self, edit): 428 | runEdit(self, edit) 429 | 430 | 431 | class datawizardpyvartotextCommand(sublime_plugin.TextCommand): 432 | def format(self,text): 433 | return text.replace('\\n','\n').replace('\\t','\t') 434 | 435 | def run(self, edit): 436 | runEdit(self, edit) 437 | 438 | 439 | class datawizardshufflecolumnverticallyCommand(sublime_plugin.TextCommand): 440 | def run(self, edit): 441 | temp=[self.view.substr(selection) for selection in self.view.sel()] 442 | 443 | length=len(temp) 444 | locsAll=[i for i in range(0,length)] 445 | result=[] 446 | 447 | locsTemp=[l for l in locsAll] 448 | shuffle(locsTemp) 449 | for r in range(0,length): 450 | RandomLoc=locsTemp.pop() 451 | result.append(temp[RandomLoc]) 452 | 453 | cnt=0 454 | for selection in self.view.sel(): 455 | self.view.insert(edit, selection.begin(),result[cnt]) 456 | cnt+=1 457 | for selection in self.view.sel(): 458 | self.view.erase(edit, selection) 459 | 460 | 461 | class datawizardshufflecharverticallyCommand(sublime_plugin.TextCommand): 462 | def run(self, edit): 463 | temp=[self.view.substr(selection) for selection in self.view.sel()] 464 | 465 | 466 | width=len(temp[0]) 467 | length=len(temp) 468 | locsAll=[i for i in range(0,length)] 469 | 470 | result=['' for i in temp] 471 | 472 | for i in range(0,width): 473 | locsTemp=[l for l in locsAll] 474 | shuffle(locsTemp) 475 | for r in range(0,length): 476 | RandomLoc=locsTemp.pop() 477 | result[r]=result[r]+temp[RandomLoc][i] 478 | 479 | 480 | cnt=0 481 | for selection in self.view.sel(): 482 | self.view.insert(edit, selection.begin(),result[cnt]) 483 | cnt+=1 484 | for selection in self.view.sel(): 485 | self.view.erase(edit, selection) 486 | 487 | 488 | class datawizarddistinctcolumnstojsonCommand(sublime_plugin.TextCommand): 489 | def format(self,text): 490 | a=dataGrid(text) 491 | 492 | stats={} 493 | for i in range(len(a.grid[0])): 494 | row=[row[i] for row in a.grid[1:]] 495 | stats[a.grid[0][i]]=[(row.count(val),val) for val in set(row)] 496 | 497 | json_object = json.dumps(stats, indent = 4) 498 | 499 | return json_object 500 | 501 | def run(self, edit): 502 | runEdit(self, edit) 503 | 504 | 505 | class datawizarddistinctcolumnformatstojsonCommand(sublime_plugin.TextCommand): 506 | def format(self,text): 507 | a=dataGrid(text) 508 | a.grid=[a.grid[0]]+[maske(i) for i in a.grid[1:]] 509 | 510 | stats={} 511 | for i in range(len(a.grid[0])): 512 | row=[row[i] for row in a.grid[1:]] 513 | stats[a.grid[0][i]]=[(row.count(val),val) for val in set(row)] 514 | 515 | json_object = json.dumps(stats, indent = 4) 516 | 517 | return json_object 518 | 519 | def run(self, edit): 520 | runEdit(self, edit) 521 | 522 | 523 | class datawizarddistinctcolumnsCommand(sublime_plugin.TextCommand): 524 | def format(self,text): 525 | a=dataGrid(text) 526 | a.getSampleGrid() 527 | 528 | rst=a.constructTextFromGrid(a.sampleGrid,a.delimiter,a.sampleMaxColWidth) 529 | return rst 530 | 531 | def run(self, edit): 532 | runEdit(self, edit) 533 | 534 | 535 | class datawizarddistinctcolumnformatsCommand(sublime_plugin.TextCommand): 536 | def format(self,text): 537 | a=dataGrid(text) 538 | a.getSampleGrid(masked=True) 539 | 540 | rst=a.constructTextFromGrid(a.sampleGrid,a.delimiter,a.sampleMaxColWidth) 541 | return rst 542 | 543 | def run(self, edit): 544 | runEdit(self, edit) 545 | 546 | 547 | class datawizardstatisticsjsonCommand(sublime_plugin.TextCommand): 548 | def format(self,text): 549 | a=dataGrid(text) 550 | a.pivotGrid() 551 | 552 | def _numeric(input): 553 | input=input.strip('"').strip('$').replace(',','') 554 | try: 555 | rst=int(input) 556 | return rst 557 | except: 558 | rst=float(input) 559 | return rst 560 | 561 | def _try(func ,input): 562 | try: 563 | rst=func(input) 564 | return rst 565 | except: 566 | return False 567 | 568 | 569 | def median(lst): 570 | lst.sort() 571 | mid = len(lst) // 2 572 | res = (lst[mid] + lst[~mid]) / 2 573 | return res 574 | from functools import reduce 575 | def mean(lst): 576 | return reduce(lambda a, b: a + b, lst) / len(lst) 577 | 578 | rst=OrderedDict() 579 | for col in a.grid: 580 | col_name=col.pop(0) 581 | rst[col_name]=OrderedDict() 582 | 583 | tmp=[_numeric(n) for n in col if _try(_numeric,n)] 584 | rst[col_name]['min']=min(tmp) if tmp else min(col) 585 | rst[col_name]['max']=max(tmp) if tmp else max(col) 586 | try: 587 | rst[col_name]['sum']=sum(tmp) 588 | except: 589 | pass 590 | try: 591 | rst[col_name]['mean']=mean(tmp) 592 | except: 593 | pass 594 | try: 595 | rst[col_name]['median']=median(tmp) 596 | except: 597 | pass 598 | 599 | 600 | try: 601 | rst[col_name]['percent_blank']=col.count('')/len(col) 602 | except: 603 | pass 604 | try: 605 | rst[col_name]['percent_distinct']=len(set(col))/len(col) 606 | except: 607 | pass 608 | try: 609 | rst[col_name]['percent_dupe']=(len(col)-len(set(col)))/len(col) 610 | except: 611 | pass 612 | col.sort() 613 | distinct_col=set(col) 614 | if len(distinct_col)<100: 615 | counts=[(val,col.count(val)) for val in distinct_col] 616 | temp=OrderedDict() 617 | for i,item in enumerate(sorted(counts, key=lambda item: item[1],reverse=True)): 618 | temp[item[0]]=item[1] 619 | rst[col_name]['distribution_top_100']=temp 620 | else: 621 | rst[col_name]['distribution_top_100']=None 622 | 623 | 624 | return json.dumps(rst, indent = 4,default='str') 625 | 626 | 627 | def run(self, edit): 628 | runEdit(self, edit) 629 | 630 | 631 | class datawizardstatisticssampledelimiteddiffsCommand(sublime_plugin.TextCommand): 632 | def format(self,text): 633 | a=dataGrid(text) 634 | a.pivotGrid() 635 | 636 | temp=[] 637 | for row in a.grid: 638 | if len(set(row[1:]))!=1: 639 | temp.append(row) 640 | a.grid=temp 641 | a.pivotGrid() 642 | a.getSampleGrid() 643 | a.pivotGrid() 644 | 645 | rst=a.constructTextFromGrid(a.grid,a.delimiter,True) 646 | return rst 647 | 648 | def run(self, edit): 649 | runEdit(self, edit) 650 | 651 | 652 | class datawizardconverttosqlinsertsqlserverCommand(sublime_plugin.TextCommand): 653 | def format(self,text): 654 | a=dataGrid(text) 655 | a.maxColWidth=a.getMaxColumnWidth(a.grid) 656 | headers=a.grid[0] 657 | def unqoute(value): 658 | try: 659 | if value[0]=='"' and value[-1]=='"' and a.delimiter in value: 660 | return value[1:-1] 661 | else: 662 | return value 663 | except: 664 | return value 665 | def formatvalue(value): 666 | return "'"+unqoute(value).replace("'","''")+"'" if value!='' else 'NULL' 667 | 668 | table=[[formatvalue(f) for f in row] for row in a.grid[1:] ] 669 | table=[',('+','.join(row)+')\n' for row in table] 670 | 671 | filename=self.view.file_name() or 'temptable' 672 | table_name=os.path.basename(filename).replace('.csv','').replace('.txt','').replace('.CSV','').replace('.TXT','').replace('-','_').replace('/','_').replace('\\','_') 673 | 674 | sql='--drop table if exists #{table_name}\ngo\n\ncreate table #{table_name}\n(\n\trow_id int identity(1,1),\n'.format(table_name=table_name) 675 | 676 | for i in range(len(headers)): 677 | sql+='\t'+'['+headers[i]+']'+' nvarchar('+str(a.maxColWidth[i])+'),\n' 678 | sql+='\n)\n' 679 | insert='\n\ngo\n\ninsert into #{table_name} ('.format(table_name=table_name)+','.join(['['+i+']' for i in headers])+')\nvaluesx' 680 | 681 | if len(table)>1000: 682 | tableInsertLocs=[1000*i-1 for i in range(1,int(len(table)/1000)+1)] 683 | while tableInsertLocs: 684 | loc=tableInsertLocs.pop() 685 | table.insert(loc,insert) 686 | table.insert(0,insert) 687 | 688 | sql+=''.join(table) 689 | sql=sql.replace('valuesx,','values ') 690 | return sql 691 | 692 | def run(self, edit): 693 | runEdit(self, edit) 694 | 695 | 696 | class datawizardconverttosqlinsertoracleCommand(sublime_plugin.TextCommand): 697 | def format(self,text): 698 | a=dataGrid(text) 699 | a.maxColWidth=a.getMaxColumnWidth(a.grid) 700 | headers=a.grid[0] 701 | 702 | def unqoute(value): 703 | try: 704 | if value[0]=='"' and value[-1]=='"' and a.delimiter in value: 705 | return value[1:-1] 706 | else: 707 | return value 708 | except: 709 | return value 710 | 711 | def formatvalue(value): 712 | return "'"+unqoute(value).replace("'","''")+"'" if value!='' else 'NULL' 713 | 714 | table=[[formatvalue(val)+' as '+headers[i] for i,val in enumerate(row)] for row in a.grid[1:] ] 715 | table=['\t select '+','.join(row)+' from dual' for row in table] 716 | 717 | filename=self.view.file_name() or 'temptable' 718 | table_name=os.path.basename(filename).replace('.csv','').replace('.txt','').replace('.CSV','').replace('.TXT','').replace('-','_').replace('/','_').replace('\\','_') 719 | 720 | sql='--drop table {table_name};\n\ncreate global temporary table {table_name}\n(\n\t'.format(table_name=table_name) 721 | 722 | for i in range(len(headers)): 723 | sql+='\t,' if i !=0 else ' ' 724 | sql+=headers[i]+' varchar2('+str(a.maxColWidth[i])+')\n' 725 | sql+=')on commit preserve rows;' 726 | 727 | 728 | insert='\n\n\ninsert into {table_name} ('.format(table_name=table_name)+','.join([i for i in headers])+')\n with tempdata as (