├── README.md └── show_create_table.py /README.md: -------------------------------------------------------------------------------- 1 | # redshift_show_create_table 2 | python script, 'show create table' equivalent for aws redshift. Command-line syntax is similar to pg_dump. 3 | 4 | Another option for getting this information is the `v_generate_tbl_ddl.sql` admin view from AWS Labs: https://github.com/awslabs/amazon-redshift-utils/blob/master/src/AdminViews/ 5 | 6 | # Command-line parameters 7 | 8 | Basic usage: 9 | ``` 10 | ./show_create_table.py -h HOST -U USER -d DBNAME [-W PASSWORD] [-p PORT] 11 | [-f FILE] [-F {directory}] [-n SCHEMANAME] 12 | [-t TABLENAME] 13 | ``` 14 | 15 | ## Required parameters 16 | * [-h/--host=] HOSTNAME: hostname for Redshift database 17 | * [-U/--user=] USERNAME: username to connect to Redshift database with 18 | * [-d/--dbname=] DBNAME: name of database to connect to on host 19 | 20 | ## Optional parameters 21 | * [-W/--password=] PASSWORD: Redshift password for username. If not provided, it will look for .pgpass credential file under user home directory, or file defined in PGPASSFILE system variable. See https://www.postgresql.org/docs/9.1/static/libpq-pgpass.html 22 | * [-p/--port=] PORT: port to connect to, defaults to 5432 23 | * [-f/--file=] FILE: file/directory to write output to, defaults to standard output 24 | * [-F/--format=] FORMAT: requires --file, currently only valid option (and default) is 'directory', 25 | which creates directories for each non-system schema and creates a separate SQL file for each table/view 26 | * [-n/--schema=] SCHEMANAME: name of schema to show tables from, if none provided it will iterate over all 27 | non-system schemas 28 | * [-t/--table=] TABLENAME: name of a single table to dump, if none provided it will iterate over all in schema 29 | -------------------------------------------------------------------------------- /show_create_table.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | 'show create table' equivalent for aws redshift 5 | 6 | Authors: 7 | xiuming chen 8 | Neil Halelamien 9 | """ 10 | 11 | from os import path, makedirs 12 | 13 | import psycopg2 14 | 15 | 16 | __all__ = ['show_create_table'] 17 | 18 | 19 | def add_where_stmts(schemaname, tablename): 20 | wheres = [] 21 | if tablename: 22 | wheres.append('tablename = %(table)s') 23 | if schemaname: 24 | wheres.append('schemaname = %(schema)s') 25 | return ' AND '.join(wheres) 26 | 27 | 28 | def get_table_infos(cur, schemaname, tablename): 29 | sql = ''' 30 | SELECT schemaname, tablename, tableowner, tablespace 31 | FROM pg_tables 32 | ''' 33 | where = add_where_stmts(schemaname, tablename) 34 | if where: 35 | sql += ' WHERE ' + where 36 | cur.execute(sql, dict(table=tablename, schema=schemaname)) 37 | d = {} 38 | for r in cur.fetchall(): 39 | table = get_table_name(r[0], r[1]) 40 | d[table] = { 41 | 'owner': r[2], 42 | 'space': r[3], 43 | } 44 | return d 45 | 46 | 47 | DISTSTYLES = { 48 | 0: 'EVEN', 49 | 1: 'KEY', 50 | 8: 'ALL', 51 | } 52 | 53 | SYSTEM_SCHEMAS = ['information_schema', 'pg_catalog', 'sys'] 54 | 55 | 56 | def get_table_diststyles(cur, schemaname, tablename): 57 | sql = ''' 58 | SELECT n.nspname AS schemaname, c.relname AS tablename, c.reldiststyle AS diststyle 59 | FROM pg_namespace n, pg_class c 60 | WHERE n.oid = c.relnamespace AND pg_table_is_visible(c.oid) 61 | ''' 62 | where = add_where_stmts(schemaname, tablename) 63 | if where: 64 | sql += ' AND ' + where 65 | cur.execute(sql, dict(table=tablename, schema=schemaname)) 66 | d = {} 67 | for r in cur.fetchall(): 68 | table = get_table_name(r[0], r[1]) 69 | d[table] = DISTSTYLES.get(r[2]) 70 | return d 71 | 72 | 73 | def get_table_defs(cur, schemaname, tablename): 74 | sql = ''' 75 | SELECT 76 | n.nspname AS "schemaname", 77 | c.relname AS "tablename", 78 | a.attname AS "column", 79 | format_type(a.atttypid, a.atttypmod) AS "type", 80 | format_encoding(a.attencodingtype::integer) AS "encoding", 81 | a.attisdistkey AS "distkey", 82 | a.attsortkeyord AS "sortkey", 83 | a.attnotnull AS "notnull", 84 | a.atthasdef AS "hasdef", 85 | d.adsrc as "default" 86 | FROM pg_attribute a 87 | JOIN pg_class c ON c.oid = a.attrelid 88 | JOIN pg_namespace n ON n.oid = c.relnamespace 89 | LEFT JOIN pg_attrdef d ON d.adrelid = c.oid AND d.adnum = a.attnum 90 | WHERE a.attnum > 0 AND NOT a.attisdropped AND pg_table_is_visible(c.oid) AND c.relkind = 'r' 91 | ''' 92 | where = add_where_stmts(schemaname, tablename) 93 | if where: 94 | sql += ' AND ' + where 95 | sql += ' ORDER BY n.nspname, c.relname, a.attnum;' 96 | cur.execute(sql, dict(table=tablename, schema=schemaname)) 97 | out = [] 98 | for r in cur.fetchall(): 99 | out.append(dict(zip( 100 | ['schemaname', 'tablename', 'column', 'type', 101 | 'encoding', 'distkey', 'sortkey', 'notnull', 102 | 'hasdef', 'default'], r))) 103 | return out 104 | 105 | 106 | def get_table_name(schema, table): 107 | if '.' not in schema and '.' not in table: 108 | return '%s.%s' % (schema, table) 109 | return '"%s"."%s"' % (schema, table) 110 | 111 | 112 | def group_table_defs(table_defs): 113 | curr_table = None 114 | defs = [] 115 | for r in table_defs: 116 | table = get_table_name(r['schemaname'], r['tablename']) 117 | if curr_table and curr_table != table: 118 | yield defs 119 | defs = [] 120 | curr_table = table 121 | defs.append(r) 122 | if defs: 123 | yield defs 124 | 125 | 126 | def format_comment(table, schema, owner, tablespace, model_type='TABLE'): 127 | comment = ('--\n' 128 | '-- Name: %(table)s; Type: %(model_type)s; Schema: %(schema)s; Owner: %(owner)s; Tablespace: %(tablespace)s\n' 129 | '--\n\n') \ 130 | % { 131 | 'table': table, 132 | 'schema': schema, 133 | 'owner': owner, 134 | 'model_type': model_type, 135 | 'tablespace': tablespace, 136 | } 137 | return comment 138 | 139 | 140 | def build_table_stmts(table_defs, table_diststyles, table_infos): 141 | for defs in group_table_defs(table_defs): 142 | schemaname = defs[0]['schemaname'] 143 | tablename = defs[0]['tablename'] 144 | table = get_table_name(schemaname, tablename) 145 | table_info = table_infos.get(table) 146 | if table_info: 147 | owner = table_info['owner'] or '' 148 | space = table_info['space'] or '' 149 | else: 150 | owner = space = '' 151 | s = format_comment(tablename, schemaname, owner, space) 152 | s += 'CREATE TABLE %s (\n' % table 153 | cols = [] 154 | sk = {} 155 | interleaved = False 156 | for d in defs: 157 | c = [ 158 | '"%s"' % d['column'], 159 | d['type'], 160 | ] 161 | if d['encoding'] != 'none': 162 | c.append('ENCODE') 163 | c.append(d['encoding']) 164 | if d['distkey']: 165 | c.append('DISTKEY') 166 | if d['sortkey']: 167 | if d['sortkey'] < 0: 168 | interleaved = True; 169 | sk[int(abs(d['sortkey']))] = d['column'] 170 | if d['notnull']: 171 | c.append('NOT NULL') 172 | if d['hasdef']: 173 | c.append('DEFAULT %s' % d['default']) 174 | cols.append(' '.join(c)) 175 | s += ',\n'.join(map(lambda c: ' ' + c, cols)) 176 | s += '\n)' 177 | diststyle = table_diststyles.get(table) 178 | if diststyle: 179 | s += ' DISTSTYLE ' + diststyle 180 | if sk: 181 | if interleaved: 182 | s += ' INTERLEAVED' 183 | s += ' SORTKEY ("%s")' % '", "'.join([sk[k] for k in sorted(sk)]) 184 | s += ';\n' 185 | yield schemaname, table, s 186 | 187 | 188 | def build_view_stmts_for_schema(cur, schema): 189 | sql = ''' 190 | SELECT c.relname, pg_get_userbyid(c.relowner) AS owner, pg_get_viewdef(c.oid) AS definition 191 | FROM pg_class c 192 | LEFT JOIN pg_namespace n ON n.oid = c.relnamespace 193 | WHERE c.relkind = 'v'::"char" and nspname = %(schema)s 194 | ''' 195 | cur.execute(sql, {'schema': schema}) 196 | for v in cur.fetchall(): 197 | view_name = v[0] 198 | owner = v[1] 199 | base_statement = v[2] 200 | s = format_comment(view_name, schema, owner, tablespace='', model_type='VIEW') 201 | s += 'CREATE OR REPLACE VIEW %s AS' % get_table_name(schema, view_name) 202 | s += '\n' + base_statement + '\n' 203 | yield schema, view_name, s 204 | 205 | 206 | # gets list of all non-system schemas 207 | def get_all_schemas(cur): 208 | sql = 'SELECT schemaname FROM pg_stat_all_tables GROUP BY schemaname' 209 | cur.execute(sql) 210 | schemas = [] 211 | for s in cur.fetchall(): 212 | schema = s[0] 213 | if schema not in SYSTEM_SCHEMAS: 214 | schemas.append(schema) 215 | return schemas 216 | 217 | 218 | def show_create_table(host, user, dbname, schemaname=None, tablename=None, port=5432, password=None): 219 | if password: 220 | conn = psycopg2.connect( 221 | host=host, port=port, database=dbname, user=user, password=password) 222 | else: 223 | conn = psycopg2.connect( 224 | host=host, port=port, database=dbname, user=user) 225 | cur = conn.cursor() 226 | try: 227 | if schemaname is None and tablename is None: # scan all non-system schemas and tables 228 | schema_list = get_all_schemas(cur) 229 | search_path_sql = 'SET SEARCH_PATH = ' + (','.join(schema_list)) + ';' 230 | cur.execute(search_path_sql) 231 | elif schemaname: 232 | cur.execute('SET SEARCH_PATH = %s;', (schemaname,)) 233 | schema_list = [schemaname] 234 | else: 235 | raise RuntimeError('If passing a table name, schema name must also be provided') 236 | 237 | statements = [] 238 | for schema in schema_list: 239 | table_diststyles = get_table_diststyles(cur, schema, tablename) 240 | table_defs = get_table_defs(cur, schema, tablename) 241 | table_infos = get_table_infos(cur, schema, tablename) 242 | for s in build_table_stmts(table_defs, table_diststyles, table_infos): 243 | statements.append(s) 244 | for s in build_view_stmts_for_schema(cur, schema): 245 | statements.append(s) 246 | return statements 247 | finally: 248 | cur.close() 249 | 250 | 251 | def main(host, user, dbname, filename, file_format, schemaname=None, tablename=None, port=5432, password=None): 252 | for schema, table, stmt in show_create_table( 253 | host, user, dbname, schemaname, tablename, port, password): 254 | if filename: 255 | if file_format == 'directory': 256 | basedir = filename 257 | if not path.exists(basedir): 258 | makedirs(basedir) 259 | schemadir = path.join(basedir, schema) 260 | if not path.exists(schemadir): 261 | makedirs(schemadir) 262 | full_filename = path.join(schemadir, table + '.sql') 263 | with open(full_filename, 'w') as f: 264 | f.write(stmt + '\n') 265 | else: 266 | raise RuntimeError('Invalid format: ' + file_format) 267 | else: 268 | print(stmt) 269 | 270 | 271 | if __name__ == '__main__': 272 | import argparse 273 | 274 | # arguments similar to those for pg_dump 275 | parser = argparse.ArgumentParser(add_help=False) # add_help=False because of conflict with '-h' 276 | parser.add_argument('-h', '--host', required=True, dest='host') 277 | parser.add_argument('-U', '--user', required=True, dest='user') 278 | parser.add_argument('-d', '--dbname', required=True, dest='dbname') 279 | parser.add_argument('-W', '--password', required=False, dest='password', 280 | help='If no password is provided, the connector will attempt to authorize with .pgpass file in user\'s home directory, ' 281 | 'or the file defined in PGPASSFILE system variable') 282 | parser.add_argument('-p', '--port', default=5432, dest='port') 283 | parser.add_argument('-f', '--file', default=False, dest='file', 284 | help='file/directory to write output to, defaults to standard output') 285 | parser.add_argument('-F', '--format', default='directory', dest='format', 286 | choices=['directory'], 287 | help='Requires --file, valid options: directory') 288 | parser.add_argument('-n', '--schema', dest='schemaname', 289 | help='Name of schema to show tables from, if not provided it will iterate over all non-system' 290 | 'schemas') 291 | parser.add_argument('-t', '--table', dest='tablename') 292 | 293 | args = parser.parse_args() 294 | main( 295 | args.host, 296 | args.user, 297 | args.dbname, 298 | args.file, 299 | args.format, 300 | args.schemaname, 301 | args.tablename, 302 | args.port, 303 | args.password, 304 | ) 305 | --------------------------------------------------------------------------------