├── .gitignore ├── alignschema ├── __init__.py └── __main__.py ├── setup.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | __pycache__ 4 | -------------------------------------------------------------------------------- /alignschema/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.0.1' 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="alignschema", 5 | version="0.0.1", 6 | 7 | author="Neil Freeman", 8 | author_email="contact@fakeisthenewreal.org", 9 | packages=["alignschema"], 10 | entry_points={ 11 | 'console_scripts': [ 12 | 'alignschema=alignschema.__main__:main', 13 | ], 14 | }, 15 | ) 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## alignschema 2 | 3 | Generate and run `ogr2ogr` commands using a CSV to rename fields, thus aligning the schemas of many input files. 4 | 5 | The CSV should be set up with column names that are the desired output field names. The value in each column should be the name of the corresponding field in the source. Blank values are ignored. 6 | Column names that match `ogr2ogr` options will be used to create those options and flags. For example: 7 | 8 | ``` 9 | src_datasource_name,id,name,year,skipfailures,dst_datasource_name 10 | espanol.shp,gid,nombre,,,PG:dbname=example 11 | francais.shp,ID,nom,annee,1,PG:dbname=example 12 | ``` 13 | 14 | This will generate two `ogr2ogr` commands. In the second command, the `skipfailures` flag will be added. In the first command, the field `year` won't be populated because that column is blank. 
Additional flags can be added to `alignschema`: 15 | 16 | ``` 17 | alignschema input.csv -t_srs EPSG:4326 18 | ``` 19 | 20 | Values in the csv that are wrapped in single quotes will be treated like string literals. 21 | 22 | Additional arguments will be string interpolated based on the values in the fieldmap. So for instance in the argument `-oo CLOSING_STATEMENTS="ALTER TABLE {nln} SET OWNER to foo"`, `{nln}` will be replaced by the value of the `nln` field. 23 | 24 | ### Usage 25 | 26 | ``` 27 | usage: alignschema [-h] [--dry-run] 28 | [--dst-datasource-name DST_DATASOURCE_NAME] [--layer LAYER] 29 | csvfile 30 | 31 | Construct an ogr2ogr command that maps field names based on a CSV. 32 | 33 | positional arguments: 34 | csvfile Contains columns that match ogr2ogr import options. 35 | Any unrecognized columns will be used in sql 36 | statement, e.g. SELECT value AS column 37 | 38 | optional arguments: 39 | -h, --help show this help message and exit 40 | --dry-run echo command, do not execute 41 | --dst-datasource-name DST_DATASOURCE_NAME 42 | --layer LAYER 43 | 44 | Additional arguments are passed to ogr2ogr. 
#!/usr/bin/env python3
"""Build and run ``ogr2ogr`` commands from a CSV that maps field names."""
from os import path
import subprocess
import argparse
import csv
import shlex

# CSV column names that are interpreted as ogr2ogr arguments rather than as
# output field names:
#   'options'    -> emitted as "-name value" when the cell is non-blank
#   'flags'      -> emitted as "-name" when the cell is non-blank
#   'positional' -> emitted first, without a dash, in the order listed here
OGR2OGR_OPTIONS = {
    'options': (
        'where', 'dialect', 'fid', 'limit', 'spat', 'spat_srs', 'geomfield',
        'a_srs', 't_srs', 's_srs', 'f', 'overwrite', 'dsco', 'lco', 'nln',
        'nlt', 'dim', 'gt', 'oo', 'doo', 'clipsrc', 'clipsrcsql', 'clipsrclayer',
        'clipsrcwhere', 'clipdst', 'clipdstsql', 'clipdstlayer', 'clipdstwhere',
        'datelineoffset', 'simplify', 'segmentize', 'addfields', 'unsetFid', 'relaxedFieldNameMatch',
        'fieldTypeToString', 'unsetFieldWidth', 'mapFieldType', 'fieldmap', 'maxsubfields',
        'zfield', 'gcp', 'order', 'mo',
    ),
    'flags': (
        'skipfailures', 'preserve_fid', 'append', 'update', 'progress',
        'splitlistfields', 'explodecollections', 'ds_transaction', 'nomd',
        'noNativeData', 'wrapdateline', 'tps', 'forceNullable', 'unsetDefault',
    ),
    'positional': ('dst_datasource_name', 'src_datasource_name', 'layer')

}


def generate(entry):
    """Translate one CSV row into a list of ogr2ogr arguments.

    ``entry`` maps column names to cell values. Columns named in
    ``OGR2OGR_OPTIONS`` become positional arguments, ``-option value`` pairs
    or bare ``-flag`` switches. Every other non-blank column becomes a
    ``<source field> AS <column>`` term of a ``-sql`` SELECT statement; a
    value wrapped in single quotes is selected as a literal instead of as a
    field of the source layer.

    The returned list does not include the ``ogr2ogr`` program name, and
    each element is a raw argument with no shell quoting applied. ``entry``
    is not modified.
    """
    output, fields = [], []

    # First, append positional arguments.
    for k in OGR2OGR_OPTIONS['positional']:
        if k not in entry:
            continue
        # A blank layer cell means "no layer given"; emitting '' would put
        # an empty positional argument on the command line.
        if k == 'layer' and not entry[k]:
            continue
        output.append("{}".format(entry[k]))

    # Next, append options and flags, and collect the field mappings.
    for k, v in entry.items():
        # csv.DictReader files surplus cells under the key None; they have
        # no column name and therefore nothing to map to.
        if k is None or k in OGR2OGR_OPTIONS['positional']:
            continue

        # Blank cells are ignored for options, flags and fields alike.
        if not v:
            continue

        if k in OGR2OGR_OPTIONS['options']:
            output.extend(['-{}'.format(k), v])

        elif k in OGR2OGR_OPTIONS['flags']:
            output.append('-{}'.format(k))

        else:
            if v.startswith("'") and v.endswith("'"):
                f = '{} AS {}'
            else:
                f = 'a.{} AS {}'

            fields.append(f.format(v, k))

    # Finally, generate the -sql flag.
    if fields:
        # Fall back to the source file's base name (minus extension) when no
        # layer is named, without touching the caller's dict.
        source = entry.get('src_datasource_name') or ''
        layer = entry.get('layer') or path.splitext(path.basename(source))[0]
        # The statement is a single argv element: the command is executed
        # without a shell, so it must not carry shell quoting of its own.
        sql = 'SELECT {} FROM "{}" a'.format(', '.join(fields), layer)
        output.extend(['-sql', sql])

    return output


def main():
    """Command-line entry point: run (or echo) one ogr2ogr call per CSV row."""
    parser = argparse.ArgumentParser(
        description="Construct an ogr2ogr command that maps field names based on a CSV.",
        epilog="Additional arguments are passed to ogr2ogr."
    )
    parser.add_argument('csvfile', help=(
        'Contains columns that match ogr2ogr import options. '
        'Any unrecognized columns will be used in sql statement, e.g. SELECT value AS column'
    ))
    parser.add_argument('--dry-run', action='store_true', help='echo command, do not execute')
    parser.add_argument('--dst-datasource-name', type=str)
    parser.add_argument('--layer', type=str)

    # Anything argparse does not recognise is forwarded to ogr2ogr.
    args, extra = parser.parse_known_args()

    # Command-line values override the matching CSV columns on every row.
    kwargs = {}
    if args.dst_datasource_name:
        kwargs['dst_datasource_name'] = args.dst_datasource_name
    if args.layer:
        kwargs['layer'] = args.layer

    # newline='' lets the csv module handle line endings inside quoted cells.
    with open(args.csvfile, 'r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            row.update(kwargs)
            result = generate(row)
            # Extra arguments may reference row values, e.g. "{nln}".
            # format_map() takes the row as-is, so a surplus-cell key of
            # None cannot break keyword expansion.
            extra_interp = [x.format_map(row) for x in extra]
            command = ['ogr2ogr'] + result + extra_interp

            if args.dry_run:
                # Quote for the shell only when printing, so the echoed line
                # can be pasted into a terminal.
                print(' '.join(shlex.quote(arg) for arg in command))
            else:
                subprocess.check_call(command)


if __name__ == '__main__':
    main()