├── .gitignore
├── alignschema
    ├── __init__.py
    └── __main__.py
├── setup.py
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.egg-info
2 | *.pyc
3 | __pycache__
4 | 


--------------------------------------------------------------------------------
/alignschema/__init__.py:
--------------------------------------------------------------------------------
# Package version string; setup.py declares the same value in its `version` argument.
__version__ = '0.0.1'
2 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
# Packaging script for the alignschema command-line tool.
from setuptools import setup

setup(
    name="alignschema",
    # Same value as __version__ in alignschema/__init__.py.
    version="0.0.1",

    author="Neil Freeman",
    author_email="contact@fakeisthenewreal.org",
    packages=["alignschema"],
    entry_points={
        # Installs an `alignschema` executable that calls main() in
        # alignschema/__main__.py.
        'console_scripts': [
            'alignschema=alignschema.__main__:main',
        ],
    },
)
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## alignschema
 2 | 
 3 | Generate and run `ogr2ogr` commands using a CSV to rename fields, thus aligning the schemas of many input files.
 4 | 
 5 | The CSV's column names are the desired output field names. Each cell holds the name of the matching field in that row's source file. Blank cells are ignored.
 6 | Column names that match `ogr2ogr` options will be used to create those options and flags. For example:
 7 | 
 8 | ```
 9 | src_datasource_name,id,name,year,skipfailures,dst_datasource_name
10 | espanol.shp,gid,nombre,,,PG:dbname=example
11 | francais.shp,ID,nom,annee,1,PG:dbname=example
12 | ```
13 | 
14 | This will generate two `ogr2ogr` commands. In the second command, the `skipfailures` flag will be added. In the first command, the field `year` won't be populated because that column is blank. Additional flags can be added to `alignschema`:
15 | 
16 | ```
17 | alignschema input.csv -t_srs EPSG:4326
18 | ```
19 | 
20 | Values in the csv that are wrapped in single quotes will be treated like string literals.
21 | 
22 | Additional arguments are string-interpolated using the values of the current CSV row (including any `--dst-datasource-name` or `--layer` override). So for instance in the argument `-oo CLOSING_STATEMENTS="ALTER TABLE {nln} SET OWNER to foo"`, `{nln}` will be replaced by the value of that row's `nln` column.
23 | 
24 | ### Usage
25 | 
26 | ```
27 | usage: alignschema [-h] [--dry-run]
28 |                    [--dst-datasource-name DST_DATASOURCE_NAME] [--layer LAYER]
29 |                    csvfile
30 | 
31 | Construct an ogr2ogr command that maps field names based on a CSV.
32 | 
33 | positional arguments:
34 |   csvfile               Contains columns that match ogr2ogr import options.
35 |                         Any unrecognized columns will be used in sql
36 |                         statement, e.g. SELECT value AS column
37 | 
38 | optional arguments:
39 |   -h, --help            show this help message and exit
40 |   --dry-run             echo command, do not execute
41 |   --dst-datasource-name DST_DATASOURCE_NAME
42 |   --layer LAYER
43 | 
44 | Additional arguments are passed to ogr2ogr.
45 | ```
46 | 


--------------------------------------------------------------------------------
/alignschema/__main__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | from os import path
  3 | import subprocess
  4 | import argparse
  5 | import csv
  6 | 
  7 | OGR2OGR_OPTIONS = {
  8 |     'options': (
  9 |         'where', 'dialect', 'fid', 'limit', 'spat', 'spat_srs', 'geomfield',
 10 |         'a_srs', 't_srs', 's_srs', 'f', 'overwrite', 'dsco', 'lco', 'nln',
 11 |         'nlt', 'dim', 'gt', 'oo', 'doo', 'clipsrc', 'clipsrcsql', 'clipsrclayer',
 12 |         'clipsrcwhere', 'clipdst', 'clipdstsql', 'clipdstlayer', 'clipdstwhere',
 13 |         'datelineoffset', 'simplify', 'segmentize', 'addfields', 'unsetFid', 'relaxedFieldNameMatch',
 14 |         'fieldTypeToString', 'unsetFieldWidth', 'mapFieldType', 'fieldmap', 'maxsubfields',
 15 |         'zfield', 'gcp', 'order', 'mo',
 16 |     ),
 17 |     'flags': (
 18 |         'skipfailures', 'preserve_fid', 'append', 'update', 'progress',
 19 |         'splitlistfields', 'explodecollections', 'ds_transaction', 'nomd',
 20 |         'noNativeData', 'wrapdateline', 'tps', 'forceNullable', 'unsetDefault',
 21 |     ),
 22 |     'positional': ('dst_datasource_name', 'src_datasource_name', 'layer')
 23 | 
 24 | }
 25 | 
 26 | 
 27 | def generate(entry):
 28 |     output, fields = [], []
 29 | 
 30 |     # First, append positional arguments
 31 |     for k in OGR2OGR_OPTIONS['positional']:
 32 |         if k in entry:
 33 |             output.append("{}".format(entry[k]))
 34 | 
 35 |     # Next, append options and flags
 36 |     for k, v in entry.items():
 37 |         if k in OGR2OGR_OPTIONS['options']:
 38 |             output.extend(['-{}'.format(k), v])
 39 | 
 40 |         elif k in OGR2OGR_OPTIONS['flags'] and v:
 41 |             output.append('-{}'.format(k))
 42 | 
 43 |         elif k in OGR2OGR_OPTIONS['positional']:
 44 |             pass
 45 | 
 46 |         else:
 47 |             if v:
 48 |                 if v.startswith("'") and v.endswith("'"):
 49 |                     f = '{} AS {}'
 50 |                 else:
 51 |                     f = 'a.{} AS {}'
 52 | 
 53 |                 fields.append(f.format(v, k))
 54 | 
 55 |     # Finally, generate the -sql flag
 56 |     if fields:
 57 |         if entry['layer'] == '':
 58 |             del entry['layer']
 59 |         layer = entry.get('layer', path.splitext(path.basename(entry.get('src_datasource_name')))[0])
 60 |         sql = ['-sql', '"SELECT {} FROM \\"{}\\" a"'.format(', '.join(fields), layer)]
 61 |         output.extend(sql)
 62 | 
 63 |     return output
 64 | 
 65 | 
 66 | def main():
 67 |     parser = argparse.ArgumentParser(
 68 |         description="Construct an ogr2ogr command that maps field names based on a CSV.",
 69 |         epilog="Additional arguments are passed to ogr2ogr."
 70 |     )
 71 |     parser.add_argument('csvfile', help=(
 72 |         'Contains columns that match ogr2ogr import options. '
 73 |         'Any unrecognized columns will be used in sql statement, e.g. SELECT value AS column'
 74 |     ))
 75 |     parser.add_argument('--dry-run', action='store_true', help='echo command, do not execute')
 76 |     parser.add_argument('--dst-datasource-name', type=str)
 77 |     parser.add_argument('--layer', type=str)
 78 | 
 79 |     args, extra = parser.parse_known_args()
 80 | 
 81 |     kwargs = {}
 82 |     if args.dst_datasource_name:
 83 |         kwargs['dst_datasource_name'] = args.dst_datasource_name
 84 |     if args.layer:
 85 |         kwargs['layer'] = args.layer
 86 | 
 87 |     with open(args.csvfile, 'r') as f:
 88 |         reader = csv.DictReader(f)
 89 |         for row in reader:
 90 |             row.update(kwargs)
 91 |             result = generate(row)
 92 |             extra_interp = [x.format(**row) for x in extra]
 93 |             command = ['ogr2ogr'] + result + extra_interp
 94 | 
 95 |             if args.dry_run:
 96 |                 print(' '.join(command))
 97 |             else:
 98 |                 subprocess.check_call(command)
 99 | 
100 | 
101 | if __name__ == '__main__':
102 |     main()
103 | 


--------------------------------------------------------------------------------