├── AUTHORS ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── examples └── nepal.csv ├── CHANGELOG ├── COPYING ├── setup.py ├── test.yaml ├── README.md └── mapturner └── __init__.py /AUTHORS: -------------------------------------------------------------------------------- 1 | * Christopher Groskopf 2 | * Tyler Fisher 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests>=2.5.0 2 | envoy>=0.0.3 3 | pyyaml>=3.11 4 | tqdm>=4.8.4 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include AUTHORS 2 | include CHANGELOG 3 | include COPYING 4 | include README.md 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | .DS_Store 4 | build 5 | *.egg-info 6 | reference 7 | dist 8 | *.swo 9 | docs/_build 10 | .coverage 11 | .tox 12 | cover 13 | -------------------------------------------------------------------------------- /examples/nepal.csv: -------------------------------------------------------------------------------- 1 | date,intensity,latitude,longitude 5/12/15,6.3,27.618,86.166 5/12/15,7.3,27.837,86.077 4/26/15,6.7,27.782,85.997 4/25/15,6.6,28.193,84.865 4/25/15,7.8,28.147,84.708 -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.2.0 2 | ----- 3 | 4 | * Added complete list of configuration parameters to documentation. 5 | * `bbox` and `where` queries now apply to `csv` layers. 6 | * `bbox` and `where` queries now apply to `json` layers. 7 | * Added progress bar (of sorts) when downloading files. 
8 | * Improved console output. 9 | * Added location of downloaded data to documentation. 10 | * Added Python 3.4 and 3.5 support. 11 | * Remove Python 2.6 support. 12 | 13 | 0.1.0 14 | ----- 15 | 16 | * Initial version. 17 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2016 Christopher Groskopf and contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | from setuptools import setup 5 | 6 | install_requires = [ 7 | 'requests>=2.5.0', 8 | 'envoy>=0.0.3', 9 | 'pyyaml>=3.11', 10 | 'tqdm>=4.8.4' 11 | ] 12 | 13 | setup( 14 | name='mapturner', 15 | version='0.2.0', 16 | description='A command line utility for compiling map data.', 17 | long_description=open('README.md').read(), 18 | author='Christopher Groskopf', 19 | author_email='chrisgroskopf@gmail.com', 20 | url='https://github.com/nprapps/mapturner', 21 | license='MIT', 22 | classifiers=[ 23 | 'Development Status :: 2 - Beta', 24 | 'Environment :: Console', 25 | 'Intended Audience :: Developers', 26 | 'License :: OSI Approved :: MIT License', 27 | 'Natural Language :: English', 28 | 'Operating System :: OS Independent', 29 | 'Programming Language :: Python', 30 | 'Programming Language :: Python :: 2.7', 31 | 'Programming Language :: Python :: 3.4', 32 | 'Programming Language :: Python :: 3.5', 33 | 'Topic :: Software Development :: Libraries :: Python Modules', 34 | 'Topic :: Utilities' 35 | ], 36 | packages=[ 37 | 'mapturner' 38 | ], 39 | entry_points={ 40 | 'console_scripts': [ 41 | 'mapturner = mapturner:_main' 42 | ] 43 | }, 44 | install_requires=install_requires 45 | ) 46 | -------------------------------------------------------------------------------- /test.yaml: -------------------------------------------------------------------------------- 1 | bbox: '77.25 24.28 91.45 31.5' 2 | layers: 3 | countries: 4 | type: 'shp' 5 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' 6 | id-property: 'NAME' 7 | properties: 8 | - 'country=NAME' 9 | cities: 10 | type: 'shp' 11 | path: 
'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip' 12 | id-property: 'name' 13 | properties: 14 | - 'featurecla' 15 | - 'city=name' 16 | where: adm0name = 'Nepal' AND scalerank < 8 17 | 18 | neighbors: 19 | type: 'shp' 20 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip' 21 | id-property: 'name' 22 | properties: 23 | - 'featurecla' 24 | - 'city=name' 25 | where: adm0name != 'Nepal' AND scalerank <= 2 26 | 27 | lakes: 28 | type: 'shp' 29 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_lakes.zip' 30 | 31 | rivers: 32 | type: 'shp' 33 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_rivers_lake_centerlines.zip' 34 | where: featurecla = 'River' AND scalerank < 8 35 | 36 | quakes: 37 | type: 'csv' 38 | path: 'examples/nepal.csv' 39 | all-properties: True 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mapturner 2 | 3 | A command line utility for generating consolidated [TopoJSON](https://github.com/mbostock/topojson/wiki/Command-Line-Reference) from various data sources. Used for making fast vector maps with D3. 
4 | 5 | Important links: 6 | 7 | * Repository: https://github.com/nprapps/mapturner 8 | * Issues: https://github.com/nprapps/mapturner/issues 9 | 10 | ## Install 11 | 12 | You will need to have the following non-Python dependencies installed: 13 | 14 | * ogr2ogr (GDAL): `brew install ogr2ogr` 15 | * topojson: `npm install topojson@1.6.27` (topojson 2+ does not work) 16 | 17 | mapturner itself can be installed with pip: 18 | 19 | ``` 20 | pip install mapturner 21 | ``` 22 | 23 | Developer install process: 24 | 25 | ``` 26 | git clone git://github.com/nprapps/mapturner.git 27 | cd mapturner 28 | mkvirtualenv mapturner 29 | 30 | pip install -r requirements.txt 31 | 32 | python setup.py develop 33 | ``` 34 | 35 | ## Usage 36 | 37 | Define a YAML configuration file, such as the following example. The complete list of valid options is further on in this documentation. 38 | 39 | ``` 40 | bbox: '77.25 24.28 91.45 31.5' 41 | layers: 42 | countries: 43 | type: 'shp' 44 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_admin_0_countries.zip' 45 | id-property: 'NAME' 46 | properties: 47 | - 'country=NAME' 48 | 49 | cities: 50 | type: 'shp' 51 | path: 'http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/cultural/ne_10m_populated_places_simple.zip' 52 | id-property: 'name' 53 | properties: 54 | - 'featurecla' 55 | - 'city=name' 56 | where: adm0name = 'Nepal' AND scalerank < 8 57 | 58 | quakes: 59 | type: 'csv' 60 | path: 'examples/nepal.csv' 61 | all-properties: True 62 | ``` 63 | 64 | Then run it! 65 | 66 | ``` 67 | mapturner input.yaml output.json 68 | ``` 69 | 70 | The following layer types are currently supported: 71 | 72 | * ESRI Shapefile (`shp`) 73 | * GeoJSON or TopoJSON (`json`) 74 | * CSV (`csv`) 75 | 76 | ## How it works 77 | 78 | For each layer defined in the configuration file: 79 | 80 | * If path is a URL the file will be downloaded and cached locally. 
(It will not be redownloaded on subsequent runs.) 81 | * If path is to a zipped file it will be unzipped. 82 | * All layers will be clipped to the specified bounding box (using ogr2ogr). 83 | * For each layer, if a `where` attribute is specified, the layer data will be filtered by that clause. 84 | * For each layer, all fields in the layer *not* specified in the `properties` array will be removed (to reduce file size), unless `all-properties` is specified, in which case all will be kept. 85 | * For each layer, if an `id-property` is set, data from that property will be set as the identifier for the features in the layer. 86 | * The layer will be converted to [TopoJSON](https://github.com/mbostock/topojson/wiki/Command-Line-Reference). 87 | 88 | After each layer has been processed all of them will be concatenated into a single TopoJSON file. Each layer's key name will be used to identify it in the output. 89 | 90 | ## Complete list of configuration options 91 | 92 | For all layer types: 93 | 94 | * `type`: The type of layer. Valid types are `shp`, `json` (GeoJSON or TopoJSON), and `csv`. **(Required)** 95 | * `path`: The path (relative or absolute) to the layer data file. **(Required)** 96 | * `id-property`: A property from the data file to use as the unique identifier for features. See also, [the TopoJSON command-line documentation](https://github.com/mbostock/topojson/wiki/Command-Line-Reference). 97 | * `properties`: A list of properties from the data to be kept in the output. All other properties are dropped. 98 | * `all-properties`: If true, then all properties are kept for this layer. 99 | * `where`: A SQL-like query predicate that will filter the feature data. This uses exactly the same query syntax as [ogr2ogr](http://www.gdal.org/ogr2ogr.html). 100 | 101 | CSV layers only: 102 | 103 | * `latitude`: The name of a column in the data containing the latitude of the point/feature. 
104 | * `longitude`: The name of a column in the data containing the longitude of the point/feature. 105 | 106 | ## Cached data 107 | 108 | Cached shapefiles are storied in `~/.mapturner`. You may wish to clear this folder periodically to free up space and ensure updated shapefiles are redownloaded. 109 | -------------------------------------------------------------------------------- /mapturner/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import os 5 | import re 6 | import shutil 7 | import sys 8 | import zipfile 9 | 10 | import envoy 11 | import requests 12 | from tqdm import tqdm 13 | import yaml 14 | 15 | 16 | ROOT_DIRECTORY = os.path.expanduser('~/.mapturner') 17 | DATA_DIRECTORY = os.path.join(ROOT_DIRECTORY, 'data') 18 | TEMP_DIRECTORY = os.path.join(ROOT_DIRECTORY, 'tmp') 19 | 20 | SUPPORTED_FILE_TYPES = ['shp', 'json', 'csv'] 21 | 22 | VRT_TEMPLATE = """ 23 | 24 | 25 | %(source)s 26 | wkbPoint 27 | WGS84 28 | 29 | 30 | 31 | """ 32 | 33 | class MapTurner(object): 34 | """ 35 | A command line utility for generating data for locator maps. 36 | """ 37 | def __init__(self): 38 | """ 39 | Setup and parse command line arguments. 40 | """ 41 | self._install_exception_handler() 42 | 43 | self.argparser = argparse.ArgumentParser( 44 | description='A command line utility for generating data for locator maps.' 45 | ) 46 | 47 | self.argparser.add_argument( 48 | dest='config', action='store', 49 | help='Path to YAML configuration file.' 50 | ) 51 | 52 | self.argparser.add_argument( 53 | dest='output_path', action='store', 54 | help='Path to save save TopoJSON file.' 55 | ) 56 | 57 | self.argparser.add_argument( 58 | '-r', '--redownload', 59 | dest='redownload', action='store_true', 60 | help='Redownload all cached files from urls.' 
61 | ) 62 | 63 | self.argparser.add_argument( 64 | '-v', '--verbose', 65 | dest='verbose', action='store_true', 66 | help='Print detailed tracebacks when errors occur.' 67 | ) 68 | 69 | self.args = self.argparser.parse_args() 70 | 71 | # Verify mapturner directories exists 72 | if not os.path.exists(DATA_DIRECTORY): 73 | os.makedirs(DATA_DIRECTORY) 74 | 75 | if not os.path.exists(TEMP_DIRECTORY): 76 | os.makedirs(TEMP_DIRECTORY) 77 | 78 | # Load configuration file 79 | with open(self.args.config, 'r') as f: 80 | self.config = yaml.load(f) 81 | 82 | geojson_paths = [] 83 | 84 | # Process layers 85 | for name, layer in self.config['layers'].items(): 86 | if 'path' not in layer: 87 | raise ValueError('Path missing for layer: %s\n' % name) 88 | return 89 | 90 | layer_path = self.get_real_layer_path(layer['path']) 91 | 92 | sys.stdout.write('Layer: %s\n' % name) 93 | 94 | if layer['type'] not in SUPPORTED_FILE_TYPES: 95 | raise ValueError('Unsupported layer type: %s\n' % layer['type']) 96 | 97 | if layer['type'] in ['shp', 'json']: 98 | input_path = layer_path 99 | elif layer['type'] == 'csv': 100 | input_path = self.create_vrt(name, layer_path, layer) 101 | 102 | geojson_path = self.process_ogr2ogr(name, layer, input_path) 103 | geojson_paths.append(self.process_topojson(name, layer, geojson_path)) 104 | 105 | # Merge layers 106 | self.merge(geojson_paths) 107 | 108 | shutil.rmtree(TEMP_DIRECTORY) 109 | 110 | def _install_exception_handler(self): 111 | """ 112 | Installs a replacement for sys.excepthook, which handles pretty-printing uncaught exceptions. 113 | """ 114 | def handler(t, value, traceback): 115 | if self.args.verbose: 116 | sys.__excepthook__(t, value, traceback) 117 | else: 118 | sys.stderr.write('%s\n' % str(value).encode('utf-8')) 119 | 120 | sys.excepthook = handler 121 | 122 | def get_real_layer_path(self, path): 123 | """ 124 | Get the path the actual layer file. 
125 | """ 126 | filename = path.split('/')[-1] 127 | local_path = path 128 | filetype = os.path.splitext(filename)[1] 129 | 130 | # Url 131 | if re.match(r'^[a-zA-Z]+://', path): 132 | local_path = os.path.join(DATA_DIRECTORY, filename) 133 | 134 | if not os.path.exists(local_path): 135 | sys.stdout.write('* Downloading %s...\n' % filename) 136 | self.download_file(path, local_path) 137 | elif self.args.redownload: 138 | os.remove(local_path) 139 | 140 | sys.stdout.write('* Redownloading %s...\n' % filename) 141 | self.download_file(path, local_path) 142 | # Non-existant file 143 | elif not os.path.exists(local_path): 144 | raise Exception('%s does not exist' % local_path) 145 | 146 | real_path = path 147 | 148 | # Zip files 149 | if filetype == '.zip': 150 | slug = os.path.splitext(filename)[0] 151 | real_path = os.path.join(DATA_DIRECTORY, slug) 152 | 153 | if not os.path.exists(real_path): 154 | sys.stdout.write('* Unzipping...\n') 155 | self.unzip_file(local_path, real_path) 156 | 157 | return real_path 158 | 159 | def download_file(self, url, local_path): 160 | """ 161 | Download a file from a remote host. 162 | """ 163 | response = requests.get(url, stream=True) 164 | 165 | with open(local_path, 'wb') as f: 166 | for chunk in tqdm(response.iter_content(chunk_size=1024), unit='KB'): 167 | if chunk: # filter out keep-alive new chunks 168 | f.write(chunk) 169 | f.flush() 170 | 171 | def unzip_file(self, zip_path, output_path): 172 | """ 173 | Unzip a local file into a specified directory. 
174 | """ 175 | with zipfile.ZipFile(zip_path, 'r') as z: 176 | z.extractall(output_path) 177 | 178 | def create_vrt(self, layer_name, layer_path, layer): 179 | vrt_path = os.path.join(TEMP_DIRECTORY, '%s.vrt' % layer_name) 180 | 181 | vrt_body = VRT_TEMPLATE % { 182 | 'name': layer_name, 183 | 'source': layer_path, 184 | 'latitude': layer.get('latitude', 'latitude'), 185 | 'longitude': layer.get('longitude', 'longitude') 186 | } 187 | 188 | with open(vrt_path, 'w') as f: 189 | f.write(vrt_body) 190 | 191 | return vrt_path 192 | 193 | def process_ogr2ogr(self, name, layer, input_path): 194 | """ 195 | Process a layer using ogr2ogr. 196 | """ 197 | output_path = os.path.join(TEMP_DIRECTORY, '%s.json' % name) 198 | 199 | if os.path.exists(output_path): 200 | os.remove(output_path) 201 | 202 | ogr2ogr_cmd = [ 203 | 'ogr2ogr', 204 | '-f', 'GeoJSON', 205 | '-clipsrc', self.config['bbox'] 206 | ] 207 | 208 | if 'where' in layer: 209 | ogr2ogr_cmd.extend([ 210 | '-where', '"%s"' % layer['where'] 211 | ]) 212 | 213 | ogr2ogr_cmd.extend([ 214 | output_path, 215 | input_path 216 | ]) 217 | 218 | sys.stdout.write('* Running ogr2ogr\n') 219 | 220 | if self.args.verbose: 221 | sys.stdout.write(' %s\n' % ' '.join(ogr2ogr_cmd)) 222 | 223 | r = envoy.run(' '.join(ogr2ogr_cmd)) 224 | 225 | if r.status_code != 0: 226 | sys.stderr.write(r.std_err) 227 | 228 | return output_path 229 | 230 | def process_topojson(self, name, layer, input_path): 231 | """ 232 | Process layer using topojson. 
233 | """ 234 | output_path = os.path.join(TEMP_DIRECTORY, '%s.topojson' % name) 235 | 236 | topo_cmd = [ 237 | 'topojson', 238 | '-o', output_path 239 | ] 240 | 241 | if 'id-property' in layer: 242 | topo_cmd.extend([ 243 | '--id-property', layer['id-property'] 244 | ]) 245 | 246 | if layer.get('all-properties', False): 247 | topo_cmd.append('-p') 248 | elif 'properties' in layer: 249 | topo_cmd.extend([ 250 | '-p', ','.join(layer['properties']) 251 | ]) 252 | 253 | topo_cmd.extend([ 254 | '--', 255 | input_path 256 | ]) 257 | 258 | sys.stdout.write('* Running TopoJSON\n') 259 | 260 | if self.args.verbose: 261 | sys.stdout.write(' %s\n' % ' '.join(topo_cmd)) 262 | 263 | r = envoy.run(' '.join(topo_cmd)) 264 | 265 | if r.status_code != 0: 266 | sys.stderr.write(r.std_err) 267 | 268 | return output_path 269 | 270 | def merge(self, paths): 271 | """ 272 | Merge data layers into a single topojson file. 273 | """ 274 | merge_cmd = 'topojson -o %(output_path)s --bbox -p -- %(paths)s' % { 275 | 'output_path': self.args.output_path, 276 | 'paths': ' '.join(paths) 277 | } 278 | 279 | sys.stdout.write('Merging layers\n') 280 | 281 | if self.args.verbose: 282 | sys.stdout.write(' %s\n' % merge_cmd) 283 | 284 | r = envoy.run(merge_cmd) 285 | 286 | if r.status_code != 0: 287 | sys.stderr.write(r.std_err) 288 | 289 | 290 | def _main(): 291 | MapTurner() 292 | 293 | if __name__ == "__main__": 294 | _main() 295 | --------------------------------------------------------------------------------