├── tests ├── __init__.py └── test_breaks.py ├── tox.ini ├── .gitignore ├── Makefile ├── .travis.yml ├── setup.py ├── breaks ├── cli.py └── __init__.py └── README.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # This file is part of breaks. 5 | # https://github.com/fitnr/breaks 6 | 7 | # Licensed under the GPL license: 8 | # https://opensource.org/licenses/GPL-3.0 9 | # Copyright (c) 2016, Neil Freeman 10 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # This file is part of breaks. 2 | # https://github.com/fitnr/breaks 3 | 4 | # Licensed under the GPL license: 5 | # https://opensource.org/licenses/GPL-3.0 6 | # Copyright (c) 2016, Neil Freeman 7 | 8 | [tox] 9 | envlist = py27, py33, py34 10 | 11 | [testenv] 12 | deps = docutils, coverage 13 | 14 | commands = 15 | python setup.py install 16 | make test 17 | 18 | whitelist_externals = make 19 | -------------------------------------------------------------------------------- /tests/test_breaks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # This file is part of breaks. 
5 | # https://github.com/fitnr/breaks 6 | 7 | # Licensed under the GPL license: 8 | # https://opensource.org/licenses/GPL-3.0 9 | # Copyright (c) 2016, Neil Freeman 10 | 11 | import unittest 12 | import breaks 13 | 14 | 15 | class TestCase(unittest.TestCase): 16 | 17 | def testBreaks(self): 18 | assert breaks.bisect([1, 10, 20, 30], 25) == 3 19 | assert breaks.bisect([1, 10, 20, 30], None) is None 20 | 21 | if __name__ == '__main__': 22 | unittest.main() 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # This file is part of breaks. 2 | # https://github.com/fitnr/breaks 3 | 4 | # Licensed under the GPL license: 5 | # https://opensource.org/licenses/GPL-3.0 6 | # Copyright (c) 2016, Neil Freeman 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | bin/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | .eggs 29 | 30 | # Installer logs 31 | pip-log.txt 32 | pip-delete-this-directory.txt 33 | 34 | # Unit test / coverage reports 35 | htmlcov/ 36 | .tox/ 37 | .coverage 38 | .cache 39 | 40 | # Sphinx documentation 41 | docs/_build/ 42 | 43 | *.css 44 | *.cpg 45 | *.dbf 46 | *.iso.xml 47 | *.prj 48 | *.sh[xp] 49 | *.svg 50 | *.png 51 | *.zip 52 | README.rst 53 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # This file is part of breaks. 
2 | # https://github.com/fitnr/breaks 3 | 4 | # Licensed under the GPL license: 5 | # https://opensource.org/licenses/GPL-3.0 6 | # Copyright (c) 2016, Neil Freeman 7 | 8 | CPL_MAX_ERROR_REPORTS = 0 9 | export CPL_MAX_ERROR_REPORTS 10 | 11 | TIGER = http://www2.census.gov/geo/tiger 12 | 13 | .PHONY: test deploy 14 | 15 | test: 16 | coverage run --include='breaks/*' setup.py test 17 | coverage report 18 | coverage html 19 | 20 | bins.json bins.shp: State_2010Census_DP1.shp 21 | breaks $< DP0180001 $@ 22 | 23 | State_2010Census_DP1.shp: State_2010Census_DP1.zip 24 | unzip -qod . $< '$(basename $<).*' 25 | @touch $@ 26 | 27 | State_2010Census_DP1.zip: ; curl -O $(TIGER)/TIGER2010DP1/$@ 28 | 29 | deploy: README.rst 30 | python setup.py register 31 | git push 32 | git push --tags 33 | rm -rf dist build 34 | python3 setup.py bdist_wheel --universal 35 | twine upload dist/* 36 | 37 | README.rst: README.md 38 | - pandoc $< -o $@ 39 | @touch $@ 40 | python setup.py check -r -s -m -q 41 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This file is part of breaks. 
2 | # https://github.com/fitnr/breaks 3 | 4 | # Licensed under the GPL license: 5 | # https://opensource.org/licenses/GPL-3.0 6 | # Copyright (c) 2016, Neil Freeman 7 | 8 | language: python 9 | 10 | python: 11 | - 2.7 12 | - 3.3 13 | - 3.5 14 | 15 | os: 16 | - linux 17 | 18 | before_install: 19 | - gcc --version 20 | - pip install -U pip docutils coverage 21 | - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew update; fi 22 | - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew list gdal || brew install gdal; fi 23 | - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew list scipy || brew install scipy --with-openblas; fi 24 | - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q update; fi 25 | - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q install -y libgdal1-dev libblas-dev liblapack-dev gfortran; fi 26 | - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q install -y python-numpy python-scipy; fi 27 | 28 | install: 29 | - python setup.py install 30 | 31 | script: 32 | - make test 33 | - make bins.shp 34 | - make bins.json 35 | 36 | after_script: 37 | - grep bins bins.json > /dev/null && echo bins 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # This file is part of breaks. 
# https://github.com/fitnr/breaks

# Licensed under the GPL license:
# https://opensource.org/licenses/GPL-3.0
# Copyright (c) 2016, Neil Freeman

from setuptools import setup

# README.rst is a generated file (the Makefile builds it from README.md with
# pandoc, and .gitignore excludes it), so it may be absent. Fall back to an
# empty long description rather than failing. The file is opened in a `with`
# block so the handle is always closed.
try:
    with open('README.rst') as readme_file:
        readme = readme_file.read()
except IOError:
    readme = ''

# Parse the version out of the package source: take the first line that
# mentions __version__ and keep the right-hand side of the assignment,
# stripped of quotes and surrounding whitespace.
with open('breaks/__init__.py') as init_file:
    version_line = next(line for line in init_file if '__version__' in line)
    version = version_line.split('=')[1].strip('"\' \n')

setup(
    name='breaks',
    version=version,
    description='calculate bins on spatial data',
    long_description=readme,
    keywords='gis geodata chloropleth',
    author='fitnr',
    author_email='contact@fakeisthenewreal.org',
    url='https://github.com/fitnr/breaks',
    license='GPL',
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Natural Language :: English',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Operating System :: OS Independent',
    ],
    packages=['breaks'],
    include_package_data=False,
    install_requires=[
        'numpy >=1.10.4, <1.11',
        'scipy >=0.15.1, <0.20',
        'pysal >=1.11.0, <1.12',
        'fiona >=1.6.0, <2.0',
        'fionautil >=0.5.1, <0.6.0',
        'click >=6.2, <7',
    ],
    entry_points={
        'console_scripts': [
            'breaks=breaks.cli:main',
        ],
    },
    test_suite='tests',
    zip_safe=True,
)
# --------------------------------------------------------------------------------
# /breaks/cli.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# This file is part of breaks.
5 | # https://github.com/fitnr/breaks 6 | 7 | # Licensed under the GPL license: 8 | # https://opensource.org/licenses/GPL-3.0 9 | # Copyright (c) 2016, Neil Freeman 10 | from __future__ import print_function 11 | import sys 12 | from os import environ 13 | import click 14 | from . import __version__, LOWER_METHODS, breaks 15 | 16 | 17 | @click.command() 18 | @click.argument('infile', metavar='input', type=click.Path(exists=True)) 19 | @click.argument('data-field', type=str, metavar='data-field') 20 | @click.argument('outfile', metavar='output', type=click.Path(writable=True, allow_dash=True)) 21 | @click.option('-m', '--method', metavar='METHOD', default='quantiles', type=click.Choice(LOWER_METHODS), 22 | help='Binning method:\n' + '\n'.join(LOWER_METHODS) + ' (default)') 23 | @click.option('-b', '--bin-field', type=str, metavar='FIELD', default='bin', help='name of new field') 24 | @click.option('-n', '--norm-field', type=str, metavar='FIELD', default=None, 25 | help='Normalize (divide) bin-field by this name field') 26 | @click.option('-k', type=int, metavar='COUNT', default=5, help='Number of bins (default: 5)') 27 | @click.option('-B', '--bins', type=str, help='Comma-separated list of breaks (a series of upper-bounds)') 28 | @click.option('-i', '--id-field', type=str, metavar='FIELD', default=None, 29 | help='If given, only write this field, bin-field, and data-field (and norm-field, if given).') 30 | @click.version_option(version=__version__, message='%(prog)s %(version)s') 31 | def main(infile, outfile, **kwargs): 32 | '''Write a geodata file with bins based on a data field.''' 33 | # Set OGR error reporting limit 34 | environ['CPL_MAX_ERROR_REPORTS'] = environ.get('CPL_MAX_ERROR_REPORTS', '5') 35 | 36 | result = breaks(infile, outfile, **kwargs) 37 | 38 | print(result, file=sys.stderr) 39 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # breaks 2 | 3 | Command line tool for adding data classes to geodata files. 4 | 5 | Built on top of [Fiona](http://toblerity.org/fiona/README.html) and [Pysal](http://pysal.readthedocs.org/en/latest/) [mapclassify](http://pysal.org/1.2/library/esda/mapclassify.html). 6 | 7 | ## Install 8 | 9 | Requires [GDAL](http://gdal.org). [Numpy](http://www.numpy.org) and Pysal will be installed if not available. 10 | 11 | ``` 12 | pip install breaks 13 | ``` 14 | 15 | ## These are the breaks 16 | ```` 17 | Usage: breaks [OPTIONS] input data-field output 18 | 19 | Write a geodata file with bins based on a data field 20 | 21 | Options: 22 | -m, --method METHOD Binning method: 23 | equal_interval 24 | fisher_jenks 25 | jenks_caspall 26 | jenks_caspall_forced 27 | jenks_caspall_sampled 28 | max_p_classifier 29 | maximum_breaks 30 | natural_breaks 31 | quantiles (default) 32 | -b, --bin-field FIELD name of new field 33 | -n, --norm-field FIELD Normalize (divide) bin-field by this name field 34 | -k COUNT Number of bins (default: 5) 35 | -B, --bins TEXT Comma-separated list of breaks (a series of upper- 36 | bounds) 37 | -i, --id-field FIELD If given, only write this field, bin-field, and 38 | data-field (and norm-field, if given). 39 | --version Show the version and exit. 40 | --help Show this message and exit. 41 | ```` 42 | 43 | Add quintile bins on 'fieldname': 44 | ``` 45 | breaks data.shp fieldname data_binned.shp 46 | ``` 47 | 48 | This writes a file called `data_binned.shp` which is a copy of `data.shp`, but with an additional `bin` field, which contains a number from 0 to 4 (it will contain `NULL` values for rows with missing data). 
49 | 50 | Add decile bins on 'fieldname': 51 | ``` 52 | breaks -k 10 data.shp fieldname data_binned.geojson 53 | ``` 54 | 55 | Add five [Fisher-Jenks](https://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) bins on 'fieldname': 56 | ``` 57 | breaks --method fisher_jenks data.json fieldname data_binned.json 58 | ``` 59 | 60 | Add quintile bins on 'fieldname' to a field called 'mybin': 61 | ``` 62 | breaks --bin-field mybin data.geojson fieldname data_binned.geojson 63 | ``` 64 | 65 | Divide one field by another. If you need more complicated manipulations, alter your data with `ogr2ogr` or another tool first. 66 | ``` 67 | # Calculates bins for population / area 68 | breaks data.geojson population data_binned.geojson --norm-field area 69 | ``` 70 | 71 | Add custom bins on 'fieldname': 72 | ``` 73 | breaks --bins 50,75,150,250,500 data.geojson fieldname data_binned.shp 74 | ``` 75 | (Give the upper-bounds as a comma-separated list.) 76 | 77 | Only write an ID field and the data fields: 78 | ``` 79 | breaks --id-field GEOID data.shp field1 data_lookup.shp 80 | ``` 81 | 82 | ## License 83 | 84 | Copyright 2016 Neil Freeman. Available under the GNU General Public License. 85 | -------------------------------------------------------------------------------- /breaks/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # This file is part of breaks. 
# https://github.com/fitnr/breaks

# Licensed under the GPL license:
# https://opensource.org/licenses/GPL-3.0
# Copyright (c) 2016, Neil Freeman

from bisect import bisect_left
from collections import OrderedDict
import numpy as np
import fiona
import fionautil.drivers
from pysal.esda import mapclassify

__version__ = '0.2.0'

# Lower-case names of the supported classification methods. breaks() title-cases
# the chosen name and looks it up as an attribute of pysal's mapclassify module.
LOWER_METHODS = (
    'equal_interval',
    'fisher_jenks',
    'jenks_caspall',
    'jenks_caspall_forced',
    'jenks_caspall_sampled',
    'max_p_classifier',
    'maximum_breaks',
    'natural_breaks',
    'quantiles',
)


def bisect(bins, value):
    '''
    Return the index of the bin that <value> falls in, or None if value is None.

    Args:
        bins (Sequence): sorted upper bounds of the bins
        value: the value to classify

    Returns:
        (int) index of the first bound that is >= value (bisect_left), or None
    '''
    if value is None:
        return None
    return bisect_left(bins, value)


def write(outfile, features, **kwargs):
    '''
    Use Fiona to write features to <outfile>.

    Args:
        outfile (str): path to write to. The driver is whatever
            fionautil.drivers.from_path returns for this path.
        features (Iterable): feature records to write
        kwargs: Fiona meta arguments (e.g. schema, crs). Any 'driver' given
            here is overridden.
    '''
    kwargs['driver'] = fionautil.drivers.from_path(outfile)
    with fiona.open(outfile, 'w', **kwargs) as sink:
        sink.writerecords(features)


def getter(data_field, norm_field=None):
    '''
    Return a function that reads the data value from a feature.

    Args:
        data_field (str): property holding the value
        norm_field (str): optional property to divide data_field by

    Returns:
        A function taking a feature. Without norm_field it returns the raw
        property value. With norm_field it returns float(data) / float(norm),
        or None when either value is missing, not numeric, or norm is zero.
    '''
    if norm_field:
        def get(f):
            props = f['properties']
            try:
                return float(props[data_field]) / float(props[norm_field])
            except (TypeError, ValueError, ZeroDivisionError):
                # None (TypeError), a non-numeric string (ValueError) or a zero
                # normaliser cannot produce a ratio: treat the row as missing
                # data so it gets a NULL bin instead of aborting the whole run.
                return None
    else:
        def get(f):
            return f['properties'][data_field]

    return get


def setter(bins, data_field, bin_field, **kwargs):
    '''
    Return a function that creates an output feature from an input feature.

    Args:
        bins (Sequence): sorted upper bounds of the bins
        data_field (str): property holding the value to classify
        bin_field (str): property to store the bin index in
        norm_field (str): optional property data_field is divided by
        id_field (str): if given, output features carry only id_field,
            data_field, norm_field (if given) and bin_field

    Returns:
        A function mapping a feature to a new dict with 'properties' and
        'geometry' keys. The input feature is not modified.
    '''
    norm_field = kwargs.get('norm_field')
    id_field = kwargs.get('id_field')

    get = getter(data_field, norm_field)

    def _set(feature):
        source = feature['properties']

        if id_field:
            properties = OrderedDict()
            properties[id_field] = source[id_field]
            properties[data_field] = source[data_field]

            if norm_field:
                properties[norm_field] = source[norm_field]
        else:
            # Copy rather than alias, so that adding the bin below does not
            # write into the caller's feature.
            properties = OrderedDict(source)

        properties[bin_field] = bisect(bins, get(feature))

        return {
            'properties': properties,
            'geometry': feature['geometry'],
        }

    return _set


def binfeatures(features, method, data_field, k, bin_field=None, **kwargs):
    '''
    Classify input features according to <method>.

    Args:
        features (Iterable): features to classify
        method (str): name of a classifier in pysal.esda.mapclassify
        data_field (str): property holding the value to classify
        k (int): number of bins
        bin_field (str): accepted for backwards compatibility; not used here
        bins (list): if given (and non-empty), the 'User_Defined' classifier
            is used with these upper bounds instead of <method> and k
        norm_field (str): optional property data_field is divided by

    Returns:
        the classifier instance returned by mapclassify
    '''
    if kwargs.get('bins'):
        method = 'User_Defined'
        k = kwargs.pop('bins')

    classify = getattr(mapclassify, method)

    get = getter(data_field, kwargs.get('norm_field'))

    # Features without a usable value are left out of the classification.
    values = (get(f) for f in features)
    data = np.array([v for v in values if v is not None])

    return classify(data, k)


def get_features(infile, fields=None):
    '''
    Return the features of <infile>. Includes error checking that given fields exist.

    Args:
        infile (str): path
        fields (Sequence/Generator): Check that these fields exist in <infile>.
            Raises ValueError if one doesn't appear.

    Returns:
        (tuple) list of features and Fiona metadata (schema and crs) for <infile>
    '''
    fields = fields or []
    with fiona.drivers():
        with fiona.open(infile) as source:
            properties = source.schema['properties']

            # Explicit check instead of assert, which is removed under -O.
            for field in fields:
                if field not in properties:
                    raise ValueError('field not found in {}: {}'.format(infile, field))

            meta = {
                'schema': source.schema,
                'crs': source.crs,
            }

            features = list(source)

    return features, meta


def breaks(infile, outfile, method, data_field, **kwargs):
    '''
    Calculate bins on <infile> via <method>, writing result to <outfile>.
    This is essentially a wrapper for what the breaks CLI does.

    Args:
        infile (str): path to input file
        outfile (str): path to output file
        method (str): a valid pysal.esda.mapclassify method
        data_field (str): field in <infile> to read
        k (int): number of bins to create (default: 5)
        bin_field (str): field in <outfile> to create (default: bin)
        bins (str/list): Upper bounds of bins to use in User_Defined classifying,
            either as a comma-separated string or a sequence of numbers.
            Overrides method and k.
        norm_field (str): Field to divide data_field by (both will be coerced to float).
        id_field (str): If given, only this field, data_field, norm_field
            (if given) and bin_field are written.

    Returns:
        mapclassify bins instance
    '''
    bins = kwargs.get('bins')
    if bins:
        # The CLI passes a comma-separated string; library callers may pass
        # a sequence of numbers directly.
        if hasattr(bins, 'split'):
            bins = bins.split(',')
        kwargs['bins'] = sorted(float(x) for x in bins)

    # Fall back to the documented defaults when a value is absent or None.
    bin_field = kwargs.pop('bin_field', None) or 'bin'
    id_field = kwargs.get('id_field')
    norm_field = kwargs.get('norm_field')
    if kwargs.get('k') is None:
        kwargs['k'] = 5

    fields = [f for f in (data_field, id_field, norm_field) if f is not None]
    features, meta = get_features(infile, fields)

    if id_field:
        # Restrict the output schema to the fields that setter() will write.
        source_properties = meta['schema']['properties']
        meta['schema']['properties'] = OrderedDict(
            (name, kind) for name, kind in source_properties.items() if name in fields
        )

    meta['schema']['properties'][bin_field] = 'int'

    # mapclassify classifier names are the title-cased method names,
    # e.g. 'fisher_jenks' -> 'Fisher_Jenks'.
    classes = binfeatures(features, method.title(), data_field, **kwargs)

    create = setter(classes.bins, data_field, bin_field, id_field=id_field, norm_field=norm_field)

    new_features = (create(f) for f in features)
    write(outfile, new_features, **meta)

    return classes