├── tests
    ├── __init__.py
    └── test_breaks.py
├── tox.ini
├── .gitignore
├── Makefile
├── .travis.yml
├── setup.py
├── breaks
    ├── cli.py
    └── __init__.py
└── README.md


/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This file is part of breaks.
 5 | # https://github.com/fitnr/breaks
 6 | 
 7 | # Licensed under the GPL license:
 8 | # https://opensource.org/licenses/GPL-3.0
 9 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
10 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # This file is part of breaks.
 2 | # https://github.com/fitnr/breaks
 3 | 
 4 | # Licensed under the GPL license:
 5 | # https://opensource.org/licenses/GPL-3.0
 6 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
 7 | 
 8 | [tox]
 9 | envlist = py27, py33, py34
10 | 
11 | [testenv]
12 | deps = docutils, coverage
13 | 
14 | commands =
15 |     python setup.py install
16 |     make test
17 | 
18 | whitelist_externals = make
19 | 


--------------------------------------------------------------------------------
/tests/test_breaks.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This file is part of breaks.
 5 | # https://github.com/fitnr/breaks
 6 | 
 7 | # Licensed under the GPL license:
 8 | # https://opensource.org/licenses/GPL-3.0
 9 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
10 | 
11 | import unittest
12 | import breaks
13 | 
14 | 
class TestCase(unittest.TestCase):
    '''Unit tests for the helpers exposed by the breaks package.'''

    def testBreaks(self):
        '''breaks.bisect returns the left insertion index, and None for a None value.'''
        # unittest assertion methods are used instead of bare asserts so the
        # checks still run under `python -O` and report both operands on failure.
        self.assertEqual(breaks.bisect([1, 10, 20, 30], 25), 3)
        self.assertIsNone(breaks.bisect([1, 10, 20, 30], None))


if __name__ == '__main__':
    unittest.main()
23 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # This file is part of breaks.
 2 | # https://github.com/fitnr/breaks
 3 | 
 4 | # Licensed under the GPL license:
 5 | # https://opensource.org/licenses/GPL-3.0
 6 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
 7 | 
 8 | # Byte-compiled / optimized / DLL files
 9 | __pycache__/
10 | *.py[cod]
11 | 
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | bin/
16 | build/
17 | develop-eggs/
18 | dist/
19 | eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | .eggs
29 | 
30 | # Installer logs
31 | pip-log.txt
32 | pip-delete-this-directory.txt
33 | 
34 | # Unit test / coverage reports
35 | htmlcov/
36 | .tox/
37 | .coverage
38 | .cache
39 | 
40 | # Sphinx documentation
41 | docs/_build/
42 | 
43 | *.css
44 | *.cpg
45 | *.dbf
46 | *.iso.xml
47 | *.prj
48 | *.sh[xp]
49 | *.svg
50 | *.png
51 | *.zip
52 | README.rst
53 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # This file is part of breaks.
 2 | # https://github.com/fitnr/breaks
 3 | 
 4 | # Licensed under the GPL license:
 5 | # https://opensource.org/licenses/GPL-3.0
 6 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
 7 | 
 8 | CPL_MAX_ERROR_REPORTS = 0
 9 | export CPL_MAX_ERROR_REPORTS
10 | 
11 | TIGER = http://www2.census.gov/geo/tiger
12 | 
13 | .PHONY: test deploy
14 | 
15 | test:
16 | 	coverage run --include='breaks/*' setup.py test
17 | 	coverage report
18 | 	coverage html
19 | 
20 | bins.json bins.shp: State_2010Census_DP1.shp
21 | 	breaks $< DP0180001 $@
22 | 
23 | State_2010Census_DP1.shp: State_2010Census_DP1.zip
24 | 	unzip -qod . $< '$(basename $<).*'
25 | 	@touch $@
26 | 
27 | State_2010Census_DP1.zip: ; curl -O $(TIGER)/TIGER2010DP1/$@
28 | 
29 | deploy: README.rst
30 | 	python setup.py register
31 | 	git push
32 | 	git push --tags
33 | 	rm -rf dist build
34 | 	python3 setup.py bdist_wheel --universal
35 | 	twine upload dist/*
36 | 
37 | README.rst: README.md
38 | 	- pandoc $< -o $@
39 | 	@touch $@
40 | 	python setup.py check -r -s -m -q
41 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # This file is part of breaks.
 2 | # https://github.com/fitnr/breaks
 3 | 
 4 | # Licensed under the GPL license:
 5 | # https://opensource.org/licenses/GPL-3.0
 6 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
 7 | 
 8 | language: python
 9 | 
10 | python:
11 |     - 2.7
12 |     - 3.3
13 |     - 3.5
14 | 
15 | os:
16 |  - linux
17 | 
18 | before_install:
19 |     - gcc --version
20 |     - pip install -U pip docutils coverage
21 |     - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew update; fi
22 |     - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew list gdal || brew install gdal; fi
23 |     - if [[ $TRAVIS_OS_NAME == "osx" ]]; then brew list scipy || brew install scipy --with-openblas; fi
24 |     - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q update; fi
25 |     - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q install -y libgdal1-dev libblas-dev liblapack-dev gfortran; fi
26 |     - if [[ $TRAVIS_OS_NAME == "linux" ]]; then sudo apt-get -q install -y python-numpy python-scipy; fi
27 | 
28 | install:
29 |     - python setup.py install
30 | 
31 | script:
32 |     - make test
33 |     - make bins.shp
34 |     - make bins.json
35 | 
36 | after_script:
37 |     - grep bins bins.json > /dev/null && echo bins
38 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This file is part of breaks.
 5 | # https://github.com/fitnr/breaks
 6 | 
 7 | # Licensed under the GPL license:
 8 | # https://opensource.org/licenses/GPL-3.0
 9 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
10 | 
11 | from setuptools import setup
12 | 
# README.rst is generated from README.md at deploy time (see the Makefile),
# so it may legitimately be absent; fall back to an empty long description.
try:
    with open('README.rst') as readme_file:
        readme = readme_file.read()
except IOError:
    readme = ''

# Read the version from the package source without importing the package
# (importing would require its third-party dependencies to be installed).
with open('breaks/__init__.py') as i:
    # Take the first line mentioning __version__ and keep what follows '=',
    # stripped of quotes, spaces and the trailing newline.
    version = next(r for r in i if '__version__' in r).split('=')[1].strip('"\' \n')
20 | 
# Package metadata. `version` and `readme` are computed above from
# breaks/__init__.py and README.rst respectively.
setup(
    name='breaks',
    version=version,
    description='calculate bins on spatial data',
    long_description=readme,
    # 'choropleth' (not 'chloropleth') is the correct spelling of the map type.
    keywords='gis geodata choropleth',
    author='fitnr',
    author_email='contact@fakeisthenewreal.org',
    url='https://github.com/fitnr/breaks',
    license='GPL',
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Natural Language :: English',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Operating System :: OS Independent',
    ],
    packages=['breaks'],
    include_package_data=False,
    install_requires=[
        'numpy >=1.10.4, <1.11',
        'scipy >=0.15.1, <0.20',
        'pysal >=1.11.0, <1.12',
        'fiona >=1.6.0, <2.0',
        'fionautil >=0.5.1, <0.6.0',
        'click >=6.2, <7',
    ],
    # Installs the `breaks` command, which runs breaks.cli.main.
    entry_points={
        'console_scripts': [
            'breaks=breaks.cli:main',
        ],
    },
    test_suite='tests',
    zip_safe=True,
)
58 | 


--------------------------------------------------------------------------------
/breaks/cli.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | 
 4 | # This file is part of breaks.
 5 | # https://github.com/fitnr/breaks
 6 | 
 7 | # Licensed under the GPL license:
 8 | # https://opensource.org/licenses/GPL-3.0
 9 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
10 | from __future__ import print_function
11 | import sys
12 | from os import environ
13 | import click
14 | from . import __version__, LOWER_METHODS, breaks
15 | 
16 | 
@click.command()
@click.argument('infile', metavar='input', type=click.Path(exists=True))
@click.argument('data-field', type=str, metavar='data-field')
@click.argument('outfile', metavar='output', type=click.Path(writable=True, allow_dash=True))
@click.option('-m', '--method', metavar='METHOD', default='quantiles', type=click.Choice(LOWER_METHODS),
              help='Binning method:\n' + '\n'.join(LOWER_METHODS) + ' (default)')
@click.option('-b', '--bin-field', type=str, metavar='FIELD', default='bin', help='name of new field')
@click.option('-n', '--norm-field', type=str, metavar='FIELD', default=None,
              help='Normalize (divide) data-field by this field')
@click.option('-k', type=int, metavar='COUNT', default=5, help='Number of bins (default: 5)')
@click.option('-B', '--bins', type=str, help='Comma-separated list of breaks (a series of upper-bounds)')
@click.option('-i', '--id-field', type=str, metavar='FIELD', default=None,
              help='If given, only write this field, bin-field, and data-field (and norm-field, if given).')
@click.version_option(version=__version__, message='%(prog)s %(version)s')
def main(infile, outfile, **kwargs):
    '''Write a geodata file with bins based on a data field.'''
    # NOTE: the docstring above doubles as the command's --help summary.
    # The remaining arguments and options (data_field, method, bin_field,
    # norm_field, k, bins, id_field) arrive in kwargs and are forwarded as-is.

    # Set OGR error reporting limit, keeping any value the user already exported.
    environ.setdefault('CPL_MAX_ERROR_REPORTS', '5')

    result = breaks(infile, outfile, **kwargs)

    # Report the classification on stderr so stdout stays free for data output.
    print(result, file=sys.stderr)
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # breaks
 2 | 
 3 | Command line tool for adding data classes to geodata files.
 4 | 
 5 | Built on top of [Fiona](http://toblerity.org/fiona/README.html) and [Pysal](http://pysal.readthedocs.org/en/latest/) [mapclassify](http://pysal.org/1.2/library/esda/mapclassify.html).
 6 | 
 7 | ## Install
 8 | 
 9 | Requires [GDAL](http://gdal.org). [Numpy](http://www.numpy.org) and Pysal will be installed if not available.
10 | 
11 | ```
12 | pip install breaks
13 | ```
14 | 
15 | ## These are the breaks
16 | ````
17 | Usage: breaks [OPTIONS] input data-field output
18 | 
19 |   Write a geodata file with bins based on a data field
20 | 
21 | Options:
22 |   -m, --method METHOD     Binning method:
23 |                           equal_interval
24 |                           fisher_jenks
25 |                           jenks_caspall
26 |                           jenks_caspall_forced
27 |                           jenks_caspall_sampled
28 |                           max_p_classifier
29 |                           maximum_breaks
30 |                           natural_breaks
31 |                           quantiles (default)
32 |   -b, --bin-field FIELD   name of new field
33 |   -n, --norm-field FIELD  Normalize (divide) bin-field by this name field
34 |   -k COUNT                Number of bins (default: 5)
35 |   -B, --bins TEXT         Comma-separated list of breaks (a series of upper-
36 |                           bounds)
37 |   -i, --id-field FIELD    If given, only write this field, bin-field, and
38 |                           data-field (and norm-field, if given).
39 |   --version               Show the version and exit.
40 |   --help                  Show this message and exit.
41 | ````
42 | 
43 | Add quintile bins on 'fieldname':
44 | ```
45 | breaks data.shp fieldname data_binned.shp
46 | ```
47 | 
48 | This writes a file called `data_binned.shp` which is a copy of `data.shp`, but with an additional `bin` field, which contains a number from 0 to 4 (it will contain `NULL` values for rows with missing data).
49 | 
50 | Add decile bins on 'fieldname':
51 | ```
52 | breaks -k 10 data.shp fieldname data_binned.geojson
53 | ```
54 | 
55 | Add five [Fisher-Jenks](https://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization) bins on 'fieldname':
56 | ```
57 | breaks --method fisher_jenks data.json fieldname data_binned.json
58 | ```
59 | 
 60 | Add quintile bins on 'fieldname' to a field called 'mybin':
61 | ```
62 | breaks --bin-field mybin data.geojson fieldname data_binned.geojson
63 | ```
64 | 
 65 | Divide one field by another. For more complicated manipulations, alter your data with `ogr2ogr` or another tool before running `breaks`.
66 | ```
67 | # Calculates bins for population / area
68 | breaks data.geojson population data_binned.geojson --norm-field area
69 | ```
70 | 
71 | Add custom bins on 'fieldname':
72 | ```
73 | breaks --bins 50,75,150,250,500 data.geojson fieldname data_binned.shp
74 | ```
75 | (Give the upper-bounds as a comma-separated list.)
76 | 
77 | Only write an ID field and the data fields:
78 | ```
79 | breaks --id-field GEOID data.shp field1 data_lookup.shp
80 | ```
81 | 
82 | ## License
83 | 
 84 | Copyright 2016 Neil Freeman. Available under the GNU General Public License, version 3.
85 | 


--------------------------------------------------------------------------------
/breaks/__init__.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | 
  4 | # This file is part of breaks.
  5 | # https://github.com/fitnr/breaks
  6 | 
  7 | # Licensed under the GPL license:
  8 | # https://opensource.org/licenses/GPL-3.0
  9 | # Copyright (c) 2016, Neil Freeman <contact@fakeisthenewreal.org>
 10 | 
 11 | from bisect import bisect_left
 12 | from collections import OrderedDict
 13 | import numpy as np
 14 | import fiona
 15 | import fionautil.drivers
 16 | from pysal.esda import mapclassify
 17 | 
 18 | __version__ = '0.2.0'
 19 | 
 20 | LOWER_METHODS = (
 21 |     'equal_interval',
 22 |     'fisher_jenks',
 23 |     'jenks_caspall',
 24 |     'jenks_caspall_forced',
 25 |     'jenks_caspall_sampled',
 26 |     'max_p_classifier',
 27 |     'maximum_breaks',
 28 |     'natural_breaks',
 29 |     'quantiles',
 30 | )
 31 | 
 32 | 
 33 | def bisect(bins, value):
 34 |     '''Bisect left, returning None if value is None.'''
 35 |     if value is None:
 36 |         return None
 37 |     return bisect_left(bins, value)
 38 | 
 39 | 
 40 | def write(outfile, features, **kwargs):
 41 |     '''Use Fiona to write features to <outfile>. Kewyord args should be Fiona meta arguments.'''
 42 |     kwargs['driver'] = fionautil.drivers.from_path(outfile)
 43 |     with fiona.open(outfile, 'w', **kwargs) as sink:
 44 |         sink.writerecords(features)
 45 | 
 46 | 
 47 | def getter(data_field, norm_field=None):
 48 |     '''Returns a function for getting data value from a feature.'''
 49 |     if norm_field:
 50 |         def get(f):
 51 |             try:
 52 |                 return float(f['properties'][data_field]) / float(f['properties'][norm_field])
 53 |             except TypeError:
 54 |                 return None
 55 |     else:
 56 |         def get(f):
 57 |             return f['properties'][data_field]
 58 | 
 59 |     return get
 60 | 
 61 | 
 62 | def setter(bins, data_field, bin_field, **kwargs):
 63 |     '''Returns a function for creating an output feature.'''
 64 |     norm_field = kwargs.get('norm_field')
 65 |     id_field = kwargs.get('id_field')
 66 | 
 67 |     get = getter(data_field, norm_field)
 68 | 
 69 |     def _set(feature):
 70 |         f = {
 71 |             'properties': {},
 72 |             'geometry': feature['geometry']
 73 |         }
 74 | 
 75 |         if id_field:
 76 |             f['properties'][id_field] = feature['properties'][id_field]
 77 |             f['properties'][data_field] = feature['properties'][data_field]
 78 | 
 79 |             if norm_field:
 80 |                 f['properties'][norm_field] = feature['properties'][norm_field]
 81 |         else:
 82 |             f['properties'] = feature['properties']
 83 | 
 84 |         f['properties'][bin_field] = bisect(bins, get(feature))
 85 | 
 86 |         return f
 87 | 
 88 |     return _set
 89 | 
 90 | 
 91 | def binfeatures(features, method, data_field, k, bin_field=None, **kwargs):
 92 |     '''Classify input features according to <method>'''
 93 |     bin_field = 'bin' or bin_field
 94 | 
 95 |     if kwargs.get('bins'):
 96 |         method = 'User_Defined'
 97 |         k = kwargs.pop('bins')
 98 | 
 99 |     classify = getattr(mapclassify, method)
100 | 
101 |     get = getter(data_field, kwargs.get('norm_field'))
102 | 
103 |     data = (get(f) for f in features)
104 | 
105 |     return classify(np.array([d for d in data if d is not None]), k)
106 | 
107 | 
def get_features(infile, fields=None):
    '''
    Return the features of <infile>. Includes error checking that given fields exist.

    Args:
        infile (str): path
        fields (Sequence/Generator): Check that these fields exist in <infile>.
                            Raises ValueError if one doesn't appear.

    Returns:
        (tuple) list of features and Fiona metadata (schema and crs) for <infile>

    Raises:
        ValueError: if one of <fields> is not a property in the schema of <infile>.
    '''
    with fiona.drivers():
        with fiona.open(infile) as source:
            properties = source.schema['properties']

            # Explicit check rather than `assert`, which is skipped under `python -O`.
            for f in fields or []:
                if f not in properties:
                    raise ValueError('field not found in {}: {}'.format(infile, f))

            meta = {
                'schema': source.schema,
                'crs': source.crs,
            }

            # Read everything while the source is still open.
            features = list(source)

    return features, meta
137 | 
138 | 
def breaks(infile, outfile, method, data_field, **kwargs):
    '''
    Calculate bins on <infile> via <method>, writing result to <outfile>.
    This is essentially a wrapper for what the breaks CLI does.

    Args:
        infile (str): path to input file
        outfile (str): path to output file
        method (str): a valid pysal.esda.mapclassify method, in lower case
                      (it is title-cased before lookup)
        data_field (str): field in <infile> to read
        k (int): number of bins to create (default: 5)
        bin_field (str): field in <outfile> to create (default: bin)
        bins (list or str): Upper bounds of bins to use in User_Defined classifying,
                     as a sequence of numbers or a comma-separated string.
                     Overrides method and k.
        norm_field (str): Field to divide data_field by (both will be coerced to float).
        id_field (str): If given, only this field, data_field, norm_field (if given)
                     and bin_field are written to <outfile>.

    Returns:
        mapclassify classifier instance (its .bins are the upper bounds used)

    Raises:
        ValueError: if data_field, id_field or norm_field is not in <infile>.
    '''
    bins = kwargs.get('bins')
    if bins:
        # The CLI passes a comma-separated string; library callers may pass a sequence.
        if hasattr(bins, 'split'):
            bins = bins.split(',')
        kwargs['bins'] = sorted(float(x) for x in bins)

    bin_field = kwargs.pop('bin_field', 'bin')
    id_field = kwargs.get('id_field')
    norm_field = kwargs.get('norm_field')
    kwargs.setdefault('k', 5)

    fields = [f for f in (data_field, id_field, norm_field) if f is not None]
    features, meta = get_features(infile, fields)

    if id_field:
        # Lookup mode: trim the output schema to the fields that will be written.
        p = meta['schema']['properties']
        meta['schema']['properties'] = OrderedDict((name, kind) for name, kind in p.items() if name in fields)

    meta['schema']['properties'][bin_field] = 'int'

    classes = binfeatures(features, method.title(), data_field, **kwargs)

    create = setter(classes.bins, data_field, bin_field, id_field=id_field, norm_field=norm_field)

    # Features are generated lazily as they are written.
    new_features = (create(f) for f in features)
    write(outfile, new_features, **meta)

    return classes
183 | 


--------------------------------------------------------------------------------