├── tests ├── __init__.py ├── data │ ├── lines.shp │ ├── lines.shx │ ├── slope.tif │ ├── multilines.dbf │ ├── multipoints.dbf │ ├── multipolygons.dbf │ ├── points.shp │ ├── points.shx │ ├── polygons.shp │ ├── polygons.shx │ ├── all_nodata.tif │ ├── lines.dbf │ ├── multilines.shp │ ├── multilines.shx │ ├── dataset_mask.tif │ ├── multipoints.shp │ ├── multipoints.shx │ ├── multipolygons.shp │ ├── multipolygons.shx │ ├── points_noproj.shp │ ├── points_noproj.shx │ ├── polygons.dbf │ ├── slope_classes.tif │ ├── slope_nodata.tif │ ├── points.dbf │ ├── points_noproj.dbf │ ├── polygons_no_overlap.shp │ ├── polygons_no_overlap.shx │ ├── polygons_partial_overlap.shp │ ├── polygons_partial_overlap.shx │ ├── multilines.prj │ ├── multipoints.prj │ ├── multipolygons.prj │ ├── polygons_no_overlap.dbf │ ├── polygons_partial_overlap.dbf │ ├── lines.qpj │ ├── points.qpj │ ├── multilines.qpj │ ├── multipoints.qpj │ ├── polygons.qpj │ ├── multipolygons.qpj │ ├── geometry.geojson │ ├── polygons_no_overlap.prj │ ├── polygons_partial_overlap.prj │ ├── polygons_no_overlap.qpj │ ├── polygons_partial_overlap.qpj │ ├── feature.geojson │ ├── lines.prj │ ├── points.prj │ ├── polygons.prj │ └── featurecollection.geojson ├── conftest.py ├── myfunc.py ├── test_utils.py ├── test_point.py ├── test_cli.py ├── test_io.py └── test_zonal.py ├── docs ├── .gitignore ├── img │ ├── bahamas.png │ ├── rasterization.png │ └── zones_elevation.png ├── rasterstats.io.rst ├── rasterstats.utils.rst ├── rasterstats.rst ├── installation.rst ├── index.rst ├── notebooks │ ├── Basic Usage.ipynb │ ├── Basic Timing and Profiling.ipynb │ ├── ArcPy and rasterstats integration.ipynb │ └── Precipitation and Vegetation by State.ipynb ├── cli.rst ├── Makefile ├── conf.py └── manual.rst ├── scripts ├── .gitignore ├── profile.sh └── release.sh ├── examples ├── .gitignore ├── simple.py ├── README.md ├── multiproc.py └── benchmark.py ├── src └── rasterstats │ ├── _version.py │ ├── __init__.py │ ├── utils.py │ ├── cli.py │ ├── 
point.py │ ├── io.py │ └── main.py ├── pytest.ini ├── requirements.txt ├── MANIFEST.in ├── setup.cfg ├── requirements_dev.txt ├── .gitignore ├── .github ├── workflows │ └── test-rasterstats.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── LICENSE.txt ├── setup.py ├── README.rst └── CHANGELOG.txt /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _* 2 | -------------------------------------------------------------------------------- /scripts/.gitignore: -------------------------------------------------------------------------------- 1 | sample_data/* 2 | -------------------------------------------------------------------------------- /examples/.gitignore: -------------------------------------------------------------------------------- 1 | benchmark_data/* 2 | -------------------------------------------------------------------------------- /src/rasterstats/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.16.0" 2 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | filterwarnings = 3 | error 4 | ignore::UserWarning -------------------------------------------------------------------------------- /docs/img/bahamas.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/docs/img/bahamas.png -------------------------------------------------------------------------------- /tests/data/lines.shp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/lines.shp -------------------------------------------------------------------------------- /tests/data/lines.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/lines.shx -------------------------------------------------------------------------------- /tests/data/slope.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/slope.tif -------------------------------------------------------------------------------- /tests/data/multilines.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 -------------------------------------------------------------------------------- /tests/data/multipoints.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 -------------------------------------------------------------------------------- /tests/data/multipolygons.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 -------------------------------------------------------------------------------- /tests/data/points.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/points.shp -------------------------------------------------------------------------------- /tests/data/points.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/points.shx -------------------------------------------------------------------------------- /tests/data/polygons.shp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons.shp -------------------------------------------------------------------------------- /tests/data/polygons.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons.shx -------------------------------------------------------------------------------- /tests/data/all_nodata.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/all_nodata.tif -------------------------------------------------------------------------------- /tests/data/lines.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 2 -------------------------------------------------------------------------------- /tests/data/multilines.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multilines.shp -------------------------------------------------------------------------------- /tests/data/multilines.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multilines.shx -------------------------------------------------------------------------------- /docs/img/rasterization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/docs/img/rasterization.png -------------------------------------------------------------------------------- /docs/img/zones_elevation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/docs/img/zones_elevation.png 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | affine<3.0 2 | shapely 3 | numpy>=1.9 4 | rasterio>=1.0 5 | cligj>=0.4 6 | fiona 7 | simplejson 8 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | logging.basicConfig(stream=sys.stderr, level=logging.INFO) 4 | -------------------------------------------------------------------------------- /tests/data/dataset_mask.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/dataset_mask.tif -------------------------------------------------------------------------------- /tests/data/multipoints.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multipoints.shp -------------------------------------------------------------------------------- /tests/data/multipoints.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multipoints.shx -------------------------------------------------------------------------------- /tests/data/multipolygons.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multipolygons.shp -------------------------------------------------------------------------------- /tests/data/multipolygons.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/multipolygons.shx 
-------------------------------------------------------------------------------- /tests/data/points_noproj.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/points_noproj.shp -------------------------------------------------------------------------------- /tests/data/points_noproj.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/points_noproj.shx -------------------------------------------------------------------------------- /tests/data/polygons.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 2 -------------------------------------------------------------------------------- /tests/data/slope_classes.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/slope_classes.tif -------------------------------------------------------------------------------- /tests/data/slope_nodata.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/slope_nodata.tif -------------------------------------------------------------------------------- /tests/data/points.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 2 3 -------------------------------------------------------------------------------- /tests/data/points_noproj.dbf: -------------------------------------------------------------------------------- 1 | _A WidN 2 | 1 2 3 -------------------------------------------------------------------------------- /tests/data/polygons_no_overlap.shp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons_no_overlap.shp -------------------------------------------------------------------------------- /tests/data/polygons_no_overlap.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons_no_overlap.shx -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE.txt 2 | include README.rst 3 | include requirements.txt 4 | exclude MANIFEST.in 5 | exclude Vagrantfile 6 | -------------------------------------------------------------------------------- /tests/data/polygons_partial_overlap.shp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons_partial_overlap.shp -------------------------------------------------------------------------------- /tests/data/polygons_partial_overlap.shx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fitnr/python-rasterstats/master/tests/data/polygons_partial_overlap.shx -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # content of setup.cfg 2 | [tool:pytest] 3 | norecursedirs = examples* src* scripts* docs* 4 | # addopts = --verbose -rf --ipdb --maxfail=1 5 | 6 | -------------------------------------------------------------------------------- /tests/data/multilines.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] 
-------------------------------------------------------------------------------- /requirements_dev.txt: -------------------------------------------------------------------------------- 1 | # https://github.com/pytest-dev/pytest/issues/1043 and 1032 2 | pytest>=4.6 3 | 4 | coverage 5 | simplejson 6 | twine 7 | numpydoc 8 | pytest-cov 9 | -------------------------------------------------------------------------------- /scripts/profile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -m cProfile -o $1.prof $1 3 | python -c "import pstats; s = pstats.Stats('$1.prof'); s.sort_stats('cumulative').print_stats()" 4 | -------------------------------------------------------------------------------- /tests/data/multipoints.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /tests/data/multipolygons.prj: -------------------------------------------------------------------------------- 1 | GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]] -------------------------------------------------------------------------------- /docs/rasterstats.io.rst: -------------------------------------------------------------------------------- 1 | rasterstats.io module 2 | ===================== 3 | 4 | .. automodule:: rasterstats.io 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/rasterstats.utils.rst: -------------------------------------------------------------------------------- 1 | rasterstats.utils module 2 | ======================== 3 | 4 | .. 
automodule:: rasterstats.utils 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /tests/data/polygons_no_overlap.dbf: -------------------------------------------------------------------------------- 1 | _ A WidN 2 | 0 1 2 3 4 5 6 7 8 -------------------------------------------------------------------------------- /tests/data/polygons_partial_overlap.dbf: -------------------------------------------------------------------------------- 1 | _ A WidN 2 | 3 3 3 3 3 3 3 3 3 -------------------------------------------------------------------------------- /tests/myfunc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Additional functions to be used in raster stat computation 3 | from __future__ import division 4 | import numpy as np 5 | 6 | def mymean(x): 7 | return np.ma.mean(x) 8 | -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | from rasterstats import zonal_stats 2 | 3 | polys = "../tests/data/multilines.shp" 4 | raster = "../tests/data/slope.tif" 5 | stats = zonal_stats(polys, raster, stats="*") 6 | 7 | from pprint import pprint 8 | pprint(stats) 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .coverage 3 | TEST*.xml 4 | build/* 5 | dist/* 6 | *.swp 7 | .DS_Store 8 | *.orig 9 | *.egg-info 10 | MANIFEST 11 | Vagrantfile 12 | .vagrant/* 13 | .coverage 14 | *.aux.xml 15 | *.ipynb_checkpoints* 16 | .idea 17 | venv 18 | .eggs 19 | .cache 20 | -------------------------------------------------------------------------------- /tests/data/lines.qpj: -------------------------------------------------------------------------------- 1 | 
GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /tests/data/points.qpj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /tests/data/multilines.qpj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /tests/data/multipoints.qpj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /tests/data/polygons.qpj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 
84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /tests/data/multipolygons.qpj: -------------------------------------------------------------------------------- 1 | GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]] 2 | -------------------------------------------------------------------------------- /docs/rasterstats.rst: -------------------------------------------------------------------------------- 1 | rasterstats package 2 | =================== 3 | 4 | .. automodule:: rasterstats 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | Submodules 10 | ---------- 11 | 12 | .. 
toctree:: 13 | 14 | rasterstats.io 15 | rasterstats.utils 16 | 17 | -------------------------------------------------------------------------------- /tests/data/geometry.geojson: -------------------------------------------------------------------------------- 1 | { "type": "Polygon", "coordinates": [ [ [ 244697.451795243832748, 1000369.230757493642159 ], [ 244827.154939680622192, 1000373.045555859454907 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244930.154495556926122, 1000147.97245227789972 ], [ 244697.451795243832748, 1000159.41684737522155 ], [ 244697.451795243832748, 1000369.230757493642159 ] ] ] } 2 | -------------------------------------------------------------------------------- /tests/data/polygons_no_overlap.prj: -------------------------------------------------------------------------------- 1 | PROJCS["Albers",GEOGCS["GCS_GRS 1980(IUGG, 1980)",DATUM["D_unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_origin",34],PARAMETER["central_meridian",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] -------------------------------------------------------------------------------- /src/rasterstats/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from .main import gen_zonal_stats, raster_stats, zonal_stats 3 | from .point import gen_point_query, point_query 4 | from rasterstats import cli 5 | from rasterstats._version import __version__ 6 | 7 | __all__ = ['gen_zonal_stats', 8 | 'gen_point_query', 9 | 'raster_stats', 10 | 'zonal_stats', 11 | 'point_query', 12 | 'cli'] 13 | -------------------------------------------------------------------------------- /tests/data/polygons_partial_overlap.prj: 
-------------------------------------------------------------------------------- 1 | PROJCS["Albers",GEOGCS["GCS_GRS 1980(IUGG, 1980)",DATUM["D_unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Albers"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_origin",34],PARAMETER["central_meridian",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] -------------------------------------------------------------------------------- /tests/data/polygons_no_overlap.qpj: -------------------------------------------------------------------------------- 1 | PROJCS["unnamed",GEOGCS["GRS 1980(IUGG, 1980)",DATUM["unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_center",34],PARAMETER["longitude_of_center",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] 2 | -------------------------------------------------------------------------------- /tests/data/polygons_partial_overlap.qpj: -------------------------------------------------------------------------------- 1 | PROJCS["unnamed",GEOGCS["GRS 1980(IUGG, 1980)",DATUM["unknown",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["standard_parallel_1",43],PARAMETER["standard_parallel_2",48],PARAMETER["latitude_of_center",34],PARAMETER["longitude_of_center",-120],PARAMETER["false_easting",600000],PARAMETER["false_northing",0],UNIT["Meter",1]] 2 | -------------------------------------------------------------------------------- /scripts/release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python setup.py sdist 
--formats=gztar,zip bdist_wheel 4 | # Redirect any warnings and check for failures 5 | if [[ -n $(twine check dist/* 2>/dev/null | grep "Failed") ]]; then 6 | echo "Detected invalid markup, exiting!" 7 | exit 1 8 | fi 9 | twine upload dist/* 10 | 11 | echo "Don't forget to publish the docs..." 12 | echo " cd docs && make zip # then manually upload via https://pypi.python.org/pypi?%3Aaction=pkg_edit&name=rasterstats" 13 | -------------------------------------------------------------------------------- /tests/data/feature.geojson: -------------------------------------------------------------------------------- 1 | { "type": "Feature", "properties": { "id": 1 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 244697.451795243832748, 1000369.230757493642159 ], [ 244827.154939680622192, 1000373.045555859454907 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244930.154495556926122, 1000147.97245227789972 ], [ 244697.451795243832748, 1000159.41684737522155 ], [ 244697.451795243832748, 1000369.230757493642159 ] ] ] } } 2 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============ 3 | 4 | Depends on libgdal, rasterio, fiona, shapely and numpy 5 | 6 | Using Ubuntu 14.04:: 7 | 8 | sudo apt-get install python-numpy libgdal1h gdal-bin libgdal-dev 9 | pip install rasterstats 10 | 11 | Or homebrew on OS X:: 12 | 13 | brew install gdal 14 | pip install rasterstats 15 | 16 | For Windows, follow the `rasterio installation `_ and then run:: 17 | 18 | pip install rasterstats 19 | 20 | -------------------------------------------------------------------------------- /tests/data/lines.prj: -------------------------------------------------------------------------------- 1 | PROJCS["unnamed", 2 | GEOGCS["GRS 1980(IUGG, 1980)", 3 | DATUM["unknown", 4 | 
SPHEROID["GRS80",6378137,298.257222101]], 5 | PRIMEM["Greenwich",0], 6 | UNIT["degree",0.0174532925199433]], 7 | PROJECTION["Albers_Conic_Equal_Area"], 8 | PARAMETER["standard_parallel_1",43], 9 | PARAMETER["standard_parallel_2",48], 10 | PARAMETER["latitude_of_center",34], 11 | PARAMETER["longitude_of_center",-120], 12 | PARAMETER["false_easting",600000], 13 | PARAMETER["false_northing",0], 14 | UNIT["metre",1, 15 | AUTHORITY["EPSG","9001"]]] 16 | 17 | -------------------------------------------------------------------------------- /tests/data/points.prj: -------------------------------------------------------------------------------- 1 | PROJCS["unnamed", 2 | GEOGCS["GRS 1980(IUGG, 1980)", 3 | DATUM["unknown", 4 | SPHEROID["GRS80",6378137,298.257222101]], 5 | PRIMEM["Greenwich",0], 6 | UNIT["degree",0.0174532925199433]], 7 | PROJECTION["Albers_Conic_Equal_Area"], 8 | PARAMETER["standard_parallel_1",43], 9 | PARAMETER["standard_parallel_2",48], 10 | PARAMETER["latitude_of_center",34], 11 | PARAMETER["longitude_of_center",-120], 12 | PARAMETER["false_easting",600000], 13 | PARAMETER["false_northing",0], 14 | UNIT["metre",1, 15 | AUTHORITY["EPSG","9001"]]] 16 | 17 | -------------------------------------------------------------------------------- /tests/data/polygons.prj: -------------------------------------------------------------------------------- 1 | PROJCS["unnamed", 2 | GEOGCS["GRS 1980(IUGG, 1980)", 3 | DATUM["unknown", 4 | SPHEROID["GRS80",6378137,298.257222101]], 5 | PRIMEM["Greenwich",0], 6 | UNIT["degree",0.0174532925199433]], 7 | PROJECTION["Albers_Conic_Equal_Area"], 8 | PARAMETER["standard_parallel_1",43], 9 | PARAMETER["standard_parallel_2",48], 10 | PARAMETER["latitude_of_center",34], 11 | PARAMETER["longitude_of_center",-120], 12 | PARAMETER["false_easting",600000], 13 | PARAMETER["false_northing",0], 14 | UNIT["metre",1, 15 | AUTHORITY["EPSG","9001"]]] 16 | 17 | -------------------------------------------------------------------------------- 
/examples/README.md: -------------------------------------------------------------------------------- 1 | For basic usage, check the `simple.py` script. 2 | 3 | For other examples of `rasterstats` usage in the wild, see: 4 | 5 | * [Integrating with GeoPandas and Numpy](http://nbviewer.ipython.org/github/perrygeo/python-raster-stats/blob/master/docs/notebooks/Integrating%20with%20GeoPandas%20and%20Numpy.ipynb) 6 | * [Bioclimatic Envelope Modeling](http://nbviewer.ipython.org/github/Ecotrust/growth-yield-batch/blob/master/scripts/blm/Bioclimatic%20Envelope%20Modeling%20with%20False%20Negative%20Masking.ipynb) 7 | * [Agricultural zone climate summary](http://nbviewer.ipython.org/github/Ecotrust/aez-viewer/blob/master/docs/climate_summary/Ag%20Zone%20Climate%20Summary.ipynb) 8 | -------------------------------------------------------------------------------- /.github/workflows/test-rasterstats.yml: -------------------------------------------------------------------------------- 1 | name: Rasterstats Python package 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [ $default-branch ] 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | strategy: 12 | matrix: 13 | python-version: [3.6, 3.7, 3.8, 3.9] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install -r requirements_dev.txt 23 | python -m pip install -e . 
24 | - name: Test with pytest 25 | run: | 26 | pytest 27 | -------------------------------------------------------------------------------- /examples/multiproc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import itertools 3 | import multiprocessing 4 | 5 | from rasterstats import zonal_stats 6 | import fiona 7 | 8 | 9 | shp = "benchmark_data/ne_50m_admin_0_countries.shp" 10 | tif = "benchmark_data/srtm.tif" 11 | 12 | 13 | def chunks(data, n): 14 | """Yield successive n-sized chunks from a slice-able iterable.""" 15 | for i in range(0, len(data), n): 16 | yield data[i:i+n] 17 | 18 | 19 | def zonal_stats_partial(feats): 20 | """Wrapper for zonal stats, takes a list of features""" 21 | return zonal_stats(feats, tif, all_touched=True) 22 | 23 | 24 | if __name__ == "__main__": 25 | 26 | with fiona.open(shp) as src: 27 | features = list(src) 28 | 29 | # Create a process pool using all cores 30 | cores = multiprocessing.cpu_count() 31 | p = multiprocessing.Pool(cores) 32 | 33 | # parallel map 34 | stats_lists = p.map(zonal_stats_partial, chunks(features, cores)) 35 | 36 | # flatten to a single list 37 | stats = list(itertools.chain(*stats_lists)) 38 | 39 | assert len(stats) == len(features) 40 | -------------------------------------------------------------------------------- /tests/data/featurecollection.geojson: -------------------------------------------------------------------------------- 1 | { 2 | "type": "FeatureCollection", 3 | 4 | "features": [ 5 | { "type": "Feature", "properties": { "id": 1 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 244697.451795243832748, 1000369.230757493642159 ], [ 244827.154939680622192, 1000373.045555859454907 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244933.969293922709767, 1000353.971564030507579 ], [ 244930.154495556926122, 1000147.97245227789972 ], [ 244697.451795243832748, 1000159.41684737522155 ], [ 244697.451795243832748, 
1000369.230757493642159 ] ] ] } }, 6 | { "type": "Feature", "properties": { "id": 2 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 246082.223602025071159, 1000453.156321540940553 ], [ 246139.445577511884039, 1000460.78591827256605 ], [ 246189.03795626712963, 1000403.563942785724066 ], [ 246189.03795626712963, 1000403.563942785724066 ], [ 246086.038400390854804, 1000132.71325881476514 ], [ 245990.668441246147268, 1000205.194427764741704 ], [ 246082.223602025071159, 1000453.156321540940553 ] ] ] } } 7 | ] 8 | } 9 | -------------------------------------------------------------------------------- /examples/benchmark.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | """ 3 | First, download the data and place in `benchmark_data` 4 | 5 | 1. Download countries from 6 | wget http://www.naturalearthdata.com/http//www.naturalearthdata.com/download/50m/cultural/ne_50m_admin_0_countries.zip 7 | unzip 8 | 9 | 2. 
class Timer():
    """Context manager that prints the duration of its ``with`` block.

    Usage::

        with Timer() as t:
            do_work()
        # t.elapsed now holds the duration in seconds

    The elapsed time is also stored on the ``elapsed`` attribute so callers
    can inspect it programmatically instead of parsing stdout.
    """

    def __enter__(self):
        # perf_counter is monotonic and high-resolution, which makes it the
        # correct clock for measuring intervals (time.time can jump if the
        # system clock is adjusted mid-benchmark).
        self.start = time.perf_counter()
        return self  # enables "with Timer() as t:"

    def __exit__(self, *args):
        self.elapsed = time.perf_counter() - self.start
        print("Time:", self.elapsed)
22 | 23 | ```python 24 | # Code to reproduce the error 25 | ``` 26 | 27 | **Feature Requests** 28 | 29 | `python-rasterstats` is not currently accepting any feature requests via the issue tracker. If you'd like to add a backwards-compatible feature, please open a pull request - it doesn't need to be 100% ready but should include a working proof-of-concept, tests, and should not break the existing API. 30 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Matthew Perry 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, this 11 | list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | * Neither the name of the software nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | rasterstats 2 | =========== 3 | 4 | ``rasterstats`` is a Python module for summarizing geospatial raster datasets based on vector geometries. 5 | It includes functions for zonal statistics and interpolated point queries. The command-line interface allows for 6 | easy interoperability with other GeoJSON tools. 7 | 8 | Raster data support 9 | ------------------- 10 | Can work with any raster data source supported by `rasterio `_. 11 | Data can be categorical (e.g. vegetation types) or continuous values (e.g. elevation). 12 | 13 | Vector data support 14 | ------------------- 15 | Flexible support for vector features with Point, LineString, Polygon or Multi\* geometries. 16 | Any `fiona `_ data source, 17 | GeoJSON-like mapping, objects with a `geo\_interface `_, 18 | GeoJSON strings and Well-Known Text/Binary (WKT/WKB) geometries are all supported via the ``io`` submodule. 19 | 20 | Quickstart 21 | ------------------------ 22 | 23 | Install:: 24 | 25 | pip install rasterstats 26 | 27 | 28 | Given a polygon vector layer and a digital elevation model (DEM) raster: 29 | 30 | .. 
figure:: https://github.com/perrygeo/python-raster-stats/raw/master/docs/img/zones_elevation.png 31 | :align: center 32 | :alt: zones elevation 33 | 34 | calculate summary statistics of elevation for each polygon using:: 35 | 36 | from rasterstats import zonal_stats 37 | zonal_stats("polygons.shp", "elevation.tif", 38 | stats="count min mean max median") 39 | 40 | returns a ``list`` of ``dicts``, one for each Feature in ``polygons.shp``:: 41 | 42 | [..., 43 | {'count': 89, 44 | 'max': 69.52958679199219, 45 | 'mean': 20.08093536034059, 46 | 'median': 19.33736801147461, 47 | 'min': 1.5106816291809082}, 48 | ] 49 | 50 | 51 | Next steps 52 | ---------- 53 | 54 | .. toctree:: 55 | :maxdepth: 2 56 | 57 | installation 58 | manual 59 | cli 60 | rasterstats 61 | 62 | 63 | Indices and tables 64 | ================== 65 | 66 | * :ref:`genindex` 67 | * :ref:`modindex` 68 | * :ref:`search` 69 | 70 | -------------------------------------------------------------------------------- /tests/test_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import pytest 4 | from shapely.geometry import LineString 5 | from rasterstats.utils import \ 6 | stats_to_csv, get_percentile, remap_categories, boxify_points 7 | from rasterstats import zonal_stats 8 | from rasterstats.utils import VALID_STATS 9 | 10 | 11 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 12 | DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") 13 | raster = os.path.join(DATA, 'slope.tif') 14 | 15 | 16 | def test_csv(): 17 | polygons = os.path.join(DATA, 'polygons.shp') 18 | stats = zonal_stats(polygons, raster, stats="*") 19 | csv = stats_to_csv(stats) 20 | assert csv.split()[0] == ','.join(sorted(VALID_STATS)) 21 | 22 | 23 | def test_categorical_csv(): 24 | polygons = os.path.join(DATA, 'polygons.shp') 25 | categorical_raster = os.path.join(DATA, 'slope_classes.tif') 26 | stats = zonal_stats(polygons, 
def test_get_bad_percentile():
    """Invalid percentile specs must raise ValueError.

    Covers: a name without the ``percentile_`` prefix, values outside the
    closed [0, 100] range, and a non-numeric suffix.  (The original test
    asserted ``percentile_101`` twice — a copy-paste duplicate.)
    """
    bad_specs = [
        'foo',                # missing 'percentile_' prefix
        'percentile_101',     # above the upper bound
        'percentile_-1',      # below the lower bound
        'percentile_foobar',  # non-numeric suffix
    ]
    for spec in bad_specs:
        with pytest.raises(ValueError):
            get_percentile(spec)
class PyTest(TestCommand):
    """setuptools ``test`` command that delegates to pytest.

    Lets ``python setup.py test`` run the pytest suite instead of the
    default unittest discovery.
    """

    def finalize_options(self):
        TestCommand.finalize_options(self)
        # No extra CLI args for pytest; collect everything it finds.
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        # Import here, because outside the required eggs aren't loaded yet.
        import pytest
        errno = pytest.main(self.test_args)
        # Propagate pytest's exit status so CI sees failures.
        sys.exit(errno)
Python module for summarizing geospatial raster datasets based on vector geometries. 7 | It includes functions for **zonal statistics** and interpolated **point queries**. The command-line interface allows for 8 | easy interoperability with other GeoJSON tools. 9 | 10 | Documentation 11 | ------------- 12 | For details on installation and usage, visit the documentation at `http://pythonhosted.org/rasterstats `_. 13 | 14 | What does it do? 15 | ---------------- 16 | Given a vector layer and a raster band, calculate the summary statistics of each vector geometry. 17 | For example, with a polygon vector layer and a digital elevation model (DEM) raster, compute the 18 | mean elevation of each polygon. 19 | 20 | .. figure:: https://github.com/perrygeo/python-raster-stats/raw/master/docs/img/zones_elevation.png 21 | :align: center 22 | :alt: zones elevation 23 | 24 | Command Line Quick Start 25 | ------------------------ 26 | 27 | The command line interfaces to zonalstats and point_query 28 | are `rio` subcommands which read and write geojson features 29 | 30 | .. code-block:: bash 31 | 32 | $ fio cat polygon.shp | rio zonalstats -r elevation.tif 33 | 34 | $ fio cat points.shp | rio pointquery -r elevation.tif 35 | 36 | See the `CLI Docs `_. for more detail. 37 | 38 | Python Quick Start 39 | ------------------ 40 | 41 | For zonal statistics 42 | 43 | .. code-block:: python 44 | 45 | >>> from rasterstats import zonal_stats 46 | >>> stats = zonal_stats("tests/data/polygons.shp", "tests/data/slope.tif") 47 | >>> stats[0].keys() 48 | dict_keys(['min', 'max', 'mean', 'count']) 49 | >>> [f['mean'] for f in stats] 50 | [14.660084635416666, 56.60576171875] 51 | 52 | and for point queries 53 | 54 | .. code-block:: python 55 | 56 | >>> from rasterstats import point_query 57 | >>> point = {'type': 'Point', 'coordinates': (245309.0, 1000064.0)} 58 | >>> point_query(point, "tests/data/slope.tif") 59 | [74.09817594635244] 60 | 61 | 62 | Issues 63 | ------ 64 | 65 | Find a bug? 
Report it via github issues by providing 66 | 67 | - a link to download the smallest possible raster and vector dataset necessary to reproduce the error 68 | - python code or command to reproduce the error 69 | - information on your environment: versions of python, gdal and numpy and system memory 70 | 71 | .. |BuildStatus| image:: https://github.com/perrygeo/python-rasterstats/workflows/Rasterstats%20Python%20package/badge.svg 72 | .. _BuildStatus: https://github.com/perrygeo/python-rasterstats/actions 73 | -------------------------------------------------------------------------------- /docs/notebooks/Basic Usage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "from rasterstats import raster_stats\n", 15 | "elev = '/data/projects/murdock/zonal_fcid_test/dem_aea2_feet.tif'\n", 16 | "polys = '/data/projects/murdock/zonal_fcid_test/fcid.shp'\n", 17 | "stats = raster_stats(polys, elev, stats=\"*\")\n", 18 | "len(stats)" 19 | ], 20 | "language": "python", 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "metadata": {}, 25 | "output_type": "pyout", 26 | "prompt_number": 1, 27 | "text": [ 28 | "20763" 29 | ] 30 | } 31 | ], 32 | "prompt_number": 1 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "We can ask interesting questions such as \"*What polygon has the highest standard deviation in elevation?*\" by sorting the list by the `std` key." 
39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "collapsed": false, 44 | "input": [ 45 | "sorted(stats, key=lambda k: k['std'], reverse=True)[0]" 46 | ], 47 | "language": "python", 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "metadata": {}, 52 | "output_type": "pyout", 53 | "prompt_number": 2, 54 | "text": [ 55 | "{'__fid__': 17038,\n", 56 | " 'count': 1087,\n", 57 | " 'majority': 1523.3047,\n", 58 | " 'max': 2710.309814453125,\n", 59 | " 'mean': 2199.236430542778,\n", 60 | " 'median': 2233.90380859375,\n", 61 | " 'min': 1457.9298095703125,\n", 62 | " 'std': 339.93001065442024,\n", 63 | " 'sum': 2390570.0}" 64 | ] 65 | } 66 | ], 67 | "prompt_number": 2 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "Let's see how this performs with a 4.8 MB raster and a 3.3MB shapefile with > 20k polygons. " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "collapsed": false, 79 | "input": [ 80 | "%timeit raster_stats(polys, elev)" 81 | ], 82 | "language": "python", 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "output_type": "stream", 87 | "stream": "stdout", 88 | "text": [ 89 | "1 loops, best of 3: 15.1 s per loop\n" 90 | ] 91 | } 92 | ], 93 | "prompt_number": 3 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "Compare that to other alternatives:\n", 100 | "\n", 101 | "* QGIS Zonal Statistics Plugin (only does count, sum and mean): 1 min 51 sec\n", 102 | "\n", 103 | "Much faster, plus rasterstats is running in a VM in this case! " 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Let's try to optimize our raster_stats call by using `global_src_extent=True`. This will load the raster into memory once for the entire extent of the vector layer; less disk reads *can* mean better performance if raster access from disk is your limiting factor and you can fit the raster and resulting temporary arrays into memory! 
" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "collapsed": false, 116 | "input": [ 117 | "%timeit raster_stats(polys, elev, global_src_extent=True)" 118 | ], 119 | "language": "python", 120 | "metadata": {}, 121 | "outputs": [ 122 | { 123 | "output_type": "stream", 124 | "stream": "stdout", 125 | "text": [ 126 | "1 loops, best of 3: 14.9 s per loop\n" 127 | ] 128 | } 129 | ], 130 | "prompt_number": 4 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "No improvement at all. This indicates that the raster data source is relatively quick to read from disk. For other formats (such as jpeg or networked data sources) where the pixel values are slower to read from disk, the `global_src_extent` can increase performace." 137 | ] 138 | } 139 | ], 140 | "metadata": {} 141 | } 142 | ] 143 | } -------------------------------------------------------------------------------- /docs/notebooks/Basic Timing and Profiling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "code", 12 | "collapsed": false, 13 | "input": [ 14 | "from rasterstats import raster_stats\n", 15 | "polys = \"../../tests/data/polygons.shp\"\n", 16 | "raster = \"../../tests/data/slope.tif\"\n", 17 | "%timeit raster_stats(polys, raster)" 18 | ], 19 | "language": "python", 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "output_type": "stream", 24 | "stream": "stdout", 25 | "text": [ 26 | "10 loops, best of 3: 22.9 ms per loop\n" 27 | ] 28 | } 29 | ], 30 | "prompt_number": "*" 31 | }, 32 | { 33 | "cell_type": "code", 34 | "collapsed": false, 35 | "input": [ 36 | "%timeit raster_stats(polys, raster, stats=\"*\")" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "stream": "stdout", 44 | "text": [ 45 | 
"10 loops, best of 3: 28.3 ms per loop\n" 46 | ] 47 | } 48 | ], 49 | "prompt_number": "*" 50 | }, 51 | { 52 | "cell_type": "code", 53 | "collapsed": false, 54 | "input": [ 55 | "from rasterstats.util import VALID_STATS\n", 56 | "for vs in VALID_STATS:\n", 57 | " print vs\n", 58 | " %timeit raster_stats(polys, raster, stats=vs)" 59 | ], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "output_type": "stream", 65 | "stream": "stdout", 66 | "text": [ 67 | "count\n", 68 | "10 loops, best of 3: 22 ms per loop" 69 | ] 70 | }, 71 | { 72 | "output_type": "stream", 73 | "stream": "stdout", 74 | "text": [ 75 | "\n", 76 | "min\n", 77 | "10 loops, best of 3: 21.9 ms per loop" 78 | ] 79 | }, 80 | { 81 | "output_type": "stream", 82 | "stream": "stdout", 83 | "text": [ 84 | "\n", 85 | "max\n", 86 | "10 loops, best of 3: 22.4 ms per loop" 87 | ] 88 | }, 89 | { 90 | "output_type": "stream", 91 | "stream": "stdout", 92 | "text": [ 93 | "\n", 94 | "mean\n", 95 | "10 loops, best of 3: 22.1 ms per loop" 96 | ] 97 | }, 98 | { 99 | "output_type": "stream", 100 | "stream": "stdout", 101 | "text": [ 102 | "\n", 103 | "sum\n", 104 | "10 loops, best of 3: 24.1 ms per loop" 105 | ] 106 | }, 107 | { 108 | "output_type": "stream", 109 | "stream": "stdout", 110 | "text": [ 111 | "\n", 112 | "std\n", 113 | "10 loops, best of 3: 23.4 ms per loop" 114 | ] 115 | }, 116 | { 117 | "output_type": "stream", 118 | "stream": "stdout", 119 | "text": [ 120 | "\n", 121 | "median\n", 122 | "10 loops, best of 3: 23.1 ms per loop" 123 | ] 124 | }, 125 | { 126 | "output_type": "stream", 127 | "stream": "stdout", 128 | "text": [ 129 | "\n", 130 | "majority\n", 131 | "10 loops, best of 3: 24 ms per loop" 132 | ] 133 | }, 134 | { 135 | "output_type": "stream", 136 | "stream": "stdout", 137 | "text": [ 138 | "\n", 139 | "minority\n", 140 | "10 loops, best of 3: 22.2 ms per loop" 141 | ] 142 | }, 143 | { 144 | "output_type": "stream", 145 | "stream": "stdout", 146 | "text": [ 147 | "\n", 
148 | "unique\n", 149 | "10 loops, best of 3: 23.1 ms per loop" 150 | ] 151 | }, 152 | { 153 | "output_type": "stream", 154 | "stream": "stdout", 155 | "text": [ 156 | "\n", 157 | "range\n", 158 | "10 loops, best of 3: 22.5 ms per loop" 159 | ] 160 | }, 161 | { 162 | "output_type": "stream", 163 | "stream": "stdout", 164 | "text": [ 165 | "\n" 166 | ] 167 | } 168 | ], 169 | "prompt_number": "*" 170 | }, 171 | { 172 | "cell_type": "code", 173 | "collapsed": false, 174 | "input": [], 175 | "language": "python", 176 | "metadata": {}, 177 | "outputs": [] 178 | } 179 | ], 180 | "metadata": {} 181 | } 182 | ] 183 | } 184 | -------------------------------------------------------------------------------- /src/rasterstats/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | import sys 5 | from rasterio import features 6 | from shapely.geometry import box, MultiPolygon 7 | from .io import window_bounds 8 | 9 | 10 | DEFAULT_STATS = ['count', 'min', 'max', 'mean'] 11 | VALID_STATS = DEFAULT_STATS + \ 12 | ['sum', 'std', 'median', 'majority', 'minority', 'unique', 'range', 'nodata', 'nan'] 13 | # also percentile_{q} but that is handled as special case 14 | 15 | 16 | def get_percentile(stat): 17 | if not stat.startswith('percentile_'): 18 | raise ValueError("must start with 'percentile_'") 19 | qstr = stat.replace("percentile_", '') 20 | q = float(qstr) 21 | if q > 100.0: 22 | raise ValueError('percentiles must be <= 100') 23 | if q < 0.0: 24 | raise ValueError('percentiles must be >= 0') 25 | return q 26 | 27 | 28 | def rasterize_geom(geom, like, all_touched=False): 29 | """ 30 | Parameters 31 | ---------- 32 | geom: GeoJSON geometry 33 | like: raster object with desired shape and transform 34 | all_touched: rasterization strategy 35 | 36 | Returns 37 | ------- 38 | ndarray: boolean 39 | """ 40 | geoms = [(geom, 1)] 41 | rv_array = 
def remap_categories(category_map, stats):
    """Rename the keys of *stats* using *category_map*.

    Keys present in *category_map* are replaced by their mapped names;
    keys without an entry are carried through unchanged.
    """
    return {category_map.get(key, key): value
            for key, value in stats.items()}
def boxify_points(geom, rast):
    """
    Replace Point/MultiPoint geometries with small box polygons.

    GDALRasterize does not play well with point geometries, so each point is
    converted into a polygon covering 99% of the raster cell it falls in
    (the negative buffer shrinks the cell box by 1% of the cell size).
    """
    if 'Point' not in geom.type:
        raise ValueError("Points or multipoints only")

    # Negative buffer distance: 1% of the smaller cell dimension.
    shrink = -0.01 * abs(min(rast.affine.a, rast.affine.e))

    if geom.type == 'Point':
        points = [geom]
    elif geom.type == "MultiPoint":
        points = geom.geoms

    boxes = []
    for point in points:
        row, col = rast.index(point.x, point.y)
        window = ((row, row + 1), (col, col + 1))
        cell_box = box(*window_bounds(window, rast.affine))
        boxes.append(cell_box.buffer(shrink))

    return MultiPolygon(boxes)
@click.option('--prefix', type=str, default='_')
@click.option('--stats', type=str, default=None)
@click.option('--sequence/--no-sequence', type=bool, default=False)
@cligj.use_rs_opt
def zonalstats(features, raster, all_touched, band, categorical,
               indent, info, nodata, prefix, stats, sequence, use_rs):
    '''zonalstats generates summary statistics of geospatial raster datasets
    based on vector features.

    The input arguments to zonalstats should be valid GeoJSON Features. (see cligj)

    The output GeoJSON will be mostly unchanged but have additional properties per feature
    describing the summary statistics (min, max, mean, etc.) of the underlying raster dataset.

    The raster is specified by the required -r/--raster argument.

    Example, calculate rainfall stats for each state and output to file:

    \b
    rio zonalstats states.geojson -r rainfall.tif > mean_rainfall_by_state.geojson
    '''
    if info:
        logging.basicConfig(level=logging.INFO)

    if stats is not None:
        stats = stats.split(" ")
        if 'all' in [x.lower() for x in stats]:
            stats = "ALL"

    zonal_results = gen_zonal_stats(
        features,
        raster,
        all_touched=all_touched,
        band=band,
        categorical=categorical,
        nodata=nodata,
        stats=stats,
        prefix=prefix,
        geojson_out=True)

    if sequence:
        # sequence output stays one feature per line (never indented)
        for feature in zonal_results:
            if use_rs:
                click.echo(b'\x1e', nl=False)
            click.echo(json.dumps(feature))
    else:
        # honor --indent (it was previously accepted but never used);
        # indent=None keeps the old compact output by default
        click.echo(json.dumps(
            {'type': 'FeatureCollection',
             'features': list(zonal_results)},
            indent=indent))


@click.command(context_settings=SETTINGS)
@cligj.features_in_arg
@click.version_option(version=version, message='%(version)s')
@click.option('--raster', '-r', required=True)
@click.option('--band', type=int, default=1)
@click.option('--nodata', type=int, default=None)
@click.option('--indent', type=int, default=None)
@click.option('--interpolate', type=str, default='bilinear')
@click.option('--property-name', type=str, default='value')
@click.option('--sequence/--no-sequence', type=bool, default=False)
@cligj.use_rs_opt
def pointquery(features, raster, band, indent, nodata,
               interpolate, property_name, sequence, use_rs):
    """
    Queries the raster values at the points of the input GeoJSON Features.
    The raster values are added to the features properties and output as GeoJSON
    Feature Collection.

    If the Features are Points, the point geometry is used.
    For other Feature types, all of the vertices of the geometry will be queried.
    For example, you can provide a linestring and get the profile along the line
    if the vertices are spaced properly.

    You can use either bilinear (default) or nearest neighbor interpolation.
    """

    results = gen_point_query(
        features,
        raster,
        band=band,
        nodata=nodata,
        interpolate=interpolate,
        property_name=property_name,
        geojson_out=True)

    if sequence:
        # sequence output stays one feature per line (never indented)
        for feature in results:
            if use_rs:
                click.echo(b'\x1e', nl=False)
            click.echo(json.dumps(feature))
    else:
        # honor --indent (it was previously accepted but never used)
        click.echo(json.dumps(
            {'type': 'FeatureCollection',
             'features': list(results)},
            indent=indent))
for io.read_features with Fiona 1.8+ 16 | 17 | 0.13.0 18 | - Require Rasterio>=1.0 19 | - Fix buffer logic for boxify_points (#171) 20 | 21 | 0.12.1 22 | - Cast all integer data to int64 if we're on a 64 bit platform (#159) 23 | 24 | 0.12.0 25 | - zone_func argument to apply a function to the masked array before computing stats 26 | - support shapely 1.6 exceptions 27 | 28 | 0.11 29 | - change `band_num` parameter to `band` 30 | - add example of multiprocessing 31 | - updated for compatibility with upcoming Rasterio 1.0 release 32 | - using latest pytest and pytest-cov for testing 33 | - overhauled NaN handling logic to treat them directly as nodata 34 | 35 | 0.10.3 36 | - initial attempt at nan logic 37 | 38 | 0.10.2 39 | - Setup.py bugfix: Include requirements in manifest 40 | 41 | 0.10.1 42 | - Bug fix for geopandas dataframes 43 | 44 | 0.10.0 45 | - Added a generator variant of zonal_stats (gen_zonal_stats) and point_query 46 | (gen_point_query) which yield results instead of returning a list 47 | - Dependency on cligj to standardize the geojson input/output args and opts 48 | - Input/Output can be geojson sequences; allows for stream processing 49 | 50 | 0.9.2 51 | - added __version__ attribute 52 | 53 | 0.9.0 54 | - Completely removed osgeo.ogr in favor of fiona 55 | - Completely removed osgeo.osr, spatial referencing is external to this module 56 | - Removed optional dependency on GeoRaster lib 57 | - Extensive cleanup of tests 58 | - Extensive refactoring to encapsulate coordinate transforms and data access 59 | - Use of affine lib instead of GDAL-style geotransform tuples 60 | - Support for category maps to get human readable keys for categorical rasters 61 | - Greatly improve speed of categorical and any stat requiring pixel counts 62 | - Simplified and sped up travis builds for faster feedback 63 | - Nones instead of nans returned when polygon doesn't intersect raster pixels 64 | - Removed deprecated rasterstats script and unused util functions 65 | -
Added support for `nodata` count 66 | - GeoJSON features and geometries accepted as CLI inputs 67 | - point_query function and CLI 68 | - fixed bug in zonal_stats points with all_touched 69 | - GeoJSON Feature output supported directly, geojson_out=True 70 | 71 | 0.8.0 72 | - Rasterio CLI plugin; rio zonalstats 73 | 74 | 0.7.2 75 | - install bug, completely remove GDAL dep 76 | 77 | 0.7.1 78 | - Utility function to enable CLI with geojson features 79 | 80 | 0.7.0 81 | - Removed dependency on osgeo.gdal 82 | - Added dependency on rasterio; used for rasterizing geometries and raster reads 83 | - Optional use of the affine library to handle transformations ala rasterio 84 | - Use OGR fids if available 85 | - Raise builtin Exceptions (IOError, etc) for general errors rather than RasterStatsError 86 | - global_src_extent doesn’t require OGR vector layer, assumes full raster extent 87 | - Optional support for GeoRaster output (http://github.com/ozak/georasters) 88 | 89 | 0.6.2 90 | - fix bug in percentiles where feature covers only nodata values 91 | - pep8 cleanup 92 | 93 | 0.6.1 94 | - Use GetNextFeature to iterate over OGR layers 95 | 96 | 0.6 97 | - Added a `percentile_q` statistic to calculate the qth percentile for each feature. 98 | - Removed undocumented rasterstats script (may be replaced with more robust CLI tool by 1.0) 99 | - Optional support for outputing "mini-rasters": the clipped, masked numpy array for each feature. (thanks to @ozak again!) 100 | - Support for dynamically adding third-party stats functions - which take a masked array and return a scalar value. 
(thanks to @ozak) 101 | - Additional examples and ipython notebooks in docs 102 | 103 | 0.5 104 | - new method for clipping to raster extents, changes bbox_to_pixel_offsets to fix src_array edge cases (thanks @allhailwesttexas and @timcera) 105 | 106 | 0.4 107 | - Added examples directory with ipython notebooks 108 | - proper pip requirements files 109 | - Deprecated `raster_stats` in favor of `zonal_stats` 110 | - Support for GeoJSON-like FeatureCollections (any things that implements FeatureCollections in the __geo_interface__) 111 | - Support for in-memory numpy arrays and geopandas dataframes 112 | 113 | 0.3.5 114 | - Add optional "all_touched" rasterization strategy whereby all pixels that touch a geometry are considered (thanks @ozak) 115 | 116 | 0.3.4 117 | - Gaurd against touching (but not overlapping) polygons. Fixes #27... thanks @engelmannjens 118 | 119 | 0.3.3 120 | - Create in-memory layer with proper spatial reference object 121 | - don't call ogr.UseExceptions unless needed 122 | 123 | -------------------------------------------------------------------------------- /tests/test_point.py: -------------------------------------------------------------------------------- 1 | import os 2 | import rasterio 3 | from rasterstats.point import point_window_unitxy, bilinear, geom_xys 4 | from rasterstats import point_query 5 | 6 | raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') 7 | raster_nodata = os.path.join(os.path.dirname(__file__), 'data/slope_nodata.tif') 8 | 9 | with rasterio.open(raster) as src: 10 | affine = src.transform 11 | 12 | 13 | def test_unitxy_ul(): 14 | win, unitxy = point_window_unitxy(245300, 1000073, affine) 15 | assert win == ((30, 32), (38, 40)) 16 | x, y = unitxy 17 | # should be in LR of new unit square 18 | assert x > 0.5 19 | assert y < 0.5 20 | 21 | 22 | def test_unitxy_ur(): 23 | win, unitxy = point_window_unitxy(245318, 1000073, affine) 24 | assert win == ((30, 32), (39, 41)) 25 | x, y = unitxy 26 | # should be 
def test_unitxy_lr():
    # lower-right sample: point should land in the UL of its 2x2 window
    window, (ux, uy) = point_window_unitxy(245318, 1000056, affine)
    assert window == ((31, 33), (39, 41))
    assert ux < 0.5
    assert uy > 0.5


def test_unitxy_ll():
    # lower-left sample: point should land in the UR of its 2x2 window
    window, (ux, uy) = point_window_unitxy(245300, 1000056, affine)
    assert window == ((31, 33), (38, 40))
    assert ux > 0.5
    assert uy > 0.5


def test_bilinear():
    import numpy as np
    grid = np.array([[1.0, 2.0],
                     [3.0, 4.0]])

    # the four unit-square corners map onto the four cell centers
    assert bilinear(grid, 0, 0) == 3.0
    assert bilinear(grid, 1, 0) == 4.0
    assert bilinear(grid, 1, 1) == 2.0
    assert bilinear(grid, 0, 1) == 1.0
    # dead center averages all four cells
    assert bilinear(grid, 0.5, 0.5) == grid.mean()
    # near-corner values must stay strictly inside the data range
    assert bilinear(grid, 0.95, 0.95) < 4.0
    assert bilinear(grid, 0.05, 0.95) > 1.0


def test_xy_array_bilinear_window():
    """ integration test: window selection + raster read + interpolation
    """
    x, y = (245309, 1000064)

    with rasterio.open(raster) as src:
        window, unitxy = point_window_unitxy(x, y, affine)
        arr = src.read(1, window=window)

    assert round(bilinear(arr, *unitxy)) == 74


def test_point_query():
    wkt = "POINT(245309 1000064)"
    assert round(point_query(wkt, raster)[0]) == 74


def test_point_query_geojson():
    wkt = "POINT(245309 1000064)"
    features = point_query(wkt, raster, property_name="TEST", geojson_out=True)
    for feature in features:
        assert 'TEST' in feature['properties']
        assert round(feature['properties']['TEST']) == 74


def test_point_query_nodata():
    # all nodata, on the grid
    assert point_query("POINT(245309 1000308)", raster_nodata)[0] is None

    # all nodata, off the grid
    assert point_query("POINT(244000 1000308)", raster_nodata)[0] is None
    assert point_query("POINT(244000 1000308)", raster_nodata,
                       interpolate="nearest")[0] is None

    # some nodata: bilinear should fall back to nearest neighbor
    wkt = "POINT(245905 1000361)"
    assert round(point_query(wkt, raster_nodata, interpolate="nearest")[0]) == 43
    assert round(point_query(wkt, raster_nodata)[0]) == 43


def test_geom_xys():
    from shapely.geometry import (Point, MultiPoint,
                                  LineString, MultiLineString,
                                  Polygon, MultiPolygon)

    assert list(geom_xys(Point(0, 0))) == [(0, 0)]
    assert list(geom_xys(MultiPoint([(0, 0), (1, 1)]))) == [(0, 0), (1, 1)]
    assert list(geom_xys(LineString([(0, 0), (1, 1)]))) == [(0, 0), (1, 1)]

    mline = MultiLineString([((0, 0), (1, 1)), ((-1, 0), (1, 0))])
    assert list(geom_xys(mline)) == [(0, 0), (1, 1), (-1, 0), (1, 0)]

    poly = Polygon([(0, 0), (1, 1), (1, 0), (0, 0)])
    assert list(geom_xys(poly)) == [(0, 0), (1, 1), (1, 0), (0, 0)]
    # a ring yields the same vertices as its polygon's shell
    assert list(geom_xys(poly.exterior)) == [(0, 0), (1, 1), (1, 0), (0, 0)]

    mpoly = MultiPolygon([poly, Polygon([(2, 2), (3, 3), (3, 2)])])
    assert list(geom_xys(mpoly)) == [(0, 0), (1, 1), (1, 0), (0, 0),
                                     (2, 2), (3, 3), (3, 2), (2, 2)]

    # z coordinates are dropped
    assert list(geom_xys(MultiPoint([(0, 0, 1), (1, 1, 2)]))) == [(0, 0), (1, 1)]


# TODO # gen_point_query(interpolation="fake")
# TODO # gen_point_query(interpolation="bilinear")
# TODO # gen_point_query()
-------------------------------------------------------------------------------- /docs/notebooks/ArcPy and rasterstats integration.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "First off, installing on windows is a bit tricker than other operating systems. I'm assuming you already have ArcGIS/arcpy installed. We have to manually seak out and install the other dependencies... \n", 15 | "\n", 16 | "Grab the following packages and install them normally\n", 17 | "\n", 18 | "* `gdal-110-1500-core.msi` and `GDAL-1.10.0.win32-py2.7.msi` from http://www.gisinternals.com/sdk/PackageList.aspx?file=release-1500-gdal-mapserver.zip\n", 19 | "* `numpy-1.7.1.win32-py2.7.exe` from https://pypi.python.org/pypi/numpy\n", 20 | "* `Shapely-1.2.18.win32-py2.7.exe` from http://www.lfd.uci.edu/~gohlke/pythonlibs/\n", 21 | "* The latest version of rasterstats, e.g. `rasterstats-0.3.2.win32.exe` from https://pypi.python.org/pypi/rasterstats\n", 22 | "\n", 23 | "If you're using ArcGIS 10+, you probably have a system-wide python installation at `C:\\\\Python27\\\\ArcGIS10\\\\`. *If* there are other versions of python in your registry, you'll want to point the python installers here. Otherwise, it should be picked up as the default. \n", 24 | "\n", 25 | "Now we're ready to use the `rasterstats` module alongside `arcpy`." 
26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "collapsed": false, 31 | "input": [ 32 | "from rasterstats import raster_stats\n", 33 | "import arcpy\n", 34 | "arcpy.env = 'E:\\\\workspace\\\\rasterstats_blog'\n", 35 | "states = 'E:\\\\workspace\\\\rasterstats_blog\\\\boundaries_contus.shp'\n", 36 | "precip = 'E:\\\\workspace\\\\rasterstats_blog\\\\NA_Annual_Precipitation_GRID\\\\NA_Annual_Precipitation\\\\data\\\\na_anprecip\\\\hdr.adf'" 37 | ], 38 | "language": "python", 39 | "metadata": {}, 40 | "outputs": [], 41 | "prompt_number": 4 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "One technique would be to use the arcpy SearchCursor to iterate through the features. This allows us to catch errors that result from ESRI's slightly broken implementation of the `__geo_interface__`" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "collapsed": false, 53 | "input": [ 54 | "def get_stats(shp):\n", 55 | " cursor = arcpy.SearchCursor(shp)\n", 56 | "\n", 57 | " stats = []\n", 58 | " for feature in cursor:\n", 59 | " geom = feature.Shape\n", 60 | " \n", 61 | " try:\n", 62 | " rain_stats = raster_stats(geom, precip, stats=\"*\")[0]\n", 63 | " except TypeError:\n", 64 | " # arcpy's geo_interface is broken; reports type=polygon for some multipolygons\n", 65 | " # fall back to WKT\n", 66 | " rain_stats = raster_stats(geom.WKT, precip, stats=\"*\")[0]\n", 67 | " \n", 68 | " #rain_stats['NAME'] = feature.NAME\n", 69 | " #rain_stats['__fid__'] = feature.FID\n", 70 | " stats.append(rain_stats)\n", 71 | " return stats\n", 72 | "\n", 73 | "%timeit stats = get_stats(states)\n" 74 | ], 75 | "language": "python", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "output_type": "stream", 80 | "stream": "stdout", 81 | "text": [ 82 | "1 loops, best of 3: 57.9 s per loop\n" 83 | ] 84 | } 85 | ], 86 | "prompt_number": 9 87 | }, 88 | { 89 | "cell_type": "code", 90 | "collapsed": false, 91 | "input": [ 92 | "print [x for x in stats if 
x['NAME'] == \"Oregon\"][0]" 93 | ], 94 | "language": "python", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "output_type": "stream", 99 | "stream": "stdout", 100 | "text": [ 101 | "{'count': 250510, 'std': 631.539502512283, 'minority': 3193, 'min': 205.0, 'max': 3193.0, 'sum': 195203001.0, 'median': 461.0, 'majority': 263, 'range': 2988.0, 'NAME': u'Oregon', 'unique': 2865, '__fid__': 35, 'mean': 779.2223903237395}\n" 102 | ] 103 | } 104 | ], 105 | "prompt_number": 6 106 | }, 107 | { 108 | "cell_type": "code", 109 | "collapsed": false, 110 | "input": [ 111 | "# Try it the straight rasterstats way\n", 112 | "%timeit stats = raster_stats(states, precip, stats=\"*\")" 113 | ], 114 | "language": "python", 115 | "metadata": {}, 116 | "outputs": [ 117 | { 118 | "output_type": "stream", 119 | "stream": "stdout", 120 | "text": [ 121 | "1 loops, best of 3: 8.25 s per loop\n" 122 | ] 123 | } 124 | ], 125 | "prompt_number": 7 126 | }, 127 | { 128 | "cell_type": "code", 129 | "collapsed": false, 130 | "input": [ 131 | "print [x for x in stats if x['NAME'] == \"Oregon\"][0] # same as before" 132 | ], 133 | "language": "python", 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "output_type": "stream", 138 | "stream": "stdout", 139 | "text": [ 140 | "{'count': 250510, 'std': 631.539502512283, 'minority': 3193, 'min': 205.0, 'max': 3193.0, 'sum': 195203001.0, 'median': 461.0, 'majority': 263, 'range': 2988.0, 'NAME': u'Oregon', 'unique': 2865, '__fid__': 35, 'mean': 779.2223903237395}\n" 141 | ] 142 | } 143 | ], 144 | "prompt_number": 8 145 | }, 146 | { 147 | "cell_type": "code", 148 | "collapsed": false, 149 | "input": [], 150 | "language": "python", 151 | "metadata": {}, 152 | "outputs": [] 153 | } 154 | ], 155 | "metadata": {} 156 | } 157 | ] 158 | } -------------------------------------------------------------------------------- /tests/test_cli.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import json 3 
import warnings
# Some warnings must be ignored to parse output properly
# https://github.com/pallets/click/issues/371#issuecomment-223790894

from click.testing import CliRunner
from rasterstats.cli import zonalstats, pointquery


def _data(filename):
    # absolute path to a fixture file in tests/data
    return os.path.join(os.path.dirname(__file__), 'data', filename)


def test_cli_feature():
    runner = CliRunner()
    warnings.simplefilter('ignore')
    result = runner.invoke(zonalstats, [_data('feature.geojson'),
                                        '--raster', _data('slope.tif'),
                                        '--stats', 'mean',
                                        '--prefix', 'test_'])
    assert result.exit_code == 0
    outdata = json.loads(result.output)
    assert len(outdata['features']) == 1
    feature = outdata['features'][0]
    assert 'test_mean' in feature['properties']
    assert round(feature['properties']['test_mean'], 2) == 14.66
    assert 'test_count' not in feature['properties']


def test_cli_feature_stdin():
    runner = CliRunner()
    warnings.simplefilter('ignore')
    # read the fixture through a context manager so the handle is closed
    # (previously open(...).read() leaked the file descriptor)
    with open(_data('feature.geojson')) as src:
        payload = src.read()
    result = runner.invoke(zonalstats,
                           ['--raster', _data('slope.tif'),
                            '--stats', 'all',
                            '--prefix', 'test_'],
                           input=payload)
    assert result.exit_code == 0
    outdata = json.loads(result.output)
    assert len(outdata['features']) == 1
    feature = outdata['features'][0]
    assert 'test_mean' in feature['properties']
    assert 'test_std' in feature['properties']


def test_cli_features_sequence():
    runner = CliRunner()
    result = runner.invoke(zonalstats, [_data('featurecollection.geojson'),
                                        '--raster', _data('slope.tif'),
                                        '--stats', 'mean',
                                        '--prefix', 'test_',
                                        '--sequence'])
    assert result.exit_code == 0
    # sequence mode: one GeoJSON Feature per line
    for line in result.output.splitlines():
        assert json.loads(line)['type'] == 'Feature'


def test_cli_features_sequence_rs():
    runner = CliRunner()
    result = runner.invoke(zonalstats, [_data('featurecollection.geojson'),
                                        '--raster', _data('slope.tif'),
                                        '--stats', 'mean',
                                        '--prefix', 'test_',
                                        '--sequence', '--rs'])
    assert result.exit_code == 0
    # output must begin with the RS (record separator) control character
    assert result.output[0] == '\x1e'


def test_cli_featurecollection():
    runner = CliRunner()
    result = runner.invoke(zonalstats, [_data('featurecollection.geojson'),
                                        '--raster', _data('slope.tif'),
                                        '--stats', 'mean',
                                        '--prefix', 'test_'])
    assert result.exit_code == 0
    outdata = json.loads(result.output)
    assert len(outdata['features']) == 2
    feature = outdata['features'][0]
    assert 'test_mean' in feature['properties']
    assert round(feature['properties']['test_mean'], 2) == 14.66
    assert 'test_count' not in feature['properties']


def test_cli_pointquery():
    runner = CliRunner()
    result = runner.invoke(pointquery, [_data('featurecollection.geojson'),
                                        '--raster', _data('slope.tif'),
                                        '--property-name', 'slope'])
    assert result.exit_code == 0
    outdata = json.loads(result.output)
    assert len(outdata['features']) == 2
    assert 'slope' in outdata['features'][0]['properties']
os.path.join(os.path.dirname(__file__), 'data/slope.tif') 110 | vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') 111 | runner = CliRunner() 112 | result = runner.invoke(pointquery, [vector, 113 | '--raster', raster, 114 | '--property-name', 'slope', 115 | '--sequence']) 116 | assert result.exit_code == 0 117 | results = result.output.splitlines() 118 | for r in results: 119 | outdata = json.loads(r) 120 | assert outdata['type'] == 'Feature' 121 | 122 | 123 | def test_cli_point_sequence_rs(): 124 | raster = os.path.join(os.path.dirname(__file__), 'data/slope.tif') 125 | vector = os.path.join(os.path.dirname(__file__), 'data/featurecollection.geojson') 126 | runner = CliRunner() 127 | result = runner.invoke(pointquery, [vector, 128 | '--raster', raster, 129 | '--property-name', 'slope', 130 | '--sequence', '--rs']) 131 | assert result.exit_code == 0 132 | assert result.output[0] == '\x1e' 133 | -------------------------------------------------------------------------------- /src/rasterstats/point.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from shapely.geometry import shape 4 | from shapely.ops import transform 5 | from numpy.ma import masked 6 | from .io import read_features, Raster 7 | 8 | 9 | def point_window_unitxy(x, y, affine): 10 | """ Given an x, y and a geotransform 11 | Returns 12 | - rasterio window representing 2x2 window whose center points encompass point 13 | - the cartesian x, y coordinates of the point on the unit square 14 | defined by the array center points. 
def bilinear(arr, x, y):
    """Bilinearly interpolate a 2x2 array at unit-square coords (x, y).

    The four cell centers are treated as the corners of a unit square:

    +---+---+
    | A | B |        +----+
    +---+---+   =>   |    |
    | C | D |        +----+
    +---+---+

    e.g. the center of A is at (0, 1) on the unit square, D is at (1, 0).
    If ``arr`` is a masked array containing any nodata, falls back to
    nearest neighbor, returning None when the nearest cell is masked.
    """
    # only 2x2 windows are supported, for now
    assert arr.shape == (2, 2)
    ulv, urv, llv, lrv = arr[0:2, 0:2].flatten().tolist()

    # the query point must lie on the unit square
    assert 0.0 <= x <= 1.0
    assert 0.0 <= y <= 1.0

    if hasattr(arr, 'count') and arr.count() != 4:
        # masked array with at least one nodata cell:
        # fall back to nearest neighbor
        nearest = arr[int(round(1 - y)), int(round(x))]
        return None if nearest is masked else nearest.item()

    # weighted sum of the four corner values
    return ((llv * (1 - x) * (1 - y)) +
            (lrv * x * (1 - y)) +
            (ulv * (1 - x) * y) +
            (urv * x * y))


def geom_xys(geom):
    """Walk a shapely geometry, yielding every vertex as an (x, y) tuple.

    Multi-part geometries are flattened; polygon shells come before their
    holes; z coordinates, if present, are discarded.
    """
    if geom.has_z:
        # project down to 2D before reading coordinates
        geom = transform(lambda x, y, z=None: (x, y), geom)

    parts = geom.geoms if hasattr(geom, "geoms") else [geom]

    for part in parts:
        if hasattr(part, "exterior"):
            # polygon: shell first, then each interior ring
            yield from geom_xys(part.exterior)
            for hole in part.interiors:
                yield from geom_xys(hole)
        else:
            yield from part.coords
def point_query(*args, **kwargs):
    """The primary point query entry point.

    All arguments are forwarded directly to ``gen_point_query``;
    see its docstring for details.  The only difference is that
    ``point_query`` materializes the generator into a list."""
    return list(gen_point_query(*args, **kwargs))


def gen_point_query(
        vectors,
        raster,
        band=1,
        layer=0,
        nodata=None,
        affine=None,
        interpolate='bilinear',
        property_name='value',
        geojson_out=False,
        boundless=True):
    """Generate raster values at each vertex of vector geometries.

    For features with point geometry, each generated item is a single
    value.  For other geometry types it is a list of values, one per
    vertex of the geometry.

    Parameters
    ----------
    vectors: path to a vector source or geo-like python objects

    raster: ndarray or path to a GDAL raster source
        If an ndarray is passed, the ``affine`` kwarg is required.

    band: int, optional
        If `raster` is a GDAL source, the band number to use
        (counting from 1). Defaults to 1.

    layer: int or string, optional
        If `vectors` is a path to a fiona source, the vector layer to
        use, selected by name or index. Defaults to 0.

    nodata: float, optional
        If `raster` is a GDAL source, overrides any NODATA value in the
        file's metadata. If ``None``, the file's own NODATA value (if
        any) is used. Defaults to ``None``.

    affine: Affine instance
        Required only for ndarrays; otherwise read from the source.

    interpolate: string
        'bilinear' or 'nearest' interpolation.

    property_name: string
        Name of the property key used when ``geojson_out`` is True.

    geojson_out: boolean
        If True, yield GeoJSON-like features (default: False); the
        original geometry and properties are retained and the query
        values are appended as an additional property.

    boundless: boolean
        Allow features that extend beyond the raster dataset's extent,
        default: True. Cells outside the dataset extent are treated as
        nodata.

    Returns
    -------
    generator of values (if ``geojson_out`` is False)
    generator of geojson features (if ``geojson_out`` is True)
    """
    if interpolate not in ('nearest', 'bilinear'):
        raise ValueError("interpolate must be nearest or bilinear")

    features = read_features(vectors, layer)

    with Raster(raster, nodata=nodata, affine=affine, band=band) as rast:
        for feature in features:
            geom = shape(feature['geometry'])

            values = []
            for x, y in geom_xys(geom):
                if interpolate == 'bilinear':
                    # read the 2x2 window around the point and interpolate
                    win, unitxy = point_window_unitxy(x, y, rast.affine)
                    arr = rast.read(
                        window=win, masked=True, boundless=boundless).array
                    values.append(bilinear(arr, *unitxy))
                else:
                    # 'nearest': read the single containing cell
                    row, col = rast.index(x, y)
                    win = ((int(row), int(row + 1)), (int(col), int(col + 1)))
                    arr = rast.read(
                        window=win, masked=True, boundless=boundless).array
                    cell = arr[0, 0]
                    values.append(None if cell is masked else cell.item())

            if len(values) == 1:
                # single-vertex geometry: yield the bare value
                values = values[0]

            if geojson_out:
                feature.setdefault('properties', {})
                feature['properties'][property_name] = values
                yield feature
            else:
                yield values
code-block:: console 51 | 52 | $ rio pointquery --help 53 | Usage: rio pointquery [OPTIONS] FEATURES... 54 | 55 | Queries the raster values at the points of the input GeoJSON Features. The 56 | raster values are added to the features properties and output as GeoJSON 57 | Feature Collection. 58 | 59 | If the Features are Points, the point geometery is used. For other 60 | Feauture types, all of the verticies of the geometry will be queried. For 61 | example, you can provide a linestring and get the profile along the line 62 | if the verticies are spaced properly. 63 | 64 | You can use either bilinear (default) or nearest neighbor interpolation. 65 | 66 | Options: 67 | --version Show the version and exit. 68 | -r, --raster PATH [required] 69 | --band INTEGER 70 | --nodata INTEGER 71 | --indent INTEGER 72 | --interpolate TEXT 73 | --property-name TEXT 74 | --sequence / --no-sequence Write a LF-delimited sequence of texts 75 | containing individual objects or write a single 76 | JSON text containing a feature collection object 77 | (the default). 78 | --rs / --no-rs Use RS (0x1E) as a prefix for individual texts 79 | in a sequence as per http://tools.ietf.org/html 80 | /draft-ietf-json-text-sequence-13 (default is 81 | False). 82 | -h, --help Show this message and exit. 83 | 84 | 85 | Example 86 | ----------- 87 | 88 | In the following examples we use a polygon shapefile representing countries (``countries.shp``) and a raster digital elevation model (``dem.tif``). The data are assumed to be in the same spatial reference system. 89 | 90 | GeoJSON inputs 91 | ^^^^^^^^^^^^^^ 92 | First we must get our data into GeoJSON format. 
There are a number of options for that but we will use ``fio cat`` command that ships with the ``fiona`` python library:: 93 | 94 | fio cat countries.shp 95 | 96 | This will print the GeoJSON Features to the terminal (stdout) with Features like:: 97 | 98 | {"type": Feature, "geometry": {...} ,"properties": {...}} 99 | 100 | We'll use unix pipes to pass this data directly into our zonal stats command without an intermediate file. 101 | 102 | Specifying the Raster 103 | ^^^^^^^^^^^^^^^^^^^^^ 104 | 105 | There is one required option to ``rio zonalstats``: the ``--raster`` or ``-r`` option which is a file path to a raster dataset that can be read by rasterio. 106 | 107 | So now our command becomes:: 108 | 109 | fio cat countries.shp | rio zonalstats -r dem.tif 110 | 111 | GeoJSON Output 112 | ^^^^^^^^^^^^^^ 113 | 114 | The output FeatureCollection will contain the same number of features, same geometries, etc. but will have several additional properties attached to each feature:: 115 | 116 | 117 | { 118 | "type": "Feature", 119 | "geometry": {...} , 120 | "properties": { 121 | "country_name": "Grenada", 122 | "_min": 0.0, 123 | "_mean": 210.47, 124 | "_max": 840.33, 125 | "_count": 94 126 | } 127 | } 128 | 129 | Fairly self explanatory; the min, mean and max are the default summary statistics and the count is the number of overlapping raster cells. By default the property names are prefixed with ``_`` but you can specify your own with ``--prefix``:: 130 | 131 | $ fio cat countries.shp | rio zonalstats -r dem.tif --prefix "elevation_" 132 | ... 
133 | { 134 | "type": "Feature", 135 | "geometry": {...} , 136 | "properties": { 137 | "country_name": "Grenada", 138 | "elevation_min": 0.0, 139 | "elevation_mean": 210.47, 140 | "elevation_max": 840.33, 141 | "elevation_count": 94 142 | } 143 | } 144 | 145 | If we want to save the output, simply redirect to a file:: 146 | 147 | fio cat countries.shp | rio zonalstats -r dem.tif --prefix "elevation_" > countries_with_elevation.geojson 148 | 149 | Sequences or FeatureCollections 150 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 151 | By default, all of the features are collected into a single GeoJSON FeatureCollection which is echoed to ``stdout``. 152 | 153 | You can choose to emit sequences of line-delimited Features with `--use-sequence` and add the optional rs-delimiter with ``--use-rs``. The use of sequences for input and output features allows you to stream large datasets without memory limitations:: 154 | 155 | fio cat large.shp | rio zonalstats -r elevation.tif --sequence | some-other-process 156 | 157 | 158 | Other statistics 159 | ^^^^^^^^^^^^^^^^ 160 | 161 | The main README contains the complete list of summary statistics, any number of which can be specified using the ``--stats`` option in the form of a space-delimited string:: 162 | 163 | $ fio cat countries.shp \ 164 | | rio zonalstats -r dem.tif \ 165 | --prefix "elevation_" \ 166 | --stats "min max median percentile_95" 167 | ... 168 | { 169 | "type": "Feature", 170 | "geometry": {...} , 171 | "properties": { 172 | "country_name": "Grenada", 173 | "elevation_min": 0.0, 174 | "elevation_median": 161.33 175 | "elevation_max": 840.33, 176 | "elevation_percentile_95": 533.6 177 | } 178 | } 179 | 180 | Rasterization strategy 181 | ^^^^^^^^^^^^^^^^^^^^^^ 182 | 183 | As discussed in the main README, the default rasterization of each feature only considers those cells whose *centroids* intersect with the geometry. 
If you want to include all cells touched by the geometry, even if there is only a small degree of overlap, you can specify the ``--all-touched`` option. This is helpful if your features are much smaller scale than your raster data (e.g. tax lot parcels on a coarse weather data raster) 184 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | ZIPFILE = rasterstats.zip 10 | 11 | # User-friendly check for sphinx-build 12 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 13 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 14 | endif 15 | 16 | # Internal variables. 17 | PAPEROPT_a4 = -D latex_paper_size=a4 18 | PAPEROPT_letter = -D latex_paper_size=letter 19 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 20 | # the i18n builder cannot share the environment and doctrees with the others 21 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
22 | 23 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest coverage gettext 24 | 25 | help: 26 | @echo "Please use \`make ' where is one of" 27 | @echo " html to make standalone HTML files" 28 | @echo " dirhtml to make HTML files named index.html in directories" 29 | @echo " singlehtml to make a single large HTML file" 30 | @echo " pickle to make pickle files" 31 | @echo " json to make JSON files" 32 | @echo " htmlhelp to make HTML files and a HTML help project" 33 | @echo " qthelp to make HTML files and a qthelp project" 34 | @echo " applehelp to make an Apple Help Book" 35 | @echo " devhelp to make HTML files and a Devhelp project" 36 | @echo " epub to make an epub" 37 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 38 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 39 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 40 | @echo " text to make text files" 41 | @echo " man to make manual pages" 42 | @echo " texinfo to make Texinfo files" 43 | @echo " info to make Texinfo files and run them through makeinfo" 44 | @echo " gettext to make PO message catalogs" 45 | @echo " changes to make an overview of all changed/added/deprecated items" 46 | @echo " xml to make Docutils-native XML files" 47 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 48 | @echo " linkcheck to check all external links for integrity" 49 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 50 | @echo " coverage to run coverage check of the documentation (if enabled)" 51 | 52 | clean: 53 | rm -rf $(BUILDDIR)/* 54 | 55 | html: apidocs 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
59 | 60 | dirhtml: 61 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 62 | @echo 63 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 64 | 65 | singlehtml: 66 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 67 | @echo 68 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 69 | 70 | pickle: 71 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 72 | @echo 73 | @echo "Build finished; now you can process the pickle files." 74 | 75 | json: 76 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 77 | @echo 78 | @echo "Build finished; now you can process the JSON files." 79 | 80 | htmlhelp: 81 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 82 | @echo 83 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 84 | ".hhp project file in $(BUILDDIR)/htmlhelp." 85 | 86 | qthelp: 87 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 88 | @echo 89 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 90 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 91 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/rasterstats.qhcp" 92 | @echo "To view the help file:" 93 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/rasterstats.qhc" 94 | 95 | applehelp: 96 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 97 | @echo 98 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 99 | @echo "N.B. You won't be able to view it unless you put it in" \ 100 | "~/Library/Documentation/Help or install it in your application" \ 101 | "bundle." 102 | 103 | devhelp: 104 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 105 | @echo 106 | @echo "Build finished." 
107 | @echo "To view the help file:" 108 | @echo "# mkdir -p $$HOME/.local/share/devhelp/rasterstats" 109 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/rasterstats" 110 | @echo "# devhelp" 111 | 112 | epub: 113 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 114 | @echo 115 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 116 | 117 | latex: 118 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 119 | @echo 120 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 121 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 122 | "(use \`make latexpdf' here to do that automatically)." 123 | 124 | latexpdf: 125 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 126 | @echo "Running LaTeX files through pdflatex..." 127 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 128 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 129 | 130 | latexpdfja: 131 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 132 | @echo "Running LaTeX files through platex and dvipdfmx..." 133 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 134 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 135 | 136 | text: 137 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 138 | @echo 139 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 140 | 141 | man: 142 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 143 | @echo 144 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 145 | 146 | texinfo: 147 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 148 | @echo 149 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 150 | @echo "Run \`make' in that directory to run these through makeinfo" \ 151 | "(use \`make info' here to do that automatically)." 152 | 153 | info: 154 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 155 | @echo "Running Texinfo files through makeinfo..." 
156 | make -C $(BUILDDIR)/texinfo info 157 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 158 | 159 | gettext: 160 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 161 | @echo 162 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 163 | 164 | changes: 165 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 166 | @echo 167 | @echo "The overview file is in $(BUILDDIR)/changes." 168 | 169 | linkcheck: 170 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 171 | @echo 172 | @echo "Link check complete; look for any errors in the above output " \ 173 | "or in $(BUILDDIR)/linkcheck/output.txt." 174 | 175 | doctest: 176 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 177 | @echo "Testing of doctests in the sources finished, look at the " \ 178 | "results in $(BUILDDIR)/doctest/output.txt." 179 | 180 | coverage: 181 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 182 | @echo "Testing of coverage in the sources finished, look at the " \ 183 | "results in $(BUILDDIR)/coverage/python.txt." 184 | 185 | xml: 186 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 187 | @echo 188 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 189 | 190 | pseudoxml: 191 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 192 | @echo 193 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 194 | 195 | zip: html 196 | touch $(BUILDDIR)/$(ZIPFILE) 197 | rm $(BUILDDIR)/$(ZIPFILE) 198 | cd $(BUILDDIR)/html && zip -r9 ../$(ZIPFILE) * 199 | 200 | test: 201 | cd ../ && py.test --doctest-glob="*.rst" docs/*.rst 202 | 203 | .PHONY: apidocs 204 | apidocs: 205 | sphinx-apidoc -f -e -T -M -o . 
../src/rasterstats ../src/rasterstats/point.py ../src/rasterstats/main.py ../src/rasterstats/cli.py 206 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # rasterstats documentation build configuration file, created by 5 | # sphinx-quickstart on Mon Aug 31 09:59:38 2015. 6 | # 7 | # This file is execfile()d with the current directory set to its 8 | # containing dir. 9 | # 10 | # Note that not all possible configuration values are present in this 11 | # autogenerated file. 12 | # 13 | # All configuration values have a default; values that are commented out 14 | # serve to show the default. 15 | 16 | import sys 17 | import os 18 | import shlex 19 | import re 20 | 21 | # If extensions (or modules to document with autodoc) are in another directory, 22 | # add these directories to sys.path here. If the directory is relative to the 23 | # documentation root, use os.path.abspath to make it absolute, like shown here. 24 | #sys.path.insert(0, os.path.abspath('.')) 25 | 26 | # -- General configuration ------------------------------------------------ 27 | 28 | # If your documentation needs a minimal Sphinx version, state it here. 29 | #needs_sphinx = '1.0' 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | 'sphinx.ext.autodoc', 36 | 'sphinx.ext.doctest', 37 | 'sphinx.ext.autosummary', 38 | 'sphinx.ext.intersphinx', 39 | 'sphinx.ext.todo', 40 | 'sphinx.ext.coverage', 41 | 'sphinx.ext.mathjax', 42 | 'sphinx.ext.ifconfig', 43 | 'sphinx.ext.viewcode', 44 | 'numpydoc', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 
def get_version():
    """Extract the ``__version__`` string from rasterstats' _version.py.

    Raises RuntimeError if the version assignment cannot be found.
    """
    version_file = os.path.join(
        os.path.dirname(__file__), "..", "src", "rasterstats", "_version.py")
    with open(version_file, "r") as fh:
        contents = fh.read()
    # match e.g.  __version__ = "0.16.0"  at the start of a line
    match = re.search(
        r"^__version__ = ['\"]([^'\"]*)['\"]", contents, re.M)
    if match:
        return match.group(1)
    raise RuntimeError(
        "Unable to find version string in {}.".format(version_file))
102 | exclude_patterns = ['_build'] 103 | 104 | # The reST default role (used for this markup: `text`) to use for all 105 | # documents. 106 | #default_role = None 107 | 108 | # If true, '()' will be appended to :func: etc. cross-reference text. 109 | #add_function_parentheses = True 110 | 111 | # If true, the current module name will be prepended to all description 112 | # unit titles (such as .. function::). 113 | #add_module_names = True 114 | 115 | # If true, sectionauthor and moduleauthor directives will be shown in the 116 | # output. They are ignored by default. 117 | #show_authors = False 118 | 119 | # The name of the Pygments (syntax highlighting) style to use. 120 | pygments_style = 'sphinx' 121 | 122 | # A list of ignored prefixes for module index sorting. 123 | #modindex_common_prefix = [] 124 | 125 | # If true, keep warnings as "system message" paragraphs in the built documents. 126 | #keep_warnings = False 127 | 128 | # If true, `todo` and `todoList` produce output, else they produce nothing. 129 | todo_include_todos = False 130 | 131 | 132 | # -- Options for HTML output ---------------------------------------------- 133 | 134 | # The theme to use for HTML and HTML Help pages. See the documentation for 135 | # a list of builtin themes. 136 | html_theme = 'alabaster' 137 | 138 | # Theme options are theme-specific and customize the look and feel of a theme 139 | # further. For a list of options available for each theme, see the 140 | # documentation. 141 | #html_theme_options = {} 142 | 143 | # Add any paths that contain custom themes here, relative to this directory. 144 | #html_theme_path = [] 145 | 146 | # The name for this set of Sphinx documents. If None, it defaults to 147 | # " v documentation". 148 | #html_title = None 149 | 150 | # A shorter title for the navigation bar. Default is the same as html_title. 
151 | #html_short_title = None 152 | 153 | # The name of an image file (relative to this directory) to place at the top 154 | # of the sidebar. 155 | #html_logo = None 156 | 157 | # The name of an image file (within the static path) to use as favicon of the 158 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 159 | # pixels large. 160 | #html_favicon = None 161 | 162 | # Add any paths that contain custom static files (such as style sheets) here, 163 | # relative to this directory. They are copied after the builtin static files, 164 | # so a file named "default.css" will overwrite the builtin "default.css". 165 | html_static_path = ['_static'] 166 | 167 | # Add any extra paths that contain custom files (such as robots.txt or 168 | # .htaccess) here, relative to this directory. These files are copied 169 | # directly to the root of the documentation. 170 | #html_extra_path = [] 171 | 172 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 173 | # using the given strftime format. 174 | #html_last_updated_fmt = '%b %d, %Y' 175 | 176 | # If true, SmartyPants will be used to convert quotes and dashes to 177 | # typographically correct entities. 178 | #html_use_smartypants = True 179 | 180 | # Custom sidebar templates, maps document names to template names. 181 | #html_sidebars = {} 182 | 183 | # Additional templates that should be rendered to pages, maps page names to 184 | # template names. 185 | #html_additional_pages = {} 186 | 187 | # If false, no module index is generated. 188 | #html_domain_indices = True 189 | 190 | # If false, no index is generated. 191 | #html_use_index = True 192 | 193 | # If true, the index is split into individual pages for each letter. 194 | #html_split_index = False 195 | 196 | # If true, links to the reST sources are added to the pages. 197 | #html_show_sourcelink = True 198 | 199 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 
200 | #html_show_sphinx = True 201 | 202 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 203 | #html_show_copyright = True 204 | 205 | # If true, an OpenSearch description file will be output, and all pages will 206 | # contain a tag referring to it. The value of this option must be the 207 | # base URL from which the finished HTML is served. 208 | #html_use_opensearch = '' 209 | 210 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 211 | #html_file_suffix = None 212 | 213 | # Language to be used for generating the HTML full-text search index. 214 | # Sphinx supports the following languages: 215 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 216 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 217 | #html_search_language = 'en' 218 | 219 | # A dictionary with options for the search language support, empty by default. 220 | # Now only 'ja' uses this config value 221 | #html_search_options = {'type': 'default'} 222 | 223 | # The name of a javascript file (relative to the configuration directory) that 224 | # implements a search results scorer. If empty, the default will be used. 225 | #html_search_scorer = 'scorer.js' 226 | 227 | # Output file base name for HTML help builder. 228 | htmlhelp_basename = 'rasterstatsdoc' 229 | 230 | # -- Options for LaTeX output --------------------------------------------- 231 | 232 | latex_elements = { 233 | # The paper size ('letterpaper' or 'a4paper'). 234 | #'papersize': 'letterpaper', 235 | 236 | # The font size ('10pt', '11pt' or '12pt'). 237 | #'pointsize': '10pt', 238 | 239 | # Additional stuff for the LaTeX preamble. 240 | #'preamble': '', 241 | 242 | # Latex figure (float) alignment 243 | #'figure_align': 'htbp', 244 | } 245 | 246 | # Grouping the document tree into LaTeX files. List of tuples 247 | # (source start file, target name, title, 248 | # author, documentclass [howto, manual, or own class]). 
249 | latex_documents = [ 250 | (master_doc, 'rasterstats.tex', 'rasterstats Documentation', 251 | 'Matthew T. Perry', 'manual'), 252 | ] 253 | 254 | # The name of an image file (relative to this directory) to place at the top of 255 | # the title page. 256 | #latex_logo = None 257 | 258 | # For "manual" documents, if this is true, then toplevel headings are parts, 259 | # not chapters. 260 | #latex_use_parts = False 261 | 262 | # If true, show page references after internal links. 263 | #latex_show_pagerefs = False 264 | 265 | # If true, show URL addresses after external links. 266 | #latex_show_urls = False 267 | 268 | # Documents to append as an appendix to all manuals. 269 | #latex_appendices = [] 270 | 271 | # If false, no module index is generated. 272 | #latex_domain_indices = True 273 | 274 | 275 | # -- Options for manual page output --------------------------------------- 276 | 277 | # One entry per manual page. List of tuples 278 | # (source start file, name, description, authors, manual section). 279 | man_pages = [ 280 | (master_doc, 'rasterstats', 'rasterstats Documentation', 281 | [author], 1) 282 | ] 283 | 284 | # If true, show URL addresses after external links. 285 | #man_show_urls = False 286 | 287 | 288 | # -- Options for Texinfo output ------------------------------------------- 289 | 290 | # Grouping the document tree into Texinfo files. List of tuples 291 | # (source start file, target name, title, author, 292 | # dir menu entry, description, category) 293 | texinfo_documents = [ 294 | (master_doc, 'rasterstats', 'rasterstats Documentation', 295 | author, 'rasterstats', 'One line description of project.', 296 | 'Miscellaneous'), 297 | ] 298 | 299 | # Documents to append as an appendix to all manuals. 300 | #texinfo_appendices = [] 301 | 302 | # If false, no module index is generated. 303 | #texinfo_domain_indices = True 304 | 305 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 
306 | #texinfo_show_urls = 'footnote' 307 | 308 | # If true, do not generate a @detailmenu in the "Top" node's menu. 309 | #texinfo_no_detailmenu = False 310 | 311 | html_theme = 'sphinx_rtd_theme' 312 | -------------------------------------------------------------------------------- /docs/notebooks/Precipitation and Vegetation by State.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "" 4 | }, 5 | "nbformat": 3, 6 | "nbformat_minor": 0, 7 | "worksheets": [ 8 | { 9 | "cells": [ 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "A common task in many of my data workflows involves summarizing geospatial raster datasets based on vector geometries (i.e. zonal statistics). Despite many alternatives (starspan, the QGIS Zonal Statistics plugin, ArcPy and R) there were none that were\n", 15 | "\n", 16 | "* open source\n", 17 | "* fast enough\n", 18 | "* flexible enough\n", 19 | "* worked with python data structures\n", 20 | "\n", 21 | "We'd written a wrapper around starspan for madrona (see [`madrona.raster_stats`](https://github.com/Ecotrust/madrona/blob/master/docs/raster_stats.rst)) but relying on shell calls and an aging, unmaintained C++ code base was not cutting it. \n", 22 | "\n", 23 | "So I set out to create a solution using numpy, GDAL and python. The `rasterstats` package was born..." 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": {}, 29 | "source": [ 30 | "Let's jump into an example. I've got a polygon shapefile of continental US *state boundaries* and a raster dataset of *annual precipitation* from the [North American Environmental Atlas](http://www.cec.org/Page.asp?PageID=924&ContentID=2336). 
\n", 31 | "\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "collapsed": false, 37 | "input": [ 38 | "states = \"/data/workspace/rasterstats_blog/boundaries_contus.shp\"\n", 39 | "precip = \"/data/workspace/rasterstats_blog/NA_Annual_Precipitation_GRID/NA_Annual_Precipitation/data/na_anprecip/hdr.adf\"" 40 | ], 41 | "language": "python", 42 | "metadata": {}, 43 | "outputs": [], 44 | "prompt_number": "*" 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "The `raster_stats` function is the main entry point. Provide a vector and a raster as input and expect a list of dicts, one for each input feature. " 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "collapsed": false, 56 | "input": [ 57 | "from rasterstats import raster_stats\n", 58 | "rain_stats = raster_stats(states, precip, stats=\"*\", copy_properties=True)\n", 59 | "len(rain_stats) # continental US; 48 states plus District of Columbia" 60 | ], 61 | "language": "python", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "metadata": {}, 66 | "output_type": "pyout", 67 | "prompt_number": 5, 68 | "text": [ 69 | "49" 70 | ] 71 | } 72 | ], 73 | "prompt_number": "*" 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "metadata": {}, 78 | "source": [ 79 | "Print out the stats for a given state:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "collapsed": true, 85 | "input": [ 86 | "[x for x in rain_stats if x['NAME'] == \"Oregon\"][0]" 87 | ], 88 | "language": "python", 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "metadata": {}, 93 | "output_type": "pyout", 94 | "prompt_number": 6, 95 | "text": [ 96 | "{'COUNTRY': 'USA',\n", 97 | " 'EDIT': 'NEW',\n", 98 | " 'EDIT_DATE': '20060803',\n", 99 | " 'NAME': 'Oregon',\n", 100 | " 'STATEABB': 'US-OR',\n", 101 | " 'Shape_Area': 250563567264.0,\n", 102 | " 'Shape_Leng': 2366783.00361,\n", 103 | " 'UIDENT': 124704,\n", 104 | " '__fid__': 35,\n", 105 | " 'count': 250510,\n", 106 | " 'majority': 263,\n", 107 | " 'max': 
3193.0,\n", 108 | " 'mean': 779.2223903237395,\n", 109 | " 'median': 461.0,\n", 110 | " 'min': 205.0,\n", 111 | " 'minority': 3193,\n", 112 | " 'range': 2988.0,\n", 113 | " 'std': 631.539502512283,\n", 114 | " 'sum': 195203001.0,\n", 115 | " 'unique': 2865}" 116 | ] 117 | } 118 | ], 119 | "prompt_number": "*" 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Find the three driest states:" 126 | ] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "collapsed": false, 131 | "input": [ 132 | "[(x['NAME'], x['mean']) for x in \n", 133 | " sorted(rain_stats, key=lambda k: k['mean'])[:3]]" 134 | ], 135 | "language": "python", 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "metadata": {}, 140 | "output_type": "pyout", 141 | "prompt_number": 7, 142 | "text": [ 143 | "[('Nevada', 248.23814034118908),\n", 144 | " ('Utah', 317.668743027571),\n", 145 | " ('Arizona', 320.6157232064074)]" 146 | ] 147 | } 148 | ], 149 | "prompt_number": "*" 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "And write the data out to a csv." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "collapsed": false, 161 | "input": [ 162 | "from rasterstats import stats_to_csv\n", 163 | "with open('out.csv', 'w') as fh:\n", 164 | " fh.write(stats_to_csv(rain_stats))\n" 165 | ], 166 | "language": "python", 167 | "metadata": {}, 168 | "outputs": [], 169 | "prompt_number": "*" 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "We're not limited to descriptive statistics for *continuous* rasters either; we can get unique pixel counts for *categorical* rasters as well. In this example, we've got a raster of 2005 land cover (i.e. general vegetation type). Note that we can specify only the stats that make sense and the `categorical=True` provides a count of each pixel value." 
176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "collapsed": false, 181 | "input": [ 182 | "landcover = \"/data/workspace/rasterstats_blog/NA_LandCover_2005.img\"\n", 183 | "\n", 184 | "veg_stats = raster_stats(states, landcover, \n", 185 | " stats=\"count majority minority unique\",\n", 186 | " copy_properties=True,\n", 187 | " nodata_value=0,\n", 188 | " categorical=True)\n", 189 | "\n", 190 | "[x for x in veg_stats if x['NAME'] == \"Oregon\"][0]" 191 | ], 192 | "language": "python", 193 | "metadata": {}, 194 | "outputs": [ 195 | { 196 | "metadata": {}, 197 | "output_type": "pyout", 198 | "prompt_number": 9, 199 | "text": [ 200 | "{1: 999956,\n", 201 | " 3: 6,\n", 202 | " 5: 3005,\n", 203 | " 6: 198535,\n", 204 | " 8: 2270805,\n", 205 | " 10: 126199,\n", 206 | " 14: 20883,\n", 207 | " 15: 301884,\n", 208 | " 16: 17452,\n", 209 | " 17: 39246,\n", 210 | " 18: 28872,\n", 211 | " 19: 2174,\n", 212 | " 'COUNTRY': 'USA',\n", 213 | " 'EDIT': 'NEW',\n", 214 | " 'EDIT_DATE': '20060803',\n", 215 | " 'NAME': 'Oregon',\n", 216 | " 'STATEABB': 'US-OR',\n", 217 | " 'Shape_Area': 250563567264.0,\n", 218 | " 'Shape_Leng': 2366783.00361,\n", 219 | " 'UIDENT': 124704,\n", 220 | " '__fid__': 35,\n", 221 | " 'count': 4009017,\n", 222 | " 'majority': 8,\n", 223 | " 'minority': 3,\n", 224 | " 'unique': 12}" 225 | ] 226 | }, 227 | { 228 | "metadata": {}, 229 | "output_type": "pyout", 230 | "prompt_number": 10, 231 | "text": [ 232 | "{1: 999956,\n", 233 | " 3: 6,\n", 234 | " 5: 3005,\n", 235 | " 6: 198535,\n", 236 | " 8: 2270805,\n", 237 | " 10: 126199,\n", 238 | " 14: 20883,\n", 239 | " 15: 301884,\n", 240 | " 16: 17452,\n", 241 | " 17: 39246,\n", 242 | " 18: 28872,\n", 243 | " 19: 2174,\n", 244 | " 'COUNTRY': 'USA',\n", 245 | " 'EDIT': 'NEW',\n", 246 | " 'EDIT_DATE': '20060803',\n", 247 | " 'NAME': 'Oregon',\n", 248 | " 'STATEABB': 'US-OR',\n", 249 | " 'Shape_Area': 250563567264.0,\n", 250 | " 'Shape_Leng': 2366783.00361,\n", 251 | " 'UIDENT': 124704,\n", 252 | " '__fid__': 
35,\n", 253 | " 'count': 4009017,\n", 254 | " 'majority': 8,\n", 255 | " 'minority': 3,\n", 256 | " 'unique': 12}" 257 | ] 258 | } 259 | ], 260 | "prompt_number": "*" 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "Of course the pixel values alone don't make much sense. We need to interpret the pixel values as land cover classes:\n", 267 | "\n", 268 | "```\n", 269 | "Value, Class_name\n", 270 | "1\tTemperate or sub-polar needleleaf forest\n", 271 | "2\tSub-polar taiga needleleaf forest\n", 272 | "3\tTropical or sub-tropical broadleaf evergreen\n", 273 | "4\tTropical or sub-tropical broadleaf deciduous\n", 274 | "5\tTemperate or sub-polar broadleaf deciduous\n", 275 | "6\t Mixed Forest\n", 276 | "7\tTropical or sub-tropical shrubland\n", 277 | "8\tTemperate or sub-polar shrubland\n", 278 | "9\tTropical or sub-tropical grassland\n", 279 | "10\tTemperate or sub-polar grassland\n", 280 | "11\tSub-polar or polar shrubland-lichen-moss\n", 281 | "12\tSub-polar or polar grassland-lichen-moss\n", 282 | "13\tSub-polar or polar barren-lichen-moss\n", 283 | "14\tWetland\n", 284 | "15\tCropland\n", 285 | "16\tBarren Lands\n", 286 | "17\tUrban and Built-up\n", 287 | "18\tWater\n", 288 | "19\tSnow and Ice\n", 289 | "```\n", 290 | "So, for our Oregon example above we can see that, despite Oregon's reputation as a lush green landscape, the majority land cover class (#8) is \"Temperate or sub-polar shrubland\" at 2.27m pixels out of 4 millions total. 
" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "collapsed": false, 296 | "input": [ 297 | "geom = {'coordinates': [[\n", 298 | " [-594335.108537269, -570957.932799394],\n", 299 | " [-422374.54395311, -593387.5716581973],\n", 300 | " [-444804.1828119133, -765348.1362423564],\n", 301 | " [-631717.839968608, -735441.9510972851],\n", 302 | " [-594335.108537269, -570957.932799394]]],\n", 303 | " 'type': 'Polygon'}\n", 304 | "\n", 305 | "raster_stats(geom, precip, stats=\"min median max\")\n" 306 | ], 307 | "language": "python", 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "metadata": {}, 312 | "output_type": "pyout", 313 | "prompt_number": 14, 314 | "text": [ 315 | "[{'__fid__': 0, 'max': 1011.0, 'median': 451.0, 'min': 229.0}]" 316 | ] 317 | } 318 | ], 319 | "prompt_number": "*" 320 | }, 321 | { 322 | "cell_type": "code", 323 | "collapsed": false, 324 | "input": [], 325 | "language": "python", 326 | "metadata": {}, 327 | "outputs": [] 328 | } 329 | ], 330 | "metadata": {} 331 | } 332 | ] 333 | } -------------------------------------------------------------------------------- /src/rasterstats/io.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | import json 5 | import math 6 | import fiona 7 | from fiona.errors import DriverError 8 | import rasterio 9 | import warnings 10 | from rasterio.transform import guard_transform 11 | from rasterio.enums import MaskFlags 12 | from affine import Affine 13 | import numpy as np 14 | from shapely import wkt, wkb 15 | 16 | try: 17 | from shapely.errors import ReadingError 18 | except ImportError: # pragma: no cover 19 | from shapely.geos import ReadingError 20 | 21 | try: 22 | from json.decoder import JSONDecodeError 23 | except ImportError: # pragma: no cover 24 | JSONDecodeError = ValueError 25 | 26 | try: 27 | from collections.abc import Iterable, Mapping 28 | except 
ImportError: # pragma: no cover 29 | from collections import Iterable, Mapping 30 | 31 | 32 | geom_types = ["Point", "LineString", "Polygon", 33 | "MultiPoint", "MultiLineString", "MultiPolygon"] 34 | 35 | 36 | def wrap_geom(geom): 37 | """ Wraps a geometry dict in an GeoJSON Feature 38 | """ 39 | return {'type': 'Feature', 40 | 'properties': {}, 41 | 'geometry': geom} 42 | 43 | 44 | def parse_feature(obj): 45 | """ Given a python object 46 | attemp to a GeoJSON-like Feature from it 47 | """ 48 | 49 | # object implementing geo_interface 50 | if hasattr(obj, '__geo_interface__'): 51 | gi = obj.__geo_interface__ 52 | if gi['type'] in geom_types: 53 | return wrap_geom(gi) 54 | elif gi['type'] == 'Feature': 55 | return gi 56 | 57 | # wkt 58 | try: 59 | shape = wkt.loads(obj) 60 | return wrap_geom(shape.__geo_interface__) 61 | except (ReadingError, TypeError, AttributeError): 62 | pass 63 | 64 | # wkb 65 | try: 66 | shape = wkb.loads(obj) 67 | return wrap_geom(shape.__geo_interface__) 68 | except (ReadingError, TypeError): 69 | pass 70 | 71 | # geojson-like python mapping 72 | try: 73 | if obj['type'] in geom_types: 74 | return wrap_geom(obj) 75 | elif obj['type'] == 'Feature': 76 | return obj 77 | except (AssertionError, TypeError): 78 | pass 79 | 80 | raise ValueError("Can't parse %s as a geojson Feature object" % obj) 81 | 82 | 83 | def read_features(obj, layer=0): 84 | features_iter = None 85 | if isinstance(obj, str): 86 | try: 87 | # test it as fiona data source 88 | with fiona.open(obj, 'r', layer=layer) as src: 89 | assert len(src) > 0 90 | 91 | def fiona_generator(obj): 92 | with fiona.open(obj, 'r', layer=layer) as src: 93 | for feature in src: 94 | yield feature 95 | 96 | features_iter = fiona_generator(obj) 97 | except (AssertionError, TypeError, IOError, OSError, DriverError, UnicodeDecodeError): 98 | try: 99 | mapping = json.loads(obj) 100 | if 'type' in mapping and mapping['type'] == 'FeatureCollection': 101 | features_iter = mapping['features'] 102 | 
elif mapping['type'] in geom_types + ['Feature']: 103 | features_iter = [parse_feature(mapping)] 104 | except (ValueError, JSONDecodeError): 105 | # Single feature-like string 106 | features_iter = [parse_feature(obj)] 107 | elif isinstance(obj, Mapping): 108 | if 'type' in obj and obj['type'] == 'FeatureCollection': 109 | features_iter = obj['features'] 110 | else: 111 | features_iter = [parse_feature(obj)] 112 | elif isinstance(obj, bytes): 113 | # Single binary object, probably a wkb 114 | features_iter = [parse_feature(obj)] 115 | elif hasattr(obj, '__geo_interface__'): 116 | mapping = obj.__geo_interface__ 117 | if mapping['type'] == 'FeatureCollection': 118 | features_iter = mapping['features'] 119 | else: 120 | features_iter = [parse_feature(mapping)] 121 | elif isinstance(obj, Iterable): 122 | # Iterable of feature-like objects 123 | features_iter = (parse_feature(x) for x in obj) 124 | 125 | if not features_iter: 126 | raise ValueError("Object is not a recognized source of Features") 127 | return features_iter 128 | 129 | 130 | def read_featurecollection(obj, layer=0): 131 | features = read_features(obj, layer=layer) 132 | fc = {'type': 'FeatureCollection', 'features': []} 133 | fc['features'] = [f for f in features] 134 | return fc 135 | 136 | 137 | def rowcol(x, y, affine, op=math.floor): 138 | """ Get row/col for a x/y 139 | """ 140 | r = int(op((y - affine.f) / affine.e)) 141 | c = int(op((x - affine.c) / affine.a)) 142 | return r, c 143 | 144 | 145 | def bounds_window(bounds, affine): 146 | """Create a full cover rasterio-style window 147 | """ 148 | w, s, e, n = bounds 149 | row_start, col_start = rowcol(w, n, affine) 150 | row_stop, col_stop = rowcol(e, s, affine, op=math.ceil) 151 | return (row_start, row_stop), (col_start, col_stop) 152 | 153 | 154 | def window_bounds(window, affine): 155 | (row_start, row_stop), (col_start, col_stop) = window 156 | w, s = affine * (col_start, row_stop) 157 | e, n = affine * (col_stop, row_start) 158 | return w, 
s, e, n 159 | 160 | 161 | def beyond_extent(window, shape): 162 | """Checks if window references pixels beyond the raster extent""" 163 | (wr_start, wr_stop), (wc_start, wc_stop) = window 164 | return wr_start < 0 or wc_start < 0 or wr_stop > shape[0] or wc_stop > shape[1] 165 | 166 | 167 | def boundless_array(arr, window, nodata, masked=False): 168 | dim3 = False 169 | if len(arr.shape) == 3: 170 | dim3 = True 171 | elif len(arr.shape) != 2: 172 | raise ValueError("Must be a 2D or 3D array") 173 | 174 | # unpack for readability 175 | (wr_start, wr_stop), (wc_start, wc_stop) = window 176 | 177 | # Calculate overlap 178 | olr_start = max(min(window[0][0], arr.shape[-2:][0]), 0) 179 | olr_stop = max(min(window[0][1], arr.shape[-2:][0]), 0) 180 | olc_start = max(min(window[1][0], arr.shape[-2:][1]), 0) 181 | olc_stop = max(min(window[1][1], arr.shape[-2:][1]), 0) 182 | 183 | # Calc dimensions 184 | overlap_shape = (olr_stop - olr_start, olc_stop - olc_start) 185 | if dim3: 186 | window_shape = (arr.shape[0], wr_stop - wr_start, wc_stop - wc_start) 187 | else: 188 | window_shape = (wr_stop - wr_start, wc_stop - wc_start) 189 | 190 | # create an array of nodata values 191 | out = np.empty(shape=window_shape, dtype=arr.dtype) 192 | out[:] = nodata 193 | 194 | # Fill with data where overlapping 195 | nr_start = olr_start - wr_start 196 | nr_stop = nr_start + overlap_shape[0] 197 | nc_start = olc_start - wc_start 198 | nc_stop = nc_start + overlap_shape[1] 199 | if dim3: 200 | out[:, nr_start:nr_stop, nc_start:nc_stop] = \ 201 | arr[:, olr_start:olr_stop, olc_start:olc_stop] 202 | else: 203 | out[nr_start:nr_stop, nc_start:nc_stop] = \ 204 | arr[olr_start:olr_stop, olc_start:olc_stop] 205 | 206 | if masked: 207 | out = np.ma.MaskedArray(out, mask=(out == nodata)) 208 | 209 | return out 210 | 211 | 212 | class Raster(object): 213 | """ Raster abstraction for data access to 2/3D array-like things 214 | 215 | Use as a context manager to ensure dataset gets closed properly:: 
216 | 217 | >>> with Raster(path) as rast: 218 | ... 219 | 220 | Parameters 221 | ---------- 222 | raster: 2/3D array-like data source, required 223 | Currently supports paths to rasterio-supported rasters and 224 | numpy arrays with Affine transforms. 225 | 226 | affine: Affine object 227 | Maps row/col to coordinate reference system 228 | required if raster is ndarray 229 | 230 | nodata: nodata value, optional 231 | Overrides the datasource's internal nodata if specified 232 | 233 | band: integer 234 | raster band number, optional (default: 1) 235 | 236 | Methods 237 | ------- 238 | index 239 | read 240 | """ 241 | 242 | def __init__(self, raster, affine=None, nodata=None, band=1): 243 | self.array = None 244 | self.src = None 245 | 246 | if isinstance(raster, np.ndarray): 247 | if affine is None: 248 | raise ValueError("Specify affine transform for numpy arrays") 249 | self.array = raster 250 | self.affine = affine 251 | self.shape = raster.shape 252 | self.nodata = nodata 253 | else: 254 | self.src = rasterio.open(raster, 'r') 255 | self.affine = guard_transform(self.src.transform) 256 | self.shape = (self.src.height, self.src.width) 257 | self.band = band 258 | 259 | if nodata is not None: 260 | # override with specified nodata 261 | self.nodata = float(nodata) 262 | else: 263 | self.nodata = self.src.nodata 264 | 265 | def index(self, x, y): 266 | """ Given (x, y) in crs, return the (row, column) on the raster 267 | """ 268 | col, row = [math.floor(a) for a in (~self.affine * (x, y))] 269 | return row, col 270 | 271 | def read(self, bounds=None, window=None, masked=False, boundless=True): 272 | """ Performs a read against the underlying array source 273 | 274 | Parameters 275 | ---------- 276 | bounds: bounding box 277 | in w, s, e, n order, iterable, optional 278 | window: rasterio-style window, optional 279 | bounds OR window are required, 280 | specifying both or neither will raise exception 281 | masked: boolean 282 | return a masked numpy array, default: 
False 283 | bounds OR window are required, specifying both or neither will raise exception 284 | boundless: boolean 285 | allow window/bounds that extend beyond the dataset’s extent, default: True 286 | partially or completely filled arrays will be returned as appropriate. 287 | 288 | Returns 289 | ------- 290 | Raster object with update affine and array info 291 | """ 292 | # Calculate the window 293 | if bounds and window: 294 | raise ValueError("Specify either bounds or window") 295 | 296 | if bounds: 297 | win = bounds_window(bounds, self.affine) 298 | elif window: 299 | win = window 300 | else: 301 | raise ValueError("Specify either bounds or window") 302 | 303 | if not boundless and beyond_extent(win, self.shape): 304 | raise ValueError("Window/bounds is outside dataset extent and boundless reads are disabled") 305 | 306 | c, _, _, f = window_bounds(win, self.affine) # c ~ west, f ~ north 307 | a, b, _, d, e, _, _, _, _ = tuple(self.affine) 308 | new_affine = Affine(a, b, c, d, e, f) 309 | 310 | nodata = self.nodata 311 | if nodata is None: 312 | nodata = -999 313 | warnings.warn("Setting nodata to -999; specify nodata explicitly") 314 | 315 | if self.array is not None: 316 | # It's an ndarray already 317 | new_array = boundless_array( 318 | self.array, window=win, nodata=nodata, masked=masked) 319 | elif self.src: 320 | # It's an open rasterio dataset 321 | if all(MaskFlags.per_dataset in flags for flags in self.src.mask_flag_enums): 322 | if not masked: 323 | masked = True 324 | warnings.warn("Setting masked to True because dataset mask has been detected") 325 | 326 | new_array = self.src.read( 327 | self.band, window=win, boundless=boundless, masked=masked) 328 | 329 | return Raster(new_array, new_affine, nodata) 330 | 331 | def __enter__(self): 332 | return self 333 | 334 | def __exit__(self, *args): 335 | if self.src is not None: 336 | # close the rasterio reader 337 | self.src.close() 338 | 
-------------------------------------------------------------------------------- /src/rasterstats/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | 5 | from affine import Affine 6 | from shapely.geometry import shape 7 | import numpy as np 8 | import platform 9 | import warnings 10 | 11 | from .io import read_features, Raster 12 | from .utils import (rasterize_geom, get_percentile, check_stats, 13 | remap_categories, key_assoc_val, boxify_points) 14 | 15 | 16 | def raster_stats(*args, **kwargs): 17 | """Deprecated. Use zonal_stats instead.""" 18 | warnings.warn("'raster_stats' is an alias to 'zonal_stats'" 19 | " and will disappear in 1.0", DeprecationWarning) 20 | return zonal_stats(*args, **kwargs) 21 | 22 | 23 | def zonal_stats(*args, **kwargs): 24 | """The primary zonal statistics entry point. 25 | 26 | All arguments are passed directly to ``gen_zonal_stats``. 27 | See its docstring for details. 28 | 29 | The only difference is that ``zonal_stats`` will 30 | return a list rather than a generator.""" 31 | return list(gen_zonal_stats(*args, **kwargs)) 32 | 33 | 34 | def gen_zonal_stats( 35 | vectors, raster, 36 | layer=0, 37 | band=1, 38 | nodata=None, 39 | affine=None, 40 | stats=None, 41 | all_touched=False, 42 | categorical=False, 43 | category_map=None, 44 | add_stats=None, 45 | zone_func=None, 46 | raster_out=False, 47 | prefix=None, 48 | geojson_out=False, 49 | boundless=True, **kwargs): 50 | """Zonal statistics of raster values aggregated to vector geometries. 51 | 52 | Parameters 53 | ---------- 54 | vectors: path to an vector source or geo-like python objects 55 | 56 | raster: ndarray or path to a GDAL raster source 57 | If ndarray is passed, the ``affine`` kwarg is required. 
58 | 59 | layer: int or string, optional 60 | If `vectors` is a path to a fiona source, 61 | specify the vector layer to use either by name or number. 62 | defaults to 0 63 | 64 | band: int, optional 65 | If `raster` is a GDAL source, the band number to use (counting from 1). 66 | defaults to 1. 67 | 68 | nodata: float, optional 69 | If `raster` is a GDAL source, this value overrides any NODATA value 70 | specified in the file's metadata. 71 | If `None`, the file's metadata's NODATA value (if any) will be used. 72 | defaults to `None`. 73 | 74 | affine: Affine instance 75 | required only for ndarrays, otherwise it is read from src 76 | 77 | stats: list of str, or space-delimited str, optional 78 | Which statistics to calculate for each zone. 79 | All possible choices are listed in ``utils.VALID_STATS``. 80 | defaults to ``DEFAULT_STATS``, a subset of these. 81 | 82 | all_touched: bool, optional 83 | Whether to include every raster cell touched by a geometry, or only 84 | those having a center point within the polygon. 85 | defaults to `False` 86 | 87 | categorical: bool, optional 88 | 89 | category_map: dict 90 | A dictionary mapping raster values to human-readable categorical names. 
91 | Only applies when categorical is True 92 | 93 | add_stats: dict 94 | with names and functions of additional stats to compute, optional 95 | 96 | zone_func: callable 97 | function to apply to zone ndarray prior to computing stats 98 | 99 | raster_out: boolean 100 | Include the masked numpy array for each feature?, optional 101 | 102 | Each feature dictionary will have the following additional keys: 103 | mini_raster_array: The clipped and masked numpy array 104 | mini_raster_affine: Affine transformation 105 | mini_raster_nodata: NoData Value 106 | 107 | prefix: string 108 | add a prefix to the keys (default: None) 109 | 110 | geojson_out: boolean 111 | Return list of GeoJSON-like features (default: False) 112 | Original feature geometry and properties will be retained 113 | with zonal stats appended as additional properties. 114 | Use with `prefix` to ensure unique and meaningful property names. 115 | 116 | boundless: boolean 117 | Allow features that extend beyond the raster dataset’s extent, default: True 118 | Cells outside dataset extents are treated as nodata. 119 | 120 | Returns 121 | ------- 122 | generator of dicts (if geojson_out is False) 123 | Each item corresponds to a single vector feature and 124 | contains keys for each of the specified stats. 125 | 126 | generator of geojson features (if geojson_out is True) 127 | GeoJSON-like Feature as python dict 128 | """ 129 | stats, run_count = check_stats(stats, categorical) 130 | 131 | # Handle 1.0 deprecations 132 | transform = kwargs.get('transform') 133 | if transform: 134 | warnings.warn("GDAL-style transforms will disappear in 1.0. 
" 135 | "Use affine=Affine.from_gdal(*transform) instead", 136 | DeprecationWarning) 137 | if not affine: 138 | affine = Affine.from_gdal(*transform) 139 | 140 | cp = kwargs.get('copy_properties') 141 | if cp: 142 | warnings.warn("Use `geojson_out` to preserve feature properties", 143 | DeprecationWarning) 144 | 145 | band_num = kwargs.get('band_num') 146 | if band_num: 147 | warnings.warn("Use `band` to specify band number", DeprecationWarning) 148 | band = band_num 149 | 150 | with Raster(raster, affine, nodata, band) as rast: 151 | features_iter = read_features(vectors, layer) 152 | for _, feat in enumerate(features_iter): 153 | geom = shape(feat['geometry']) 154 | 155 | if 'Point' in geom.type: 156 | geom = boxify_points(geom, rast) 157 | 158 | geom_bounds = tuple(geom.bounds) 159 | 160 | fsrc = rast.read(bounds=geom_bounds, boundless=boundless) 161 | 162 | # rasterized geometry 163 | rv_array = rasterize_geom(geom, like=fsrc, all_touched=all_touched) 164 | 165 | # nodata mask 166 | isnodata = (fsrc.array == fsrc.nodata) 167 | 168 | # add nan mask (if necessary) 169 | has_nan = ( 170 | np.issubdtype(fsrc.array.dtype, np.floating) 171 | and np.isnan(fsrc.array.min())) 172 | if has_nan: 173 | isnodata = (isnodata | np.isnan(fsrc.array)) 174 | 175 | # Mask the source data array 176 | # mask everything that is not a valid value or not within our geom 177 | masked = np.ma.MaskedArray( 178 | fsrc.array, 179 | mask=(isnodata | ~rv_array)) 180 | 181 | # If we're on 64 bit platform and the array is an integer type 182 | # make sure we cast to 64 bit to avoid overflow. 
183 | # workaround for https://github.com/numpy/numpy/issues/8433 184 | if platform.architecture()[0] == '64bit' and \ 185 | masked.dtype != np.int64 and \ 186 | issubclass(masked.dtype.type, np.integer): 187 | masked = masked.astype(np.int64) 188 | 189 | # execute zone_func on masked zone ndarray 190 | if zone_func is not None: 191 | if not callable(zone_func): 192 | raise TypeError(('zone_func must be a callable ' 193 | 'which accepts function a ' 194 | 'single `zone_array` arg.')) 195 | value = zone_func(masked) 196 | 197 | # check if zone_func has return statement 198 | if value is not None: 199 | masked = value 200 | 201 | if masked.compressed().size == 0: 202 | # nothing here, fill with None and move on 203 | feature_stats = dict([(stat, None) for stat in stats]) 204 | if 'count' in stats: # special case, zero makes sense here 205 | feature_stats['count'] = 0 206 | else: 207 | if run_count: 208 | keys, counts = np.unique(masked.compressed(), return_counts=True) 209 | try: 210 | pixel_count = dict(zip([k.item() for k in keys], 211 | [c.item() for c in counts])) 212 | except AttributeError: 213 | pixel_count = dict(zip([np.asscalar(k) for k in keys], 214 | [np.asscalar(c) for c in counts])) 215 | 216 | if categorical: 217 | feature_stats = dict(pixel_count) 218 | if category_map: 219 | feature_stats = remap_categories(category_map, feature_stats) 220 | else: 221 | feature_stats = {} 222 | 223 | if 'min' in stats: 224 | feature_stats['min'] = float(masked.min()) 225 | if 'max' in stats: 226 | feature_stats['max'] = float(masked.max()) 227 | if 'mean' in stats: 228 | feature_stats['mean'] = float(masked.mean()) 229 | if 'count' in stats: 230 | feature_stats['count'] = int(masked.count()) 231 | # optional 232 | if 'sum' in stats: 233 | feature_stats['sum'] = float(masked.sum()) 234 | if 'std' in stats: 235 | feature_stats['std'] = float(masked.std()) 236 | if 'median' in stats: 237 | feature_stats['median'] = float(np.median(masked.compressed())) 238 | if 
'majority' in stats: 239 | feature_stats['majority'] = float(key_assoc_val(pixel_count, max)) 240 | if 'minority' in stats: 241 | feature_stats['minority'] = float(key_assoc_val(pixel_count, min)) 242 | if 'unique' in stats: 243 | feature_stats['unique'] = len(list(pixel_count.keys())) 244 | if 'range' in stats: 245 | try: 246 | rmin = feature_stats['min'] 247 | except KeyError: 248 | rmin = float(masked.min()) 249 | try: 250 | rmax = feature_stats['max'] 251 | except KeyError: 252 | rmax = float(masked.max()) 253 | feature_stats['range'] = rmax - rmin 254 | 255 | for pctile in [s for s in stats if s.startswith('percentile_')]: 256 | q = get_percentile(pctile) 257 | pctarr = masked.compressed() 258 | feature_stats[pctile] = np.percentile(pctarr, q) 259 | 260 | if 'nodata' in stats or 'nan' in stats: 261 | featmasked = np.ma.MaskedArray(fsrc.array, mask=(~rv_array)) 262 | 263 | if 'nodata' in stats: 264 | feature_stats['nodata'] = float((featmasked == fsrc.nodata).sum()) 265 | if 'nan' in stats: 266 | feature_stats['nan'] = float(np.isnan(featmasked).sum()) if has_nan else 0 267 | 268 | if add_stats is not None: 269 | for stat_name, stat_func in add_stats.items(): 270 | try: 271 | feature_stats[stat_name] = stat_func(masked, feat['properties']) 272 | except TypeError: 273 | # backwards compatible with single-argument function 274 | feature_stats[stat_name] = stat_func(masked) 275 | 276 | if raster_out: 277 | feature_stats['mini_raster_array'] = masked 278 | feature_stats['mini_raster_affine'] = fsrc.affine 279 | feature_stats['mini_raster_nodata'] = fsrc.nodata 280 | 281 | if prefix is not None: 282 | prefixed_feature_stats = {} 283 | for key, val in feature_stats.items(): 284 | newkey = "{}{}".format(prefix, key) 285 | prefixed_feature_stats[newkey] = val 286 | feature_stats = prefixed_feature_stats 287 | 288 | if geojson_out: 289 | for key, val in feature_stats.items(): 290 | if 'properties' not in feat: 291 | feat['properties'] = {} 292 | feat['properties'][key] 
= val 293 | yield feat 294 | else: 295 | yield feature_stats 296 | -------------------------------------------------------------------------------- /tests/test_io.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import fiona 4 | import rasterio 5 | import json 6 | import pytest 7 | from shapely.geometry import shape 8 | from rasterstats.io import read_features, read_featurecollection, Raster # todo parse_feature 9 | from rasterstats.io import boundless_array, window_bounds, bounds_window, rowcol 10 | 11 | 12 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 13 | DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") 14 | polygons = os.path.join(DATA, 'polygons.shp') 15 | raster = os.path.join(DATA, 'slope.tif') 16 | 17 | import numpy as np 18 | arr = np.array([[1, 1, 1], 19 | [1, 1, 1], 20 | [1, 1, 1]]) 21 | 22 | arr3d = np.array([[[1, 1, 1], 23 | [1, 1, 1], 24 | [1, 1, 1]]]) 25 | 26 | eps = 1e-6 27 | 28 | with fiona.open(polygons, 'r') as src: 29 | target_features = [f for f in src] 30 | 31 | target_geoms = [shape(f['geometry']) for f in target_features] 32 | 33 | 34 | def _compare_geomlists(aa, bb): 35 | for a, b in zip(aa, bb): 36 | assert a.equals_exact(b, eps) 37 | 38 | 39 | def _test_read_features(indata): 40 | features = list(read_features(indata)) 41 | # multi 42 | geoms = [shape(f['geometry']) for f in features] 43 | _compare_geomlists(geoms, target_geoms) 44 | 45 | 46 | def _test_read_features_single(indata): 47 | # single (first target geom) 48 | geom = shape(list(read_features(indata))[0]['geometry']) 49 | assert geom.equals_exact(target_geoms[0], eps) 50 | 51 | 52 | def test_fiona_path(): 53 | assert list(read_features(polygons)) == target_features 54 | 55 | 56 | def test_layer_index(): 57 | layer = fiona.listlayers(DATA).index('polygons') 58 | assert list(read_features(DATA, layer=layer)) == target_features 59 | 60 | 61 | def test_layer_name(): 62 | 
assert list(read_features(DATA, layer='polygons')) == target_features 63 | 64 | 65 | def test_path_unicode(): 66 | try: 67 | upolygons = unicode(polygons) 68 | except NameError: 69 | # python3, it's already unicode 70 | upolygons = polygons 71 | assert list(read_features(upolygons)) == target_features 72 | 73 | 74 | def test_featurecollection(): 75 | assert read_featurecollection(polygons)['features'] == \ 76 | list(read_features(polygons)) == \ 77 | target_features 78 | 79 | 80 | def test_shapely(): 81 | with fiona.open(polygons, 'r') as src: 82 | indata = [shape(f['geometry']) for f in src] 83 | _test_read_features(indata) 84 | _test_read_features_single(indata[0]) 85 | 86 | 87 | def test_wkt(): 88 | with fiona.open(polygons, 'r') as src: 89 | indata = [shape(f['geometry']).wkt for f in src] 90 | _test_read_features(indata) 91 | _test_read_features_single(indata[0]) 92 | 93 | 94 | def test_wkb(): 95 | with fiona.open(polygons, 'r') as src: 96 | indata = [shape(f['geometry']).wkb for f in src] 97 | _test_read_features(indata) 98 | _test_read_features_single(indata[0]) 99 | 100 | 101 | def test_mapping_features(): 102 | # list of Features 103 | with fiona.open(polygons, 'r') as src: 104 | indata = [f for f in src] 105 | _test_read_features(indata) 106 | 107 | 108 | def test_mapping_feature(): 109 | # list of Features 110 | with fiona.open(polygons, 'r') as src: 111 | indata = [f for f in src] 112 | _test_read_features(indata[0]) 113 | 114 | 115 | def test_mapping_geoms(): 116 | with fiona.open(polygons, 'r') as src: 117 | indata = [f for f in src] 118 | _test_read_features(indata[0]['geometry']) 119 | 120 | 121 | def test_mapping_collection(): 122 | indata = {'type': "FeatureCollection"} 123 | with fiona.open(polygons, 'r') as src: 124 | indata['features'] = [f for f in src] 125 | _test_read_features(indata) 126 | 127 | 128 | def test_jsonstr(): 129 | # Feature str 130 | with fiona.open(polygons, 'r') as src: 131 | indata = [f for f in src] 132 | indata = 
json.dumps(indata[0]) 133 | _test_read_features(indata) 134 | 135 | 136 | def test_jsonstr_geom(): 137 | # geojson geom str 138 | with fiona.open(polygons, 'r') as src: 139 | indata = [f for f in src] 140 | indata = json.dumps(indata[0]['geometry']) 141 | _test_read_features(indata) 142 | 143 | 144 | def test_jsonstr_collection(): 145 | indata = {'type': "FeatureCollection"} 146 | with fiona.open(polygons, 'r') as src: 147 | indata['features'] = [f for f in src] 148 | indata = json.dumps(indata) 149 | _test_read_features(indata) 150 | 151 | 152 | def test_jsonstr_collection_without_features(): 153 | indata = {'type': "FeatureCollection", 'features': []} 154 | indata = json.dumps(indata) 155 | with pytest.raises(ValueError): 156 | _test_read_features(indata) 157 | 158 | 159 | def test_invalid_jsonstr(): 160 | indata = {'type': "InvalidGeometry", 'coordinates': [30, 10]} 161 | indata = json.dumps(indata) 162 | with pytest.raises(ValueError): 163 | _test_read_features(indata) 164 | 165 | 166 | class MockGeoInterface: 167 | def __init__(self, f): 168 | self.__geo_interface__ = f 169 | 170 | 171 | def test_geo_interface(): 172 | with fiona.open(polygons, 'r') as src: 173 | indata = [MockGeoInterface(f) for f in src] 174 | _test_read_features(indata) 175 | 176 | 177 | def test_geo_interface_geom(): 178 | with fiona.open(polygons, 'r') as src: 179 | indata = [MockGeoInterface(f['geometry']) for f in src] 180 | _test_read_features(indata) 181 | 182 | 183 | def test_geo_interface_collection(): 184 | # geointerface for featurecollection? 
185 | indata = {'type': "FeatureCollection"} 186 | with fiona.open(polygons, 'r') as src: 187 | indata['features'] = [f for f in src] 188 | indata = MockGeoInterface(indata) 189 | _test_read_features(indata) 190 | 191 | 192 | def test_notafeature(): 193 | with pytest.raises(ValueError): 194 | list(read_features(['foo', 'POINT(-122 42)'])) 195 | 196 | with pytest.raises(ValueError): 197 | list(read_features(Exception())) 198 | 199 | 200 | # Raster tests 201 | def test_boundless(): 202 | # Exact 203 | assert boundless_array(arr, window=((0, 3), (0, 3)), nodata=0).sum() == 9 204 | 205 | # Intersects 206 | assert boundless_array(arr, window=((-1, 2), (-1, 2)), nodata=0).sum() == 4 207 | assert boundless_array(arr, window=((1, 4), (-1, 2)), nodata=0).sum() == 4 208 | assert boundless_array(arr, window=((1, 4), (1, 4)), nodata=0).sum() == 4 209 | assert boundless_array(arr, window=((-1, 2), (1, 4)), nodata=0).sum() == 4 210 | 211 | # No overlap 212 | assert boundless_array(arr, window=((-4, -1), (-4, -1)), nodata=0).sum() == 0 213 | assert boundless_array(arr, window=((-4, -1), (4, 7)), nodata=0).sum() == 0 214 | assert boundless_array(arr, window=((4, 7), (4, 7)), nodata=0).sum() == 0 215 | assert boundless_array(arr, window=((4, 7), (-4, -1)), nodata=0).sum() == 0 216 | assert boundless_array(arr, window=((-3, 0), (-3, 0)), nodata=0).sum() == 0 217 | 218 | # Covers 219 | assert boundless_array(arr, window=((-1, 4), (-1, 4)), nodata=0).sum() == 9 220 | 221 | # 3D 222 | assert boundless_array(arr3d, window=((0, 3), (0, 3)), nodata=0).sum() == 9 223 | assert boundless_array(arr3d, window=((-1, 2), (-1, 2)), nodata=0).sum() == 4 224 | assert boundless_array(arr3d, window=((-3, 0), (-3, 0)), nodata=0).sum() == 0 225 | 226 | # 1D 227 | with pytest.raises(ValueError): 228 | boundless_array(np.array([1, 1, 1]), window=((0, 3),), nodata=0) 229 | 230 | 231 | def test_boundless_masked(): 232 | a = boundless_array(arr, window=((-4, -1), (-4, -1)), nodata=0, masked=True) 233 | 
assert a.mask.all() 234 | b = boundless_array(arr, window=((0, 3), (0, 3)), nodata=0, masked=True) 235 | assert not b.mask.any() 236 | c = boundless_array(arr, window=((-1, 2), (-1, 2)), nodata=0, masked=True) 237 | assert c.mask.any() and not c.mask.all() 238 | 239 | 240 | def test_window_bounds(): 241 | with rasterio.open(raster) as src: 242 | win = ((0, src.shape[0]), (0, src.shape[1])) 243 | assert src.bounds == window_bounds(win, src.transform) 244 | 245 | win = ((5, 10), (5, 10)) 246 | assert src.window_bounds(win) == window_bounds(win, src.transform) 247 | 248 | 249 | def test_bounds_window(): 250 | with rasterio.open(raster) as src: 251 | assert bounds_window(src.bounds, src.transform) == \ 252 | ((0, src.shape[0]), (0, src.shape[1])) 253 | 254 | 255 | def test_rowcol(): 256 | import math 257 | with rasterio.open(raster) as src: 258 | x, _, _, y = src.bounds 259 | x += 1.0 260 | y -= 1.0 261 | assert rowcol(x, y, src.transform, op=math.floor) == (0, 0) 262 | assert rowcol(x, y, src.transform, op=math.ceil) == (1, 1) 263 | 264 | def test_Raster_index(): 265 | x, y = 245114, 1000968 266 | with rasterio.open(raster) as src: 267 | c1, r1 = src.index(x, y) 268 | with Raster(raster) as rast: 269 | c2, r2 = rast.index(x, y) 270 | assert c1 == c2 271 | assert r1 == r2 272 | 273 | 274 | def test_Raster(): 275 | import numpy as np 276 | 277 | bounds = (244156, 1000258, 245114, 1000968) 278 | r1 = Raster(raster, band=1).read(bounds) 279 | 280 | with rasterio.open(raster) as src: 281 | arr = src.read(1) 282 | affine = src.transform 283 | nodata = src.nodata 284 | 285 | r2 = Raster(arr, affine, nodata, band=1).read(bounds) 286 | 287 | with pytest.raises(ValueError): 288 | r3 = Raster(arr, affine, nodata, band=1).read() 289 | with pytest.raises(ValueError): 290 | r4 = Raster(arr, affine, nodata, band=1).read(bounds=1, window=1) 291 | 292 | # If the abstraction is correct, the arrays are equal 293 | assert np.array_equal(r1.array, r2.array) 294 | 295 | def 
test_Raster_boundless_disabled(): 296 | import numpy as np 297 | 298 | bounds = (244300.61494985913, 998877.8262535353, 246444.72726211764, 1000868.7876863468) 299 | outside_bounds = (244156, 1000258, 245114, 1000968) 300 | 301 | # rasterio src fails outside extent 302 | with pytest.raises(ValueError): 303 | r1 = Raster(raster, band=1).read(outside_bounds, boundless=False) 304 | 305 | # rasterio src works inside extent 306 | r2 = Raster(raster, band=1).read(bounds, boundless=False) 307 | 308 | with rasterio.open(raster) as src: 309 | arr = src.read(1) 310 | affine = src.transform 311 | nodata = src.nodata 312 | 313 | # ndarray works inside extent 314 | r3 = Raster(arr, affine, nodata, band=1).read(bounds, boundless=False) 315 | 316 | # ndarray src fails outside extent 317 | with pytest.raises(ValueError): 318 | r4 = Raster(arr, affine, nodata, band=1).read(outside_bounds, boundless=False) 319 | 320 | # If the abstraction is correct, the arrays are equal 321 | assert np.array_equal(r2.array, r3.array) 322 | 323 | def test_Raster_context(): 324 | # Assigned a regular name, stays open 325 | r1 = Raster(raster, band=1) 326 | assert not r1.src.closed 327 | r1.src.close() 328 | 329 | # Used as a context manager, closes itself 330 | with Raster(raster, band=1) as r2: 331 | pass 332 | assert r2.src.closed 333 | 334 | 335 | def test_geointerface(): 336 | class MockGeo(object): 337 | def __init__(self, features): 338 | self.__geo_interface__ = { 339 | 'type': "FeatureCollection", 340 | 'features': features} 341 | 342 | # Make it iterable just to ensure that geo interface 343 | # takes precendence over iterability 344 | def __iter__(self): 345 | pass 346 | 347 | def __next__(self): 348 | pass 349 | 350 | def next(self): 351 | pass 352 | 353 | features = [{ 354 | "type": "Feature", 355 | "properties": {}, 356 | "geometry": { 357 | "type": "Point", 358 | "coordinates": [0, 0]} 359 | }, { 360 | "type": "Feature", 361 | "properties": {}, 362 | "geometry": { 363 | "type": 
"Polygon", 364 | "coordinates": [[[-50, -10], [-40, 10], [-30, -10], [-50, -10]]]}}] 365 | 366 | geothing = MockGeo(features) 367 | assert list(read_features(geothing)) == features 368 | 369 | 370 | # Optional tests 371 | def test_geodataframe(): 372 | try: 373 | import geopandas as gpd 374 | df = gpd.read_file(polygons) 375 | if not hasattr(df, '__geo_interface__'): 376 | pytest.skip("This version of geopandas doesn't support df.__geo_interface__") 377 | except ImportError: 378 | pytest.skip("Can't import geopands") 379 | assert list(read_features(df)) 380 | 381 | 382 | # TODO # io.parse_features on a feature-only geo_interface 383 | # TODO # io.parse_features on a feature-only geojson-like object 384 | # TODO # io.read_features on a feature-only 385 | # TODO # io.Raster.read() on an open rasterio dataset -------------------------------------------------------------------------------- /docs/manual.rst: -------------------------------------------------------------------------------- 1 | User Manual 2 | =========== 3 | 4 | Introduction 5 | ------------ 6 | Geospatial data typically comes in one of two data models: 7 | *rasters* which are similar to images with a regular grid of pixels whose values represent some spatial phenomenon (e.g. elevation) and 8 | *vectors* which are entities with discrete geometries (e.g. state boundaries). 9 | This software, ``rasterstats``, exists solely to extract information from geospatial raster data 10 | based on vector geometries. 11 | 12 | Primarily, this involves *zonal statistics*: a method of summarizing and aggregating the raster values intersecting a vector geometry. For example, zonal statistics provides answers such as the mean precipitation or maximum elevation of an administrative unit. Additionally, functions are provided for *point queries*, most notably the ability to query a raster at a point and get an interpolated value rather than the simple nearest pixel. 
13 | 14 | Basic Example 15 | ------------- 16 | 17 | The typical usage of rasterstats functions involves two arguments, a vector and a raster dataset:: 18 | 19 | >>> from rasterstats import zonal_stats, point_query 20 | >>> stats = zonal_stats('tests/data/polygons.shp', 'tests/data/slope.tif') 21 | >>> pts = point_query('tests/data/points.shp', 'tests/data/slope.tif') 22 | 23 | ``zonal_stats`` gives us a list of two dictionaries corresponding to each input polygon:: 24 | 25 | >>> from pprint import pprint 26 | >>> pprint(stats) 27 | [{'count': 75, 28 | 'max': 22.273418426513672, 29 | 'mean': 14.660084635416666, 30 | 'min': 6.575114727020264}, 31 | {'count': 50, 32 | 'max': 82.69043731689453, 33 | 'mean': 56.60576171875, 34 | 'min': 16.940950393676758}] 35 | 36 | while ``point_query`` gives us a list of raster values corresponding to each input point:: 37 | 38 | >>> pts 39 | [14.037668283186257, 33.1370268256543, 36.46848854950241] 40 | 41 | Vector Data Sources 42 | ------------------- 43 | The most common use case is having vector data sources in a file such as an ESRI Shapefile or any 44 | other format supported by ``fiona``. The path to the file can be passed in directly as the first argument:: 45 | 46 | >>> zs = zonal_stats('tests/data/polygons.shp', 'tests/data/slope.tif') 47 | 48 | If you have multi-layer sources, you can specify the ``layer`` by either name or index:: 49 | 50 | >>> zs = zonal_stats('tests/data', 'tests/data/slope.tif', layer="polygons") 51 | 52 | In addition to the basic usage above, rasterstats supports other 53 | mechanisms of specifying vector geometries. 54 | 55 | The vector argument can be an iterable of GeoJSON-like features such as a fiona source:: 56 | 57 | >>> import fiona 58 | >>> with fiona.open('tests/data/polygons.shp') as src: 59 | ... zs = zonal_stats(src, 'tests/data/slope.tif') 60 | 61 | 62 | You can also pass in an iterable of python objects that support 63 | the ``__geo_interface__`` (e.g. 
Shapely, ArcPy, PyShp, GeoDjango):: 64 | 65 | >>> from shapely.geometry import Point 66 | >>> pt = Point(245000, 1000000) 67 | >>> pt.__geo_interface__ 68 | {'type': 'Point', 'coordinates': (245000.0, 1000000.0)} 69 | >>> point_query([pt], 'tests/data/slope.tif') 70 | [21.32739672330894] 71 | 72 | 73 | Strings in well known text (WKT) and binary (WKB) format :: 74 | 75 | >>> pt.wkt 76 | 'POINT (245000 1000000)' 77 | >>> point_query([pt], 'tests/data/slope.tif') 78 | [21.32739672330894] 79 | 80 | >>> pt.wkb 81 | '\x01\x01\x00\x00\x00\x00\x00\x00\x00@\xe8\rA\x00\x00\x00\x00\x80\x84.A' 82 | >>> point_query([pt], 'tests/data/slope.tif') 83 | [21.32739672330894] 84 | 85 | 86 | Raster Data Sources 87 | ------------------- 88 | 89 | Any format that can be read by ``rasterio`` is supported by ``rasterstats``. 90 | To test if a data source is supported by your installation (this might differ depending on the 91 | format support of the underlying GDAL library), use the rio command line tool:: 92 | 93 | $ rio info raster.tif 94 | 95 | You can specify the path to the raster directly:: 96 | 97 | >>> zs = zonal_stats('tests/data/polygons.shp', 'tests/data/slope.tif') 98 | 99 | If the raster contains multiple bands, you must specify the band (1-indexed):: 100 | 101 | >>> zs = zonal_stats('tests/data/polygons.shp', 'tests/data/slope.tif', band=1) 102 | 103 | Or you can pass a numpy ``ndarray`` with an affine transform mapping the array dimensions 104 | to a coordinate reference system:: 105 | 106 | >>> import rasterio 107 | >>> with rasterio.open('tests/data/slope.tif') as src: 108 | ... affine = src.transform 109 | ... 
array = src.read(1) 110 | >>> zs = zonal_stats('tests/data/polygons.shp', array, affine=affine) 111 | 112 | 113 | Zonal Statistics 114 | ---------------- 115 | 116 | Statistics 117 | ^^^^^^^^^^ 118 | 119 | By default, the ``zonal_stats`` function will return the following statistics 120 | 121 | - min 122 | - max 123 | - mean 124 | - count 125 | 126 | Optionally, these statistics are also available. 127 | 128 | - sum 129 | - std 130 | - median 131 | - majority 132 | - minority 133 | - unique 134 | - range 135 | - nodata 136 | - percentile (see note below for details) 137 | 138 | You can specify the statistics to calculate using the ``stats`` argument:: 139 | 140 | >>> stats = zonal_stats("tests/data/polygons.shp", 141 | ... "tests/data/slope.tif", 142 | ... stats=['min', 'max', 'median', 'majority', 'sum']) 143 | 144 | You can also specify as a space-delimited string:: 145 | 146 | >>> stats = zonal_stats("tests/data/polygons.shp", 147 | ... "tests/data/slope.tif", 148 | ... stats="min max median majority sum") 149 | 150 | 151 | Note that certain statistics (majority, minority, and unique) require significantly more processing 152 | due to expensive counting of unique occurrences for each pixel value. 153 | 154 | You can also use a percentile statistic by specifying 155 | ``percentile_<q>`` where ``<q>`` can be a floating point number between 0 and 100. 156 | 157 | User-defined Statistics 158 | ^^^^^^^^^^^^^^^^^^^^^^^ 159 | You can define your own aggregate functions using the ``add_stats`` argument. 160 | This is a dictionary with the name(s) of your statistic as keys and the function(s) 161 | as values. For example, to reimplement the `mean` statistic:: 162 | 163 | >>> from __future__ import division 164 | >>> import numpy as np 165 | 166 | >>> def mymean(x): 167 | ... return np.ma.mean(x) 168 | 169 | then use it in your ``zonal_stats`` call like so:: 170 | 171 | >>> zonal_stats("tests/data/polygons.shp", 172 | ... "tests/data/slope.tif", 173 | ...
stats="count", 174 | ... add_stats={'mymean':mymean}) 175 | [{'count': 75, 'mymean': 14.660084635416666}, {'count': 50, 'mymean': 56.605761718750003}] 176 | 177 | To have access to geometry properties, a dictionary can be passed to the user-defined function:: 178 | 179 | >>> def mymean_prop(x,prop): 180 | ... return np.ma.mean(x) * prop['id'] 181 | 182 | then use it in your ``zonal_stats`` call like so:: 183 | 184 | >>> zonal_stats("tests/data/polygons.shp", 185 | ... "tests/data/slope.tif", 186 | ... stats="count", 187 | ... add_stats={'mymean_prop':mymean_prop}, 188 | ... properties=['id']) 189 | [{'count': 75, 'mymean_prop': 14.660084635416666}, {'count': 50, 'mymean_prop': 113.2115234375}] 190 | 191 | 192 | GeoJSON output 193 | ^^^^^^^^^^^^^^ 194 | 195 | If you want to retain the geometries and properties of the input features, 196 | you can output a list of geojson features using ``geojson_out``. The features 197 | contain the zonal statistics as additional properties:: 198 | 199 | >>> stats = zonal_stats("tests/data/polygons.shp", 200 | ... "tests/data/slope.tif", 201 | ... geojson_out=True) 202 | 203 | >>> stats[0]['type'] 204 | 'Feature' 205 | >>> stats[0]['properties'].keys() 206 | [u'id', 'count', 'max', 'mean', 'min'] 207 | 208 | 209 | Rasterization Strategy 210 | ^^^^^^^^^^^^^^^^^^^^^^ 211 | 212 | There is no right or wrong way to rasterize a vector. The default strategy is to include all pixels along the line render path (for lines), or cells where the *center point* is within the polygon (for polygons). Alternatively, you can opt for the ``all_touched`` strategy which rasterizes the geometry by including all pixels that it touches. You can enable this specifying:: 213 | 214 | >>> zs = zonal_stats("tests/data/polygons.shp", 215 | ... "tests/data/slope.tif", 216 | ... all_touched=True) 217 | 218 | .. 
figure:: https://github.com/perrygeo/python-raster-stats/raw/master/docs/img/rasterization.png 219 | :align: center 220 | :alt: rasterization 221 | 222 | The figure above illustrates the difference; the default ``all_touched=False`` is on the left 223 | while the ``all_touched=True`` option is on the right. 224 | Both approaches are valid and there are tradeoffs to consider. Using the default rasterizer may miss polygons that are smaller than your cell size resulting in ``None`` stats for those geometries. Using the ``all_touched`` strategy includes many cells along the edges that may not be representative of the geometry and may give severely biased results in some cases. 225 | 226 | 227 | Working with categorical rasters 228 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 229 | 230 | You can treat rasters as categorical (i.e. raster values represent 231 | discrete classes) if you're only interested in the counts of unique pixel 232 | values. 233 | 234 | For example, you may have a raster vegetation dataset and want to summarize 235 | vegetation by polygon. Statistics such as mean, median, sum, etc. don't make much sense in this context 236 | (What's the sum of ``oak + grassland``?). 237 | 238 | Using ``categorical``, the output is dictionary with the unique raster values as keys 239 | and pixel counts as values:: 240 | 241 | >>> zonal_stats('tests/data/polygons.shp', 242 | ... 'tests/data/slope_classes.tif', 243 | ... categorical=True)[1] 244 | {1.0: 1, 2.0: 9, 5.0: 40} 245 | 246 | rasterstats will report using the pixel values as keys. 247 | To associate the pixel values with their appropriate meaning, 248 | you can use a ``category_map``:: 249 | 250 | >>> cmap = {1.0: 'low', 2.0: 'med', 5.0: 'high'} 251 | >>> zonal_stats('tests/data/polygons.shp', 252 | ... 'tests/data/slope_classes.tif', 253 | ... 
categorical=True, category_map=cmap)[1] 254 | {'high': 40, 'med': 9, 'low': 1} 255 | 256 | "Mini-Rasters" 257 | ^^^^^^^^^^^^^^^ 258 | 259 | Internally, we create a masked raster dataset for each feature in order to 260 | calculate statistics. Optionally, we can include these data in the output 261 | of ``zonal_stats`` using the ``raster_out`` argument:: 262 | 263 | >>> zonal_stats('tests/data/polygons.shp', 264 | ... 'tests/data/slope_classes.tif', 265 | ... stats="count", 266 | ... raster_out=True)[0].keys() 267 | ['count', 'mini_raster_affine', 'mini_raster_array', 'mini_raster_nodata'] 268 | 269 | Notice we have three additional keys:: 270 | 271 | * ``mini_raster_array``: The clipped and masked numpy array 272 | * ``mini_raster_affine``: transformation as an Affine object 273 | * ``mini_raster_nodata``: The nodata value 274 | 275 | Keep in mind that having ndarrays in your stats dictionary means it is more 276 | difficult to serialize to json and other text formats. 277 | 278 | 279 | Design Goals 280 | ------------ 281 | 282 | ``rasterstats`` aims to do only one thing well: getting information from rasters based on vector geometry. 283 | This module doesn't support coordinate reprojection, raster re-sampling, geometry manipulations or any other 284 | geospatial data transformations as those are better left to other Python packages. To the extent possible, 285 | data input is handled by ``fiona`` and ``rasterio``, though there are some wrapper functions for IO to 286 | maintain usability. Where interoperability between packages is needed, loose coupling, simple python data structure 287 | and standard interfaces like GeoJSON are employed to keep the core library lean. 288 | 289 | History 290 | -------- 291 | This work grew out of a need to have a native python implementation (based on numpy) for zonal statistics. 292 | I had been `using starspan `_, a C++ 293 | command line tool, as well as GRASS's `r.statistics `_ for many years. 
294 | They were suitable for offline analyses but were rather clunky to deploy in a large python application. 295 | In 2013, I implemented a proof-of-concept zonal stats function which eventually became ``rasterstats``. It has 296 | been in production in several large python web applications ever since, replacing the starspan wrapper `madrona.raster_stats `_. 297 | 298 | 299 | -------------------------------------------------------------------------------- /tests/test_zonal.py: -------------------------------------------------------------------------------- 1 | # test zonal stats 2 | import os 3 | import pytest 4 | import simplejson 5 | import json 6 | import sys 7 | import numpy as np 8 | import rasterio 9 | from rasterstats import zonal_stats, raster_stats 10 | from rasterstats.utils import VALID_STATS 11 | from rasterstats.io import read_featurecollection, read_features 12 | from shapely.geometry import Polygon 13 | from affine import Affine 14 | 15 | sys.path.append(os.path.dirname(os.path.abspath(__file__))) 16 | 17 | DATA = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data") 18 | raster = os.path.join(DATA, 'slope.tif') 19 | 20 | 21 | def test_main(): 22 | polygons = os.path.join(DATA, 'polygons.shp') 23 | stats = zonal_stats(polygons, raster) 24 | for key in ['count', 'min', 'max', 'mean']: 25 | assert key in stats[0] 26 | assert len(stats) == 2 27 | assert stats[0]['count'] == 75 28 | assert stats[1]['count'] == 50 29 | assert round(stats[0]['mean'], 2) == 14.66 30 | 31 | 32 | # remove after band_num alias is removed 33 | def test_band_alias(): 34 | polygons = os.path.join(DATA, 'polygons.shp') 35 | stats_a = zonal_stats(polygons, raster) 36 | stats_b = zonal_stats(polygons, raster, band=1) 37 | with pytest.deprecated_call(): 38 | stats_c = zonal_stats(polygons, raster, band_num=1) 39 | assert stats_a[0]['count'] == stats_b[0]['count'] == stats_c[0]['count'] 40 | 41 | 42 | def test_zonal_global_extent(): 43 | polygons = os.path.join(DATA, 
'polygons.shp') 44 | stats = zonal_stats(polygons, raster) 45 | global_stats = zonal_stats(polygons, raster, global_src_extent=True) 46 | assert stats == global_stats 47 | 48 | 49 | def test_zonal_nodata(): 50 | polygons = os.path.join(DATA, 'polygons.shp') 51 | stats = zonal_stats(polygons, raster, nodata=0) 52 | assert len(stats) == 2 53 | assert stats[0]['count'] == 75 54 | assert stats[1]['count'] == 50 55 | 56 | 57 | def test_doesnt_exist(): 58 | nonexistent = os.path.join(DATA, 'DOESNOTEXIST.shp') 59 | with pytest.raises(ValueError): 60 | zonal_stats(nonexistent, raster) 61 | 62 | 63 | def test_nonsense(): 64 | polygons = os.path.join(DATA, 'polygons.shp') 65 | with pytest.raises(ValueError): 66 | zonal_stats("blaghrlargh", raster) 67 | with pytest.raises(IOError): 68 | zonal_stats(polygons, "blercherlerch") 69 | with pytest.raises(ValueError): 70 | zonal_stats(["blaghrlargh", ], raster) 71 | 72 | 73 | # Different geometry types 74 | def test_points(): 75 | points = os.path.join(DATA, 'points.shp') 76 | stats = zonal_stats(points, raster) 77 | # three features 78 | assert len(stats) == 3 79 | # three pixels 80 | assert sum([x['count'] for x in stats]) == 3 81 | assert round(stats[0]['mean'], 3) == 11.386 82 | assert round(stats[1]['mean'], 3) == 35.547 83 | 84 | 85 | def test_points_categorical(): 86 | points = os.path.join(DATA, 'points.shp') 87 | categorical_raster = os.path.join(DATA, 'slope_classes.tif') 88 | stats = zonal_stats(points, categorical_raster, categorical=True) 89 | # three features 90 | assert len(stats) == 3 91 | assert 'mean' not in stats[0] 92 | assert stats[0][1.0] == 1 93 | assert stats[1][2.0] == 1 94 | 95 | 96 | def test_lines(): 97 | lines = os.path.join(DATA, 'lines.shp') 98 | stats = zonal_stats(lines, raster) 99 | assert len(stats) == 2 100 | assert stats[0]['count'] == 58 101 | assert stats[1]['count'] == 32 102 | 103 | 104 | # Test multigeoms 105 | def test_multipolygons(): 106 | multipolygons = os.path.join(DATA, 
'multipolygons.shp') 107 | stats = zonal_stats(multipolygons, raster) 108 | assert len(stats) == 1 109 | assert stats[0]['count'] == 125 110 | 111 | 112 | def test_multilines(): 113 | multilines = os.path.join(DATA, 'multilines.shp') 114 | stats = zonal_stats(multilines, raster) 115 | assert len(stats) == 1 116 | # can differ slightly based on platform/gdal version 117 | assert stats[0]['count'] in [89, 90] 118 | 119 | 120 | def test_multipoints(): 121 | multipoints = os.path.join(DATA, 'multipoints.shp') 122 | stats = zonal_stats(multipoints, raster) 123 | assert len(stats) == 1 124 | assert stats[0]['count'] == 3 125 | 126 | 127 | def test_categorical(): 128 | polygons = os.path.join(DATA, 'polygons.shp') 129 | categorical_raster = os.path.join(DATA, 'slope_classes.tif') 130 | stats = zonal_stats(polygons, categorical_raster, categorical=True) 131 | assert len(stats) == 2 132 | assert stats[0][1.0] == 75 133 | assert 5.0 in stats[1] 134 | 135 | 136 | def test_categorical_map(): 137 | polygons = os.path.join(DATA, 'polygons.shp') 138 | categorical_raster = os.path.join(DATA, 'slope_classes.tif') 139 | catmap = {5.0: 'cat5'} 140 | stats = zonal_stats(polygons, categorical_raster, 141 | categorical=True, category_map=catmap) 142 | assert len(stats) == 2 143 | assert stats[0][1.0] == 75 144 | assert 5.0 not in stats[1] 145 | assert 'cat5' in stats[1] 146 | 147 | 148 | def test_specify_stats_list(): 149 | polygons = os.path.join(DATA, 'polygons.shp') 150 | stats = zonal_stats(polygons, raster, stats=['min', 'max']) 151 | assert sorted(stats[0].keys()) == sorted(['min', 'max']) 152 | assert 'count' not in list(stats[0].keys()) 153 | 154 | 155 | def test_specify_all_stats(): 156 | polygons = os.path.join(DATA, 'polygons.shp') 157 | stats = zonal_stats(polygons, raster, stats='ALL') 158 | assert sorted(stats[0].keys()) == sorted(VALID_STATS) 159 | stats = zonal_stats(polygons, raster, stats='*') 160 | assert sorted(stats[0].keys()) == sorted(VALID_STATS) 161 | 162 | 163 
| def test_specify_stats_string(): 164 | polygons = os.path.join(DATA, 'polygons.shp') 165 | stats = zonal_stats(polygons, raster, stats='min max') 166 | assert sorted(stats[0].keys()) == sorted(['min', 'max']) 167 | assert 'count' not in list(stats[0].keys()) 168 | 169 | 170 | def test_specify_stats_invalid(): 171 | polygons = os.path.join(DATA, 'polygons.shp') 172 | with pytest.raises(ValueError): 173 | zonal_stats(polygons, raster, stats='foo max') 174 | 175 | 176 | def test_optional_stats(): 177 | polygons = os.path.join(DATA, 'polygons.shp') 178 | stats = zonal_stats(polygons, raster, 179 | stats='min max sum majority median std') 180 | assert stats[0]['min'] <= stats[0]['median'] <= stats[0]['max'] 181 | 182 | 183 | def test_range(): 184 | polygons = os.path.join(DATA, 'polygons.shp') 185 | stats = zonal_stats(polygons, raster, stats="range min max") 186 | for stat in stats: 187 | assert stat['range'] == stat['max'] - stat['min'] 188 | ranges = [x['range'] for x in stats] 189 | # without min/max specified 190 | stats = zonal_stats(polygons, raster, stats="range") 191 | assert 'min' not in stats[0] 192 | assert ranges == [x['range'] for x in stats] 193 | 194 | 195 | def test_nodata(): 196 | polygons = os.path.join(DATA, 'polygons.shp') 197 | categorical_raster = os.path.join(DATA, 'slope_classes.tif') 198 | stats = zonal_stats(polygons, categorical_raster, stats="*", 199 | categorical=True, nodata=1.0) 200 | assert stats[0]['majority'] is None 201 | assert stats[0]['count'] == 0 # no pixels; they're all null 202 | assert stats[1]['minority'] == 2.0 203 | assert stats[1]['count'] == 49 # used to be 50 if we allowed 1.0 204 | assert '1.0' not in stats[0] 205 | 206 | 207 | def test_dataset_mask(): 208 | polygons = os.path.join(DATA, 'polygons.shp') 209 | raster = os.path.join(DATA, 'dataset_mask.tif') 210 | stats = zonal_stats(polygons, raster, stats="*") 211 | assert stats[0]['count'] == 75 212 | assert stats[1]['count'] == 0 213 | 214 | 215 | def 
def test_partial_overlap():
    polygons = os.path.join(DATA, 'polygons_partial_overlap.shp')
    stats = zonal_stats(polygons, raster, stats="count")
    for res in stats:
        # each polygon should have at least a few pixels overlap
        assert res['count'] > 0


def test_no_overlap():
    polygons = os.path.join(DATA, 'polygons_no_overlap.shp')
    stats = zonal_stats(polygons, raster, stats="count")
    for res in stats:
        # no polygon should have any overlap
        # NOTE: compare with ==, not `is`; identity checks against int
        # literals are CPython-cache-dependent and a SyntaxWarning on 3.8+
        assert res['count'] == 0


def test_all_touched():
    """all_touched=True counts every pixel touched, not just pixel centers."""
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, all_touched=True)
    assert stats[0]['count'] == 95  # 75 if ALL_TOUCHED=False
    assert stats[1]['count'] == 73  # 50 if ALL_TOUCHED=False


def test_ndarray_without_affine():
    """Passing a bare ndarray without an affine transform must raise."""
    with rasterio.open(raster) as src:
        polygons = os.path.join(DATA, 'polygons.shp')
        with pytest.raises(ValueError):
            zonal_stats(polygons, src.read(1))  # needs affine kwarg


def _assert_dict_eq(a, b):
    """Assert that dicts a and b are similar within floating point precision.

    Raises AssertionError (never KeyError) on any mismatch, including a key
    present in only one of the two dicts.
    """
    err = 1e-5
    for k in set(a.keys()).union(set(b.keys())):
        if k not in a or k not in b:
            # previously this fell through to a bare KeyError; surface it
            # as an assertion failure so callers see a proper diff message
            raise AssertionError("key {!r} missing from one dict".format(k))
        if a[k] == b[k]:
            continue
        try:
            if abs(a[k] - b[k]) > err:
                raise AssertionError("{}: {} != {}".format(k, a[k], b[k]))
        except TypeError:  # can't take abs (e.g. None or nan)
            raise AssertionError("{} != {}".format(a[k], b[k]))


def test_ndarray():
    with rasterio.open(raster) as src:
        arr = src.read(1)
        affine = src.transform

    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, arr, affine=affine)
    stats2 = zonal_stats(polygons, raster)
    for s1, s2 in zip(stats, stats2):
        _assert_dict_eq(s1, s2)
    with pytest.raises(AssertionError):
        _assert_dict_eq(stats[0], stats[1])
    assert stats[0]['count'] == 75
    assert stats[1]['count'] == 50

    points = os.path.join(DATA, 'points.shp')
    stats = zonal_stats(points, arr, affine=affine)
    assert stats == zonal_stats(points, raster)
    assert sum([x['count'] for x in stats]) == 3
    assert round(stats[0]['mean'], 3) == 11.386
    assert round(stats[1]['mean'], 3) == 35.547


def test_alias():
    """raster_stats is a deprecated alias for zonal_stats."""
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster)
    with pytest.deprecated_call():
        stats2 = raster_stats(polygons, raster)
    assert stats == stats2


def test_add_stats():
    polygons = os.path.join(DATA, 'polygons.shp')

    def mymean(x):
        return np.ma.mean(x)

    stats = zonal_stats(polygons, raster, add_stats={'mymean': mymean})
    for i in range(len(stats)):
        assert stats[i]['mean'] == stats[i]['mymean']


def test_add_stats_prop():
    """add_stats callables may also accept the feature's properties dict."""
    polygons = os.path.join(DATA, 'polygons.shp')

    def mymean_prop(x, prop):
        return np.ma.mean(x) * prop['id']

    stats = zonal_stats(polygons, raster, add_stats={'mymean_prop': mymean_prop})
    for i in range(len(stats)):
        assert stats[i]['mymean_prop'] == stats[i]['mean'] * (i + 1)


def test_mini_raster():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, raster_out=True)
    stats2 = zonal_stats(polygons, stats[0]['mini_raster_array'],
                         raster_out=True, affine=stats[0]['mini_raster_affine'])
    assert (stats[0]['mini_raster_array'] == stats2[0]['mini_raster_array']).sum() == \
        stats[0]['count']


def test_percentile_good():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster,
                        stats="median percentile_50 percentile_90")
    assert 'percentile_50' in stats[0].keys()
    assert 'percentile_90' in stats[0].keys()
    assert stats[0]['percentile_50'] == stats[0]['median']
    assert stats[0]['percentile_50'] <= stats[0]['percentile_90']


def test_zone_func_has_return():

    def example_zone_func(zone_arr):
        return np.ma.masked_array(np.full(zone_arr.shape, 1))

    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons,
                        raster,
                        zone_func=example_zone_func)
    assert stats[0]['max'] == 1
    assert stats[0]['min'] == 1
    assert stats[0]['mean'] == 1


def test_zone_func_good():

    def example_zone_func(zone_arr):
        # mutates in place; zonal_stats applies this before computing stats
        zone_arr[:] = 0

    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons,
                        raster,
                        zone_func=example_zone_func)
    assert stats[0]['max'] == 0
    assert stats[0]['min'] == 0
    assert stats[0]['mean'] == 0


def test_zone_func_bad():
    not_a_func = 'jar jar binks'
    polygons = os.path.join(DATA, 'polygons.shp')
    with pytest.raises(TypeError):
        zonal_stats(polygons, raster, zone_func=not_a_func)


def test_percentile_nodata():
    polygons = os.path.join(DATA, 'polygons.shp')
    categorical_raster = os.path.join(DATA, 'slope_classes.tif')
    # By setting nodata to 1, one of our polygons is within the raster extent
    # but has an empty masked array
    stats = zonal_stats(polygons, categorical_raster,
                        stats=["percentile_90"], nodata=1)
    assert 'percentile_90' in stats[0].keys()
    assert [None, 5.0] == [x['percentile_90'] for x in stats]


def test_percentile_bad():
    polygons = os.path.join(DATA, 'polygons.shp')
    with pytest.raises(ValueError):
        zonal_stats(polygons, raster, stats="percentile_101")


def test_json_serializable():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster,
                        stats=VALID_STATS + ["percentile_90"],
                        categorical=True)
    try:
        json.dumps(stats)
        simplejson.dumps(stats)
    except TypeError:
        pytest.fail("zonal_stats returned a list that wasn't JSON-serializable")


def test_direct_features_collections():
    """Path, feature iterable and collection inputs must give equal results."""
    polygons = os.path.join(DATA, 'polygons.shp')
    features = read_features(polygons)
    collection = read_featurecollection(polygons)

    stats_direct = zonal_stats(polygons, raster)
    stats_features = zonal_stats(features, raster)
    stats_collection = zonal_stats(collection, raster)

    assert stats_direct == stats_features == stats_collection


def test_all_nodata():
    polygons = os.path.join(DATA, 'polygons.shp')
    raster = os.path.join(DATA, 'all_nodata.tif')
    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
    assert stats[0]['nodata'] == 75
    assert stats[0]['count'] == 0
    assert stats[1]['nodata'] == 50
    assert stats[1]['count'] == 0


def test_some_nodata():
    polygons = os.path.join(DATA, 'polygons.shp')
    raster = os.path.join(DATA, 'slope_nodata.tif')
    stats = zonal_stats(polygons, raster, stats=['nodata', 'count'])
    assert stats[0]['nodata'] == 36
    assert stats[0]['count'] == 39
    assert stats[1]['nodata'] == 19
    assert stats[1]['count'] == 31


# update this if nan end up being incorporated into nodata
def test_nan_nodata():
    polygon = Polygon([[0, 0], [2, 0], [2, 2], [0, 2]])
    arr = np.array([
        [np.nan, 12.25],
        [-999, 12.75]
    ])
    affine = Affine(1, 0, 0,
                    0, -1, 2)

    stats = zonal_stats(polygon, arr, affine=affine, nodata=-999,
                        stats='nodata count sum mean min max')

    assert stats[0]['nodata'] == 1
    assert stats[0]['count'] == 2
    assert stats[0]['mean'] == 12.5
    assert stats[0]['min'] == 12.25
    assert stats[0]['max'] == 12.75


def test_some_nodata_ndarray():
    polygons = os.path.join(DATA, 'polygons.shp')
    raster = os.path.join(DATA, 'slope_nodata.tif')
    with rasterio.open(raster) as src:
        arr = src.read(1)
        affine = src.transform

    # without nodata
    stats = zonal_stats(polygons, arr, affine=affine,
                        stats=['nodata', 'count', 'min'])
    assert stats[0]['min'] == -9999.0
    assert stats[0]['nodata'] == 0
    assert stats[0]['count'] == 75

    # with nodata
    stats = zonal_stats(polygons, arr, affine=affine,
                        nodata=-9999.0, stats=['nodata', 'count', 'min'])
    assert stats[0]['min'] >= 0.0
    assert stats[0]['nodata'] == 36
    assert stats[0]['count'] == 39


def test_transform():
    """GDAL-style `transform` kwarg is deprecated but must match `affine`."""
    with rasterio.open(raster) as src:
        arr = src.read(1)
        affine = src.transform
    polygons = os.path.join(DATA, 'polygons.shp')

    stats = zonal_stats(polygons, arr, affine=affine)
    with pytest.deprecated_call():
        stats2 = zonal_stats(polygons, arr, transform=affine.to_gdal())
    assert stats == stats2


def test_prefix():
    polygons = os.path.join(DATA, 'polygons.shp')
    stats = zonal_stats(polygons, raster, prefix="TEST")
    for key in ['count', 'min', 'max', 'mean']:
        assert key not in stats[0]
    for key in ['TESTcount', 'TESTmin', 'TESTmax', 'TESTmean']:
        assert key in stats[0]


def test_geojson_out():
    polygons = os.path.join(DATA, 'polygons.shp')
    features = zonal_stats(polygons, raster, geojson_out=True)
    for feature in features:
        assert feature['type'] == 'Feature'
        assert 'id' in feature['properties']  # from orig
        assert 'count' in feature['properties']  # from zonal stats


# do not think this is actually testing the line i wanted it to
# since the read_features func for this data type is generating
# the properties field
def test_geojson_out_with_no_properties():
    # second vertex was `[0, 0,5]` (comma typo for the decimal point,
    # making a 3-D coordinate); corrected to [0, 0.5]
    polygon = Polygon([[0, 0], [0, 0.5], [1, 1.5], [1.5, 2], [2, 2], [2, 0]])
    arr = np.array([
        [100, 1],
        [100, 1]
    ])
    affine = Affine(1, 0, 0,
                    0, -1, 2)

    stats = zonal_stats(polygon, arr, affine=affine, geojson_out=True)
    assert 'properties' in stats[0]
    for key in ['count', 'min', 'max', 'mean']:
        assert key in stats[0]['properties']

    # pixels 100, 1, 1 fall inside the polygon -> mean == 34
    assert stats[0]['properties']['mean'] == 34


# remove when copy_properties alias is removed
def test_copy_properties_warn():
    polygons = os.path.join(DATA, 'polygons.shp')
    # run once to trigger any other unrelated deprecation warnings
    # so the test does not catch them instead
    stats_a = zonal_stats(polygons, raster)
    with pytest.deprecated_call():
        stats_b = zonal_stats(polygons, raster, copy_properties=True)
    assert stats_a == stats_b


def test_nan_counts():
    transform = Affine(1, 0, 1, 0, -1, 3)

    data = np.array([
        [np.nan, np.nan, np.nan],
        [0, 0, 0],
        [1, 4, 5]
    ])

    # geom extends an additional row to left
    geom = 'POLYGON ((1 0, 4 0, 4 3, 1 3, 1 0))'

    # nan stat is requested
    stats = zonal_stats(geom, data, affine=transform, nodata=0.0, stats="*")

    for res in stats:
        assert res['count'] == 3  # 3 pixels of valid data
        assert res['nodata'] == 3  # 3 pixels of nodata
        assert res['nan'] == 3  # 3 pixels of nans

    # nan are ignored if nan stat is not requested
    stats = zonal_stats(geom, data, affine=transform, nodata=0.0,
                        stats="count nodata")

    for res in stats:
        assert res['count'] == 3  # 3 pixels of valid data
        assert res['nodata'] == 3  # 3 pixels of nodata
        assert 'nan' not in res


# Optional tests
def test_geodataframe_zonal():
    polygons = os.path.join(DATA, 'polygons.shp')

    try:
        import geopandas as gpd
        df = gpd.read_file(polygons)
        if not hasattr(df, '__geo_interface__'):
            pytest.skip("This version of geopandas doesn't support df.__geo_interface__")
    except ImportError:
        pytest.skip("Can't import geopandas")

    expected = zonal_stats(polygons, raster)
    assert zonal_stats(df, raster) == expected

# TODO # gen_zonal_stats()
# TODO # gen_zonal_stats(stats=nodata)
# TODO # gen_zonal_stats()
# TODO # gen_zonal_stats(transform AND affine)