├── .gitignore ├── .pre-commit-config.yaml ├── .travis.yml ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.md ├── README.rst ├── examples └── simple_read.py ├── requirements.txt ├── riomucho ├── __init__.py ├── single_process_pool.py └── utils.py ├── setup.py └── tests ├── conftest.py └── test_mod.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # OS X 57 | .DS_Store 58 | .coverage* 59 | .pytest_cache 60 | *.orig 61 | *.rej 62 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - 3 | repo: https://github.com/ambv/black 4 | rev: ed50737290662f6ef4016a7ea44da78ee1eff1e2 5 | hooks: 6 | - id: black 7 | args: [--safe] 8 | language_version: python3.6 9 | - 10 | repo: 'https://github.com/pre-commit/pre-commit-hooks' 11 | # v1.3.0 12 | rev: a6209d8d4f97a09b61855ea3f1fb250f55147b8b 13 | hooks: 14 | - id: flake8 15 | args: [ 16 | # E501 let black handle all 
line length decisions 17 | # W503 black conflicts with "line break before operator" rule 18 | # E203 black conflicts with "whitespace before ':'" rule 19 | '--ignore=E501,W503,E203'] 20 | - 21 | repo: 'https://github.com/chewse/pre-commit-mirrors-pydocstyle' 22 | # 2.1.1 23 | rev: 22d3ccf6cf91ffce3b16caa946c155778f0cb20f 24 | hooks: 25 | - id: pydocstyle 26 | args: [ 27 | # Check for docstring presence only 28 | '--select=D1', 29 | # Don't require docstrings for tests 30 | '--match=(?!test).*\.py', 31 | # Skip docstring check for dunder methods 32 | --add-ignore=D105] 33 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | sudo: false 4 | 5 | cache: 6 | # Apparently if you override the install command that silently disables the 7 | # cache: pip support. This is less than ideal and I've opened up 8 | # travis-ci/travis-ci#3239 to hopefully get that addressed. For now I'll 9 | # manually add the pip cache directory to the build cache. 10 | directories: 11 | - ~/.cache/pip 12 | 13 | env: 14 | global: 15 | # These two environment variables could be set by Travis itself, or Travis 16 | # could configure itself in /etc/, ~/, or inside of the virtual 17 | # environments. In any case if these two values get configured then end 18 | # users only need to enable the pip cache and manually run pip wheel before 19 | # running pip install. 
20 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels 21 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels 22 | 23 | python: 24 | - "2.7" 25 | - "3.6" 26 | 27 | cache: 28 | directories: 29 | - $HOME/.pip-cache/ 30 | - $HOME/wheelhouse 31 | 32 | before_install: 33 | - pip install -U pip 34 | - pip install -r requirements.txt 35 | - pip install pre-commit 36 | - pip install pytest~=3.10.0 pytest-cov 37 | 38 | install: 39 | - pip install -e .[test] 40 | 41 | script: 42 | - if [[ $TRAVIS_PYTHON_VERSION == 3.6 ]]; then pre-commit run --all-files; fi 43 | - python -m pytest -vv --cov riomucho --cov-report term-missing 44 | 45 | after_success: 46 | - coveralls 47 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | 1.0.0 (2018-07-19) 2 | ------------------ 3 | 4 | The rio-mucho package requires rasterio~=1.0. There are no other changes since 5 | 1.0rc1. 6 | 7 | 1.0rc1 (2018-07-13) 8 | ------------------- 9 | 10 | :tada: 11 | 12 | Support for Python versions 3.0-3.4 will be dropped in 1.0.0. These versions do 13 | not have the fix for https://bugs.python.org/issue28699 and the multiprocessing 14 | pool in riomucho can hang when exceptions are raised by a worker. 
15 | 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Mapbox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rio-mucho 2 | 3 | Windowed parallel processing wrapper for rasterio 4 | 5 | [![PyPI](https://img.shields.io/pypi/v/rio-mucho.svg?maxAge=2592000?style=plastic)]() [![Build Status](https://travis-ci.org/mapbox/rio-mucho.svg?branch=master)](https://travis-ci.org/mapbox/rio-mucho) [![Coverage Status](https://coveralls.io/repos/mapbox/rio-mucho/badge.svg?branch=master&service=github)](https://coveralls.io/github/mapbox/rio-mucho?branch=master) 6 | 7 | ## Install 8 | 9 | From pypi: 10 | 11 | `pip install rio-mucho` 12 | 13 | From github: 14 | 15 | `pip install git+ssh://git@github.com/mapbox/rio-mucho.git@` 16 | 17 | Development: 18 | 19 | ``` 20 | git clone git@github.com:mapbox/rio-mucho.git 21 | cd rio-mucho 22 | pip install -e . 23 | ``` 24 | 25 | ## Usage 26 | 27 | ```python 28 | with riomucho.RioMucho([{inputs}], {output}, {run function}, 29 | windows={windows}, 30 | global_args={global arguments}, 31 | options={options to write}) as rios: 32 | 33 | rios.run({processes}) 34 | ``` 35 | 36 | ### Arguments 37 | 38 | #### `inputs` 39 | 40 | A list of file paths to open and read. 41 | 42 | #### `output` 43 | 44 | What file to write to. 45 | 46 | #### `run_function` 47 | 48 | A function to be applied to each window chunk. This should have input arguments of: 49 | 50 | 1. 
A data input, which can be one of: 51 | - A list of numpy arrays of shape (x,y,z), one for each file as specified in input file list `mode="simple_read" [default]` 52 | - A numpy array of shape ({_n_ input files x _n_ band count}, {window rows}, {window cols}) `mode="array_read"` 53 | - A list of open sources for reading `mode="manual_read"` 54 | 2. A `rasterio` window tuple 55 | 3. A `rasterio` window index (`ij`) 56 | 4. A global arguments object that you can use to pass in global arguments 57 | 58 | This should return: 59 | 60 | 1. An output array of ({depth|count}, {window rows}, {window cols}) shape, and of the correct data type for writing 61 | 62 | ```python 63 | def basic_run({data}, {window}, {ij}, {global args}): 64 | ## do something 65 | return {out} 66 | ``` 67 | 68 | ### Keyword arguments 69 | 70 | #### `windows={windows}` 71 | 72 | A list of `rasterio` (window, ij) tuples to operate on. `[Default = srcs[0].block_windows()]` 73 | 74 | #### `global_args={global arguments}` 75 | 76 | Since this is working in parallel, pass any other objects / values that you want to be accessible in the `run_function` here. `[Default = {}]` 77 | 78 | ```python 79 | global_args = { 80 | 'divide_value': 2 81 | } 82 | ``` 83 | 84 | #### `options={keyword args}` 85 | 86 | The options to pass to the writing output. `[Default = srcs[0].profile]` 87 | 88 | ## Example 89 | 90 | ```python 91 | import riomucho, rasterio, numpy 92 | 93 | def basic_run(data, window, ij, g_args): 94 | ## do something 95 | out = numpy.array( 96 | [d[0] / g_args['divide'] for d in data] 97 | ) 98 | return out 99 | 100 | # get windows from an input 101 | with rasterio.open('/tmp/test_1.tif') as src: 102 | ## grabbing the windows as an example. Default behavior is identical. 
103 | windows = [[window, ij] for ij, window in src.block_windows()] 104 | options = src.meta 105 | # since we are only writing to 2 bands 106 | options.update(count=2) 107 | 108 | global_args = { 109 | 'divide': 2 110 | } 111 | 112 | processes = 4 113 | 114 | # run it 115 | with riomucho.RioMucho(['input1.tif','input2.tif'], 'output.tif', basic_run, 116 | windows=windows, 117 | global_args=global_args, 118 | options=options) as rm: 119 | 120 | rm.run(processes) 121 | 122 | ``` 123 | 124 | ## Utility functions 125 | 126 | ### `riomucho.utils.array_stack([array, array, array,...])` 127 | 128 | Given a list of ({depth}, {rows}, {cols}) numpy arrays, stack into a single ({list length * each image depth}, {rows}, {cols}) array. This is useful for handling variation between `rgb` inputs of a single file, or separate files for each. 129 | 130 | #### One RGB file 131 | 132 | ```python 133 | files = ['rgb.tif'] 134 | open_files = [rasterio.open(f) for f in files] 135 | rgb = riomucho.utils.array_stack([src.read() for src in open_files]) 136 | ``` 137 | 138 | #### Separate RGB files 139 | 140 | ```python 141 | files = ['r.tif', 'g.tif', 'b.tif'] 142 | open_files = [rasterio.open(f) for f in files] 143 | rgb = riomucho.utils.array_stack([src.read() for src in open_files]) 144 | ``` 145 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | rio-mucho 2 | ========= 3 | 4 | Parallel processing wrapper for rasterio 5 | 6 | |PyPI| |Build Status| |Coverage Status| 7 | 8 | Install 9 | ------- 10 | 11 | From pypi: 12 | 13 | ``pip install rio-mucho`` 14 | 15 | From github (usually for a branch / dev): 16 | 17 | ``pip install git+ssh://git@github.com/mapbox/rio-mucho.git@#egg=riomucho`` 18 | 19 | Development: 20 | 21 | :: 22 | 23 | git clone git@github.com:mapbox/rio-mucho.git 24 | cd rio-mucho 25 | pip install -e . 
26 | 27 | Usage 28 | ----- 29 | 30 | .. code:: python 31 | 32 | with riomucho.RioMucho([{inputs}], {output}, {run function}, 33 | windows={windows}, 34 | global_args={global arguments}, 35 | options={options to write}) as rios: 36 | 37 | rios.run({processes}) 38 | 39 | Arguments 40 | ~~~~~~~~~ 41 | 42 | ``inputs`` 43 | ^^^^^^^^^^ 44 | 45 | A list of file paths to open and read. 46 | 47 | ``output`` 48 | ^^^^^^^^^^ 49 | 50 | What file to write to. 51 | 52 | ``run_function`` 53 | ^^^^^^^^^^^^^^^^ 54 | 55 | A function to be applied to each window chunk. This should have input 56 | arguments of: 57 | 58 | 1. A data input, which can be one of: 59 | 60 | - A list of numpy arrays of shape (x,y,z), one for each file as 61 | specified in input file list ``mode="simple_read" [default]`` 62 | - A numpy array of shape ({*n* input files x *n* band count}, {window 63 | rows}, {window cols}) ``mode="array_read"`` 64 | - A list of open sources for reading ``mode="manual_read"`` 65 | 66 | 2. A ``rasterio`` window tuple 67 | 3. A ``rasterio`` window index (``ij``) 68 | 4. A global arguments object that you can use to pass in global 69 | arguments 70 | 71 | This should return: 72 | 73 | 1. An output array of ({count}, {window rows}, {window cols}) shape, and 74 | of the correct data type for writing 75 | 76 | .. code:: python 77 | 78 | def basic_run({data}, {window}, {ij}, {global args}): 79 | ## do something 80 | return {out} 81 | 82 | Keyword arguments 83 | ~~~~~~~~~~~~~~~~~ 84 | 85 | ``windows={windows}`` 86 | ^^^^^^^^^^^^^^^^^^^^^ 87 | 88 | A list of ``rasterio`` (window, ij) tuples to operate on. 89 | ``[Default = srcs[0].block_windows()]`` 90 | 91 | ``global_args={global arguments}`` 92 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 93 | 94 | Since this is working in parallel, pass any other objects / values that you 95 | want to be accessible in the ``run_function`` here. ``[Default = {}]`` 96 | 97 | .. 
code:: python 98 | 99 | global_args = { 100 | 'divide_value': 2 101 | } 102 | 103 | ``options={keyword args}`` 104 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 105 | 106 | The options to pass to the writing output. ``[Default = srcs[0].profile]`` 107 | 108 | Example 109 | ------- 110 | 111 | .. code:: python 112 | 113 | import riomucho, rasterio, numpy 114 | 115 | def basic_run(data, window, ij, g_args): 116 | ## do something 117 | out = numpy.array( 118 | [d[0] / g_args['divide'] for d in data] 119 | ) 120 | return out 121 | 122 | # get windows from an input 123 | with rasterio.open('/tmp/test_1.tif') as src: 124 | ## grabbing the windows as an example. Default behavior is identical. 125 | windows = [[window, ij] for ij, window in src.block_windows()] 126 | options = src.meta 127 | # since we are only writing to 2 bands 128 | options.update(count=2) 129 | 130 | global_args = { 131 | 'divide': 2 132 | } 133 | 134 | processes = 4 135 | 136 | # run it 137 | with riomucho.RioMucho(['input1.tif','input2.tif'], 'output.tif', basic_run, 138 | windows=windows, 139 | global_args=global_args, 140 | options=options) as rm: 141 | 142 | rm.run(processes) 143 | 144 | Utility functions 145 | ----------------- 146 | 147 | ``riomucho.utils.array_stack([array, array, array,...])`` 148 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 149 | 150 | Given a list of ({depth}, {rows}, {cols}) numpy arrays, stack into a 151 | single ({list length \* each image depth}, {rows}, {cols}) array. This 152 | is useful for handling variation between ``rgb`` inputs of a single 153 | file, or separate files for each. 154 | 155 | One RGB file 156 | ^^^^^^^^^^^^ 157 | 158 | .. code:: python 159 | 160 | files = ['rgb.tif'] 161 | open_files = [rasterio.open(f) for f in files] 162 | rgb = riomucho.utils.array_stack([src.read() for src in open_files]) 163 | 164 | Separate RGB files 165 | ^^^^^^^^^^^^^^^^^^ 166 | 167 | .. 
code:: python 168 | 169 | files = ['r.tif', 'g.tif', 'b.tif'] 170 | open_files = [rasterio.open(f) for f in files] 171 | rgb = riomucho.utils.array_stack([src.read() for src in open_files]) 172 | 173 | .. |PyPI| image:: https://img.shields.io/pypi/v/rio-mucho.svg?maxAge=2592000?style=plastic 174 | :target: 175 | .. |Build Status| image:: https://travis-ci.org/mapbox/rio-mucho.svg?branch=master 176 | :target: https://travis-ci.org/mapbox/rio-mucho 177 | .. |Coverage Status| image:: https://coveralls.io/repos/mapbox/rio-mucho/badge.svg?branch=master&service=github 178 | :target: https://coveralls.io/github/mapbox/rio-mucho?branch=master 179 | -------------------------------------------------------------------------------- /examples/simple_read.py: -------------------------------------------------------------------------------- 1 | """Simple reading pattern example""" 2 | 3 | import riomucho 4 | import numpy 5 | 6 | 7 | def read_function(data, window, ij, g_args): 8 | """Takes an array, and sets any value above the mean to the max, the rest to 0""" 9 | output = (data[0] > numpy.mean(data[0])).astype(data[0].dtype) * data[0].max() 10 | return output 11 | 12 | 13 | # Open w/ simple read mode, and work in parallel. 
14 | with riomucho.RioMucho( 15 | ["/tmp/test_1.tif"], "/tmp/test_z_out.tif", read_function, global_args={} 16 | ) as rm: 17 | rm.run(4) 18 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | click==6.7 2 | numpy==1.14.5 3 | rasterio==1.0rc5 4 | -------------------------------------------------------------------------------- /riomucho/__init__.py: -------------------------------------------------------------------------------- 1 | """Map a raster processing function over windows of rasterio datasets 2 | """ 3 | 4 | from __future__ import with_statement 5 | from functools import wraps 6 | from multiprocessing import Pool 7 | import sys 8 | import traceback 9 | 10 | import rasterio 11 | from rasterio.transform import guard_transform 12 | 13 | from riomucho import utils 14 | from riomucho.single_process_pool import MockTub 15 | 16 | 17 | global_args = None # set by init_worker 18 | srcs = None # open input datasets, set by init_worker 19 | 20 | 21 | class MuchoChildError(Exception): 22 | """A wrapper for exceptions in a child process. 23 | 24 | See https://bugs.python.org/issue13831 25 | """ 26 | 27 | def __init__(self): 28 | """Wrap the exception currently being handled and its formatted traceback.""" 29 | exc_type, exc_value, exc_tb = sys.exc_info() 30 | self.exception = exc_value 31 | self.formatted = "".join( 32 | traceback.format_exception(exc_type, exc_value, exc_tb) 33 | ) 34 | 35 | def __str__(self): 36 | return "{}\nChild process's traceback:\n{}".format( 37 | Exception.__str__(self), self.formatted 38 | ) 39 | 40 | 41 | def tb_capture(func): 42 | """A decorator which captures worker tracebacks. 43 | 44 | Any exception raised by func is re-raised as MuchoChildError. Inspired by an example in 45 | https://bugs.python.org/issue13831. 46 | 47 | This decorator wraps rio-mucho worker tasks. 48 | 49 | Parameters 50 | ---------- 51 | func : function 52 | A function to be decorated. 
53 | 54 | Returns 55 | ------- 56 | func 57 | 58 | """ 59 | 60 | @wraps(func) 61 | def wrapper(*args, **kwds): 62 | try: 63 | return func(*args, **kwds) 64 | 65 | except Exception: 66 | raise MuchoChildError() 67 | 68 | return wrapper 69 | 70 | 71 | def init_worker(inpaths, g_args): 72 | """The multiprocessing worker initializer: stores g_args and opens the input datasets 73 | 74 | Parameters 75 | ---------- 76 | inpaths : list of str 77 | A list of dataset paths. 78 | g_args : dict 79 | Global arguments. 80 | 81 | Returns 82 | ------- 83 | None 84 | 85 | """ 86 | global global_args 87 | global srcs 88 | global_args = g_args 89 | srcs = [rasterio.open(i) for i in inpaths] 90 | 91 | 92 | class ReaderBase(object): 93 | """Base class for readers""" 94 | 95 | def __init__(self, user_func): 96 | """Create new instance 97 | 98 | Parameters 99 | ---------- 100 | user_func : function 101 | The user function with signature (data, window, ij, global_args) 102 | 103 | Returns 104 | ------- 105 | ReaderBase 106 | 107 | """ 108 | self.user_func = user_func 109 | 110 | 111 | class manual_reader(ReaderBase): 112 | """Wraps the user's func in a manual reading pattern. 113 | """ 114 | 115 | @tb_capture 116 | def __call__(self, args): 117 | """Execute the user function on the open sources; returns (result, window).""" 118 | window, ij = args 119 | return self.user_func(srcs, window, ij, global_args), window 120 | 121 | 122 | class array_reader(ReaderBase): 123 | """Wraps the user's func in an array reading pattern. 124 | """ 125 | 126 | @tb_capture 127 | def __call__(self, args): 128 | """Execute the user function on the stacked window data of all sources; returns (result, window).""" 129 | window, ij = args 130 | return ( 131 | self.user_func( 132 | utils.array_stack([src.read(window=window) for src in srcs]), 133 | window, 134 | ij, 135 | global_args, 136 | ), 137 | window, 138 | ) 139 | 140 | 141 | class simple_reader(ReaderBase): 142 | """Wraps the user's func in a simple reading pattern. 
143 | """ 144 | 145 | @tb_capture 146 | def __call__(self, args): 147 | """Execute the user function on a list of per-source window arrays; returns (result, window).""" 148 | window, ij = args 149 | return ( 150 | self.user_func( 151 | [src.read(window=window) for src in srcs], window, ij, global_args 152 | ), 153 | window, 154 | ) 155 | 156 | 157 | class RioMucho(object): 158 | """Maps a raster processing function over blocks of data. 159 | 160 | Uses a multiprocessing pool to distribute the work. 161 | """ 162 | 163 | def __init__( 164 | self, 165 | inpaths, 166 | outpath_or_dataset, 167 | run_function, 168 | mode="simple_read", 169 | windows=None, 170 | options=None, 171 | global_args=None, 172 | ): 173 | """Create a new instance 174 | 175 | Parameters 176 | ---------- 177 | inpaths : list of str 178 | A list of input dataset paths or identifiers. 179 | outpath_or_dataset : str or dataset opened in 'w' mode 180 | This parameter specifies the dataset to which results will be 181 | written. If a str, a new dataset object will be created. Otherwise 182 | the results will be written to the open dataset. 183 | run_function : function 184 | The function to be mapped, with signature (data, window, ij, global_args). 185 | mode : str, optional 186 | One of ["simple_read", "manual_read", "array_read"]. 187 | windows : list, optional 188 | A list of [window, ij] pairs to work on. If not overridden, this will be the 189 | block windows of the first source dataset. 190 | options : dict, optional 191 | Creation options for the output dataset. If not overridden, this 192 | will be the profile of the first source dataset. 193 | global_args : dict, optional 194 | Extra arguments for the user function. Defaults to an empty dict. 
195 | 196 | Returns 197 | ------- 198 | RioMucho 199 | 200 | """ 201 | self.inpaths = inpaths 202 | self.outpath_or_dataset = outpath_or_dataset 203 | self.run_function = run_function 204 | 205 | if mode not in ["simple_read", "manual_read", "array_read"]: 206 | raise ValueError( 207 | 'mode must be one of: ["simple_read", "manual_read", "array_read"]' 208 | ) 209 | 210 | else: 211 | self.mode = mode 212 | 213 | self.windows = windows or utils.getWindows(inpaths[0]) 214 | self.options = options or utils.getOptions(inpaths[0]) 215 | self.global_args = global_args or {} 216 | 217 | def __enter__(self): 218 | return self 219 | 220 | def __exit__(self, ext_t, ext_v, trace): 221 | pass # no cleanup here; run() closes and joins its own pool 222 | 223 | def run(self, processes=4): 224 | """Process every window with a pool of `processes` workers (1 runs in-process) and write each result to the output.""" 225 | if processes == 1: 226 | self.pool = MockTub(init_worker, (self.inpaths, self.global_args)) 227 | else: 228 | self.pool = Pool(processes, init_worker, (self.inpaths, self.global_args)) 229 | 230 | self.options["transform"] = guard_transform(self.options["transform"]) 231 | 232 | if self.mode == "manual_read": 233 | reader_worker = manual_reader(self.run_function) 234 | elif self.mode == "array_read": 235 | reader_worker = array_reader(self.run_function) 236 | else: 237 | reader_worker = simple_reader(self.run_function) 238 | 239 | if isinstance(self.outpath_or_dataset, rasterio.io.DatasetWriter): 240 | destination = self.outpath_or_dataset 241 | else: 242 | destination = rasterio.open(self.outpath_or_dataset, "w", **self.options) 243 | 244 | # Open an output file, work through the function in parallel, 245 | # and write out the data. 
246 | with destination as dst: 247 | for data, window in self.pool.imap_unordered(reader_worker, self.windows): 248 | dst.write(data, window=window) 249 | 250 | self.pool.close() 251 | self.pool.join() 252 | -------------------------------------------------------------------------------- /riomucho/single_process_pool.py: -------------------------------------------------------------------------------- 1 | """Multiprocessing Pool test double""" 2 | 3 | 4 | class MockTub(object): 5 | """Class to mock multiprocessing.Pool 6 | """ 7 | 8 | def __init__(self, main_worker, args): 9 | """Create a new instance, calling the worker initializer immediately""" 10 | main_worker(*args) 11 | 12 | def imap_unordered(self, func, iterable): 13 | """Lazily map the func over the iterable, yielding results in input order""" 14 | for item in iterable: 15 | yield func(item) 16 | 17 | def close(self): 18 | """No-op, kept for Pool API compatibility (the pool is closed)""" 19 | pass 20 | 21 | def join(self): 22 | """No-op, kept for Pool API compatibility (everybody out of the pool)""" 23 | pass 24 | -------------------------------------------------------------------------------- /riomucho/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions 2 | """ 3 | 4 | import rasterio 5 | import numpy as np 6 | 7 | 8 | def getOptions(input): 9 | """Open the dataset at the given path and return its profile""" 10 | with rasterio.open(input) as src: 11 | return src.profile 12 | 13 | 14 | def getWindows(input): 15 | """Open the dataset at the given path and return its block windows as [window, ij] pairs""" 16 | with rasterio.open(input) as src: 17 | return [[window, ij] for ij, window in src.block_windows()] 18 | 19 | 20 | def array_stack(arrays): 21 | """Stack 3D arrays along their first axis; raises ValueError if their trailing dimensions differ""" 22 | shapes = np.array([a.shape for a in arrays]) 23 | 24 | if not np.all(np.roll(shapes[:, 1:], 1, axis=0) == shapes[:, 1:]): # each array's shape[1:] must equal its (cyclic) predecessor's 25 | raise ValueError( 26 | "All input arrays must have the same height and width for this mode" 27 | ) 28 | 29 | width = arrays[0].shape[-1] 30 | height = arrays[0].shape[-2] 31 | 32 | return np.array([a for subarray in arrays for a in subarray]).reshape( 33 | shapes[:, 0].sum(), height, width 34 | ) 35 | 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """rio_mucho setup script.""" 2 | 3 | import os 4 | from codecs import open as codecs_open 5 | from setuptools import setup, find_packages 6 | 7 | 8 | # Get the long description from README.rst 9 | with codecs_open("README.rst", encoding="utf-8") as f: 10 | long_description = f.read() 11 | 12 | 13 | def read(fname): 14 | """Read the contents of a file located in this script's directory.""" 15 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 16 | 17 | 18 | setup( 19 | name="rio-mucho", 20 | version="1.0.0", 21 | description=u"Windowed multiprocessing wrapper for rasterio", 22 | long_description=long_description, 23 | classifiers=[ 24 | "Programming Language :: Python :: 2.7", 25 | "Programming Language :: Python :: 3.5", 26 | "Programming Language :: Python :: 3.6", 27 | ], 28 | keywords="", 29 | author=u"Damon Burgett", 30 | author_email="damon@mapbox.com", 31 | url="https://github.com/mapbox/rio-mucho", 32 | license="MIT", 33 | packages=find_packages(exclude=["ez_setup", "examples", "tests"]), 34 | include_package_data=True, 35 | zip_safe=False, 36 | install_requires=["numpy", "rasterio~=1.0"], 37 | extras_require={"test": ["pytest", "pytest-cov", "coveralls"]}, 38 | ) 39 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Testing fixtures""" 2 | 3 | import rasterio 4 | from rasterio import Affine 5 | import numpy as np 6 | import pytest 7 | 8 | 9 | def makeTesting(output, size, windowsize, bands): 10 | """Write a random uint8 GeoTIFF to output: size x size pixels, `bands` bands, windowsize blocks""" 11 | kwargs = { 12 | "count": bands, 13 | "crs": {"init": u"epsg:3857"}, 14 | "dtype": "uint8", 15 | "driver": u"GTiff", 16 | "transform": Affine( 17 | 4.595839562240513, 18 | 0.0, 19 | -13550756.3744, 20 | 0.0, 21 | -4.595839562240513, 22 
| 6315533.02503, 23 | ), 24 | "height": size, 25 | "width": size, 26 | "compress": "lzw", 27 | "blockxsize": windowsize, 28 | "blockysize": windowsize, 29 | "tiled": True, 30 | } 31 | 32 | randArr = np.array( 33 | [(np.random.rand(size, size) * 255).astype(np.uint8) for i in range(bands)] 34 | ) 35 | 36 | with rasterio.open(output, "w", **kwargs) as dst: 37 | dst.write(randArr) 38 | 39 | 40 | @pytest.fixture(scope="session") 41 | def test_1_tif(tmpdir_factory): 42 | """Path to source dataset number 1 (512 x 512, 256 blocks, 1 band)""" 43 | fn = tmpdir_factory.mktemp("data").join("test_1.tif") 44 | makeTesting(str(fn), 512, 256, 1) 45 | return fn 46 | 47 | 48 | @pytest.fixture(scope="session") 49 | def test_2_tif(tmpdir_factory): 50 | """Path to source dataset number 2 (512 x 512, 256 blocks, 1 band)""" 51 | fn = tmpdir_factory.mktemp("data").join("test_2.tif") 52 | makeTesting(str(fn), 512, 256, 1) 53 | return fn 54 | -------------------------------------------------------------------------------- /tests/test_mod.py: -------------------------------------------------------------------------------- 1 | import riomucho 2 | import rasterio 3 | import numpy 4 | import pytest 5 | 6 | from rasterio.errors import RasterioIOError 7 | 8 | 9 | def read_function_manual(open_files, window, ij, g_args): 10 | """A manual-read user function for testing: stacks the first band of every open file for the window""" 11 | return numpy.array([f.read(window=window)[0] for f in open_files]) 12 | 13 | 14 | def test_riomucho_manual(tmpdir, test_1_tif, test_2_tif): 15 | """Distribution of a manual read user function succeeds""" 16 | with rasterio.open(str(test_1_tif)) as src: 17 | windows = [[window, ij] for ij, window in src.block_windows()] 18 | options = src.meta 19 | options.update(count=2) 20 | 21 | with riomucho.RioMucho( 22 | [str(test_1_tif), str(test_2_tif)], 23 | str(tmpdir.join("test_xyz_out.tif")), 24 | read_function_manual, 25 | windows=windows, 26 | global_args={}, 27 | options=options, 28 | mode="manual_read", 29 | ) as rm: 30 | rm.run(4) 31 | 32 | with rasterio.open(str(test_1_tif)) as inumpyutsrc: 33 | with 
rasterio.open(str(tmpdir.join("test_xyz_out.tif"))) as outputsrc: 34 | assert inumpyutsrc.checksum(1) == outputsrc.checksum(1) 35 | 36 | 37 | def read_function_simple(data, window, ij, g_args): 38 | """A simple-read user function for testing: zeroes data[0][:10, :10] in place and returns data[0]""" 39 | data[0][:10, :10] = 0 40 | return data[0] 41 | 42 | 43 | def test_riomucho_simple(tmpdir, test_1_tif): 44 | """Distribution of a simple user function works""" 45 | with riomucho.RioMucho( 46 | [str(test_1_tif)], str(tmpdir.join("test_xyz_out.tif")), read_function_simple 47 | ) as rm: 48 | rm.run(1) 49 | 50 | with rasterio.open(str(tmpdir.join("test_xyz_out.tif"))) as outputsrc: 51 | assert numpy.sum(outputsrc.read(1)[:10, :10] != 0) == 0 52 | 53 | 54 | def test_riomucho_simple_fail(tmpdir): 55 | """Invalid source file fails with RasterioIOError""" 56 | with pytest.raises(RasterioIOError): 57 | with riomucho.RioMucho( 58 | ["test_999.tif"], str(tmpdir.join("test_xyz_out.tif")), read_function_simple 59 | ) as rm: 60 | rm.run(1) 61 | 62 | 63 | def read_function_arrayread(data, window, ij, g_args): 64 | """An array reading user function for testing: returns the data unchanged""" 65 | return data 66 | 67 | 68 | def test_riomucho_arrayread(tmpdir, test_1_tif, test_2_tif): 69 | """Distribution of an array reading user function works""" 70 | with rasterio.open(str(test_1_tif)) as src: 71 | options = src.profile 72 | options.update(count=2) 73 | 74 | with riomucho.RioMucho( 75 | [str(test_1_tif), str(test_2_tif)], 76 | str(tmpdir.join("test_xyz_out.tif")), 77 | read_function_arrayread, 78 | mode="array_read", 79 | options=options, 80 | ) as rm: 81 | rm.run(4) 82 | 83 | with rasterio.open(str(test_1_tif)) as inumpyutsrc1: 84 | with rasterio.open(str(test_2_tif)) as inumpyutsrc2: 85 | with rasterio.open(str(tmpdir.join("test_xyz_out.tif"))) as outputsrc: 86 | assert inumpyutsrc1.checksum(1) == outputsrc.checksum(1) 87 | assert inumpyutsrc2.checksum(1) == outputsrc.checksum(2) 88 | 89 | 90 | def test_riomucho_readmode_fail(tmpdir, test_1_tif): 91 | """Invalid mode fails with 
ValueError""" 92 | with pytest.raises(ValueError): 93 | with riomucho.RioMucho( 94 | [str(test_1_tif)], 95 | str(tmpdir.join("test_xyz_out.tif")), 96 | read_function_arrayread, 97 | mode="mucho_gusto", 98 | ) as rm: 99 | rm.run(4) 100 | 101 | 102 | def makeRandomArrays(maxsize=100): 103 | """Make 1-4 zero arrays of 1-3 bands sharing one random height and width, plus the expected stacked shape""" 104 | # TODO: hypothesize this? 105 | width = int(numpy.random.rand() * maxsize) + 1 106 | height = int(numpy.random.rand() * maxsize) + 1 107 | inumpyuts = int(numpy.random.rand() * 4 + 1) 108 | counts = [int(numpy.random.rand() * 3 + 1) for i in range(inumpyuts)] 109 | array_list = [numpy.zeros((i, height, width)) for i in counts] 110 | expected_shape = tuple((sum(counts), height, width)) 111 | return array_list, expected_shape 112 | 113 | 114 | def test_arraystack(): 115 | """Array stacker works""" 116 | t_array_list, expected_shape = makeRandomArrays() 117 | stacked = riomucho.utils.array_stack(t_array_list) 118 | assert stacked.shape == expected_shape 119 | 120 | 121 | def test_bad_arraystack(): 122 | """Stacking an array of wrong shape fails with ValueError""" 123 | t_array_list, expected_shape = makeRandomArrays() 124 | t_array_list.append(numpy.zeros((1, 1, 1))) # NOTE: no mismatch (test would fail) in the rare case the random height and width are both 1 125 | with pytest.raises(ValueError): 126 | riomucho.utils.array_stack(t_array_list) 127 | 128 | 129 | def fail(data, window, ij, g_args): 130 | """A user function that always raises ZeroDivisionError. User functions must be defined at the top of a module.""" 131 | return data * (1 / 0) 132 | 133 | 134 | def test_pool_worker_traceback_capture(tmpdir, test_1_tif, test_2_tif): 135 | """Worker tracebacks are captured""" 136 | with rasterio.open(str(test_1_tif)) as src: 137 | options = src.profile 138 | options.update(count=2) 139 | 140 | with riomucho.RioMucho( 141 | [str(test_1_tif), str(test_2_tif)], 142 | str(tmpdir.join("output.tif")), 143 | fail, 144 | mode="array_read", 145 | options=options, 146 | ) as rm: 147 | with pytest.raises(riomucho.MuchoChildError) as excinfo: 148 | rm.run(4) 149 | 150 | assert "ZeroDivisionError" in 
str(excinfo.value) 151 | 152 | 153 | def test_tb_capture(): 154 | """Exception in a job is captured""" 155 | 156 | @riomucho.tb_capture 157 | def foo(*args, **kwargs): 158 | return 1 / 0 159 | 160 | with pytest.raises(riomucho.MuchoChildError) as excinfo: 161 | foo() 162 | assert "ZeroDivisionError" in str(excinfo.value) 163 | 164 | 165 | def test_riomucho_simple_dataset_object(tmpdir, test_1_tif): 166 | """We can pass an open dataset for output""" 167 | with rasterio.open(str(test_1_tif)) as src: 168 | options = src.profile 169 | 170 | with rasterio.open(str(tmpdir.join("output.tif")), "w", **options) as dst: 171 | with riomucho.RioMucho([str(test_1_tif)], dst, read_function_simple) as rm: 172 | rm.run(1) 173 | 174 | with rasterio.open(str(tmpdir.join("output.tif"))) as outputsrc: 175 | assert numpy.sum(outputsrc.read(1)[:10, :10] != 0) == 0 176 | --------------------------------------------------------------------------------