├── .coveragerc ├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── cogdumper ├── __init__.py ├── cog_tiles.py ├── errors.py ├── filedumper.py ├── httpdumper.py ├── jpegreader.py ├── s3dumper.py ├── scripts │ ├── __init__.py │ └── cli.py └── tifftags.py ├── requirements.txt ├── setup.py └── tests ├── data ├── BigTIFF.tif ├── be_cog.tif └── cog.tif └── test_filedumper.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | cogdumper/filedumper.py 4 | cogdumper/httpdumper.py 5 | cogdumper/s3dumper.py 6 | cogdumper/jpegreader.py 7 | cogdumper/scripts/cli.py 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | cache: 4 | directories: 5 | - ~/.cache/pip 6 | 7 | python: 8 | - "3.6" 9 | before_install: 10 | - pip install -U pip 11 | install: 12 | - "pip install -r requirements.txt" 13 | - "pip install -e .[test]" 14 | script: 15 | - python -m pytest --cov=cogdumper --cov-report term-missing -vv 16 | - cd tests && python -m codecov 17 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | 1.1.0 (2018-04-24) 5 | ------------------ 6 | - Update CLI 7 | 8 | 1.0.0 (2018-03-30) 9 | ------------------ 10 | - initial release 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Mapbox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/MANIFEST.in -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # COG Dumper 2 | 3 | [![Build Status](https://travis-ci.org/mapbox/COGDumper.svg?branch=master)](https://travis-ci.org/mapbox/COGDumper) [![codecov](https://codecov.io/gh/mapbox/COGDumper/branch/master/graph/badge.svg?token=Yd3y5aTvGo)](https://codecov.io/gh/mapbox/COGDumper) 4 | 5 | A python (3.6) utility to extract a tile from a Cloud Optimized GeoTIFF (COG) without decompressing the contained data. Tiff data can be hosted locally, on a web server or S3. 6 | 7 | This can be useful for serving compressed tiles from a TIFF without invoking Rasterio and GDAL. This utility has been tested with Tiff that have JPEG compression. 8 | 9 | Tiled Tiff is an extension to TIFF 6.0 and more detail can be found [here](http://www.alternatiff.com/resources/TIFFphotoshop.pdf) 10 | 11 | Note that tiles are padded at the edge of an image. This requires an image [mask](https://trac.osgeo.org/gdal/wiki/rfc15_nodatabitmask) to be defined if tile sizes do not align with the image width / height (as in the test data which demonstrates this effect). 12 | 13 | 14 | ## Data Preparation 15 | 16 | Read the document on [COG](https://trac.osgeo.org/gdal/wiki/CloudOptimizedGeoTIFF) and create a tiled pyramid GeoTIFF. 17 | 18 | For example; 19 | 20 | ``` 21 | gdal_translate SENTINEL2_L1C:S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441.SAFE/MTD_MSIL1C.xml:TCI:EPSG_32630 \ 22 | S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif \ 23 | -co TILED=YES -co COMPRESS=DEFLATE 24 | gdaladdo -r average S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif 2 4 8 16 32 25 | gdal_translate S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif \ 26 | S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI_cloudoptimized_2.tif \ 27 | -co TILED=YES -co COMPRESS=JPEG -co PHOTOMETRIC=YCBCR -co COPY_SRC_OVERVIEWS=YES 28 | ``` 29 | 30 | This library also works with a file hosted in an S3 bucket. 31 | 32 | ## Installation 33 | 34 | Python 3.6 is required. 35 | 36 | ``` 37 | pip install cogdumper 38 | ``` 39 | 40 | or from source 41 | 42 | ``` 43 | git clone https://github.com/mapbox/COGDumper 44 | cd COGDumper 45 | pip install . 46 | ``` 47 | 48 | ## Command line interface 49 | 50 | ``` 51 | $ cogdumper --help 52 | Usage: cogdumper [OPTIONS] COMMAND [ARGS]... 53 | 54 | Command line interface for COGDumper. 55 | 56 | Options: 57 | --version Show the version and exit. 58 | --help Show this message and exit. 59 | 60 | Commands: 61 | file COGDumper cli for local dataset. 62 | http COGDumper cli for web hosted dataset. 63 | s3 COGDumper cli for AWS S3 hosted dataset 64 | ``` 65 | 66 | ##### local files 67 | ``` 68 | cogdumper file --help 69 | Usage: cogdumper file [OPTIONS] 70 | 71 | COGDumper cli for local dataset. 72 | 73 | Options: 74 | --file PATH input file [required] 75 | --output PATH local output directory 76 | --xyz INTEGER... xyz tile coordinate where z is the overview level 77 | --version Show the version and exit. 78 | --help Show this message and exit. 79 | ``` 80 | e.g. `cogdumper file --file data/cog.tif --xyz 0 0 0` 81 | 82 | ##### web files 83 | 84 | ``` 85 | cogdumper http --help 86 | Usage: cogdumper http [OPTIONS] 87 | 88 | COGDumper cli for web hosted dataset. 89 | 90 | Options: 91 | --server TEXT server e.g. http://localhost:8080 [required] 92 | --path TEXT server path 93 | --resource TEXT server resource 94 | --output DIRECTORY local output directory 95 | --xyz INTEGER... xyz tile coordinates where z is the overview level 96 | --version Show the version and exit. 97 | --help Show this message and exit. 98 | ``` 99 | 100 | e.g. `cogdumper http --server http://localhost:8080 --path data --resource cog.tif` 101 | 102 | ##### S3 files 103 | ``` 104 | cogdumper s3 --help 105 | Usage: cogdumper s3 [OPTIONS] 106 | 107 | COGDumper cli for AWS S3 hosted dataset 108 | 109 | Options: 110 | --bucket TEXT AWS S3 bucket [required] 111 | --key TEXT AWS S3 key [required] 112 | --output DIRECTORY local output directory 113 | --xyz INTEGER... xyz tile coordinates where z is the overview level 114 | --help Show this message and exit. 115 | ``` 116 | 117 | e.g. `cogdumper s3 --bucket bucket_name --key key_name/image.tif --xyz 0 0 0` 118 | -------------------------------------------------------------------------------- /cogdumper/__init__.py: -------------------------------------------------------------------------------- 1 | # cogdumper 2 | __version__ = '1.1.0' 3 | -------------------------------------------------------------------------------- /cogdumper/cog_tiles.py: -------------------------------------------------------------------------------- 1 | """Function for extracting tiff tiles.""" 2 | 3 | import os 4 | 5 | from abc import abstractmethod 6 | from math import ceil 7 | import struct 8 | 9 | from cogdumper.errors import TIFFError 10 | from cogdumper.jpegreader import insert_tables 11 | from cogdumper.tifftags import compression as CompressionType 12 | from cogdumper.tifftags import sizes as TIFFSizes 13 | from cogdumper.tifftags import tags as TIFFTags 14 | 15 | 16 | class AbstractReader: # pragma: no cover 17 | @abstractmethod 18 | def read(offset, len): 19 | pass 20 | 21 | 22 | class COGTiff: 23 | """ 24 | Cloud Optimised GeoTIFF 25 | ----- 26 | Format 27 | TIFF / BigTIFF signature 28 | IFD (Image File Directory) of full resolution image 29 | Values of TIFF tags that don't fit inline in the IFD directory, such as TileOffsets, TileByteCounts and GeoTIFF keys 30 | Optional: IFD (Image File Directory) of first overview (typically subsampled by a factor of 2), followed by the values of its tags that don't fit inline 31 | Optional: IFD (Image File Directory) of second overview (typically subsampled by a factor of 4), followed by the values of its tags that don't fit inline 32 | ... 33 | Optional: IFD (Image File Directory) of last overview (typically subsampled by a factor of 2N), followed by the values of its tags that don't fit inline 34 | Optional: tile content of last overview level 35 | ... 36 | Optional: tile content of first overview level 37 | Tile content of full resolution image. 38 | """ 39 | def __init__(self, reader): 40 | """Parses a (Big)TIFF for image tiles. 41 | Parameters 42 | ---------- 43 | reader: 44 | A reader that implements the cogdumper.cog_tiles.AbstractReader methods 45 | """ 46 | self._endian = '<' 47 | self._version = 42 48 | self.read = reader 49 | self._big_tiff = False 50 | self.header = '' 51 | self._offset = 0 52 | self._image_ifds = [] 53 | self._mask_ifds = [] 54 | 55 | self.read_header() 56 | 57 | def _ifds(self): 58 | """Reads TIFF image file directories from a COG recursively. 59 | Parameters 60 | ----------- 61 | offset: 62 | number, offset into the tiff stream to read from, this is only 63 | required for the first image file directory 64 | overview: 65 | number, an identifier that is the overview level in the COG 66 | image pyramid 67 | Yield 68 | -------- 69 | dict: Image File Directory for the next IFD 70 | """ 71 | while self._offset != 0: 72 | next_offset = 0 73 | pos = 0 74 | tags = [] 75 | 76 | fallback_size = 4096 if self._big_tiff else 1024 77 | if self._offset > len(self.header): 78 | byte_starts = len(self.header) 79 | byte_ends = byte_starts + self._offset + fallback_size 80 | self.header += self.read(byte_starts, byte_ends) 81 | 82 | if self._big_tiff: 83 | bytes = self.header[self._offset: self._offset + 8] 84 | num_tags = struct.unpack(f'{self._endian}Q', bytes)[0] 85 | 86 | byte_starts = self._offset + 8 87 | byte_ends = (num_tags * 20) + 8 + byte_starts 88 | if byte_ends > len(self.header): 89 | s = len(self.header) 90 | self.header += self.read(s, byte_ends) 91 | 92 | bytes = self.header[byte_starts: byte_ends] 93 | 94 | for t in range(0, num_tags): 95 | code = struct.unpack( 96 | f'{self._endian}H', 97 | bytes[pos: pos + 2] 98 | )[0] 99 | 100 | if code in TIFFTags: 101 | dtype = struct.unpack( 102 | f'{self._endian}H', 103 | bytes[pos + 2: pos + 4] 104 | )[0] 105 | 106 | if dtype not in TIFFSizes: # pragma: no cover 107 | raise TIFFError(f'Unrecognised data type {dtype}') 108 | 109 | num_values = struct.unpack( 110 | f'{self._endian}Q', 111 | bytes[pos + 4: pos + 12] 112 | )[0] 113 | tag_len = num_values * TIFFSizes[dtype]['size'] 114 | if tag_len <= 8: 115 | data = bytes[pos + 12: pos + 12 + tag_len] 116 | else: # pragma: no cover 117 | data_offset = struct.unpack( 118 | f'{self._endian}Q', 119 | bytes[pos + 12: pos + 20] 120 | )[0] 121 | 122 | byte_starts = data_offset 123 | byte_ends = byte_starts + tag_len 124 | if byte_ends > len(self.header): 125 | s = len(self.header) 126 | self.header += self.read(s, byte_ends) 127 | 128 | data = self.header[byte_starts: byte_ends] 129 | 130 | tags.append( 131 | { 132 | 'code': code, 133 | 'dtype': TIFFSizes[dtype], 134 | 'num_values': num_values, 135 | 'data': data 136 | } 137 | ) 138 | 139 | pos = pos + 20 140 | 141 | self._offset = self._offset + 8 + pos 142 | next_offset = struct.unpack( 143 | f'{self._endian}Q', 144 | self.header[self._offset: self._offset + 8] 145 | )[0] 146 | else: 147 | bytes = self.header[self._offset: self._offset + 2] 148 | num_tags = struct.unpack(f'{self._endian}H', bytes)[0] 149 | 150 | byte_starts = self._offset + 2 151 | byte_ends = (num_tags * 12) + 2 + byte_starts 152 | if byte_ends > len(self.header): 153 | s = len(self.header) 154 | self.header += self.read(s, byte_ends) 155 | 156 | bytes = self.header[byte_starts: byte_ends] 157 | 158 | for t in range(0, num_tags): 159 | code = struct.unpack( 160 | f'{self._endian}H', 161 | bytes[pos: pos + 2] 162 | )[0] 163 | 164 | if code in TIFFTags: 165 | dtype = struct.unpack( 166 | f'{self._endian}H', 167 | bytes[pos + 2: pos + 4] 168 | )[0] 169 | 170 | if dtype not in TIFFSizes: # pragma: no cover 171 | raise TIFFError(f'Unrecognised data type {dtype}') 172 | 173 | num_values = struct.unpack( 174 | f'{self._endian}L', 175 | bytes[pos + 4: pos + 8] 176 | )[0] 177 | tag_len = num_values * TIFFSizes[dtype]['size'] 178 | if tag_len <= 4: 179 | data = bytes[pos + 8: pos + 8 + tag_len] 180 | else: 181 | data_offset = struct.unpack( 182 | f'{self._endian}L', 183 | bytes[pos + 8: pos + 12] 184 | )[0] 185 | 186 | byte_starts = data_offset 187 | byte_ends = byte_starts + tag_len 188 | if byte_ends > len(self.header): 189 | s = len(self.header) 190 | self.header += self.read(s, byte_ends) 191 | data = self.header[byte_starts: byte_ends] 192 | 193 | tags.append( 194 | { 195 | 'code': code, 196 | 'dtype': TIFFSizes[dtype], 197 | 'num_values': num_values, 198 | 'data': data 199 | } 200 | ) 201 | 202 | pos = pos + 12 203 | 204 | self._offset = self._offset + 2 + pos 205 | next_offset = struct.unpack( 206 | f'{self._endian}L', 207 | self.header[self._offset: self._offset + 4] 208 | )[0] 209 | 210 | self._offset = next_offset 211 | 212 | yield { 213 | 'tags': tags, 214 | 'next_offset': next_offset 215 | } 216 | 217 | def read_header(self): 218 | """Read and parse COG header.""" 219 | buff_size = int(os.environ.get('COG_INGESTED_BYTES_AT_OPEN', '16384')) 220 | self.header = self.read(0, buff_size) 221 | 222 | # read first 4 bytes to determine tiff or bigtiff and byte order 223 | if self.header[:2] == b'MM': 224 | self._endian = '>' 225 | 226 | self._version = struct.unpack(f'{self._endian}H', self.header[2:4])[0] 227 | 228 | if self._version == 42: 229 | # TIFF 230 | self._big_tiff = False 231 | # read offset to first IFD 232 | self._offset = struct.unpack(f'{self._endian}L', self.header[4:8])[0] 233 | elif self._version == 43: 234 | # BIGTIFF 235 | self._big_tiff = True 236 | bytes = self.header[4:16] 237 | bytesize = struct.unpack(f'{self._endian}H', bytes[0:2])[0] 238 | w = struct.unpack(f'{self._endian}H', bytes[2:4])[0] 239 | self._offset = struct.unpack(f'{self._endian}Q', bytes[4:])[0] 240 | if bytesize != 8 or w != 0: # pragma: no cover 241 | raise TIFFError(f"Invalid BigTIFF with bytesize {bytesize} and word {w}") 242 | else: # pragma: no cover 243 | raise TIFFError(f"Invalid version {self._version} for TIFF file") 244 | 245 | self._init = True 246 | 247 | # for JPEG we need to read all IFDs, they are at the front of the file 248 | for ifd in self._ifds(): 249 | mime_type = 'image/jpeg' 250 | # tile offsets are an extension but if they aren't in the file then 251 | # you can't get a tile back! 252 | offsets = [] 253 | byte_counts = [] 254 | image_width = 0 255 | image_height = 0 256 | tile_width = 0 257 | tile_height = 0 258 | jpeg_tables = None 259 | 260 | for t in ifd['tags']: 261 | code = t['code'] 262 | fmt = t['dtype']['format'] 263 | if code == 256: 264 | # image width 265 | image_width = struct.unpack( 266 | f'{self._endian}{fmt}', 267 | t['data'] 268 | )[0] 269 | elif code == 257: 270 | # image height 271 | image_height = struct.unpack( 272 | f'{self._endian}{fmt}', 273 | t['data'] 274 | )[0] 275 | elif code == 259: 276 | # compression 277 | val = struct.unpack( 278 | f'{self._endian}{fmt}', 279 | t['data'] 280 | )[0] 281 | if val in CompressionType: 282 | mime_type = CompressionType[val] 283 | else: 284 | mime_type = 'application/octet-stream' 285 | elif code == 322: 286 | # tile width 287 | tile_width = struct.unpack( 288 | f'{self._endian}{fmt}', 289 | t['data'] 290 | )[0] 291 | elif code == 323: 292 | # tile height 293 | tile_height = struct.unpack( 294 | f'{self._endian}{fmt}', 295 | t['data'] 296 | )[0] 297 | elif code == 324: 298 | # tile offsets 299 | offsets = struct.unpack( 300 | f'{self._endian}{t["num_values"]}{fmt}', 301 | t['data'] 302 | ) 303 | elif code == 325: 304 | # tile byte counts 305 | byte_counts = struct.unpack( 306 | f'{self._endian}{t["num_values"]}{fmt}', 307 | t['data'] 308 | ) 309 | elif code == 347: 310 | # JPEG Tables 311 | jpeg_tables = t['data'] 312 | 313 | if len(offsets) == 0: 314 | raise TIFFError('TIFF Tiles are not found in IFD {z}') 315 | 316 | ifd['image_width'] = image_width 317 | ifd['image_height'] = image_height 318 | ifd['compression'] = mime_type 319 | ifd['tile_width'] = tile_width 320 | ifd['tile_height'] = tile_height 321 | ifd['offsets'] = offsets 322 | ifd['byte_counts'] = byte_counts 323 | ifd['jpeg_tables'] = jpeg_tables 324 | 325 | ifd['nx_tiles'] = ceil(image_width / float(tile_width)) 326 | ifd['ny_tiles'] = ceil(image_height / float(tile_height)) 327 | 328 | if (ifd['compression'] == 'deflate'): 329 | self._mask_ifds.append(ifd) 330 | else: 331 | self._image_ifds.append(ifd) 332 | 333 | if len(self._image_ifds) == 0 and len(self._mask_ifds) > 0: # pragma: no cover 334 | self._image_ifds = self._mask_ifds 335 | self._mask_ifds = [] 336 | 337 | def get_tile(self, x, y, z): 338 | """Read tile data.""" 339 | if z < len(self._image_ifds): 340 | image_ifd = self._image_ifds[z] 341 | idx = (y * image_ifd['ny_tiles']) + x 342 | if idx > len(image_ifd['offsets']): 343 | raise TIFFError(f'Tile {x} {y} {z} does not exist') 344 | else: 345 | offset = image_ifd['offsets'][idx] 346 | byte_count = image_ifd['byte_counts'][idx] 347 | tile = self.read(offset, byte_count) 348 | if image_ifd['compression'] == 'image/jpeg': 349 | # fix up jpeg tile with missing quantization tables 350 | tile = insert_tables(tile, image_ifd['jpeg_tables']) 351 | # look for a bit mask file 352 | if z < len(self._mask_ifds): 353 | mask_ifd = self._mask_ifds[z] 354 | mask_offset = mask_ifd['offsets'][idx] 355 | mask_byte_count = mask_ifd['byte_counts'][idx] 356 | mask_tile = self.read( 357 | mask_offset, 358 | mask_byte_count 359 | ) 360 | tile = tile + mask_tile 361 | return image_ifd['compression'], tile 362 | else: 363 | return image_ifd['compression'], tile 364 | else: 365 | raise TIFFError(f'Overview {z} is out of bounds.') 366 | 367 | @property 368 | def version(self): 369 | return self._version 370 | -------------------------------------------------------------------------------- /cogdumper/errors.py: -------------------------------------------------------------------------------- 1 | """TIFF read exceptions.""" 2 | 3 | class TIFFError(Exception): 4 | exit_code = 1 5 | 6 | def __init__(self, message): 7 | self.message = message 8 | 9 | class JPEGError(Exception): 10 | exit_code = 1 11 | 12 | def __init__(self, message): 13 | self.message = message 14 | -------------------------------------------------------------------------------- /cogdumper/filedumper.py: -------------------------------------------------------------------------------- 1 | """A utility to dump tiles directly from a local tiff file.""" 2 | 3 | import logging 4 | from cogdumper.cog_tiles import AbstractReader 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | 9 | class Reader(AbstractReader): 10 | """Wraps the remote COG.""" 11 | 12 | def __init__(self, handle): 13 | self._handle = handle 14 | 15 | def read(self, offset, length): 16 | start = offset 17 | stop = offset + length - 1 18 | logger.info(f'Reading bytes: {start} to {stop}') 19 | self._handle.seek(offset) 20 | return self._handle.read(length) 21 | -------------------------------------------------------------------------------- /cogdumper/httpdumper.py: -------------------------------------------------------------------------------- 1 | """A utility to dump tiles directly from a tiff file on a http server.""" 2 | 3 | import logging 4 | 5 | import requests 6 | from requests.auth import HTTPBasicAuth 7 | 8 | from cogdumper.errors import TIFFError 9 | from cogdumper.cog_tiles import AbstractReader 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class Reader(AbstractReader): 15 | """Wraps the remote COG.""" 16 | 17 | def __init__(self, server, path, resource, user=None, password=None): 18 | self.server = server 19 | self.path = path 20 | self.resource = resource 21 | if path: 22 | self.url = f'{server}/{path}/{resource}' 23 | else: 24 | self.url = f'{server}/{resource}' 25 | if user: 26 | self.auth = HTTPBasicAuth(user, password) 27 | else: 28 | self.auth = None 29 | 30 | self._resource_exists = True 31 | 32 | self.session = requests.Session() 33 | r = self.session.head(self.url, auth=self.auth) 34 | if r.status_code != requests.codes.ok: 35 | self._resource_exists = False 36 | 37 | @property 38 | def resource_exists(self): 39 | return self._resource_exists 40 | 41 | def read(self, offset, length): 42 | start = offset 43 | stop = offset + length - 1 44 | logger.info(f'Reading bytes: {start} to {stop}') 45 | headers = {'Range': f'bytes={start}-{stop}'} 46 | r = self.session.get(self.url, auth=self.auth, headers=headers) 47 | if r.status_code != requests.codes.partial_content: 48 | raise TIFFError(f'HTTP byte range {offset}-{length} ' 49 | 'not available. HTTP code {r.status_code}') 50 | else: 51 | return r.content 52 | -------------------------------------------------------------------------------- /cogdumper/jpegreader.py: -------------------------------------------------------------------------------- 1 | """Simple JPEG reader for inserting missing markers.""" 2 | 3 | from cogdumper.errors import JPEGError 4 | 5 | 6 | SOI = 0xd8 7 | 8 | def insert_tables(data, tables): 9 | if tables: 10 | if data[0] == 0xFF and data[1] == SOI: 11 | # insert tables, first removing the SOI and EOI 12 | return data[0:2] + tables[2:-2] + data[2:] 13 | else: 14 | raise JPEGError('Missing SOI marker for JPEG tile') 15 | else: 16 | # no-op as per the spec, segment contains all of the JPEG data required 17 | return data 18 | -------------------------------------------------------------------------------- /cogdumper/s3dumper.py: -------------------------------------------------------------------------------- 1 | """A utility to dump tiles directly from a tiff file in an S3 bucket.""" 2 | 3 | import os 4 | import logging 5 | 6 | import boto3 7 | 8 | from cogdumper.cog_tiles import AbstractReader 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | region = os.environ.get('AWS_REGION', 'us-east-1') 13 | s3 = boto3.resource('s3', region_name=region) 14 | 15 | 16 | class Reader(AbstractReader): 17 | """Wraps the remote COG.""" 18 | 19 | def __init__(self, bucket_name, key): 20 | """Init reader object.""" 21 | self.bucket = bucket_name 22 | self.key = key 23 | self.source = s3.Object(self.bucket, self.key) 24 | 25 | def read(self, offset, length): 26 | """Read method.""" 27 | start = offset 28 | stop = offset + length - 1 29 | logger.info(f'Reading bytes: {start} to {stop}') 30 | r = self.source.get(Range=f'bytes={start}-{stop}') 31 | return r['Body'].read() 32 | -------------------------------------------------------------------------------- /cogdumper/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | """scripts.""" 2 | -------------------------------------------------------------------------------- /cogdumper/scripts/cli.py: -------------------------------------------------------------------------------- 1 | """cli.""" 2 | import logging 3 | import mimetypes 4 | 5 | import click 6 | 7 | from cogdumper import __version__ as cogdumper_version 8 | from cogdumper.cog_tiles import COGTiff 9 | from cogdumper.s3dumper import Reader as S3Reader 10 | from cogdumper.httpdumper import Reader as HTTPReader 11 | from cogdumper.filedumper import Reader as FileReader 12 | 13 | 14 | @click.group(short_help="Command line interface for COGDumper") 15 | @click.version_option(version=cogdumper_version, message='%(version)s') 16 | def cogdumper(): 17 | """Command line interface for COGDumper.""" 18 | pass 19 | 20 | 21 | @cogdumper.command(help='COGDumper cli for AWS S3 hosted dataset') 22 | @click.option('--bucket', required=True, help='AWS S3 bucket') 23 | @click.option('--key', required=True, help='AWS S3 key') 24 | @click.option('--output', default=None, type=click.Path(exists=False, file_okay=False, writable=True), 25 | help='local output directory') 26 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3, 27 | help='xyz tile coordinates where z is the overview level') 28 | @click.option('--verbose', '-v', is_flag=True, help='Show logs') 29 | @click.version_option(version=cogdumper_version, message='%(version)s') 30 | def s3(bucket, key, output, xyz, verbose): 31 | """Read AWS S3 hosted dataset.""" 32 | if verbose: 33 | logging.basicConfig(level=logging.INFO) 34 | 35 | reader = S3Reader(bucket, key) 36 | cog = COGTiff(reader.read) 37 | mime_type, tile = cog.get_tile(*xyz) 38 | if output is None: 39 | ext = mimetypes.guess_extension(mime_type) 40 | # work around a bug with mimetypes 41 | if ext == '.jpe': 42 | ext = '.jpg' 43 | 44 | output = f's3_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}' 45 | 46 | with open(output, 'wb') as dst: 47 | dst.write(tile) 48 | 49 | 50 | @cogdumper.command(help='COGDumper cli for web hosted dataset.') 51 | @click.option('--server', required=True, help='server e.g. http://localhost:8080') 52 | @click.option('--path', default=None, help='server path') 53 | @click.option('--resource', help='server resource') 54 | @click.option('--output', default=None, type=click.Path(exists=False, file_okay=False, writable=True), 55 | help='local output directory') 56 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3, 57 | help='xyz tile coordinates where z is the overview level') 58 | @click.option('--verbose', '-v', is_flag=True, help='Show logs') 59 | @click.version_option(version=cogdumper_version, message='%(version)s') 60 | def http(server, path, resource, output, xyz, verbose): 61 | """Read web hosted dataset.""" 62 | if verbose: 63 | logging.basicConfig(level=logging.INFO) 64 | 65 | reader = HTTPReader(server, path, resource) 66 | cog = COGTiff(reader.read) 67 | mime_type, tile = cog.get_tile(*xyz) 68 | if output is None: 69 | ext = mimetypes.guess_extension(mime_type) 70 | # work around a bug with mimetypes 71 | if ext == '.jpe': 72 | ext = '.jpg' 73 | 74 | output = f'http_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}' 75 | 76 | with open(output, 'wb') as dst: 77 | dst.write(tile) 78 | 79 | 80 | @cogdumper.command(help='COGDumper cli for local dataset.') 81 | @click.option('--file', required=True, type=click.Path(exists=True, file_okay=True, dir_okay=False), help='input file') 82 | @click.option('--output', default=None, type=click.Path(exists=False, dir_okay=False, file_okay=False, writable=True), 83 | help='local output directory') 84 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3, 85 | help='xyz tile coordinate where z is the overview level') 86 | @click.option('--verbose', '-v', is_flag=True, help='Show logs') 87 | @click.version_option(version=cogdumper_version, message='%(version)s') 88 | def file(file, output, xyz, verbose): 89 | """Read local dataset.""" 90 | if verbose: 91 | logging.basicConfig(level=logging.INFO) 92 | 93 | with open(file, 'rb') as src: 94 | reader = FileReader(src) 95 | cog = COGTiff(reader.read) 96 | mime_type, tile = cog.get_tile(*xyz) 97 | if output is None: 98 | ext = mimetypes.guess_extension(mime_type) 99 | # work around a bug with mimetypes 100 | if ext == '.jpe': 101 | ext = '.jpg' 102 | 103 | output = f'file_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}' 104 | 105 | with open(output, 'wb') as dst: 106 | dst.write(tile) 107 | -------------------------------------------------------------------------------- /cogdumper/tifftags.py: -------------------------------------------------------------------------------- 1 | """List of supported tiff tags.""" 2 | 3 | 4 | tags = { 5 | 256: 'ImageWidth', 6 | 257: 'ImageLength', 7 | 322: 'TileWidth', 8 | 323: 'TileLength', 9 | 324: 'TileOffsets', 10 | 325: 'TileByteCounts', 11 | 259: 'Compression', 12 | 347: 'JPEGTables' 13 | } 14 | 15 | compression = { 16 | 6: 'image/jpeg', 17 | 7: 'image/jpeg', 18 | 8: 'deflate', 19 | 34712: 'image/jp2' 20 | } 21 | 22 | sizes = { 23 | 1: { 24 | # TIFFByte 25 | 'format': 'B', 26 | 'size': 1 27 | }, 28 | 2: { 29 | # TIFFascii 30 | 'format': 'c', 31 | 'size': 1 32 | }, 33 | 3: { 34 | # TIFFshort 35 | 'format': 'H', 36 | 'size': 2 37 | }, 38 | 4: { 39 | # TIFFlong 40 | 'format': 'L', 41 | 'size': 4 42 | }, 43 | 5: { 44 | # TIFFrational 45 | 'format': 'f', 46 | 'size': 4 47 | }, 48 | 7: { 49 | # undefined 50 | 'format': 'B', 51 | 'size': 1 52 | }, 53 | 12: { 54 | # TIFFdouble 55 | 'format': 'd', 56 | 'size': 8 57 | }, 58 | 16: { 59 | # TIFFlong8 60 | 'format': 'Q', 61 | 'size': 8 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto3==1.6.2 2 | click==6.7 3 | pytest==3.4.2 4 | requests==2.20.0 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup.""" 2 | from setuptools import setup, find_packages 3 | 4 | # Parse the version from the pxmcli module. 5 | with open('cogdumper/__init__.py') as f: 6 | for line in f: 7 | if line.find("__version__") >= 0: 8 | version = line.split("=")[1].strip() 9 | version = version.strip('"') 10 | version = version.strip("'") 11 | continue 12 | 13 | inst_reqs = ['boto3>=1.6.2', 'click>=6.7', 'requests>=2.18.4'] 14 | extra_reqs = {'test': ['pytest', 'pytest-cov', 'codecov']} 15 | 16 | setup( 17 | name='cogdumper', 18 | version=version, 19 | packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), 20 | python_requires='>=3', 21 | keywords='CloudOptimized Geotiff', 22 | url='https://github.com/mapbox/COGDumper', 23 | classifiers=[ 24 | 'Intended Audience :: Information Technology', 25 | 'Intended Audience :: Science/Research', 26 | 'Programming Language :: Python :: 3.6', 27 | 'Topic :: Scientific/Engineering :: GIS'], 28 | author=u"Norman Barker", 29 | author_email='norman.barker@mapbox.com', 30 | license='MIT', 31 | long_description=open('README.md').read(), 32 | install_requires=inst_reqs, 33 | extras_require=extra_reqs, 34 | entry_points={ 35 | 'console_scripts': [ 36 | 'cogdumper = cogdumper.scripts.cli:cogdumper' 37 | ] 38 | } 39 | ) 40 | -------------------------------------------------------------------------------- /tests/data/BigTIFF.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/BigTIFF.tif -------------------------------------------------------------------------------- /tests/data/be_cog.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/be_cog.tif -------------------------------------------------------------------------------- /tests/data/cog.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/cog.tif -------------------------------------------------------------------------------- /tests/test_filedumper.py: -------------------------------------------------------------------------------- 1 | """Tests the filedumper.""" 2 | 3 | import os 4 | 5 | import pytest 6 | 7 | from cogdumper.cog_tiles import COGTiff 8 | from cogdumper.errors import TIFFError 9 | from cogdumper.filedumper import Reader as FileReader 10 | 11 | 12 | @pytest.fixture 13 | def data_dir(): 14 | return os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data') 15 | 16 | 17 | @pytest.fixture 18 | def tiff(data_dir): 19 | f = os.path.join( 20 | data_dir, 21 | 'cog.tif' 22 | ) 23 | with open(f, 'rb') as src: 24 | yield src 25 | 26 | 27 | @pytest.fixture 28 | def bigtiff(data_dir): 29 | f = os.path.join( 30 | data_dir, 31 | 'BigTIFF.tif' 32 | ) 33 | with open(f, 'rb') as src: 34 | yield src 35 | 36 | 37 | @pytest.fixture 38 | def be_tiff(data_dir): 39 | f = os.path.join( 40 | data_dir, 41 | 'be_cog.tif' 42 | ) 43 | with open(f, 'rb') as src: 44 | yield src 45 | 46 | 47 | def test_tiff_version(tiff): 48 | reader = FileReader(tiff) 49 | cog = COGTiff(reader.read) 50 | assert cog.version == 42 51 | 52 | 53 | def test_bigtiff_version(bigtiff): 54 | reader = FileReader(bigtiff) 55 | cog = COGTiff(reader.read) 56 | assert cog.version == 43 57 | 58 | 59 | def test_be_tiff_version(be_tiff): 60 | reader = FileReader(be_tiff) 61 | cog = COGTiff(reader.read) 62 | assert cog.version == 42 63 | 64 | 65 | def test_tiff_ifds(tiff): 66 | reader = FileReader(tiff) 67 | cog = COGTiff(reader.read) 68 | # read private variable directly for testing 69 | assert len(cog._image_ifds) > 0 70 | assert 8 == len(cog._image_ifds[0]['tags']) 71 | assert 0 == cog._image_ifds[4]['next_offset'] 72 | 73 | 74 | def test_be_tiff_ifds(be_tiff): 75 | reader = FileReader(be_tiff) 76 | cog = COGTiff(reader.read) 77 | # read private variable directly for testing 78 | assert len(cog._image_ifds) > 0 79 | assert 8 == len(cog._image_ifds[0]['tags']) 80 | assert 0 == cog._image_ifds[4]['next_offset'] 81 | 82 | 83 | def test_bigtiff_ifds(bigtiff): 84 | reader = FileReader(bigtiff) 85 | cog = COGTiff(reader.read) 86 | # read private variable directly for testing 87 | assert len(cog._image_ifds) > 0 88 | assert 7 == len(cog._image_ifds[0]['tags']) 89 | assert 0 == cog._image_ifds[4]['next_offset'] 90 | 91 | 92 | def test_tiff_tile(tiff): 93 | reader = FileReader(tiff) 94 | cog = COGTiff(reader.read) 95 | mime_type, tile = cog.get_tile(0, 0, 0) 96 | assert 1 == len(cog._image_ifds[0]['offsets']) 97 | assert 1 == len(cog._image_ifds[0]['byte_counts']) 98 | assert 'jpeg_tables' in cog._image_ifds[0] 99 | assert 73 == len(cog._image_ifds[0]['jpeg_tables']) 100 | assert mime_type == 'image/jpeg' 101 | 102 | 103 | def test_tiff_tile_env(tiff, monkeypatch): 104 | monkeypatch.setenv("COG_INGESTED_BYTES_AT_OPEN", "1024") 105 | reader = FileReader(tiff) 106 | cog = COGTiff(reader.read) 107 | mime_type, tile = cog.get_tile(0, 0, 0) 108 | assert 1 == len(cog._image_ifds[0]['offsets']) 109 | assert 1 == len(cog._image_ifds[0]['byte_counts']) 110 | assert 'jpeg_tables' in cog._image_ifds[0] 111 | assert 73 == len(cog._image_ifds[0]['jpeg_tables']) 112 | assert mime_type == 'image/jpeg' 113 | 114 | 115 | def test_bad_tiff_tile(tiff): 116 | reader = FileReader(tiff) 117 | cog = COGTiff(reader.read) 118 | with pytest.raises(TIFFError) as err: 119 | cog.get_tile(10, 10, 0) 120 | with pytest.raises(TIFFError) as err: 121 | cog.get_tile(10, 10, 10) 122 | 123 | def test_bigtiff_tile(bigtiff): 124 | reader = FileReader(bigtiff) 125 | cog = COGTiff(reader.read) 126 | mime_type, tile = cog.get_tile(0, 0, 0) 127 | assert 1 == len(cog._image_ifds[0]['offsets']) 128 | assert 1 == len(cog._image_ifds[0]['byte_counts']) 129 | assert 'jpeg_tables' in cog._image_ifds[0] 130 | assert cog._image_ifds[0]['jpeg_tables'] is None 131 | assert mime_type == 'application/octet-stream' 132 | --------------------------------------------------------------------------------