├── .coveragerc
├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── MANIFEST.in
├── README.md
├── cogdumper
    ├── __init__.py
    ├── cog_tiles.py
    ├── errors.py
    ├── filedumper.py
    ├── httpdumper.py
    ├── jpegreader.py
    ├── s3dumper.py
    ├── scripts
    │   ├── __init__.py
    │   └── cli.py
    └── tifftags.py
├── requirements.txt
├── setup.py
└── tests
    ├── data
        ├── BigTIFF.tif
        ├── be_cog.tif
        └── cog.tif
    └── test_filedumper.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit =
3 |   cogdumper/filedumper.py
4 |   cogdumper/httpdumper.py
5 |   cogdumper/s3dumper.py
6 |   cogdumper/jpegreader.py
7 |   cogdumper/scripts/cli.py
8 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | sudo: false
 3 | cache:
 4 |   directories:
 5 |     - ~/.cache/pip
 6 | 
 7 | python:
 8 |   - "3.6"
 9 | before_install:
10 |   - pip install -U pip
11 | install:
12 |   - "pip install -r requirements.txt"
13 |   - "pip install -e .[test]"
14 | script:
15 |   - python -m pytest --cov=cogdumper --cov-report term-missing -vv
16 |   - cd tests && python -m codecov
17 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | Changelog
 2 | =========
 3 | 
 4 | 1.1.0 (2018-04-24)
 5 | ------------------
 6 | - Update CLI
 7 | 
 8 | 1.0.0 (2018-03-30)
 9 | ------------------
10 | - initial release
11 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Mapbox
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/MANIFEST.in


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # COG Dumper
  2 | 
  3 | [![Build Status](https://travis-ci.org/mapbox/COGDumper.svg?branch=master)](https://travis-ci.org/mapbox/COGDumper) [![codecov](https://codecov.io/gh/mapbox/COGDumper/branch/master/graph/badge.svg?token=Yd3y5aTvGo)](https://codecov.io/gh/mapbox/COGDumper)
  4 | 
  5 | A python (3.6) utility to extract a tile from a Cloud Optimized GeoTIFF (COG) without decompressing the contained data. Tiff data can be hosted locally, on a web server or S3.
  6 | 
  7 | This can be useful for serving compressed tiles from a TIFF without invoking Rasterio and GDAL. This utility has been tested with TIFFs that use JPEG compression.
  8 | 
  9 | Tiled Tiff is an extension to TIFF 6.0 and more detail can be found [here](http://www.alternatiff.com/resources/TIFFphotoshop.pdf)
 10 | 
 11 | Note that tiles are padded at the edge of an image. This requires an image [mask](https://trac.osgeo.org/gdal/wiki/rfc15_nodatabitmask) to be defined if tile sizes do not align with the image width / height (as in the test data which demonstrates this effect).
 12 | 
 13 | 
 14 | ## Data Preparation
 15 | 
 16 | Read the document on [COG](https://trac.osgeo.org/gdal/wiki/CloudOptimizedGeoTIFF) and create a tiled pyramid GeoTIFF.
 17 | 
 18 | For example:
 19 | 
 20 | ```
 21 | gdal_translate SENTINEL2_L1C:S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441.SAFE/MTD_MSIL1C.xml:TCI:EPSG_32630 \
 22 |                S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif \
 23 |                -co TILED=YES -co COMPRESS=DEFLATE
 24 | gdaladdo -r average  S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif 2 4 8 16 32
 25 | gdal_translate S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI.tif \
 26 |                S2A_MSIL1C_20170102T111442_N0204_R137_T30TXT_20170102T111441_TCI_cloudoptimized_2.tif \
 27 |                -co TILED=YES -co COMPRESS=JPEG -co PHOTOMETRIC=YCBCR -co COPY_SRC_OVERVIEWS=YES
 28 | ```
 29 | 
 30 | This library also works with a file hosted in an S3 bucket.
 31 | 
 32 | ## Installation
 33 | 
 34 | Python 3.6 is required.
 35 | 
 36 | ```
 37 | pip install cogdumper
 38 | ```
 39 | 
 40 | or from source
 41 | 
 42 | ```
 43 | git clone https://github.com/mapbox/COGDumper
 44 | cd COGDumper
 45 | pip install .
 46 | ```
 47 | 
 48 | ## Command line interface
 49 | 
 50 | ```
 51 | $ cogdumper --help
 52 | Usage: cogdumper [OPTIONS] COMMAND [ARGS]...
 53 | 
 54 |   Command line interface for COGDumper.
 55 | 
 56 | Options:
 57 |   --version  Show the version and exit.
 58 |   --help     Show this message and exit.
 59 | 
 60 | Commands:
 61 |   file  COGDumper cli for local dataset.
 62 |   http  COGDumper cli for web hosted dataset.
 63 |   s3    COGDumper cli for AWS S3 hosted dataset
 64 | ```
 65 | 
 66 | ##### local files
 67 | ```
 68 | cogdumper file --help
 69 | Usage: cogdumper file [OPTIONS]
 70 | 
 71 |   COGDumper cli for local dataset.
 72 | 
 73 | Options:
 74 |   --file PATH       input file  [required]
 75 |   --output PATH     local output directory
 76 |   --xyz INTEGER...  xyz tile coordinate where z is the overview level
 77 |   --version         Show the version and exit.
 78 |   --help            Show this message and exit.
 79 | ```
 80 | e.g. `cogdumper file --file data/cog.tif --xyz 0 0 0`
 81 | 
 82 | ##### web files
 83 | 
 84 | ```
 85 | cogdumper http --help
 86 | Usage: cogdumper http [OPTIONS]
 87 | 
 88 |   COGDumper cli for web hosted dataset.
 89 | 
 90 | Options:
 91 |   --server TEXT       server e.g. http://localhost:8080  [required]
 92 |   --path TEXT         server path
 93 |   --resource TEXT     server resource
 94 |   --output DIRECTORY  local output directory
 95 |   --xyz INTEGER...    xyz tile coordinates where z is the overview level
 96 |   --version           Show the version and exit.
 97 |   --help              Show this message and exit.
 98 | ```
 99 | 
100 | e.g. `cogdumper http --server http://localhost:8080 --path data --resource cog.tif`
101 | 
102 | ##### S3 files
103 | ```
104 | cogdumper s3 --help
105 | Usage: cogdumper s3 [OPTIONS]
106 | 
107 |   COGDumper cli for AWS S3 hosted dataset
108 | 
109 | Options:
110 |   --bucket TEXT       AWS S3 bucket  [required]
111 |   --key TEXT          AWS S3 key  [required]
112 |   --output DIRECTORY  local output directory
113 |   --xyz INTEGER...    xyz tile coordinates where z is the overview level
114 |   --help              Show this message and exit.
115 | ```
116 | 
117 | e.g. `cogdumper s3 --bucket bucket_name --key key_name/image.tif --xyz 0 0 0`
118 | 


--------------------------------------------------------------------------------
/cogdumper/__init__.py:
--------------------------------------------------------------------------------
1 | # cogdumper
2 | __version__ = '1.1.0'
3 | 


--------------------------------------------------------------------------------
/cogdumper/cog_tiles.py:
--------------------------------------------------------------------------------
  1 | """Function for extracting tiff tiles."""
  2 | 
  3 | import os
  4 | 
  5 | from abc import abstractmethod
  6 | from math import ceil
  7 | import struct
  8 | 
  9 | from cogdumper.errors import TIFFError
 10 | from cogdumper.jpegreader import insert_tables
 11 | from cogdumper.tifftags import compression as CompressionType
 12 | from cogdumper.tifftags import sizes as TIFFSizes
 13 | from cogdumper.tifftags import tags as TIFFTags
 14 | 
 15 | 
 16 | class AbstractReader:  # pragma: no cover
 17 |     @abstractmethod
 18 |     def read(offset, len):
 19 |         pass
 20 | 
 21 | 
 22 | class COGTiff:
 23 |     """
 24 |     Cloud Optimised GeoTIFF
 25 |     -----
 26 |     Format
 27 |         TIFF / BigTIFF signature
 28 |         IFD (Image File Directory) of full resolution image
 29 |         Values of TIFF tags that don't fit inline in the IFD directory, such as TileOffsets, TileByteCounts and GeoTIFF keys
 30 |         Optional: IFD (Image File Directory) of first overview (typically subsampled by a factor of 2), followed by the values of its tags that don't fit inline
 31 |         Optional: IFD (Image File Directory) of second overview (typically subsampled by a factor of 4), followed by the values of its tags that don't fit inline
 32 |         ...
 33 |         Optional: IFD (Image File Directory) of last overview (typically subsampled by a factor of 2N), followed by the values of its tags that don't fit inline
 34 |         Optional: tile content of last overview level
 35 |         ...
 36 |         Optional: tile content of first overview level
 37 |         Tile content of full resolution image.
 38 |     """
 39 |     def __init__(self, reader):
 40 |         """Parses a (Big)TIFF for image tiles.
 41 |         Parameters
 42 |         ----------
 43 |         reader:
 44 |             A reader that implements the cogdumper.cog_tiles.AbstractReader methods
 45 |         """
 46 |         self._endian = '<'
 47 |         self._version = 42
 48 |         self.read = reader
 49 |         self._big_tiff = False
 50 |         self.header = ''
 51 |         self._offset = 0
 52 |         self._image_ifds = []
 53 |         self._mask_ifds = []
 54 | 
 55 |         self.read_header()
 56 | 
 57 |     def _ifds(self):
 58 |         """Reads TIFF image file directories from a COG recursively.
 59 |         Parameters
 60 |         -----------
 61 |         offset:
 62 |             number, offset into the tiff stream to read from, this is only
 63 |             required for the first image file directory
 64 |         overview:
 65 |             number, an identifier that is the overview level in the COG
 66 |             image pyramid
 67 |         Yield
 68 |         --------
 69 |         dict: Image File Directory for the next IFD
 70 |         """
 71 |         while self._offset != 0:
 72 |             next_offset = 0
 73 |             pos = 0
 74 |             tags = []
 75 | 
 76 |             fallback_size = 4096 if self._big_tiff else 1024
 77 |             if self._offset > len(self.header):
 78 |                 byte_starts = len(self.header)
 79 |                 byte_ends = byte_starts + self._offset + fallback_size
 80 |                 self.header += self.read(byte_starts, byte_ends)
 81 | 
 82 |             if self._big_tiff:
 83 |                 bytes = self.header[self._offset: self._offset + 8]
 84 |                 num_tags = struct.unpack(f'{self._endian}Q', bytes)[0]
 85 | 
 86 |                 byte_starts = self._offset + 8
 87 |                 byte_ends = (num_tags * 20) + 8 + byte_starts
 88 |                 if byte_ends > len(self.header):
 89 |                     s = len(self.header)
 90 |                     self.header += self.read(s, byte_ends)
 91 | 
 92 |                 bytes = self.header[byte_starts: byte_ends]
 93 | 
 94 |                 for t in range(0, num_tags):
 95 |                     code = struct.unpack(
 96 |                         f'{self._endian}H',
 97 |                         bytes[pos: pos + 2]
 98 |                     )[0]
 99 | 
100 |                     if code in TIFFTags:
101 |                         dtype = struct.unpack(
102 |                             f'{self._endian}H',
103 |                             bytes[pos + 2: pos + 4]
104 |                         )[0]
105 | 
106 |                         if dtype not in TIFFSizes:  # pragma: no cover
107 |                             raise TIFFError(f'Unrecognised data type {dtype}')
108 | 
109 |                         num_values = struct.unpack(
110 |                             f'{self._endian}Q',
111 |                             bytes[pos + 4: pos + 12]
112 |                         )[0]
113 |                         tag_len = num_values * TIFFSizes[dtype]['size']
114 |                         if tag_len <= 8:
115 |                             data = bytes[pos + 12: pos + 12 + tag_len]
116 |                         else:  # pragma: no cover
117 |                             data_offset = struct.unpack(
118 |                                 f'{self._endian}Q',
119 |                                 bytes[pos + 12: pos + 20]
120 |                             )[0]
121 | 
122 |                             byte_starts = data_offset
123 |                             byte_ends = byte_starts + tag_len
124 |                             if byte_ends > len(self.header):
125 |                                 s = len(self.header)
126 |                                 self.header += self.read(s, byte_ends)
127 | 
128 |                             data = self.header[byte_starts: byte_ends]
129 | 
130 |                         tags.append(
131 |                             {
132 |                                 'code': code,
133 |                                 'dtype': TIFFSizes[dtype],
134 |                                 'num_values': num_values,
135 |                                 'data': data
136 |                             }
137 |                         )
138 | 
139 |                     pos = pos + 20
140 | 
141 |                 self._offset = self._offset + 8 + pos
142 |                 next_offset = struct.unpack(
143 |                     f'{self._endian}Q',
144 |                     self.header[self._offset: self._offset + 8]
145 |                 )[0]
146 |             else:
147 |                 bytes = self.header[self._offset: self._offset + 2]
148 |                 num_tags = struct.unpack(f'{self._endian}H', bytes)[0]
149 | 
150 |                 byte_starts = self._offset + 2
151 |                 byte_ends = (num_tags * 12) + 2 + byte_starts
152 |                 if byte_ends > len(self.header):
153 |                     s = len(self.header)
154 |                     self.header += self.read(s, byte_ends)
155 | 
156 |                 bytes = self.header[byte_starts: byte_ends]
157 | 
158 |                 for t in range(0, num_tags):
159 |                     code = struct.unpack(
160 |                         f'{self._endian}H',
161 |                         bytes[pos: pos + 2]
162 |                     )[0]
163 | 
164 |                     if code in TIFFTags:
165 |                         dtype = struct.unpack(
166 |                             f'{self._endian}H',
167 |                             bytes[pos + 2: pos + 4]
168 |                         )[0]
169 | 
170 |                         if dtype not in TIFFSizes:  # pragma: no cover
171 |                             raise TIFFError(f'Unrecognised data type {dtype}')
172 | 
173 |                         num_values = struct.unpack(
174 |                             f'{self._endian}L',
175 |                             bytes[pos + 4: pos + 8]
176 |                         )[0]
177 |                         tag_len = num_values * TIFFSizes[dtype]['size']
178 |                         if tag_len <= 4:
179 |                             data = bytes[pos + 8: pos + 8 + tag_len]
180 |                         else:
181 |                             data_offset = struct.unpack(
182 |                                 f'{self._endian}L',
183 |                                 bytes[pos + 8: pos + 12]
184 |                             )[0]
185 | 
186 |                             byte_starts = data_offset
187 |                             byte_ends = byte_starts + tag_len
188 |                             if byte_ends > len(self.header):
189 |                                 s = len(self.header)
190 |                                 self.header += self.read(s, byte_ends)
191 |                             data = self.header[byte_starts: byte_ends]
192 | 
193 |                         tags.append(
194 |                             {
195 |                                 'code': code,
196 |                                 'dtype': TIFFSizes[dtype],
197 |                                 'num_values': num_values,
198 |                                 'data': data
199 |                             }
200 |                         )
201 | 
202 |                     pos = pos + 12
203 | 
204 |                 self._offset = self._offset + 2 + pos
205 |                 next_offset = struct.unpack(
206 |                     f'{self._endian}L',
207 |                     self.header[self._offset: self._offset + 4]
208 |                 )[0]
209 | 
210 |             self._offset = next_offset
211 | 
212 |             yield {
213 |                 'tags': tags,
214 |                 'next_offset': next_offset
215 |             }
216 | 
217 |     def read_header(self):
218 |         """Read and parse COG header."""
219 |         buff_size = int(os.environ.get('COG_INGESTED_BYTES_AT_OPEN', '16384'))
220 |         self.header = self.read(0, buff_size)
221 | 
222 |         # read first 4 bytes to determine tiff or bigtiff and byte order
223 |         if self.header[:2] == b'MM':
224 |             self._endian = '>'
225 | 
226 |         self._version = struct.unpack(f'{self._endian}H', self.header[2:4])[0]
227 | 
228 |         if self._version == 42:
229 |             # TIFF
230 |             self._big_tiff = False
231 |             # read offset to first IFD
232 |             self._offset = struct.unpack(f'{self._endian}L', self.header[4:8])[0]
233 |         elif self._version == 43:
234 |             # BIGTIFF
235 |             self._big_tiff = True
236 |             bytes = self.header[4:16]
237 |             bytesize = struct.unpack(f'{self._endian}H', bytes[0:2])[0]
238 |             w = struct.unpack(f'{self._endian}H', bytes[2:4])[0]
239 |             self._offset = struct.unpack(f'{self._endian}Q', bytes[4:])[0]
240 |             if bytesize != 8 or w != 0:  # pragma: no cover
241 |                 raise TIFFError(f"Invalid BigTIFF with bytesize {bytesize} and word {w}")
242 |         else:  # pragma: no cover
243 |             raise TIFFError(f"Invalid version {self._version} for TIFF file")
244 | 
245 |         self._init = True
246 | 
247 |         # for JPEG we need to read all IFDs, they are at the front of the file
248 |         for ifd in self._ifds():
249 |             mime_type = 'image/jpeg'
250 |             # tile offsets are an extension but if they aren't in the file then
251 |             # you can't get a tile back!
252 |             offsets = []
253 |             byte_counts = []
254 |             image_width = 0
255 |             image_height = 0
256 |             tile_width = 0
257 |             tile_height = 0
258 |             jpeg_tables = None
259 | 
260 |             for t in ifd['tags']:
261 |                 code = t['code']
262 |                 fmt = t['dtype']['format']
263 |                 if code == 256:
264 |                     # image width
265 |                     image_width = struct.unpack(
266 |                         f'{self._endian}{fmt}',
267 |                         t['data']
268 |                     )[0]
269 |                 elif code == 257:
270 |                     # image height
271 |                     image_height = struct.unpack(
272 |                         f'{self._endian}{fmt}',
273 |                         t['data']
274 |                     )[0]
275 |                 elif code == 259:
276 |                     # compression
277 |                     val = struct.unpack(
278 |                         f'{self._endian}{fmt}',
279 |                         t['data']
280 |                     )[0]
281 |                     if val in CompressionType:
282 |                         mime_type = CompressionType[val]
283 |                     else:
284 |                         mime_type = 'application/octet-stream'
285 |                 elif code == 322:
286 |                     # tile width
287 |                     tile_width = struct.unpack(
288 |                         f'{self._endian}{fmt}',
289 |                         t['data']
290 |                     )[0]
291 |                 elif code == 323:
292 |                     # tile height
293 |                     tile_height = struct.unpack(
294 |                         f'{self._endian}{fmt}',
295 |                         t['data']
296 |                     )[0]
297 |                 elif code == 324:
298 |                     # tile offsets
299 |                     offsets = struct.unpack(
300 |                         f'{self._endian}{t["num_values"]}{fmt}',
301 |                         t['data']
302 |                     )
303 |                 elif code == 325:
304 |                     # tile byte counts
305 |                     byte_counts = struct.unpack(
306 |                         f'{self._endian}{t["num_values"]}{fmt}',
307 |                         t['data']
308 |                     )
309 |                 elif code == 347:
310 |                     # JPEG Tables
311 |                     jpeg_tables = t['data']
312 | 
313 |             if len(offsets) == 0:
314 |                 raise TIFFError('TIFF Tiles are not found in IFD {z}')
315 | 
316 |             ifd['image_width'] = image_width
317 |             ifd['image_height'] = image_height
318 |             ifd['compression'] = mime_type
319 |             ifd['tile_width'] = tile_width
320 |             ifd['tile_height'] = tile_height
321 |             ifd['offsets'] = offsets
322 |             ifd['byte_counts'] = byte_counts
323 |             ifd['jpeg_tables'] = jpeg_tables
324 | 
325 |             ifd['nx_tiles'] = ceil(image_width / float(tile_width))
326 |             ifd['ny_tiles'] = ceil(image_height / float(tile_height))
327 | 
328 |             if (ifd['compression'] == 'deflate'):
329 |                 self._mask_ifds.append(ifd)
330 |             else:
331 |                 self._image_ifds.append(ifd)
332 | 
333 |         if len(self._image_ifds) == 0 and len(self._mask_ifds) > 0:  # pragma: no cover
334 |             self._image_ifds = self._mask_ifds
335 |             self._mask_ifds = []
336 | 
337 |     def get_tile(self, x, y, z):
338 |         """Read tile data."""
339 |         if z < len(self._image_ifds):
340 |             image_ifd = self._image_ifds[z]
341 |             idx = (y * image_ifd['ny_tiles']) + x
342 |             if idx > len(image_ifd['offsets']):
343 |                 raise TIFFError(f'Tile {x} {y} {z} does not exist')
344 |             else:
345 |                 offset = image_ifd['offsets'][idx]
346 |                 byte_count = image_ifd['byte_counts'][idx]
347 |                 tile = self.read(offset, byte_count)
348 |                 if image_ifd['compression'] == 'image/jpeg':
349 |                     # fix up jpeg tile with missing quantization tables
350 |                     tile = insert_tables(tile, image_ifd['jpeg_tables'])
351 |                     # look for a bit mask file
352 |                     if z < len(self._mask_ifds):
353 |                         mask_ifd = self._mask_ifds[z]
354 |                         mask_offset = mask_ifd['offsets'][idx]
355 |                         mask_byte_count = mask_ifd['byte_counts'][idx]
356 |                         mask_tile = self.read(
357 |                             mask_offset,
358 |                             mask_byte_count
359 |                             )
360 |                         tile = tile + mask_tile
361 |                     return image_ifd['compression'], tile
362 |                 else:
363 |                     return image_ifd['compression'], tile
364 |         else:
365 |             raise TIFFError(f'Overview {z} is out of bounds.')
366 | 
367 |     @property
368 |     def version(self):
369 |         return self._version
370 | 


--------------------------------------------------------------------------------
/cogdumper/errors.py:
--------------------------------------------------------------------------------
 1 | """TIFF read exceptions."""
 2 | 
 3 | class TIFFError(Exception):
 4 |     exit_code = 1
 5 | 
 6 |     def __init__(self, message):
 7 |         self.message = message
 8 | 
 9 | class JPEGError(Exception):
10 |     exit_code = 1
11 | 
12 |     def __init__(self, message):
13 |         self.message = message
14 | 


--------------------------------------------------------------------------------
/cogdumper/filedumper.py:
--------------------------------------------------------------------------------
 1 | """A utility to dump tiles directly from a local tiff file."""
 2 | 
 3 | import logging
 4 | from cogdumper.cog_tiles import AbstractReader
 5 | 
 6 | logger = logging.getLogger(__name__)
 7 | 
 8 | 
 9 | class Reader(AbstractReader):
10 |     """Wraps the remote COG."""
11 | 
12 |     def __init__(self, handle):
13 |         self._handle = handle
14 | 
15 |     def read(self, offset, length):
16 |         start = offset
17 |         stop = offset + length - 1
18 |         logger.info(f'Reading bytes: {start} to {stop}')
19 |         self._handle.seek(offset)
20 |         return self._handle.read(length)
21 | 


--------------------------------------------------------------------------------
/cogdumper/httpdumper.py:
--------------------------------------------------------------------------------
 1 | """A utility to dump tiles directly from a tiff file on a http server."""
 2 | 
 3 | import logging
 4 | 
 5 | import requests
 6 | from requests.auth import HTTPBasicAuth
 7 | 
 8 | from cogdumper.errors import TIFFError
 9 | from cogdumper.cog_tiles import AbstractReader
10 | 
logger = logging.getLogger(__name__)


class Reader(AbstractReader):
    """Reads byte ranges of a COG hosted on an HTTP server.

    Parameters
    ----------
    server:
        str, scheme and host, e.g. http://localhost:8080
    path:
        str, optional path between the server and the resource
    resource:
        str, name of the TIFF resource
    user, password:
        optional credentials; HTTP basic auth is used when ``user`` is set
    """

    def __init__(self, server, path, resource, user=None, password=None):
        self.server = server
        self.path = path
        self.resource = resource
        if path:
            self.url = f'{server}/{path}/{resource}'
        else:
            self.url = f'{server}/{resource}'
        if user:
            self.auth = HTTPBasicAuth(user, password)
        else:
            self.auth = None

        self._resource_exists = True

        # One session is reused for the HEAD probe and all range requests.
        self.session = requests.Session()
        r = self.session.head(self.url, auth=self.auth)
        if r.status_code != requests.codes.ok:
            self._resource_exists = False

    @property
    def resource_exists(self):
        """True when the HEAD request made at construction returned 200."""
        return self._resource_exists

    def read(self, offset, length):
        """Return ``length`` bytes of the resource starting at ``offset``.

        Raises
        ------
        TIFFError: if the server does not answer with 206 Partial Content
        """
        start = offset
        stop = offset + length - 1
        logger.info(f'Reading bytes: {start} to {stop}')
        headers = {'Range': f'bytes={start}-{stop}'}
        r = self.session.get(self.url, auth=self.auth, headers=headers)
        if r.status_code != requests.codes.partial_content:
            # Both halves are f-strings so the status code is interpolated;
            # the range shown is the one actually requested.
            raise TIFFError(f'HTTP byte range {start}-{stop} '
                            f'not available. HTTP code {r.status_code}')
        else:
            return r.content
52 | 


--------------------------------------------------------------------------------
/cogdumper/jpegreader.py:
--------------------------------------------------------------------------------
 1 | """Simple JPEG reader for inserting missing markers."""
 2 | 
 3 | from cogdumper.errors import JPEGError
 4 | 
 5 | 
 6 | SOI = 0xd8
 7 | 
 8 | def insert_tables(data, tables):
 9 |     if tables:
10 |         if data[0] == 0xFF and data[1] == SOI:
11 |             # insert tables, first removing the SOI and EOI
12 |             return data[0:2] + tables[2:-2] + data[2:]
13 |         else:
14 |             raise JPEGError('Missing SOI marker for JPEG tile')
15 |     else:
16 |         # no-op as per the spec, segment contains all of the JPEG data required
17 |         return data
18 | 


--------------------------------------------------------------------------------
/cogdumper/s3dumper.py:
--------------------------------------------------------------------------------
 1 | """A utility to dump tiles directly from a tiff file in an S3 bucket."""
 2 | 
 3 | import os
 4 | import logging
 5 | 
 6 | import boto3
 7 | 
 8 | from cogdumper.cog_tiles import AbstractReader
 9 | 
logger = logging.getLogger(__name__)

# The S3 resource is created once, at import time, for the region named by
# the AWS_REGION environment variable (us-east-1 when unset).
region = os.environ.get('AWS_REGION', 'us-east-1')
s3 = boto3.resource('s3', region_name=region)


class Reader(AbstractReader):
    """Reads byte ranges of a COG stored as an object in an S3 bucket."""

    def __init__(self, bucket_name, key):
        """Remember the bucket and key and bind the matching S3 object."""
        self.bucket = bucket_name
        self.key = key
        self.source = s3.Object(bucket_name, key)

    def read(self, offset, length):
        """Return ``length`` bytes of the object starting at ``offset``.

        The inclusive byte range is logged at INFO level and sent as the
        ``Range`` argument of the object ``get`` call.
        """
        last_byte = offset + length - 1
        logger.info(f'Reading bytes: {offset} to {last_byte}')
        byte_range = f'bytes={offset}-{last_byte}'
        response = self.source.get(Range=byte_range)
        return response['Body'].read()
32 | 


--------------------------------------------------------------------------------
/cogdumper/scripts/__init__.py:
--------------------------------------------------------------------------------
1 | """scripts."""
2 | 


--------------------------------------------------------------------------------
/cogdumper/scripts/cli.py:
--------------------------------------------------------------------------------
  1 | """cli."""
  2 | import logging
  3 | import mimetypes
  4 | 
  5 | import click
  6 | 
  7 | from cogdumper import __version__ as cogdumper_version
  8 | from cogdumper.cog_tiles import COGTiff
  9 | from cogdumper.s3dumper import Reader as S3Reader
 10 | from cogdumper.httpdumper import Reader as HTTPReader
 11 | from cogdumper.filedumper import Reader as FileReader
 12 | 
 13 | 
 14 | @click.group(short_help="Command line interface for COGDumper")
 15 | @click.version_option(version=cogdumper_version, message='%(version)s')
 16 | def cogdumper():
 17 |     """Command line interface for COGDumper."""
 18 |     pass
 19 | 
 20 | 
 21 | @cogdumper.command(help='COGDumper cli for AWS S3 hosted dataset')
 22 | @click.option('--bucket', required=True, help='AWS S3 bucket')
 23 | @click.option('--key', required=True, help='AWS S3 key')
 24 | @click.option('--output', default=None, type=click.Path(exists=False, file_okay=False, writable=True),
 25 |               help='local output directory')
 26 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3,
 27 |               help='xyz tile coordinates where z is the overview level')
 28 | @click.option('--verbose', '-v', is_flag=True, help='Show logs')
 29 | @click.version_option(version=cogdumper_version, message='%(version)s')
 30 | def s3(bucket, key, output, xyz, verbose):
 31 |     """Read AWS S3 hosted dataset."""
 32 |     if verbose:
 33 |         logging.basicConfig(level=logging.INFO)
 34 | 
 35 |     reader = S3Reader(bucket, key)
 36 |     cog = COGTiff(reader.read)
 37 |     mime_type, tile = cog.get_tile(*xyz)
 38 |     if output is None:
 39 |         ext = mimetypes.guess_extension(mime_type)
 40 |         # work around a bug with mimetypes
 41 |         if ext == '.jpe':
 42 |             ext = '.jpg'
 43 | 
 44 |         output = f's3_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}'
 45 | 
 46 |     with open(output, 'wb') as dst:
 47 |         dst.write(tile)
 48 | 
 49 | 
 50 | @cogdumper.command(help='COGDumper cli for web hosted dataset.')
 51 | @click.option('--server', required=True, help='server e.g. http://localhost:8080')
 52 | @click.option('--path', default=None, help='server path')
 53 | @click.option('--resource', help='server resource')
 54 | @click.option('--output', default=None, type=click.Path(exists=False, file_okay=False, writable=True),
 55 |               help='local output directory')
 56 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3,
 57 |               help='xyz tile coordinates where z is the overview level')
 58 | @click.option('--verbose', '-v', is_flag=True, help='Show logs')
 59 | @click.version_option(version=cogdumper_version, message='%(version)s')
 60 | def http(server, path, resource, output, xyz, verbose):
 61 |     """Read web hosted dataset."""
 62 |     if verbose:
 63 |         logging.basicConfig(level=logging.INFO)
 64 | 
 65 |     reader = HTTPReader(server, path, resource)
 66 |     cog = COGTiff(reader.read)
 67 |     mime_type, tile = cog.get_tile(*xyz)
 68 |     if output is None:
 69 |         ext = mimetypes.guess_extension(mime_type)
 70 |         # work around a bug with mimetypes
 71 |         if ext == '.jpe':
 72 |             ext = '.jpg'
 73 | 
 74 |         output = f'http_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}'
 75 | 
 76 |     with open(output, 'wb') as dst:
 77 |         dst.write(tile)
 78 | 
 79 | 
 80 | @cogdumper.command(help='COGDumper cli for local dataset.')
 81 | @click.option('--file', required=True, type=click.Path(exists=True, file_okay=True, dir_okay=False), help='input file')
 82 | @click.option('--output', default=None, type=click.Path(exists=False, dir_okay=False, file_okay=False, writable=True),
 83 |               help='local output directory')
 84 | @click.option('--xyz', type=click.INT, default=[0, 0, 0], nargs=3,
 85 |               help='xyz tile coordinate where z is the overview level')
 86 | @click.option('--verbose', '-v', is_flag=True, help='Show logs')
 87 | @click.version_option(version=cogdumper_version, message='%(version)s')
 88 | def file(file, output, xyz, verbose):
 89 |     """Read local dataset."""
 90 |     if verbose:
 91 |         logging.basicConfig(level=logging.INFO)
 92 | 
 93 |     with open(file, 'rb') as src:
 94 |         reader = FileReader(src)
 95 |         cog = COGTiff(reader.read)
 96 |         mime_type, tile = cog.get_tile(*xyz)
 97 |         if output is None:
 98 |             ext = mimetypes.guess_extension(mime_type)
 99 |             # work around a bug with mimetypes
100 |             if ext == '.jpe':
101 |                 ext = '.jpg'
102 | 
103 |             output = f'file_{xyz[0]}_{xyz[1]}_{xyz[2]}{ext}'
104 | 
105 |         with open(output, 'wb') as dst:
106 |             dst.write(tile)
107 | 


--------------------------------------------------------------------------------
/cogdumper/tifftags.py:
--------------------------------------------------------------------------------
 1 | """List of supported tiff tags."""
 2 | 
 3 | 
 4 | tags = {
 5 |     256: 'ImageWidth',
 6 |     257: 'ImageLength',
 7 |     322: 'TileWidth',
 8 |     323: 'TileLength',
 9 |     324: 'TileOffsets',
10 |     325: 'TileByteCounts',
11 |     259: 'Compression',
12 |     347: 'JPEGTables'
13 | }
14 | 
15 | compression = {
16 |     6: 'image/jpeg',
17 |     7: 'image/jpeg',
18 |     8: 'deflate',
19 |     34712: 'image/jp2'
20 | }
21 | 
22 | sizes = {
23 |     1: {
24 |         # TIFFByte
25 |         'format': 'B',
26 |         'size': 1
27 |     },
28 |     2: {
29 |         # TIFFascii
30 |         'format': 'c',
31 |         'size': 1
32 |         },
33 |     3: {
34 |         # TIFFshort
35 |         'format': 'H',
36 |         'size': 2
37 |         },
38 |     4: {
39 |         # TIFFlong
40 |         'format': 'L',
41 |         'size': 4
42 |         },
43 |     5: {
44 |         # TIFFrational
45 |         'format': 'f',
46 |         'size': 4
47 |         },
48 |     7: {
49 |         # undefined
50 |         'format': 'B',
51 |         'size': 1
52 |         },
53 |     12: {
54 |         # TIFFdouble
55 |         'format': 'd',
56 |         'size': 8
57 |     },
58 |     16: {
59 |         # TIFFlong8
60 |         'format': 'Q',
61 |         'size': 8
62 |     }
63 | }
64 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | boto3==1.6.2
2 | click==6.7
3 | pytest==3.4.2
4 | requests==2.20.0
5 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """Setup."""
 2 | from setuptools import setup, find_packages
 3 | 
 4 | # Parse the version from the pxmcli module.
 5 | with open('cogdumper/__init__.py') as f:
 6 |     for line in f:
 7 |         if line.find("__version__") >= 0:
 8 |             version = line.split("=")[1].strip()
 9 |             version = version.strip('"')
10 |             version = version.strip("'")
11 |             continue
12 | 
13 | inst_reqs = ['boto3>=1.6.2', 'click>=6.7', 'requests>=2.18.4']
14 | extra_reqs = {'test': ['pytest', 'pytest-cov', 'codecov']}
15 | 
16 | setup(
17 |     name='cogdumper',
18 |     version=version,
19 |     packages=find_packages(exclude=['ez_setup', 'examples', 'tests']),
20 |     python_requires='>=3',
21 |     keywords='CloudOptimized Geotiff',
22 |     url='https://github.com/mapbox/COGDumper',
23 |     classifiers=[
24 |       'Intended Audience :: Information Technology',
25 |       'Intended Audience :: Science/Research',
26 |       'Programming Language :: Python :: 3.6',
27 |       'Topic :: Scientific/Engineering :: GIS'],
28 |     author=u"Norman Barker",
29 |     author_email='norman.barker@mapbox.com',
30 |     license='MIT',
31 |     long_description=open('README.md').read(),
32 |     install_requires=inst_reqs,
33 |     extras_require=extra_reqs,
34 |     entry_points={
35 |       'console_scripts': [
36 |         'cogdumper = cogdumper.scripts.cli:cogdumper'
37 |       ]
38 |     }
39 | )
40 | 


--------------------------------------------------------------------------------
/tests/data/BigTIFF.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/BigTIFF.tif


--------------------------------------------------------------------------------
/tests/data/be_cog.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/be_cog.tif


--------------------------------------------------------------------------------
/tests/data/cog.tif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mapbox/COGDumper/89a5f05fc0ed88c36f44e42dfe8d48e4c4ff389b/tests/data/cog.tif


--------------------------------------------------------------------------------
/tests/test_filedumper.py:
--------------------------------------------------------------------------------
  1 | """Tests the filedumper."""
  2 | 
  3 | import os
  4 | 
  5 | import pytest
  6 | 
  7 | from cogdumper.cog_tiles import COGTiff
  8 | from cogdumper.errors import TIFFError
  9 | from cogdumper.filedumper import Reader as FileReader
 10 | 
 11 | 
 12 | @pytest.fixture
 13 | def data_dir():
 14 |     return os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data')
 15 | 
 16 | 
 17 | @pytest.fixture
 18 | def tiff(data_dir):
 19 |     f = os.path.join(
 20 |         data_dir,
 21 |         'cog.tif'
 22 |     )
 23 |     with open(f, 'rb') as src:
 24 |         yield src
 25 | 
 26 | 
 27 | @pytest.fixture
 28 | def bigtiff(data_dir):
 29 |     f = os.path.join(
 30 |         data_dir,
 31 |         'BigTIFF.tif'
 32 |     )
 33 |     with open(f, 'rb') as src:
 34 |         yield src
 35 | 
 36 | 
 37 | @pytest.fixture
 38 | def be_tiff(data_dir):
 39 |     f = os.path.join(
 40 |         data_dir,
 41 |         'be_cog.tif'
 42 |     )
 43 |     with open(f, 'rb') as src:
 44 |         yield src
 45 | 
 46 | 
 47 | def test_tiff_version(tiff):
 48 |     reader = FileReader(tiff)
 49 |     cog = COGTiff(reader.read)
 50 |     assert cog.version == 42
 51 | 
 52 | 
 53 | def test_bigtiff_version(bigtiff):
 54 |     reader = FileReader(bigtiff)
 55 |     cog = COGTiff(reader.read)
 56 |     assert cog.version == 43
 57 | 
 58 | 
 59 | def test_be_tiff_version(be_tiff):
 60 |     reader = FileReader(be_tiff)
 61 |     cog = COGTiff(reader.read)
 62 |     assert cog.version == 42
 63 | 
 64 | 
 65 | def test_tiff_ifds(tiff):
 66 |     reader = FileReader(tiff)
 67 |     cog = COGTiff(reader.read)
 68 |     # read private variable directly for testing
 69 |     assert len(cog._image_ifds) > 0
 70 |     assert 8 == len(cog._image_ifds[0]['tags'])
 71 |     assert 0 == cog._image_ifds[4]['next_offset']
 72 | 
 73 | 
 74 | def test_be_tiff_ifds(be_tiff):
 75 |     reader = FileReader(be_tiff)
 76 |     cog = COGTiff(reader.read)
 77 |     # read private variable directly for testing
 78 |     assert len(cog._image_ifds) > 0
 79 |     assert 8 == len(cog._image_ifds[0]['tags'])
 80 |     assert 0 == cog._image_ifds[4]['next_offset']
 81 | 
 82 | 
 83 | def test_bigtiff_ifds(bigtiff):
 84 |     reader = FileReader(bigtiff)
 85 |     cog = COGTiff(reader.read)
 86 |     # read private variable directly for testing
 87 |     assert len(cog._image_ifds) > 0
 88 |     assert 7 == len(cog._image_ifds[0]['tags'])
 89 |     assert 0 == cog._image_ifds[4]['next_offset']
 90 | 
 91 | 
 92 | def test_tiff_tile(tiff):
 93 |     reader = FileReader(tiff)
 94 |     cog = COGTiff(reader.read)
 95 |     mime_type, tile = cog.get_tile(0, 0, 0)
 96 |     assert 1 == len(cog._image_ifds[0]['offsets'])
 97 |     assert 1 == len(cog._image_ifds[0]['byte_counts'])
 98 |     assert 'jpeg_tables' in cog._image_ifds[0]
 99 |     assert 73 == len(cog._image_ifds[0]['jpeg_tables'])
100 |     assert mime_type == 'image/jpeg'
101 | 
102 | 
103 | def test_tiff_tile_env(tiff, monkeypatch):
104 |     monkeypatch.setenv("COG_INGESTED_BYTES_AT_OPEN", "1024")
105 |     reader = FileReader(tiff)
106 |     cog = COGTiff(reader.read)
107 |     mime_type, tile = cog.get_tile(0, 0, 0)
108 |     assert 1 == len(cog._image_ifds[0]['offsets'])
109 |     assert 1 == len(cog._image_ifds[0]['byte_counts'])
110 |     assert 'jpeg_tables' in cog._image_ifds[0]
111 |     assert 73 == len(cog._image_ifds[0]['jpeg_tables'])
112 |     assert mime_type == 'image/jpeg'
113 | 
114 | 
115 | def test_bad_tiff_tile(tiff):
116 |     reader = FileReader(tiff)
117 |     cog = COGTiff(reader.read)
118 |     with pytest.raises(TIFFError) as err:
119 |         cog.get_tile(10, 10, 0)
120 |     with pytest.raises(TIFFError) as err:
121 |         cog.get_tile(10, 10, 10)
122 | 
123 | def test_bigtiff_tile(bigtiff):
124 |     reader = FileReader(bigtiff)
125 |     cog = COGTiff(reader.read)
126 |     mime_type, tile = cog.get_tile(0, 0, 0)
127 |     assert 1 == len(cog._image_ifds[0]['offsets'])
128 |     assert 1 == len(cog._image_ifds[0]['byte_counts'])
129 |     assert 'jpeg_tables' in cog._image_ifds[0]
130 |     assert cog._image_ifds[0]['jpeg_tables'] is None
131 |     assert mime_type == 'application/octet-stream'
132 | 


--------------------------------------------------------------------------------