├── requirements.txt ├── .gitignore ├── byte.tif ├── byte_cog_valid.tif ├── Makefile ├── templates ├── result.html └── main.html ├── Dockerfile ├── test.sh ├── lambda_main.py ├── LICENSE ├── awsgi.py ├── README.md ├── cog_validator.py └── validate_cloud_optimized_geotiff.py /requirements.txt: -------------------------------------------------------------------------------- 1 | flask 2 | requests 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | cog_validator.zip 3 | cog_validator_deps.zip 4 | -------------------------------------------------------------------------------- /byte.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rouault/cog_validator/HEAD/byte.tif -------------------------------------------------------------------------------- /byte_cog_valid.tif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rouault/cog_validator/HEAD/byte_cog_valid.tif -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: cog_validator.zip 2 | 3 | cog_validator_deps.zip: Dockerfile 4 | docker run --rm --entrypoint cat $$(docker build --build-arg http_proxy=$(http_proxy) -q -f $< .) 
/tmp/task.zip > $@ 5 | 6 | cog_validator.zip: cog_validator_deps.zip awsgi.py cog_validator.py lambda_main.py templates/* byte.tif byte_cog_valid.tif 7 | cp $< $@ 8 | zip -r $@ awsgi.py cog_validator.py lambda_main.py validate_cloud_optimized_geotiff.py templates byte.tif byte_cog_valid.tif 9 | -------------------------------------------------------------------------------- /templates/result.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Cloud optimized GeoTIFF validator 5 | 6 | 7 | 8 |

Cloud optimized GeoTIFF validator: result

9 | 10 |

{{ global_result}}

11 | 12 | {% if errors %} 13 |

Errors:

14 | 19 | {% endif %} 20 | 21 |

Return to submit page

22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Mostly derived from https://github.com/mojodna/marblecutter-tools/blob/master/aws/Dockerfile 2 | 3 | FROM lambci/lambda:build-python2.7 4 | 5 | ARG http_proxy 6 | 7 | # Install deps 8 | 9 | RUN \ 10 | rpm --rebuilddb && \ 11 | yum install -y \ 12 | automake16 \ 13 | libcurl-devel 14 | 15 | # Fetch PROJ.4 16 | 17 | RUN \ 18 | curl -L http://download.osgeo.org/proj/proj-4.9.3.tar.gz | tar zxf - -C /tmp 19 | 20 | # Build and install PROJ.4 21 | 22 | WORKDIR /tmp/proj-4.9.3 23 | 24 | RUN \ 25 | ./configure \ 26 | --prefix=/var/task && \ 27 | make -j $(nproc) && \ 28 | make install 29 | 30 | # Fetch GDAL 31 | 32 | RUN \ 33 | mkdir -p /tmp/gdal && \ 34 | curl -L http://download.osgeo.org/gdal/2.2.1/gdal-2.2.1.tar.gz | tar zxf - -C /tmp/gdal --strip-components=1 35 | 36 | # Build + install GDAL 37 | 38 | WORKDIR /tmp/gdal 39 | 40 | RUN \ 41 | ./configure \ 42 | --prefix=/var/task \ 43 | --datarootdir=/var/task/share/gdal \ 44 | --with-jpeg=internal \ 45 | --without-qhull \ 46 | --without-mrf \ 47 | --without-grib \ 48 | --without-pcraster \ 49 | --without-png \ 50 | --without-gif \ 51 | --without-pcidsk && \ 52 | make -j $(nproc) && \ 53 | cd swig/python && \ 54 | make && \ 55 | cd ../.. 
&& \ 56 | make install 57 | 58 | # Install Python deps in a virtualenv 59 | 60 | RUN \ 61 | virtualenv /tmp/virtualenv 62 | 63 | ENV PATH /tmp/virtualenv/bin:/var/task/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin 64 | 65 | WORKDIR /var/task 66 | 67 | COPY requirements.txt /var/task/requirements.txt 68 | 69 | RUN pip install -r requirements.txt 70 | 71 | # Add GDAL libs to the function zip 72 | 73 | RUN \ 74 | strip lib/libgdal.so.20.3.0 && \ 75 | strip lib/libproj.so.12.0.0 76 | 77 | RUN \ 78 | zip --symlinks \ 79 | -r /tmp/task.zip \ 80 | lib/libgdal.so* \ 81 | lib/libproj.so* \ 82 | share/gdal/ 83 | 84 | # Add Python deps to the function zip 85 | 86 | WORKDIR /tmp/virtualenv/lib/python2.7/site-packages 87 | 88 | RUN find . -name \*.so\* -exec strip {} \; 89 | 90 | RUN \ 91 | zip -r /tmp/task.zip flask werkzeug jinja2 markupsafe itsdangerous.py* click requests idna chardet certifi urllib3 osgeo 92 | 93 | WORKDIR /tmp/gdal/swig/python/build/lib.linux-x86_64-2.7 94 | 95 | RUN find . -name \*.so\* -exec strip {} \; 96 | 97 | RUN zip -r /tmp/task.zip . 98 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Self-test script 3 | 4 | set -e 5 | 6 | python cog_validator.py & 7 | PYTHON_PID=$! 
8 | sleep 2 9 | 10 | API_VALIDATE="http://127.0.0.1:5000/api/validate" 11 | 12 | echo 'Error expected: URL missing' 13 | ret=$(curl -s "$API_VALIDATE") 14 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 15 | echo '' 16 | echo '' 17 | 18 | echo 'Error expected: invalid URL' 19 | ret=$(curl -s "$API_VALIDATE?url=http://i_dont_exist.com") 20 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 21 | echo '' 22 | echo '' 23 | 24 | echo 'Error expected: not a GeoTIFF file' 25 | ret=$(curl -s "$API_VALIDATE?url=http://www.google.com/&use_vsicurl=false") 26 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 27 | echo '' 28 | echo '' 29 | 30 | echo 'Error expected: not a valid COG file' 31 | ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif") 32 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 33 | echo '' 34 | echo '' 35 | 36 | echo 'Testing use_vsicurl=true' 37 | ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif&use_vsicurl=true") 38 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 39 | echo '' 40 | echo '' 41 | 42 | echo 'Testing a server that does not support GET range downloading' 43 | ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/quad-lzw-old-style.tif") 44 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 45 | echo '' 46 | echo '' 47 | 48 | echo 'Testing posting a GeoTIFF file' 49 | ret=$(curl -s -F file=@byte_cog_valid.tif "$API_VALIDATE") 50 | echo $ret | grep "success" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 51 | echo '' 52 | echo '' 53 | 54 | echo 'Testing posting a GeoTIFF file encoded in base64' 55 | ret=$(curl -s -d "file_b64=$(base64 
byte_cog_valid.tif)&filename=byte_cog_valid.tif" "$API_VALIDATE") 56 | echo $ret | grep "success" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 57 | echo '' 58 | echo '' 59 | 60 | echo 'Testing posting invalid base64 content' 61 | ret=$(curl -s -d "file_b64=x&file=byte_cog_valid.tif" "$API_VALIDATE") 62 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 63 | echo '' 64 | echo '' 65 | 66 | echo 'Testing the POST interface with a URL' 67 | ret=$(curl -s -d "url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif" "$API_VALIDATE") 68 | echo $ret | grep "error" || (echo $ret; echo 'Test failure !'; kill -9 $PYTHON_PID; exit 1) 69 | echo '' 70 | echo '' 71 | 72 | echo 'All tests passed !' 73 | kill -9 $PYTHON_PID 74 | -------------------------------------------------------------------------------- /lambda_main.py: -------------------------------------------------------------------------------- 1 | # From https://github.com/mojodna/marblecutter/blob/f5e16ea4ae0adcedaeb45d5fa66168dfe57b9232/functions/tiler/main.py 2 | # Original work Copyright 2016 Stamen Design 3 | # Modified work Copyright 2016-2017 Seth Fitzsimmons 4 | # Modified work Copyright 2016 American Red Cross 5 | # Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. 
Neither the name of the copyright holder nor the names of its contributors 18 | # may be used to endorse or promote products derived from this software without 19 | # specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# noqa
# coding=utf-8

import logging
import os

import awsgi
from cog_validator import app


# Reset the Lambda logger: logging.basicConfig() does nothing when the root
# logger already has handlers, so any existing ones are removed first.
root = logging.getLogger()
# Iterate over a copy: removeHandler() mutates root.handlers, and removing
# entries from a list while iterating over it skips every other handler.
for handler in list(root.handlers):
    root.removeHandler(handler)

logging.basicConfig(level=logging.INFO)


def handle(event, context):  # noqa
    """AWS Lambda entry point: serve the Flask app for one API Gateway event.

    event is the event dictionary received by the Lambda function. When its
    'headers' entry is a dictionary, it is patched in place (Host and
    X-Forwarded-Proto) before the event is handed over to awsgi.
    context is passed through to awsgi.response() untouched.

    Returns the value built by awsgi.response() from the application output.
    """
    if 'headers' in event and isinstance(event['headers'], dict):

        # Cloudfront isn't configured to pass Host headers, so the provided Host
        # header is the API Gateway hostname
        if 'SERVER_NAME' in os.environ:
            event['headers']['Host'] = os.environ['SERVER_NAME']
        # Cloudfront drops X-Forwarded-Proto, so the value provided is from API
        # Gateway
        event['headers']['X-Forwarded-Proto'] = 'https'

    return awsgi.response(app, event, context)
# -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Part of this repository are licensed under: 2 | 3 | Original work Copyright 2016 Stamen Design 4 | Modified work Copyright 2016-2017 Seth Fitzsimmons 5 | Modified work Copyright 2016 American Red Cross 6 | Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions are met: 10 | 11 | 1. Redistributions of source code must retain the above copyright notice, this 12 | list of conditions and the following disclaimer. 13 | 14 | 2. Redistributions in binary form must reproduce the above copyright notice, 15 | this list of conditions and the following disclaimer in the documentation 16 | and/or other materials provided with the distribution. 17 | 18 | 3. 
Neither the name of the copyright holder nor the names of its contributors 19 | may be used to endorse or promote products derived from this software without 20 | specific prior written permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 23 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 25 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 26 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | 33 | 34 | 35 | Other parts: 36 | 37 | 38 | Copyright (c) 2017, Even Rouault 39 | Copyright (c) 2017, Planet Labs 40 | 41 | Permission is hereby granted, free of charge, to any person obtaining a 42 | copy of this software and associated documentation files (the "Software"), 43 | to deal in the Software without restriction, including without limitation 44 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 45 | and/or sell copies of the Software, and to permit persons to whom the 46 | Software is furnished to do so, subject to the following conditions: 47 | 48 | The above copyright notice and this permission notice shall be included 49 | in all copies or substantial portions of the Software. 50 | 51 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 52 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 53 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 54 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 55 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 56 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 57 | DEALINGS IN THE SOFTWARE. 58 | -------------------------------------------------------------------------------- /templates/main.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Cloud optimized GeoTIFF validator 5 | 6 | 7 | 8 |

Cloud optimized GeoTIFF validator

9 | 10 |

This service enables you to check that a GeoTIFF file follows the 11 | specification of 12 | Cloud optimized GeoTIFF

13 | 14 |

Files should generally be specified by URLs since it will enable the service 15 | to check that the hosting server can honour HTTP Range GET requests, and this 16 | will speed-up the checking process itself since only IFDs will be downloaded.

17 | 18 |

In case no public URL is available, the file content can be posted to 19 | the service, with a 4 MB limit when the service is hosted as an AWS Lambda 20 | function.

21 | 22 |
23 | File URL:
24 | or file upload:
25 | 26 |
27 | 28 | 29 | 30 | 70 | 71 |

72 |

You can also use the API at {{ root_url }}/api/validate that will return the validation result as a JSON document

73 |

To submit a file by URL: curl -s "{{ root_url }}/api/validate?url=http://path/to/my.tif"

74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /awsgi.py: -------------------------------------------------------------------------------- 1 | # https://github.com/slank/awsgi 2 | # 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 Matthew Wedgwood 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 
from io import StringIO
import sys
try:
    # Python 3
    from urllib.parse import urlencode

    def convert_str(s):
        """Decode UTF-8 bytes to str; return any other value unchanged."""
        return s.decode('utf-8') if isinstance(s, bytes) else s
except ImportError:
    # Python 2 (only a failed import selects this branch; any other error
    # is a real bug and must not be silently swallowed)
    from urllib import urlencode

    def convert_str(s):
        """Return s unchanged: no conversion is required on Python 2."""
        return s


def response(app, event, context):
    """Run the WSGI application `app` for one event and return the result.

    The WSGI environment is built from `event` by environ(); the returned
    dictionary has the 'statusCode', 'headers' and 'body' keys.
    """
    sr = StartResponse()
    output = app(environ(event, context), sr)
    return sr.response(output)


class StartResponse:
    """WSGI start_response callable that records status, headers and body."""

    def __init__(self):
        # 500 is reported if the application never calls start_response
        self.status = 500
        self.headers = []
        self.body = StringIO()

    def __call__(self, status, headers, exc_info=None):
        """Record the status code and headers; return the body write()."""
        # Only the numeric part of a status line such as "200 OK" is kept
        self.status = status.split()[0]
        self.headers[:] = headers
        return self.body.write

    def response(self, output):
        """Build the response dictionary from the recorded state.

        `output` is the iterable returned by the WSGI application; its
        chunks are appended to whatever was written through write().
        """
        return {
            'statusCode': str(self.status),
            'headers': dict(self.headers),
            'body': self.body.getvalue() + ''.join(map(convert_str, output)),
        }


def environ(event, context):
    """Translate an event dictionary into a WSGI environment dictionary.

    `event` must provide 'httpMethod' and 'path'. 'queryStringParameters',
    'headers', 'body' and 'requestContext' are optional and may be None.
    `context` is not used.
    """
    env = {
        'REQUEST_METHOD': event['httpMethod'],
        'SCRIPT_NAME': '',
        'PATH_INFO': event['path'],
        # The key may be absent or null when the request has no query string
        'QUERY_STRING': urlencode(event.get('queryStringParameters') or {}),
        'REMOTE_ADDR': '127.0.0.1',
        'CONTENT_LENGTH': str(len(event.get('body', '') or '')),
        'HTTP': 'on',
        'SERVER_PROTOCOL': 'HTTP/1.1',
        'wsgi.version': (1, 0),
        'wsgi.input': StringIO(event.get('body')),
        'wsgi.errors': sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': False,
        'wsgi.run_once': False,
    }
    if 'requestContext' in event:
        if 'stage' in event['requestContext']:
            env['AWS_API_GATEWAY_STAGE'] = event['requestContext']['stage']

    # 'headers' can be present but null: treat that like "no headers"
    headers = event.get('headers') or {}
    for k, v in headers.items():
        k = k.upper().replace('-', '_')

        if k == 'CONTENT_TYPE':
            env['CONTENT_TYPE'] = v
        elif k == 'HOST':
            env['SERVER_NAME'] = v
        elif k == 'X_FORWARDED_FOR':
            # Keep only the first address of the comma separated list
            env['REMOTE_ADDR'] = v.split(', ')[0]
        elif k == 'X_FORWARDED_PROTO':
            env['wsgi.url_scheme'] = v
        elif k == 'X_FORWARDED_PORT':
            env['SERVER_PORT'] = v

        # Every header is also exposed under its HTTP_* name
        env['HTTP_' + k] = v

    return env
# -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cloud Optimized GeoTIFF validator 2 | 3 | This is a standalone (Python / Flask) service that allows users to submit 4 | GeoTIFF files (preferably by URL) and check their compliance with the 5 | Cloud Optimized GeoTIFF (COG) specification: 6 | https://trac.osgeo.org/gdal/wiki/CloudOptimizedGeoTIFF 7 | 8 | This utility is also compatible of being deployed as a AWS Lambda function, 9 | through the AWS API Gateway. 10 | 11 | ## API endpoint: /api/validate 12 | 13 | GET request, with the following query parameters : 14 | * url (required): URL to the GeoTIFF file 15 | * use_vsicurl=true/false (optional, defaults to true): if true, the file is read using the GDAL /vsicurl/ subsystem (using HTTP GET range requests). If false, the file is locally downloaded in its entirety before being validated (note: when the service run as a AWS Lambda function, only up to 500 MB can be downloaded) 16 | 17 | For example: /api/validate?url=http://path/to/my.tif 18 | 19 | POST request, with a form encoded with multipart/form-data 20 | * file: file content as multipart attachment 21 | 22 | POST request, with a form encoded with application/x-www-form-urlencoded 23 | * url (exclusive with file): URL to the GeoTIFF file 24 | * use_vsicurl=true/false (defaults to true). 
See above 25 | * filename (optional, recommended): file name 26 | * file_b64: file content as a Base64 encoded string 27 | 28 | This latter interface is mostly needed to overcome a current limitation of the AWS API Gateway interface that does not accept multipart/form-data 29 | 30 | For all the above interfaces, the query will return a JSON document with the following keys: 31 | * status (required): 'success' or 'failure' 32 | * error (optional): error message. Present when the request is invalid, or the file cannot be read 33 | * validation_errors (optional): array of errors. Only present if the file is a GeoTIFF file but does not comply with the COG requirements 34 | * gdal_info (optional): dictionary with the output of "gdalinfo -json". Only present if the file is a GeoTIFF file 35 | * details (optional): dictionary with file offsets of IFDs and first data block of each IFD. Only present if the file is a GeoTIFF file 36 | 37 | ## HTML endpoint: /html 38 | 39 | The service exposes a basic HTML page for users to submit their GeoTIFF files 40 | and display the result of the validation 41 | 42 | ## AWS Lambda / API Gateway 43 | 44 | The service can be deployed as an AWS Lambda function, accessible through the AWS API Gateway. 45 | 46 | Running "make" will generate a cog_validator.zip that contains the Python code of this service, the Python dependencies as well as a GDAL 2.2 build. 
This requires Docker to be available, to generate the cog_validator_deps.zip (which contains the Python dependencies as well as a GDAL 2.2 build) 47 | 48 | Assuming you have an AWS account with initial setup, follow these steps to deploy the service: 49 | 50 | - Role creation 51 | 52 | * Go to the AWS IAM management console 53 | * Click on "Roles" 54 | * Click on "Create new role" 55 | * Click on the Select button of "AWS Lambda" 56 | * In the Filter enter "AWSLambdaBasicExecutionRole" and check the corresponding checkbox 57 | * Click on "Next Step" 58 | * Enter "lambda_basic_execution" as role name 59 | * Click on "Create role" 60 | 61 | - Lambda function creation 62 | 63 | * Go to the AWS Lambda management console 64 | * "Create function" 65 | * In "Select Blueprint" step, select "Author from scratch" 66 | * Skip Add Trigger with "Next" 67 | * Give a name to the function, for example "cog_validator" 68 | * Select "Python 2.7" as Runtime 69 | * Select "Upload a .ZIP file" as "Code entry type" 70 | * In "Function package", click on Upload and select the generated cog_validator.zip 71 | * Enter "lambda_main.handle" in "Handler" 72 | * In "Existing role", select "lambda_basic_execution" 73 | * Click on Next, and Create function to proceed with file uploading and lambda function creation 74 | * Edit the Configuration / Advanced settings, to increase the timeout to 5 minutes and the memory to 512 MB, and Save 75 | * To test everything works, in Actions dropdown list, choose "Configure test event" and enter the following payload. 
76 | ``` 77 | { 78 | "headers": { "Host": "foo" }, 79 | "httpMethod": "GET", 80 | "queryStringParameters": { "url": "http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif" }, 81 | "path": "/api/validate" 82 | } 83 | ``` 84 | 85 | - API Gateway deployment 86 | 87 | * Go to the AWS API Gateway management console 88 | * In APIs tab, click on "Create API" 89 | * Enter "cog_validator" as API name 90 | * Click on "Create API" 91 | * In Resources tab, in Actions dropdown list, select "Create Resource" 92 | * Check the "Configure as Proxy resource" checkbox and click on "Create Resource" 93 | * In the "/{proxy+} - ANY - Setup" form that is now displayed, keep the "Lambda Function Proxy" integration type 94 | * Select the appropriate Lambda region (the one in which you created the Lambda function in the above steps) 95 | * In "Lambda Function" entry, type "cog_validator" 96 | * Click on "Save" and confirm that you add permission to the API Gateway to invoke your Lambda function 97 | * To test everything works, click on the TEST icon 98 | * A new form is displayed. Select GET as method 99 | * In Path entry, enter "/api/validate" 100 | * In "Query strings" entry, enter "url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif" 101 | * In "Headers" entry, enter "Host: foo" 102 | * Click on Test. A JSON document should be displayed (with validation errors) 103 | * In Resources tab, in Actions dropdown list, select "Deploy API" 104 | * In Deployment stage, select "New stage" 105 | * Enter "prod" as stage name 106 | * Click on Deploy 107 | * A new form is displayed with an invoke URL like https://some_value_here.execute-api.eu-central-1.amazonaws.com/prod 108 | * Copy-paste it in your browser and add "/html" at the end. A HTML page "Cloud optimized GeoTIFF validator" should now be displayed ! 109 | 110 | ## Development 111 | 112 | GDAL 2.2 with its Python (2.7) bindings must be installed, as well as the Python 113 | flask and requests modules. 
114 | 115 | A basic self test is available with the ./test.sh script 116 | 117 | ## Credits 118 | 119 | The following resources have served as inspiration for AWS Lambda and API Gateway deployment 120 | * https://medium.com/@mojodna/slimming-down-lambda-deployment-zips-b3f6083a1dff 121 | * https://github.com/mojodna/marblecutter-tools 122 | * http://www.perrygeo.com/running-python-with-compiled-code-on-aws-lambda.html 123 | -------------------------------------------------------------------------------- /cog_validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # Copyright (c) 2017, Planet Labs 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a 7 | # copy of this software and associated documentation files (the "Software"), 8 | # to deal in the Software without restriction, including without limitation 9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 | # and/or sell copies of the Software, and to permit persons to whom the 11 | # Software is furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 | # DEALINGS IN THE SOFTWARE. 
23 | # ***************************************************************************** 24 | 25 | import json 26 | import os 27 | from flask import Flask, request as flask_request, render_template 28 | from werkzeug.exceptions import RequestEntityTooLarge 29 | import requests 30 | from osgeo import gdal 31 | import validate_cloud_optimized_geotiff 32 | 33 | app = Flask(__name__) 34 | # http://docs.aws.amazon.com/lambda/latest/dg/limits.html 35 | app.config['MAX_CONTENT_LENGTH'] = 6 * 1024 * 1024 36 | 37 | tmpfilename = '/tmp/cog_validator_tmp.tif' 38 | 39 | @app.errorhandler(413) 40 | def handle_RequestEntityTooLarge(e): 41 | return json.dumps({'status': 'failure', 'error': 'Maximum accepted attachment size is %d' % app.config['MAX_CONTENT_LENGTH']}), \ 42 | 413, { "Content-Type": "application/json" } 43 | 44 | def validate(args): 45 | if 'url' not in args: 46 | return json.dumps({'status': 'failure', 'error': 'url missing'}), 400, \ 47 | { "Content-Type": "application/json" } 48 | 49 | remove_tmpfile = False 50 | url = args.get('url') 51 | if 'local_filename' in args: 52 | ds = gdal.OpenEx(args['local_filename'], allowed_drivers = ['GTiff']) 53 | else: 54 | 55 | use_vsicurl = args.get('use_vsicurl', 'true') 56 | if use_vsicurl.lower() not in ('true', 'false'): 57 | return json.dumps({'status': 'failure', 'error': 'invalid value for use_vsicurl option. 
Expected true or false'}), 400, { "Content-Type": "application/json" } 58 | use_vsicurl = use_vsicurl.lower() == 'true' 59 | 60 | gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN', 'EMPTY_DIR') 61 | if use_vsicurl: 62 | ds = gdal.OpenEx('/vsicurl/' + url, allowed_drivers = ['GTiff']) 63 | if ds is None: 64 | f = gdal.VSIFOpenL('/vsicurl/' + url, 'rb') 65 | if f is None: 66 | return json.dumps({'status': 'failure', 'error': 'Cannot download %s' % url}), 400, { "Content-Type": "application/json" } 67 | data = gdal.VSIFReadL(1,1,f) 68 | gdal.VSIFCloseL(f) 69 | if len(data) == 0: 70 | error_msg = 'Cannot download %s' % url 71 | gdal_error_msg = gdal.GetLastErrorMsg() 72 | if gdal_error_msg == '': 73 | gdal_error_msg = gdal.VSIGetLastErrorMsg() 74 | if gdal_error_msg != '': 75 | error_msg += ': '+ gdal_error_msg 76 | return json.dumps({'status': 'failure', 'error': error_msg}), 400, { "Content-Type": "application/json" } 77 | else: 78 | try: 79 | r = requests.get(url) 80 | except Exception, e: 81 | return json.dumps({'status': 'failure', 'error': 'Cannot download %s' % url}), 400, { "Content-Type": "application/json" } 82 | 83 | remove_tmpfile = True 84 | f = open(tmpfilename, 'wb') 85 | f.write(r.content) 86 | f.close() 87 | ds = gdal.OpenEx(tmpfilename, allowed_drivers = ['GTiff']) 88 | 89 | if ds is None: 90 | return json.dumps({'status': 'failure', 'error': '%s is not a GTiff file' % url}), 400, { "Content-Type": "application/json" } 91 | errors, details = validate_cloud_optimized_geotiff.validate(ds) 92 | info = gdal.Info(ds, format = 'json') 93 | if 'local_filename' in args or remove_tmpfile: 94 | del info['files'] 95 | info['description'] = url 96 | ds = None 97 | if remove_tmpfile: 98 | os.unlink(tmpfilename) 99 | 100 | if len(errors) == 0: 101 | return json.dumps({'status': 'success', 'gdal_info' : info, 'details': details}), 200, { "Content-Type": "application/json" } 102 | else: 103 | return json.dumps({'status': 'failure', 'gdal_info' : info, 'details': 
def _json_failure(message):
    """Return the (body, status, headers) triple for a 400 JSON error reply."""
    return json.dumps({'status': 'failure', 'error': message}), 400, { "Content-Type": "application/json" }


def _client_args(params):
    """Copy client-supplied request parameters, dropping 'local_filename'.

    'local_filename' is only ever set by api_validate() itself, for the
    temporary file it has just written.
    NOTE(review): presumably filtered so that a client cannot make the
    server open an arbitrary local path -- confirm against validate().
    """
    return {k: params[k] for k in params if k != 'local_filename'}


def _get_root_url():
    """Return the root URL used by the HTML templates to build links.

    The last character of ``url_root`` is dropped (assumed to be its
    trailing slash -- TODO confirm) and, when the WSGI environ carries
    'AWS_API_GATEWAY_STAGE', that stage name is appended as a path segment.
    """
    root_url = flask_request.url_root[0:-1]
    if 'AWS_API_GATEWAY_STAGE' in flask_request.environ:
        root_url += '/' + flask_request.environ['AWS_API_GATEWAY_STAGE']
    return root_url


def _read_bundled_file(name):
    """Return the bytes of a file that sits next to this module."""
    path = os.path.join(os.path.dirname(__file__), name)
    # 'with' closes the handle deterministically instead of relying on GC.
    with open(path, 'rb') as f:
        return f.read()


@app.route('/api/validate', methods=['GET', 'POST'])
def api_validate():
    """JSON validation endpoint.

    GET: the query-string parameters are forwarded to validate().

    POST, three accepted shapes:
      * form data with a non-empty 'url' field: the form fields are
        forwarded to validate();
      * form data with a 'file_b64' field (optionally 'filename'): the
        base64 payload is decoded into the temporary file and validated;
      * multipart upload with a 'file' part: the upload is saved to the
        temporary file and validated.

    Returns the (body, status, headers) triple produced by validate(), or
    a 400 JSON failure triple when the request is malformed.  The
    temporary file is removed once validation of an upload has finished.
    """
    if flask_request.method != 'POST':
        return validate(_client_args(flask_request.args))

    form = flask_request.form
    if form:
        if 'url' in form and form['url'] != '':
            return validate(_client_args(form))

        if 'filename' in form:
            url = form['filename']
        else:
            url = 'unknown_file_name'

        if 'file_b64' not in form:
            return _json_failure('Missing "file_b64" field in POSTed form data')

        import base64
        b64 = form['file_b64']
        # Need to add padding to avoid sometimes a 'invalid padding exception'
        b64 += '=='

        # FileReader::readAsDataURL() prefixes the base64 content with other stuff
        marker = ';base64,'
        marker_pos = b64.find(marker)
        if marker_pos >= 0:
            b64 = b64[marker_pos + len(marker):]

        try:
            decoded = base64.b64decode(b64)
        except Exception as e:
            return _json_failure('Invalid content for file_b64: %s' % str(e))

        # Close (and therefore flush) the file before it is handed over to
        # validate(), which re-opens it by name.
        with open(tmpfilename, 'wb') as out:
            out.write(decoded)
    else:
        if 'file' not in flask_request.files:
            return _json_failure('Missing "file" field in POSTed form data')
        f = flask_request.files['file']
        if f.filename == '':
            return _json_failure('Missing "file" field in POSTed form data')
        f.save(tmpfilename)
        url = f.filename

    try:
        return validate({'local_filename': tmpfilename, 'url': url})
    finally:
        os.unlink(tmpfilename)


@app.route('/self_test/valid.tif', methods=['GET'])
def self_test_valid():
    """Serve the bundled 'byte_cog_valid.tif' sample as image/tiff."""
    return _read_bundled_file('byte_cog_valid.tif'), 200, { "Content-Type": "image/tiff" }


@app.route('/self_test/invalid.tif', methods=['GET'])
def self_test_invalid():
    """Serve the bundled 'byte.tif' sample as image/tiff."""
    return _read_bundled_file('byte.tif'), 200, { "Content-Type": "image/tiff" }


@app.route('/html', methods=['GET'])
def html():
    """Render the main HTML form."""
    return render_template('main.html', root_url = _get_root_url())


@app.route('/html/validate', methods=['POST'])
def html_validate():
    """Run api_validate() on the posted form and render the result page.

    The JSON body returned by api_validate() is parsed back; its HTTP
    status and headers are ignored, the outcome being read from the
    'status', 'error' and 'validation_errors' keys.
    """
    root_url = _get_root_url()
    ret, _, _ = api_validate()
    ret = json.loads(ret)
    errors = None

    # Name shown in the result sentence: the URL, else the uploaded file
    # name, else a generic 'This'.
    form = flask_request.form
    if 'url' in form and form['url'] != '':
        name = form['url']
    elif 'filename' in form and form['filename'] != '':
        name = form['filename']
    else:
        name = 'This'

    if 'status' in ret and ret['status'] == 'success':
        global_result = 'Validation succeeded ! %s is a valid Cloud Optimized GeoTIFF.' % name
    else:
        global_result = 'Validation failed ! %s is NOT a valid Cloud Optimized GeoTIFF.' % name
        if 'error' in ret:
            errors = [ ret['error'] ]
        elif 'validation_errors' in ret:
            errors = ret['validation_errors']
    return render_template('result.html', root_url = root_url, global_result = global_result, errors = errors)


# We only need this for local development.
env = os.environ
DEBUG = env.get('DEBUG', 'False')

if __name__ == '__main__':
    app.run(debug=DEBUG=="True")
def Usage():
    """Print the command-line help and return 2, the bad-usage exit status."""
    print(
        "Usage: validate_cloud_optimized_geotiff.py [-q] [--full-check=yes/no/auto] test.tif"
    )
    print("")
    print("Options:")
    print("-q: quiet mode")
    print(
        "--full-check=yes/no/auto: check tile/strip leader/trailer bytes. auto=yes for local files, and no for remote files"
    )
    return 2


class ValidateCloudOptimizedGeoTIFFException(Exception):
    """Raised by validate() when the file cannot be inspected at all."""

    pass


def full_check_band(
    f,
    band_name,
    band,
    errors,
    block_order_row_major,
    block_leader_size_as_uint4,
    block_trailer_last_4_bytes_repeated,
    mask_interleaved_with_imagery,
):
    """Check the on-disk layout of every block of one band.

    Args:
        f: VSI file handle on the TIFF file, used to read leader/trailer
            bytes around each block.
        band_name: Label used as a prefix in the error messages.
        band: GDAL band whose "TIFF" metadata domain exposes
            BLOCK_OFFSET_x_y / BLOCK_SIZE_x_y items.
        errors: List of error strings, extended in place.
        block_order_row_major: Check that block offsets never decrease when
            blocks are visited row by row.
        block_leader_size_as_uint4: Check that the 4 bytes preceding each
            block hold the block byte count.
        block_trailer_last_4_bytes_repeated: Check that the last 4 bytes of
            each block are repeated right after it.
        mask_interleaved_with_imagery: Check that each mask block directly
            follows its imagery block.
    """

    block_size = band.GetBlockSize()
    mask_band = None
    if mask_interleaved_with_imagery:
        mask_band = band.GetMaskBand()
        mask_block_size = mask_band.GetBlockSize()
        if block_size != mask_block_size:
            errors += [
                band_name + ": mask block size is different from its imagery band"
            ]
            # Per-block interleaving checks are skipped for this band.
            mask_band = None

    yblocks = (band.YSize + block_size[1] - 1) // block_size[1]
    xblocks = (band.XSize + block_size[0] - 1) // block_size[0]
    last_offset = 0
    for y in range(yblocks):
        for x in range(xblocks):

            # A missing metadata item is treated as offset/size 0.
            offset = band.GetMetadataItem("BLOCK_OFFSET_%d_%d" % (x, y), "TIFF")
            offset = int(offset) if offset is not None else 0
            bytecount = band.GetMetadataItem("BLOCK_SIZE_%d_%d" % (x, y), "TIFF")
            bytecount = int(bytecount) if bytecount is not None else 0

            if offset > 0:
                if block_order_row_major and offset < last_offset:
                    errors += [
                        band_name
                        + ": offset of block (%d, %d) is smaller than previous block"
                        % (x, y)
                    ]

                # NOTE(review): the leader-size check below was unreadable in
                # the reviewed copy of this file and has been restored to
                # match the GDAL reference script -- confirm.
                if block_leader_size_as_uint4:
                    gdal.VSIFSeekL(f, offset - 4, 0)
                    leader_size = struct.unpack("<I", gdal.VSIFReadL(4, 1, f))[0]
                    if leader_size != bytecount:
                        errors += [
                            band_name
                            + ": for block (%d, %d), size in leader bytes is %d instead of %d"
                            % (x, y, leader_size, bytecount)
                        ]

                if block_trailer_last_4_bytes_repeated:
                    if bytecount >= 4:
                        # Read the last 4 bytes of the block and the 4 bytes
                        # that follow it in a single call.
                        gdal.VSIFSeekL(f, offset + bytecount - 4, 0)
                        last_bytes = gdal.VSIFReadL(8, 1, f)
                        if last_bytes[0:4] != last_bytes[4:8]:
                            errors += [
                                band_name
                                + ": for block (%d, %d), trailer bytes are invalid"
                                % (x, y)
                            ]

            if mask_band:
                offset_mask = mask_band.GetMetadataItem(
                    "BLOCK_OFFSET_%d_%d" % (x, y), "TIFF"
                )
                offset_mask = int(offset_mask) if offset_mask is not None else 0
                if offset > 0 and offset_mask > 0:
                    # bytecount_mask = int(mask_band.GetMetadataItem('BLOCK_SIZE_%d_%d' % (x,y), 'TIFF'))
                    expected_offset_mask = (
                        offset
                        + bytecount
                        + (4 if block_leader_size_as_uint4 else 0)
                        + (4 if block_trailer_last_4_bytes_repeated else 0)
                    )
                    if offset_mask != expected_offset_mask:
                        errors += [
                            "Mask of "
                            + band_name
                            + ": for block (%d, %d), offset is %d, whereas %d was expected"
                            % (x, y, offset_mask, expected_offset_mask)
                        ]
                elif offset == 0 and offset_mask > 0:
                    if block_order_row_major and offset_mask < last_offset:
                        errors += [
                            "Mask of "
                            + band_name
                            + ": offset of block (%d, %d) is smaller than previous block"
                            % (x, y)
                        ]

                    # Imagery block absent: the mask block becomes the
                    # reference for the ordering check of the next block.
                    offset = offset_mask

            last_offset = offset


def validate(ds, check_tiled=True, full_check=False):
    """Check if a file is a (Geo)TIFF with cloud optimized compatible structure.

    Args:
      ds: GDAL Dataset for the file to inspect, or a file name, in which
        case it is opened with gdal.Open().
      check_tiled: Set to False to ignore missing tiling.
      full_check: Set to True to check tile/strip leader/trailer bytes.
        Might be slow on remote files.

    Returns:
      A tuple (warnings, errors, details): two lists of messages (errors
      is empty if the file is valid) and a dictionary with the IFD and
      data offsets of the main image and of each overview.

    Raises:
      ValidateCloudOptimizedGeoTIFFException: GDAL is older than 2.2, the
        file cannot be opened, or the file is not a Tiff.
    """

    if int(gdal.VersionInfo("VERSION_NUM")) < 2020000:
        raise ValidateCloudOptimizedGeoTIFFException("GDAL 2.2 or above required")

    # Text type of the running interpreter (unicode on Python 2, str on 3).
    unicode_type = type("".encode("utf-8").decode("utf-8"))
    if isinstance(ds, (str, unicode_type)):
        gdal.PushErrorHandler()
        ds = gdal.Open(ds)
        gdal.PopErrorHandler()
        if ds is None:
            raise ValidateCloudOptimizedGeoTIFFException(
                "Invalid file : %s" % gdal.GetLastErrorMsg()
            )
        if ds.GetDriver().ShortName != "GTiff":
            raise ValidateCloudOptimizedGeoTIFFException("The file is not a GeoTIFF")

    details = {}
    errors = []
    warnings = []
    filename = ds.GetDescription()
    main_band = ds.GetRasterBand(1)
    ovr_count = main_band.GetOverviewCount()
    # GetFileList() may return None; normalise once so that every
    # membership test below (.ovr and .msk side-cars) is safe.
    filelist = ds.GetFileList() or []
    has_external_mask = (filename + ".msk") in filelist
    if filename + ".ovr" in filelist:
        errors += ["Overviews found in external .ovr file. They should be internal"]

    if main_band.XSize > 512 or main_band.YSize > 512:
        if check_tiled:
            block_size = main_band.GetBlockSize()
            if block_size[0] == main_band.XSize and block_size[0] > 1024:
                errors += ["The file is greater than 512xH or Wx512, but is not tiled"]

        if ovr_count == 0:
            warnings += [
                "The file is greater than 512xH or Wx512, it is recommended "
                "to include internal overviews"
            ]

    ifd_offset = int(main_band.GetMetadataItem("IFD_OFFSET", "TIFF"))
    ifd_offsets = [ifd_offset]

    block_order_row_major = False
    block_leader_size_as_uint4 = False
    block_trailer_last_4_bytes_repeated = False
    mask_interleaved_with_imagery = False

    # 8 and 16 are the positions right after the classic TIFF and BigTIFF
    # headers; any other value must be explained by hidden metadata.
    if ifd_offset not in (8, 16):

        # Check if there is GDAL hidden structural metadata
        f = gdal.VSIFOpenL(filename, "rb")
        if not f:
            raise ValidateCloudOptimizedGeoTIFFException("Cannot open file")
        try:
            signature = struct.unpack("B" * 4, gdal.VSIFReadL(4, 1, f))
            bigtiff = signature in ((0x49, 0x49, 0x2B, 0x00), (0x4D, 0x4D, 0x00, 0x2B))
            if bigtiff:
                expected_ifd_pos = 16
            else:
                expected_ifd_pos = 8
            gdal.VSIFSeekL(f, expected_ifd_pos, 0)
            # Only the length of this pattern matters: it is the size of
            # the fixed-width header line to read.
            pattern = "GDAL_STRUCTURAL_METADATA_SIZE=%06d bytes\n" % 0
            got = gdal.VSIFReadL(len(pattern), 1, f).decode("LATIN1")
            if len(got) == len(pattern) and got.startswith(
                "GDAL_STRUCTURAL_METADATA_SIZE="
            ):
                size = int(got[len("GDAL_STRUCTURAL_METADATA_SIZE=") :][0:6])
                extra_md = gdal.VSIFReadL(size, 1, f).decode("LATIN1")
                block_order_row_major = "BLOCK_ORDER=ROW_MAJOR" in extra_md
                block_leader_size_as_uint4 = "BLOCK_LEADER=SIZE_AS_UINT4" in extra_md
                block_trailer_last_4_bytes_repeated = (
                    "BLOCK_TRAILER=LAST_4_BYTES_REPEATED" in extra_md
                )
                mask_interleaved_with_imagery = (
                    "MASK_INTERLEAVED_WITH_IMAGERY=YES" in extra_md
                )
                if "KNOWN_INCOMPATIBLE_EDITION=YES" in extra_md:
                    errors += ["KNOWN_INCOMPATIBLE_EDITION=YES is declared in the file"]
                expected_ifd_pos += len(pattern) + size
                expected_ifd_pos += (
                    expected_ifd_pos % 2
                )  # IFD offset starts on a 2-byte boundary
        finally:
            # Release the handle even if parsing the header raised.
            gdal.VSIFCloseL(f)

        if expected_ifd_pos != ifd_offsets[0]:
            errors += [
                "The offset of the main IFD should be %d. It is %d instead"
                % (expected_ifd_pos, ifd_offsets[0])
            ]

    details["ifd_offsets"] = {}
    details["ifd_offsets"]["main"] = ifd_offset

    for i in range(ovr_count):
        # Check that overviews are by descending sizes
        ovr_band = ds.GetRasterBand(1).GetOverview(i)
        if i == 0:
            if ovr_band.XSize > main_band.XSize or ovr_band.YSize > main_band.YSize:
                errors += ["First overview has larger dimension than main band"]
        else:
            prev_ovr_band = ds.GetRasterBand(1).GetOverview(i - 1)
            if (
                ovr_band.XSize > prev_ovr_band.XSize
                or ovr_band.YSize > prev_ovr_band.YSize
            ):
                errors += [
                    "Overview of index %d has larger dimension than "
                    "overview of index %d" % (i, i - 1)
                ]

        if check_tiled:
            block_size = ovr_band.GetBlockSize()
            if block_size[0] == ovr_band.XSize and block_size[0] > 1024:
                errors += ["Overview of index %d is not tiled" % i]

        # Check that the IFD of descending overviews are sorted by increasing
        # offsets
        ifd_offset = int(ovr_band.GetMetadataItem("IFD_OFFSET", "TIFF"))
        ifd_offsets.append(ifd_offset)
        details["ifd_offsets"]["overview_%d" % i] = ifd_offset
        if ifd_offsets[-1] < ifd_offsets[-2]:
            if i == 0:
                errors += [
                    "The offset of the IFD for overview of index %d is %d, "
                    "whereas it should be greater than the one of the main "
                    "image, which is at byte %d" % (i, ifd_offsets[-1], ifd_offsets[-2])
                ]
            else:
                errors += [
                    "The offset of the IFD for overview of index %d is %d, "
                    "whereas it should be greater than the one of index %d, "
                    "which is at byte %d" % (i, ifd_offsets[-1], i - 1, ifd_offsets[-2])
                ]

    # Check that the imagery starts by the smallest overview and ends with
    # the main resolution dataset

    def get_block_offset(band):
        """Return the offset of the first block of band that has one, else 0."""
        blockxsize, blockysize = band.GetBlockSize()
        for y in range((band.YSize + blockysize - 1) // blockysize):
            for x in range((band.XSize + blockxsize - 1) // blockxsize):
                block_offset = band.GetMetadataItem(
                    "BLOCK_OFFSET_%d_%d" % (x, y), "TIFF"
                )
                if block_offset:
                    return int(block_offset)
        return 0

    block_offset = get_block_offset(main_band)
    data_offsets = [block_offset]
    details["data_offsets"] = {}
    details["data_offsets"]["main"] = block_offset
    for i in range(ovr_count):
        ovr_band = ds.GetRasterBand(1).GetOverview(i)
        block_offset = get_block_offset(ovr_band)
        data_offsets.append(block_offset)
        details["data_offsets"]["overview_%d" % i] = block_offset

    # A data offset of 0 means no block was found and is never reported.
    if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]:
        if ovr_count > 0:
            errors += [
                "The offset of the first block of the smallest overview "
                "should be after its IFD"
            ]
        else:
            errors += [
                "The offset of the first block of the image should be after its IFD"
            ]
    # data_offsets[0] is the main image, data_offsets[i] is overview i - 1.
    for i in range(len(data_offsets) - 2, 0, -1):
        if data_offsets[i] != 0 and data_offsets[i] < data_offsets[i + 1]:
            errors += [
                "The offset of the first block of overview of index %d should "
                "be after the one of the overview of index %d" % (i - 1, i)
            ]
    if (
        len(data_offsets) >= 2
        and data_offsets[0] != 0
        and data_offsets[0] < data_offsets[1]
    ):
        errors += [
            "The offset of the first block of the main resolution image "
            "should be after the one of the overview of index %d" % (ovr_count - 1)
        ]

    if full_check and (
        block_order_row_major
        or block_leader_size_as_uint4
        or block_trailer_last_4_bytes_repeated
        or mask_interleaved_with_imagery
    ):
        f = gdal.VSIFOpenL(filename, "rb")
        if not f:
            raise ValidateCloudOptimizedGeoTIFFException("Cannot open file")

        try:
            full_check_band(
                f,
                "Main resolution image",
                main_band,
                errors,
                block_order_row_major,
                block_leader_size_as_uint4,
                block_trailer_last_4_bytes_repeated,
                mask_interleaved_with_imagery,
            )
            if (
                main_band.GetMaskFlags() == gdal.GMF_PER_DATASET
                and not has_external_mask
            ):
                full_check_band(
                    f,
                    "Mask band of main resolution image",
                    main_band.GetMaskBand(),
                    errors,
                    block_order_row_major,
                    block_leader_size_as_uint4,
                    block_trailer_last_4_bytes_repeated,
                    False,
                )
            for i in range(ovr_count):
                ovr_band = ds.GetRasterBand(1).GetOverview(i)
                full_check_band(
                    f,
                    "Overview %d" % i,
                    ovr_band,
                    errors,
                    block_order_row_major,
                    block_leader_size_as_uint4,
                    block_trailer_last_4_bytes_repeated,
                    mask_interleaved_with_imagery,
                )
                if (
                    ovr_band.GetMaskFlags() == gdal.GMF_PER_DATASET
                    and not has_external_mask
                ):
                    full_check_band(
                        f,
                        "Mask band of overview %d" % i,
                        ovr_band.GetMaskBand(),
                        errors,
                        block_order_row_major,
                        block_leader_size_as_uint4,
                        block_trailer_last_4_bytes_repeated,
                        False,
                    )
        finally:
            # Release the handle even if a block check raised.
            gdal.VSIFCloseL(f)

    return warnings, errors, details


def main(argv=sys.argv):
    """Command-line entry point.

    Returns 0 in case of success, 1 for failure, and 2 (after printing the
    usage) when the arguments are invalid.
    """

    i = 1
    filename = None
    quiet = False
    full_check = None
    while i < len(argv):
        if argv[i] == "-q":
            quiet = True
        elif argv[i] == "--full-check=yes":
            full_check = True
        elif argv[i] == "--full-check=no":
            full_check = False
        elif argv[i] == "--full-check=auto":
            full_check = None
        # startswith() rather than argv[i][0]: an empty-string argument must
        # not raise IndexError.
        elif argv[i].startswith("-"):
            return Usage()
        elif filename is None:
            filename = argv[i]
        else:
            return Usage()

        i += 1

    if filename is None:
        return Usage()

    if full_check is None:
        # auto: full check only for in-memory or existing local files.
        full_check = filename.startswith("/vsimem/") or os.path.exists(filename)

    try:
        ret = 0
        warnings, errors, details = validate(filename, full_check=full_check)
        if warnings:
            if not quiet:
                print("The following warnings were found:")
                for warning in warnings:
                    print(" - " + warning)
                print("")
        if errors:
            if not quiet:
                print("%s is NOT a valid cloud optimized GeoTIFF." % filename)
                print("The following errors were found:")
                for error in errors:
                    print(" - " + error)
                print("")
            ret = 1
        else:
            if not quiet:
                print("%s is a valid cloud optimized GeoTIFF" % filename)

        if not quiet and not warnings and not errors:
            headers_size = min(
                details["data_offsets"][k] for k in details["data_offsets"]
            )
            if headers_size == 0:
                headers_size = gdal.VSIStatL(filename).size
            print("\nThe size of all IFD headers is %d bytes" % headers_size)
    except ValidateCloudOptimizedGeoTIFFException as e:
        if not quiet:
            print("%s is NOT a valid cloud optimized GeoTIFF : %s" % (filename, str(e)))
        ret = 1

    return ret


if __name__ == "__main__":
    sys.exit(main(sys.argv))