22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
# Mostly derived from https://github.com/mojodna/marblecutter-tools/blob/master/aws/Dockerfile
#
# Builds PROJ.4 and GDAL (with its Python bindings) under /var/task, installs
# the Python requirements in a virtualenv, and collects the pieces needed at
# runtime into /tmp/task.zip.

FROM lambci/lambda:build-python2.7

ARG http_proxy

# Make a RUN step fail when any command of a pipeline fails (curl | tar),
# rather than only when the last command of the pipeline fails.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Install deps

RUN \
  rpm --rebuilddb && \
  yum install -y \
    automake16 \
    libcurl-devel && \
  yum clean all

# Fetch PROJ.4
# (-f turns an HTTP error page into a failed step instead of feeding it to tar)

RUN \
  curl -fsSL http://download.osgeo.org/proj/proj-4.9.3.tar.gz | tar zxf - -C /tmp

# Build and install PROJ.4

WORKDIR /tmp/proj-4.9.3

RUN \
  ./configure \
    --prefix=/var/task && \
  make -j "$(nproc)" && \
  make install

# Fetch GDAL

RUN \
  mkdir -p /tmp/gdal && \
  curl -fsSL http://download.osgeo.org/gdal/2.2.1/gdal-2.2.1.tar.gz | tar zxf - -C /tmp/gdal --strip-components=1

# Build + install GDAL

WORKDIR /tmp/gdal

RUN \
  ./configure \
    --prefix=/var/task \
    --datarootdir=/var/task/share/gdal \
    --with-jpeg=internal \
    --without-qhull \
    --without-mrf \
    --without-grib \
    --without-pcraster \
    --without-png \
    --without-gif \
    --without-pcidsk && \
  make -j "$(nproc)" && \
  make -C swig/python && \
  make install

# Install Python deps in a virtualenv

RUN \
  virtualenv /tmp/virtualenv

ENV PATH=/tmp/virtualenv/bin:/var/task/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

WORKDIR /var/task

COPY requirements.txt /var/task/requirements.txt

RUN pip install --no-cache-dir -r requirements.txt

# Add GDAL libs to the function zip

RUN \
  strip lib/libgdal.so.20.3.0 && \
  strip lib/libproj.so.12.0.0

RUN \
  zip --symlinks \
    -r /tmp/task.zip \
    lib/libgdal.so* \
    lib/libproj.so* \
    share/gdal/

# Add Python deps to the function zip

WORKDIR /tmp/virtualenv/lib/python2.7/site-packages

RUN find . -name \*.so\* -exec strip {} \;

RUN \
  zip -r /tmp/task.zip flask werkzeug jinja2 markupsafe itsdangerous.py* click requests idna chardet certifi urllib3 osgeo

# Add the GDAL Python bindings built above to the function zip

WORKDIR /tmp/gdal/swig/python/build/lib.linux-x86_64-2.7

RUN find . -name \*.so\* -exec strip {} \;

RUN zip -r /tmp/task.zip .
98 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Self-test script
#
# Starts cog_validator.py in the background, sends a series of requests to its
# /api/validate endpoint and checks that each response contains the expected
# marker ("error" or "success"). Exits non-zero on the first failed check.

set -e

python cog_validator.py &
PYTHON_PID=$!

# Stop the background server however the script ends. Without this, an early
# exit caused by "set -e" (e.g. curl failing to connect) would leave the
# server running.
trap 'kill -9 "$PYTHON_PID" 2>/dev/null || true' EXIT
trap 'exit 1' HUP INT TERM

# Give the server some time to start listening
sleep 2

API_VALIDATE="http://127.0.0.1:5000/api/validate"

# expect PATTERN RESPONSE
# Succeeds (printing the matching line) when RESPONSE contains PATTERN;
# otherwise prints RESPONSE and aborts the script with status 1.
expect() {
    if ! printf '%s\n' "$2" | grep "$1"; then
        printf '%s\n' "$2"
        echo 'Test failure !'
        exit 1
    fi
    echo ''
    echo ''
}

echo 'Error expected: URL missing'
ret=$(curl -s "$API_VALIDATE")
expect "error" "$ret"

echo 'Error expected: invalid URL'
ret=$(curl -s "$API_VALIDATE?url=http://i_dont_exist.com")
expect "error" "$ret"

echo 'Error expected: not a GeoTIFF file'
ret=$(curl -s "$API_VALIDATE?url=http://www.google.com/&use_vsicurl=false")
expect "error" "$ret"

echo 'Error expected: not a valid COG file'
ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif")
expect "error" "$ret"

echo 'Testing use_vsicurl=true'
ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif&use_vsicurl=true")
expect "error" "$ret"

echo 'Testing a server that does not support GET range downloading'
ret=$(curl -s "$API_VALIDATE?url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/quad-lzw-old-style.tif")
expect "error" "$ret"

echo 'Testing posting a GeoTIFF file'
ret=$(curl -s -F file=@byte_cog_valid.tif "$API_VALIDATE")
expect "success" "$ret"

echo 'Testing posting a GeoTIFF file encoded in base64'
ret=$(curl -s -d "file_b64=$(base64 byte_cog_valid.tif)&filename=byte_cog_valid.tif" "$API_VALIDATE")
expect "success" "$ret"

echo 'Testing posting invalid base64 content'
ret=$(curl -s -d "file_b64=x&file=byte_cog_valid.tif" "$API_VALIDATE")
expect "error" "$ret"

echo 'Testing the POST interface with a URL'
ret=$(curl -s -d "url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif" "$API_VALIDATE")
expect "error" "$ret"

echo 'All tests passed !'
# The EXIT trap stops the server.
74 |
--------------------------------------------------------------------------------
/lambda_main.py:
--------------------------------------------------------------------------------
1 | # From https://github.com/mojodna/marblecutter/blob/f5e16ea4ae0adcedaeb45d5fa66168dfe57b9232/functions/tiler/main.py
2 | # Original work Copyright 2016 Stamen Design
3 | # Modified work Copyright 2016-2017 Seth Fitzsimmons
4 | # Modified work Copyright 2016 American Red Cross
5 | # Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its contributors
18 | # may be used to endorse or promote products derived from this software without
19 | # specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
32 | # noqa
33 | # coding=utf-8
34 |
35 | import logging
36 | import os
37 |
38 | import awsgi
39 | from cog_validator import app
40 |
41 |
# Reset the Lambda logger: drop every handler already attached to the root
# logger so that basicConfig() below takes effect (basicConfig() does nothing
# when the root logger still has handlers).
root = logging.getLogger()
# Iterate over a copy: removeHandler() mutates root.handlers, and removing
# entries while looping over the live list would skip every other handler.
for handler in list(root.handlers):
    root.removeHandler(handler)

logging.basicConfig(level=logging.INFO)
49 |
50 |
def handle(event, context):  # noqa
    """Lambda entry point: adjust the event headers, then run the Flask app.

    When ``event['headers']`` is a dict, its ``Host`` entry is replaced by the
    ``SERVER_NAME`` environment variable (if set) and ``X-Forwarded-Proto`` is
    forced to ``https``. The event is then passed to ``awsgi.response``.
    """
    headers = event.get('headers')
    if isinstance(headers, dict):
        # Cloudfront isn't configured to pass Host headers, so the provided
        # Host header is the API Gateway hostname
        server_name = os.environ.get('SERVER_NAME')
        if server_name is not None:
            headers['Host'] = server_name

        # Cloudfront drops X-Forwarded-Proto, so the value provided is from
        # API Gateway
        headers['X-Forwarded-Proto'] = 'https'

    return awsgi.response(app, event, context)
63 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Parts of this repository are licensed under:
2 |
3 | Original work Copyright 2016 Stamen Design
4 | Modified work Copyright 2016-2017 Seth Fitzsimmons
5 | Modified work Copyright 2016 American Red Cross
6 | Modified work Copyright 2016-2017 Humanitarian OpenStreetMap Team
7 |
8 | Redistribution and use in source and binary forms, with or without
9 | modification, are permitted provided that the following conditions are met:
10 |
11 | 1. Redistributions of source code must retain the above copyright notice, this
12 | list of conditions and the following disclaimer.
13 |
14 | 2. Redistributions in binary form must reproduce the above copyright notice,
15 | this list of conditions and the following disclaimer in the documentation
16 | and/or other materials provided with the distribution.
17 |
18 | 3. Neither the name of the copyright holder nor the names of its contributors
19 | may be used to endorse or promote products derived from this software without
20 | specific prior written permission.
21 |
22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
26 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 |
33 |
34 |
35 | Other parts:
36 |
37 |
38 | Copyright (c) 2017, Even Rouault
39 | Copyright (c) 2017, Planet Labs
40 |
41 | Permission is hereby granted, free of charge, to any person obtaining a
42 | copy of this software and associated documentation files (the "Software"),
43 | to deal in the Software without restriction, including without limitation
44 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
45 | and/or sell copies of the Software, and to permit persons to whom the
46 | Software is furnished to do so, subject to the following conditions:
47 |
48 | The above copyright notice and this permission notice shall be included
49 | in all copies or substantial portions of the Software.
50 |
51 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
52 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
53 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
54 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
55 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
56 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
57 | DEALINGS IN THE SOFTWARE.
58 |
--------------------------------------------------------------------------------
/templates/main.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Cloud optimized GeoTIFF validator
5 |
6 |
7 |
8 |
Cloud optimized GeoTIFF validator
9 |
10 |
This service enables you to check that a GeoTIFF file follows the
11 | specification of
12 | Cloud optimized GeoTIFF
13 |
14 |
Files should generally be specified by URLs since it will enable the service
15 | to check that the hosting server can honour HTTP Range GET requests, and this
16 | will speed-up the checking process itself since only IFDs will be downloaded.
17 |
18 |
In the case no public URL is available, the file content can be posted to
19 | the service, with a 4 MB limit when the service is hosted as an AWS Lambda
20 | function.
21 |
22 |
27 |
28 |
29 |
30 |
70 |
71 |
72 |
You can also use the API at {{ root_url }}/api/validate that will return the validation result as a JSON document
73 |
To submit a file by URL: curl -s "{{ root_url }}/api/validate?url=http://path/to/my.tif"
74 |
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/awsgi.py:
--------------------------------------------------------------------------------
1 | # https://github.com/slank/awsgi
2 | #
3 | # MIT License
4 | #
5 | # Copyright (c) 2016 Matthew Wedgwood
6 | #
7 | # Permission is hereby granted, free of charge, to any person obtaining a copy
8 | # of this software and associated documentation files (the "Software"), to deal
9 | # in the Software without restriction, including without limitation the rights
10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 | # copies of the Software, and to permit persons to whom the Software is
12 | # furnished to do so, subject to the following conditions:
13 | #
14 | # The above copyright notice and this permission notice shall be included in all
15 | # copies or substantial portions of the Software.
16 | #
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 |
25 | from io import StringIO
26 | import sys
try:
    # Python 3
    from urllib.parse import urlencode

    def convert_str(s):
        """Return ``s`` as text: bytes are decoded as UTF-8, anything else is
        returned unchanged."""
        return s.decode('utf-8') if isinstance(s, bytes) else s
except ImportError:
    # Python 2: urllib.parse does not exist there. Only ImportError is caught
    # so that any other failure is not silently mistaken for "Python 2".
    from urllib import urlencode

    def convert_str(s):
        """No conversion required: return ``s`` unchanged."""
        return s
41 |
42 |
def response(app, event, context):
    """Run the WSGI callable ``app`` for one event and return the response dict.

    The WSGI environ is built from ``event``/``context`` by ``environ()``; a
    fresh ``StartResponse`` collects the status, headers and body.
    """
    start_response = StartResponse()
    wsgi_environ = environ(event, context)
    return start_response.response(app(wsgi_environ, start_response))
47 |
48 |
class StartResponse:
    """WSGI ``start_response`` callable that records what the application
    sends and turns it into a response dict.

    Attributes are: ``status`` (500 until the application calls the object),
    ``headers`` (list of (name, value) pairs) and ``body`` (a StringIO fed by
    the ``write`` callable returned from ``__call__``).
    """

    def __init__(self):
        self.status = 500
        self.headers = []
        self.body = StringIO()

    def __call__(self, status, headers, exc_info=None):
        """Record the status code and headers; return the body ``write``
        callable. ``exc_info`` is accepted but not used."""
        # Keep only the numeric part of e.g. "200 OK"
        self.status = status.split()[0]
        self.headers[:] = headers
        return self.body.write

    def response(self, output):
        """Build the response dict from the recorded state and ``output``.

        ``output`` is the iterable returned by the WSGI application. Its
        items are converted with ``convert_str`` and appended to whatever was
        written through the ``write`` callable.

        Returns a dict with the keys ``statusCode`` (str), ``headers`` (dict)
        and ``body`` (str).
        """
        try:
            body = self.body.getvalue() + ''.join(map(convert_str, output))
        finally:
            # PEP 3333: if the application iterable has a close() method, it
            # must be called once the request is complete, even on error.
            close = getattr(output, 'close', None)
            if close is not None:
                close()
        return {
            'statusCode': str(self.status),
            'headers': dict(self.headers),
            'body': body,
        }
66 |
67 |
def environ(event, context):
    """Build a WSGI environ dict from a Lambda event.

    ``event`` must provide ``httpMethod`` and ``path``. The keys
    ``queryStringParameters``, ``body``, ``headers`` and ``requestContext``
    are optional, and a null value for the first three is treated as empty.
    ``context`` is not used.

    Every header is exposed as ``HTTP_<NAME>``. In addition, Content-Type,
    Host, X-Forwarded-For, X-Forwarded-Proto and X-Forwarded-Port populate
    CONTENT_TYPE, SERVER_NAME, REMOTE_ADDR, wsgi.url_scheme and SERVER_PORT
    respectively.
    """
    env = {
        'REQUEST_METHOD': event['httpMethod'],
        'SCRIPT_NAME': '',
        'PATH_INFO': event['path'],
        # The key may be missing or null when the request has no query string
        'QUERY_STRING': urlencode(event.get('queryStringParameters') or {}),
        'REMOTE_ADDR': '127.0.0.1',
        'CONTENT_LENGTH': str(len(event.get('body', '') or '')),
        'HTTP': 'on',
        'SERVER_PROTOCOL': 'HTTP/1.1',
        'wsgi.version': (1, 0),
        'wsgi.input': StringIO(event.get('body')),
        'wsgi.errors': sys.stderr,
        'wsgi.multithread': False,
        'wsgi.multiprocess': False,
        'wsgi.run_once': False,
    }
    if 'requestContext' in event:
        if 'stage' in event['requestContext']:
            env['AWS_API_GATEWAY_STAGE'] = event['requestContext']['stage']

    # "headers" may be present but null; treat that like "no headers"
    headers = event.get('headers') or {}
    for k, v in headers.items():
        k = k.upper().replace('-', '_')

        if k == 'CONTENT_TYPE':
            env['CONTENT_TYPE'] = v
        elif k == 'HOST':
            env['SERVER_NAME'] = v
        elif k == 'X_FORWARDED_FOR':
            # First address of the comma-separated list
            env['REMOTE_ADDR'] = v.split(', ')[0]
        elif k == 'X_FORWARDED_PROTO':
            env['wsgi.url_scheme'] = v
        elif k == 'X_FORWARDED_PORT':
            env['SERVER_PORT'] = v

        env['HTTP_' + k] = v

    return env
107 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Cloud Optimized GeoTIFF validator
2 |
3 | This is a standalone (Python / Flask) service that allows users to submit
4 | GeoTIFF files (preferably by URL) and check their compliance with the
5 | Cloud Optimized GeoTIFF (COG) specification:
6 | https://trac.osgeo.org/gdal/wiki/CloudOptimizedGeoTIFF
7 |
8 | This utility can also be deployed as an AWS Lambda function,
9 | through the AWS API Gateway.
10 |
11 | ## API endpoint: /api/validate
12 |
13 | GET request, with the following query parameters :
14 | * url (required): URL to the GeoTIFF file
15 | * use_vsicurl=true/false (optional, defaults to true): if true, the file is read using the GDAL /vsicurl/ subsystem (using HTTP GET range requests). If false, the file is locally downloaded in its entirety before being validated (note: when the service runs as an AWS Lambda function, only up to 500 MB can be downloaded)
16 |
17 | For example: /api/validate?url=http://path/to/my.tif
18 |
19 | POST request, with a form encoded with multipart/form-data
20 | * file: file content as multipart attachment
21 |
22 | POST request, with a form encoded with application/x-www-form-urlencoded
23 | * url (exclusive with file): URL to the GeoTIFF file
24 | * use_vsicurl=true/false (defaults to true). See above
25 | * filename (optional, recommended): file name
26 | * file_b64: file content as a Base64 encoded string
27 |
28 | This latter interface is mostly needed to overcome a current limitation of the AWS API Gateway interface that does not accept multipart/form-data
29 |
30 | For all the above interfaces, the query will return a JSON document with the following keys:
31 | * status (required): 'success' or 'failure'
32 | * error (optional): error message. present when the request is invalid, or the file cannot be read
33 | * validation_errors (optional): array of errors. Only present if the file is a GeoTIFF file but does not comply with the COG requirements
34 | * gdal_info (optional): dictionary with the output of "gdalinfo -json". Only present if the file is a GeoTIFF file
35 | * details (optional): dictionary with file offsets of IFDs and first data block of each IFD. Only present if the file is a GeoTIFF file
36 |
37 | ## HTML endpoint: /html
38 |
39 | The service exposes a basic HTML page for users to submit their GeoTIFF files
40 | and display the result of the validation
41 |
42 | ## AWS Lambda / API Gateway
43 |
44 | The service can be deployed as an AWS Lambda function, accessible through the AWS API Gateway.
45 |
46 | Running "make" will generate a cog_validator.zip that contains the Python code of this service, the Python dependencies as well as a GDAL 2.2 build. This requires Docker to be available, to generate the cog_validator_deps.zip (which contains the Python dependencies as well as a GDAL 2.2 build)
47 |
48 | Assuming you have an AWS account with the initial setup done, follow these steps to deploy the service:
49 |
50 | - Role creation
51 |
52 | * Go to the AWS IAM management console
53 | * Click on "Roles"
54 | * Click on "Create new role"
55 | * Click on the Select button of "AWS Lambda"
56 | * In the Filter enter "AWSLambdaBasicExecutionRole" and check the corresponding checkbox
57 | * Click on "Next Step"
58 | * Enter "lambda_basic_execution" as role name
59 | * Click on "Create role"
60 |
61 | - Lambda function creation
62 |
63 | * Go to the AWS Lambda management console
64 | * "Create function"
65 | * In "Select Blueprint" step, select "Author from scratch"
66 | * Skip Add Trigger with "Next"
67 | * Give a name to the function, for example "cog_validator"
68 | * Select "Python 2.7" as Runtime
69 | * Select "Upload a .ZIP file" as "Code entry type"
70 | * In "Function package", click on Upload and select the generated cog_validator.zip
71 | * Enter "lambda_main.handle" in "Handler"
72 | * In "Existing role", select "lambda_basic_execution"
73 | * Click on Next, then on Create function, to proceed with the file upload and the Lambda function creation
74 | * Edit the Configuration / Advanced settings, to increase the timeout to 5 minutes and the memory to 512 MB, and Save
75 | * To test everything works, in Actions dropdown list, choose "Configure test event" and enter the following payload.
76 | ```
77 | {
78 | "headers": { "Host": "foo" },
79 | "httpMethod": "GET",
80 | "queryStringParameters": { "url": "http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif" },
81 | "path": "/api/validate"
82 | }
83 | ```
84 |
85 | - API Gateway deployment
86 |
87 | * Go to the AWS API Gateway management console
88 | * In APIs tab, click on "Create API"
89 | * Enter "cog_validator" as API name
90 | * Click on "Create API"
91 | * In Resources tab, in Actions dropdown list, select "Create Resource"
92 | * Check the "Configure as Proxy resource" checkbox and click on "Create Resource"
93 | * In the "/{proxy+} - ANY - Setup" form that is now displayed, keep the "Lambda Function Proxy" integration type
94 | * Select the appropriate Lambda region (the one in which you created the Lambda function in the above steps)
95 | * In "Lambda Function" entry, type "cog_validator"
96 | * Click on "Save" and confirm that you add permission to the API Gateway to invoke your Lambda function
97 | * To test everything works, click on the TEST icon
98 | * A new form is displayed. Select GET as method
99 | * In Path entry, enter "/api/validate"
100 | * In "Query strings" entry, enter "url=http://svn.osgeo.org/gdal/trunk/autotest/gcore/data/byte.tif"
101 | * In "Headers" entry, enter "Host: foo"
102 | * Click on Test. A JSON document should be displayed (with validation errors)
103 | * In Resources tab, in Actions dropdown list, select "Deploy API"
104 | * In Deployment stage, select "New stage"
105 | * Enter "prod" as stage name
106 | * Click on Deploy
107 | * A new form is displayed with an invoke URL like https://some_value_here.execute-api.eu-central-1.amazonaws.com/prod
108 | * Copy-paste it in your browser and add "/html" at the end. A HTML page "Cloud optimized GeoTIFF validator" should now be displayed !
109 |
110 | ## Development
111 |
112 | GDAL 2.2 with its Python (2.7) bindings must be installed, as well as the Python
113 | flask and requests modules.
114 |
115 | A basic self test is available with the ./test.sh script
116 |
117 | ## Credits
118 |
119 | The following resources have served as inspiration for AWS Lambda and API Gateway deployment
120 | * https://medium.com/@mojodna/slimming-down-lambda-deployment-zips-b3f6083a1dff
121 | * https://github.com/mojodna/marblecutter-tools
122 | * http://www.perrygeo.com/running-python-with-compiled-code-on-aws-lambda.html
123 |
--------------------------------------------------------------------------------
/cog_validator.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | #
4 | # Copyright (c) 2017, Planet Labs
5 | #
6 | # Permission is hereby granted, free of charge, to any person obtaining a
7 | # copy of this software and associated documentation files (the "Software"),
8 | # to deal in the Software without restriction, including without limitation
9 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 | # and/or sell copies of the Software, and to permit persons to whom the
11 | # Software is furnished to do so, subject to the following conditions:
12 | #
13 | # The above copyright notice and this permission notice shall be included
14 | # in all copies or substantial portions of the Software.
15 | #
16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 | # DEALINGS IN THE SOFTWARE.
23 | # *****************************************************************************
24 |
25 | import json
26 | import os
27 | from flask import Flask, request as flask_request, render_template
28 | from werkzeug.exceptions import RequestEntityTooLarge
29 | import requests
30 | from osgeo import gdal
31 | import validate_cloud_optimized_geotiff
32 |
# Flask application on which the routes and error handlers of this module
# are registered.
app = Flask(__name__)

# Upper bound on the size of an incoming request body, in bytes. See
# http://docs.aws.amazon.com/lambda/latest/dg/limits.html
app.config.update(MAX_CONTENT_LENGTH=6 * 1024 * 1024)

# Single scratch path used for downloaded / uploaded files.
tmpfilename = '/tmp/cog_validator_tmp.tif'
38 |
@app.errorhandler(413)
def handle_RequestEntityTooLarge(e):
    """Answer HTTP 413 errors with a JSON document stating the size limit.

    Returns a (body, status, headers) triple; ``e`` is not used.
    """
    limit = app.config['MAX_CONTENT_LENGTH']
    payload = {
        'status': 'failure',
        'error': 'Maximum accepted attachment size is %d' % limit,
    }
    return json.dumps(payload), 413, { "Content-Type": "application/json" }
43 |
def _json_response(payload, status):
    """Serialize ``payload`` as JSON and return a (body, status, headers) triple."""
    return json.dumps(payload), status, { "Content-Type": "application/json" }


def validate(args):
    """Validate a GeoTIFF file and return a (JSON body, status, headers) triple.

    ``args`` is a dict with:
      * ``url`` (required): URL of the file, or its display name when
        ``local_filename`` is given;
      * ``local_filename`` (optional): path of an already available file,
        opened directly instead of fetching ``url``;
      * ``use_vsicurl`` (optional, ``'true'``/``'false'``, default ``'true'``):
        open ``url`` through ``/vsicurl/`` when true, otherwise download it
        to ``tmpfilename`` first.

    The status is 200 when no validation error is reported, 400 otherwise
    (invalid arguments, unreadable file, or validation errors).
    """
    if 'url' not in args:
        return _json_response({'status': 'failure', 'error': 'url missing'}, 400)

    remove_tmpfile = False
    url = args.get('url')
    ds = None
    try:
        if 'local_filename' in args:
            ds = gdal.OpenEx(args['local_filename'], allowed_drivers = ['GTiff'])
        else:
            use_vsicurl = args.get('use_vsicurl', 'true')
            if use_vsicurl.lower() not in ('true', 'false'):
                return _json_response({'status': 'failure', 'error': 'invalid value for use_vsicurl option. Expected true or false'}, 400)
            use_vsicurl = use_vsicurl.lower() == 'true'

            gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN', 'EMPTY_DIR')
            if use_vsicurl:
                ds = gdal.OpenEx('/vsicurl/' + url, allowed_drivers = ['GTiff'])
                if ds is None:
                    # Find out whether the resource is unreachable (reported
                    # as a download error here) or reachable but not a GTiff
                    # (reported further below).
                    f = gdal.VSIFOpenL('/vsicurl/' + url, 'rb')
                    if f is None:
                        return _json_response({'status': 'failure', 'error': 'Cannot download %s' % url}, 400)
                    data = gdal.VSIFReadL(1,1,f)
                    gdal.VSIFCloseL(f)
                    if len(data) == 0:
                        error_msg = 'Cannot download %s' % url
                        gdal_error_msg = gdal.GetLastErrorMsg()
                        if gdal_error_msg == '':
                            gdal_error_msg = gdal.VSIGetLastErrorMsg()
                        if gdal_error_msg != '':
                            error_msg += ': '+ gdal_error_msg
                        return _json_response({'status': 'failure', 'error': error_msg}, 400)
            else:
                try:
                    r = requests.get(url)
                except Exception:
                    return _json_response({'status': 'failure', 'error': 'Cannot download %s' % url}, 400)

                remove_tmpfile = True
                with open(tmpfilename, 'wb') as f:
                    f.write(r.content)
                ds = gdal.OpenEx(tmpfilename, allowed_drivers = ['GTiff'])

        if ds is None:
            return _json_response({'status': 'failure', 'error': '%s is not a GTiff file' % url}, 400)
        errors, details = validate_cloud_optimized_geotiff.validate(ds)
        info = gdal.Info(ds, format = 'json')
        if 'local_filename' in args or remove_tmpfile:
            # Do not expose the server-side temporary path
            del info['files']
        info['description'] = url
    finally:
        # Drop the dataset reference before deleting the file it was opened
        # from. Running this in "finally" also removes the downloaded file
        # when it is not a GTiff or when validation raises.
        ds = None
        if remove_tmpfile and os.path.exists(tmpfilename):
            os.unlink(tmpfilename)

    if len(errors) == 0:
        return _json_response({'status': 'success', 'gdal_info' : info, 'details': details}, 200)
    else:
        return _json_response({'status': 'failure', 'gdal_info' : info, 'details': details, 'validation_errors': errors}, 400)
104 |
105 |
@app.route('/api/validate', methods=['GET', 'POST'])
def api_validate():
    """Handle /api/validate and return the result of ``validate()``.

    * GET: the query parameters are forwarded to ``validate()``.
    * POST with form fields and a non-empty ``url``: the form fields are
      forwarded to ``validate()``.
    * POST with form fields and ``file_b64``: the Base64 content is decoded
      into ``tmpfilename`` and validated as a local file; ``filename`` (if
      present) is used as its display name.
    * POST without form fields: the ``file`` attachment is saved to
      ``tmpfilename`` and validated as a local file.

    A client-supplied ``local_filename`` parameter is never forwarded, so a
    request cannot make the service open an arbitrary local path.
    """
    if flask_request.method == 'POST':
        form = flask_request.form
        if form != {}:
            if 'url' in form and form['url'] != '':
                args = {}
                for k in form:
                    if k != 'local_filename':
                        args[k] = form[k]
                return validate(args)

            if 'filename' in form:
                url = form['filename']
            else:
                url = 'unknown_file_name'

            if 'file_b64' not in form:
                return json.dumps({'status': 'failure', 'error': 'Missing "file_b64" field in POSTed form data'}), 400, { "Content-Type": "application/json" }

            import base64
            b64 = form['file_b64']
            # Need to add padding to avoid sometimes a 'invalid padding exception'
            b64 += '=='

            # FileReader::readAsDataURL() prefixes the base64 content with other stuff
            base64_marker = b64.find(';base64,')
            if base64_marker >= 0:
                b64 = b64[base64_marker + len(';base64,'):]

            try:
                decoded = base64.b64decode(b64)
            except Exception as e:
                return json.dumps({'status': 'failure', 'error': 'Invalid content for file_b64: %s' % str(e)}), 400, { "Content-Type": "application/json" }

            # Close the file explicitly so the content is flushed before the
            # file is reopened for validation.
            with open(tmpfilename, 'wb') as out:
                out.write(decoded)
        else:
            if 'file' not in flask_request.files:
                return json.dumps({'status': 'failure', 'error': 'Missing "file" field in POSTed form data'}), 400, { "Content-Type": "application/json" }
            f = flask_request.files['file']
            if f.filename == '':
                return json.dumps({'status': 'failure', 'error': 'Missing "file" field in POSTed form data'}), 400, { "Content-Type": "application/json" }
            f.save(tmpfilename)
            url = f.filename

        try:
            return validate({'local_filename': tmpfilename, 'url': url})
        finally:
            os.unlink(tmpfilename)

    else:
        args = {}
        for k in flask_request.args:
            if k != 'local_filename':
                args[k] = flask_request.args[k]
        return validate(args)
161 |
@app.route('/self_test/valid.tif', methods=['GET'])
def self_test_valid():
    """Serve byte_cog_valid.tif (located next to this module) as image/tiff."""
    path = os.path.join(os.path.dirname(__file__), 'byte_cog_valid.tif')
    # Context manager: the file handle is closed as soon as it has been read
    with open(path, 'rb') as f:
        content = f.read()
    return content, 200, { "Content-Type": "image/tiff" }
165 |
@app.route('/self_test/invalid.tif', methods=['GET'])
def self_test_invalid():
    """Serve byte.tif (located next to this module) as image/tiff."""
    path = os.path.join(os.path.dirname(__file__), 'byte.tif')
    # Context manager: the file handle is closed as soon as it has been read
    with open(path, 'rb') as f:
        content = f.read()
    return content, 200, { "Content-Type": "image/tiff" }
169 |
@app.route('/html', methods=['GET'])
def html():
    """Render main.html, passing it the root URL of the service.

    The root URL is the request's URL root without its last character,
    followed by the API Gateway stage when the WSGI environ carries one.
    """
    url_parts = [flask_request.url_root[0:-1]]
    wsgi_environ = flask_request.environ
    if 'AWS_API_GATEWAY_STAGE' in wsgi_environ:
        url_parts.append(wsgi_environ['AWS_API_GATEWAY_STAGE'])
    return render_template('main.html', root_url = '/'.join(url_parts))
176 |
@app.route('/html/validate', methods=['POST'])
def html_validate():
    """Run the validation for a submitted form and render result.html.

    Delegates to ``api_validate()``, decodes its JSON body and passes the
    template a one-line verdict plus the list of errors (or None).
    """
    url_parts = [flask_request.url_root[0:-1]]
    wsgi_environ = flask_request.environ
    if 'AWS_API_GATEWAY_STAGE' in wsgi_environ:
        url_parts.append(wsgi_environ['AWS_API_GATEWAY_STAGE'])
    root_url = '/'.join(url_parts)

    body, _, _ = api_validate()
    result = json.loads(body)

    # Name shown in the verdict: the URL if given, else the file name
    form = flask_request.form
    name = 'This'
    for field in ('url', 'filename'):
        if field in form and form[field] != '':
            name = form[field]
            break

    errors = None
    if result.get('status') == 'success':
        global_result = 'Validation succeeded ! %s is a valid Cloud Optimized GeoTIFF.' % name
    else:
        global_result = 'Validation failed ! %s is NOT a valid Cloud Optimized GeoTIFF.' % name
        if 'error' in result:
            errors = [ result['error'] ]
        elif 'validation_errors' in result:
            errors = result['validation_errors']

    return render_template('result.html', root_url = root_url, global_result = global_result, errors = errors)
202 |
# Local-development entry point only: when this module is executed directly,
# start the built-in Flask server. Debug mode is enabled only when the DEBUG
# environment variable is exactly "True".
env = os.environ
DEBUG = env.get('DEBUG', 'False')

if __name__ == '__main__':
    debug_enabled = (DEBUG == "True")
    app.run(debug=debug_enabled)
209 |
--------------------------------------------------------------------------------
/validate_cloud_optimized_geotiff.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # *****************************************************************************
4 | # $Id$
5 | #
6 | # Project: GDAL
7 | # Purpose: Validate Cloud Optimized GeoTIFF file structure
8 | # Author: Even Rouault,
9 | #
10 | # *****************************************************************************
11 | # Copyright (c) 2017, Even Rouault
12 | #
13 | # Permission is hereby granted, free of charge, to any person obtaining a
14 | # copy of this software and associated documentation files (the "Software"),
15 | # to deal in the Software without restriction, including without limitation
16 | # the rights to use, copy, modify, merge, publish, distribute, sublicense,
17 | # and/or sell copies of the Software, and to permit persons to whom the
18 | # Software is furnished to do so, subject to the following conditions:
19 | #
20 | # The above copyright notice and this permission notice shall be included
21 | # in all copies or substantial portions of the Software.
22 | #
23 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
26 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29 | # DEALINGS IN THE SOFTWARE.
30 | # *****************************************************************************
31 |
32 | import os.path
33 | import struct
34 | import sys
35 |
36 | from osgeo import gdal
37 |
38 |
def Usage():
    """Print the command-line help text and return 2.

    The returned value is what main() hands back on a usage error.
    """
    help_lines = (
        "Usage: validate_cloud_optimized_geotiff.py [-q] [--full-check=yes/no/auto] test.tif",
        "",
        "Options:",
        "-q: quiet mode",
        "--full-check=yes/no/auto: check tile/strip leader/trailer bytes. auto=yes for local files, and no for remote files",
    )
    for help_line in help_lines:
        print(help_line)
    return 2
50 |
51 |
class ValidateCloudOptimizedGeoTIFFException(Exception):
    """Raised by validate() when the validation cannot be carried out.

    In this file that covers: a GDAL version that is too old, a file that
    cannot be opened, and a dataset whose driver is not GTiff.
    """
54 |
55 |
def full_check_band(
    f,
    band_name,
    band,
    errors,
    block_order_row_major,
    block_leader_size_as_uint4,
    block_trailer_last_4_bytes_repeated,
    mask_interleaved_with_imagery,
):
    """Check the on-disk layout of every block of a band.

    Block offsets and sizes are read from the band's "TIFF" metadata domain
    (items BLOCK_OFFSET_<x>_<y> and BLOCK_SIZE_<x>_<y>); a missing item is
    treated as 0 and such a block is not checked.

    Args:
        f: file handle accepted by gdal.VSIFSeekL / gdal.VSIFReadL.
        band_name: label used as a prefix in the error messages.
        band: band object to inspect.
        errors: list of error messages; extended in place.
        block_order_row_major: when true, report a block whose offset is
            smaller than the offset of the previously visited block.
        block_leader_size_as_uint4: when true, check that the 4 bytes that
            precede each block hold the block byte count as a little-endian
            unsigned 32-bit integer.
        block_trailer_last_4_bytes_repeated: when true, check that the 4
            bytes that follow each block repeat the block's last 4 bytes.
        mask_interleaved_with_imagery: when true, check that each mask block
            starts right after its imagery block (plus leader/trailer bytes).

    Returns:
        None. Problems are reported through ``errors`` only.
    """

    block_size = band.GetBlockSize()
    mask_band = None
    if mask_interleaved_with_imagery:
        mask_band = band.GetMaskBand()
        mask_block_size = mask_band.GetBlockSize()
        if block_size != mask_block_size:
            errors.append(
                band_name + ": mask block size is different from its imagery band"
            )
            # Without matching block grids the interleaving cannot be checked.
            mask_band = None

    yblocks = (band.YSize + block_size[1] - 1) // block_size[1]
    xblocks = (band.XSize + block_size[0] - 1) // block_size[0]
    last_offset = 0
    for y in range(yblocks):
        for x in range(xblocks):

            offset = band.GetMetadataItem("BLOCK_OFFSET_%d_%d" % (x, y), "TIFF")
            offset = int(offset) if offset is not None else 0
            bytecount = band.GetMetadataItem("BLOCK_SIZE_%d_%d" % (x, y), "TIFF")
            bytecount = int(bytecount) if bytecount is not None else 0

            if offset > 0:
                if block_order_row_major and offset < last_offset:
                    errors.append(
                        band_name
                        + ": offset of block (%d, %d) is smaller than previous block"
                        % (x, y)
                    )

                if block_leader_size_as_uint4:
                    # The leader is stored in the 4 bytes just before the block.
                    gdal.VSIFSeekL(f, offset - 4, 0)
                    leader_size = struct.unpack("<I", gdal.VSIFReadL(4, 1, f))[0]
                    if leader_size != bytecount:
                        errors.append(
                            band_name
                            + ": for block (%d, %d), size in leader bytes is %d instead of %d"
                            % (x, y, leader_size, bytecount)
                        )

                if block_trailer_last_4_bytes_repeated:
                    if bytecount >= 4:
                        # Read the last 4 bytes of the block together with the
                        # 4 trailer bytes that follow it and compare them.
                        gdal.VSIFSeekL(f, offset + bytecount - 4, 0)
                        last_bytes = gdal.VSIFReadL(8, 1, f)
                        if last_bytes[0:4] != last_bytes[4:8]:
                            errors.append(
                                band_name
                                + ": for block (%d, %d), trailer bytes are invalid"
                                % (x, y)
                            )

            if mask_band:
                offset_mask = mask_band.GetMetadataItem(
                    "BLOCK_OFFSET_%d_%d" % (x, y), "TIFF"
                )
                offset_mask = int(offset_mask) if offset_mask is not None else 0
                if offset > 0 and offset_mask > 0:
                    expected_offset_mask = (
                        offset
                        + bytecount
                        + (4 if block_leader_size_as_uint4 else 0)
                        + (4 if block_trailer_last_4_bytes_repeated else 0)
                    )
                    if offset_mask != expected_offset_mask:
                        errors.append(
                            "Mask of "
                            + band_name
                            + ": for block (%d, %d), offset is %d, whereas %d was expected"
                            % (x, y, offset_mask, expected_offset_mask)
                        )
                elif offset == 0 and offset_mask > 0:
                    if block_order_row_major and offset_mask < last_offset:
                        errors.append(
                            "Mask of "
                            + band_name
                            + ": offset of block (%d, %d) is smaller than previous block"
                            % (x, y)
                        )

                # The mask block becomes the reference for the ordering check
                # of the next block.
                offset = offset_mask

            last_offset = offset
150 |
151 |
def validate(ds, check_tiled=True, full_check=False):
    """Check if a file is a (Geo)TIFF with cloud optimized compatible structure.

    Args:
        ds: GDAL Dataset for the file to inspect, or a filename to open.
        check_tiled: Set to False to ignore missing tiling.
        full_check: Set to True to check tile/strip leader/trailer bytes.
            Might be slow on remote files.

    Returns:
        A tuple (warnings, errors, details): a list of warning messages, a
        list of error messages (empty if there is no error), and a dictionary
        with the structure of the GeoTIFF file (keys "ifd_offsets" and
        "data_offsets").

    Raises:
        ValidateCloudOptimizedGeoTIFFException: GDAL is older than 2.2, the
            file cannot be opened, or the file is not a Tiff.
    """

    if int(gdal.VersionInfo("VERSION_NUM")) < 2020000:
        raise ValidateCloudOptimizedGeoTIFFException("GDAL 2.2 or above required")

    # Resolves to `unicode` on Python 2 and `str` on Python 3.
    unicode_type = type("".encode("utf-8").decode("utf-8"))
    if isinstance(ds, (str, unicode_type)):
        gdal.PushErrorHandler()
        ds = gdal.Open(ds)
        gdal.PopErrorHandler()
        if ds is None:
            raise ValidateCloudOptimizedGeoTIFFException(
                "Invalid file : %s" % gdal.GetLastErrorMsg()
            )
        if ds.GetDriver().ShortName != "GTiff":
            raise ValidateCloudOptimizedGeoTIFFException("The file is not a GeoTIFF")

    details = {}
    errors = []
    warnings = []
    filename = ds.GetDescription()
    main_band = ds.GetRasterBand(1)
    ovr_count = main_band.GetOverviewCount()
    filelist = ds.GetFileList()
    if filelist is not None and filename + ".ovr" in filelist:
        errors.append("Overviews found in external .ovr file. They should be internal")

    if main_band.XSize > 512 or main_band.YSize > 512:
        if check_tiled:
            block_size = main_band.GetBlockSize()
            if block_size[0] == main_band.XSize and block_size[0] > 1024:
                errors.append(
                    "The file is greater than 512xH or Wx512, but is not tiled"
                )

        if ovr_count == 0:
            warnings.append(
                "The file is greater than 512xH or Wx512, it is recommended "
                "to include internal overviews"
            )

    ifd_offset = int(main_band.GetMetadataItem("IFD_OFFSET", "TIFF"))
    ifd_offsets = [ifd_offset]

    block_order_row_major = False
    block_leader_size_as_uint4 = False
    block_trailer_last_4_bytes_repeated = False
    mask_interleaved_with_imagery = False

    if ifd_offset not in (8, 16):

        # Check if there is GDAL hidden structural metadata
        f = gdal.VSIFOpenL(filename, "rb")
        if not f:
            raise ValidateCloudOptimizedGeoTIFFException("Cannot open file")
        try:
            signature = struct.unpack("B" * 4, gdal.VSIFReadL(4, 1, f))
            bigtiff = signature in (
                (0x49, 0x49, 0x2B, 0x00),
                (0x4D, 0x4D, 0x00, 0x2B),
            )
            if bigtiff:
                expected_ifd_pos = 16
            else:
                expected_ifd_pos = 8
            gdal.VSIFSeekL(f, expected_ifd_pos, 0)
            pattern = "GDAL_STRUCTURAL_METADATA_SIZE=%06d bytes\n" % 0
            got = gdal.VSIFReadL(len(pattern), 1, f).decode("LATIN1")
            if len(got) == len(pattern) and got.startswith(
                "GDAL_STRUCTURAL_METADATA_SIZE="
            ):
                size = int(got[len("GDAL_STRUCTURAL_METADATA_SIZE=") :][0:6])
                extra_md = gdal.VSIFReadL(size, 1, f).decode("LATIN1")
                block_order_row_major = "BLOCK_ORDER=ROW_MAJOR" in extra_md
                block_leader_size_as_uint4 = "BLOCK_LEADER=SIZE_AS_UINT4" in extra_md
                block_trailer_last_4_bytes_repeated = (
                    "BLOCK_TRAILER=LAST_4_BYTES_REPEATED" in extra_md
                )
                mask_interleaved_with_imagery = (
                    "MASK_INTERLEAVED_WITH_IMAGERY=YES" in extra_md
                )
                if "KNOWN_INCOMPATIBLE_EDITION=YES" in extra_md:
                    errors.append(
                        "KNOWN_INCOMPATIBLE_EDITION=YES is declared in the file"
                    )
                expected_ifd_pos += len(pattern) + size
                expected_ifd_pos += (
                    expected_ifd_pos % 2
                )  # IFD offset starts on a 2-byte boundary
        finally:
            # Release the handle even if reading/decoding the header fails.
            gdal.VSIFCloseL(f)

        if expected_ifd_pos != ifd_offsets[0]:
            errors.append(
                "The offset of the main IFD should be %d. It is %d instead"
                % (expected_ifd_pos, ifd_offsets[0])
            )

    details["ifd_offsets"] = {}
    details["ifd_offsets"]["main"] = ifd_offset

    for i in range(ovr_count):
        # Check that overviews are by descending sizes
        ovr_band = ds.GetRasterBand(1).GetOverview(i)
        if i == 0:
            if ovr_band.XSize > main_band.XSize or ovr_band.YSize > main_band.YSize:
                errors.append("First overview has larger dimension than main band")
        else:
            prev_ovr_band = ds.GetRasterBand(1).GetOverview(i - 1)
            if (
                ovr_band.XSize > prev_ovr_band.XSize
                or ovr_band.YSize > prev_ovr_band.YSize
            ):
                errors.append(
                    "Overview of index %d has larger dimension than "
                    "overview of index %d" % (i, i - 1)
                )

        if check_tiled:
            block_size = ovr_band.GetBlockSize()
            if block_size[0] == ovr_band.XSize and block_size[0] > 1024:
                errors.append("Overview of index %d is not tiled" % i)

        # Check that the IFD of descending overviews are sorted by increasing
        # offsets
        ifd_offset = int(ovr_band.GetMetadataItem("IFD_OFFSET", "TIFF"))
        ifd_offsets.append(ifd_offset)
        details["ifd_offsets"]["overview_%d" % i] = ifd_offset
        if ifd_offsets[-1] < ifd_offsets[-2]:
            if i == 0:
                errors.append(
                    "The offset of the IFD for overview of index %d is %d, "
                    "whereas it should be greater than the one of the main "
                    "image, which is at byte %d" % (i, ifd_offsets[-1], ifd_offsets[-2])
                )
            else:
                errors.append(
                    "The offset of the IFD for overview of index %d is %d, "
                    "whereas it should be greater than the one of index %d, "
                    "which is at byte %d" % (i, ifd_offsets[-1], i - 1, ifd_offsets[-2])
                )

    # Check that the imagery starts by the smallest overview and ends with
    # the main resolution dataset

    def get_block_offset(band):
        """Return the offset of the first block with a non-empty offset, else 0."""
        blockxsize, blockysize = band.GetBlockSize()
        for y in range((band.YSize + blockysize - 1) // blockysize):
            for x in range((band.XSize + blockxsize - 1) // blockxsize):
                block_offset = band.GetMetadataItem(
                    "BLOCK_OFFSET_%d_%d" % (x, y), "TIFF"
                )
                if block_offset:
                    return int(block_offset)
        return 0

    block_offset = get_block_offset(main_band)
    data_offsets = [block_offset]
    details["data_offsets"] = {}
    details["data_offsets"]["main"] = block_offset
    for i in range(ovr_count):
        ovr_band = ds.GetRasterBand(1).GetOverview(i)
        block_offset = get_block_offset(ovr_band)
        data_offsets.append(block_offset)
        details["data_offsets"]["overview_%d" % i] = block_offset

    if data_offsets[-1] != 0 and data_offsets[-1] < ifd_offsets[-1]:
        if ovr_count > 0:
            errors.append(
                "The offset of the first block of the smallest overview "
                "should be after its IFD"
            )
        else:
            errors.append(
                "The offset of the first block of the image should " "be after its IFD"
            )
    for i in range(len(data_offsets) - 2, 0, -1):
        if data_offsets[i] != 0 and data_offsets[i] < data_offsets[i + 1]:
            errors.append(
                "The offset of the first block of overview of index %d should "
                "be after the one of the overview of index %d" % (i - 1, i)
            )
    if (
        len(data_offsets) >= 2
        and data_offsets[0] != 0
        and data_offsets[0] < data_offsets[1]
    ):
        errors.append(
            "The offset of the first block of the main resolution image "
            "should be after the one of the overview of index %d" % (ovr_count - 1)
        )

    if full_check and (
        block_order_row_major
        or block_leader_size_as_uint4
        or block_trailer_last_4_bytes_repeated
        or mask_interleaved_with_imagery
    ):
        f = gdal.VSIFOpenL(filename, "rb")
        if not f:
            raise ValidateCloudOptimizedGeoTIFFException("Cannot open file")

        # GetFileList() may return None (it is guarded the same way for the
        # .ovr check above); in that case no external .msk file is assumed.
        has_external_mask = filelist is not None and (filename + ".msk") in filelist

        try:
            full_check_band(
                f,
                "Main resolution image",
                main_band,
                errors,
                block_order_row_major,
                block_leader_size_as_uint4,
                block_trailer_last_4_bytes_repeated,
                mask_interleaved_with_imagery,
            )
            if (
                main_band.GetMaskFlags() == gdal.GMF_PER_DATASET
                and not has_external_mask
            ):
                full_check_band(
                    f,
                    "Mask band of main resolution image",
                    main_band.GetMaskBand(),
                    errors,
                    block_order_row_major,
                    block_leader_size_as_uint4,
                    block_trailer_last_4_bytes_repeated,
                    False,
                )
            for i in range(ovr_count):
                ovr_band = ds.GetRasterBand(1).GetOverview(i)
                full_check_band(
                    f,
                    "Overview %d" % i,
                    ovr_band,
                    errors,
                    block_order_row_major,
                    block_leader_size_as_uint4,
                    block_trailer_last_4_bytes_repeated,
                    mask_interleaved_with_imagery,
                )
                if (
                    ovr_band.GetMaskFlags() == gdal.GMF_PER_DATASET
                    and not has_external_mask
                ):
                    full_check_band(
                        f,
                        "Mask band of overview %d" % i,
                        ovr_band.GetMaskBand(),
                        errors,
                        block_order_row_major,
                        block_leader_size_as_uint4,
                        block_trailer_last_4_bytes_repeated,
                        False,
                    )
        finally:
            # Release the handle even if one of the band checks raises.
            gdal.VSIFCloseL(f)

    return warnings, errors, details
414 |
415 |
def main(argv=sys.argv):
    """Command-line entry point.

    Args:
        argv: argument list; argv[0] is the program name and is ignored.
            Recognized options are -q and --full-check=yes/no/auto, plus
            exactly one filename.

    Returns:
        0 when the file validates without error, 1 when errors were found or
        validation could not be carried out, and the value of Usage() on a
        command-line error.
    """

    full_check_choices = {
        "--full-check=yes": True,
        "--full-check=no": False,
        "--full-check=auto": None,
    }

    filename = None
    quiet = False
    full_check = None
    for arg in argv[1:]:
        if arg == "-q":
            quiet = True
        elif arg in full_check_choices:
            full_check = full_check_choices[arg]
        elif arg.startswith("-"):
            # startswith() rather than arg[0]: an empty-string argument must
            # not raise IndexError.
            return Usage()
        elif filename is None:
            filename = arg
        else:
            return Usage()

    if filename is None:
        return Usage()

    if full_check is None:
        # "auto": full check only for in-memory or local files.
        full_check = filename.startswith("/vsimem/") or os.path.exists(filename)

    try:
        ret = 0
        warnings, errors, details = validate(filename, full_check=full_check)
        if warnings:
            if not quiet:
                print("The following warnings were found:")
                for warning in warnings:
                    print(" - " + warning)
                print("")
        if errors:
            if not quiet:
                print("%s is NOT a valid cloud optimized GeoTIFF." % filename)
                print("The following errors were found:")
                for error in errors:
                    print(" - " + error)
                print("")
            ret = 1
        else:
            if not quiet:
                print("%s is a valid cloud optimized GeoTIFF" % filename)

        if not quiet and not warnings and not errors:
            headers_size = min(
                details["data_offsets"][k] for k in details["data_offsets"]
            )
            if headers_size == 0:
                # No block offset recorded: fall back to the whole file size.
                headers_size = gdal.VSIStatL(filename).size
            print("\nThe size of all IFD headers is %d bytes" % headers_size)
    except ValidateCloudOptimizedGeoTIFFException as e:
        if not quiet:
            print("%s is NOT a valid cloud optimized GeoTIFF : %s" % (filename, str(e)))
        ret = 1

    return ret
481 |
482 |
if __name__ == "__main__":
    # Script entry point: the value returned by main() becomes the process
    # exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)
485 |
--------------------------------------------------------------------------------