├── .gitignore ├── nbs └── bluetopo │ ├── __init__.py │ ├── cli │ └── cli.py │ └── core │ ├── fetch_tiles.py │ └── build_vrt.py ├── .github └── workflows │ └── pypi.yaml ├── pyproject.toml ├── README.md └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | *.pyc 3 | *.egg-info -------------------------------------------------------------------------------- /nbs/bluetopo/__init__.py: -------------------------------------------------------------------------------- 1 | from .core.build_vrt import main as build_vrt 2 | from .core.fetch_tiles import main as fetch_tiles 3 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Python Package 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | deploy: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: Set up Python 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: "3.x" 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip 23 | pip install build 24 | - name: Build package 25 | run: python -m build 26 | - name: Publish package 27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 28 | with: 29 | user: __token__ 30 | password: ${{ secrets.PYPI_API_TOKEN }} 31 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "BlueTopo" 7 | version = "0.7.0" 8 | authors = [ 9 | { name = "Glen Rice", email = "ocs.nbs@noaa.gov" }, 10 | { name = "Tashi Geleg", email = "ocs.nbs@noaa.gov" }, 11 | ] 12 | description = "National Bathymetric Source Project BlueTopo" 13 | readme = "README.md" 14 | license = { file = "LICENSE" } 15 | keywords = ["BlueTopo", "National Bathymetric Source", "Bathymetry"] 16 | dependencies = ["numpy", "boto3", "tqdm"] 17 | 18 | [project.scripts] 19 | fetch_tiles = "nbs.bluetopo.cli.cli:fetch_tiles_command" 20 | build_vrt = "nbs.bluetopo.cli.cli:build_vrt_command" 21 | 22 | [project.urls] 23 | homepage = "https://www.nauticalcharts.noaa.gov/data/bluetopo.html" 24 | source = "https://github.com/noaa-ocs-hydrography/BlueTopo" -------------------------------------------------------------------------------- /nbs/bluetopo/cli/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from argparse import ArgumentParser 3 | 4 | from nbs.bluetopo.core.build_vrt import main as build_vrt 5 | from nbs.bluetopo.core.fetch_tiles import main as fetch_tiles 6 | 7 | 8 | def str_to_bool(relative_to_vrt): 9 | if isinstance(relative_to_vrt, bool): 10 | return relative_to_vrt 11 | if relative_to_vrt.lower() in ("yes", "true", "t", "y", "1"): 12 | return True 13 | elif relative_to_vrt.lower() in ("no", "false", "f", "n", "0"): 14 | return False 15 | else: 16 | raise argparse.ArgumentTypeError("Boolean value expected.") 17 | 18 | 19 | def build_vrt_command(): 20 | """ 21 | console_scripts entry point for build_vrt cli command 22 | 23 | """ 24 | parser = ArgumentParser() 25 | parser.add_argument( 26 | "-d", 27 | "--dir", 28 | "--directory", 29 | help="The directory path to 
use. " "Will create if it does not currently exist. Required argument.", 30 | type=str, 31 | nargs="?", 32 | dest="dir", 33 | required=True, 34 | ) 35 | parser.add_argument( 36 | "-s", 37 | "--source", 38 | help=("The NBS offers various products to different end-users. " "Some are available publicly. Use this argument to identify " "the data source. BlueTopo is the default."), 39 | default="bluetopo", 40 | dest="source", 41 | nargs="?", 42 | ) 43 | parser.add_argument( 44 | "-r", 45 | "--rel", 46 | "--relative_to_vrt", 47 | help=("This bool argument will determine whether files referenced in the VRT " "are relative or absolute. The default value is true setting all paths " "inside the VRT to relative."), 48 | nargs="?", 49 | dest="relative_to_vrt", 50 | default="true", 51 | const=True, 52 | type=str_to_bool, 53 | ) 54 | args = parser.parse_args() 55 | build_vrt( 56 | project_dir=args.dir, 57 | data_source=args.source, 58 | relative_to_vrt=args.relative_to_vrt, 59 | ) 60 | 61 | 62 | def fetch_tiles_command(): 63 | """ 64 | console_scripts entry point for fetch_tiles cli command 65 | 66 | """ 67 | parser = ArgumentParser() 68 | parser.add_argument( 69 | "-d", 70 | "--dir", 71 | "--directory", 72 | help="The directory path to use. " "Will create if it does not currently exist. Required argument.", 73 | type=str, 74 | nargs="?", 75 | dest="dir", 76 | required=True, 77 | ) 78 | parser.add_argument( 79 | "-g", 80 | "--geom", 81 | "--geometry", 82 | help=("The geometry file to use to find intersecting available tiles. " "The returned tile ids at the time of intersection will be added to " "tracking. fetch_tiles will stay up to date with the latest data " "available from the NBS for all tracked tiles. This argument is " "not necessary if you do not want to add new tile ids to tracking."), 83 | type=str, 84 | dest="geom", 85 | nargs="?", 86 | ) 87 | parser.add_argument( 88 | "-s", 89 | "--source", 90 | help=("The NBS offers various products to different end-users. " "Some are available publicly. Use this argument to identify " "the data source. BlueTopo is the default."), 91 | default="bluetopo", 92 | dest="source", 93 | nargs="?", 94 | ) 95 | parser.add_argument( 96 | "-u", 97 | "--untrack", 98 | help=("This flag will untrack tiles that have missing files in your local " "download directory. fetch_tiles will no longer retrieve these tiles."), 99 | dest="untrack", 100 | action="store_true", 101 | ) 102 | args = parser.parse_args() 103 | fetch_tiles( 104 | project_dir=args.dir, 105 | desired_area_filename=args.geom, 106 | untrack_missing=args.untrack, 107 | data_source=args.source, 108 | ) 109 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![alt text](https://www.nauticalcharts.noaa.gov/data/images/bluetopo/logo.png)](https://www.nauticalcharts.noaa.gov/data/bluetopo.html) 2 | 3 | --- 4 | 5 |
6 | Background • 7 | Requirements • 8 | Installation • 9 | Quickstart • 10 | CLI • 11 | Notes • 12 | Contact 13 |
14 | 15 | ## Overview 16 | 17 | This package simplifies getting BlueTopo data in your area of interest. 18 | 19 | ## Background 20 | 21 | [BlueTopo](https://www.nauticalcharts.noaa.gov/data/bluetopo.html) is a compilation of the best available public bathymetric data of U.S. waters. 22 | 23 | Created by [NOAA Office of Coast Survey's](https://www.nauticalcharts.noaa.gov/) National Bathymetric Source project, [BlueTopo data](https://www.nauticalcharts.noaa.gov/data/bluetopo_specs.html) intends to provide depth information nationwide with the vertical uncertainty tied to that depth estimate as well as information on the survey source that it originated from. 24 | 25 | This data is presented in a multiband high resolution GeoTIFF with an associated raster attribute table. 26 | 27 | For answers to frequently asked questions, visit the [FAQ](https://www.nauticalcharts.noaa.gov/data/bluetopo_faq.html). 28 | 29 | ## Requirements 30 | 31 | This codebase is written for Python 3 and relies on the following python 32 | packages: 33 | 34 | - gdal / ogr 35 | - numpy 36 | - boto3 37 | - tqdm 38 | 39 | ## Installation 40 | 41 | Install conda (If you have not already): [conda installation](https://docs.conda.io/projects/conda/en/latest/user-guide/install/) 42 | 43 | In the command line, create an environment with the required packages: 44 | 45 | ``` 46 | conda create -n bluetopo_env -c conda-forge 'gdal>=3.4' 47 | ``` 48 | 49 | ``` 50 | conda activate bluetopo_env 51 | ``` 52 | 53 | ``` 54 | pip install bluetopo 55 | ``` 56 | 57 | ## Quickstart 58 | 59 | To download the desired files, first create a geometry file (such as a geopackage) with a polygon depicting the area of interest. Then run the following commands inside of a Python shell: 60 | 61 | ```python 62 | from nbs.bluetopo import fetch_tiles 63 | ``` 64 | 65 | ```python 66 | fetch_tiles(r'C:\download_path', 'area_of_interest.gpkg') 67 | ``` 68 | 69 | To build a GDAL VRT of the downloaded tiles: 70 | 71 | ```python 72 | from nbs.bluetopo import build_vrt 73 | ``` 74 | 75 | ```python 76 | build_vrt(r'C:\download_path') 77 | ``` 78 | 79 | ## CLI 80 | 81 | You can also use the command line. Confirm the environment we created during installation is activated. 82 | 83 | To fetch the latest BlueTopo data, use `fetch_tiles` passing a directory path and a geometry file path with a polygon depicting your area of interest: 84 | 85 | ``` 86 | fetch_tiles -d [DIRECTORY PATH] -g [GEOMETRY FILE PATH] 87 | ``` 88 | 89 | Pass the same directory path to `build_vrt` to create a VRT from the fetched data: 90 | 91 | ``` 92 | build_vrt -d [DIRECTORY PATH] 93 | ``` 94 | 95 | Use `-h` for help and to see additional arguments. 96 | 97 | For most usecases, reusing the commands above to stay up to date in your area of interest is adequate. 98 | 99 | ## Notes 100 | 101 | In addition to BlueTopo, modeling data is available. You can work with modeling data using the `source` argument in the CLI commands or the `data_source` argument if you're calling the function directly. 102 | 103 | The primary difference between BlueTopo and modeling data is the vertical datum. Modeling data is on a low water datum. 104 | 105 | ## Authors 106 | 107 | - Glen Rice (NOAA), 108 | 109 | - Tashi Geleg (Lynker / NOAA), 110 | 111 | ## License 112 | 113 | This work, as a whole, falls under Creative Commons Zero (see 114 | [LICENSE](LICENSE)). 
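
As a practical supplement to the Quickstart, CLI, and Notes sections above, the sketch below strings the two calls together in one script. It assumes only the `fetch_tiles` and `build_vrt` entry points already shown in the Quickstart; the paths are placeholders, and the lowercase `data_source='modeling'` value (mirroring the `-s/--source` CLI argument and its `bluetopo` default) is an assumed illustration of the Notes section rather than a confirmed identifier.

```python
from nbs.bluetopo import build_vrt, fetch_tiles

project_dir = r'C:\download_path'   # placeholder; created if it does not already exist
aoi = 'area_of_interest.gpkg'       # polygon(s) covering the area of interest

# Download (or refresh) BlueTopo tiles intersecting the area of interest,
# then mosaic everything fetched so far into a GDAL VRT.
fetch_tiles(project_dir, aoi)
build_vrt(project_dir)

# Modeling data (low water datum) uses the same calls with a data source set;
# 'modeling' is an assumed value mirroring the -s/--source CLI argument.
fetch_tiles(project_dir, aoi, data_source='modeling')
build_vrt(project_dir, data_source='modeling')
```

Re-running the same script later keeps tracked tiles current, in line with the CLI section's guidance to simply reuse the commands to stay up to date in your area of interest.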
115 | 116 | ## Disclaimer 117 | 118 | This repository is a scientific product and is not official 119 | communication of the National Oceanic and Atmospheric Administration, or 120 | the United States Department of Commerce. All NOAA GitHub project code 121 | is provided on an 'as is' basis and the user assumes responsibility for 122 | its use. Any claims against the Department of Commerce or Department of 123 | Commerce bureaus stemming from the use of this GitHub project will be 124 | governed by all applicable Federal law. Any reference to specific 125 | commercial products, processes, or services by service mark, trademark, 126 | manufacturer, or otherwise, does not constitute or imply their 127 | endorsement, recommendation or favoring by the Department of Commerce. 128 | The Department of Commerce seal and logo, or the seal and logo of a DOC 129 | bureau, shall not be used in any manner to imply endorsement of any 130 | commercial product or activity by DOC or the United States Government. 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. 
rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. 
Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | -------------------------------------------------------------------------------- /nbs/bluetopo/core/fetch_tiles.py: -------------------------------------------------------------------------------- 1 | """ 2 | fetch_tiles.py 3 | 4 | 0.0.1 20220614 5 | 6 | glen.rice@noaa.gov 20220614 7 | 8 | An example script for downloading BlueTopo (and Modeling) datasets from AWS. 9 | 10 | """ 11 | 12 | import concurrent.futures 13 | import datetime 14 | import hashlib 15 | import os 16 | import platform 17 | import random 18 | import shutil 19 | import sqlite3 20 | import sys 21 | 22 | import boto3 23 | import numpy as np 24 | from botocore import UNSIGNED 25 | from botocore.client import Config 26 | from osgeo import gdal, ogr, osr 27 | from tqdm import tqdm 28 | 29 | from nbs.bluetopo.core.build_vrt import connect_to_survey_registry, connect_to_survey_registry_pmn1, connect_to_survey_registry_pmn2 30 | 31 | debug_info = f""" 32 | Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro} 33 | GDAL {gdal.VersionInfo()} 34 | SQLite {sqlite3.sqlite_version} 35 | Date {datetime.datetime.now()} 36 | """ 37 | 38 | def adapt_datetime_iso(val): 39 | """Adapt datetime.datetime to timezone-naive ISO 8601 date.""" 40 | return val.isoformat() 41 | 42 | sqlite3.register_adapter(datetime.datetime, adapt_datetime_iso) 43 | 44 | def convert_datetime(val): 45 | """Convert ISO 8601 datetime to datetime.datetime object.""" 46 | return datetime.datetime.fromisoformat(val) 47 | 48 | sqlite3.register_converter("datetime", convert_datetime) 49 | 50 | # refactor duplicate functions 51 | 52 | def get_tessellation_pmn( 53 | conn: sqlite3.Connection, 54 | project_dir: str, 55 | prefix: str, 56 | data_source: str, 57 | bucket: str = "noaa-ocs-nationalbathymetry-pds", 58 | ) -> str: 59 | """ 60 | Download the tessellation scheme geopackage from AWS. 61 | 62 | Parameters 63 | ---------- 64 | conn : sqlite3.Connection 65 | database connection object. 66 | project_dir : str 67 | destination directory for project. 68 | prefix : str 69 | the prefix for the geopackage on AWS to find the file. 70 | data_source : str 71 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 72 | bucket : str 73 | AWS bucket for the National Bathymetric Source project. 74 | 75 | Returns 76 | ------- 77 | destination_name : str 78 | the downloaded file path string. 
79 | """ 80 | cursor = conn.cursor() 81 | cursor.execute("SELECT * FROM catalog WHERE file = 'Tessellation'") 82 | for tilescheme in [dict(row) for row in cursor.fetchall()]: 83 | try: 84 | os.remove(os.path.join(project_dir, tilescheme["location"])) 85 | except (OSError, PermissionError): 86 | continue 87 | if data_source not in ["BlueTopo", "Modeling", "BAG", "S102V21", "S102V22"]: 88 | gpkg_files = os.listdir(prefix) 89 | gpkg_files = [file for file in gpkg_files if file.endswith(".gpkg") and "Tile_Scheme" in file] 90 | if len(gpkg_files) == 0: 91 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}") 92 | return None 93 | gpkg_files.sort(reverse=True) 94 | filename = gpkg_files[0] 95 | if len(gpkg_files) > 1: 96 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {gpkg_files[0]}") 97 | destination_name = os.path.join(project_dir, data_source, f"Tessellation", gpkg_files[0]) 98 | if not os.path.exists(os.path.dirname(destination_name)): 99 | os.makedirs(os.path.dirname(destination_name)) 100 | try: 101 | shutil.copy(os.path.join(prefix, gpkg_files[0]), destination_name) 102 | relative = os.path.join(data_source, f"Tessellation", gpkg_files[0]) 103 | except: 104 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again") 105 | sys.exit(1) 106 | else: 107 | cred = { 108 | "aws_access_key_id": "", 109 | "aws_secret_access_key": "", 110 | "config": Config(signature_version=UNSIGNED), 111 | } 112 | client = boto3.client("s3", **cred) 113 | pageinator = client.get_paginator("list_objects_v2") 114 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result() 115 | if "Contents" not in objs: 116 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}") 117 | return None 118 | tileschemes = objs["Contents"] 119 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True) 120 | source_name = tileschemes[0]["Key"] 121 | filename = os.path.basename(source_name) 122 | relative = os.path.join(data_source, f"Tessellation", filename) 123 | if len(tileschemes) > 1: 124 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {filename}") 125 | destination_name = os.path.join(project_dir, relative) 126 | if not os.path.exists(os.path.dirname(destination_name)): 127 | os.makedirs(os.path.dirname(destination_name)) 128 | try: 129 | client.download_file(bucket, source_name, destination_name) 130 | except (OSError, PermissionError) as e: 131 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. 
" "Please close all files and attempt again") 132 | sys.exit(1) 133 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename}") 134 | cursor.execute( 135 | """REPLACE INTO catalog(file, location, downloaded) 136 | VALUES(?, ?, ?)""", 137 | ("Tessellation", relative, datetime.datetime.now()), 138 | ) 139 | conn.commit() 140 | return destination_name 141 | 142 | 143 | def get_tessellation( 144 | conn: sqlite3.Connection, 145 | project_dir: str, 146 | prefix: str, 147 | data_source: str, 148 | bucket: str = "noaa-ocs-nationalbathymetry-pds", 149 | ) -> str: 150 | """ 151 | Download the tessellation scheme geopackage from AWS. 152 | 153 | Parameters 154 | ---------- 155 | conn : sqlite3.Connection 156 | database connection object. 157 | project_dir : str 158 | destination directory for project. 159 | prefix : str 160 | the prefix for the geopackage on AWS to find the file. 161 | data_source : str 162 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 163 | bucket : str 164 | AWS bucket for the National Bathymetric Source project. 165 | 166 | Returns 167 | ------- 168 | destination_name : str 169 | the downloaded file path string. 170 | """ 171 | cursor = conn.cursor() 172 | cursor.execute("SELECT * FROM tileset WHERE tilescheme = 'Tessellation'") 173 | for tilescheme in [dict(row) for row in cursor.fetchall()]: 174 | try: 175 | os.remove(os.path.join(project_dir, tilescheme["location"])) 176 | except (OSError, PermissionError): 177 | continue 178 | if data_source not in ["BlueTopo", "Modeling", "BAG", "S102V21", "S102V22"]: 179 | gpkg_files = os.listdir(prefix) 180 | gpkg_files = [file for file in gpkg_files if file.endswith(".gpkg") and "Tile_Scheme" in file] 181 | if len(gpkg_files) == 0: 182 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}") 183 | return None 184 | gpkg_files.sort(reverse=True) 185 | filename = gpkg_files[0] 186 | if len(gpkg_files) > 1: 187 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {gpkg_files[0]}") 188 | destination_name = os.path.join(project_dir, data_source, f"Tessellation", gpkg_files[0]) 189 | if not os.path.exists(os.path.dirname(destination_name)): 190 | os.makedirs(os.path.dirname(destination_name)) 191 | try: 192 | shutil.copy(os.path.join(prefix, gpkg_files[0]), destination_name) 193 | relative = os.path.join(data_source, f"Tessellation", gpkg_files[0]) 194 | except: 195 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. 
" "Please close all files and attempt again") 196 | sys.exit(1) 197 | else: 198 | cred = { 199 | "aws_access_key_id": "", 200 | "aws_secret_access_key": "", 201 | "config": Config(signature_version=UNSIGNED), 202 | } 203 | client = boto3.client("s3", **cred) 204 | pageinator = client.get_paginator("list_objects_v2") 205 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result() 206 | if "Contents" not in objs: 207 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}") 208 | return None 209 | tileschemes = objs["Contents"] 210 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True) 211 | source_name = tileschemes[0]["Key"] 212 | filename = os.path.basename(source_name) 213 | relative = os.path.join(data_source, f"Tessellation", filename) 214 | if len(tileschemes) > 1: 215 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {filename}") 216 | destination_name = os.path.join(project_dir, relative) 217 | if not os.path.exists(os.path.dirname(destination_name)): 218 | os.makedirs(os.path.dirname(destination_name)) 219 | try: 220 | client.download_file(bucket, source_name, destination_name) 221 | except (OSError, PermissionError) as e: 222 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again") 223 | sys.exit(1) 224 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename}") 225 | cursor.execute( 226 | """REPLACE INTO tileset(tilescheme, location, downloaded) 227 | VALUES(?, ?, ?)""", 228 | ("Tessellation", relative, datetime.datetime.now()), 229 | ) 230 | conn.commit() 231 | return destination_name 232 | 233 | # refactor later 234 | def get_xml( 235 | conn: sqlite3.Connection, 236 | project_dir: str, 237 | prefix: str, 238 | data_source: str, 239 | bucket: str = "noaa-ocs-nationalbathymetry-pds", 240 | ) -> str: 241 | """ 242 | Download XML from AWS. 243 | 244 | Parameters 245 | ---------- 246 | conn : sqlite3.Connection 247 | database connection object. 248 | project_dir : str 249 | destination directory for project. 250 | prefix : str 251 | the prefix for the XML on AWS to find the file. 252 | data_source : str 253 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 254 | bucket : str 255 | AWS bucket for the National Bathymetric Source project. 256 | 257 | Returns 258 | ------- 259 | destination_name : str 260 | the downloaded file path string. 
261 | """ 262 | cursor = conn.cursor() 263 | cursor.execute("SELECT * FROM catalog WHERE file = 'XML'") 264 | for tilescheme in [dict(row) for row in cursor.fetchall()]: 265 | try: 266 | if os.path.isfile(os.path.join(project_dir, tilescheme["location"])): 267 | os.remove(os.path.join(project_dir, tilescheme["location"])) 268 | except (OSError, PermissionError): 269 | continue 270 | if data_source in ["S102V21", "S102V22"]: 271 | cred = { 272 | "aws_access_key_id": "", 273 | "aws_secret_access_key": "", 274 | "config": Config(signature_version=UNSIGNED), 275 | } 276 | client = boto3.client("s3", **cred) 277 | pageinator = client.get_paginator("list_objects_v2") 278 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result() 279 | if "Contents" not in objs: 280 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No XML found in {prefix}") 281 | return None 282 | tileschemes = objs["Contents"] 283 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True) 284 | source_name = tileschemes[0]["Key"] 285 | filename = os.path.basename(source_name) 286 | relative = os.path.join(data_source, f"Data", filename) 287 | if len(tileschemes) > 1: 288 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one XML found in {prefix}, using {filename}") 289 | destination_name = os.path.join(project_dir, relative) 290 | filename_renamed = 'CATALOG.XML' 291 | relative_renamed = os.path.join(data_source, f"Data", filename_renamed) 292 | destination_name_renamed = os.path.join(project_dir, relative_renamed) 293 | if not os.path.exists(os.path.dirname(destination_name)): 294 | os.makedirs(os.path.dirname(destination_name)) 295 | try: 296 | client.download_file(bucket, source_name, destination_name) 297 | except (OSError, PermissionError) as e: 298 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download XML " "possibly due to conflict with an open existing file. " "Please close all files and attempt again") 299 | sys.exit(1) 300 | try: 301 | os.replace(destination_name, destination_name_renamed) 302 | except (OSError, PermissionError) as e: 303 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to rename XML to CATALOG.xml." "possibly due to conflict with an open existing file named CATALOG.XML. " "Please close all files and attempt again") 304 | sys.exit(1) 305 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename_renamed}") 306 | cursor.execute( 307 | """REPLACE INTO catalog(file, location, downloaded) 308 | VALUES(?, ?, ?)""", 309 | ("XML", relative, datetime.datetime.now()), 310 | ) 311 | conn.commit() 312 | return destination_name_renamed 313 | 314 | 315 | def download_tiles_pmn( 316 | conn: sqlite3.Connection, 317 | project_dir: str, 318 | tile_prefix: str, 319 | data_source: str, 320 | bucket: str = "noaa-ocs-nationalbathymetry-pds", 321 | ) -> [[str], [str], [str]]: 322 | """ 323 | Download tiles' files (geotiff and aux per tile). 324 | 325 | Parameters 326 | ---------- 327 | conn : sqlite3.Connection 328 | database connection object. 329 | project_dir : str 330 | destination directory for project. 
331 | tile_prefix : str 332 | s3 prefix for tiles. 333 | data_source : str 334 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 335 | bucket : str 336 | AWS bucket for the National Bathymetric Source project. 337 | 338 | Returns 339 | ------- 340 | existing_tiles : list 341 | tiles already existing locally. 342 | tiles_found : list 343 | tiles found in s3 bucket. 344 | tiles_not_found : list 345 | tiles not found in s3 bucket. 346 | """ 347 | download_tile_list = all_db_tiles(conn) 348 | # better tqdm download time estimate? 349 | random.shuffle(download_tile_list) 350 | new_tile_list = [download_tile for download_tile in download_tile_list if download_tile["file_disk"] is None] 351 | print("\nResolving fetch list...") 352 | if tile_prefix != "Local": 353 | cred = { 354 | "aws_access_key_id": "", 355 | "aws_secret_access_key": "", 356 | "config": Config(signature_version=UNSIGNED), 357 | } 358 | client = boto3.client("s3", **cred) 359 | pageinator = client.get_paginator("list_objects_v2") 360 | existing_tiles = [] 361 | missing_tiles = [] 362 | tiles_found = [] 363 | tiles_not_found = [] 364 | download_dict = {} 365 | for fields in download_tile_list: 366 | if fields["file_disk"]: 367 | if os.path.isfile(os.path.join(project_dir, fields["file_disk"])): 368 | if fields["file_verified"] != "True": 369 | missing_tiles.append(fields["tilename"]) 370 | else: 371 | existing_tiles.append(fields["tilename"]) 372 | continue 373 | if os.path.isfile(os.path.join(project_dir, fields["file_disk"])) is False: 374 | missing_tiles.append(fields["tilename"]) 375 | if 'Navigation_Test_and_Evaluation' in tile_prefix: 376 | tilename = fields["tilename"] 377 | if fields["file_link"] and fields["file_link"] != "None": 378 | found = False 379 | for obj in client.list_objects(Bucket='noaa-ocs-nationalbathymetry-pds', Prefix=fields['file_link'].split('amazonaws.com/')[1])['Contents']: 380 | if os.path.basename(fields["file_link"])[7:13] in obj['Key']: 381 | download_dict[tilename] = { 382 | "tile": tilename, 383 | "bucket": bucket, 384 | "client": client, 385 | "subregion": fields["subregion"], 386 | "utm": fields["utm"], 387 | } 388 | source_name = obj["Key"] 389 | download_dict[tilename]["file"] = source_name 390 | download_dict[tilename]["file_disk"] = os.path.join(data_source, "Data", os.path.basename(fields["file_link"])) 391 | download_dict[tilename]["file_dest"] = os.path.join(project_dir, download_dict[tilename]["file_disk"]) 392 | download_dict[tilename]["file_verified"] = fields["file_verified"] 393 | download_dict[tilename]["file_sha256_checksum"] = fields["file_sha256_checksum"] 394 | if not os.path.exists(os.path.dirname(download_dict[tilename]["file_dest"])): 395 | os.makedirs(os.path.dirname(download_dict[tilename]["file_dest"])) 396 | found = True 397 | tiles_found.append(tilename) 398 | break 399 | if found is False: 400 | tiles_not_found.append(tilename) 401 | else: 402 | raise ValueError(f"Invalid tile prefix: {tile_prefix}") 403 | 404 | def pull(downloads: dict) -> dict: 405 | """ 406 | Download files and verify hash. 407 | 408 | Parameters 409 | ---------- 410 | downloads : dict 411 | dict holding necessary values to execute download and checksum verification. 412 | 413 | Returns 414 | ------- 415 | dict 416 | result of download attempt. 
417 | """ 418 | try: 419 | downloads["client"].download_file(downloads["bucket"], downloads["file"], downloads["file_dest"]) 420 | if os.path.isfile(downloads["file_dest"]) is False: 421 | return {"Tile": downloads["tile"], "Result": False, "Reason": "missing download"} 422 | file_hash = hashlib.sha256(open(downloads["file_dest"], "rb").read()).hexdigest() 423 | if downloads["file_sha256_checksum"] != file_hash: 424 | return {"Tile": downloads["tile"], "Result": False, "Reason": "incorrect hash"} 425 | except Exception as e: 426 | return {"Tile": downloads["tile"], "Result": False, "Reason": "exception"} 427 | return {"Tile": downloads["tile"], "Result": True, "Reason": "success"} 428 | 429 | print(f"{len(new_tile_list)} tile(s) with new data") 430 | print(f"{len(missing_tiles)} tile(s) already downloaded are missing locally") 431 | download_length = len(download_dict.keys()) 432 | results = [] 433 | if download_length: 434 | print(f"\nFetching {download_length} tiles") 435 | with tqdm( 436 | total=download_length, 437 | bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} Tiles {elapsed}, {remaining} Est. Time Remaining" "{postfix}", 438 | desc=f"{data_source} Fetch", 439 | colour="#0085CA", 440 | position=0, 441 | leave=True, 442 | ) as progress: 443 | with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count() - 1) as executor: 444 | for i in executor.map(pull, download_dict.values()): 445 | results.append(i) 446 | progress.update(1) 447 | successful_downloads = [download["Tile"] for download in results if download["Result"] == True] 448 | failed_downloads = [download["Tile"] for download in results if download["Result"] == False] 449 | failed_verifications = [download["Tile"] for download in results if (download["Result"] == False and download["Reason"] == "incorrect hash")] 450 | 451 | if len(successful_downloads) > 0: 452 | if data_source.lower() == "s102v22": 453 | update_records_pmn2(conn, download_dict, successful_downloads) 454 | else: 455 | update_records_pmn1(conn, download_dict, successful_downloads) 456 | 457 | return ( 458 | list(set(tiles_found)), 459 | list(set(tiles_not_found)), 460 | successful_downloads, 461 | failed_downloads, 462 | existing_tiles, 463 | missing_tiles, 464 | failed_verifications, 465 | new_tile_list, 466 | ) 467 | 468 | 469 | def download_tiles( 470 | conn: sqlite3.Connection, 471 | project_dir: str, 472 | tile_prefix: str, 473 | data_source: str, 474 | bucket: str = "noaa-ocs-nationalbathymetry-pds", 475 | ) -> [[str], [str], [str]]: 476 | """ 477 | Download tiles' files (geotiff and aux per tile). 478 | 479 | Parameters 480 | ---------- 481 | conn : sqlite3.Connection 482 | database connection object. 483 | project_dir : str 484 | destination directory for project. 485 | tile_prefix : str 486 | s3 prefix for tiles. 487 | data_source : str 488 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 489 | bucket : str 490 | AWS bucket for the National Bathymetric Source project. 491 | 492 | Returns 493 | ------- 494 | existing_tiles : list 495 | tiles already existing locally. 496 | tiles_found : list 497 | tiles found in s3 bucket. 498 | tiles_not_found : list 499 | tiles not found in s3 bucket. 500 | """ 501 | download_tile_list = all_db_tiles(conn) 502 | # better tqdm download time estimate? 
503 | random.shuffle(download_tile_list) 504 | new_tile_list = [download_tile for download_tile in download_tile_list if download_tile["geotiff_disk"] is None or download_tile["rat_disk"] is None] 505 | print("\nResolving fetch list...") 506 | if tile_prefix != "Local": 507 | cred = { 508 | "aws_access_key_id": "", 509 | "aws_secret_access_key": "", 510 | "config": Config(signature_version=UNSIGNED), 511 | } 512 | client = boto3.client("s3", **cred) 513 | pageinator = client.get_paginator("list_objects_v2") 514 | existing_tiles = [] 515 | missing_tiles = [] 516 | tiles_found = [] 517 | tiles_not_found = [] 518 | download_dict = {} 519 | for fields in download_tile_list: 520 | if fields["geotiff_disk"] and fields["rat_disk"]: 521 | if os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) and os.path.isfile(os.path.join(project_dir, fields["rat_disk"])): 522 | if fields["geotiff_verified"] != "True" or fields["rat_verified"] != "True": 523 | missing_tiles.append(fields["tilename"]) 524 | else: 525 | existing_tiles.append(fields["tilename"]) 526 | continue 527 | if os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) is False or os.path.isfile(os.path.join(project_dir, fields["rat_disk"])) is False: 528 | missing_tiles.append(fields["tilename"]) 529 | if "BlueTopo" in tile_prefix or "Modeling" in tile_prefix: 530 | tilename = fields["tilename"] 531 | pfx = tile_prefix + f"/{tilename}/" 532 | objs = pageinator.paginate(Bucket=bucket, Prefix=pfx).build_full_result() 533 | if len(objs) > 0: 534 | download_dict[tilename] = { 535 | "tile": tilename, 536 | "bucket": bucket, 537 | "client": client, 538 | "subregion": fields["subregion"], 539 | "utm": fields["utm"], 540 | } 541 | for object_name in objs["Contents"]: 542 | source_name = object_name["Key"] 543 | relative = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(source_name)) 544 | destination_name = os.path.join(project_dir, relative) 545 | if not os.path.exists(os.path.dirname(destination_name)): 546 | os.makedirs(os.path.dirname(destination_name)) 547 | if ".aux" in source_name.lower(): 548 | download_dict[tilename]["rat"] = source_name 549 | download_dict[tilename]["rat_dest"] = destination_name 550 | download_dict[tilename]["rat_verified"] = fields["rat_verified"] 551 | download_dict[tilename]["rat_disk"] = relative 552 | download_dict[tilename]["rat_sha256_checksum"] = fields["rat_sha256_checksum"] 553 | else: 554 | download_dict[tilename]["geotiff"] = source_name 555 | download_dict[tilename]["geotiff_dest"] = destination_name 556 | download_dict[tilename]["geotiff_verified"] = fields["geotiff_verified"] 557 | download_dict[tilename]["geotiff_disk"] = relative 558 | download_dict[tilename]["geotiff_sha256_checksum"] = fields["geotiff_sha256_checksum"] 559 | tiles_found.append(tilename) 560 | else: 561 | tiles_not_found.append(tilename) 562 | # refactor later 563 | elif tile_prefix == "Local": 564 | tilename = fields["tilename"] 565 | if fields["geotiff_link"] and fields["rat_link"]: 566 | download_dict[tilename] = { 567 | "tile": tilename, 568 | "subregion": fields["subregion"], 569 | "client": "Local", 570 | "utm": fields["utm"], 571 | } 572 | download_dict[tilename]["rat"] = fields["rat_link"] 573 | download_dict[tilename]["rat_disk"] = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(fields["rat_link"])) 574 | download_dict[tilename]["rat_dest"] = os.path.join(project_dir, download_dict[tilename]["rat_disk"]) 575 | download_dict[tilename]["rat_verified"] = 
fields["rat_verified"] 576 | download_dict[tilename]["rat_sha256_checksum"] = fields["rat_sha256_checksum"] 577 | download_dict[tilename]["geotiff"] = fields["geotiff_link"] 578 | download_dict[tilename]["geotiff_disk"] = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(fields["geotiff_link"])) 579 | download_dict[tilename]["geotiff_dest"] = os.path.join(project_dir, download_dict[tilename]["geotiff_disk"]) 580 | download_dict[tilename]["geotiff_verified"] = fields["geotiff_verified"] 581 | download_dict[tilename]["geotiff_sha256_checksum"] = fields["geotiff_sha256_checksum"] 582 | if not os.path.exists(os.path.dirname(download_dict[tilename]["geotiff_dest"])): 583 | os.makedirs(os.path.dirname(download_dict[tilename]["geotiff_dest"])) 584 | tiles_found.append(tilename) 585 | else: 586 | tiles_not_found.append(tilename) 587 | else: 588 | raise ValueError(f"Invalid tile prefix: {tile_prefix}") 589 | 590 | def pull(downloads: dict) -> dict: 591 | """ 592 | Download files and verify hash. 593 | 594 | Parameters 595 | ---------- 596 | downloads : dict 597 | dict holding necessary values to execute download and checksum verification. 598 | 599 | Returns 600 | ------- 601 | dict 602 | result of download attempt. 603 | """ 604 | try: 605 | if downloads["client"] == "Local": 606 | shutil.copy(downloads["geotiff"], downloads["geotiff_dest"]) 607 | shutil.copy(downloads["rat"], downloads["rat_dest"]) 608 | else: 609 | downloads["client"].download_file(downloads["bucket"], downloads["geotiff"], downloads["geotiff_dest"]) 610 | downloads["client"].download_file(downloads["bucket"], downloads["rat"], downloads["rat_dest"]) 611 | if os.path.isfile(downloads["geotiff_dest"]) is False or os.path.isfile(downloads["rat_dest"]) is False: 612 | return {"Tile": downloads["tile"], "Result": False, "Reason": "missing download"} 613 | geotiff_hash = hashlib.sha256(open(downloads["geotiff_dest"], "rb").read()).hexdigest() 614 | rat_hash = hashlib.sha256(open(downloads["rat_dest"], "rb").read()).hexdigest() 615 | if downloads["geotiff_sha256_checksum"] != geotiff_hash or downloads["rat_sha256_checksum"] != rat_hash: 616 | return {"Tile": downloads["tile"], "Result": False, "Reason": "incorrect hash"} 617 | except Exception as e: 618 | return {"Tile": downloads["tile"], "Result": False, "Reason": "exception"} 619 | return {"Tile": downloads["tile"], "Result": True, "Reason": "success"} 620 | 621 | print(f"{len(new_tile_list)} tile(s) with new data") 622 | print(f"{len(missing_tiles)} tile(s) already downloaded are missing locally") 623 | download_length = len(download_dict.keys()) 624 | results = [] 625 | if download_length: 626 | print(f"\nFetching {download_length} tiles") 627 | with tqdm( 628 | total=download_length, 629 | bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} Tiles {elapsed}, {remaining} Est. 
Time Remaining" "{postfix}", 630 | desc=f"{data_source} Fetch", 631 | colour="#0085CA", 632 | position=0, 633 | leave=True, 634 | ) as progress: 635 | with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count() - 1) as executor: 636 | for i in executor.map(pull, download_dict.values()): 637 | results.append(i) 638 | progress.update(1) 639 | successful_downloads = [download["Tile"] for download in results if download["Result"] == True] 640 | failed_downloads = [download["Tile"] for download in results if download["Result"] == False] 641 | failed_verifications = [download["Tile"] for download in results if (download["Result"] == False and download["Reason"] == "incorrect hash")] 642 | 643 | if len(successful_downloads) > 0: 644 | update_records(conn, download_dict, successful_downloads) 645 | 646 | return ( 647 | list(set(tiles_found)), 648 | list(set(tiles_not_found)), 649 | successful_downloads, 650 | failed_downloads, 651 | existing_tiles, 652 | missing_tiles, 653 | failed_verifications, 654 | new_tile_list, 655 | ) 656 | 657 | def get_tile_list(desired_area_filename: str, tile_scheme_filename: str) -> [str]: 658 | """ 659 | Get the list of tiles inside the given polygon(s). 660 | 661 | Parameters 662 | ---------- 663 | desired_area_filename : str 664 | a gdal compatible file path denoting geometries that reflect the region 665 | of interest. 666 | tile_scheme_filename : str 667 | a gdal compatible file path denoting geometries that reflect the 668 | tessellation scheme with addressing information for the desired tiles. 669 | 670 | Returns 671 | ------- 672 | feature_list : str 673 | list of tiles intersecting with the provided polygon(s). 674 | """ 675 | data_source = ogr.Open(desired_area_filename) 676 | if data_source is None: 677 | print("Unable to open desired area file") 678 | return None 679 | source = ogr.Open(tile_scheme_filename) 680 | if source is None: 681 | print("Unable to open tile scheme file") 682 | return None 683 | driver = ogr.GetDriverByName("MEMORY") 684 | intersect = driver.CreateDataSource("memData") 685 | intersect_lyr = intersect.CreateLayer("mem", geom_type=ogr.wkbPolygon) 686 | source_layer = source.GetLayer(0) 687 | source_crs = source_layer.GetSpatialRef() 688 | num_target_layers = data_source.GetLayerCount() 689 | feature_list = [] 690 | for layer_num in range(num_target_layers): 691 | target_layer = data_source.GetLayer(layer_num) 692 | target_crs = target_layer.GetSpatialRef() 693 | same_crs = target_crs.IsSame(source_crs) 694 | if not same_crs: 695 | transformed_input = transform_layer(target_layer, source_crs) 696 | target_layer = transformed_input.GetLayer(0) 697 | target_layer.Intersection(source_layer, intersect_lyr) 698 | if not same_crs: 699 | transformed_input = None 700 | lyr_defn = intersect_lyr.GetLayerDefn() 701 | for feature in intersect_lyr: 702 | fields = {} 703 | for idx in range(lyr_defn.GetFieldCount()): 704 | fields[lyr_defn.GetFieldDefn(idx).name] = feature.GetField(idx) 705 | feature_list.append(fields) 706 | return feature_list 707 | 708 | 709 | def transform_layer(input_layer: ogr.Layer, desired_crs: osr.SpatialReference) -> ogr.DataSource: 710 | """ 711 | Transform a provided ogr layer to the provided coordinate reference system. 712 | 713 | Parameters 714 | ---------- 715 | input_layer : ogr.Layer 716 | the ogr layer to be transformed. 717 | desired_crs : osr.SpatialReference 718 | the coordinate system for the transform. 
719 | 720 | Returns 721 | ------- 722 | out_ds : ogr.DataSource 723 | transformed ogr memory datasource. 724 | """ 725 | target_crs = input_layer.GetSpatialRef() 726 | coord_trans = osr.CoordinateTransformation(target_crs, desired_crs) 727 | driver = ogr.GetDriverByName("MEMORY") 728 | out_ds = driver.CreateDataSource("memData") 729 | out_lyr = out_ds.CreateLayer("out_lyr", geom_type=input_layer.GetGeomType()) 730 | out_defn = out_lyr.GetLayerDefn() 731 | in_feature = input_layer.GetNextFeature() 732 | while in_feature: 733 | geom = in_feature.GetGeometryRef() 734 | geom.Transform(coord_trans) 735 | out_feature = ogr.Feature(out_defn) 736 | out_feature.SetGeometry(geom) 737 | out_lyr.CreateFeature(out_feature) 738 | out_feature = None 739 | in_feature = input_layer.GetNextFeature() 740 | return out_ds 741 | 742 | 743 | def update_records_pmn1(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None: 744 | """ 745 | Update tile record and associated tables in SQLite database. 746 | 747 | Parameters 748 | ---------- 749 | conn : sqlite3.Connection 750 | database connection object. 751 | download_dict : dict 752 | relevant fields per tile 753 | successful_downloads : list 754 | list of tilenames successfully downloaded 755 | """ 756 | # TODO refactor more sensibly 757 | tiles_records = [] 758 | subregion_records = [] 759 | utm_records = [] 760 | for tilename in download_dict: 761 | if tilename in successful_downloads: 762 | tiles_records.append((download_dict[tilename]["file_disk"], "True", tilename)) 763 | subregion_records.append( 764 | ( 765 | download_dict[tilename]["subregion"], 766 | download_dict[tilename]["utm"], 767 | None, 768 | None, 769 | None, 770 | None, 771 | None, 772 | None, 773 | None, 774 | None, 775 | 0, 776 | ) 777 | ) 778 | utm_records.append((download_dict[tilename]["utm"], None, None, 0)) 779 | if len(tiles_records) == 0: 780 | return 781 | cursor = conn.cursor() 782 | cursor.execute("BEGIN TRANSACTION;") 783 | cursor.executemany( 784 | """ 785 | UPDATE tiles 786 | SET file_disk = ?, 787 | file_verified = ? 788 | WHERE tilename = ? 789 | """, 790 | tiles_records, 791 | ) 792 | cursor.executemany( 793 | """ 794 | INSERT INTO vrt_subregion(region, utm, res_2_vrt, 795 | res_2_ovr, res_4_vrt, res_4_ovr, res_8_vrt, res_8_ovr, 796 | complete_vrt, complete_ovr, built) 797 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?) 798 | ON CONFLICT(region) DO UPDATE 799 | SET utm = EXCLUDED.utm, 800 | res_2_vrt = EXCLUDED.res_2_vrt, 801 | res_2_ovr = EXCLUDED.res_2_ovr, 802 | res_4_vrt = EXCLUDED.res_4_vrt, 803 | res_4_ovr = EXCLUDED.res_4_ovr, 804 | res_8_vrt = EXCLUDED.res_8_vrt, 805 | res_8_ovr = EXCLUDED.res_8_ovr, 806 | complete_vrt = EXCLUDED.complete_vrt, 807 | complete_ovr = EXCLUDED.complete_ovr, 808 | built = EXCLUDED.built 809 | """, 810 | subregion_records, 811 | ) 812 | cursor.executemany( 813 | """ 814 | INSERT INTO vrt_utm(utm, utm_vrt, utm_ovr, built) 815 | VALUES(?, ?, ?, ?) 816 | ON CONFLICT(utm) DO UPDATE 817 | SET utm_vrt = EXCLUDED.utm_vrt, 818 | utm_ovr = EXCLUDED.utm_ovr, 819 | built = EXCLUDED.built 820 | """, 821 | utm_records, 822 | ) 823 | cursor.execute("COMMIT;") 824 | conn.commit() 825 | 826 | 827 | def update_records_pmn2(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None: 828 | """ 829 | Update tile record and associated tables in SQLite database. 830 | 831 | Parameters 832 | ---------- 833 | conn : sqlite3.Connection 834 | database connection object. 
835 | download_dict : dict 836 | relevant fields per tile 837 | successful_downloads : list 838 | list of tilenames successfully downloaded 839 | """ 840 | # TODO refactor more sensibly 841 | tiles_records = [] 842 | subregion_records = [] 843 | utm_records = [] 844 | for tilename in download_dict: 845 | if tilename in successful_downloads: 846 | tiles_records.append((download_dict[tilename]["file_disk"], "True", tilename)) 847 | subregion_records.append( 848 | ( 849 | download_dict[tilename]["subregion"], 850 | download_dict[tilename]["utm"], 851 | None, 852 | None, 853 | None, 854 | None, 855 | None, 856 | None, 857 | None, 858 | None, 859 | None, 860 | None, 861 | None, 862 | None, 863 | None, 864 | None, 865 | None, 866 | None, 867 | 0, 868 | 0 869 | ) 870 | ) 871 | utm_records.append((download_dict[tilename]["utm"], None, None, None, None, None, 0, 0, 0)) 872 | if len(tiles_records) == 0: 873 | return 874 | cursor = conn.cursor() 875 | cursor.execute("BEGIN TRANSACTION;") 876 | cursor.executemany( 877 | """ 878 | UPDATE tiles 879 | SET file_disk = ?, 880 | file_verified = ? 881 | WHERE tilename = ? 882 | """, 883 | tiles_records, 884 | ) 885 | cursor.executemany( 886 | """ 887 | INSERT INTO vrt_subregion(region, utm, 888 | res_2_subdataset1_vrt, 889 | res_2_subdataset1_ovr, 890 | res_2_subdataset2_vrt, 891 | res_2_subdataset2_ovr, 892 | res_4_subdataset1_vrt, 893 | res_4_subdataset1_ovr, 894 | res_4_subdataset2_vrt, 895 | res_4_subdataset2_ovr, 896 | res_8_subdataset1_vrt, 897 | res_8_subdataset1_ovr, 898 | res_8_subdataset2_vrt, 899 | res_8_subdataset2_ovr, 900 | complete_subdataset1_vrt, 901 | complete_subdataset1_ovr, 902 | complete_subdataset2_vrt, 903 | complete_subdataset2_ovr, 904 | built_subdataset1, 905 | built_subdataset2) 906 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?, ?, ?, ?, ?, ? ,? , ?, ? ,?) 907 | ON CONFLICT(region) DO UPDATE 908 | SET utm = EXCLUDED.utm, 909 | res_2_subdataset1_vrt = EXCLUDED.res_2_subdataset1_vrt, 910 | res_2_subdataset1_ovr = EXCLUDED.res_2_subdataset1_ovr, 911 | res_2_subdataset2_vrt = EXCLUDED.res_2_subdataset2_vrt, 912 | res_2_subdataset2_ovr = EXCLUDED.res_2_subdataset2_ovr, 913 | res_4_subdataset1_vrt = EXCLUDED.res_4_subdataset1_vrt, 914 | res_4_subdataset1_ovr = EXCLUDED.res_4_subdataset1_ovr, 915 | res_4_subdataset2_vrt = EXCLUDED.res_4_subdataset2_vrt, 916 | res_4_subdataset2_ovr = EXCLUDED.res_4_subdataset2_ovr, 917 | 918 | res_8_subdataset1_vrt = EXCLUDED.res_8_subdataset1_vrt, 919 | res_8_subdataset1_ovr = EXCLUDED.res_8_subdataset1_ovr, 920 | res_8_subdataset2_vrt = EXCLUDED.res_8_subdataset2_vrt, 921 | res_8_subdataset2_ovr = EXCLUDED.res_8_subdataset2_ovr, 922 | 923 | complete_subdataset1_vrt = EXCLUDED.complete_subdataset1_vrt, 924 | complete_subdataset1_ovr = EXCLUDED.complete_subdataset1_ovr, 925 | 926 | complete_subdataset2_vrt = EXCLUDED.complete_subdataset2_vrt, 927 | complete_subdataset2_ovr = EXCLUDED.complete_subdataset2_ovr, 928 | 929 | built_subdataset1 = EXCLUDED.built_subdataset1, 930 | built_subdataset2 = EXCLUDED.built_subdataset2 931 | """, 932 | subregion_records, 933 | ) 934 | cursor.executemany( 935 | """ 936 | INSERT INTO vrt_utm(utm, 937 | utm_subdataset1_vrt, utm_subdataset1_ovr, 938 | utm_subdataset2_vrt, utm_subdataset2_ovr, 939 | utm_combined_vrt, 940 | built_subdataset1, 941 | built_subdataset2, 942 | built_combined) 943 | VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) 
944 | ON CONFLICT(utm) DO UPDATE 945 | SET utm_subdataset1_vrt = EXCLUDED.utm_subdataset1_vrt, 946 | utm_subdataset1_ovr = EXCLUDED.utm_subdataset1_ovr, 947 | utm_subdataset2_vrt = EXCLUDED.utm_subdataset2_vrt, 948 | utm_subdataset2_ovr = EXCLUDED.utm_subdataset2_ovr, 949 | utm_combined_vrt = EXCLUDED.utm_combined_vrt, 950 | built_subdataset1 = EXCLUDED.built_subdataset1, 951 | built_subdataset2 = EXCLUDED.built_subdataset2, 952 | built_combined = EXCLUDED.built_combined 953 | """, 954 | utm_records, 955 | ) 956 | cursor.execute("COMMIT;") 957 | conn.commit() 958 | 959 | 960 | def update_records(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None: 961 | """ 962 | Update tile record and associated tables in SQLite database. 963 | 964 | Parameters 965 | ---------- 966 | conn : sqlite3.Connection 967 | database connection object. 968 | download_dict : dict 969 | relevant fields per tile 970 | successful_downloads : list 971 | list of tilenames successfully downloaded 972 | """ 973 | # TODO refactor more sensibly 974 | tiles_records = [] 975 | subregion_records = [] 976 | utm_records = [] 977 | for tilename in download_dict: 978 | if tilename in successful_downloads: 979 | tiles_records.append((download_dict[tilename]["geotiff_disk"], download_dict[tilename]["rat_disk"], "True", "True", tilename)) 980 | subregion_records.append( 981 | ( 982 | download_dict[tilename]["subregion"], 983 | download_dict[tilename]["utm"], 984 | None, 985 | None, 986 | None, 987 | None, 988 | None, 989 | None, 990 | None, 991 | None, 992 | 0, 993 | ) 994 | ) 995 | utm_records.append((download_dict[tilename]["utm"], None, None, 0)) 996 | if len(tiles_records) == 0: 997 | return 998 | cursor = conn.cursor() 999 | cursor.execute("BEGIN TRANSACTION;") 1000 | cursor.executemany( 1001 | """ 1002 | UPDATE tiles 1003 | SET geotiff_disk = ?, rat_disk = ?, 1004 | geotiff_verified = ?, rat_verified = ? 1005 | WHERE tilename = ? 1006 | """, 1007 | tiles_records, 1008 | ) 1009 | cursor.executemany( 1010 | """ 1011 | INSERT INTO vrt_subregion(region, utm, res_2_vrt, 1012 | res_2_ovr, res_4_vrt, res_4_ovr, res_8_vrt, res_8_ovr, 1013 | complete_vrt, complete_ovr, built) 1014 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?) 1015 | ON CONFLICT(region) DO UPDATE 1016 | SET utm = EXCLUDED.utm, 1017 | res_2_vrt = EXCLUDED.res_2_vrt, 1018 | res_2_ovr = EXCLUDED.res_2_ovr, 1019 | res_4_vrt = EXCLUDED.res_4_vrt, 1020 | res_4_ovr = EXCLUDED.res_4_ovr, 1021 | res_8_vrt = EXCLUDED.res_8_vrt, 1022 | res_8_ovr = EXCLUDED.res_8_ovr, 1023 | complete_vrt = EXCLUDED.complete_vrt, 1024 | complete_ovr = EXCLUDED.complete_ovr, 1025 | built = EXCLUDED.built 1026 | """, 1027 | subregion_records, 1028 | ) 1029 | cursor.executemany( 1030 | """ 1031 | INSERT INTO vrt_utm(utm, utm_vrt, utm_ovr, built) 1032 | VALUES(?, ?, ?, ?) 1033 | ON CONFLICT(utm) DO UPDATE 1034 | SET utm_vrt = EXCLUDED.utm_vrt, 1035 | utm_ovr = EXCLUDED.utm_ovr, 1036 | built = EXCLUDED.built 1037 | """, 1038 | utm_records, 1039 | ) 1040 | cursor.execute("COMMIT;") 1041 | conn.commit() 1042 | 1043 | 1044 | def insert_new_pmn(conn: sqlite3.Connection, tiles: list, data_source) -> int: 1045 | """ 1046 | Insert new tile records into SQLite database. 1047 | 1048 | Parameters 1049 | ---------- 1050 | conn : sqlite3.Connection 1051 | database connection object. 1052 | tiles : list of dict 1053 | list of tile records. 1054 | 1055 | Returns 1056 | ------- 1057 | int 1058 | amount of delivered tiles from input tiles. 
1059 | """ 1060 | if data_source.lower() == "bag": 1061 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["BAG"] and tile["BAG"].lower() != "none"] 1062 | elif data_source.lower() == "s102v21": 1063 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["S102V21"] and tile["S102V21"].lower() != "none"] 1064 | elif data_source.lower() == "s102v22": 1065 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["S102V22"] and tile["S102V22"].lower() != "none"] 1066 | else: 1067 | raise ValueError(f"Unexpected data source {data_source}") 1068 | cursor = conn.cursor() 1069 | cursor.executemany( 1070 | """INSERT INTO tiles(tilename) 1071 | VALUES(?) ON CONFLICT DO NOTHING""", 1072 | tile_list, 1073 | ) 1074 | conn.commit() 1075 | return len(tile_list) 1076 | 1077 | 1078 | def insert_new(conn: sqlite3.Connection, tiles: list) -> int: 1079 | """ 1080 | Insert new tile records into SQLite database. 1081 | 1082 | Parameters 1083 | ---------- 1084 | conn : sqlite3.Connection 1085 | database connection object. 1086 | tiles : list of dict 1087 | list of tile records. 1088 | 1089 | Returns 1090 | ------- 1091 | int 1092 | amount of delivered tiles from input tiles. 1093 | """ 1094 | cursor = conn.cursor() 1095 | tile_list = [(tile["tile"],) for tile in tiles if tile["Delivered_Date"] and tile["GeoTIFF_Link"] and tile["RAT_Link"]] 1096 | cursor.executemany( 1097 | """INSERT INTO tiles(tilename) 1098 | VALUES(?) ON CONFLICT DO NOTHING""", 1099 | tile_list, 1100 | ) 1101 | conn.commit() 1102 | return len(tile_list) 1103 | 1104 | 1105 | def all_db_tiles(conn: sqlite3.Connection) -> list: 1106 | """ 1107 | Retrieve all tile records in tiles table of SQLite database. 1108 | 1109 | Parameters 1110 | ---------- 1111 | conn : sqlite3.Connection 1112 | database connection object. 1113 | 1114 | Returns 1115 | ------- 1116 | list 1117 | all tile records as dictionaries. 1118 | """ 1119 | cursor = conn.cursor() 1120 | cursor.execute("SELECT * FROM tiles") 1121 | return [dict(row) for row in cursor.fetchall()] 1122 | 1123 | 1124 | def upsert_tiles_pmn(conn: sqlite3.Connection, project_dir: str, tile_scheme: str, data_source: str) -> None: 1125 | """ 1126 | Update tile records in database with latest deliveries found in tilescheme. 1127 | 1128 | Parameters 1129 | ---------- 1130 | conn : sqlite3.Connection 1131 | database connection object. 1132 | project_dir : str 1133 | destination directory for project. 1134 | tile_scheme : str 1135 | a gdal compatible file path with the tessellation scheme. 
1136 | """ 1137 | # database records holds current set 1138 | # tilescheme polygons has latest set 1139 | # use the two to see where new tiles or updates to existing tiles exist 1140 | # use global tileset to map its region 1141 | db_tiles = all_db_tiles(conn) 1142 | ts_ds = ogr.Open(tile_scheme) 1143 | ts_lyr = ts_ds.GetLayer() 1144 | ts_defn = ts_lyr.GetLayerDefn() 1145 | ts_tiles = [] 1146 | for ft in ts_lyr: 1147 | field_list = {} 1148 | geom = ft.GetGeometryRef() 1149 | field_list["wkt_geom"] = geom.ExportToWkt() 1150 | for field_num in range(ts_defn.GetFieldCount()): 1151 | field_name = ts_defn.GetFieldDefn(field_num).name 1152 | field_list[field_name.lower()] = ft.GetField(field_name) 1153 | if data_source == 'BAG': 1154 | field_list['tile'] = ft.GetField('tile_id') 1155 | field_list['file_link'] = ft.GetField('bag') 1156 | field_list['file_sha256_checksum'] = ft.GetField('bag_sha256') 1157 | field_list['delivered_date'] = ft.GetField('issuance') 1158 | field_list['utm'] = ft.GetField('utm') 1159 | field_list['resolution'] = ft.GetField('resolution') 1160 | if data_source == 'S102V21': 1161 | field_list['tile'] = ft.GetField('tile_id') 1162 | field_list['file_link'] = ft.GetField('s102v21') 1163 | field_list['file_sha256_checksum'] = ft.GetField('s102v21_sha256') 1164 | field_list['delivered_date'] = ft.GetField('issuance') 1165 | field_list['utm'] = ft.GetField('utm') 1166 | field_list['resolution'] = ft.GetField('resolution') 1167 | if data_source == 'S102V22': 1168 | field_list['tile'] = ft.GetField('tile_id') 1169 | field_list['file_link'] = ft.GetField('s102v22') 1170 | field_list['file_sha256_checksum'] = ft.GetField('s102v22_sha256') 1171 | field_list['delivered_date'] = ft.GetField('issuance') 1172 | field_list['utm'] = ft.GetField('utm') 1173 | field_list['resolution'] = ft.GetField('resolution') 1174 | ts_tiles.append(field_list) 1175 | ts_ds = None 1176 | global_tileset = global_region_tileset(1, "1.2") 1177 | gs = ogr.Open(global_tileset) 1178 | lyr = gs.GetLayer() 1179 | insert_tiles = [] 1180 | for db_tile in db_tiles: 1181 | ts_tile = [ts_tile for ts_tile in ts_tiles if db_tile["tilename"] == ts_tile["tile"]] 1182 | if len(ts_tile) == 0: 1183 | print(f"Warning: {db_tile['tilename']} in database appears to have " "been removed from latest tilescheme") 1184 | continue 1185 | if len(ts_tile) > 1: 1186 | raise ValueError(f"More than one tilename {db_tile['tilename']} " "found in tileset.\n" "Please alert NBS.\n" "{debug_info}") 1187 | ts_tile = ts_tile[0] 1188 | # inserted into db only when delivered_date exists 1189 | # so value of None in ts_tile indicates delivered_date was removed 1190 | if ts_tile["delivered_date"] is None: 1191 | print("Warning: Unexpected removal of delivered date " f"for tile {db_tile['tilename']}") 1192 | continue 1193 | if (db_tile["delivered_date"] is None) or (ts_tile["delivered_date"] > db_tile["delivered_date"]): 1194 | try: 1195 | if db_tile["file_disk"] and os.path.isfile(os.path.join(project_dir, db_tile["file_disk"])): 1196 | os.remove(os.path.join(project_dir, db_tile["file_disk"])) 1197 | except (OSError, PermissionError) as e: 1198 | print("Failed to remove older files for tile " f"{db_tile['tilename']}. Please close all files and " "attempt fetch again.") 1199 | gdal.Unlink(global_tileset) 1200 | raise e 1201 | lyr.SetSpatialFilter(ogr.CreateGeometryFromWkt(ts_tile["wkt_geom"])) 1202 | if lyr.GetFeatureCount() != 1: 1203 | gdal.Unlink(global_tileset) 1204 | raise ValueError("Error getting subregion for " f"{db_tile['tilename']}. 
\n" f"{lyr.GetFeatureCount()} subregion(s). \n" f"{debug_info}") 1205 | region_ft = lyr.GetNextFeature() 1206 | ts_tile["region"] = region_ft.GetField("Region") 1207 | insert_tiles.append( 1208 | ( 1209 | ts_tile["tile"], 1210 | ts_tile["file_link"], 1211 | ts_tile["delivered_date"], 1212 | ts_tile["resolution"], 1213 | ts_tile["utm"], 1214 | ts_tile["region"], 1215 | ts_tile["file_sha256_checksum"], 1216 | ) 1217 | ) 1218 | if insert_tiles: 1219 | cursor = conn.cursor() 1220 | for ins in insert_tiles: 1221 | if len(ins) != 7: 1222 | print(len(ins)) 1223 | raise ValueError() 1224 | cursor.executemany( 1225 | """ 1226 | INSERT INTO tiles(tilename, file_link, 1227 | delivered_date, resolution, utm, subregion, 1228 | file_sha256_checksum) 1229 | VALUES(?, ?, ? ,? ,? ,?, ?) 1230 | ON CONFLICT(tilename) DO UPDATE 1231 | SET file_link = EXCLUDED.file_link, 1232 | delivered_date = EXCLUDED.delivered_date, 1233 | resolution = EXCLUDED.resolution, 1234 | utm = EXCLUDED.utm, 1235 | subregion = EXCLUDED.subregion, 1236 | file_sha256_checksum = EXCLUDED.file_sha256_checksum, 1237 | file_verified = Null, 1238 | file_disk = Null 1239 | """, 1240 | insert_tiles, 1241 | ) 1242 | conn.commit() 1243 | gdal.Unlink(global_tileset) 1244 | 1245 | 1246 | def upsert_tiles(conn: sqlite3.Connection, project_dir: str, tile_scheme: str) -> None: 1247 | """ 1248 | Update tile records in database with latest deliveries found in tilescheme. 1249 | 1250 | Parameters 1251 | ---------- 1252 | conn : sqlite3.Connection 1253 | database connection object. 1254 | project_dir : str 1255 | destination directory for project. 1256 | tile_scheme : str 1257 | a gdal compatible file path with the tessellation scheme. 1258 | """ 1259 | # database records holds current set 1260 | # tilescheme polygons has latest set 1261 | # use the two to see where new tiles or updates to existing tiles exist 1262 | # use global tileset to map its region 1263 | db_tiles = all_db_tiles(conn) 1264 | ts_ds = ogr.Open(tile_scheme) 1265 | ts_lyr = ts_ds.GetLayer() 1266 | ts_defn = ts_lyr.GetLayerDefn() 1267 | ts_tiles = [] 1268 | for ft in ts_lyr: 1269 | field_list = {} 1270 | geom = ft.GetGeometryRef() 1271 | field_list["wkt_geom"] = geom.ExportToWkt() 1272 | for field_num in range(ts_defn.GetFieldCount()): 1273 | field_name = ts_defn.GetFieldDefn(field_num).name 1274 | field_list[field_name.lower()] = ft.GetField(field_name) 1275 | ts_tiles.append(field_list) 1276 | ts_ds = None 1277 | global_tileset = global_region_tileset(1, "1.2") 1278 | gs = ogr.Open(global_tileset) 1279 | lyr = gs.GetLayer() 1280 | insert_tiles = [] 1281 | for db_tile in db_tiles: 1282 | ts_tile = [ts_tile for ts_tile in ts_tiles if db_tile["tilename"] == ts_tile["tile"]] 1283 | if len(ts_tile) == 0: 1284 | print(f"Warning: {db_tile['tilename']} in database appears to have " "been removed from latest tilescheme") 1285 | continue 1286 | if len(ts_tile) > 1: 1287 | raise ValueError(f"More than one tilename {db_tile['tilename']} " "found in tileset.\n" "Please alert NBS.\n" "{debug_info}") 1288 | ts_tile = ts_tile[0] 1289 | # inserted into db only when delivered_date exists 1290 | # so value of None in ts_tile indicates delivered_date was removed 1291 | if ts_tile["delivered_date"] is None: 1292 | print("Warning: Unexpected removal of delivered date " f"for tile {db_tile['tilename']}") 1293 | continue 1294 | if (db_tile["delivered_date"] is None) or (ts_tile["delivered_date"] > db_tile["delivered_date"]): 1295 | try: 1296 | if db_tile["geotiff_disk"] and 
os.path.isfile(os.path.join(project_dir, db_tile["geotiff_disk"])): 1297 | os.remove(os.path.join(project_dir, db_tile["geotiff_disk"])) 1298 | if db_tile["rat_disk"] and os.path.isfile(os.path.join(project_dir, db_tile["rat_disk"])): 1299 | os.remove(os.path.join(project_dir, db_tile["rat_disk"])) 1300 | except (OSError, PermissionError) as e: 1301 | print("Failed to remove older files for tile " f"{db_tile['tilename']}. Please close all files and " "attempt fetch again.") 1302 | gdal.Unlink(global_tileset) 1303 | raise e 1304 | lyr.SetSpatialFilter(ogr.CreateGeometryFromWkt(ts_tile["wkt_geom"])) 1305 | if lyr.GetFeatureCount() != 1: 1306 | gdal.Unlink(global_tileset) 1307 | raise ValueError("Error getting subregion for " f"{db_tile['tilename']}. \n" f"{lyr.GetFeatureCount()} subregion(s). \n" f"{debug_info}") 1308 | region_ft = lyr.GetNextFeature() 1309 | ts_tile["region"] = region_ft.GetField("Region") 1310 | insert_tiles.append( 1311 | ( 1312 | ts_tile["tile"], 1313 | ts_tile["geotiff_link"], 1314 | ts_tile["rat_link"], 1315 | ts_tile["delivered_date"], 1316 | ts_tile["resolution"], 1317 | ts_tile["utm"], 1318 | ts_tile["region"], 1319 | ts_tile["geotiff_sha256_checksum"], 1320 | ts_tile["rat_sha256_checksum"], 1321 | ) 1322 | ) 1323 | if insert_tiles: 1324 | cursor = conn.cursor() 1325 | for ins in insert_tiles: 1326 | if len(ins) != 9: 1327 | print(len(ins)) 1328 | raise ValueError() 1329 | cursor.executemany( 1330 | """ 1331 | INSERT INTO tiles(tilename, geotiff_link, rat_link, 1332 | delivered_date, resolution, utm, subregion, 1333 | geotiff_sha256_checksum, rat_sha256_checksum) 1334 | VALUES(?, ?, ? ,? ,? ,?, ?, ?, ?) 1335 | ON CONFLICT(tilename) DO UPDATE 1336 | SET geotiff_link = EXCLUDED.geotiff_link, 1337 | rat_link = EXCLUDED.rat_link, 1338 | delivered_date = EXCLUDED.delivered_date, 1339 | resolution = EXCLUDED.resolution, 1340 | utm = EXCLUDED.utm, 1341 | subregion = EXCLUDED.subregion, 1342 | geotiff_sha256_checksum = EXCLUDED.geotiff_sha256_checksum, 1343 | rat_sha256_checksum = EXCLUDED.rat_sha256_checksum, 1344 | geotiff_verified = Null, 1345 | rat_verified = Null, 1346 | geotiff_disk = Null, 1347 | rat_disk = Null 1348 | """, 1349 | insert_tiles, 1350 | ) 1351 | conn.commit() 1352 | gdal.Unlink(global_tileset) 1353 | 1354 | 1355 | def convert_base(charset: str, input: int, minimum: int) -> str: 1356 | """ 1357 | Convert integer to new base system using the given symbols with a 1358 | minimum length filled using leading characters of the lowest value in the 1359 | given charset. 1360 | 1361 | Parameters 1362 | ---------- 1363 | charset : str 1364 | length of this str will be the new base system and characters 1365 | given will be the symbols used. 1366 | input : int 1367 | integer to convert. 1368 | minimum : int 1369 | returned output will be adjusted to this desired length using 1370 | leading characters of the lowest value in charset. 1371 | 1372 | Returns 1373 | ------- 1374 | str 1375 | converted value in given system. 1376 | """ 1377 | res = "" 1378 | while input: 1379 | res += charset[input % len(charset)] 1380 | input //= len(charset) 1381 | return (res[::-1] or charset[0]).rjust(minimum, charset[0]) 1382 | 1383 | 1384 | def global_region_tileset(index: int, size: str) -> str: 1385 | """ 1386 | Generate a global tilescheme. 1387 | 1388 | Parameters 1389 | ---------- 1390 | index : int 1391 | index of tileset to determine tilescheme name. 1392 | size : str 1393 | length of the side of an individual tile in degrees. 
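        For example, a size of '1.2' produces 1.2 degree tiles with
        coordinates rounded to one decimal place.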
1394 | 1395 | Returns 1396 | ------- 1397 | location : str 1398 | gdal memory filepath to global tilescheme. 1399 | """ 1400 | charset = "BCDFGHJKLMNPQRSTVWXZ" 1401 | name = convert_base(charset, index, 2) 1402 | roundnum = len(size.split(".")[1]) 1403 | size = float(size) 1404 | location = "/vsimem/global_tileset.gpkg" 1405 | ds = ogr.GetDriverByName("GPKG").CreateDataSource(location) 1406 | srs = osr.SpatialReference() 1407 | srs.ImportFromEPSG(4326) 1408 | layer = ds.CreateLayer("global_tileset", srs, ogr.wkbMultiPolygon) 1409 | layer.CreateFields( 1410 | [ 1411 | ogr.FieldDefn("Region", ogr.OFTString), 1412 | ogr.FieldDefn("UTM_Zone", ogr.OFTInteger), 1413 | ogr.FieldDefn("Hemisphere", ogr.OFTString), 1414 | ] 1415 | ) 1416 | layer_defn = layer.GetLayerDefn() 1417 | layer.StartTransaction() 1418 | y = round(-90 + size, roundnum) 1419 | y_count = 0 1420 | while y <= 90: 1421 | ns = "N" 1422 | if y <= 0: 1423 | ns = "S" 1424 | x = -180 1425 | x_count = 0 1426 | while x < 180: 1427 | current_utm = "{:02d}".format(int(np.ceil((180 + x + 0.00000001) / 6))) 1428 | ring = ogr.Geometry(ogr.wkbLinearRing) 1429 | ring.AddPoint_2D(x, y) 1430 | ring.AddPoint_2D(round(x + size, roundnum), y) 1431 | ring.AddPoint_2D(round(x + size, roundnum), round(y - size, roundnum)) 1432 | ring.AddPoint_2D(x, round(y - size, roundnum)) 1433 | ring.AddPoint_2D(x, y) 1434 | poly = ogr.Geometry(ogr.wkbPolygon) 1435 | poly.AddGeometry(ring) 1436 | poly = poly.Buffer(-0.002) 1437 | multipoly = ogr.Geometry(ogr.wkbMultiPolygon) 1438 | multipoly.AddGeometry(poly) 1439 | feat = ogr.Feature(layer_defn) 1440 | feat.SetGeometry(multipoly) 1441 | charset = "2456789BCDFGHJKLMNPQRSTVWXZ" 1442 | x_rep = convert_base(charset, x_count, 3) 1443 | y_rep = convert_base(charset, y_count, 3) 1444 | feat.SetField("Region", f"{name}{x_rep}{y_rep}") 1445 | feat.SetField("UTM_Zone", current_utm) 1446 | feat.SetField("Hemisphere", ns) 1447 | layer.CreateFeature(feat) 1448 | x = round(x + size, roundnum) 1449 | x_count += 1 1450 | y = round(y + size, roundnum) 1451 | y_count += 1 1452 | layer.CommitTransaction() 1453 | return location 1454 | 1455 | 1456 | def sweep_files(conn: sqlite3.Connection, project_dir: str) -> None: 1457 | """ 1458 | Remove missing files from tracking. 1459 | 1460 | Parameters 1461 | ---------- 1462 | conn : sqlite3.Connection 1463 | database connection object. 1464 | project_dir : str 1465 | destination directory for project. 1466 | """ 1467 | db_tiles = all_db_tiles(conn) 1468 | cursor = conn.cursor() 1469 | untracked_tiles = 0 1470 | untracked_subregions = 0 1471 | untracked_utms = 0 1472 | for fields in db_tiles: 1473 | if (fields["geotiff_disk"] and os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) == False) or (fields["rat_disk"] and os.path.isfile(os.path.join(project_dir, fields["rat_disk"])) == False): 1474 | cursor.execute( 1475 | "DELETE FROM tiles where tilename = ? 
RETURNING *", 1476 | (fields["tilename"],), 1477 | ) 1478 | del_tile = cursor.fetchone() 1479 | if del_tile: 1480 | untracked_tiles += 1 1481 | files = ["geotiff_disk", "rat_disk"] 1482 | for file in files: 1483 | try: 1484 | if del_tile[file] and os.path.isfile(os.path.join(project_dir, del_tile[file])): 1485 | os.remove(os.path.join(project_dir, del_tile[file])) 1486 | except (OSError, PermissionError): 1487 | continue 1488 | cursor.execute( 1489 | """DELETE FROM vrt_subregion 1490 | WHERE region NOT IN 1491 | (SELECT subregion 1492 | FROM tiles 1493 | WHERE geotiff_disk is not null 1494 | AND rat_disk is not null) 1495 | RETURNING *;""" 1496 | ) 1497 | del_subregions = cursor.fetchall() 1498 | untracked_subregions += len(del_subregions) 1499 | for del_subregion in del_subregions: 1500 | files = [ 1501 | "res_2_vrt", 1502 | "res_2_ovr", 1503 | "res_4_vrt", 1504 | "res_4_ovr", 1505 | "res_8_vrt", 1506 | "res_8_ovr", 1507 | "complete_vrt", 1508 | "complete_ovr", 1509 | ] 1510 | for file in files: 1511 | try: 1512 | if del_subregion[file] and os.path.isfile(os.path.join(project_dir, del_subregion[file])): 1513 | os.remove(os.path.join(project_dir, del_subregion[file])) 1514 | except (OSError, PermissionError): 1515 | continue 1516 | cursor.execute( 1517 | """DELETE FROM vrt_utm 1518 | WHERE utm NOT IN 1519 | (SELECT utm 1520 | FROM tiles 1521 | WHERE geotiff_disk is not null 1522 | AND rat_disk is not null) 1523 | RETURNING *;""" 1524 | ) 1525 | del_utms = cursor.fetchall() 1526 | untracked_utms += len(del_utms) 1527 | for del_utm in del_utms: 1528 | files = ["utm_vrt", "utm_ovr"] 1529 | for file in files: 1530 | try: 1531 | if (del_utm[file]) and (os.path.isfile(os.path.join(project_dir, del_utm[file]))): 1532 | os.remove(os.path.join(project_dir, del_utm[file])) 1533 | except (OSError, PermissionError): 1534 | continue 1535 | conn.commit() 1536 | return untracked_tiles, untracked_subregions, untracked_utms 1537 | 1538 | 1539 | def main( 1540 | project_dir: str, 1541 | desired_area_filename: str = None, 1542 | untrack_missing: bool = False, 1543 | data_source: str = None, 1544 | ) -> [[str], [str]]: 1545 | """ 1546 | Track tiles. Download tiles. Update already tracked tiles. 1547 | 1548 | Parameters 1549 | ---------- 1550 | project_dir : str 1551 | The directory path to use. Will create if it does not currently exist. 1552 | Required argument. 1553 | desired_area_filename : str 1554 | The geometry file to use to find intersecting available tiles. 1555 | The returned tile ids at the time of intersection will be added to 1556 | tracking. fetch_tiles will stay up to date with the latest data 1557 | available from the NBS for all tracked tiles. This argument is 1558 | not necessary if you do not want to add new tile ids to tracking. 1559 | untrack_missing : bool 1560 | This flag will untrack tiles that have missing files in your local 1561 | download directory. fetch_tiles will no longer retrieve these tiles. 1562 | data_source : str 1563 | The NBS offers various products to different end-users. Some are available publicly. 1564 | Use this argument to identify which product you want. BlueTopo is the default. 1565 | 1566 | Returns 1567 | ------- 1568 | successful_downloads : list 1569 | tiles downloaded. 1570 | list 1571 | tiles not found in s3 or failed during download. 
1572 | """ 1573 | project_dir = os.path.expanduser(project_dir) 1574 | if desired_area_filename: 1575 | desired_area_filename = os.path.expanduser(desired_area_filename) 1576 | if os.path.isabs(project_dir) is False or (desired_area_filename and os.path.isabs(desired_area_filename) is False): 1577 | print("Please use an absolute path for your project folder and geometry path.") 1578 | if "windows" not in platform.system().lower(): 1579 | print("Typically for non windows systems this means starting with '/'") 1580 | sys.exit(1) 1581 | 1582 | if data_source is None or data_source.lower() == "bluetopo": 1583 | data_source = "BlueTopo" 1584 | geom_prefix = "BlueTopo/_BlueTopo_Tile_Scheme/BlueTopo_Tile_Scheme" 1585 | tile_prefix = "BlueTopo" 1586 | 1587 | elif data_source.lower() == "modeling": 1588 | data_source = "Modeling" 1589 | geom_prefix = "Test-and-Evaluation/Modeling/_Modeling_Tile_Scheme/Modeling_Tile_Scheme" 1590 | tile_prefix = "Test-and-Evaluation/Modeling" 1591 | 1592 | elif data_source.lower() == "bag": 1593 | data_source = "BAG" 1594 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme" 1595 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/BAG" 1596 | 1597 | elif data_source.lower() == "s102v21": 1598 | data_source = "S102V21" 1599 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme" 1600 | xml_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V21/_CATALOG" 1601 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V21" 1602 | 1603 | elif data_source.lower() == "s102v22": 1604 | data_source = "S102V22" 1605 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme" 1606 | xml_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V22/_CATALOG" 1607 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V22" 1608 | 1609 | 1610 | elif os.path.isdir(data_source): 1611 | geom_prefix = data_source 1612 | files = os.listdir(geom_prefix) 1613 | files = [file for file in files if file.endswith(".gpkg") and "Tile_Scheme" in file] 1614 | files.sort(reverse=True) 1615 | data_source = None 1616 | for file in files: 1617 | ds_basefile = os.path.basename(file) 1618 | data_source = ds_basefile.split("_")[0] 1619 | break 1620 | if data_source is None: 1621 | raise ValueError(f"Please pass in directory which contains a tile scheme file if you're using a local data source.") 1622 | tile_prefix = "Local" 1623 | 1624 | else: 1625 | raise ValueError(f"Invalid data source: {data_source}") 1626 | 1627 | start = datetime.datetime.now() 1628 | print(f"[{start.strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Beginning work in project folder: {project_dir}") 1629 | if not os.path.exists(project_dir): 1630 | os.makedirs(project_dir) 1631 | 1632 | if data_source.lower() in ("bag", "s102v21"): 1633 | conn = connect_to_survey_registry_pmn1(project_dir, data_source) 1634 | elif data_source.lower() in ("s102v22"): 1635 | conn = connect_to_survey_registry_pmn2(project_dir, data_source) 1636 | else: 1637 | conn = connect_to_survey_registry(project_dir, data_source) 1638 | 1639 | if data_source.lower() in ("s102v21", "s102v22"): 1640 | get_xml(conn, project_dir, xml_prefix, data_source) 1641 | 1642 | if data_source.lower() in ("bag", "s102v21", "s102v22"): 1643 | geom_file = get_tessellation_pmn(conn, 
project_dir, geom_prefix, data_source) 1644 | else: 1645 | geom_file = get_tessellation(conn, project_dir, geom_prefix, data_source) 1646 | 1647 | if untrack_missing: 1648 | untracked_tiles, untracked_sr, untracked_utms = sweep_files(conn, project_dir) 1649 | print(f"Untracked {untracked_tiles} tile(s), " f"{untracked_sr} subregion vrt(s), " f"{untracked_utms} utm vrt(s)") 1650 | 1651 | if desired_area_filename: 1652 | if not os.path.isfile(desired_area_filename): 1653 | raise ValueError(f"The geometry {desired_area_filename} for " "determining what to download does not exist.") 1654 | if data_source.lower() in ("bag", "s102v21", "s102v22"): 1655 | tile_list = get_tile_list(desired_area_filename, geom_file) 1656 | available_tile_count = insert_new_pmn(conn, tile_list, data_source) 1657 | else: 1658 | tile_list = get_tile_list(desired_area_filename, geom_file) 1659 | available_tile_count = insert_new(conn, tile_list) 1660 | print(f"\nTracking {available_tile_count} available {data_source} tile(s) " f"discovered in a total of {len(tile_list)} intersected tile(s) " "with given polygon.") 1661 | 1662 | if data_source.lower() in ("bag", "s102v21", "s102v22"): 1663 | upsert_tiles_pmn(conn, project_dir, geom_file, data_source) 1664 | else: 1665 | upsert_tiles(conn, project_dir, geom_file) 1666 | 1667 | if data_source.lower() in ("bag", "s102v21", "s102v22"): 1668 | ( 1669 | tiles_found, 1670 | tiles_not_found, 1671 | successful_downloads, 1672 | failed_downloads, 1673 | existing_tiles, 1674 | missing_tiles, 1675 | failed_verifications, 1676 | new_tile_list, 1677 | ) = download_tiles_pmn(conn, project_dir, tile_prefix, data_source) 1678 | else: 1679 | ( 1680 | tiles_found, 1681 | tiles_not_found, 1682 | successful_downloads, 1683 | failed_downloads, 1684 | existing_tiles, 1685 | missing_tiles, 1686 | failed_verifications, 1687 | new_tile_list, 1688 | ) = download_tiles(conn, project_dir, tile_prefix, data_source) 1689 | print("\n___________________________________ SUMMARY ___________________________________") 1690 | print("\nExisting:") 1691 | print( 1692 | "Number of tiles already existing locally without updates:", 1693 | len(existing_tiles), 1694 | ) 1695 | if new_tile_list or missing_tiles: 1696 | print("\nSearch:") 1697 | print(f"Number of tiles to attempt to fetch: {len(new_tile_list) + len(missing_tiles)} [ {len(new_tile_list)} new data + {len(missing_tiles)} missing locally ]") 1698 | if len(tiles_found) < (len(new_tile_list) + len(missing_tiles)): 1699 | print("* Some tiles we wanted to fetch were not found in the S3 bucket." "\n* The NBS may be actively updating the tiles in question." "\n* You can rerun fetch_tiles at a later time to download these tiles." "\n* Please contact the NBS if this issue does not fix itself on subsequent later runs.") 1700 | print("\nFetch:") 1701 | print(f"Number of tiles found in S3 successfully downloaded: {len(successful_downloads)}/{len(tiles_found)}") 1702 | if len(failed_downloads): 1703 | print("* Some tiles appear to have failed downloading." 
"\n* Please rerun fetch_tiles to retry.") 1704 | if len(failed_verifications): 1705 | print(f"{len(failed_verifications)} tiles failed checksum verification: {failed_verifications}" f"\nPlease contact the NBS if this issue does not fix itself on subsequent runs.") 1706 | print(f"\n[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Operation complete after {datetime.datetime.now() - start}") 1707 | return successful_downloads, list(set(tiles_not_found + failed_downloads)) -------------------------------------------------------------------------------- /nbs/bluetopo/core/build_vrt.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | import datetime 4 | import os 5 | import platform 6 | import shutil 7 | import sqlite3 8 | import sys 9 | 10 | import numpy as np 11 | from osgeo import gdal 12 | 13 | gdal.UseExceptions() 14 | gdal.SetConfigOption("COMPRESS_OVERVIEW", "DEFLATE") 15 | gdal.SetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS") 16 | 17 | # refactor duplicate functions 18 | 19 | def connect_to_survey_registry_pmn2(project_dir: str, data_source: str) -> sqlite3.Connection: 20 | """ 21 | Create new or connect to existing SQLite database. 22 | 23 | Parameters 24 | ---------- 25 | project_dir : str 26 | destination directory for project. 27 | data_source : str 28 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 29 | 30 | Returns 31 | ------- 32 | conn : sqlite3.Connection 33 | connection to SQLite database. 34 | """ 35 | catalog_fields = {"file": "text", "location": "text", "downloaded": "text"} 36 | vrt_subregion_fields = {"region": "text", "utm": "text", 37 | "res_2_subdataset1_vrt": "text", "res_2_subdataset1_ovr": "text", 38 | "res_2_subdataset2_vrt": "text", "res_2_subdataset2_ovr": "text", 39 | "res_4_subdataset1_vrt": "text", "res_4_subdataset1_ovr": "text", 40 | "res_4_subdataset2_vrt": "text", "res_4_subdataset2_ovr": "text", 41 | "res_8_subdataset1_vrt": "text", "res_8_subdataset1_ovr": "text", 42 | "res_8_subdataset2_vrt": "text", "res_8_subdataset2_ovr": "text", 43 | "complete_subdataset1_vrt": "text", "complete_subdataset1_ovr": "text", 44 | "complete_subdataset2_vrt": "text", "complete_subdataset2_ovr": "text", 45 | "built_subdataset1": "integer", 46 | "built_subdataset2": "integer"} 47 | vrt_utm_fields = {"utm": "text", 48 | "utm_subdataset1_vrt": "text", "utm_subdataset1_ovr": "text", 49 | "utm_subdataset2_vrt": "text", "utm_subdataset2_ovr": "text", 50 | "utm_combined_vrt": "text", 51 | "built_subdataset1": "integer", 52 | "built_subdataset2": "integer", 53 | "built_combined": "integer"} 54 | vrt_tiles = {"tilename": "text", 55 | "file_link": "text", 56 | "delivered_date": "text", "resolution": "text", 57 | "utm": "text", "subregion": "text", 58 | "file_disk": "text", 59 | "file_sha256_checksum": "text", 60 | "file_verified": "text"} 61 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db") 62 | conn = None 63 | try: 64 | conn = sqlite3.connect(database_path) 65 | conn.row_factory = sqlite3.Row 66 | except sqlite3.Error as e: 67 | print("Failed to establish SQLite database connection.") 68 | raise e 69 | if conn is not None: 70 | try: 71 | cursor = conn.cursor() 72 | cursor.execute( 73 | """ 74 | CREATE TABLE IF NOT EXISTS catalog ( 75 | file text PRIMARY KEY 76 | ); 77 | """ 78 | ) 79 | cursor.execute( 80 | """ 81 | CREATE TABLE IF NOT EXISTS vrt_subregion ( 82 | region text PRIMARY KEY 83 
| ); 84 | """ 85 | ) 86 | cursor.execute( 87 | """ 88 | CREATE TABLE IF NOT EXISTS vrt_utm ( 89 | utm text PRIMARY KEY 90 | ); 91 | """ 92 | ) 93 | cursor.execute( 94 | """ 95 | CREATE TABLE IF NOT EXISTS tiles ( 96 | tilename text PRIMARY KEY 97 | ); 98 | """ 99 | ) 100 | conn.commit() 101 | cursor.execute("SELECT name FROM pragma_table_info('catalog')") 102 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 103 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')") 104 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 105 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')") 106 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 107 | cursor.execute("SELECT name FROM pragma_table_info('tiles')") 108 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 109 | for field in catalog_fields: 110 | if field not in tileset_existing_fields: 111 | cursor.execute(f"ALTER TABLE catalog ADD COLUMN {field} {catalog_fields[field]}") 112 | conn.commit() 113 | for field in vrt_subregion_fields: 114 | if field not in vrt_subregion_existing_fields: 115 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}") 116 | conn.commit() 117 | for field in vrt_utm_fields: 118 | if field not in vrt_utm_existing_fields: 119 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}") 120 | conn.commit() 121 | for field in vrt_tiles: 122 | if field not in tiles_existing_fields: 123 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}") 124 | conn.commit() 125 | except sqlite3.Error as e: 126 | print("Failed to create SQLite tables.") 127 | raise e 128 | return conn 129 | 130 | 131 | def connect_to_survey_registry_pmn1(project_dir: str, data_source: str) -> sqlite3.Connection: 132 | """ 133 | Create new or connect to existing SQLite database. 134 | 135 | Parameters 136 | ---------- 137 | project_dir : str 138 | destination directory for project. 139 | data_source : str 140 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 141 | 142 | Returns 143 | ------- 144 | conn : sqlite3.Connection 145 | connection to SQLite database. 
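    Notes
    -----
    The registry is a SQLite file named <data_source>_registry.db
    (lower-cased) inside project_dir; it is created on first use.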
146 | """ 147 | catalog_fields = {"file": "text", "location": "text", "downloaded": "text"} 148 | vrt_subregion_fields = {"region": "text", "utm": "text", 149 | "res_2_vrt": "text", "res_2_ovr": "text", 150 | "res_4_vrt": "text", "res_4_ovr": "text", 151 | "res_8_vrt": "text", "res_8_ovr": "text", 152 | "complete_vrt": "text", "complete_ovr": "text", 153 | "built": "integer"} 154 | vrt_utm_fields = {"utm": "text", 155 | "utm_vrt": "text", "utm_ovr": "text", 156 | "built": "integer"} 157 | vrt_tiles = {"tilename": "text", 158 | "file_link": "text", 159 | "delivered_date": "text", "resolution": "text", 160 | "utm": "text", "subregion": "text", 161 | "file_disk": "text", 162 | "file_sha256_checksum": "text", 163 | "file_verified": "text"} 164 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db") 165 | conn = None 166 | try: 167 | conn = sqlite3.connect(database_path) 168 | conn.row_factory = sqlite3.Row 169 | except sqlite3.Error as e: 170 | print("Failed to establish SQLite database connection.") 171 | raise e 172 | if conn is not None: 173 | try: 174 | cursor = conn.cursor() 175 | cursor.execute( 176 | """ 177 | CREATE TABLE IF NOT EXISTS catalog ( 178 | file text PRIMARY KEY 179 | ); 180 | """ 181 | ) 182 | cursor.execute( 183 | """ 184 | CREATE TABLE IF NOT EXISTS vrt_subregion ( 185 | region text PRIMARY KEY 186 | ); 187 | """ 188 | ) 189 | cursor.execute( 190 | """ 191 | CREATE TABLE IF NOT EXISTS vrt_utm ( 192 | utm text PRIMARY KEY 193 | ); 194 | """ 195 | ) 196 | cursor.execute( 197 | """ 198 | CREATE TABLE IF NOT EXISTS tiles ( 199 | tilename text PRIMARY KEY 200 | ); 201 | """ 202 | ) 203 | conn.commit() 204 | cursor.execute("SELECT name FROM pragma_table_info('catalog')") 205 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 206 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')") 207 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 208 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')") 209 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 210 | cursor.execute("SELECT name FROM pragma_table_info('tiles')") 211 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 212 | for field in catalog_fields: 213 | if field not in tileset_existing_fields: 214 | cursor.execute(f"ALTER TABLE catalog ADD COLUMN {field} {catalog_fields[field]}") 215 | conn.commit() 216 | for field in vrt_subregion_fields: 217 | if field not in vrt_subregion_existing_fields: 218 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}") 219 | conn.commit() 220 | for field in vrt_utm_fields: 221 | if field not in vrt_utm_existing_fields: 222 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}") 223 | conn.commit() 224 | for field in vrt_tiles: 225 | if field not in tiles_existing_fields: 226 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}") 227 | conn.commit() 228 | except sqlite3.Error as e: 229 | print("Failed to create SQLite tables.") 230 | raise e 231 | return conn 232 | 233 | 234 | def connect_to_survey_registry(project_dir: str, data_source: str) -> sqlite3.Connection: 235 | """ 236 | Create new or connect to existing SQLite database. 237 | 238 | Parameters 239 | ---------- 240 | project_dir : str 241 | destination directory for project. 242 | data_source : str 243 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 
244 | 245 | Returns 246 | ------- 247 | conn : sqlite3.Connection 248 | connection to SQLite database. 249 | """ 250 | tileset_fields = {"tilescheme": "text", "location": "text", "downloaded": "text"} 251 | vrt_subregion_fields = {"region": "text", "utm": "text", "res_2_vrt": "text", "res_2_ovr": "text", "res_4_vrt": "text", "res_4_ovr": "text", "res_8_vrt": "text", "res_8_ovr": "text", "complete_vrt": "text", "complete_ovr": "text", "built": "integer"} 252 | vrt_utm_fields = {"utm": "text", "utm_vrt": "text", "utm_ovr": "text", "built": "integer"} 253 | vrt_tiles = {"tilename": "text", "geotiff_link": "text", "rat_link": "text", "delivered_date": "text", "resolution": "text", "utm": "text", "subregion": "text", "geotiff_disk": "text", "rat_disk": "text", "geotiff_sha256_checksum": "text", "rat_sha256_checksum": "text", "geotiff_verified": "text", "rat_verified": "text"} 254 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db") 255 | conn = None 256 | try: 257 | conn = sqlite3.connect(database_path) 258 | conn.row_factory = sqlite3.Row 259 | except sqlite3.Error as e: 260 | print("Failed to establish SQLite database connection.") 261 | raise e 262 | if conn is not None: 263 | try: 264 | cursor = conn.cursor() 265 | cursor.execute( 266 | """ 267 | CREATE TABLE IF NOT EXISTS tileset ( 268 | tilescheme text PRIMARY KEY 269 | ); 270 | """ 271 | ) 272 | cursor.execute( 273 | """ 274 | CREATE TABLE IF NOT EXISTS vrt_subregion ( 275 | region text PRIMARY KEY 276 | ); 277 | """ 278 | ) 279 | cursor.execute( 280 | """ 281 | CREATE TABLE IF NOT EXISTS vrt_utm ( 282 | utm text PRIMARY KEY 283 | ); 284 | """ 285 | ) 286 | cursor.execute( 287 | """ 288 | CREATE TABLE IF NOT EXISTS tiles ( 289 | tilename text PRIMARY KEY 290 | ); 291 | """ 292 | ) 293 | conn.commit() 294 | cursor.execute("SELECT name FROM pragma_table_info('tileset')") 295 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 296 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')") 297 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 298 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')") 299 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 300 | cursor.execute("SELECT name FROM pragma_table_info('tiles')") 301 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()] 302 | for field in tileset_fields: 303 | if field not in tileset_existing_fields: 304 | cursor.execute(f"ALTER TABLE tileset ADD COLUMN {field} {tileset_fields[field]}") 305 | conn.commit() 306 | for field in vrt_subregion_fields: 307 | if field not in vrt_subregion_existing_fields: 308 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}") 309 | conn.commit() 310 | for field in vrt_utm_fields: 311 | if field not in vrt_utm_existing_fields: 312 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}") 313 | conn.commit() 314 | for field in vrt_tiles: 315 | if field not in tiles_existing_fields: 316 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}") 317 | conn.commit() 318 | except sqlite3.Error as e: 319 | print("Failed to create SQLite tables.") 320 | raise e 321 | return conn 322 | 323 | def build_sub_vrts_pmn( 324 | subregion: str, 325 | subregion_tiles: list, 326 | project_dir: str, 327 | data_source: str, 328 | relative_to_vrt: bool, 329 | ) -> dict: 330 | """ 331 | Build the VRTs of a given subregion. 
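    Two parallel sets of VRTs are written per subregion: one for the
    BathymetryCoverage subdataset and one for the QualityOfSurvey
    subdataset of the S-102 tiles.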
332 | 333 | Parameters 334 | ---------- 335 | subregion 336 | subregion name. 337 | subregion_tiles 338 | list of tile records belonging to subregion. 339 | project_dir 340 | destination directory for project. 341 | data_source : str 342 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 343 | relative_to_vrt : bool 344 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 345 | 346 | Returns 347 | ------- 348 | fields : dict 349 | holds name of subregion and the paths of its VRT and OVR files. 350 | """ 351 | fields = { 352 | "region": subregion["region"], 353 | "res_2_subdataset1_vrt": None, 354 | "res_2_subdataset1_ovr": None, 355 | "res_2_subdataset2_vrt": None, 356 | "res_2_subdataset2_ovr": None, 357 | "res_4_subdataset1_vrt": None, 358 | "res_4_subdataset1_ovr": None, 359 | "res_4_subdataset2_vrt": None, 360 | "res_4_subdataset2_ovr": None, 361 | "res_8_subdataset1_vrt": None, 362 | "res_8_subdataset1_ovr": None, 363 | "res_8_subdataset2_vrt": None, 364 | "res_8_subdataset2_ovr": None, 365 | "complete_subdataset1_vrt": None, 366 | "complete_subdataset1_ovr": None, 367 | "complete_subdataset2_vrt": None, 368 | "complete_subdataset2_ovr": None, 369 | } 370 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"]) 371 | subregion_dir = os.path.join(project_dir, rel_dir) 372 | try: 373 | if os.path.isdir(subregion_dir): 374 | shutil.rmtree(subregion_dir) 375 | except (OSError, PermissionError) as e: 376 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again") 377 | sys.exit(1) 378 | if not os.path.exists(subregion_dir): 379 | os.makedirs(subregion_dir) 380 | resolution_tiles = collections.defaultdict(list) 381 | for subregion_tile in subregion_tiles: 382 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile) 383 | vrt_subdataset1_list = [] 384 | vrt_subdataset2_list = [] 385 | for res, tiles in resolution_tiles.items(): 386 | print(f"Building {subregion['region']} band {res}...") 387 | rel_subdataset1_path = os.path.join(rel_dir, subregion["region"] + f"_{res}_BathymetryCoverage.vrt") 388 | res_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path) 389 | rel_subdataset2_path = os.path.join(rel_dir, subregion["region"] + f"_{res}_QualityOfSurvey.vrt") 390 | res_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path) 391 | tiffs_subdataset1 = [os.path.join(project_dir, tile["file_disk"]) for tile in tiles] 392 | tiffs_subdataset2 = [] 393 | for tile in tiles: 394 | fpath = os.path.join(project_dir, f'{tile["file_disk"]}').replace("\\", "/") 395 | if os.path.join(project_dir, f'{tile["file_disk"]}').startswith('/') and os.path.join(project_dir, f'{tile["file_disk"]}').startswith('//') is False: 396 | tiffs_subdataset2.append('S102:"/' + fpath + '":QualityOfSurvey') 397 | else: 398 | tiffs_subdataset2.append('S102:"' + fpath + '":QualityOfSurvey') 399 | # tiffs_subdataset2 = ['S102:/' + os.path.join(project_dir, f'{tile["file_disk"]}') + ':QualityOfSurvey' for tile in tiles] 400 | # revisit levels 401 | if "2" in res: 402 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [2, 4], relative_to_vrt, subdataset = 1) 403 | vrt_subdataset1_list.append(res_subdataset1_vrt) 404 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [2, 4], relative_to_vrt, subdataset = 2) 405 | vrt_subdataset2_list.append(res_subdataset2_vrt) 406 | fields["res_2_subdataset1_vrt"] = rel_subdataset1_path 407 | fields["res_2_subdataset2_vrt"] = 
rel_subdataset2_path 408 | if os.path.isfile(os.path.join(project_dir, fields["res_2_subdataset1_vrt"] + ".ovr")): 409 | fields["res_2_subdataset1_ovr"] = rel_subdataset1_path + ".ovr" 410 | if os.path.isfile(os.path.join(project_dir, fields["res_2_subdataset2_vrt"] + ".ovr")): 411 | fields["res_2_subdataset2_ovr"] = rel_subdataset2_path + ".ovr" 412 | if "4" in res: 413 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [4, 8], relative_to_vrt, subdataset = 1) 414 | vrt_subdataset1_list.append(res_subdataset1_vrt) 415 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [4, 8], relative_to_vrt, subdataset = 2) 416 | vrt_subdataset2_list.append(res_subdataset2_vrt) 417 | fields["res_4_subdataset1_vrt"] = rel_subdataset1_path 418 | fields["res_4_subdataset2_vrt"] = rel_subdataset2_path 419 | if os.path.isfile(os.path.join(project_dir, fields["res_4_subdataset1_vrt"] + ".ovr")): 420 | fields["res_4_subdataset1_ovr"] = rel_subdataset1_path + ".ovr" 421 | if os.path.isfile(os.path.join(project_dir, fields["res_4_subdataset2_vrt"] + ".ovr")): 422 | fields["res_4_subdataset2_ovr"] = rel_subdataset2_path + ".ovr" 423 | if "8" in res: 424 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [8], relative_to_vrt, subdataset = 1) 425 | vrt_subdataset1_list.append(res_subdataset1_vrt) 426 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [8], relative_to_vrt, subdataset = 2) 427 | vrt_subdataset2_list.append(res_subdataset2_vrt) 428 | fields["res_8_subdataset1_vrt"] = rel_subdataset1_path 429 | fields["res_8_subdataset2_vrt"] = rel_subdataset2_path 430 | if os.path.isfile(os.path.join(project_dir, fields["res_8_subdataset1_vrt"] + ".ovr")): 431 | fields["res_8_subdataset1_ovr"] = rel_subdataset1_path + ".ovr" 432 | if os.path.isfile(os.path.join(project_dir, fields["res_8_subdataset2_vrt"] + ".ovr")): 433 | fields["res_8_subdataset2_ovr"] = rel_subdataset2_path + ".ovr" 434 | if "16" in res: 435 | vrt_subdataset1_list.extend(tiffs_subdataset1) 436 | vrt_subdataset2_list.extend(tiffs_subdataset2) 437 | rel_subdataset1_path = os.path.join(rel_dir, subregion["region"] + "_complete_BathymetryCoverage.vrt") 438 | complete_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path) 439 | rel_subdataset2_path = os.path.join(rel_dir, subregion["region"] + "_complete_QualityOfSurvey.vrt") 440 | complete_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path) 441 | create_vrt_pmn(vrt_subdataset1_list, complete_subdataset1_vrt, [16], relative_to_vrt, subdataset = 1) 442 | create_vrt_pmn(vrt_subdataset2_list, complete_subdataset2_vrt, [16], relative_to_vrt, subdataset = 2) 443 | fields["complete_subdataset1_vrt"] = rel_subdataset1_path 444 | if os.path.isfile(os.path.join(project_dir, fields["complete_subdataset1_vrt"] + ".ovr")): 445 | fields["complete_subdataset1_ovr"] = rel_subdataset1_path + ".ovr" 446 | fields["complete_subdataset2_vrt"] = rel_subdataset2_path 447 | if os.path.isfile(os.path.join(project_dir, fields["complete_subdataset2_vrt"] + ".ovr")): 448 | fields["complete_subdataset2_ovr"] = rel_subdataset2_path + ".ovr" 449 | return fields 450 | 451 | def build_sub_vrts_pmn1( 452 | subregion: str, 453 | subregion_tiles: list, 454 | project_dir: str, 455 | data_source: str, 456 | relative_to_vrt: bool, 457 | ) -> dict: 458 | """ 459 | Build the VRTs of a given subregion. 460 | 461 | Parameters 462 | ---------- 463 | subregion 464 | subregion name. 465 | subregion_tiles 466 | list of tile records belonging to subregion. 
467 | project_dir 468 | destination directory for project. 469 | data_source : str 470 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 471 | relative_to_vrt : bool 472 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 473 | 474 | Returns 475 | ------- 476 | fields : dict 477 | holds name of subregion and the paths of its VRT and OVR files. 478 | """ 479 | fields = { 480 | "region": subregion["region"], 481 | "res_2_vrt": None, 482 | "res_2_ovr": None, 483 | "res_4_vrt": None, 484 | "res_4_ovr": None, 485 | "res_8_vrt": None, 486 | "res_8_ovr": None, 487 | "complete_vrt": None, 488 | "complete_ovr": None, 489 | } 490 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"]) 491 | subregion_dir = os.path.join(project_dir, rel_dir) 492 | try: 493 | if os.path.isdir(subregion_dir): 494 | shutil.rmtree(subregion_dir) 495 | except (OSError, PermissionError) as e: 496 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again") 497 | sys.exit(1) 498 | if not os.path.exists(subregion_dir): 499 | os.makedirs(subregion_dir) 500 | resolution_tiles = collections.defaultdict(list) 501 | for subregion_tile in subregion_tiles: 502 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile) 503 | vrt_list = [] 504 | for res, tiles in resolution_tiles.items(): 505 | print(f"Building {subregion['region']} band {res}...") 506 | rel_path = os.path.join(rel_dir, subregion["region"] + f"_{res}.vrt") 507 | res_vrt = os.path.join(project_dir, rel_path) 508 | tiffs = [os.path.join(project_dir, tile["file_disk"]) for tile in tiles] 509 | # revisit levels 510 | if "2" in res: 511 | create_vrt_pmn1(tiffs, res_vrt, [2, 4], relative_to_vrt) 512 | vrt_list.append(res_vrt) 513 | fields["res_2_vrt"] = rel_path 514 | if os.path.isfile(os.path.join(project_dir, fields["res_2_vrt"] + ".ovr")): 515 | fields["res_2_ovr"] = rel_path + ".ovr" 516 | if "4" in res: 517 | create_vrt_pmn1(tiffs, res_vrt, [4, 8], relative_to_vrt) 518 | vrt_list.append(res_vrt) 519 | fields["res_4_vrt"] = rel_path 520 | if os.path.isfile(os.path.join(project_dir, fields["res_4_vrt"] + ".ovr")): 521 | fields["res_4_ovr"] = rel_path + ".ovr" 522 | if "8" in res: 523 | create_vrt_pmn1(tiffs, res_vrt, [8], relative_to_vrt) 524 | vrt_list.append(res_vrt) 525 | fields["res_8_vrt"] = rel_path 526 | if os.path.isfile(os.path.join(project_dir, fields["res_8_vrt"] + ".ovr")): 527 | fields["res_8_ovr"] = rel_path + ".ovr" 528 | if "16" in res: 529 | vrt_list.extend(tiffs) 530 | rel_path = os.path.join(rel_dir, subregion["region"] + "_complete.vrt") 531 | complete_vrt = os.path.join(project_dir, rel_path) 532 | create_vrt_pmn1(vrt_list, complete_vrt, [16], relative_to_vrt) 533 | fields["complete_vrt"] = rel_path 534 | if os.path.isfile(os.path.join(project_dir, fields["complete_vrt"] + ".ovr")): 535 | fields["complete_ovr"] = rel_path + ".ovr" 536 | return fields 537 | 538 | 539 | def build_sub_vrts( 540 | subregion: str, 541 | subregion_tiles: list, 542 | project_dir: str, 543 | data_source: str, 544 | relative_to_vrt: bool, 545 | ) -> dict: 546 | """ 547 | Build the VRTs of a given subregion. 548 | 549 | Parameters 550 | ---------- 551 | subregion 552 | subregion name. 553 | subregion_tiles 554 | list of tile records belonging to subregion. 555 | project_dir 556 | destination directory for project. 557 | data_source : str 558 | the data source for the project e.g. 'BlueTopo' or 'Modeling'. 
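        Also names the output directory for the VRTs, <data_source>_VRT.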
559 | relative_to_vrt : bool 560 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 561 | 562 | Returns 563 | ------- 564 | fields : dict 565 | holds name of subregion and the paths of its VRT and OVR files. 566 | """ 567 | fields = { 568 | "region": subregion["region"], 569 | "res_2_vrt": None, 570 | "res_2_ovr": None, 571 | "res_4_vrt": None, 572 | "res_4_ovr": None, 573 | "res_8_vrt": None, 574 | "res_8_ovr": None, 575 | "complete_vrt": None, 576 | "complete_ovr": None, 577 | } 578 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"]) 579 | subregion_dir = os.path.join(project_dir, rel_dir) 580 | try: 581 | if os.path.isdir(subregion_dir): 582 | shutil.rmtree(subregion_dir) 583 | except (OSError, PermissionError) as e: 584 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again") 585 | sys.exit(1) 586 | if not os.path.exists(subregion_dir): 587 | os.makedirs(subregion_dir) 588 | resolution_tiles = collections.defaultdict(list) 589 | for subregion_tile in subregion_tiles: 590 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile) 591 | vrt_list = [] 592 | for res, tiles in resolution_tiles.items(): 593 | print(f"Building {subregion['region']} band {res}...") 594 | rel_path = os.path.join(rel_dir, subregion["region"] + f"_{res}.vrt") 595 | res_vrt = os.path.join(project_dir, rel_path) 596 | tiffs = [os.path.join(project_dir, tile["geotiff_disk"]) for tile in tiles] 597 | # revisit levels 598 | if "2" in res: 599 | create_vrt(tiffs, res_vrt, [2, 4], relative_to_vrt) 600 | vrt_list.append(res_vrt) 601 | fields["res_2_vrt"] = rel_path 602 | if os.path.isfile(os.path.join(project_dir, fields["res_2_vrt"] + ".ovr")): 603 | fields["res_2_ovr"] = rel_path + ".ovr" 604 | if "4" in res: 605 | create_vrt(tiffs, res_vrt, [4, 8], relative_to_vrt) 606 | vrt_list.append(res_vrt) 607 | fields["res_4_vrt"] = rel_path 608 | if os.path.isfile(os.path.join(project_dir, fields["res_4_vrt"] + ".ovr")): 609 | fields["res_4_ovr"] = rel_path + ".ovr" 610 | if "8" in res: 611 | create_vrt(tiffs, res_vrt, [8], relative_to_vrt) 612 | vrt_list.append(res_vrt) 613 | fields["res_8_vrt"] = rel_path 614 | if os.path.isfile(os.path.join(project_dir, fields["res_8_vrt"] + ".ovr")): 615 | fields["res_8_ovr"] = rel_path + ".ovr" 616 | if "16" in res: 617 | vrt_list.extend(tiffs) 618 | rel_path = os.path.join(rel_dir, subregion["region"] + "_complete.vrt") 619 | complete_vrt = os.path.join(project_dir, rel_path) 620 | create_vrt(vrt_list, complete_vrt, [16], relative_to_vrt) 621 | fields["complete_vrt"] = rel_path 622 | if os.path.isfile(os.path.join(project_dir, fields["complete_vrt"] + ".ovr")): 623 | fields["complete_ovr"] = rel_path + ".ovr" 624 | return fields 625 | 626 | 627 | def combine_vrts(files: list, vrt_path: str, relative_to_vrt: bool) -> None: 628 | """ 629 | Build VRT from files. 630 | 631 | Parameters 632 | ---------- 633 | files 634 | list of the file paths to include in the vrt. 635 | vrt_path 636 | output vrt path. 637 | levels 638 | list of overview levels to be built with the vrt. 639 | relative_to_vrt : bool 640 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 
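    Notes
    -----
    Inputs are stacked as separate bands (Elevation, Uncertainty,
    QualityOfSurvey); unlike create_vrt, no overviews are built here.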
641 | """ 642 | # not efficient but insignificant 643 | files = copy.deepcopy(files) 644 | try: 645 | if os.path.isfile(vrt_path): 646 | os.remove(vrt_path) 647 | if os.path.isfile(vrt_path + ".ovr"): 648 | os.remove(vrt_path + ".ovr") 649 | except (OSError, PermissionError) as e: 650 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again") 651 | sys.exit(1) 652 | vrt_options = gdal.BuildVRTOptions(options='-separate -allow_projection_difference', resampleAlg="near", resolution="highest") 653 | cwd = os.getcwd() 654 | try: 655 | os.chdir(os.path.dirname(vrt_path)) 656 | if relative_to_vrt is True: 657 | for idx in range(len(files)): 658 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path)) 659 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd()) 660 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options) 661 | band1 = vrt.GetRasterBand(1) 662 | band1.SetDescription("Elevation") 663 | band2 = vrt.GetRasterBand(2) 664 | band2.SetDescription("Uncertainty") 665 | band3 = vrt.GetRasterBand(3) 666 | band3.SetDescription("QualityOfSurvey") 667 | vrt = None 668 | except: 669 | raise RuntimeError(f"VRT failed to build for {vrt_path}") 670 | finally: 671 | os.chdir(cwd) 672 | 673 | 674 | def create_vrt_pmn(files: list, vrt_path: str, levels: list, relative_to_vrt: bool, subdataset: int) -> None: 675 | """ 676 | Build VRT from files. 677 | 678 | Parameters 679 | ---------- 680 | files 681 | list of the file paths to include in the vrt. 682 | vrt_path 683 | output vrt path. 684 | levels 685 | list of overview levels to be built with the vrt. 686 | relative_to_vrt : bool 687 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 688 | """ 689 | # not efficient but insignificant 690 | files = copy.deepcopy(files) 691 | try: 692 | if os.path.isfile(vrt_path): 693 | os.remove(vrt_path) 694 | if os.path.isfile(vrt_path + ".ovr"): 695 | os.remove(vrt_path + ".ovr") 696 | except (OSError, PermissionError) as e: 697 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again") 698 | sys.exit(1) 699 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest") 700 | cwd = os.getcwd() 701 | try: 702 | os.chdir(os.path.dirname(vrt_path)) 703 | if relative_to_vrt is True: 704 | for idx in range(len(files)): 705 | if 'S102:' in files[idx]: 706 | continue 707 | else: 708 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path)) 709 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd()) 710 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options) 711 | if subdataset == 1: 712 | band1 = vrt.GetRasterBand(1) 713 | band1.SetDescription("Elevation") 714 | band2 = vrt.GetRasterBand(2) 715 | band2.SetDescription("Uncertainty") 716 | if subdataset == 2: 717 | band1 = vrt.GetRasterBand(1) 718 | band1.SetDescription("QualityOfSurvey") 719 | vrt = None 720 | except: 721 | raise RuntimeError(f"VRT failed to build for {vrt_path}") 722 | finally: 723 | os.chdir(cwd) 724 | vrt = gdal.Open(vrt_path, 0) 725 | vrt.BuildOverviews("NEAREST", levels) 726 | vrt = None 727 | 728 | def create_vrt_pmn1(files: list, vrt_path: str, levels: list, relative_to_vrt: bool) -> None: 729 | """ 730 | Build VRT from files. 731 | 732 | Parameters 733 | ---------- 734 | files 735 | list of the file paths to include in the vrt. 736 | vrt_path 737 | output vrt path. 
738 | levels 739 | list of overview levels to be built with the vrt. 740 | relative_to_vrt : bool 741 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 742 | """ 743 | # not efficient but insignificant 744 | files = copy.deepcopy(files) 745 | try: 746 | if os.path.isfile(vrt_path): 747 | os.remove(vrt_path) 748 | if os.path.isfile(vrt_path + ".ovr"): 749 | os.remove(vrt_path + ".ovr") 750 | except (OSError, PermissionError) as e: 751 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again") 752 | sys.exit(1) 753 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest") 754 | cwd = os.getcwd() 755 | try: 756 | os.chdir(os.path.dirname(vrt_path)) 757 | if relative_to_vrt is True: 758 | for idx in range(len(files)): 759 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path)) 760 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd()) 761 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options) 762 | band1 = vrt.GetRasterBand(1) 763 | band1.SetDescription("Elevation") 764 | band2 = vrt.GetRasterBand(2) 765 | band2.SetDescription("Uncertainty") 766 | vrt = None 767 | except: 768 | raise RuntimeError(f"VRT failed to build for {vrt_path}") 769 | finally: 770 | os.chdir(cwd) 771 | vrt = gdal.Open(vrt_path, 0) 772 | vrt.BuildOverviews("NEAREST", levels) 773 | vrt = None 774 | 775 | def create_vrt(files: list, vrt_path: str, levels: list, relative_to_vrt: bool) -> None: 776 | """ 777 | Build VRT from files. 778 | 779 | Parameters 780 | ---------- 781 | files 782 | list of the file paths to include in the vrt. 783 | vrt_path 784 | output vrt path. 785 | levels 786 | list of overview levels to be built with the vrt. 787 | relative_to_vrt : bool 788 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths. 789 | """ 790 | # not efficient but insignificant 791 | files = copy.deepcopy(files) 792 | try: 793 | if os.path.isfile(vrt_path): 794 | os.remove(vrt_path) 795 | if os.path.isfile(vrt_path + ".ovr"): 796 | os.remove(vrt_path + ".ovr") 797 | except (OSError, PermissionError) as e: 798 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again") 799 | sys.exit(1) 800 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest") 801 | cwd = os.getcwd() 802 | try: 803 | os.chdir(os.path.dirname(vrt_path)) 804 | if relative_to_vrt is True: 805 | for idx in range(len(files)): 806 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path)) 807 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd()) 808 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options) 809 | band1 = vrt.GetRasterBand(1) 810 | band1.SetDescription("Elevation") 811 | band2 = vrt.GetRasterBand(2) 812 | band2.SetDescription("Uncertainty") 813 | band3 = vrt.GetRasterBand(3) 814 | band3.SetDescription("Contributor") 815 | vrt = None 816 | except: 817 | raise RuntimeError(f"VRT failed to build for {vrt_path}") 818 | finally: 819 | os.chdir(cwd) 820 | vrt = gdal.Open(vrt_path, 0) 821 | vrt.BuildOverviews("NEAREST", levels) 822 | vrt = None 823 | 824 | 825 | def add_vrt_rat_pmn(conn: sqlite3.Connection, utm: str, project_dir: str, vrt_path: str, data_source: str) -> None: 826 | """ 827 | Create a raster attribute table for the VRT. 
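    The per-tile attribute tables of every tile in the UTM zone are merged
    into one table, combining rows that share the same contributor value and
    summing their pixel counts (clamped to the 32-bit integer maximum).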
828 | 829 | Parameters 830 | ---------- 831 | conn : sqlite3.Connection 832 | database connection object. 833 | utm : str 834 | utm zone of the VRT. 835 | project_dir : str 836 | destination directory for project. 837 | vrt_path : str 838 | path to the VRT to which to add the raster attribute table. 839 | data_source : str 840 | The NBS offers various products to different end-users. Some are available publicly. 841 | Use this argument to identify which product you want. BlueTopo is the default. 842 | """ 843 | expected_fields = dict( 844 | value=[int, gdal.GFU_MinMax], 845 | count=[int, gdal.GFU_PixelCount], 846 | data_assessment=[int, gdal.GFU_Generic], 847 | feature_least_depth=[float, gdal.GFU_Generic], 848 | significant_features=[float, gdal.GFU_Generic], 849 | feature_size=[float, gdal.GFU_Generic], 850 | coverage=[int, gdal.GFU_Generic], 851 | bathy_coverage=[int, gdal.GFU_Generic], 852 | horizontal_uncert_fixed=[float, gdal.GFU_Generic], 853 | horizontal_uncert_var=[float, gdal.GFU_Generic], 854 | vertical_uncert_fixed=[float, gdal.GFU_Generic], 855 | vertical_uncert_var=[float, gdal.GFU_Generic], 856 | license_name=[str, gdal.GFU_Generic], 857 | license_url=[str, gdal.GFU_Generic], 858 | source_survey_id=[str, gdal.GFU_Generic], 859 | source_institution=[str, gdal.GFU_Generic], 860 | survey_date_start=[str, gdal.GFU_Generic], 861 | survey_date_end=[str, gdal.GFU_Generic], 862 | ) 863 | if data_source.lower() == "hsd": 864 | expected_fields["catzoc"] = [int, gdal.GFU_Generic] 865 | expected_fields["supercession_score"] = [float, gdal.GFU_Generic] 866 | expected_fields["decay_score"] = [float, gdal.GFU_Generic] 867 | expected_fields["unqualified"] = [int, gdal.GFU_Generic] 868 | expected_fields["sensitive"] = [int, gdal.GFU_Generic] 869 | # refactor later 870 | if data_source.lower() in ['s102v22']: 871 | expected_fields = dict( 872 | value=[int, gdal.GFU_MinMax], 873 | data_assessment=[int, gdal.GFU_Generic], 874 | feature_least_depth=[float, gdal.GFU_Generic], 875 | significant_features=[float, gdal.GFU_Generic], 876 | feature_size=[str, gdal.GFU_Generic], 877 | # ? 878 | feature_size_var=[int, gdal.GFU_Generic], 879 | coverage=[int, gdal.GFU_Generic], 880 | bathy_coverage=[int, gdal.GFU_Generic], 881 | horizontal_uncert_fixed=[float, gdal.GFU_Generic], 882 | horizontal_uncert_var=[float, gdal.GFU_Generic], 883 | survey_date_start=[str, gdal.GFU_Generic], 884 | survey_date_end=[str, gdal.GFU_Generic], 885 | source_survey_id=[str, gdal.GFU_Generic], 886 | source_institution=[str, gdal.GFU_Generic], 887 | # ? 
888 | bathymetric_uncertainty_type=[int, gdal.GFU_Generic], 889 | ) 890 | cursor = conn.cursor() 891 | cursor.execute("SELECT * FROM tiles WHERE utm = ?", (utm,)) 892 | exp_fields = list(expected_fields.keys()) 893 | tiles = [dict(row) for row in cursor.fetchall()] 894 | surveys = [] 895 | for tile in tiles: 896 | if tile['file_disk'] is None or os.path.isfile(os.path.join(project_dir, tile["file_disk"])) is False: 897 | continue 898 | gtiff = os.path.join(project_dir, tile["file_disk"]).replace('\\', '/') 899 | if os.path.isfile(gtiff) is False: 900 | continue 901 | # rat_file = os.path.join(project_dir, tile["rat_disk"]) 902 | # if os.path.isfile(rat_file) is False and data_source.lower() != 's102v22': 903 | # continue 904 | if data_source.lower() != 's102v22': 905 | ds = gdal.Open(gtiff) 906 | contrib = ds.GetRasterBand(3) 907 | rat_n = contrib.GetDefaultRAT() 908 | for col in range(rat_n.GetColumnCount()): 909 | if exp_fields[col] != rat_n.GetNameOfCol(col).lower(): 910 | raise ValueError("Unexpected field order") 911 | else: 912 | ds = gdal.Open(f'S102:"{gtiff}":QualityOfSurvey') 913 | contrib = ds.GetRasterBand(1) 914 | rat_n = contrib.GetDefaultRAT() 915 | for row in range(rat_n.GetRowCount()): 916 | exist = False 917 | for survey in surveys: 918 | if survey[0] == rat_n.GetValueAsString(row, 0): 919 | survey[1] = int(survey[1]) + rat_n.GetValueAsInt(row, 1) 920 | # this is the count field 921 | # GFU_PixelCount usage has support as int dtype in some 922 | # software so avoiding changing it to python float (double) 923 | # this is a temp solution to avoid overflow error which can 924 | # occur with generalization in vrts of extreme coverage 925 | if survey[1] > 2147483647: 926 | survey[1] = 2147483647 927 | exist = True 928 | break 929 | if exist: 930 | continue 931 | curr = [] 932 | for col in range(rat_n.GetColumnCount()): 933 | entry_val = rat_n.GetValueAsString(row, col) 934 | # test removal 935 | if rat_n.GetNameOfCol(col).lower() in ['feature_size_var', 'bathymetric_uncertainty_type']: 936 | entry_val = 0 937 | curr.append(entry_val) 938 | surveys.append(curr) 939 | rat = gdal.RasterAttributeTable() 940 | for entry in expected_fields: 941 | field_type, usage = expected_fields[entry] 942 | if field_type == str: 943 | col_type = gdal.GFT_String 944 | elif field_type == int: 945 | col_type = gdal.GFT_Integer 946 | elif field_type == float: 947 | col_type = gdal.GFT_Real 948 | else: 949 | raise TypeError("Unknown data type submitted for gdal raster attribute table.") 950 | rat.CreateColumn(entry, col_type, usage) 951 | rat.SetRowCount(len(surveys)) 952 | for row_idx, survey in enumerate(surveys): 953 | for col_idx, entry in enumerate(expected_fields): 954 | field_type, usage = expected_fields[entry] 955 | if field_type == str: 956 | rat.SetValueAsString(row_idx, col_idx, survey[col_idx]) 957 | elif field_type == int: 958 | rat.SetValueAsInt(row_idx, col_idx, int(survey[col_idx])) 959 | elif field_type == float: 960 | rat.SetValueAsDouble(row_idx, col_idx, float(survey[col_idx])) 961 | vrt_ds = gdal.Open(vrt_path, 1) 962 | contributor_band = vrt_ds.GetRasterBand(3) 963 | contributor_band.SetDefaultRAT(rat) 964 | 965 | 966 | def add_vrt_rat(conn: sqlite3.Connection, utm: str, project_dir: str, vrt_path: str, data_source: str) -> None: 967 | """ 968 | Create a raster attribute table for the VRT. 969 | 970 | Parameters 971 | ---------- 972 | conn : sqlite3.Connection 973 | database connection object. 974 | utm : str 975 | utm zone of the VRT. 
976 | project_dir : str 977 | destination directory for project. 978 | vrt_path : str 979 | path to the VRT to which to add the raster attribute table. 980 | data_source : str 981 | The NBS offers various products to different end-users. Some are available publicly. 982 | Use this argument to identify which product you want. BlueTopo is the default. 983 | """ 984 | expected_fields = dict( 985 | value=[int, gdal.GFU_MinMax], 986 | count=[int, gdal.GFU_PixelCount], 987 | data_assessment=[int, gdal.GFU_Generic], 988 | feature_least_depth=[float, gdal.GFU_Generic], 989 | significant_features=[float, gdal.GFU_Generic], 990 | feature_size=[float, gdal.GFU_Generic], 991 | coverage=[int, gdal.GFU_Generic], 992 | bathy_coverage=[int, gdal.GFU_Generic], 993 | horizontal_uncert_fixed=[float, gdal.GFU_Generic], 994 | horizontal_uncert_var=[float, gdal.GFU_Generic], 995 | vertical_uncert_fixed=[float, gdal.GFU_Generic], 996 | vertical_uncert_var=[float, gdal.GFU_Generic], 997 | license_name=[str, gdal.GFU_Generic], 998 | license_url=[str, gdal.GFU_Generic], 999 | source_survey_id=[str, gdal.GFU_Generic], 1000 | source_institution=[str, gdal.GFU_Generic], 1001 | survey_date_start=[str, gdal.GFU_Generic], 1002 | survey_date_end=[str, gdal.GFU_Generic], 1003 | ) 1004 | if data_source.lower() == "hsd": 1005 | expected_fields['catzoc'] = [int, gdal.GFU_Generic] 1006 | expected_fields['supercession_score'] = [float, gdal.GFU_Generic] 1007 | expected_fields['decay_score'] = [float, gdal.GFU_Generic] 1008 | expected_fields['unqualified'] = [int, gdal.GFU_Generic] 1009 | expected_fields['sensitive'] = [int, gdal.GFU_Generic] 1010 | cursor = conn.cursor() 1011 | cursor.execute("SELECT * FROM tiles WHERE utm = ?", (utm,)) 1012 | exp_fields = list(expected_fields.keys()) 1013 | tiles = [dict(row) for row in cursor.fetchall()] 1014 | surveys = [] 1015 | for tile in tiles: 1016 | gtiff = os.path.join(project_dir, tile["geotiff_disk"]) 1017 | if os.path.isfile(gtiff) is False: 1018 | continue 1019 | rat_file = os.path.join(project_dir, tile["rat_disk"]) 1020 | if os.path.isfile(rat_file) is False: 1021 | continue 1022 | ds = gdal.Open(gtiff) 1023 | contrib = ds.GetRasterBand(3) 1024 | rat_n = contrib.GetDefaultRAT() 1025 | for col in range(rat_n.GetColumnCount()): 1026 | if exp_fields[col] != rat_n.GetNameOfCol(col).lower(): 1027 | raise ValueError("Unexpected field order") 1028 | for row in range(rat_n.GetRowCount()): 1029 | exist = False 1030 | for survey in surveys: 1031 | if survey[0] == rat_n.GetValueAsString(row, 0): 1032 | survey[1] = int(survey[1]) + rat_n.GetValueAsInt(row, 1) 1033 | # this is the count field 1034 | # GFU_PixelCount usage has support as int dtype in some 1035 | # software so avoiding changing it to python float (double) 1036 | # this is a temp solution to avoid overflow error which can 1037 | # occur with generalization in vrts of extreme coverage 1038 | if survey[1] > 2147483647: 1039 | survey[1] = 2147483647 1040 | exist = True 1041 | break 1042 | if exist: 1043 | continue 1044 | curr = [] 1045 | for col in range(rat_n.GetColumnCount()): 1046 | curr.append(rat_n.GetValueAsString(row, col)) 1047 | surveys.append(curr) 1048 | rat = gdal.RasterAttributeTable() 1049 | for entry in expected_fields: 1050 | field_type, usage = expected_fields[entry] 1051 | if field_type == str: 1052 | col_type = gdal.GFT_String 1053 | elif field_type == int: 1054 | col_type = gdal.GFT_Integer 1055 | elif field_type == float: 1056 | col_type = gdal.GFT_Real 1057 | else: 1058 | raise TypeError("Unknown data type 
submitted for gdal raster attribute table.") 1059 | rat.CreateColumn(entry, col_type, usage) 1060 | rat.SetRowCount(len(surveys)) 1061 | for row_idx, survey in enumerate(surveys): 1062 | for col_idx, entry in enumerate(expected_fields): 1063 | field_type, usage = expected_fields[entry] 1064 | if field_type == str: 1065 | rat.SetValueAsString(row_idx, col_idx, survey[col_idx]) 1066 | elif field_type == int: 1067 | rat.SetValueAsInt(row_idx, col_idx, int(survey[col_idx])) 1068 | elif field_type == float: 1069 | rat.SetValueAsDouble(row_idx, col_idx, float(survey[col_idx])) 1070 | vrt_ds = gdal.Open(vrt_path, 1) 1071 | contributor_band = vrt_ds.GetRasterBand(3) 1072 | contributor_band.SetDefaultRAT(rat) 1073 | 1074 | 1075 | def select_tiles_by_subregion_pmn(project_dir: str, conn: sqlite3.Connection, subregion: str) -> list: 1076 | """ 1077 | Retrieve all tile records with files in the given subregion. 1078 | 1079 | Parameters 1080 | ---------- 1081 | project_dir 1082 | destination directory for project. 1083 | conn : sqlite3.Connection 1084 | database connection object. 1085 | subregion : str 1086 | subregion name. 1087 | 1088 | Returns 1089 | ------- 1090 | existing_tiles : list 1091 | list of tile records. 1092 | """ 1093 | cursor = conn.cursor() 1094 | cursor.execute("SELECT * FROM tiles WHERE subregion = ?", (subregion,)) 1095 | tiles = [dict(row) for row in cursor.fetchall()] 1096 | existing_tiles = [tile for tile in tiles if tile["file_disk"] and os.path.isfile(os.path.join(project_dir, tile["file_disk"]))] 1097 | if len(tiles) - len(existing_tiles) != 0: 1098 | print(f"Did not find the files for {len(tiles) - len(existing_tiles)} " f"registered tile(s) in subregion {subregion}. " "Run fetch_tiles to retrieve files " "or correct the directory path if incorrect.") 1099 | return existing_tiles 1100 | 1101 | 1102 | def select_tiles_by_subregion(project_dir: str, conn: sqlite3.Connection, subregion: str) -> list: 1103 | """ 1104 | Retrieve all tile records with files in the given subregion. 1105 | 1106 | Parameters 1107 | ---------- 1108 | project_dir 1109 | destination directory for project. 1110 | conn : sqlite3.Connection 1111 | database connection object. 1112 | subregion : str 1113 | subregion name. 1114 | 1115 | Returns 1116 | ------- 1117 | existing_tiles : list 1118 | list of tile records. 1119 | """ 1120 | cursor = conn.cursor() 1121 | cursor.execute("SELECT * FROM tiles WHERE subregion = ?", (subregion,)) 1122 | tiles = [dict(row) for row in cursor.fetchall()] 1123 | existing_tiles = [tile for tile in tiles if tile["geotiff_disk"] and tile["rat_disk"] and os.path.isfile(os.path.join(project_dir, tile["geotiff_disk"])) and os.path.isfile(os.path.join(project_dir, tile["rat_disk"]))] 1124 | if len(tiles) - len(existing_tiles) != 0: 1125 | print(f"Did not find the files for {len(tiles) - len(existing_tiles)} " f"registered tile(s) in subregion {subregion}. " "Run fetch_tiles to retrieve files " "or correct the directory path if incorrect.") 1126 | return existing_tiles 1127 | 1128 | 1129 | def select_subregions_by_utm_pmn(project_dir: str, conn: sqlite3.Connection, utm: str) -> list: 1130 | """ 1131 | Retrieve all subregion records with files in the given UTM. 1132 | 1133 | Parameters 1134 | ---------- 1135 | project_dir 1136 | destination directory for project. 1137 | conn : sqlite3.Connection 1138 | database connection object. 1139 | utm : str 1140 | UTM zone. 1141 | 1142 | Returns 1143 | ------- 1144 | subregions : list 1145 | list of subregion records in UTM zone. 
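
    Examples
    --------
    Illustrative sketch only; "14" is a placeholder UTM zone and the call
    assumes fetch_tiles and the subregion builds have already populated the
    registry::

        subregions = select_subregions_by_utm_pmn(project_dir, conn, "14")
        vrt_paths = [s["complete_subdataset1_vrt"] for s in subregions]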
1146 | """ 1147 | cursor = conn.cursor() 1148 | cursor.execute( 1149 | """ 1150 | SELECT * FROM vrt_subregion 1151 | WHERE utm = ? AND built_subdataset1 = 1 AND built_subdataset2 = 1 1152 | """, 1153 | (utm,), 1154 | ) 1155 | subregions = [dict(row) for row in cursor.fetchall()] 1156 | for s in subregions: 1157 | if ( 1158 | (s["res_2_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_vrt"]))) 1159 | or (s["res_2_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_ovr"]))) 1160 | 1161 | or (s["res_2_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_vrt"]))) 1162 | or (s["res_2_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_ovr"]))) 1163 | 1164 | or (s["res_4_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_vrt"]))) 1165 | or (s["res_4_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_ovr"]))) 1166 | 1167 | or (s["res_4_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_vrt"]))) 1168 | or (s["res_4_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_ovr"]))) 1169 | 1170 | or (s["res_8_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_vrt"]))) 1171 | or (s["res_8_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_ovr"]))) 1172 | 1173 | or (s["res_8_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_vrt"]))) 1174 | or (s["res_8_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_ovr"]))) 1175 | 1176 | or (s["complete_subdataset1_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_vrt"]))) 1177 | or (s["complete_subdataset1_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_ovr"]))) 1178 | 1179 | or (s["complete_subdataset2_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_vrt"]))) 1180 | or (s["complete_subdataset2_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_ovr"]))) 1181 | 1182 | ): 1183 | raise RuntimeError(f"Subregion VRT files missing for {s['utm']}. Please rerun.") 1184 | return subregions 1185 | 1186 | 1187 | def select_subregions_by_utm(project_dir: str, conn: sqlite3.Connection, utm: str) -> list: 1188 | """ 1189 | Retrieve all subregion records with files in the given UTM. 1190 | 1191 | Parameters 1192 | ---------- 1193 | project_dir 1194 | destination directory for project. 1195 | conn : sqlite3.Connection 1196 | database connection object. 1197 | utm : str 1198 | UTM zone. 1199 | 1200 | Returns 1201 | ------- 1202 | subregions : list 1203 | list of subregion records in UTM zone. 1204 | """ 1205 | cursor = conn.cursor() 1206 | cursor.execute( 1207 | """ 1208 | SELECT * FROM vrt_subregion 1209 | WHERE utm = ? 
AND built = 1 1210 | """, 1211 | (utm,), 1212 | ) 1213 | subregions = [dict(row) for row in cursor.fetchall()] 1214 | for s in subregions: 1215 | if ( 1216 | (s["res_2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_vrt"]))) 1217 | or (s["res_2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_ovr"]))) 1218 | or (s["res_4_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_vrt"]))) 1219 | or (s["res_4_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_ovr"]))) 1220 | or (s["res_8_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_vrt"]))) 1221 | or (s["res_8_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_ovr"]))) 1222 | or (s["complete_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_vrt"]))) 1223 | or (s["complete_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_ovr"]))) 1224 | ): 1225 | raise RuntimeError(f"Subregion VRT files missing for {s['utm']}. Please rerun.") 1226 | return subregions 1227 | 1228 | def select_unbuilt_subregions_pmn(conn: sqlite3.Connection) -> list: 1229 | """ 1230 | Retrieve all unbuilt subregion records. 1231 | 1232 | Parameters 1233 | ---------- 1234 | conn : sqlite3.Connection 1235 | database connection object. 1236 | 1237 | Returns 1238 | ------- 1239 | subregions : list 1240 | list of unbuilt subregion records. 1241 | """ 1242 | cursor = conn.cursor() 1243 | cursor.execute("SELECT * FROM vrt_subregion WHERE built_subdataset1 = 0 or built_subdataset2 = 0") 1244 | subregions = [dict(row) for row in cursor.fetchall()] 1245 | return subregions 1246 | 1247 | 1248 | def select_unbuilt_subregions(conn: sqlite3.Connection) -> list: 1249 | """ 1250 | Retrieve all unbuilt subregion records. 1251 | 1252 | Parameters 1253 | ---------- 1254 | conn : sqlite3.Connection 1255 | database connection object. 1256 | 1257 | Returns 1258 | ------- 1259 | subregions : list 1260 | list of unbuilt subregion records. 1261 | """ 1262 | cursor = conn.cursor() 1263 | cursor.execute("SELECT * FROM vrt_subregion WHERE built = 0") 1264 | subregions = [dict(row) for row in cursor.fetchall()] 1265 | return subregions 1266 | 1267 | 1268 | def select_unbuilt_utms_pmn(conn: sqlite3.Connection) -> list: 1269 | """ 1270 | Retrieve all unbuilt utm records. 1271 | 1272 | Parameters 1273 | ---------- 1274 | conn : sqlite3.Connection 1275 | database connection object. 1276 | 1277 | Returns 1278 | ------- 1279 | utms : list 1280 | list of unbuilt utm records. 1281 | """ 1282 | cursor = conn.cursor() 1283 | cursor.execute("SELECT * FROM vrt_utm WHERE built_subdataset1 = 0 or built_subdataset2 = 0") 1284 | utms = [dict(row) for row in cursor.fetchall()] 1285 | return utms 1286 | 1287 | 1288 | 1289 | def select_unbuilt_utms(conn: sqlite3.Connection) -> list: 1290 | """ 1291 | Retrieve all unbuilt utm records. 1292 | 1293 | Parameters 1294 | ---------- 1295 | conn : sqlite3.Connection 1296 | database connection object. 1297 | 1298 | Returns 1299 | ------- 1300 | utms : list 1301 | list of unbuilt utm records. 1302 | """ 1303 | cursor = conn.cursor() 1304 | cursor.execute("SELECT * FROM vrt_utm WHERE built = 0") 1305 | utms = [dict(row) for row in cursor.fetchall()] 1306 | return utms 1307 | 1308 | 1309 | def update_subregion_pmn(conn: sqlite3.Connection, fields: dict) -> None: 1310 | """ 1311 | Update subregion records with given path values. 1312 | 1313 | Parameters 1314 | ---------- 1315 | conn : sqlite3.Connection 1316 | database connection object. 
1317 | fields : dict 1318 | dictionary with the name of the subregion and paths for its associated 1319 | VRT and OVR files. 1320 | """ 1321 | cursor = conn.cursor() 1322 | cursor.execute( 1323 | """UPDATE vrt_subregion 1324 | SET res_2_subdataset1_vrt = ?, res_2_subdataset1_ovr = ?, 1325 | res_2_subdataset2_vrt = ?, res_2_subdataset2_ovr = ?, 1326 | res_4_subdataset1_vrt = ?, res_4_subdataset1_ovr = ?, 1327 | res_4_subdataset2_vrt = ?, res_4_subdataset2_ovr = ?, 1328 | res_8_subdataset1_vrt = ?, res_8_subdataset1_ovr = ?, 1329 | res_8_subdataset2_vrt = ?, res_8_subdataset2_ovr = ?, 1330 | complete_subdataset1_vrt = ?, complete_subdataset1_ovr = ?, 1331 | complete_subdataset2_vrt = ?, complete_subdataset2_ovr = ?, 1332 | built_subdataset1 = 1, 1333 | built_subdataset2 = 1 1334 | WHERE region = ?""", 1335 | ( 1336 | fields["res_2_subdataset1_vrt"], 1337 | fields["res_2_subdataset1_ovr"], 1338 | fields["res_2_subdataset2_vrt"], 1339 | fields["res_2_subdataset2_ovr"], 1340 | fields["res_4_subdataset1_vrt"], 1341 | fields["res_4_subdataset1_ovr"], 1342 | fields["res_4_subdataset2_vrt"], 1343 | fields["res_4_subdataset2_ovr"], 1344 | fields["res_8_subdataset1_vrt"], 1345 | fields["res_8_subdataset1_ovr"], 1346 | fields["res_8_subdataset2_vrt"], 1347 | fields["res_8_subdataset2_ovr"], 1348 | 1349 | fields["complete_subdataset1_vrt"], 1350 | fields["complete_subdataset1_ovr"], 1351 | 1352 | fields["complete_subdataset2_vrt"], 1353 | fields["complete_subdataset2_ovr"], 1354 | 1355 | fields["region"], 1356 | ), 1357 | ) 1358 | conn.commit() 1359 | 1360 | 1361 | def update_subregion(conn: sqlite3.Connection, fields: dict) -> None: 1362 | """ 1363 | Update subregion records with given path values. 1364 | 1365 | Parameters 1366 | ---------- 1367 | conn : sqlite3.Connection 1368 | database connection object. 1369 | fields : dict 1370 | dictionary with the name of the subregion and paths for its associated 1371 | VRT and OVR files. 1372 | """ 1373 | cursor = conn.cursor() 1374 | cursor.execute( 1375 | """UPDATE vrt_subregion 1376 | SET res_2_vrt = ?, res_2_ovr = ?, res_4_vrt = ?, 1377 | res_4_ovr = ?, res_8_vrt = ?, res_8_ovr = ?, 1378 | complete_vrt = ?, complete_ovr = ?, built = 1 1379 | WHERE region = ?""", 1380 | ( 1381 | fields["res_2_vrt"], 1382 | fields["res_2_ovr"], 1383 | fields["res_4_vrt"], 1384 | fields["res_4_ovr"], 1385 | fields["res_8_vrt"], 1386 | fields["res_8_ovr"], 1387 | fields["complete_vrt"], 1388 | fields["complete_ovr"], 1389 | fields["region"], 1390 | ), 1391 | ) 1392 | conn.commit() 1393 | 1394 | 1395 | def update_utm_pmn(conn: sqlite3.Connection, fields: dict) -> None: 1396 | """ 1397 | Update utm records with given path values. 1398 | 1399 | Parameters 1400 | ---------- 1401 | conn : sqlite3.Connection 1402 | database connection object. 1403 | fields : dict 1404 | dictionary with the name of the UTM zone and paths for its associated 1405 | VRT and OVR files. 
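
    Examples
    --------
    Illustrative sketch of the expected shape of ``fields``; the paths and
    UTM zone are placeholders following the naming used in main::

        fields = {
            "utm_subdataset1_vrt": "S102V22_VRT/S102V22_Fetched_UTM14_BathymetryCoverage.vrt",
            "utm_subdataset1_ovr": "S102V22_VRT/S102V22_Fetched_UTM14_BathymetryCoverage.vrt.ovr",
            "utm_subdataset2_vrt": "S102V22_VRT/S102V22_Fetched_UTM14_QualityOfSurvey.vrt",
            "utm_subdataset2_ovr": "S102V22_VRT/S102V22_Fetched_UTM14_QualityOfSurvey.vrt.ovr",
            "utm_combined_vrt": "S102V22_VRT/S102V22_Fetched_UTM14.vrt",
            "utm": "14",
        }
        update_utm_pmn(conn, fields)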
1406 | """ 1407 | cursor = conn.cursor() 1408 | cursor.execute( 1409 | """UPDATE vrt_utm 1410 | SET 1411 | utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?, 1412 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?, 1413 | utm_combined_vrt = ?, 1414 | built_subdataset1 = 1, 1415 | built_subdataset2 = 1, 1416 | built_combined = 1 1417 | WHERE utm = ?""", 1418 | ( 1419 | fields["utm_subdataset1_vrt"], 1420 | fields["utm_subdataset1_ovr"], 1421 | fields["utm_subdataset2_vrt"], 1422 | fields["utm_subdataset2_ovr"], 1423 | fields["utm_combined_vrt"], 1424 | fields["utm"], 1425 | ), 1426 | ) 1427 | conn.commit() 1428 | 1429 | 1430 | def update_utm(conn: sqlite3.Connection, fields: dict) -> None: 1431 | """ 1432 | Update utm records with given path values. 1433 | 1434 | Parameters 1435 | ---------- 1436 | conn : sqlite3.Connection 1437 | database connection object. 1438 | fields : dict 1439 | dictionary with the name of the UTM zone and paths for its associated 1440 | VRT and OVR files. 1441 | """ 1442 | cursor = conn.cursor() 1443 | cursor.execute( 1444 | """UPDATE vrt_utm 1445 | SET utm_vrt = ?, utm_ovr = ?, built = 1 1446 | WHERE utm = ?""", 1447 | ( 1448 | fields["utm_vrt"], 1449 | fields["utm_ovr"], 1450 | fields["utm"], 1451 | ), 1452 | ) 1453 | conn.commit() 1454 | 1455 | 1456 | def missing_subregions_pmn(project_dir: str, conn: sqlite3.Connection) -> int: 1457 | """ 1458 | Confirm built subregions's associated VRT and OVR files exists. 1459 | If the files do not exist, then change the subregion record to unbuilt. 1460 | 1461 | Parameters 1462 | ---------- 1463 | project_dir 1464 | destination directory for project. 1465 | conn : sqlite3.Connection 1466 | database connection object. 1467 | 1468 | Returns 1469 | ------- 1470 | missing_subregion_count : int 1471 | count of subregions with missing files. 
1472 | """ 1473 | cursor = conn.cursor() 1474 | cursor.execute("SELECT * FROM vrt_subregion WHERE built_subdataset1 = 1 or built_subdataset2 = 1") 1475 | subregions = [dict(row) for row in cursor.fetchall()] 1476 | missing_subregion_count = 0 1477 | # todo comparison against tiles table to know res vrts exist where expected 1478 | for s in subregions: 1479 | if ( 1480 | (s["res_2_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_vrt"]))) 1481 | or (s["res_2_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_ovr"]))) 1482 | or (s["res_2_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_vrt"]))) 1483 | or (s["res_2_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_ovr"]))) 1484 | or (s["res_4_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_vrt"]))) 1485 | or (s["res_4_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_ovr"]))) 1486 | or (s["res_4_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_vrt"]))) 1487 | or (s["res_4_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_ovr"]))) 1488 | or (s["res_8_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_vrt"]))) 1489 | or (s["res_8_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_ovr"]))) 1490 | or (s["res_8_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_vrt"]))) 1491 | or (s["res_8_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_ovr"]))) 1492 | or (s["complete_subdataset1_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_vrt"]))) 1493 | or (s["complete_subdataset1_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_ovr"]))) 1494 | or (s["complete_subdataset2_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_vrt"]))) 1495 | or (s["complete_subdataset2_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_ovr"]))) 1496 | ): 1497 | missing_subregion_count += 1 1498 | cursor.execute( 1499 | """UPDATE vrt_subregion 1500 | SET res_2_subdataset1_vrt = ?, res_2_subdataset1_ovr = ?, 1501 | res_2_subdataset2_vrt = ?, res_2_subdataset2_ovr = ?, 1502 | 1503 | res_4_subdataset1_vrt = ?, res_4_subdataset1_ovr = ?, 1504 | res_4_subdataset2_vrt = ?, res_4_subdataset2_ovr = ?, 1505 | 1506 | res_8_subdataset1_vrt = ?, res_8_subdataset1_ovr = ?, 1507 | res_8_subdataset2_vrt = ?, res_8_subdataset2_ovr = ?, 1508 | 1509 | complete_subdataset1_vrt = ?, complete_subdataset1_ovr = ?, 1510 | complete_subdataset2_vrt = ?, complete_subdataset2_ovr = ?, 1511 | 1512 | built_subdataset1 = 0, 1513 | built_subdataset2 = 0 1514 | 1515 | WHERE region = ?""", 1516 | ( 1517 | None, 1518 | None, 1519 | None, 1520 | None, 1521 | None, 1522 | None, 1523 | None, 1524 | None, 1525 | None, 1526 | None, 1527 | None, 1528 | None, 1529 | None, 1530 | None, 1531 | None, 1532 | None, 1533 | s["region"], 1534 | ), 1535 | ) 1536 | cursor.execute( 1537 | """UPDATE vrt_utm 1538 | SET utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?, 1539 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?, 1540 | 1541 | utm_combined_vrt = ?, 1542 | 1543 | built_subdataset1 = 0, 1544 | built_subdataset2 = 0, 1545 | 
built_combined = 0 1546 | 1547 | WHERE utm = ?""", 1548 | ( 1549 | None, 1550 | None, 1551 | None, 1552 | None, 1553 | None, 1554 | s["utm"], 1555 | ), 1556 | ) 1557 | conn.commit() 1558 | return missing_subregion_count 1559 | 1560 | 1561 | 1562 | def missing_subregions(project_dir: str, conn: sqlite3.Connection) -> int: 1563 | """ 1564 | Confirm built subregions's associated VRT and OVR files exists. 1565 | If the files do not exist, then change the subregion record to unbuilt. 1566 | 1567 | Parameters 1568 | ---------- 1569 | project_dir 1570 | destination directory for project. 1571 | conn : sqlite3.Connection 1572 | database connection object. 1573 | 1574 | Returns 1575 | ------- 1576 | missing_subregion_count : int 1577 | count of subregions with missing files. 1578 | """ 1579 | cursor = conn.cursor() 1580 | cursor.execute("SELECT * FROM vrt_subregion WHERE built = 1") 1581 | subregions = [dict(row) for row in cursor.fetchall()] 1582 | missing_subregion_count = 0 1583 | # todo comparison against tiles table to know res vrts exist where expected 1584 | for s in subregions: 1585 | if ( 1586 | (s["res_2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_vrt"]))) 1587 | or (s["res_2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_ovr"]))) 1588 | or (s["res_4_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_vrt"]))) 1589 | or (s["res_4_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_ovr"]))) 1590 | or (s["res_8_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_vrt"]))) 1591 | or (s["res_8_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_ovr"]))) 1592 | or (s["complete_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_vrt"]))) 1593 | or (s["complete_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_ovr"]))) 1594 | ): 1595 | missing_subregion_count += 1 1596 | cursor.execute( 1597 | """UPDATE vrt_subregion 1598 | SET res_2_vrt = ?, res_2_ovr = ?, res_4_vrt = ?, 1599 | res_4_ovr = ?, res_8_vrt = ?, res_8_ovr = ?, 1600 | complete_vrt = ?, complete_ovr = ?, built = 0 1601 | WHERE region = ?""", 1602 | ( 1603 | None, 1604 | None, 1605 | None, 1606 | None, 1607 | None, 1608 | None, 1609 | None, 1610 | None, 1611 | s["region"], 1612 | ), 1613 | ) 1614 | cursor.execute( 1615 | """UPDATE vrt_utm 1616 | SET utm_vrt = ?, utm_ovr = ?, built = 0 1617 | WHERE utm = ?""", 1618 | ( 1619 | None, 1620 | None, 1621 | s["utm"], 1622 | ), 1623 | ) 1624 | conn.commit() 1625 | return missing_subregion_count 1626 | 1627 | 1628 | def missing_utms_pmn(project_dir: str, conn: sqlite3.Connection) -> int: 1629 | """ 1630 | Confirm built utm's associated VRT and OVR files exists. 1631 | If the files do not exist, then change the utm record to unbuilt. 1632 | 1633 | Parameters 1634 | ---------- 1635 | project_dir 1636 | destination directory for project. 1637 | conn : sqlite3.Connection 1638 | database connection object. 1639 | 1640 | Returns 1641 | ------- 1642 | missing_utm_count : int 1643 | count of UTM zones with missing files. 
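
    Notes
    -----
    A UTM zone with any missing subdataset or combined VRT/OVR file has its
    stored paths reset to NULL and built_subdataset1, built_subdataset2, and
    built_combined cleared, so the zone is rebuilt on the next run.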
1644 | """ 1645 | cursor = conn.cursor() 1646 | cursor.execute("SELECT * FROM vrt_utm WHERE built_subdataset1 = 1 or built_subdataset2 = 1") 1647 | utms = [dict(row) for row in cursor.fetchall()] 1648 | missing_utm_count = 0 1649 | for utm in utms: 1650 | if (utm["utm_subdataset1_vrt"] is None or utm["utm_subdataset1_ovr"] is None 1651 | or utm["utm_subdataset2_vrt"] is None or utm["utm_subdataset2_ovr"] is None 1652 | or utm["utm_combined_vrt"] is None 1653 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset1_vrt"])) == False 1654 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset1_ovr"])) == False 1655 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset2_vrt"])) == False 1656 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset2_ovr"])) == False 1657 | or os.path.isfile(os.path.join(project_dir, utm["utm_combined_vrt"])) == False): 1658 | missing_utm_count += 1 1659 | cursor.execute( 1660 | """UPDATE vrt_utm 1661 | SET 1662 | utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?, 1663 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?, 1664 | utm_combined_vrt = ?, 1665 | built_subdataset1 = 0, 1666 | built_subdataset2 = 0, 1667 | built_combined = 0 1668 | WHERE utm = ?""", 1669 | ( 1670 | None, 1671 | None, 1672 | None, 1673 | None, 1674 | None, 1675 | utm["utm"], 1676 | ), 1677 | ) 1678 | conn.commit() 1679 | return missing_utm_count 1680 | 1681 | 1682 | def missing_utms(project_dir: str, conn: sqlite3.Connection) -> int: 1683 | """ 1684 | Confirm built utm's associated VRT and OVR files exists. 1685 | If the files do not exist, then change the utm record to unbuilt. 1686 | 1687 | Parameters 1688 | ---------- 1689 | project_dir 1690 | destination directory for project. 1691 | conn : sqlite3.Connection 1692 | database connection object. 1693 | 1694 | Returns 1695 | ------- 1696 | missing_utm_count : int 1697 | count of UTM zones with missing files. 1698 | """ 1699 | cursor = conn.cursor() 1700 | cursor.execute("SELECT * FROM vrt_utm WHERE built = 1") 1701 | utms = [dict(row) for row in cursor.fetchall()] 1702 | missing_utm_count = 0 1703 | for utm in utms: 1704 | if utm["utm_vrt"] is None or utm["utm_ovr"] is None or os.path.isfile(os.path.join(project_dir, utm["utm_vrt"])) == False or os.path.isfile(os.path.join(project_dir, utm["utm_ovr"])) == False: 1705 | missing_utm_count += 1 1706 | cursor.execute( 1707 | """UPDATE vrt_utm 1708 | SET utm_vrt = ?, utm_ovr = ?, built = 0 1709 | WHERE utm = ?""", 1710 | ( 1711 | None, 1712 | None, 1713 | utm["utm"], 1714 | ), 1715 | ) 1716 | conn.commit() 1717 | return missing_utm_count 1718 | 1719 | 1720 | def main(project_dir: str, data_source: str = None, relative_to_vrt: bool = True) -> None: 1721 | """ 1722 | Build a gdal VRT for all available tiles. 1723 | This VRT is a collection of smaller areas described as VRTs. 1724 | Nominally 2 meter, 4 meter, and 8 meter data are collected with overviews. 1725 | These data are then added to 16 meter data for the subregion. 1726 | The subregions are then collected into a UTM zone VRT where higher level 1727 | overviews are made. 1728 | 1729 | Parameters 1730 | ---------- 1731 | project_dir 1732 | The directory path to use. Will create if it does not currently exist. 1733 | Required argument. 1734 | data_source : str 1735 | The NBS offers various products to different end-users. Some are available publicly. 1736 | Use this argument to identify which product you want. BlueTopo is the default. 
1737 |     relative_to_vrt : bool
1738 |         Use this argument to set paths of referenced files inside the VRT as relative or absolute paths.
1739 | 
1740 |     """
1741 |     project_dir = os.path.expanduser(project_dir)
1742 |     if os.path.isabs(project_dir) is False:
1743 |         print("Please use an absolute path for your project folder.")
1744 |         if "windows" not in platform.system().lower():
1745 |             print("Typically for non-Windows systems this means starting with '/'")
1746 |         sys.exit(1)
1747 | 
1748 |     if int(gdal.VersionInfo()) < 3040000:
1749 |         raise RuntimeError("Please update GDAL to >=3.4 to run build_vrt. \n" "Some users have encountered issues with " "conda's installation of GDAL 3.4. " "Try more recent versions of GDAL if you also " "encounter issues in your conda environment.")
1750 | 
1751 |     if data_source is None or data_source.lower() == "bluetopo":
1752 |         data_source = "BlueTopo"
1753 | 
1754 |     elif data_source.lower() == "modeling":
1755 |         data_source = "Modeling"
1756 | 
1757 |     elif data_source.lower() == "bag":
1758 |         data_source = "BAG"
1759 | 
1760 |     elif data_source.lower() == "s102v21":
1761 |         if int(gdal.VersionInfo()) < 3090000:
1762 |             raise RuntimeError("Please update GDAL to >=3.9 to run build_vrt for S102V21.")
1763 |         data_source = "S102V21"
1764 | 
1765 |     elif data_source.lower() == "s102v22":
1766 |         if int(gdal.VersionInfo()) < 3090000:
1767 |             raise RuntimeError("Please update GDAL to >=3.9 to run build_vrt for S102V22.")
1768 |         data_source = "S102V22"
1769 | 
1770 |     elif os.path.isdir(data_source):
1771 |         files = os.listdir(data_source)
1772 |         files = [file for file in files if file.endswith(".gpkg") and "Tile_Scheme" in file]
1773 |         files.sort(reverse=True)
1774 |         data_source = None
1775 |         for file in files:
1776 |             ds_basefile = os.path.basename(file)
1777 |             data_source = ds_basefile.split("_")[0]
1778 |             break
1779 |         if data_source is None:
1780 |             raise ValueError("Please pass in a directory which contains a tile scheme file if you're using a local data source.")
1781 | 
1782 |     if not os.path.isdir(project_dir):
1783 |         raise ValueError(f"Folder path not found: {project_dir}")
1784 | 
1785 |     if not os.path.isfile(os.path.join(project_dir, f"{data_source.lower()}_registry.db")):
1786 |         raise ValueError("SQLite database not found. Confirm correct folder. " "Note: fetch_tiles must be run at least once prior " "to build_vrt")
1787 | 
1788 |     if not os.path.isdir(os.path.join(project_dir, data_source)):
1789 |         raise ValueError(f"Tile downloads folder not found for {data_source}. Confirm correct folder. " "Note: fetch_tiles must be run at least once prior " "to build_vrt")
1790 | 
1791 |     start = datetime.datetime.now()
1792 |     print(f"[{start.strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Beginning work in project folder: {project_dir}\n")
1793 |     if data_source.lower() in ("bag", "s102v21"):
1794 |         conn = connect_to_survey_registry_pmn1(project_dir, data_source)
1795 |     elif data_source.lower() == "s102v22":
1796 |         conn = connect_to_survey_registry_pmn2(project_dir, data_source)
1797 |     else:
1798 |         conn = connect_to_survey_registry(project_dir, data_source)
1799 |     # subregions missing files
1800 |     if data_source.lower() == "s102v22":
1801 |         missing_subregion_count = missing_subregions_pmn(project_dir, conn)
1802 |     else:
1803 |         missing_subregion_count = missing_subregions(project_dir, conn)
1804 | 
1805 |     if missing_subregion_count:
1806 |         print(f"{missing_subregion_count} subregion vrt files missing. 
" "Added to build list.") 1807 | 1808 | # build subregion vrts 1809 | if data_source.lower() not in ("s102v22"): 1810 | unbuilt_subregions = select_unbuilt_subregions(conn) 1811 | if len(unbuilt_subregions) > 0: 1812 | print(f"Building {len(unbuilt_subregions)} subregion vrt(s). This may " "take minutes or hours depending on the amount of tiles.") 1813 | for ub_sr in unbuilt_subregions: 1814 | if data_source.lower() in ("bag", "s102v21"): 1815 | sr_tiles = select_tiles_by_subregion_pmn(project_dir, conn, ub_sr["region"]) 1816 | else: 1817 | sr_tiles = select_tiles_by_subregion(project_dir, conn, ub_sr["region"]) 1818 | if len(sr_tiles) < 1: 1819 | continue 1820 | if data_source.lower() in ("bag", "s102v21"): 1821 | fields = build_sub_vrts_pmn1(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt) 1822 | else: 1823 | fields = build_sub_vrts(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt) 1824 | update_subregion(conn, fields) 1825 | else: 1826 | print("Subregion vrt(s) appear up to date with the most recently " "fetched tiles.") 1827 | else: 1828 | unbuilt_subregions = select_unbuilt_subregions_pmn(conn) 1829 | if len(unbuilt_subregions) > 0: 1830 | print(f"Building {len(unbuilt_subregions)} subregion vrt(s). This may " "take minutes or hours depending on the amount of tiles.") 1831 | for ub_sr in unbuilt_subregions: 1832 | sr_tiles = select_tiles_by_subregion_pmn(project_dir, conn, ub_sr["region"]) 1833 | if len(sr_tiles) < 1: 1834 | continue 1835 | fields = build_sub_vrts_pmn(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt) 1836 | update_subregion_pmn(conn, fields) 1837 | else: 1838 | print("Subregion vrt(s) appear up to date with the most recently " "fetched tiles.") 1839 | 1840 | # utms missing files 1841 | if data_source.lower() in ("s102v22"): 1842 | missing_utm_count = missing_utms_pmn(project_dir, conn) 1843 | if missing_utm_count: 1844 | print(f"{missing_utm_count} utm vrts files missing. Added to build list.") 1845 | else: 1846 | missing_utm_count = missing_utms(project_dir, conn) 1847 | if missing_utm_count: 1848 | print(f"{missing_utm_count} utm vrts files missing. Added to build list.") 1849 | 1850 | # build utm vrts 1851 | if data_source.lower() not in ("s102v22"): 1852 | unbuilt_utms = select_unbuilt_utms(conn) 1853 | if len(unbuilt_utms) > 0: 1854 | print(f"Building {len(unbuilt_utms)} utm vrt(s). This may take minutes " "or hours depending on the amount of tiles.") 1855 | for ub_utm in unbuilt_utms: 1856 | utm_start = datetime.datetime.now() 1857 | subregions = select_subregions_by_utm(project_dir, conn, ub_utm["utm"]) 1858 | vrt_list = [os.path.join(project_dir, subregion["complete_vrt"]) for subregion in subregions] 1859 | if len(vrt_list) < 1: 1860 | continue 1861 | rel_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}.vrt") 1862 | utm_vrt = os.path.join(project_dir, rel_path) 1863 | print(f"Building utm{ub_utm['utm']}...") 1864 | if data_source.lower() in ("bag", "s102v21"): 1865 | create_vrt_pmn1(vrt_list, utm_vrt, [32, 64], relative_to_vrt) 1866 | else: 1867 | create_vrt(vrt_list, utm_vrt, [32, 64], relative_to_vrt) 1868 | add_vrt_rat(conn, ub_utm["utm"], project_dir, utm_vrt, data_source) 1869 | fields = {"utm_vrt": rel_path, "utm_ovr": None, "utm": ub_utm["utm"]} 1870 | if os.path.isfile(os.path.join(project_dir, rel_path + ".ovr")): 1871 | fields["utm_ovr"] = rel_path + ".ovr" 1872 | else: 1873 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. 
" "If error persists, please contact NBS.") 1874 | update_utm(conn, fields) 1875 | print(f"utm{ub_utm['utm']} complete after {datetime.datetime.now() - utm_start}") 1876 | else: 1877 | print("UTM vrt(s) appear up to date with the most recently " f"fetched tiles.\nNote: deleting the {data_source}_VRT folder will " "allow you to recreate from scratch if necessary") 1878 | else: 1879 | unbuilt_utms = select_unbuilt_utms_pmn(conn) 1880 | if len(unbuilt_utms) > 0: 1881 | print(f"Building {len(unbuilt_utms)} utm vrt(s). This may take minutes " "or hours depending on the amount of tiles.") 1882 | for ub_utm in unbuilt_utms: 1883 | utm_start = datetime.datetime.now() 1884 | subregions = select_subregions_by_utm_pmn(project_dir, conn, ub_utm["utm"]) 1885 | vrt_subdataset1_list = [os.path.join(project_dir, subregion["complete_subdataset1_vrt"]) for subregion in subregions] 1886 | vrt_subdataset2_list = [os.path.join(project_dir, subregion["complete_subdataset2_vrt"]) for subregion in subregions] 1887 | if len(vrt_subdataset1_list) < 1 or len(vrt_subdataset2_list) < 1: 1888 | continue 1889 | 1890 | rel_subdataset1_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}_BathymetryCoverage.vrt") 1891 | utm_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path) 1892 | 1893 | rel_subdataset2_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}_QualityOfSurvey.vrt") 1894 | utm_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path) 1895 | 1896 | rel_combined_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}.vrt") 1897 | utm_combined_vrt = os.path.join(project_dir, rel_combined_path) 1898 | 1899 | print(f"Building utm{ub_utm['utm']}...") 1900 | create_vrt_pmn(vrt_subdataset1_list, utm_subdataset1_vrt, [32, 64], relative_to_vrt, subdataset = 1) 1901 | 1902 | create_vrt_pmn(vrt_subdataset2_list, utm_subdataset2_vrt, [32, 64], relative_to_vrt, subdataset = 2) 1903 | 1904 | fields = {"utm_subdataset1_vrt": rel_subdataset1_path, 1905 | "utm_subdataset2_vrt": rel_subdataset2_path, 1906 | "utm_subdataset1_ovr": None, 1907 | "utm_subdataset2_ovr": None, 1908 | "utm_combined_vrt": utm_combined_vrt, 1909 | "utm": ub_utm["utm"]} 1910 | 1911 | if os.path.isfile(os.path.join(project_dir, rel_subdataset1_path + ".ovr")): 1912 | fields["utm_subdataset1_ovr"] = rel_subdataset1_path + ".ovr" 1913 | else: 1914 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. " "If error persists, please contact NBS.") 1915 | 1916 | if os.path.isfile(os.path.join(project_dir, rel_subdataset2_path + ".ovr")): 1917 | fields["utm_subdataset2_ovr"] = rel_subdataset2_path + ".ovr" 1918 | else: 1919 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. 
" "If error persists, please contact NBS.") 1920 | 1921 | combine_vrts([utm_subdataset1_vrt, utm_subdataset2_vrt], utm_combined_vrt, relative_to_vrt) 1922 | 1923 | if data_source.lower() not in ("bag", "s102v21"): 1924 | if data_source.lower() in ('s102v22'): 1925 | add_vrt_rat_pmn(conn, ub_utm["utm"], project_dir, utm_combined_vrt, data_source) 1926 | else: 1927 | add_vrt_rat(conn, ub_utm["utm"], project_dir, utm_combined_vrt, data_source) 1928 | 1929 | update_utm_pmn(conn, fields) 1930 | print(f"utm{ub_utm['utm']} complete after {datetime.datetime.now() - utm_start}") 1931 | 1932 | else: 1933 | print("UTM vrt(s) appear up to date with the most recently " f"fetched tiles.\nNote: deleting the {data_source}_VRT folder will " "allow you to recreate from scratch if necessary") 1934 | 1935 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Operation complete after {datetime.datetime.now() - start}") --------------------------------------------------------------------------------