├── .gitignore
├── nbs
└── bluetopo
│ ├── __init__.py
│ ├── cli
│ └── cli.py
│ └── core
│ ├── fetch_tiles.py
│ └── build_vrt.py
├── .github
└── workflows
│ └── pypi.yaml
├── pyproject.toml
├── README.md
└── LICENSE
/.gitignore:
--------------------------------------------------------------------------------
1 | dist
2 | *.pyc
3 | *.egg-info
--------------------------------------------------------------------------------
/nbs/bluetopo/__init__.py:
--------------------------------------------------------------------------------
1 | from .core.build_vrt import main as build_vrt
2 | from .core.fetch_tiles import main as fetch_tiles
3 |
--------------------------------------------------------------------------------
/.github/workflows/pypi.yaml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | deploy:
12 | runs-on: ubuntu-latest
13 |
14 | steps:
15 | - uses: actions/checkout@v3
16 | - name: Set up Python
17 | uses: actions/setup-python@v3
18 | with:
19 | python-version: "3.x"
20 | - name: Install dependencies
21 | run: |
22 | python -m pip install --upgrade pip
23 | pip install build
24 | - name: Build package
25 | run: python -m build
26 | - name: Publish package
27 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
28 | with:
29 | user: __token__
30 | password: ${{ secrets.PYPI_API_TOKEN }}
31 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "BlueTopo"
7 | version = "0.7.0"
8 | authors = [
9 | { name = "Glen Rice", email = "ocs.nbs@noaa.gov" },
10 | { name = "Tashi Geleg", email = "ocs.nbs@noaa.gov" },
11 | ]
12 | description = "National Bathymetric Source Project BlueTopo"
13 | readme = "README.md"
14 | license = { file = "LICENSE" }
15 | keywords = ["BlueTopo", "National Bathymetric Source", "Bathymetry"]
16 | dependencies = ["numpy", "boto3", "tqdm"]
17 |
18 | [project.scripts]
19 | fetch_tiles = "nbs.bluetopo.cli.cli:fetch_tiles_command"
20 | build_vrt = "nbs.bluetopo.cli.cli:build_vrt_command"
21 |
22 | [project.urls]
23 | homepage = "https://www.nauticalcharts.noaa.gov/data/bluetopo.html"
24 | source = "https://github.com/noaa-ocs-hydrography/BlueTopo"
--------------------------------------------------------------------------------
/nbs/bluetopo/cli/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from argparse import ArgumentParser
3 |
4 | from nbs.bluetopo.core.build_vrt import main as build_vrt
5 | from nbs.bluetopo.core.fetch_tiles import main as fetch_tiles
6 |
7 |
8 | def str_to_bool(relative_to_vrt):
9 | if isinstance(relative_to_vrt, bool):
10 | return relative_to_vrt
11 | if relative_to_vrt.lower() in ("yes", "true", "t", "y", "1"):
12 | return True
13 | elif relative_to_vrt.lower() in ("no", "false", "f", "n", "0"):
14 | return False
15 | else:
16 | raise argparse.ArgumentTypeError("Boolean value expected.")
17 |
18 |
19 | def build_vrt_command():
20 | """
21 | console_scripts entry point for build_vrt cli command
22 |
23 | """
24 | parser = ArgumentParser()
25 | parser.add_argument(
26 | "-d",
27 | "--dir",
28 | "--directory",
29 | help="The directory path to use. " "Will create if it does not currently exist. Required argument.",
30 | type=str,
31 | nargs="?",
32 | dest="dir",
33 | required=True,
34 | )
35 | parser.add_argument(
36 | "-s",
37 | "--source",
38 | help=("The NBS offers various products to different end-users. " "Some are available publicly. Use this argument to identify " "the data source. BlueTopo is the default."),
39 | default="bluetopo",
40 | dest="source",
41 | nargs="?",
42 | )
43 | parser.add_argument(
44 | "-r",
45 | "--rel",
46 | "--relative_to_vrt",
47 | help=("This bool argument will determine whether files referenced in the VRT " "are relative or absolute. The default value is true setting all paths " "inside the VRT to relative."),
48 | nargs="?",
49 | dest="relative_to_vrt",
50 | default="true",
51 | const=True,
52 | type=str_to_bool,
53 | )
54 | args = parser.parse_args()
55 | build_vrt(
56 | project_dir=args.dir,
57 | data_source=args.source,
58 | relative_to_vrt=args.relative_to_vrt,
59 | )
60 |
61 |
62 | def fetch_tiles_command():
63 | """
64 | console_scripts entry point for fetch_tiles cli command
65 |
66 | """
67 | parser = ArgumentParser()
68 | parser.add_argument(
69 | "-d",
70 | "--dir",
71 | "--directory",
72 | help="The directory path to use. " "Will create if it does not currently exist. Required argument.",
73 | type=str,
74 | nargs="?",
75 | dest="dir",
76 | required=True,
77 | )
78 | parser.add_argument(
79 | "-g",
80 | "--geom",
81 | "--geometry",
82 | help=("The geometry file to use to find intersecting available tiles. " "The returned tile ids at the time of intersection will be added to " "tracking. fetch_tiles will stay up to date with the latest data " "available from the NBS for all tracked tiles. This argument is " "not necessary if you do not want to add new tile ids to tracking."),
83 | type=str,
84 | dest="geom",
85 | nargs="?",
86 | )
87 | parser.add_argument(
88 | "-s",
89 | "--source",
90 | help=("The NBS offers various products to different end-users. " "Some are available publicly. Use this argument to identify " "the data source. BlueTopo is the default."),
91 | default="bluetopo",
92 | dest="source",
93 | nargs="?",
94 | )
95 | parser.add_argument(
96 | "-u",
97 | "--untrack",
98 | help=("This flag will untrack tiles that have missing files in your local " "download directory. fetch_tiles will no longer retrieve these tiles."),
99 | dest="untrack",
100 | action="store_true",
101 | )
102 | args = parser.parse_args()
103 | fetch_tiles(
104 | project_dir=args.dir,
105 | desired_area_filename=args.geom,
106 | untrack_missing=args.untrack,
107 | data_source=args.source,
108 | )
109 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://www.nauticalcharts.noaa.gov/data/bluetopo.html)
2 |
3 | ---
4 |
5 |
6 | Background •
7 | Requirements •
8 | Installation •
9 | Quickstart •
10 | CLI •
11 | Notes •
12 | Contact
13 |
14 |
15 | ## Overview
16 |
17 | This package simplifies getting BlueTopo data in your area of interest.
18 |
19 | ## Background
20 |
21 | [BlueTopo](https://www.nauticalcharts.noaa.gov/data/bluetopo.html) is a compilation of the best available public bathymetric data of U.S. waters.
22 |
23 | Created by [NOAA Office of Coast Survey's](https://www.nauticalcharts.noaa.gov/) National Bathymetric Source project, [BlueTopo data](https://www.nauticalcharts.noaa.gov/data/bluetopo_specs.html) intends to provide depth information nationwide with the vertical uncertainty tied to that depth estimate as well as information on the survey source that it originated from.
24 |
25 | This data is presented in a multiband high resolution GeoTIFF with an associated raster attribute table.
26 |
27 | For answers to frequently asked questions, visit the [FAQ](https://www.nauticalcharts.noaa.gov/data/bluetopo_faq.html).
28 |
29 | ## Requirements
30 |
31 | This codebase is written for Python 3 and relies on the following python
32 | packages:
33 |
34 | - gdal / ogr
35 | - numpy
36 | - boto3
37 | - tqdm
38 |
39 | ## Installation
40 |
41 | Install conda (If you have not already): [conda installation](https://docs.conda.io/projects/conda/en/latest/user-guide/install/)
42 |
43 | In the command line, create an environment with the required packages:
44 |
45 | ```
46 | conda create -n bluetopo_env -c conda-forge 'gdal>=3.4'
47 | ```
48 |
49 | ```
50 | conda activate bluetopo_env
51 | ```
52 |
53 | ```
54 | pip install bluetopo
55 | ```
56 |
57 | ## Quickstart
58 |
59 | To download the desired files, first create a geometry file (such as a geopackage) with a polygon depicting the area of interest. Then run the following commands inside of a Python shell:
60 |
61 | ```python
62 | from nbs.bluetopo import fetch_tiles
63 | ```
64 |
65 | ```python
66 | fetch_tiles(r'C:\download_path', 'area_of_interest.gpkg')
67 | ```
68 |
69 | To build a GDAL VRT of the downloaded tiles:
70 |
71 | ```python
72 | from nbs.bluetopo import build_vrt
73 | ```
74 |
75 | ```python
76 | build_vrt(r'C:\download_path')
77 | ```
78 |
79 | ## CLI
80 |
81 | You can also use the command line. Confirm the environment we created during installation is activated.
82 |
83 | To fetch the latest BlueTopo data, use `fetch_tiles` passing a directory path and a geometry file path with a polygon depicting your area of interest:
84 |
85 | ```
86 | fetch_tiles -d [DIRECTORY PATH] -g [GEOMETRY FILE PATH]
87 | ```
88 |
89 | Pass the same directory path to `build_vrt` to create a VRT from the fetched data:
90 |
91 | ```
92 | build_vrt -d [DIRECTORY PATH]
93 | ```
94 |
95 | Use `-h` for help and to see additional arguments.
96 |
97 | For most usecases, reusing the commands above to stay up to date in your area of interest is adequate.
98 |
99 | ## Notes
100 |
101 | In addition to BlueTopo, modeling data is available. You can work with modeling data using the `source` argument in the CLI commands or the `data_source` argument if you're calling the function directly.
102 |
103 | The primary difference between BlueTopo and modeling data is the vertical datum. Modeling data is on a low water datum.
104 |
105 | ## Authors
106 |
107 | - Glen Rice (NOAA),
108 |
109 | - Tashi Geleg (Lynker / NOAA),
110 |
111 | ## License
112 |
113 | This work, as a whole, falls under Creative Commons Zero (see
114 | [LICENSE](LICENSE)).
115 |
116 | ## Disclaimer
117 |
118 | This repository is a scientific product and is not official
119 | communication of the National Oceanic and Atmospheric Administration, or
120 | the United States Department of Commerce. All NOAA GitHub project code
121 | is provided on an 'as is' basis and the user assumes responsibility for
122 | its use. Any claims against the Department of Commerce or Department of
123 | Commerce bureaus stemming from the use of this GitHub project will be
124 | governed by all applicable Federal law. Any reference to specific
125 | commercial products, processes, or services by service mark, trademark,
126 | manufacturer, or otherwise, does not constitute or imply their
127 | endorsement, recommendation or favoring by the Department of Commerce.
128 | The Department of Commerce seal and logo, or the seal and logo of a DOC
129 | bureau, shall not be used in any manner to imply endorsement of any
130 | commercial product or activity by DOC or the United States Government.
131 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | CC0 1.0 Universal
2 |
3 | Statement of Purpose
4 |
5 | The laws of most jurisdictions throughout the world automatically confer
6 | exclusive Copyright and Related Rights (defined below) upon the creator and
7 | subsequent owner(s) (each and all, an "owner") of an original work of
8 | authorship and/or a database (each, a "Work").
9 |
10 | Certain owners wish to permanently relinquish those rights to a Work for the
11 | purpose of contributing to a commons of creative, cultural and scientific
12 | works ("Commons") that the public can reliably and without fear of later
13 | claims of infringement build upon, modify, incorporate in other works, reuse
14 | and redistribute as freely as possible in any form whatsoever and for any
15 | purposes, including without limitation commercial purposes. These owners may
16 | contribute to the Commons to promote the ideal of a free culture and the
17 | further production of creative, cultural and scientific works, or to gain
18 | reputation or greater distribution for their Work in part through the use and
19 | efforts of others.
20 |
21 | For these and/or other purposes and motivations, and without any expectation
22 | of additional consideration or compensation, the person associating CC0 with a
23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
25 | and publicly distribute the Work under its terms, with knowledge of his or her
26 | Copyright and Related Rights in the Work and the meaning and intended legal
27 | effect of CC0 on those rights.
28 |
29 | 1. Copyright and Related Rights. A Work made available under CC0 may be
30 | protected by copyright and related or neighboring rights ("Copyright and
31 | Related Rights"). Copyright and Related Rights include, but are not limited
32 | to, the following:
33 |
34 | i. the right to reproduce, adapt, distribute, perform, display, communicate,
35 | and translate a Work;
36 |
37 | ii. moral rights retained by the original author(s) and/or performer(s);
38 |
39 | iii. publicity and privacy rights pertaining to a person's image or likeness
40 | depicted in a Work;
41 |
42 | iv. rights protecting against unfair competition in regards to a Work,
43 | subject to the limitations in paragraph 4(a), below;
44 |
45 | v. rights protecting the extraction, dissemination, use and reuse of data in
46 | a Work;
47 |
48 | vi. database rights (such as those arising under Directive 96/9/EC of the
49 | European Parliament and of the Council of 11 March 1996 on the legal
50 | protection of databases, and under any national implementation thereof,
51 | including any amended or successor version of such directive); and
52 |
53 | vii. other similar, equivalent or corresponding rights throughout the world
54 | based on applicable law or treaty, and any national implementations thereof.
55 |
56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of,
57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
59 | and Related Rights and associated claims and causes of action, whether now
60 | known or unknown (including existing as well as future claims and causes of
61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum
62 | duration provided by applicable law or treaty (including future time
63 | extensions), (iii) in any current or future medium and for any number of
64 | copies, and (iv) for any purpose whatsoever, including without limitation
65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
66 | the Waiver for the benefit of each member of the public at large and to the
67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver
68 | shall not be subject to revocation, rescission, cancellation, termination, or
69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work
70 | by the public as contemplated by Affirmer's express Statement of Purpose.
71 |
72 | 3. Public License Fallback. Should any part of the Waiver for any reason be
73 | judged legally invalid or ineffective under applicable law, then the Waiver
74 | shall be preserved to the maximum extent permitted taking into account
75 | Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
76 | is so judged Affirmer hereby grants to each affected person a royalty-free,
77 | non transferable, non sublicensable, non exclusive, irrevocable and
78 | unconditional license to exercise Affirmer's Copyright and Related Rights in
79 | the Work (i) in all territories worldwide, (ii) for the maximum duration
80 | provided by applicable law or treaty (including future time extensions), (iii)
81 | in any current or future medium and for any number of copies, and (iv) for any
82 | purpose whatsoever, including without limitation commercial, advertising or
83 | promotional purposes (the "License"). The License shall be deemed effective as
84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the
85 | License for any reason be judged legally invalid or ineffective under
86 | applicable law, such partial invalidity or ineffectiveness shall not
87 | invalidate the remainder of the License, and in such case Affirmer hereby
88 | affirms that he or she will not (i) exercise any of his or her remaining
89 | Copyright and Related Rights in the Work or (ii) assert any associated claims
90 | and causes of action with respect to the Work, in either case contrary to
91 | Affirmer's express Statement of Purpose.
92 |
93 | 4. Limitations and Disclaimers.
94 |
95 | a. No trademark or patent rights held by Affirmer are waived, abandoned,
96 | surrendered, licensed or otherwise affected by this document.
97 |
98 | b. Affirmer offers the Work as-is and makes no representations or warranties
99 | of any kind concerning the Work, express, implied, statutory or otherwise,
100 | including without limitation warranties of title, merchantability, fitness
101 | for a particular purpose, non infringement, or the absence of latent or
102 | other defects, accuracy, or the present or absence of errors, whether or not
103 | discoverable, all to the greatest extent permissible under applicable law.
104 |
105 | c. Affirmer disclaims responsibility for clearing rights of other persons
106 | that may apply to the Work or any use thereof, including without limitation
107 | any person's Copyright and Related Rights in the Work. Further, Affirmer
108 | disclaims responsibility for obtaining any necessary consents, permissions
109 | or other rights required for any use of the Work.
110 |
111 | d. Affirmer understands and acknowledges that Creative Commons is not a
112 | party to this document and has no duty or obligation with respect to this
113 | CC0 or use of the Work.
114 |
115 | For more information, please see
116 |
117 |
--------------------------------------------------------------------------------
/nbs/bluetopo/core/fetch_tiles.py:
--------------------------------------------------------------------------------
1 | """
2 | fetch_tiles.py
3 |
4 | 0.0.1 20220614
5 |
6 | glen.rice@noaa.gov 20220614
7 |
8 | An example script for downloading BlueTopo (and Modeling) datasets from AWS.
9 |
10 | """
11 |
12 | import concurrent.futures
13 | import datetime
14 | import hashlib
15 | import os
16 | import platform
17 | import random
18 | import shutil
19 | import sqlite3
20 | import sys
21 |
22 | import boto3
23 | import numpy as np
24 | from botocore import UNSIGNED
25 | from botocore.client import Config
26 | from osgeo import gdal, ogr, osr
27 | from tqdm import tqdm
28 |
29 | from nbs.bluetopo.core.build_vrt import connect_to_survey_registry, connect_to_survey_registry_pmn1, connect_to_survey_registry_pmn2
30 |
31 | debug_info = f"""
32 | Python {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}
33 | GDAL {gdal.VersionInfo()}
34 | SQLite {sqlite3.sqlite_version}
35 | Date {datetime.datetime.now()}
36 | """
37 |
38 | def adapt_datetime_iso(val):
39 | """Adapt datetime.datetime to timezone-naive ISO 8601 date."""
40 | return val.isoformat()
41 |
42 | sqlite3.register_adapter(datetime.datetime, adapt_datetime_iso)
43 |
44 | def convert_datetime(val):
45 | """Convert ISO 8601 datetime to datetime.datetime object."""
46 | return datetime.datetime.fromisoformat(val)
47 |
48 | sqlite3.register_converter("datetime", convert_datetime)
49 |
50 | # refactor duplicate functions
51 |
52 | def get_tessellation_pmn(
53 | conn: sqlite3.Connection,
54 | project_dir: str,
55 | prefix: str,
56 | data_source: str,
57 | bucket: str = "noaa-ocs-nationalbathymetry-pds",
58 | ) -> str:
59 | """
60 | Download the tessellation scheme geopackage from AWS.
61 |
62 | Parameters
63 | ----------
64 | conn : sqlite3.Connection
65 | database connection object.
66 | project_dir : str
67 | destination directory for project.
68 | prefix : str
69 | the prefix for the geopackage on AWS to find the file.
70 | data_source : str
71 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
72 | bucket : str
73 | AWS bucket for the National Bathymetric Source project.
74 |
75 | Returns
76 | -------
77 | destination_name : str
78 | the downloaded file path string.
79 | """
80 | cursor = conn.cursor()
81 | cursor.execute("SELECT * FROM catalog WHERE file = 'Tessellation'")
82 | for tilescheme in [dict(row) for row in cursor.fetchall()]:
83 | try:
84 | os.remove(os.path.join(project_dir, tilescheme["location"]))
85 | except (OSError, PermissionError):
86 | continue
87 | if data_source not in ["BlueTopo", "Modeling", "BAG", "S102V21", "S102V22"]:
88 | gpkg_files = os.listdir(prefix)
89 | gpkg_files = [file for file in gpkg_files if file.endswith(".gpkg") and "Tile_Scheme" in file]
90 | if len(gpkg_files) == 0:
91 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}")
92 | return None
93 | gpkg_files.sort(reverse=True)
94 | filename = gpkg_files[0]
95 | if len(gpkg_files) > 1:
96 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {gpkg_files[0]}")
97 | destination_name = os.path.join(project_dir, data_source, f"Tessellation", gpkg_files[0])
98 | if not os.path.exists(os.path.dirname(destination_name)):
99 | os.makedirs(os.path.dirname(destination_name))
100 | try:
101 | shutil.copy(os.path.join(prefix, gpkg_files[0]), destination_name)
102 | relative = os.path.join(data_source, f"Tessellation", gpkg_files[0])
103 | except:
104 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again")
105 | sys.exit(1)
106 | else:
107 | cred = {
108 | "aws_access_key_id": "",
109 | "aws_secret_access_key": "",
110 | "config": Config(signature_version=UNSIGNED),
111 | }
112 | client = boto3.client("s3", **cred)
113 | pageinator = client.get_paginator("list_objects_v2")
114 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result()
115 | if "Contents" not in objs:
116 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}")
117 | return None
118 | tileschemes = objs["Contents"]
119 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True)
120 | source_name = tileschemes[0]["Key"]
121 | filename = os.path.basename(source_name)
122 | relative = os.path.join(data_source, f"Tessellation", filename)
123 | if len(tileschemes) > 1:
124 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {filename}")
125 | destination_name = os.path.join(project_dir, relative)
126 | if not os.path.exists(os.path.dirname(destination_name)):
127 | os.makedirs(os.path.dirname(destination_name))
128 | try:
129 | client.download_file(bucket, source_name, destination_name)
130 | except (OSError, PermissionError) as e:
131 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again")
132 | sys.exit(1)
133 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename}")
134 | cursor.execute(
135 | """REPLACE INTO catalog(file, location, downloaded)
136 | VALUES(?, ?, ?)""",
137 | ("Tessellation", relative, datetime.datetime.now()),
138 | )
139 | conn.commit()
140 | return destination_name
141 |
142 |
143 | def get_tessellation(
144 | conn: sqlite3.Connection,
145 | project_dir: str,
146 | prefix: str,
147 | data_source: str,
148 | bucket: str = "noaa-ocs-nationalbathymetry-pds",
149 | ) -> str:
150 | """
151 | Download the tessellation scheme geopackage from AWS.
152 |
153 | Parameters
154 | ----------
155 | conn : sqlite3.Connection
156 | database connection object.
157 | project_dir : str
158 | destination directory for project.
159 | prefix : str
160 | the prefix for the geopackage on AWS to find the file.
161 | data_source : str
162 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
163 | bucket : str
164 | AWS bucket for the National Bathymetric Source project.
165 |
166 | Returns
167 | -------
168 | destination_name : str
169 | the downloaded file path string.
170 | """
171 | cursor = conn.cursor()
172 | cursor.execute("SELECT * FROM tileset WHERE tilescheme = 'Tessellation'")
173 | for tilescheme in [dict(row) for row in cursor.fetchall()]:
174 | try:
175 | os.remove(os.path.join(project_dir, tilescheme["location"]))
176 | except (OSError, PermissionError):
177 | continue
178 | if data_source not in ["BlueTopo", "Modeling", "BAG", "S102V21", "S102V22"]:
179 | gpkg_files = os.listdir(prefix)
180 | gpkg_files = [file for file in gpkg_files if file.endswith(".gpkg") and "Tile_Scheme" in file]
181 | if len(gpkg_files) == 0:
182 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}")
183 | return None
184 | gpkg_files.sort(reverse=True)
185 | filename = gpkg_files[0]
186 | if len(gpkg_files) > 1:
187 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {gpkg_files[0]}")
188 | destination_name = os.path.join(project_dir, data_source, f"Tessellation", gpkg_files[0])
189 | if not os.path.exists(os.path.dirname(destination_name)):
190 | os.makedirs(os.path.dirname(destination_name))
191 | try:
192 | shutil.copy(os.path.join(prefix, gpkg_files[0]), destination_name)
193 | relative = os.path.join(data_source, f"Tessellation", gpkg_files[0])
194 | except:
195 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again")
196 | sys.exit(1)
197 | else:
198 | cred = {
199 | "aws_access_key_id": "",
200 | "aws_secret_access_key": "",
201 | "config": Config(signature_version=UNSIGNED),
202 | }
203 | client = boto3.client("s3", **cred)
204 | pageinator = client.get_paginator("list_objects_v2")
205 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result()
206 | if "Contents" not in objs:
207 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No geometry found in {prefix}")
208 | return None
209 | tileschemes = objs["Contents"]
210 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True)
211 | source_name = tileschemes[0]["Key"]
212 | filename = os.path.basename(source_name)
213 | relative = os.path.join(data_source, f"Tessellation", filename)
214 | if len(tileschemes) > 1:
215 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one geometry found in {prefix}, using {filename}")
216 | destination_name = os.path.join(project_dir, relative)
217 | if not os.path.exists(os.path.dirname(destination_name)):
218 | os.makedirs(os.path.dirname(destination_name))
219 | try:
220 | client.download_file(bucket, source_name, destination_name)
221 | except (OSError, PermissionError) as e:
222 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download tile scheme " "possibly due to conflict with an open existing file. " "Please close all files and attempt again")
223 | sys.exit(1)
224 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename}")
225 | cursor.execute(
226 | """REPLACE INTO tileset(tilescheme, location, downloaded)
227 | VALUES(?, ?, ?)""",
228 | ("Tessellation", relative, datetime.datetime.now()),
229 | )
230 | conn.commit()
231 | return destination_name
232 |
233 | # refactor later
234 | def get_xml(
235 | conn: sqlite3.Connection,
236 | project_dir: str,
237 | prefix: str,
238 | data_source: str,
239 | bucket: str = "noaa-ocs-nationalbathymetry-pds",
240 | ) -> str:
241 | """
242 | Download XML from AWS.
243 |
244 | Parameters
245 | ----------
246 | conn : sqlite3.Connection
247 | database connection object.
248 | project_dir : str
249 | destination directory for project.
250 | prefix : str
251 | the prefix for the XML on AWS to find the file.
252 | data_source : str
253 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
254 | bucket : str
255 | AWS bucket for the National Bathymetric Source project.
256 |
257 | Returns
258 | -------
259 | destination_name : str
260 | the downloaded file path string.
261 | """
262 | cursor = conn.cursor()
263 | cursor.execute("SELECT * FROM catalog WHERE file = 'XML'")
264 | for tilescheme in [dict(row) for row in cursor.fetchall()]:
265 | try:
266 | if os.path.isfile(os.path.join(project_dir, tilescheme["location"])):
267 | os.remove(os.path.join(project_dir, tilescheme["location"]))
268 | except (OSError, PermissionError):
269 | continue
270 | if data_source in ["S102V21", "S102V22"]:
271 | cred = {
272 | "aws_access_key_id": "",
273 | "aws_secret_access_key": "",
274 | "config": Config(signature_version=UNSIGNED),
275 | }
276 | client = boto3.client("s3", **cred)
277 | pageinator = client.get_paginator("list_objects_v2")
278 | objs = pageinator.paginate(Bucket=bucket, Prefix=prefix).build_full_result()
279 | if "Contents" not in objs:
280 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: No XML found in {prefix}")
281 | return None
282 | tileschemes = objs["Contents"]
283 | tileschemes.sort(key=lambda x: x["LastModified"], reverse=True)
284 | source_name = tileschemes[0]["Key"]
285 | filename = os.path.basename(source_name)
286 | relative = os.path.join(data_source, f"Data", filename)
287 | if len(tileschemes) > 1:
288 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: More than one XML found in {prefix}, using {filename}")
289 | destination_name = os.path.join(project_dir, relative)
290 | filename_renamed = 'CATALOG.XML'
291 | relative_renamed = os.path.join(data_source, f"Data", filename_renamed)
292 | destination_name_renamed = os.path.join(project_dir, relative_renamed)
293 | if not os.path.exists(os.path.dirname(destination_name)):
294 | os.makedirs(os.path.dirname(destination_name))
295 | try:
296 | client.download_file(bucket, source_name, destination_name)
297 | except (OSError, PermissionError) as e:
298 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to download XML " "possibly due to conflict with an open existing file. " "Please close all files and attempt again")
299 | sys.exit(1)
300 | try:
301 | os.replace(destination_name, destination_name_renamed)
302 | except (OSError, PermissionError) as e:
303 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: " "Failed to rename XML to CATALOG.xml." "possibly due to conflict with an open existing file named CATALOG.XML. " "Please close all files and attempt again")
304 | sys.exit(1)
305 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Downloaded {filename_renamed}")
306 | cursor.execute(
307 | """REPLACE INTO catalog(file, location, downloaded)
308 | VALUES(?, ?, ?)""",
309 | ("XML", relative, datetime.datetime.now()),
310 | )
311 | conn.commit()
312 | return destination_name_renamed
313 |
314 |
315 | def download_tiles_pmn(
316 | conn: sqlite3.Connection,
317 | project_dir: str,
318 | tile_prefix: str,
319 | data_source: str,
320 | bucket: str = "noaa-ocs-nationalbathymetry-pds",
321 | ) -> [[str], [str], [str]]:
322 | """
323 | Download tiles' files (geotiff and aux per tile).
324 |
325 | Parameters
326 | ----------
327 | conn : sqlite3.Connection
328 | database connection object.
329 | project_dir : str
330 | destination directory for project.
331 | tile_prefix : str
332 | s3 prefix for tiles.
333 | data_source : str
334 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
335 | bucket : str
336 | AWS bucket for the National Bathymetric Source project.
337 |
338 | Returns
339 | -------
340 | existing_tiles : list
341 | tiles already existing locally.
342 | tiles_found : list
343 | tiles found in s3 bucket.
344 | tiles_not_found : list
345 | tiles not found in s3 bucket.
346 | """
347 | download_tile_list = all_db_tiles(conn)
348 | # better tqdm download time estimate?
349 | random.shuffle(download_tile_list)
350 | new_tile_list = [download_tile for download_tile in download_tile_list if download_tile["file_disk"] is None]
351 | print("\nResolving fetch list...")
352 | if tile_prefix != "Local":
353 | cred = {
354 | "aws_access_key_id": "",
355 | "aws_secret_access_key": "",
356 | "config": Config(signature_version=UNSIGNED),
357 | }
358 | client = boto3.client("s3", **cred)
359 | pageinator = client.get_paginator("list_objects_v2")
360 | existing_tiles = []
361 | missing_tiles = []
362 | tiles_found = []
363 | tiles_not_found = []
364 | download_dict = {}
365 | for fields in download_tile_list:
366 | if fields["file_disk"]:
367 | if os.path.isfile(os.path.join(project_dir, fields["file_disk"])):
368 | if fields["file_verified"] != "True":
369 | missing_tiles.append(fields["tilename"])
370 | else:
371 | existing_tiles.append(fields["tilename"])
372 | continue
373 | if os.path.isfile(os.path.join(project_dir, fields["file_disk"])) is False:
374 | missing_tiles.append(fields["tilename"])
375 | if 'Navigation_Test_and_Evaluation' in tile_prefix:
376 | tilename = fields["tilename"]
377 | if fields["file_link"] and fields["file_link"] != "None":
378 | found = False
379 | for obj in client.list_objects(Bucket='noaa-ocs-nationalbathymetry-pds', Prefix=fields['file_link'].split('amazonaws.com/')[1])['Contents']:
380 | if os.path.basename(fields["file_link"])[7:13] in obj['Key']:
381 | download_dict[tilename] = {
382 | "tile": tilename,
383 | "bucket": bucket,
384 | "client": client,
385 | "subregion": fields["subregion"],
386 | "utm": fields["utm"],
387 | }
388 | source_name = obj["Key"]
389 | download_dict[tilename]["file"] = source_name
390 | download_dict[tilename]["file_disk"] = os.path.join(data_source, "Data", os.path.basename(fields["file_link"]))
391 | download_dict[tilename]["file_dest"] = os.path.join(project_dir, download_dict[tilename]["file_disk"])
392 | download_dict[tilename]["file_verified"] = fields["file_verified"]
393 | download_dict[tilename]["file_sha256_checksum"] = fields["file_sha256_checksum"]
394 | if not os.path.exists(os.path.dirname(download_dict[tilename]["file_dest"])):
395 | os.makedirs(os.path.dirname(download_dict[tilename]["file_dest"]))
396 | found = True
397 | tiles_found.append(tilename)
398 | break
399 | if found is False:
400 | tiles_not_found.append(tilename)
401 | else:
402 | raise ValueError(f"Invalid tile prefix: {tile_prefix}")
403 |
404 | def pull(downloads: dict) -> dict:
405 | """
406 | Download files and verify hash.
407 |
408 | Parameters
409 | ----------
410 | downloads : dict
411 | dict holding necessary values to execute download and checksum verification.
412 |
413 | Returns
414 | -------
415 | dict
416 | result of download attempt.
417 | """
418 | try:
419 | downloads["client"].download_file(downloads["bucket"], downloads["file"], downloads["file_dest"])
420 | if os.path.isfile(downloads["file_dest"]) is False:
421 | return {"Tile": downloads["tile"], "Result": False, "Reason": "missing download"}
422 | file_hash = hashlib.sha256(open(downloads["file_dest"], "rb").read()).hexdigest()
423 | if downloads["file_sha256_checksum"] != file_hash:
424 | return {"Tile": downloads["tile"], "Result": False, "Reason": "incorrect hash"}
425 | except Exception as e:
426 | return {"Tile": downloads["tile"], "Result": False, "Reason": "exception"}
427 | return {"Tile": downloads["tile"], "Result": True, "Reason": "success"}
428 |
429 | print(f"{len(new_tile_list)} tile(s) with new data")
430 | print(f"{len(missing_tiles)} tile(s) already downloaded are missing locally")
431 | download_length = len(download_dict.keys())
432 | results = []
433 | if download_length:
434 | print(f"\nFetching {download_length} tiles")
435 | with tqdm(
436 | total=download_length,
437 | bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} Tiles {elapsed}, {remaining} Est. Time Remaining" "{postfix}",
438 | desc=f"{data_source} Fetch",
439 | colour="#0085CA",
440 | position=0,
441 | leave=True,
442 | ) as progress:
443 | with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count() - 1) as executor:
444 | for i in executor.map(pull, download_dict.values()):
445 | results.append(i)
446 | progress.update(1)
447 | successful_downloads = [download["Tile"] for download in results if download["Result"] == True]
448 | failed_downloads = [download["Tile"] for download in results if download["Result"] == False]
449 | failed_verifications = [download["Tile"] for download in results if (download["Result"] == False and download["Reason"] == "incorrect hash")]
450 |
451 | if len(successful_downloads) > 0:
452 | if data_source.lower() == "s102v22":
453 | update_records_pmn2(conn, download_dict, successful_downloads)
454 | else:
455 | update_records_pmn1(conn, download_dict, successful_downloads)
456 |
457 | return (
458 | list(set(tiles_found)),
459 | list(set(tiles_not_found)),
460 | successful_downloads,
461 | failed_downloads,
462 | existing_tiles,
463 | missing_tiles,
464 | failed_verifications,
465 | new_tile_list,
466 | )
467 |
468 |
469 | def download_tiles(
470 | conn: sqlite3.Connection,
471 | project_dir: str,
472 | tile_prefix: str,
473 | data_source: str,
474 | bucket: str = "noaa-ocs-nationalbathymetry-pds",
475 | ) -> [[str], [str], [str]]:
476 | """
477 | Download tiles' files (geotiff and aux per tile).
478 |
479 | Parameters
480 | ----------
481 | conn : sqlite3.Connection
482 | database connection object.
483 | project_dir : str
484 | destination directory for project.
485 | tile_prefix : str
486 | s3 prefix for tiles.
487 | data_source : str
488 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
489 | bucket : str
490 | AWS bucket for the National Bathymetric Source project.
491 |
492 | Returns
493 | -------
494 | existing_tiles : list
495 | tiles already existing locally.
496 | tiles_found : list
497 | tiles found in s3 bucket.
498 | tiles_not_found : list
499 | tiles not found in s3 bucket.
500 | """
501 | download_tile_list = all_db_tiles(conn)
502 | # better tqdm download time estimate?
503 | random.shuffle(download_tile_list)
504 | new_tile_list = [download_tile for download_tile in download_tile_list if download_tile["geotiff_disk"] is None or download_tile["rat_disk"] is None]
505 | print("\nResolving fetch list...")
506 | if tile_prefix != "Local":
507 | cred = {
508 | "aws_access_key_id": "",
509 | "aws_secret_access_key": "",
510 | "config": Config(signature_version=UNSIGNED),
511 | }
512 | client = boto3.client("s3", **cred)
513 | pageinator = client.get_paginator("list_objects_v2")
514 | existing_tiles = []
515 | missing_tiles = []
516 | tiles_found = []
517 | tiles_not_found = []
518 | download_dict = {}
519 | for fields in download_tile_list:
520 | if fields["geotiff_disk"] and fields["rat_disk"]:
521 | if os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) and os.path.isfile(os.path.join(project_dir, fields["rat_disk"])):
522 | if fields["geotiff_verified"] != "True" or fields["rat_verified"] != "True":
523 | missing_tiles.append(fields["tilename"])
524 | else:
525 | existing_tiles.append(fields["tilename"])
526 | continue
527 | if os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) is False or os.path.isfile(os.path.join(project_dir, fields["rat_disk"])) is False:
528 | missing_tiles.append(fields["tilename"])
529 | if "BlueTopo" in tile_prefix or "Modeling" in tile_prefix:
530 | tilename = fields["tilename"]
531 | pfx = tile_prefix + f"/{tilename}/"
532 | objs = pageinator.paginate(Bucket=bucket, Prefix=pfx).build_full_result()
533 | if len(objs) > 0:
534 | download_dict[tilename] = {
535 | "tile": tilename,
536 | "bucket": bucket,
537 | "client": client,
538 | "subregion": fields["subregion"],
539 | "utm": fields["utm"],
540 | }
541 | for object_name in objs["Contents"]:
542 | source_name = object_name["Key"]
543 | relative = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(source_name))
544 | destination_name = os.path.join(project_dir, relative)
545 | if not os.path.exists(os.path.dirname(destination_name)):
546 | os.makedirs(os.path.dirname(destination_name))
547 | if ".aux" in source_name.lower():
548 | download_dict[tilename]["rat"] = source_name
549 | download_dict[tilename]["rat_dest"] = destination_name
550 | download_dict[tilename]["rat_verified"] = fields["rat_verified"]
551 | download_dict[tilename]["rat_disk"] = relative
552 | download_dict[tilename]["rat_sha256_checksum"] = fields["rat_sha256_checksum"]
553 | else:
554 | download_dict[tilename]["geotiff"] = source_name
555 | download_dict[tilename]["geotiff_dest"] = destination_name
556 | download_dict[tilename]["geotiff_verified"] = fields["geotiff_verified"]
557 | download_dict[tilename]["geotiff_disk"] = relative
558 | download_dict[tilename]["geotiff_sha256_checksum"] = fields["geotiff_sha256_checksum"]
559 | tiles_found.append(tilename)
560 | else:
561 | tiles_not_found.append(tilename)
562 | # refactor later
563 | elif tile_prefix == "Local":
564 | tilename = fields["tilename"]
565 | if fields["geotiff_link"] and fields["rat_link"]:
566 | download_dict[tilename] = {
567 | "tile": tilename,
568 | "subregion": fields["subregion"],
569 | "client": "Local",
570 | "utm": fields["utm"],
571 | }
572 | download_dict[tilename]["rat"] = fields["rat_link"]
573 | download_dict[tilename]["rat_disk"] = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(fields["rat_link"]))
574 | download_dict[tilename]["rat_dest"] = os.path.join(project_dir, download_dict[tilename]["rat_disk"])
575 | download_dict[tilename]["rat_verified"] = fields["rat_verified"]
576 | download_dict[tilename]["rat_sha256_checksum"] = fields["rat_sha256_checksum"]
577 | download_dict[tilename]["geotiff"] = fields["geotiff_link"]
578 | download_dict[tilename]["geotiff_disk"] = os.path.join(data_source, f"UTM{fields['utm']}", os.path.basename(fields["geotiff_link"]))
579 | download_dict[tilename]["geotiff_dest"] = os.path.join(project_dir, download_dict[tilename]["geotiff_disk"])
580 | download_dict[tilename]["geotiff_verified"] = fields["geotiff_verified"]
581 | download_dict[tilename]["geotiff_sha256_checksum"] = fields["geotiff_sha256_checksum"]
582 | if not os.path.exists(os.path.dirname(download_dict[tilename]["geotiff_dest"])):
583 | os.makedirs(os.path.dirname(download_dict[tilename]["geotiff_dest"]))
584 | tiles_found.append(tilename)
585 | else:
586 | tiles_not_found.append(tilename)
587 | else:
588 | raise ValueError(f"Invalid tile prefix: {tile_prefix}")
589 |
590 | def pull(downloads: dict) -> dict:
591 | """
592 | Download files and verify hash.
593 |
594 | Parameters
595 | ----------
596 | downloads : dict
597 | dict holding necessary values to execute download and checksum verification.
598 |
599 | Returns
600 | -------
601 | dict
602 | result of download attempt.
603 | """
604 | try:
605 | if downloads["client"] == "Local":
606 | shutil.copy(downloads["geotiff"], downloads["geotiff_dest"])
607 | shutil.copy(downloads["rat"], downloads["rat_dest"])
608 | else:
609 | downloads["client"].download_file(downloads["bucket"], downloads["geotiff"], downloads["geotiff_dest"])
610 | downloads["client"].download_file(downloads["bucket"], downloads["rat"], downloads["rat_dest"])
611 | if os.path.isfile(downloads["geotiff_dest"]) is False or os.path.isfile(downloads["rat_dest"]) is False:
612 | return {"Tile": downloads["tile"], "Result": False, "Reason": "missing download"}
613 | geotiff_hash = hashlib.sha256(open(downloads["geotiff_dest"], "rb").read()).hexdigest()
614 | rat_hash = hashlib.sha256(open(downloads["rat_dest"], "rb").read()).hexdigest()
615 | if downloads["geotiff_sha256_checksum"] != geotiff_hash or downloads["rat_sha256_checksum"] != rat_hash:
616 | return {"Tile": downloads["tile"], "Result": False, "Reason": "incorrect hash"}
617 | except Exception as e:
618 | return {"Tile": downloads["tile"], "Result": False, "Reason": "exception"}
619 | return {"Tile": downloads["tile"], "Result": True, "Reason": "success"}
620 |
621 | print(f"{len(new_tile_list)} tile(s) with new data")
622 | print(f"{len(missing_tiles)} tile(s) already downloaded are missing locally")
623 | download_length = len(download_dict.keys())
624 | results = []
625 | if download_length:
626 | print(f"\nFetching {download_length} tiles")
627 | with tqdm(
628 | total=download_length,
629 | bar_format="{desc}: {percentage:3.0f}%|{bar}| {n_fmt}/{total_fmt} Tiles {elapsed}, {remaining} Est. Time Remaining" "{postfix}",
630 | desc=f"{data_source} Fetch",
631 | colour="#0085CA",
632 | position=0,
633 | leave=True,
634 | ) as progress:
635 | with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count() - 1) as executor:
636 | for i in executor.map(pull, download_dict.values()):
637 | results.append(i)
638 | progress.update(1)
639 | successful_downloads = [download["Tile"] for download in results if download["Result"] == True]
640 | failed_downloads = [download["Tile"] for download in results if download["Result"] == False]
641 | failed_verifications = [download["Tile"] for download in results if (download["Result"] == False and download["Reason"] == "incorrect hash")]
642 |
643 | if len(successful_downloads) > 0:
644 | update_records(conn, download_dict, successful_downloads)
645 |
646 | return (
647 | list(set(tiles_found)),
648 | list(set(tiles_not_found)),
649 | successful_downloads,
650 | failed_downloads,
651 | existing_tiles,
652 | missing_tiles,
653 | failed_verifications,
654 | new_tile_list,
655 | )
656 |
657 | def get_tile_list(desired_area_filename: str, tile_scheme_filename: str) -> [str]:
658 | """
659 | Get the list of tiles inside the given polygon(s).
660 |
661 | Parameters
662 | ----------
663 | desired_area_filename : str
664 | a gdal compatible file path denoting geometries that reflect the region
665 | of interest.
666 | tile_scheme_filename : str
667 | a gdal compatible file path denoting geometries that reflect the
668 | tessellation scheme with addressing information for the desired tiles.
669 |
670 | Returns
671 | -------
672 | feature_list : str
673 | list of tiles intersecting with the provided polygon(s).
674 | """
675 | data_source = ogr.Open(desired_area_filename)
676 | if data_source is None:
677 | print("Unable to open desired area file")
678 | return None
679 | source = ogr.Open(tile_scheme_filename)
680 | if source is None:
681 | print("Unable to open tile scheme file")
682 | return None
683 | driver = ogr.GetDriverByName("MEMORY")
684 | intersect = driver.CreateDataSource("memData")
685 | intersect_lyr = intersect.CreateLayer("mem", geom_type=ogr.wkbPolygon)
686 | source_layer = source.GetLayer(0)
687 | source_crs = source_layer.GetSpatialRef()
688 | num_target_layers = data_source.GetLayerCount()
689 | feature_list = []
690 | for layer_num in range(num_target_layers):
691 | target_layer = data_source.GetLayer(layer_num)
692 | target_crs = target_layer.GetSpatialRef()
693 | same_crs = target_crs.IsSame(source_crs)
694 | if not same_crs:
695 | transformed_input = transform_layer(target_layer, source_crs)
696 | target_layer = transformed_input.GetLayer(0)
697 | target_layer.Intersection(source_layer, intersect_lyr)
698 | if not same_crs:
699 | transformed_input = None
700 | lyr_defn = intersect_lyr.GetLayerDefn()
701 | for feature in intersect_lyr:
702 | fields = {}
703 | for idx in range(lyr_defn.GetFieldCount()):
704 | fields[lyr_defn.GetFieldDefn(idx).name] = feature.GetField(idx)
705 | feature_list.append(fields)
706 | return feature_list
707 |
708 |
709 | def transform_layer(input_layer: ogr.Layer, desired_crs: osr.SpatialReference) -> ogr.DataSource:
710 | """
711 | Transform a provided ogr layer to the provided coordinate reference system.
712 |
713 | Parameters
714 | ----------
715 | input_layer : ogr.Layer
716 | the ogr layer to be transformed.
717 | desired_crs : osr.SpatialReference
718 | the coordinate system for the transform.
719 |
720 | Returns
721 | -------
722 | out_ds : ogr.DataSource
723 | transformed ogr memory datasource.
724 | """
725 | target_crs = input_layer.GetSpatialRef()
726 | coord_trans = osr.CoordinateTransformation(target_crs, desired_crs)
727 | driver = ogr.GetDriverByName("MEMORY")
728 | out_ds = driver.CreateDataSource("memData")
729 | out_lyr = out_ds.CreateLayer("out_lyr", geom_type=input_layer.GetGeomType())
730 | out_defn = out_lyr.GetLayerDefn()
731 | in_feature = input_layer.GetNextFeature()
732 | while in_feature:
733 | geom = in_feature.GetGeometryRef()
734 | geom.Transform(coord_trans)
735 | out_feature = ogr.Feature(out_defn)
736 | out_feature.SetGeometry(geom)
737 | out_lyr.CreateFeature(out_feature)
738 | out_feature = None
739 | in_feature = input_layer.GetNextFeature()
740 | return out_ds
741 |
742 |
743 | def update_records_pmn1(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None:
744 | """
745 | Update tile record and associated tables in SQLite database.
746 |
747 | Parameters
748 | ----------
749 | conn : sqlite3.Connection
750 | database connection object.
751 | download_dict : dict
752 | relevant fields per tile
753 | successful_downloads : list
754 | list of tilenames successfully downloaded
755 | """
756 | # TODO refactor more sensibly
757 | tiles_records = []
758 | subregion_records = []
759 | utm_records = []
760 | for tilename in download_dict:
761 | if tilename in successful_downloads:
762 | tiles_records.append((download_dict[tilename]["file_disk"], "True", tilename))
763 | subregion_records.append(
764 | (
765 | download_dict[tilename]["subregion"],
766 | download_dict[tilename]["utm"],
767 | None,
768 | None,
769 | None,
770 | None,
771 | None,
772 | None,
773 | None,
774 | None,
775 | 0,
776 | )
777 | )
778 | utm_records.append((download_dict[tilename]["utm"], None, None, 0))
779 | if len(tiles_records) == 0:
780 | return
781 | cursor = conn.cursor()
782 | cursor.execute("BEGIN TRANSACTION;")
783 | cursor.executemany(
784 | """
785 | UPDATE tiles
786 | SET file_disk = ?,
787 | file_verified = ?
788 | WHERE tilename = ?
789 | """,
790 | tiles_records,
791 | )
792 | cursor.executemany(
793 | """
794 | INSERT INTO vrt_subregion(region, utm, res_2_vrt,
795 | res_2_ovr, res_4_vrt, res_4_ovr, res_8_vrt, res_8_ovr,
796 | complete_vrt, complete_ovr, built)
797 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?)
798 | ON CONFLICT(region) DO UPDATE
799 | SET utm = EXCLUDED.utm,
800 | res_2_vrt = EXCLUDED.res_2_vrt,
801 | res_2_ovr = EXCLUDED.res_2_ovr,
802 | res_4_vrt = EXCLUDED.res_4_vrt,
803 | res_4_ovr = EXCLUDED.res_4_ovr,
804 | res_8_vrt = EXCLUDED.res_8_vrt,
805 | res_8_ovr = EXCLUDED.res_8_ovr,
806 | complete_vrt = EXCLUDED.complete_vrt,
807 | complete_ovr = EXCLUDED.complete_ovr,
808 | built = EXCLUDED.built
809 | """,
810 | subregion_records,
811 | )
812 | cursor.executemany(
813 | """
814 | INSERT INTO vrt_utm(utm, utm_vrt, utm_ovr, built)
815 | VALUES(?, ?, ?, ?)
816 | ON CONFLICT(utm) DO UPDATE
817 | SET utm_vrt = EXCLUDED.utm_vrt,
818 | utm_ovr = EXCLUDED.utm_ovr,
819 | built = EXCLUDED.built
820 | """,
821 | utm_records,
822 | )
823 | cursor.execute("COMMIT;")
824 | conn.commit()
825 |
826 |
827 | def update_records_pmn2(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None:
828 | """
829 | Update tile record and associated tables in SQLite database.
830 |
831 | Parameters
832 | ----------
833 | conn : sqlite3.Connection
834 | database connection object.
835 | download_dict : dict
836 | relevant fields per tile
837 | successful_downloads : list
838 | list of tilenames successfully downloaded
839 | """
840 | # TODO refactor more sensibly
841 | tiles_records = []
842 | subregion_records = []
843 | utm_records = []
844 | for tilename in download_dict:
845 | if tilename in successful_downloads:
846 | tiles_records.append((download_dict[tilename]["file_disk"], "True", tilename))
847 | subregion_records.append(
848 | (
849 | download_dict[tilename]["subregion"],
850 | download_dict[tilename]["utm"],
851 | None,
852 | None,
853 | None,
854 | None,
855 | None,
856 | None,
857 | None,
858 | None,
859 | None,
860 | None,
861 | None,
862 | None,
863 | None,
864 | None,
865 | None,
866 | None,
867 | 0,
868 | 0
869 | )
870 | )
871 | utm_records.append((download_dict[tilename]["utm"], None, None, None, None, None, 0, 0, 0))
872 | if len(tiles_records) == 0:
873 | return
874 | cursor = conn.cursor()
875 | cursor.execute("BEGIN TRANSACTION;")
876 | cursor.executemany(
877 | """
878 | UPDATE tiles
879 | SET file_disk = ?,
880 | file_verified = ?
881 | WHERE tilename = ?
882 | """,
883 | tiles_records,
884 | )
885 | cursor.executemany(
886 | """
887 | INSERT INTO vrt_subregion(region, utm,
888 | res_2_subdataset1_vrt,
889 | res_2_subdataset1_ovr,
890 | res_2_subdataset2_vrt,
891 | res_2_subdataset2_ovr,
892 | res_4_subdataset1_vrt,
893 | res_4_subdataset1_ovr,
894 | res_4_subdataset2_vrt,
895 | res_4_subdataset2_ovr,
896 | res_8_subdataset1_vrt,
897 | res_8_subdataset1_ovr,
898 | res_8_subdataset2_vrt,
899 | res_8_subdataset2_ovr,
900 | complete_subdataset1_vrt,
901 | complete_subdataset1_ovr,
902 | complete_subdataset2_vrt,
903 | complete_subdataset2_ovr,
904 | built_subdataset1,
905 | built_subdataset2)
906 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?, ?, ?, ?, ?, ? ,? , ?, ? ,?)
907 | ON CONFLICT(region) DO UPDATE
908 | SET utm = EXCLUDED.utm,
909 | res_2_subdataset1_vrt = EXCLUDED.res_2_subdataset1_vrt,
910 | res_2_subdataset1_ovr = EXCLUDED.res_2_subdataset1_ovr,
911 | res_2_subdataset2_vrt = EXCLUDED.res_2_subdataset2_vrt,
912 | res_2_subdataset2_ovr = EXCLUDED.res_2_subdataset2_ovr,
913 | res_4_subdataset1_vrt = EXCLUDED.res_4_subdataset1_vrt,
914 | res_4_subdataset1_ovr = EXCLUDED.res_4_subdataset1_ovr,
915 | res_4_subdataset2_vrt = EXCLUDED.res_4_subdataset2_vrt,
916 | res_4_subdataset2_ovr = EXCLUDED.res_4_subdataset2_ovr,
917 |
918 | res_8_subdataset1_vrt = EXCLUDED.res_8_subdataset1_vrt,
919 | res_8_subdataset1_ovr = EXCLUDED.res_8_subdataset1_ovr,
920 | res_8_subdataset2_vrt = EXCLUDED.res_8_subdataset2_vrt,
921 | res_8_subdataset2_ovr = EXCLUDED.res_8_subdataset2_ovr,
922 |
923 | complete_subdataset1_vrt = EXCLUDED.complete_subdataset1_vrt,
924 | complete_subdataset1_ovr = EXCLUDED.complete_subdataset1_ovr,
925 |
926 | complete_subdataset2_vrt = EXCLUDED.complete_subdataset2_vrt,
927 | complete_subdataset2_ovr = EXCLUDED.complete_subdataset2_ovr,
928 |
929 | built_subdataset1 = EXCLUDED.built_subdataset1,
930 | built_subdataset2 = EXCLUDED.built_subdataset2
931 | """,
932 | subregion_records,
933 | )
934 | cursor.executemany(
935 | """
936 | INSERT INTO vrt_utm(utm,
937 | utm_subdataset1_vrt, utm_subdataset1_ovr,
938 | utm_subdataset2_vrt, utm_subdataset2_ovr,
939 | utm_combined_vrt,
940 | built_subdataset1,
941 | built_subdataset2,
942 | built_combined)
943 | VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
944 | ON CONFLICT(utm) DO UPDATE
945 | SET utm_subdataset1_vrt = EXCLUDED.utm_subdataset1_vrt,
946 | utm_subdataset1_ovr = EXCLUDED.utm_subdataset1_ovr,
947 | utm_subdataset2_vrt = EXCLUDED.utm_subdataset2_vrt,
948 | utm_subdataset2_ovr = EXCLUDED.utm_subdataset2_ovr,
949 | utm_combined_vrt = EXCLUDED.utm_combined_vrt,
950 | built_subdataset1 = EXCLUDED.built_subdataset1,
951 | built_subdataset2 = EXCLUDED.built_subdataset2,
952 | built_combined = EXCLUDED.built_combined
953 | """,
954 | utm_records,
955 | )
956 | cursor.execute("COMMIT;")
957 | conn.commit()
958 |
959 |
960 | def update_records(conn: sqlite3.Connection, download_dict: dict, successful_downloads: list) -> None:
961 | """
962 | Update tile record and associated tables in SQLite database.
963 |
964 | Parameters
965 | ----------
966 | conn : sqlite3.Connection
967 | database connection object.
968 | download_dict : dict
969 | relevant fields per tile
970 | successful_downloads : list
971 | list of tilenames successfully downloaded
972 | """
973 | # TODO refactor more sensibly
974 | tiles_records = []
975 | subregion_records = []
976 | utm_records = []
977 | for tilename in download_dict:
978 | if tilename in successful_downloads:
979 | tiles_records.append((download_dict[tilename]["geotiff_disk"], download_dict[tilename]["rat_disk"], "True", "True", tilename))
980 | subregion_records.append(
981 | (
982 | download_dict[tilename]["subregion"],
983 | download_dict[tilename]["utm"],
984 | None,
985 | None,
986 | None,
987 | None,
988 | None,
989 | None,
990 | None,
991 | None,
992 | 0,
993 | )
994 | )
995 | utm_records.append((download_dict[tilename]["utm"], None, None, 0))
996 | if len(tiles_records) == 0:
997 | return
998 | cursor = conn.cursor()
999 | cursor.execute("BEGIN TRANSACTION;")
1000 | cursor.executemany(
1001 | """
1002 | UPDATE tiles
1003 | SET geotiff_disk = ?, rat_disk = ?,
1004 | geotiff_verified = ?, rat_verified = ?
1005 | WHERE tilename = ?
1006 | """,
1007 | tiles_records,
1008 | )
1009 | cursor.executemany(
1010 | """
1011 | INSERT INTO vrt_subregion(region, utm, res_2_vrt,
1012 | res_2_ovr, res_4_vrt, res_4_ovr, res_8_vrt, res_8_ovr,
1013 | complete_vrt, complete_ovr, built)
1014 | VALUES(?, ?, ?, ?, ? ,? , ?, ? ,? ,? ,?)
1015 | ON CONFLICT(region) DO UPDATE
1016 | SET utm = EXCLUDED.utm,
1017 | res_2_vrt = EXCLUDED.res_2_vrt,
1018 | res_2_ovr = EXCLUDED.res_2_ovr,
1019 | res_4_vrt = EXCLUDED.res_4_vrt,
1020 | res_4_ovr = EXCLUDED.res_4_ovr,
1021 | res_8_vrt = EXCLUDED.res_8_vrt,
1022 | res_8_ovr = EXCLUDED.res_8_ovr,
1023 | complete_vrt = EXCLUDED.complete_vrt,
1024 | complete_ovr = EXCLUDED.complete_ovr,
1025 | built = EXCLUDED.built
1026 | """,
1027 | subregion_records,
1028 | )
1029 | cursor.executemany(
1030 | """
1031 | INSERT INTO vrt_utm(utm, utm_vrt, utm_ovr, built)
1032 | VALUES(?, ?, ?, ?)
1033 | ON CONFLICT(utm) DO UPDATE
1034 | SET utm_vrt = EXCLUDED.utm_vrt,
1035 | utm_ovr = EXCLUDED.utm_ovr,
1036 | built = EXCLUDED.built
1037 | """,
1038 | utm_records,
1039 | )
1040 | cursor.execute("COMMIT;")
1041 | conn.commit()
1042 |
1043 |
1044 | def insert_new_pmn(conn: sqlite3.Connection, tiles: list, data_source) -> int:
1045 | """
1046 | Insert new tile records into SQLite database.
1047 |
1048 | Parameters
1049 | ----------
1050 | conn : sqlite3.Connection
1051 | database connection object.
1052 | tiles : list of dict
1053 | list of tile records.
1054 |
1055 | Returns
1056 | -------
1057 | int
1058 | amount of delivered tiles from input tiles.
1059 | """
1060 | if data_source.lower() == "bag":
1061 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["BAG"] and tile["BAG"].lower() != "none"]
1062 | elif data_source.lower() == "s102v21":
1063 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["S102V21"] and tile["S102V21"].lower() != "none"]
1064 | elif data_source.lower() == "s102v22":
1065 | tile_list = [(tile["TILE_ID"],) for tile in tiles if tile["ISSUANCE"] and tile["S102V22"] and tile["S102V22"].lower() != "none"]
1066 | else:
1067 | raise ValueError(f"Unexpected data source {data_source}")
1068 | cursor = conn.cursor()
1069 | cursor.executemany(
1070 | """INSERT INTO tiles(tilename)
1071 | VALUES(?) ON CONFLICT DO NOTHING""",
1072 | tile_list,
1073 | )
1074 | conn.commit()
1075 | return len(tile_list)
1076 |
1077 |
1078 | def insert_new(conn: sqlite3.Connection, tiles: list) -> int:
1079 | """
1080 | Insert new tile records into SQLite database.
1081 |
1082 | Parameters
1083 | ----------
1084 | conn : sqlite3.Connection
1085 | database connection object.
1086 | tiles : list of dict
1087 | list of tile records.
1088 |
1089 | Returns
1090 | -------
1091 | int
1092 | amount of delivered tiles from input tiles.
1093 | """
1094 | cursor = conn.cursor()
1095 | tile_list = [(tile["tile"],) for tile in tiles if tile["Delivered_Date"] and tile["GeoTIFF_Link"] and tile["RAT_Link"]]
1096 | cursor.executemany(
1097 | """INSERT INTO tiles(tilename)
1098 | VALUES(?) ON CONFLICT DO NOTHING""",
1099 | tile_list,
1100 | )
1101 | conn.commit()
1102 | return len(tile_list)
1103 |
1104 |
1105 | def all_db_tiles(conn: sqlite3.Connection) -> list:
1106 | """
1107 | Retrieve all tile records in tiles table of SQLite database.
1108 |
1109 | Parameters
1110 | ----------
1111 | conn : sqlite3.Connection
1112 | database connection object.
1113 |
1114 | Returns
1115 | -------
1116 | list
1117 | all tile records as dictionaries.
1118 | """
1119 | cursor = conn.cursor()
1120 | cursor.execute("SELECT * FROM tiles")
1121 | return [dict(row) for row in cursor.fetchall()]
1122 |
1123 |
1124 | def upsert_tiles_pmn(conn: sqlite3.Connection, project_dir: str, tile_scheme: str, data_source: str) -> None:
1125 | """
1126 | Update tile records in database with latest deliveries found in tilescheme.
1127 |
1128 | Parameters
1129 | ----------
1130 | conn : sqlite3.Connection
1131 | database connection object.
1132 | project_dir : str
1133 | destination directory for project.
1134 | tile_scheme : str
1135 | a gdal compatible file path with the tessellation scheme.
1136 | """
1137 | # database records holds current set
1138 | # tilescheme polygons has latest set
1139 | # use the two to see where new tiles or updates to existing tiles exist
1140 | # use global tileset to map its region
1141 | db_tiles = all_db_tiles(conn)
1142 | ts_ds = ogr.Open(tile_scheme)
1143 | ts_lyr = ts_ds.GetLayer()
1144 | ts_defn = ts_lyr.GetLayerDefn()
1145 | ts_tiles = []
1146 | for ft in ts_lyr:
1147 | field_list = {}
1148 | geom = ft.GetGeometryRef()
1149 | field_list["wkt_geom"] = geom.ExportToWkt()
1150 | for field_num in range(ts_defn.GetFieldCount()):
1151 | field_name = ts_defn.GetFieldDefn(field_num).name
1152 | field_list[field_name.lower()] = ft.GetField(field_name)
1153 | if data_source == 'BAG':
1154 | field_list['tile'] = ft.GetField('tile_id')
1155 | field_list['file_link'] = ft.GetField('bag')
1156 | field_list['file_sha256_checksum'] = ft.GetField('bag_sha256')
1157 | field_list['delivered_date'] = ft.GetField('issuance')
1158 | field_list['utm'] = ft.GetField('utm')
1159 | field_list['resolution'] = ft.GetField('resolution')
1160 | if data_source == 'S102V21':
1161 | field_list['tile'] = ft.GetField('tile_id')
1162 | field_list['file_link'] = ft.GetField('s102v21')
1163 | field_list['file_sha256_checksum'] = ft.GetField('s102v21_sha256')
1164 | field_list['delivered_date'] = ft.GetField('issuance')
1165 | field_list['utm'] = ft.GetField('utm')
1166 | field_list['resolution'] = ft.GetField('resolution')
1167 | if data_source == 'S102V22':
1168 | field_list['tile'] = ft.GetField('tile_id')
1169 | field_list['file_link'] = ft.GetField('s102v22')
1170 | field_list['file_sha256_checksum'] = ft.GetField('s102v22_sha256')
1171 | field_list['delivered_date'] = ft.GetField('issuance')
1172 | field_list['utm'] = ft.GetField('utm')
1173 | field_list['resolution'] = ft.GetField('resolution')
1174 | ts_tiles.append(field_list)
1175 | ts_ds = None
1176 | global_tileset = global_region_tileset(1, "1.2")
1177 | gs = ogr.Open(global_tileset)
1178 | lyr = gs.GetLayer()
1179 | insert_tiles = []
1180 | for db_tile in db_tiles:
1181 | ts_tile = [ts_tile for ts_tile in ts_tiles if db_tile["tilename"] == ts_tile["tile"]]
1182 | if len(ts_tile) == 0:
1183 | print(f"Warning: {db_tile['tilename']} in database appears to have " "been removed from latest tilescheme")
1184 | continue
1185 | if len(ts_tile) > 1:
1186 | raise ValueError(f"More than one tilename {db_tile['tilename']} " "found in tileset.\n" "Please alert NBS.\n" "{debug_info}")
1187 | ts_tile = ts_tile[0]
1188 | # inserted into db only when delivered_date exists
1189 | # so value of None in ts_tile indicates delivered_date was removed
1190 | if ts_tile["delivered_date"] is None:
1191 | print("Warning: Unexpected removal of delivered date " f"for tile {db_tile['tilename']}")
1192 | continue
1193 | if (db_tile["delivered_date"] is None) or (ts_tile["delivered_date"] > db_tile["delivered_date"]):
1194 | try:
1195 | if db_tile["file_disk"] and os.path.isfile(os.path.join(project_dir, db_tile["file_disk"])):
1196 | os.remove(os.path.join(project_dir, db_tile["file_disk"]))
1197 | except (OSError, PermissionError) as e:
1198 | print("Failed to remove older files for tile " f"{db_tile['tilename']}. Please close all files and " "attempt fetch again.")
1199 | gdal.Unlink(global_tileset)
1200 | raise e
1201 | lyr.SetSpatialFilter(ogr.CreateGeometryFromWkt(ts_tile["wkt_geom"]))
1202 | if lyr.GetFeatureCount() != 1:
1203 | gdal.Unlink(global_tileset)
1204 | raise ValueError("Error getting subregion for " f"{db_tile['tilename']}. \n" f"{lyr.GetFeatureCount()} subregion(s). \n" f"{debug_info}")
1205 | region_ft = lyr.GetNextFeature()
1206 | ts_tile["region"] = region_ft.GetField("Region")
1207 | insert_tiles.append(
1208 | (
1209 | ts_tile["tile"],
1210 | ts_tile["file_link"],
1211 | ts_tile["delivered_date"],
1212 | ts_tile["resolution"],
1213 | ts_tile["utm"],
1214 | ts_tile["region"],
1215 | ts_tile["file_sha256_checksum"],
1216 | )
1217 | )
1218 | if insert_tiles:
1219 | cursor = conn.cursor()
1220 | for ins in insert_tiles:
1221 | if len(ins) != 7:
1222 | print(len(ins))
1223 | raise ValueError()
1224 | cursor.executemany(
1225 | """
1226 | INSERT INTO tiles(tilename, file_link,
1227 | delivered_date, resolution, utm, subregion,
1228 | file_sha256_checksum)
1229 | VALUES(?, ?, ? ,? ,? ,?, ?)
1230 | ON CONFLICT(tilename) DO UPDATE
1231 | SET file_link = EXCLUDED.file_link,
1232 | delivered_date = EXCLUDED.delivered_date,
1233 | resolution = EXCLUDED.resolution,
1234 | utm = EXCLUDED.utm,
1235 | subregion = EXCLUDED.subregion,
1236 | file_sha256_checksum = EXCLUDED.file_sha256_checksum,
1237 | file_verified = Null,
1238 | file_disk = Null
1239 | """,
1240 | insert_tiles,
1241 | )
1242 | conn.commit()
1243 | gdal.Unlink(global_tileset)
1244 |
1245 |
1246 | def upsert_tiles(conn: sqlite3.Connection, project_dir: str, tile_scheme: str) -> None:
1247 | """
1248 | Update tile records in database with latest deliveries found in tilescheme.
1249 |
1250 | Parameters
1251 | ----------
1252 | conn : sqlite3.Connection
1253 | database connection object.
1254 | project_dir : str
1255 | destination directory for project.
1256 | tile_scheme : str
1257 | a gdal compatible file path with the tessellation scheme.
1258 | """
1259 | # database records holds current set
1260 | # tilescheme polygons has latest set
1261 | # use the two to see where new tiles or updates to existing tiles exist
1262 | # use global tileset to map its region
1263 | db_tiles = all_db_tiles(conn)
1264 | ts_ds = ogr.Open(tile_scheme)
1265 | ts_lyr = ts_ds.GetLayer()
1266 | ts_defn = ts_lyr.GetLayerDefn()
1267 | ts_tiles = []
1268 | for ft in ts_lyr:
1269 | field_list = {}
1270 | geom = ft.GetGeometryRef()
1271 | field_list["wkt_geom"] = geom.ExportToWkt()
1272 | for field_num in range(ts_defn.GetFieldCount()):
1273 | field_name = ts_defn.GetFieldDefn(field_num).name
1274 | field_list[field_name.lower()] = ft.GetField(field_name)
1275 | ts_tiles.append(field_list)
1276 | ts_ds = None
1277 | global_tileset = global_region_tileset(1, "1.2")
1278 | gs = ogr.Open(global_tileset)
1279 | lyr = gs.GetLayer()
1280 | insert_tiles = []
1281 | for db_tile in db_tiles:
1282 | ts_tile = [ts_tile for ts_tile in ts_tiles if db_tile["tilename"] == ts_tile["tile"]]
1283 | if len(ts_tile) == 0:
1284 | print(f"Warning: {db_tile['tilename']} in database appears to have " "been removed from latest tilescheme")
1285 | continue
1286 | if len(ts_tile) > 1:
1287 | raise ValueError(f"More than one tilename {db_tile['tilename']} " "found in tileset.\n" "Please alert NBS.\n" "{debug_info}")
1288 | ts_tile = ts_tile[0]
1289 | # inserted into db only when delivered_date exists
1290 | # so value of None in ts_tile indicates delivered_date was removed
1291 | if ts_tile["delivered_date"] is None:
1292 | print("Warning: Unexpected removal of delivered date " f"for tile {db_tile['tilename']}")
1293 | continue
1294 | if (db_tile["delivered_date"] is None) or (ts_tile["delivered_date"] > db_tile["delivered_date"]):
1295 | try:
1296 | if db_tile["geotiff_disk"] and os.path.isfile(os.path.join(project_dir, db_tile["geotiff_disk"])):
1297 | os.remove(os.path.join(project_dir, db_tile["geotiff_disk"]))
1298 | if db_tile["rat_disk"] and os.path.isfile(os.path.join(project_dir, db_tile["rat_disk"])):
1299 | os.remove(os.path.join(project_dir, db_tile["rat_disk"]))
1300 | except (OSError, PermissionError) as e:
1301 | print("Failed to remove older files for tile " f"{db_tile['tilename']}. Please close all files and " "attempt fetch again.")
1302 | gdal.Unlink(global_tileset)
1303 | raise e
1304 | lyr.SetSpatialFilter(ogr.CreateGeometryFromWkt(ts_tile["wkt_geom"]))
1305 | if lyr.GetFeatureCount() != 1:
1306 | gdal.Unlink(global_tileset)
1307 | raise ValueError("Error getting subregion for " f"{db_tile['tilename']}. \n" f"{lyr.GetFeatureCount()} subregion(s). \n" f"{debug_info}")
1308 | region_ft = lyr.GetNextFeature()
1309 | ts_tile["region"] = region_ft.GetField("Region")
1310 | insert_tiles.append(
1311 | (
1312 | ts_tile["tile"],
1313 | ts_tile["geotiff_link"],
1314 | ts_tile["rat_link"],
1315 | ts_tile["delivered_date"],
1316 | ts_tile["resolution"],
1317 | ts_tile["utm"],
1318 | ts_tile["region"],
1319 | ts_tile["geotiff_sha256_checksum"],
1320 | ts_tile["rat_sha256_checksum"],
1321 | )
1322 | )
1323 | if insert_tiles:
1324 | cursor = conn.cursor()
1325 | for ins in insert_tiles:
1326 | if len(ins) != 9:
1327 | print(len(ins))
1328 | raise ValueError()
1329 | cursor.executemany(
1330 | """
1331 | INSERT INTO tiles(tilename, geotiff_link, rat_link,
1332 | delivered_date, resolution, utm, subregion,
1333 | geotiff_sha256_checksum, rat_sha256_checksum)
1334 | VALUES(?, ?, ? ,? ,? ,?, ?, ?, ?)
1335 | ON CONFLICT(tilename) DO UPDATE
1336 | SET geotiff_link = EXCLUDED.geotiff_link,
1337 | rat_link = EXCLUDED.rat_link,
1338 | delivered_date = EXCLUDED.delivered_date,
1339 | resolution = EXCLUDED.resolution,
1340 | utm = EXCLUDED.utm,
1341 | subregion = EXCLUDED.subregion,
1342 | geotiff_sha256_checksum = EXCLUDED.geotiff_sha256_checksum,
1343 | rat_sha256_checksum = EXCLUDED.rat_sha256_checksum,
1344 | geotiff_verified = Null,
1345 | rat_verified = Null,
1346 | geotiff_disk = Null,
1347 | rat_disk = Null
1348 | """,
1349 | insert_tiles,
1350 | )
1351 | conn.commit()
1352 | gdal.Unlink(global_tileset)
1353 |
1354 |
1355 | def convert_base(charset: str, input: int, minimum: int) -> str:
1356 | """
1357 | Convert integer to new base system using the given symbols with a
1358 | minimum length filled using leading characters of the lowest value in the
1359 | given charset.
1360 |
1361 | Parameters
1362 | ----------
1363 | charset : str
1364 | length of this str will be the new base system and characters
1365 | given will be the symbols used.
1366 | input : int
1367 | integer to convert.
1368 | minimum : int
1369 | returned output will be adjusted to this desired length using
1370 | leading characters of the lowest value in charset.
1371 |
1372 | Returns
1373 | -------
1374 | str
1375 | converted value in given system.
1376 | """
1377 | res = ""
1378 | while input:
1379 | res += charset[input % len(charset)]
1380 | input //= len(charset)
1381 | return (res[::-1] or charset[0]).rjust(minimum, charset[0])
1382 |
1383 |
1384 | def global_region_tileset(index: int, size: str) -> str:
1385 | """
1386 | Generate a global tilescheme.
1387 |
1388 | Parameters
1389 | ----------
1390 | index : int
1391 | index of tileset to determine tilescheme name.
1392 | size : str
1393 | length of the side of an individual tile in degrees.
1394 |
1395 | Returns
1396 | -------
1397 | location : str
1398 | gdal memory filepath to global tilescheme.
1399 | """
1400 | charset = "BCDFGHJKLMNPQRSTVWXZ"
1401 | name = convert_base(charset, index, 2)
1402 | roundnum = len(size.split(".")[1])
1403 | size = float(size)
1404 | location = "/vsimem/global_tileset.gpkg"
1405 | ds = ogr.GetDriverByName("GPKG").CreateDataSource(location)
1406 | srs = osr.SpatialReference()
1407 | srs.ImportFromEPSG(4326)
1408 | layer = ds.CreateLayer("global_tileset", srs, ogr.wkbMultiPolygon)
1409 | layer.CreateFields(
1410 | [
1411 | ogr.FieldDefn("Region", ogr.OFTString),
1412 | ogr.FieldDefn("UTM_Zone", ogr.OFTInteger),
1413 | ogr.FieldDefn("Hemisphere", ogr.OFTString),
1414 | ]
1415 | )
1416 | layer_defn = layer.GetLayerDefn()
1417 | layer.StartTransaction()
1418 | y = round(-90 + size, roundnum)
1419 | y_count = 0
1420 | while y <= 90:
1421 | ns = "N"
1422 | if y <= 0:
1423 | ns = "S"
1424 | x = -180
1425 | x_count = 0
1426 | while x < 180:
1427 | current_utm = "{:02d}".format(int(np.ceil((180 + x + 0.00000001) / 6)))
1428 | ring = ogr.Geometry(ogr.wkbLinearRing)
1429 | ring.AddPoint_2D(x, y)
1430 | ring.AddPoint_2D(round(x + size, roundnum), y)
1431 | ring.AddPoint_2D(round(x + size, roundnum), round(y - size, roundnum))
1432 | ring.AddPoint_2D(x, round(y - size, roundnum))
1433 | ring.AddPoint_2D(x, y)
1434 | poly = ogr.Geometry(ogr.wkbPolygon)
1435 | poly.AddGeometry(ring)
1436 | poly = poly.Buffer(-0.002)
1437 | multipoly = ogr.Geometry(ogr.wkbMultiPolygon)
1438 | multipoly.AddGeometry(poly)
1439 | feat = ogr.Feature(layer_defn)
1440 | feat.SetGeometry(multipoly)
1441 | charset = "2456789BCDFGHJKLMNPQRSTVWXZ"
1442 | x_rep = convert_base(charset, x_count, 3)
1443 | y_rep = convert_base(charset, y_count, 3)
1444 | feat.SetField("Region", f"{name}{x_rep}{y_rep}")
1445 | feat.SetField("UTM_Zone", current_utm)
1446 | feat.SetField("Hemisphere", ns)
1447 | layer.CreateFeature(feat)
1448 | x = round(x + size, roundnum)
1449 | x_count += 1
1450 | y = round(y + size, roundnum)
1451 | y_count += 1
1452 | layer.CommitTransaction()
1453 | return location
1454 |
1455 |
1456 | def sweep_files(conn: sqlite3.Connection, project_dir: str) -> None:
1457 | """
1458 | Remove missing files from tracking.
1459 |
1460 | Parameters
1461 | ----------
1462 | conn : sqlite3.Connection
1463 | database connection object.
1464 | project_dir : str
1465 | destination directory for project.
1466 | """
1467 | db_tiles = all_db_tiles(conn)
1468 | cursor = conn.cursor()
1469 | untracked_tiles = 0
1470 | untracked_subregions = 0
1471 | untracked_utms = 0
1472 | for fields in db_tiles:
1473 | if (fields["geotiff_disk"] and os.path.isfile(os.path.join(project_dir, fields["geotiff_disk"])) == False) or (fields["rat_disk"] and os.path.isfile(os.path.join(project_dir, fields["rat_disk"])) == False):
1474 | cursor.execute(
1475 | "DELETE FROM tiles where tilename = ? RETURNING *",
1476 | (fields["tilename"],),
1477 | )
1478 | del_tile = cursor.fetchone()
1479 | if del_tile:
1480 | untracked_tiles += 1
1481 | files = ["geotiff_disk", "rat_disk"]
1482 | for file in files:
1483 | try:
1484 | if del_tile[file] and os.path.isfile(os.path.join(project_dir, del_tile[file])):
1485 | os.remove(os.path.join(project_dir, del_tile[file]))
1486 | except (OSError, PermissionError):
1487 | continue
1488 | cursor.execute(
1489 | """DELETE FROM vrt_subregion
1490 | WHERE region NOT IN
1491 | (SELECT subregion
1492 | FROM tiles
1493 | WHERE geotiff_disk is not null
1494 | AND rat_disk is not null)
1495 | RETURNING *;"""
1496 | )
1497 | del_subregions = cursor.fetchall()
1498 | untracked_subregions += len(del_subregions)
1499 | for del_subregion in del_subregions:
1500 | files = [
1501 | "res_2_vrt",
1502 | "res_2_ovr",
1503 | "res_4_vrt",
1504 | "res_4_ovr",
1505 | "res_8_vrt",
1506 | "res_8_ovr",
1507 | "complete_vrt",
1508 | "complete_ovr",
1509 | ]
1510 | for file in files:
1511 | try:
1512 | if del_subregion[file] and os.path.isfile(os.path.join(project_dir, del_subregion[file])):
1513 | os.remove(os.path.join(project_dir, del_subregion[file]))
1514 | except (OSError, PermissionError):
1515 | continue
1516 | cursor.execute(
1517 | """DELETE FROM vrt_utm
1518 | WHERE utm NOT IN
1519 | (SELECT utm
1520 | FROM tiles
1521 | WHERE geotiff_disk is not null
1522 | AND rat_disk is not null)
1523 | RETURNING *;"""
1524 | )
1525 | del_utms = cursor.fetchall()
1526 | untracked_utms += len(del_utms)
1527 | for del_utm in del_utms:
1528 | files = ["utm_vrt", "utm_ovr"]
1529 | for file in files:
1530 | try:
1531 | if (del_utm[file]) and (os.path.isfile(os.path.join(project_dir, del_utm[file]))):
1532 | os.remove(os.path.join(project_dir, del_utm[file]))
1533 | except (OSError, PermissionError):
1534 | continue
1535 | conn.commit()
1536 | return untracked_tiles, untracked_subregions, untracked_utms
1537 |
1538 |
1539 | def main(
1540 | project_dir: str,
1541 | desired_area_filename: str = None,
1542 | untrack_missing: bool = False,
1543 | data_source: str = None,
1544 | ) -> [[str], [str]]:
1545 | """
1546 | Track tiles. Download tiles. Update already tracked tiles.
1547 |
1548 | Parameters
1549 | ----------
1550 | project_dir : str
1551 | The directory path to use. Will create if it does not currently exist.
1552 | Required argument.
1553 | desired_area_filename : str
1554 | The geometry file to use to find intersecting available tiles.
1555 | The returned tile ids at the time of intersection will be added to
1556 | tracking. fetch_tiles will stay up to date with the latest data
1557 | available from the NBS for all tracked tiles. This argument is
1558 | not necessary if you do not want to add new tile ids to tracking.
1559 | untrack_missing : bool
1560 | This flag will untrack tiles that have missing files in your local
1561 | download directory. fetch_tiles will no longer retrieve these tiles.
1562 | data_source : str
1563 | The NBS offers various products to different end-users. Some are available publicly.
1564 | Use this argument to identify which product you want. BlueTopo is the default.
1565 |
1566 | Returns
1567 | -------
1568 | successful_downloads : list
1569 | tiles downloaded.
1570 | list
1571 | tiles not found in s3 or failed during download.
1572 | """
1573 | project_dir = os.path.expanduser(project_dir)
1574 | if desired_area_filename:
1575 | desired_area_filename = os.path.expanduser(desired_area_filename)
1576 | if os.path.isabs(project_dir) is False or (desired_area_filename and os.path.isabs(desired_area_filename) is False):
1577 | print("Please use an absolute path for your project folder and geometry path.")
1578 | if "windows" not in platform.system().lower():
1579 | print("Typically for non windows systems this means starting with '/'")
1580 | sys.exit(1)
1581 |
1582 | if data_source is None or data_source.lower() == "bluetopo":
1583 | data_source = "BlueTopo"
1584 | geom_prefix = "BlueTopo/_BlueTopo_Tile_Scheme/BlueTopo_Tile_Scheme"
1585 | tile_prefix = "BlueTopo"
1586 |
1587 | elif data_source.lower() == "modeling":
1588 | data_source = "Modeling"
1589 | geom_prefix = "Test-and-Evaluation/Modeling/_Modeling_Tile_Scheme/Modeling_Tile_Scheme"
1590 | tile_prefix = "Test-and-Evaluation/Modeling"
1591 |
1592 | elif data_source.lower() == "bag":
1593 | data_source = "BAG"
1594 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme"
1595 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/BAG"
1596 |
1597 | elif data_source.lower() == "s102v21":
1598 | data_source = "S102V21"
1599 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme"
1600 | xml_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V21/_CATALOG"
1601 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V21"
1602 |
1603 | elif data_source.lower() == "s102v22":
1604 | data_source = "S102V22"
1605 | geom_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/_Navigation_Tile_Scheme/Navigation_Tile_Scheme"
1606 | xml_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V22/_CATALOG"
1607 | tile_prefix = "Test-and-Evaluation/Navigation_Test_and_Evaluation/S102V22"
1608 |
1609 |
1610 | elif os.path.isdir(data_source):
1611 | geom_prefix = data_source
1612 | files = os.listdir(geom_prefix)
1613 | files = [file for file in files if file.endswith(".gpkg") and "Tile_Scheme" in file]
1614 | files.sort(reverse=True)
1615 | data_source = None
1616 | for file in files:
1617 | ds_basefile = os.path.basename(file)
1618 | data_source = ds_basefile.split("_")[0]
1619 | break
1620 | if data_source is None:
1621 | raise ValueError(f"Please pass in directory which contains a tile scheme file if you're using a local data source.")
1622 | tile_prefix = "Local"
1623 |
1624 | else:
1625 | raise ValueError(f"Invalid data source: {data_source}")
1626 |
1627 | start = datetime.datetime.now()
1628 | print(f"[{start.strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Beginning work in project folder: {project_dir}")
1629 | if not os.path.exists(project_dir):
1630 | os.makedirs(project_dir)
1631 |
1632 | if data_source.lower() in ("bag", "s102v21"):
1633 | conn = connect_to_survey_registry_pmn1(project_dir, data_source)
1634 | elif data_source.lower() in ("s102v22"):
1635 | conn = connect_to_survey_registry_pmn2(project_dir, data_source)
1636 | else:
1637 | conn = connect_to_survey_registry(project_dir, data_source)
1638 |
1639 | if data_source.lower() in ("s102v21", "s102v22"):
1640 | get_xml(conn, project_dir, xml_prefix, data_source)
1641 |
1642 | if data_source.lower() in ("bag", "s102v21", "s102v22"):
1643 | geom_file = get_tessellation_pmn(conn, project_dir, geom_prefix, data_source)
1644 | else:
1645 | geom_file = get_tessellation(conn, project_dir, geom_prefix, data_source)
1646 |
1647 | if untrack_missing:
1648 | untracked_tiles, untracked_sr, untracked_utms = sweep_files(conn, project_dir)
1649 | print(f"Untracked {untracked_tiles} tile(s), " f"{untracked_sr} subregion vrt(s), " f"{untracked_utms} utm vrt(s)")
1650 |
1651 | if desired_area_filename:
1652 | if not os.path.isfile(desired_area_filename):
1653 | raise ValueError(f"The geometry {desired_area_filename} for " "determining what to download does not exist.")
1654 | if data_source.lower() in ("bag", "s102v21", "s102v22"):
1655 | tile_list = get_tile_list(desired_area_filename, geom_file)
1656 | available_tile_count = insert_new_pmn(conn, tile_list, data_source)
1657 | else:
1658 | tile_list = get_tile_list(desired_area_filename, geom_file)
1659 | available_tile_count = insert_new(conn, tile_list)
1660 | print(f"\nTracking {available_tile_count} available {data_source} tile(s) " f"discovered in a total of {len(tile_list)} intersected tile(s) " "with given polygon.")
1661 |
1662 | if data_source.lower() in ("bag", "s102v21", "s102v22"):
1663 | upsert_tiles_pmn(conn, project_dir, geom_file, data_source)
1664 | else:
1665 | upsert_tiles(conn, project_dir, geom_file)
1666 |
1667 | if data_source.lower() in ("bag", "s102v21", "s102v22"):
1668 | (
1669 | tiles_found,
1670 | tiles_not_found,
1671 | successful_downloads,
1672 | failed_downloads,
1673 | existing_tiles,
1674 | missing_tiles,
1675 | failed_verifications,
1676 | new_tile_list,
1677 | ) = download_tiles_pmn(conn, project_dir, tile_prefix, data_source)
1678 | else:
1679 | (
1680 | tiles_found,
1681 | tiles_not_found,
1682 | successful_downloads,
1683 | failed_downloads,
1684 | existing_tiles,
1685 | missing_tiles,
1686 | failed_verifications,
1687 | new_tile_list,
1688 | ) = download_tiles(conn, project_dir, tile_prefix, data_source)
1689 | print("\n___________________________________ SUMMARY ___________________________________")
1690 | print("\nExisting:")
1691 | print(
1692 | "Number of tiles already existing locally without updates:",
1693 | len(existing_tiles),
1694 | )
1695 | if new_tile_list or missing_tiles:
1696 | print("\nSearch:")
1697 | print(f"Number of tiles to attempt to fetch: {len(new_tile_list) + len(missing_tiles)} [ {len(new_tile_list)} new data + {len(missing_tiles)} missing locally ]")
1698 | if len(tiles_found) < (len(new_tile_list) + len(missing_tiles)):
1699 | print("* Some tiles we wanted to fetch were not found in the S3 bucket." "\n* The NBS may be actively updating the tiles in question." "\n* You can rerun fetch_tiles at a later time to download these tiles." "\n* Please contact the NBS if this issue does not fix itself on subsequent later runs.")
1700 | print("\nFetch:")
1701 | print(f"Number of tiles found in S3 successfully downloaded: {len(successful_downloads)}/{len(tiles_found)}")
1702 | if len(failed_downloads):
1703 | print("* Some tiles appear to have failed downloading." "\n* Please rerun fetch_tiles to retry.")
1704 | if len(failed_verifications):
1705 | print(f"{len(failed_verifications)} tiles failed checksum verification: {failed_verifications}" f"\nPlease contact the NBS if this issue does not fix itself on subsequent runs.")
1706 | print(f"\n[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Operation complete after {datetime.datetime.now() - start}")
1707 | return successful_downloads, list(set(tiles_not_found + failed_downloads))
--------------------------------------------------------------------------------
/nbs/bluetopo/core/build_vrt.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import copy
3 | import datetime
4 | import os
5 | import platform
6 | import shutil
7 | import sqlite3
8 | import sys
9 |
10 | import numpy as np
11 | from osgeo import gdal
12 |
13 | gdal.UseExceptions()
14 | gdal.SetConfigOption("COMPRESS_OVERVIEW", "DEFLATE")
15 | gdal.SetConfigOption("GDAL_NUM_THREADS", "ALL_CPUS")
16 |
17 | # refactor duplicate functions
18 |
19 | def connect_to_survey_registry_pmn2(project_dir: str, data_source: str) -> sqlite3.Connection:
20 | """
21 | Create new or connect to existing SQLite database.
22 |
23 | Parameters
24 | ----------
25 | project_dir : str
26 | destination directory for project.
27 | data_source : str
28 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
29 |
30 | Returns
31 | -------
32 | conn : sqlite3.Connection
33 | connection to SQLite database.
34 | """
35 | catalog_fields = {"file": "text", "location": "text", "downloaded": "text"}
36 | vrt_subregion_fields = {"region": "text", "utm": "text",
37 | "res_2_subdataset1_vrt": "text", "res_2_subdataset1_ovr": "text",
38 | "res_2_subdataset2_vrt": "text", "res_2_subdataset2_ovr": "text",
39 | "res_4_subdataset1_vrt": "text", "res_4_subdataset1_ovr": "text",
40 | "res_4_subdataset2_vrt": "text", "res_4_subdataset2_ovr": "text",
41 | "res_8_subdataset1_vrt": "text", "res_8_subdataset1_ovr": "text",
42 | "res_8_subdataset2_vrt": "text", "res_8_subdataset2_ovr": "text",
43 | "complete_subdataset1_vrt": "text", "complete_subdataset1_ovr": "text",
44 | "complete_subdataset2_vrt": "text", "complete_subdataset2_ovr": "text",
45 | "built_subdataset1": "integer",
46 | "built_subdataset2": "integer"}
47 | vrt_utm_fields = {"utm": "text",
48 | "utm_subdataset1_vrt": "text", "utm_subdataset1_ovr": "text",
49 | "utm_subdataset2_vrt": "text", "utm_subdataset2_ovr": "text",
50 | "utm_combined_vrt": "text",
51 | "built_subdataset1": "integer",
52 | "built_subdataset2": "integer",
53 | "built_combined": "integer"}
54 | vrt_tiles = {"tilename": "text",
55 | "file_link": "text",
56 | "delivered_date": "text", "resolution": "text",
57 | "utm": "text", "subregion": "text",
58 | "file_disk": "text",
59 | "file_sha256_checksum": "text",
60 | "file_verified": "text"}
61 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db")
62 | conn = None
63 | try:
64 | conn = sqlite3.connect(database_path)
65 | conn.row_factory = sqlite3.Row
66 | except sqlite3.Error as e:
67 | print("Failed to establish SQLite database connection.")
68 | raise e
69 | if conn is not None:
70 | try:
71 | cursor = conn.cursor()
72 | cursor.execute(
73 | """
74 | CREATE TABLE IF NOT EXISTS catalog (
75 | file text PRIMARY KEY
76 | );
77 | """
78 | )
79 | cursor.execute(
80 | """
81 | CREATE TABLE IF NOT EXISTS vrt_subregion (
82 | region text PRIMARY KEY
83 | );
84 | """
85 | )
86 | cursor.execute(
87 | """
88 | CREATE TABLE IF NOT EXISTS vrt_utm (
89 | utm text PRIMARY KEY
90 | );
91 | """
92 | )
93 | cursor.execute(
94 | """
95 | CREATE TABLE IF NOT EXISTS tiles (
96 | tilename text PRIMARY KEY
97 | );
98 | """
99 | )
100 | conn.commit()
101 | cursor.execute("SELECT name FROM pragma_table_info('catalog')")
102 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
103 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')")
104 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
105 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')")
106 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
107 | cursor.execute("SELECT name FROM pragma_table_info('tiles')")
108 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
109 | for field in catalog_fields:
110 | if field not in tileset_existing_fields:
111 | cursor.execute(f"ALTER TABLE catalog ADD COLUMN {field} {catalog_fields[field]}")
112 | conn.commit()
113 | for field in vrt_subregion_fields:
114 | if field not in vrt_subregion_existing_fields:
115 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}")
116 | conn.commit()
117 | for field in vrt_utm_fields:
118 | if field not in vrt_utm_existing_fields:
119 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}")
120 | conn.commit()
121 | for field in vrt_tiles:
122 | if field not in tiles_existing_fields:
123 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}")
124 | conn.commit()
125 | except sqlite3.Error as e:
126 | print("Failed to create SQLite tables.")
127 | raise e
128 | return conn
129 |
130 |
131 | def connect_to_survey_registry_pmn1(project_dir: str, data_source: str) -> sqlite3.Connection:
132 | """
133 | Create new or connect to existing SQLite database.
134 |
135 | Parameters
136 | ----------
137 | project_dir : str
138 | destination directory for project.
139 | data_source : str
140 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
141 |
142 | Returns
143 | -------
144 | conn : sqlite3.Connection
145 | connection to SQLite database.
146 | """
147 | catalog_fields = {"file": "text", "location": "text", "downloaded": "text"}
148 | vrt_subregion_fields = {"region": "text", "utm": "text",
149 | "res_2_vrt": "text", "res_2_ovr": "text",
150 | "res_4_vrt": "text", "res_4_ovr": "text",
151 | "res_8_vrt": "text", "res_8_ovr": "text",
152 | "complete_vrt": "text", "complete_ovr": "text",
153 | "built": "integer"}
154 | vrt_utm_fields = {"utm": "text",
155 | "utm_vrt": "text", "utm_ovr": "text",
156 | "built": "integer"}
157 | vrt_tiles = {"tilename": "text",
158 | "file_link": "text",
159 | "delivered_date": "text", "resolution": "text",
160 | "utm": "text", "subregion": "text",
161 | "file_disk": "text",
162 | "file_sha256_checksum": "text",
163 | "file_verified": "text"}
164 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db")
165 | conn = None
166 | try:
167 | conn = sqlite3.connect(database_path)
168 | conn.row_factory = sqlite3.Row
169 | except sqlite3.Error as e:
170 | print("Failed to establish SQLite database connection.")
171 | raise e
172 | if conn is not None:
173 | try:
174 | cursor = conn.cursor()
175 | cursor.execute(
176 | """
177 | CREATE TABLE IF NOT EXISTS catalog (
178 | file text PRIMARY KEY
179 | );
180 | """
181 | )
182 | cursor.execute(
183 | """
184 | CREATE TABLE IF NOT EXISTS vrt_subregion (
185 | region text PRIMARY KEY
186 | );
187 | """
188 | )
189 | cursor.execute(
190 | """
191 | CREATE TABLE IF NOT EXISTS vrt_utm (
192 | utm text PRIMARY KEY
193 | );
194 | """
195 | )
196 | cursor.execute(
197 | """
198 | CREATE TABLE IF NOT EXISTS tiles (
199 | tilename text PRIMARY KEY
200 | );
201 | """
202 | )
203 | conn.commit()
204 | cursor.execute("SELECT name FROM pragma_table_info('catalog')")
205 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
206 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')")
207 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
208 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')")
209 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
210 | cursor.execute("SELECT name FROM pragma_table_info('tiles')")
211 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
212 | for field in catalog_fields:
213 | if field not in tileset_existing_fields:
214 | cursor.execute(f"ALTER TABLE catalog ADD COLUMN {field} {catalog_fields[field]}")
215 | conn.commit()
216 | for field in vrt_subregion_fields:
217 | if field not in vrt_subregion_existing_fields:
218 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}")
219 | conn.commit()
220 | for field in vrt_utm_fields:
221 | if field not in vrt_utm_existing_fields:
222 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}")
223 | conn.commit()
224 | for field in vrt_tiles:
225 | if field not in tiles_existing_fields:
226 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}")
227 | conn.commit()
228 | except sqlite3.Error as e:
229 | print("Failed to create SQLite tables.")
230 | raise e
231 | return conn
232 |
233 |
234 | def connect_to_survey_registry(project_dir: str, data_source: str) -> sqlite3.Connection:
235 | """
236 | Create new or connect to existing SQLite database.
237 |
238 | Parameters
239 | ----------
240 | project_dir : str
241 | destination directory for project.
242 | data_source : str
243 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
244 |
245 | Returns
246 | -------
247 | conn : sqlite3.Connection
248 | connection to SQLite database.
249 | """
250 | tileset_fields = {"tilescheme": "text", "location": "text", "downloaded": "text"}
251 | vrt_subregion_fields = {"region": "text", "utm": "text", "res_2_vrt": "text", "res_2_ovr": "text", "res_4_vrt": "text", "res_4_ovr": "text", "res_8_vrt": "text", "res_8_ovr": "text", "complete_vrt": "text", "complete_ovr": "text", "built": "integer"}
252 | vrt_utm_fields = {"utm": "text", "utm_vrt": "text", "utm_ovr": "text", "built": "integer"}
253 | vrt_tiles = {"tilename": "text", "geotiff_link": "text", "rat_link": "text", "delivered_date": "text", "resolution": "text", "utm": "text", "subregion": "text", "geotiff_disk": "text", "rat_disk": "text", "geotiff_sha256_checksum": "text", "rat_sha256_checksum": "text", "geotiff_verified": "text", "rat_verified": "text"}
254 | database_path = os.path.join(project_dir, f"{data_source.lower()}_registry.db")
255 | conn = None
256 | try:
257 | conn = sqlite3.connect(database_path)
258 | conn.row_factory = sqlite3.Row
259 | except sqlite3.Error as e:
260 | print("Failed to establish SQLite database connection.")
261 | raise e
262 | if conn is not None:
263 | try:
264 | cursor = conn.cursor()
265 | cursor.execute(
266 | """
267 | CREATE TABLE IF NOT EXISTS tileset (
268 | tilescheme text PRIMARY KEY
269 | );
270 | """
271 | )
272 | cursor.execute(
273 | """
274 | CREATE TABLE IF NOT EXISTS vrt_subregion (
275 | region text PRIMARY KEY
276 | );
277 | """
278 | )
279 | cursor.execute(
280 | """
281 | CREATE TABLE IF NOT EXISTS vrt_utm (
282 | utm text PRIMARY KEY
283 | );
284 | """
285 | )
286 | cursor.execute(
287 | """
288 | CREATE TABLE IF NOT EXISTS tiles (
289 | tilename text PRIMARY KEY
290 | );
291 | """
292 | )
293 | conn.commit()
294 | cursor.execute("SELECT name FROM pragma_table_info('tileset')")
295 | tileset_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
296 | cursor.execute("SELECT name FROM pragma_table_info('vrt_subregion')")
297 | vrt_subregion_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
298 | cursor.execute("SELECT name FROM pragma_table_info('vrt_utm')")
299 | vrt_utm_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
300 | cursor.execute("SELECT name FROM pragma_table_info('tiles')")
301 | tiles_existing_fields = [dict(row)["name"] for row in cursor.fetchall()]
302 | for field in tileset_fields:
303 | if field not in tileset_existing_fields:
304 | cursor.execute(f"ALTER TABLE tileset ADD COLUMN {field} {tileset_fields[field]}")
305 | conn.commit()
306 | for field in vrt_subregion_fields:
307 | if field not in vrt_subregion_existing_fields:
308 | cursor.execute(f"ALTER TABLE vrt_subregion ADD COLUMN {field} {vrt_subregion_fields[field]}")
309 | conn.commit()
310 | for field in vrt_utm_fields:
311 | if field not in vrt_utm_existing_fields:
312 | cursor.execute(f"ALTER TABLE vrt_utm ADD COLUMN {field} {vrt_utm_fields[field]}")
313 | conn.commit()
314 | for field in vrt_tiles:
315 | if field not in tiles_existing_fields:
316 | cursor.execute(f"ALTER TABLE tiles ADD COLUMN {field} {vrt_tiles[field]}")
317 | conn.commit()
318 | except sqlite3.Error as e:
319 | print("Failed to create SQLite tables.")
320 | raise e
321 | return conn
322 |
323 | def build_sub_vrts_pmn(
324 | subregion: str,
325 | subregion_tiles: list,
326 | project_dir: str,
327 | data_source: str,
328 | relative_to_vrt: bool,
329 | ) -> dict:
330 | """
331 | Build the VRTs of a given subregion.
332 |
333 | Parameters
334 | ----------
335 | subregion
336 | subregion name.
337 | subregion_tiles
338 | list of tile records belonging to subregion.
339 | project_dir
340 | destination directory for project.
341 | data_source : str
342 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
343 | relative_to_vrt : bool
344 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
345 |
346 | Returns
347 | -------
348 | fields : dict
349 | holds name of subregion and the paths of its VRT and OVR files.
350 | """
351 | fields = {
352 | "region": subregion["region"],
353 | "res_2_subdataset1_vrt": None,
354 | "res_2_subdataset1_ovr": None,
355 | "res_2_subdataset2_vrt": None,
356 | "res_2_subdataset2_ovr": None,
357 | "res_4_subdataset1_vrt": None,
358 | "res_4_subdataset1_ovr": None,
359 | "res_4_subdataset2_vrt": None,
360 | "res_4_subdataset2_ovr": None,
361 | "res_8_subdataset1_vrt": None,
362 | "res_8_subdataset1_ovr": None,
363 | "res_8_subdataset2_vrt": None,
364 | "res_8_subdataset2_ovr": None,
365 | "complete_subdataset1_vrt": None,
366 | "complete_subdataset1_ovr": None,
367 | "complete_subdataset2_vrt": None,
368 | "complete_subdataset2_ovr": None,
369 | }
370 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"])
371 | subregion_dir = os.path.join(project_dir, rel_dir)
372 | try:
373 | if os.path.isdir(subregion_dir):
374 | shutil.rmtree(subregion_dir)
375 | except (OSError, PermissionError) as e:
376 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again")
377 | sys.exit(1)
378 | if not os.path.exists(subregion_dir):
379 | os.makedirs(subregion_dir)
380 | resolution_tiles = collections.defaultdict(list)
381 | for subregion_tile in subregion_tiles:
382 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile)
383 | vrt_subdataset1_list = []
384 | vrt_subdataset2_list = []
385 | for res, tiles in resolution_tiles.items():
386 | print(f"Building {subregion['region']} band {res}...")
387 | rel_subdataset1_path = os.path.join(rel_dir, subregion["region"] + f"_{res}_BathymetryCoverage.vrt")
388 | res_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path)
389 | rel_subdataset2_path = os.path.join(rel_dir, subregion["region"] + f"_{res}_QualityOfSurvey.vrt")
390 | res_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path)
391 | tiffs_subdataset1 = [os.path.join(project_dir, tile["file_disk"]) for tile in tiles]
392 | tiffs_subdataset2 = []
393 | for tile in tiles:
394 | fpath = os.path.join(project_dir, f'{tile["file_disk"]}').replace("\\", "/")
395 | if os.path.join(project_dir, f'{tile["file_disk"]}').startswith('/') and os.path.join(project_dir, f'{tile["file_disk"]}').startswith('//') is False:
396 | tiffs_subdataset2.append('S102:"/' + fpath + '":QualityOfSurvey')
397 | else:
398 | tiffs_subdataset2.append('S102:"' + fpath + '":QualityOfSurvey')
399 | # tiffs_subdataset2 = ['S102:/' + os.path.join(project_dir, f'{tile["file_disk"]}') + ':QualityOfSurvey' for tile in tiles]
400 | # revisit levels
401 | if "2" in res:
402 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [2, 4], relative_to_vrt, subdataset = 1)
403 | vrt_subdataset1_list.append(res_subdataset1_vrt)
404 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [2, 4], relative_to_vrt, subdataset = 2)
405 | vrt_subdataset2_list.append(res_subdataset2_vrt)
406 | fields["res_2_subdataset1_vrt"] = rel_subdataset1_path
407 | fields["res_2_subdataset2_vrt"] = rel_subdataset2_path
408 | if os.path.isfile(os.path.join(project_dir, fields["res_2_subdataset1_vrt"] + ".ovr")):
409 | fields["res_2_subdataset1_ovr"] = rel_subdataset1_path + ".ovr"
410 | if os.path.isfile(os.path.join(project_dir, fields["res_2_subdataset2_vrt"] + ".ovr")):
411 | fields["res_2_subdataset2_ovr"] = rel_subdataset2_path + ".ovr"
412 | if "4" in res:
413 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [4, 8], relative_to_vrt, subdataset = 1)
414 | vrt_subdataset1_list.append(res_subdataset1_vrt)
415 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [4, 8], relative_to_vrt, subdataset = 2)
416 | vrt_subdataset2_list.append(res_subdataset2_vrt)
417 | fields["res_4_subdataset1_vrt"] = rel_subdataset1_path
418 | fields["res_4_subdataset2_vrt"] = rel_subdataset2_path
419 | if os.path.isfile(os.path.join(project_dir, fields["res_4_subdataset1_vrt"] + ".ovr")):
420 | fields["res_4_subdataset1_ovr"] = rel_subdataset1_path + ".ovr"
421 | if os.path.isfile(os.path.join(project_dir, fields["res_4_subdataset2_vrt"] + ".ovr")):
422 | fields["res_4_subdataset2_ovr"] = rel_subdataset2_path + ".ovr"
423 | if "8" in res:
424 | create_vrt_pmn(tiffs_subdataset1, res_subdataset1_vrt, [8], relative_to_vrt, subdataset = 1)
425 | vrt_subdataset1_list.append(res_subdataset1_vrt)
426 | create_vrt_pmn(tiffs_subdataset2, res_subdataset2_vrt, [8], relative_to_vrt, subdataset = 2)
427 | vrt_subdataset2_list.append(res_subdataset2_vrt)
428 | fields["res_8_subdataset1_vrt"] = rel_subdataset1_path
429 | fields["res_8_subdataset2_vrt"] = rel_subdataset2_path
430 | if os.path.isfile(os.path.join(project_dir, fields["res_8_subdataset1_vrt"] + ".ovr")):
431 | fields["res_8_subdataset1_ovr"] = rel_subdataset1_path + ".ovr"
432 | if os.path.isfile(os.path.join(project_dir, fields["res_8_subdataset2_vrt"] + ".ovr")):
433 | fields["res_8_subdataset2_ovr"] = rel_subdataset2_path + ".ovr"
434 | if "16" in res:
435 | vrt_subdataset1_list.extend(tiffs_subdataset1)
436 | vrt_subdataset2_list.extend(tiffs_subdataset2)
437 | rel_subdataset1_path = os.path.join(rel_dir, subregion["region"] + "_complete_BathymetryCoverage.vrt")
438 | complete_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path)
439 | rel_subdataset2_path = os.path.join(rel_dir, subregion["region"] + "_complete_QualityOfSurvey.vrt")
440 | complete_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path)
441 | create_vrt_pmn(vrt_subdataset1_list, complete_subdataset1_vrt, [16], relative_to_vrt, subdataset = 1)
442 | create_vrt_pmn(vrt_subdataset2_list, complete_subdataset2_vrt, [16], relative_to_vrt, subdataset = 2)
443 | fields["complete_subdataset1_vrt"] = rel_subdataset1_path
444 | if os.path.isfile(os.path.join(project_dir, fields["complete_subdataset1_vrt"] + ".ovr")):
445 | fields["complete_subdataset1_ovr"] = rel_subdataset1_path + ".ovr"
446 | fields["complete_subdataset2_vrt"] = rel_subdataset2_path
447 | if os.path.isfile(os.path.join(project_dir, fields["complete_subdataset2_vrt"] + ".ovr")):
448 | fields["complete_subdataset2_ovr"] = rel_subdataset2_path + ".ovr"
449 | return fields
450 |
451 | def build_sub_vrts_pmn1(
452 | subregion: str,
453 | subregion_tiles: list,
454 | project_dir: str,
455 | data_source: str,
456 | relative_to_vrt: bool,
457 | ) -> dict:
458 | """
459 | Build the VRTs of a given subregion.
460 |
461 | Parameters
462 | ----------
463 | subregion
464 | subregion name.
465 | subregion_tiles
466 | list of tile records belonging to subregion.
467 | project_dir
468 | destination directory for project.
469 | data_source : str
470 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
471 | relative_to_vrt : bool
472 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
473 |
474 | Returns
475 | -------
476 | fields : dict
477 | holds name of subregion and the paths of its VRT and OVR files.
478 | """
479 | fields = {
480 | "region": subregion["region"],
481 | "res_2_vrt": None,
482 | "res_2_ovr": None,
483 | "res_4_vrt": None,
484 | "res_4_ovr": None,
485 | "res_8_vrt": None,
486 | "res_8_ovr": None,
487 | "complete_vrt": None,
488 | "complete_ovr": None,
489 | }
490 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"])
491 | subregion_dir = os.path.join(project_dir, rel_dir)
492 | try:
493 | if os.path.isdir(subregion_dir):
494 | shutil.rmtree(subregion_dir)
495 | except (OSError, PermissionError) as e:
496 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again")
497 | sys.exit(1)
498 | if not os.path.exists(subregion_dir):
499 | os.makedirs(subregion_dir)
500 | resolution_tiles = collections.defaultdict(list)
501 | for subregion_tile in subregion_tiles:
502 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile)
503 | vrt_list = []
504 | for res, tiles in resolution_tiles.items():
505 | print(f"Building {subregion['region']} band {res}...")
506 | rel_path = os.path.join(rel_dir, subregion["region"] + f"_{res}.vrt")
507 | res_vrt = os.path.join(project_dir, rel_path)
508 | tiffs = [os.path.join(project_dir, tile["file_disk"]) for tile in tiles]
509 | # revisit levels
510 | if "2" in res:
511 | create_vrt_pmn1(tiffs, res_vrt, [2, 4], relative_to_vrt)
512 | vrt_list.append(res_vrt)
513 | fields["res_2_vrt"] = rel_path
514 | if os.path.isfile(os.path.join(project_dir, fields["res_2_vrt"] + ".ovr")):
515 | fields["res_2_ovr"] = rel_path + ".ovr"
516 | if "4" in res:
517 | create_vrt_pmn1(tiffs, res_vrt, [4, 8], relative_to_vrt)
518 | vrt_list.append(res_vrt)
519 | fields["res_4_vrt"] = rel_path
520 | if os.path.isfile(os.path.join(project_dir, fields["res_4_vrt"] + ".ovr")):
521 | fields["res_4_ovr"] = rel_path + ".ovr"
522 | if "8" in res:
523 | create_vrt_pmn1(tiffs, res_vrt, [8], relative_to_vrt)
524 | vrt_list.append(res_vrt)
525 | fields["res_8_vrt"] = rel_path
526 | if os.path.isfile(os.path.join(project_dir, fields["res_8_vrt"] + ".ovr")):
527 | fields["res_8_ovr"] = rel_path + ".ovr"
528 | if "16" in res:
529 | vrt_list.extend(tiffs)
530 | rel_path = os.path.join(rel_dir, subregion["region"] + "_complete.vrt")
531 | complete_vrt = os.path.join(project_dir, rel_path)
532 | create_vrt_pmn1(vrt_list, complete_vrt, [16], relative_to_vrt)
533 | fields["complete_vrt"] = rel_path
534 | if os.path.isfile(os.path.join(project_dir, fields["complete_vrt"] + ".ovr")):
535 | fields["complete_ovr"] = rel_path + ".ovr"
536 | return fields
537 |
538 |
539 | def build_sub_vrts(
540 | subregion: str,
541 | subregion_tiles: list,
542 | project_dir: str,
543 | data_source: str,
544 | relative_to_vrt: bool,
545 | ) -> dict:
546 | """
547 | Build the VRTs of a given subregion.
548 |
549 | Parameters
550 | ----------
551 | subregion
552 | subregion name.
553 | subregion_tiles
554 | list of tile records belonging to subregion.
555 | project_dir
556 | destination directory for project.
557 | data_source : str
558 | the data source for the project e.g. 'BlueTopo' or 'Modeling'.
559 | relative_to_vrt : bool
560 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
561 |
562 | Returns
563 | -------
564 | fields : dict
565 | holds name of subregion and the paths of its VRT and OVR files.
566 | """
567 | fields = {
568 | "region": subregion["region"],
569 | "res_2_vrt": None,
570 | "res_2_ovr": None,
571 | "res_4_vrt": None,
572 | "res_4_ovr": None,
573 | "res_8_vrt": None,
574 | "res_8_ovr": None,
575 | "complete_vrt": None,
576 | "complete_ovr": None,
577 | }
578 | rel_dir = os.path.join(f"{data_source}_VRT", subregion["region"])
579 | subregion_dir = os.path.join(project_dir, rel_dir)
580 | try:
581 | if os.path.isdir(subregion_dir):
582 | shutil.rmtree(subregion_dir)
583 | except (OSError, PermissionError) as e:
584 | print(f"Failed to remove older vrt files for {subregion['region']}\n" "Please close all files and attempt again")
585 | sys.exit(1)
586 | if not os.path.exists(subregion_dir):
587 | os.makedirs(subregion_dir)
588 | resolution_tiles = collections.defaultdict(list)
589 | for subregion_tile in subregion_tiles:
590 | resolution_tiles[subregion_tile["resolution"]].append(subregion_tile)
591 | vrt_list = []
592 | for res, tiles in resolution_tiles.items():
593 | print(f"Building {subregion['region']} band {res}...")
594 | rel_path = os.path.join(rel_dir, subregion["region"] + f"_{res}.vrt")
595 | res_vrt = os.path.join(project_dir, rel_path)
596 | tiffs = [os.path.join(project_dir, tile["geotiff_disk"]) for tile in tiles]
597 | # revisit levels
598 | if "2" in res:
599 | create_vrt(tiffs, res_vrt, [2, 4], relative_to_vrt)
600 | vrt_list.append(res_vrt)
601 | fields["res_2_vrt"] = rel_path
602 | if os.path.isfile(os.path.join(project_dir, fields["res_2_vrt"] + ".ovr")):
603 | fields["res_2_ovr"] = rel_path + ".ovr"
604 | if "4" in res:
605 | create_vrt(tiffs, res_vrt, [4, 8], relative_to_vrt)
606 | vrt_list.append(res_vrt)
607 | fields["res_4_vrt"] = rel_path
608 | if os.path.isfile(os.path.join(project_dir, fields["res_4_vrt"] + ".ovr")):
609 | fields["res_4_ovr"] = rel_path + ".ovr"
610 | if "8" in res:
611 | create_vrt(tiffs, res_vrt, [8], relative_to_vrt)
612 | vrt_list.append(res_vrt)
613 | fields["res_8_vrt"] = rel_path
614 | if os.path.isfile(os.path.join(project_dir, fields["res_8_vrt"] + ".ovr")):
615 | fields["res_8_ovr"] = rel_path + ".ovr"
616 | if "16" in res:
617 | vrt_list.extend(tiffs)
618 | rel_path = os.path.join(rel_dir, subregion["region"] + "_complete.vrt")
619 | complete_vrt = os.path.join(project_dir, rel_path)
620 | create_vrt(vrt_list, complete_vrt, [16], relative_to_vrt)
621 | fields["complete_vrt"] = rel_path
622 | if os.path.isfile(os.path.join(project_dir, fields["complete_vrt"] + ".ovr")):
623 | fields["complete_ovr"] = rel_path + ".ovr"
624 | return fields
625 |
626 |
627 | def combine_vrts(files: list, vrt_path: str, relative_to_vrt: bool) -> None:
628 | """
629 | Build VRT from files.
630 |
631 | Parameters
632 | ----------
633 | files
634 | list of the file paths to include in the vrt.
635 | vrt_path
636 | output vrt path.
637 | levels
638 | list of overview levels to be built with the vrt.
639 | relative_to_vrt : bool
640 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
641 | """
642 | # not efficient but insignificant
643 | files = copy.deepcopy(files)
644 | try:
645 | if os.path.isfile(vrt_path):
646 | os.remove(vrt_path)
647 | if os.path.isfile(vrt_path + ".ovr"):
648 | os.remove(vrt_path + ".ovr")
649 | except (OSError, PermissionError) as e:
650 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again")
651 | sys.exit(1)
652 | vrt_options = gdal.BuildVRTOptions(options='-separate -allow_projection_difference', resampleAlg="near", resolution="highest")
653 | cwd = os.getcwd()
654 | try:
655 | os.chdir(os.path.dirname(vrt_path))
656 | if relative_to_vrt is True:
657 | for idx in range(len(files)):
658 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path))
659 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd())
660 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options)
661 | band1 = vrt.GetRasterBand(1)
662 | band1.SetDescription("Elevation")
663 | band2 = vrt.GetRasterBand(2)
664 | band2.SetDescription("Uncertainty")
665 | band3 = vrt.GetRasterBand(3)
666 | band3.SetDescription("QualityOfSurvey")
667 | vrt = None
668 | except:
669 | raise RuntimeError(f"VRT failed to build for {vrt_path}")
670 | finally:
671 | os.chdir(cwd)
672 |
673 |
674 | def create_vrt_pmn(files: list, vrt_path: str, levels: list, relative_to_vrt: bool, subdataset: int) -> None:
675 | """
676 | Build VRT from files.
677 |
678 | Parameters
679 | ----------
680 | files
681 | list of the file paths to include in the vrt.
682 | vrt_path
683 | output vrt path.
684 | levels
685 | list of overview levels to be built with the vrt.
686 | relative_to_vrt : bool
687 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
688 | """
689 | # not efficient but insignificant
690 | files = copy.deepcopy(files)
691 | try:
692 | if os.path.isfile(vrt_path):
693 | os.remove(vrt_path)
694 | if os.path.isfile(vrt_path + ".ovr"):
695 | os.remove(vrt_path + ".ovr")
696 | except (OSError, PermissionError) as e:
697 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again")
698 | sys.exit(1)
699 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest")
700 | cwd = os.getcwd()
701 | try:
702 | os.chdir(os.path.dirname(vrt_path))
703 | if relative_to_vrt is True:
704 | for idx in range(len(files)):
705 | if 'S102:' in files[idx]:
706 | continue
707 | else:
708 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path))
709 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd())
710 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options)
711 | if subdataset == 1:
712 | band1 = vrt.GetRasterBand(1)
713 | band1.SetDescription("Elevation")
714 | band2 = vrt.GetRasterBand(2)
715 | band2.SetDescription("Uncertainty")
716 | if subdataset == 2:
717 | band1 = vrt.GetRasterBand(1)
718 | band1.SetDescription("QualityOfSurvey")
719 | vrt = None
720 | except:
721 | raise RuntimeError(f"VRT failed to build for {vrt_path}")
722 | finally:
723 | os.chdir(cwd)
724 | vrt = gdal.Open(vrt_path, 0)
725 | vrt.BuildOverviews("NEAREST", levels)
726 | vrt = None
727 |
728 | def create_vrt_pmn1(files: list, vrt_path: str, levels: list, relative_to_vrt: bool) -> None:
729 | """
730 | Build VRT from files.
731 |
732 | Parameters
733 | ----------
734 | files
735 | list of the file paths to include in the vrt.
736 | vrt_path
737 | output vrt path.
738 | levels
739 | list of overview levels to be built with the vrt.
740 | relative_to_vrt : bool
741 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
742 | """
743 | # not efficient but insignificant
744 | files = copy.deepcopy(files)
745 | try:
746 | if os.path.isfile(vrt_path):
747 | os.remove(vrt_path)
748 | if os.path.isfile(vrt_path + ".ovr"):
749 | os.remove(vrt_path + ".ovr")
750 | except (OSError, PermissionError) as e:
751 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again")
752 | sys.exit(1)
753 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest")
754 | cwd = os.getcwd()
755 | try:
756 | os.chdir(os.path.dirname(vrt_path))
757 | if relative_to_vrt is True:
758 | for idx in range(len(files)):
759 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path))
760 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd())
761 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options)
762 | band1 = vrt.GetRasterBand(1)
763 | band1.SetDescription("Elevation")
764 | band2 = vrt.GetRasterBand(2)
765 | band2.SetDescription("Uncertainty")
766 | vrt = None
767 | except:
768 | raise RuntimeError(f"VRT failed to build for {vrt_path}")
769 | finally:
770 | os.chdir(cwd)
771 | vrt = gdal.Open(vrt_path, 0)
772 | vrt.BuildOverviews("NEAREST", levels)
773 | vrt = None
774 |
775 | def create_vrt(files: list, vrt_path: str, levels: list, relative_to_vrt: bool) -> None:
776 | """
777 | Build VRT from files.
778 |
779 | Parameters
780 | ----------
781 | files
782 | list of the file paths to include in the vrt.
783 | vrt_path
784 | output vrt path.
785 | levels
786 | list of overview levels to be built with the vrt.
787 | relative_to_vrt : bool
788 | This arg determines if paths of referenced files inside the VRT are relative or absolute paths.
789 | """
790 | # not efficient but insignificant
791 | files = copy.deepcopy(files)
792 | try:
793 | if os.path.isfile(vrt_path):
794 | os.remove(vrt_path)
795 | if os.path.isfile(vrt_path + ".ovr"):
796 | os.remove(vrt_path + ".ovr")
797 | except (OSError, PermissionError) as e:
798 | print(f"Failed to remove older vrt files for {vrt_path}\n" "Please close all files and attempt again")
799 | sys.exit(1)
800 | vrt_options = gdal.BuildVRTOptions(options='-allow_projection_difference', resampleAlg="near", resolution="highest")
801 | cwd = os.getcwd()
802 | try:
803 | os.chdir(os.path.dirname(vrt_path))
804 | if relative_to_vrt is True:
805 | for idx in range(len(files)):
806 | files[idx] = os.path.relpath(files[idx], os.path.dirname(vrt_path))
807 | relative_vrt_path = os.path.relpath(vrt_path, os.getcwd())
808 | vrt = gdal.BuildVRT(relative_vrt_path, files, options=vrt_options)
809 | band1 = vrt.GetRasterBand(1)
810 | band1.SetDescription("Elevation")
811 | band2 = vrt.GetRasterBand(2)
812 | band2.SetDescription("Uncertainty")
813 | band3 = vrt.GetRasterBand(3)
814 | band3.SetDescription("Contributor")
815 | vrt = None
816 | except:
817 | raise RuntimeError(f"VRT failed to build for {vrt_path}")
818 | finally:
819 | os.chdir(cwd)
820 | vrt = gdal.Open(vrt_path, 0)
821 | vrt.BuildOverviews("NEAREST", levels)
822 | vrt = None
823 |
824 |
825 | def add_vrt_rat_pmn(conn: sqlite3.Connection, utm: str, project_dir: str, vrt_path: str, data_source: str) -> None:
826 | """
827 | Create a raster attribute table for the VRT.
828 |
829 | Parameters
830 | ----------
831 | conn : sqlite3.Connection
832 | database connection object.
833 | utm : str
834 | utm zone of the VRT.
835 | project_dir : str
836 | destination directory for project.
837 | vrt_path : str
838 | path to the VRT to which to add the raster attribute table.
839 | data_source : str
840 | The NBS offers various products to different end-users. Some are available publicly.
841 | Use this argument to identify which product you want. BlueTopo is the default.
842 | """
843 | expected_fields = dict(
844 | value=[int, gdal.GFU_MinMax],
845 | count=[int, gdal.GFU_PixelCount],
846 | data_assessment=[int, gdal.GFU_Generic],
847 | feature_least_depth=[float, gdal.GFU_Generic],
848 | significant_features=[float, gdal.GFU_Generic],
849 | feature_size=[float, gdal.GFU_Generic],
850 | coverage=[int, gdal.GFU_Generic],
851 | bathy_coverage=[int, gdal.GFU_Generic],
852 | horizontal_uncert_fixed=[float, gdal.GFU_Generic],
853 | horizontal_uncert_var=[float, gdal.GFU_Generic],
854 | vertical_uncert_fixed=[float, gdal.GFU_Generic],
855 | vertical_uncert_var=[float, gdal.GFU_Generic],
856 | license_name=[str, gdal.GFU_Generic],
857 | license_url=[str, gdal.GFU_Generic],
858 | source_survey_id=[str, gdal.GFU_Generic],
859 | source_institution=[str, gdal.GFU_Generic],
860 | survey_date_start=[str, gdal.GFU_Generic],
861 | survey_date_end=[str, gdal.GFU_Generic],
862 | )
863 | if data_source.lower() == "hsd":
864 | expected_fields["catzoc"] = [int, gdal.GFU_Generic]
865 | expected_fields["supercession_score"] = [float, gdal.GFU_Generic]
866 | expected_fields["decay_score"] = [float, gdal.GFU_Generic]
867 | expected_fields["unqualified"] = [int, gdal.GFU_Generic]
868 | expected_fields["sensitive"] = [int, gdal.GFU_Generic]
869 | # refactor later
870 | if data_source.lower() in ['s102v22']:
871 | expected_fields = dict(
872 | value=[int, gdal.GFU_MinMax],
873 | data_assessment=[int, gdal.GFU_Generic],
874 | feature_least_depth=[float, gdal.GFU_Generic],
875 | significant_features=[float, gdal.GFU_Generic],
876 | feature_size=[str, gdal.GFU_Generic],
877 | # ?
878 | feature_size_var=[int, gdal.GFU_Generic],
879 | coverage=[int, gdal.GFU_Generic],
880 | bathy_coverage=[int, gdal.GFU_Generic],
881 | horizontal_uncert_fixed=[float, gdal.GFU_Generic],
882 | horizontal_uncert_var=[float, gdal.GFU_Generic],
883 | survey_date_start=[str, gdal.GFU_Generic],
884 | survey_date_end=[str, gdal.GFU_Generic],
885 | source_survey_id=[str, gdal.GFU_Generic],
886 | source_institution=[str, gdal.GFU_Generic],
887 | # ?
888 | bathymetric_uncertainty_type=[int, gdal.GFU_Generic],
889 | )
890 | cursor = conn.cursor()
891 | cursor.execute("SELECT * FROM tiles WHERE utm = ?", (utm,))
892 | exp_fields = list(expected_fields.keys())
893 | tiles = [dict(row) for row in cursor.fetchall()]
894 | surveys = []
895 | for tile in tiles:
896 | if tile['file_disk'] is None or os.path.isfile(os.path.join(project_dir, tile["file_disk"])) is False:
897 | continue
898 | gtiff = os.path.join(project_dir, tile["file_disk"]).replace('\\', '/')
899 | if os.path.isfile(gtiff) is False:
900 | continue
901 | # rat_file = os.path.join(project_dir, tile["rat_disk"])
902 | # if os.path.isfile(rat_file) is False and data_source.lower() != 's102v22':
903 | # continue
904 | if data_source.lower() != 's102v22':
905 | ds = gdal.Open(gtiff)
906 | contrib = ds.GetRasterBand(3)
907 | rat_n = contrib.GetDefaultRAT()
908 | for col in range(rat_n.GetColumnCount()):
909 | if exp_fields[col] != rat_n.GetNameOfCol(col).lower():
910 | raise ValueError("Unexpected field order")
911 | else:
912 | ds = gdal.Open(f'S102:"{gtiff}":QualityOfSurvey')
913 | contrib = ds.GetRasterBand(1)
914 | rat_n = contrib.GetDefaultRAT()
915 | for row in range(rat_n.GetRowCount()):
916 | exist = False
917 | for survey in surveys:
918 | if survey[0] == rat_n.GetValueAsString(row, 0):
919 | survey[1] = int(survey[1]) + rat_n.GetValueAsInt(row, 1)
920 | # this is the count field
921 | # GFU_PixelCount usage has support as int dtype in some
922 | # software so avoiding changing it to python float (double)
923 | # this is a temp solution to avoid overflow error which can
924 | # occur with generalization in vrts of extreme coverage
925 | if survey[1] > 2147483647:
926 | survey[1] = 2147483647
927 | exist = True
928 | break
929 | if exist:
930 | continue
931 | curr = []
932 | for col in range(rat_n.GetColumnCount()):
933 | entry_val = rat_n.GetValueAsString(row, col)
934 | # test removal
935 | if rat_n.GetNameOfCol(col).lower() in ['feature_size_var', 'bathymetric_uncertainty_type']:
936 | entry_val = 0
937 | curr.append(entry_val)
938 | surveys.append(curr)
939 | rat = gdal.RasterAttributeTable()
940 | for entry in expected_fields:
941 | field_type, usage = expected_fields[entry]
942 | if field_type == str:
943 | col_type = gdal.GFT_String
944 | elif field_type == int:
945 | col_type = gdal.GFT_Integer
946 | elif field_type == float:
947 | col_type = gdal.GFT_Real
948 | else:
949 | raise TypeError("Unknown data type submitted for gdal raster attribute table.")
950 | rat.CreateColumn(entry, col_type, usage)
951 | rat.SetRowCount(len(surveys))
952 | for row_idx, survey in enumerate(surveys):
953 | for col_idx, entry in enumerate(expected_fields):
954 | field_type, usage = expected_fields[entry]
955 | if field_type == str:
956 | rat.SetValueAsString(row_idx, col_idx, survey[col_idx])
957 | elif field_type == int:
958 | rat.SetValueAsInt(row_idx, col_idx, int(survey[col_idx]))
959 | elif field_type == float:
960 | rat.SetValueAsDouble(row_idx, col_idx, float(survey[col_idx]))
961 | vrt_ds = gdal.Open(vrt_path, 1)
962 | contributor_band = vrt_ds.GetRasterBand(3)
963 | contributor_band.SetDefaultRAT(rat)
964 |
965 |
966 | def add_vrt_rat(conn: sqlite3.Connection, utm: str, project_dir: str, vrt_path: str, data_source: str) -> None:
967 | """
968 | Create a raster attribute table for the VRT.
969 |
970 | Parameters
971 | ----------
972 | conn : sqlite3.Connection
973 | database connection object.
974 | utm : str
975 | utm zone of the VRT.
976 | project_dir : str
977 | destination directory for project.
978 | vrt_path : str
979 | path to the VRT to which to add the raster attribute table.
980 | data_source : str
981 | The NBS offers various products to different end-users. Some are available publicly.
982 | Use this argument to identify which product you want. BlueTopo is the default.
983 | """
984 | expected_fields = dict(
985 | value=[int, gdal.GFU_MinMax],
986 | count=[int, gdal.GFU_PixelCount],
987 | data_assessment=[int, gdal.GFU_Generic],
988 | feature_least_depth=[float, gdal.GFU_Generic],
989 | significant_features=[float, gdal.GFU_Generic],
990 | feature_size=[float, gdal.GFU_Generic],
991 | coverage=[int, gdal.GFU_Generic],
992 | bathy_coverage=[int, gdal.GFU_Generic],
993 | horizontal_uncert_fixed=[float, gdal.GFU_Generic],
994 | horizontal_uncert_var=[float, gdal.GFU_Generic],
995 | vertical_uncert_fixed=[float, gdal.GFU_Generic],
996 | vertical_uncert_var=[float, gdal.GFU_Generic],
997 | license_name=[str, gdal.GFU_Generic],
998 | license_url=[str, gdal.GFU_Generic],
999 | source_survey_id=[str, gdal.GFU_Generic],
1000 | source_institution=[str, gdal.GFU_Generic],
1001 | survey_date_start=[str, gdal.GFU_Generic],
1002 | survey_date_end=[str, gdal.GFU_Generic],
1003 | )
1004 | if data_source.lower() == "hsd":
1005 | expected_fields['catzoc'] = [int, gdal.GFU_Generic]
1006 | expected_fields['supercession_score'] = [float, gdal.GFU_Generic]
1007 | expected_fields['decay_score'] = [float, gdal.GFU_Generic]
1008 | expected_fields['unqualified'] = [int, gdal.GFU_Generic]
1009 | expected_fields['sensitive'] = [int, gdal.GFU_Generic]
1010 | cursor = conn.cursor()
1011 | cursor.execute("SELECT * FROM tiles WHERE utm = ?", (utm,))
1012 | exp_fields = list(expected_fields.keys())
1013 | tiles = [dict(row) for row in cursor.fetchall()]
1014 | surveys = []
1015 | for tile in tiles:
1016 | gtiff = os.path.join(project_dir, tile["geotiff_disk"])
1017 | if os.path.isfile(gtiff) is False:
1018 | continue
1019 | rat_file = os.path.join(project_dir, tile["rat_disk"])
1020 | if os.path.isfile(rat_file) is False:
1021 | continue
1022 | ds = gdal.Open(gtiff)
1023 | contrib = ds.GetRasterBand(3)
1024 | rat_n = contrib.GetDefaultRAT()
1025 | for col in range(rat_n.GetColumnCount()):
1026 | if exp_fields[col] != rat_n.GetNameOfCol(col).lower():
1027 | raise ValueError("Unexpected field order")
1028 | for row in range(rat_n.GetRowCount()):
1029 | exist = False
1030 | for survey in surveys:
1031 | if survey[0] == rat_n.GetValueAsString(row, 0):
1032 | survey[1] = int(survey[1]) + rat_n.GetValueAsInt(row, 1)
1033 | # this is the count field
1034 | # GFU_PixelCount usage has support as int dtype in some
1035 | # software so avoiding changing it to python float (double)
1036 | # this is a temp solution to avoid overflow error which can
1037 | # occur with generalization in vrts of extreme coverage
1038 | if survey[1] > 2147483647:
1039 | survey[1] = 2147483647
1040 | exist = True
1041 | break
1042 | if exist:
1043 | continue
1044 | curr = []
1045 | for col in range(rat_n.GetColumnCount()):
1046 | curr.append(rat_n.GetValueAsString(row, col))
1047 | surveys.append(curr)
1048 | rat = gdal.RasterAttributeTable()
1049 | for entry in expected_fields:
1050 | field_type, usage = expected_fields[entry]
1051 | if field_type == str:
1052 | col_type = gdal.GFT_String
1053 | elif field_type == int:
1054 | col_type = gdal.GFT_Integer
1055 | elif field_type == float:
1056 | col_type = gdal.GFT_Real
1057 | else:
1058 | raise TypeError("Unknown data type submitted for gdal raster attribute table.")
1059 | rat.CreateColumn(entry, col_type, usage)
1060 | rat.SetRowCount(len(surveys))
1061 | for row_idx, survey in enumerate(surveys):
1062 | for col_idx, entry in enumerate(expected_fields):
1063 | field_type, usage = expected_fields[entry]
1064 | if field_type == str:
1065 | rat.SetValueAsString(row_idx, col_idx, survey[col_idx])
1066 | elif field_type == int:
1067 | rat.SetValueAsInt(row_idx, col_idx, int(survey[col_idx]))
1068 | elif field_type == float:
1069 | rat.SetValueAsDouble(row_idx, col_idx, float(survey[col_idx]))
1070 | vrt_ds = gdal.Open(vrt_path, 1)
1071 | contributor_band = vrt_ds.GetRasterBand(3)
1072 | contributor_band.SetDefaultRAT(rat)
1073 |
1074 |
1075 | def select_tiles_by_subregion_pmn(project_dir: str, conn: sqlite3.Connection, subregion: str) -> list:
1076 | """
1077 | Retrieve all tile records with files in the given subregion.
1078 |
1079 | Parameters
1080 | ----------
1081 | project_dir
1082 | destination directory for project.
1083 | conn : sqlite3.Connection
1084 | database connection object.
1085 | subregion : str
1086 | subregion name.
1087 |
1088 | Returns
1089 | -------
1090 | existing_tiles : list
1091 | list of tile records.
1092 | """
1093 | cursor = conn.cursor()
1094 | cursor.execute("SELECT * FROM tiles WHERE subregion = ?", (subregion,))
1095 | tiles = [dict(row) for row in cursor.fetchall()]
1096 | existing_tiles = [tile for tile in tiles if tile["file_disk"] and os.path.isfile(os.path.join(project_dir, tile["file_disk"]))]
1097 | if len(tiles) - len(existing_tiles) != 0:
1098 | print(f"Did not find the files for {len(tiles) - len(existing_tiles)} " f"registered tile(s) in subregion {subregion}. " "Run fetch_tiles to retrieve files " "or correct the directory path if incorrect.")
1099 | return existing_tiles
1100 |
1101 |
1102 | def select_tiles_by_subregion(project_dir: str, conn: sqlite3.Connection, subregion: str) -> list:
1103 | """
1104 | Retrieve all tile records with files in the given subregion.
1105 |
1106 | Parameters
1107 | ----------
1108 | project_dir
1109 | destination directory for project.
1110 | conn : sqlite3.Connection
1111 | database connection object.
1112 | subregion : str
1113 | subregion name.
1114 |
1115 | Returns
1116 | -------
1117 | existing_tiles : list
1118 | list of tile records.
1119 | """
1120 | cursor = conn.cursor()
1121 | cursor.execute("SELECT * FROM tiles WHERE subregion = ?", (subregion,))
1122 | tiles = [dict(row) for row in cursor.fetchall()]
1123 | existing_tiles = [tile for tile in tiles if tile["geotiff_disk"] and tile["rat_disk"] and os.path.isfile(os.path.join(project_dir, tile["geotiff_disk"])) and os.path.isfile(os.path.join(project_dir, tile["rat_disk"]))]
1124 | if len(tiles) - len(existing_tiles) != 0:
1125 | print(f"Did not find the files for {len(tiles) - len(existing_tiles)} " f"registered tile(s) in subregion {subregion}. " "Run fetch_tiles to retrieve files " "or correct the directory path if incorrect.")
1126 | return existing_tiles
1127 |
1128 |
1129 | def select_subregions_by_utm_pmn(project_dir: str, conn: sqlite3.Connection, utm: str) -> list:
1130 | """
1131 | Retrieve all subregion records with files in the given UTM.
1132 |
1133 | Parameters
1134 | ----------
1135 | project_dir
1136 | destination directory for project.
1137 | conn : sqlite3.Connection
1138 | database connection object.
1139 | utm : str
1140 | UTM zone.
1141 |
1142 | Returns
1143 | -------
1144 | subregions : list
1145 | list of subregion records in UTM zone.
1146 | """
1147 | cursor = conn.cursor()
1148 | cursor.execute(
1149 | """
1150 | SELECT * FROM vrt_subregion
1151 | WHERE utm = ? AND built_subdataset1 = 1 AND built_subdataset2 = 1
1152 | """,
1153 | (utm,),
1154 | )
1155 | subregions = [dict(row) for row in cursor.fetchall()]
1156 | for s in subregions:
1157 | if (
1158 | (s["res_2_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_vrt"])))
1159 | or (s["res_2_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_ovr"])))
1160 |
1161 | or (s["res_2_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_vrt"])))
1162 | or (s["res_2_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_ovr"])))
1163 |
1164 | or (s["res_4_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_vrt"])))
1165 | or (s["res_4_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_ovr"])))
1166 |
1167 | or (s["res_4_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_vrt"])))
1168 | or (s["res_4_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_ovr"])))
1169 |
1170 | or (s["res_8_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_vrt"])))
1171 | or (s["res_8_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_ovr"])))
1172 |
1173 | or (s["res_8_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_vrt"])))
1174 | or (s["res_8_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_ovr"])))
1175 |
1176 | or (s["complete_subdataset1_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_vrt"])))
1177 | or (s["complete_subdataset1_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_ovr"])))
1178 |
1179 | or (s["complete_subdataset2_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_vrt"])))
1180 | or (s["complete_subdataset2_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_ovr"])))
1181 |
1182 | ):
1183 | raise RuntimeError(f"Subregion VRT files missing for {s['utm']}. Please rerun.")
1184 | return subregions
1185 |
1186 |
1187 | def select_subregions_by_utm(project_dir: str, conn: sqlite3.Connection, utm: str) -> list:
1188 | """
1189 | Retrieve all subregion records with files in the given UTM.
1190 |
1191 | Parameters
1192 | ----------
1193 | project_dir
1194 | destination directory for project.
1195 | conn : sqlite3.Connection
1196 | database connection object.
1197 | utm : str
1198 | UTM zone.
1199 |
1200 | Returns
1201 | -------
1202 | subregions : list
1203 | list of subregion records in UTM zone.
1204 | """
1205 | cursor = conn.cursor()
1206 | cursor.execute(
1207 | """
1208 | SELECT * FROM vrt_subregion
1209 | WHERE utm = ? AND built = 1
1210 | """,
1211 | (utm,),
1212 | )
1213 | subregions = [dict(row) for row in cursor.fetchall()]
1214 | for s in subregions:
1215 | if (
1216 | (s["res_2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_vrt"])))
1217 | or (s["res_2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_ovr"])))
1218 | or (s["res_4_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_vrt"])))
1219 | or (s["res_4_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_ovr"])))
1220 | or (s["res_8_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_vrt"])))
1221 | or (s["res_8_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_ovr"])))
1222 | or (s["complete_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_vrt"])))
1223 | or (s["complete_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_ovr"])))
1224 | ):
1225 | raise RuntimeError(f"Subregion VRT files missing for {s['utm']}. Please rerun.")
1226 | return subregions
1227 |
1228 | def select_unbuilt_subregions_pmn(conn: sqlite3.Connection) -> list:
1229 | """
1230 | Retrieve all unbuilt subregion records.
1231 |
1232 | Parameters
1233 | ----------
1234 | conn : sqlite3.Connection
1235 | database connection object.
1236 |
1237 | Returns
1238 | -------
1239 | subregions : list
1240 | list of unbuilt subregion records.
1241 | """
1242 | cursor = conn.cursor()
1243 | cursor.execute("SELECT * FROM vrt_subregion WHERE built_subdataset1 = 0 or built_subdataset2 = 0")
1244 | subregions = [dict(row) for row in cursor.fetchall()]
1245 | return subregions
1246 |
1247 |
1248 | def select_unbuilt_subregions(conn: sqlite3.Connection) -> list:
1249 | """
1250 | Retrieve all unbuilt subregion records.
1251 |
1252 | Parameters
1253 | ----------
1254 | conn : sqlite3.Connection
1255 | database connection object.
1256 |
1257 | Returns
1258 | -------
1259 | subregions : list
1260 | list of unbuilt subregion records.
1261 | """
1262 | cursor = conn.cursor()
1263 | cursor.execute("SELECT * FROM vrt_subregion WHERE built = 0")
1264 | subregions = [dict(row) for row in cursor.fetchall()]
1265 | return subregions
1266 |
1267 |
1268 | def select_unbuilt_utms_pmn(conn: sqlite3.Connection) -> list:
1269 | """
1270 | Retrieve all unbuilt utm records.
1271 |
1272 | Parameters
1273 | ----------
1274 | conn : sqlite3.Connection
1275 | database connection object.
1276 |
1277 | Returns
1278 | -------
1279 | utms : list
1280 | list of unbuilt utm records.
1281 | """
1282 | cursor = conn.cursor()
1283 | cursor.execute("SELECT * FROM vrt_utm WHERE built_subdataset1 = 0 or built_subdataset2 = 0")
1284 | utms = [dict(row) for row in cursor.fetchall()]
1285 | return utms
1286 |
1287 |
1288 |
1289 | def select_unbuilt_utms(conn: sqlite3.Connection) -> list:
1290 | """
1291 | Retrieve all unbuilt utm records.
1292 |
1293 | Parameters
1294 | ----------
1295 | conn : sqlite3.Connection
1296 | database connection object.
1297 |
1298 | Returns
1299 | -------
1300 | utms : list
1301 | list of unbuilt utm records.
1302 | """
1303 | cursor = conn.cursor()
1304 | cursor.execute("SELECT * FROM vrt_utm WHERE built = 0")
1305 | utms = [dict(row) for row in cursor.fetchall()]
1306 | return utms
1307 |
1308 |
1309 | def update_subregion_pmn(conn: sqlite3.Connection, fields: dict) -> None:
1310 | """
1311 | Update subregion records with given path values.
1312 |
1313 | Parameters
1314 | ----------
1315 | conn : sqlite3.Connection
1316 | database connection object.
1317 | fields : dict
1318 | dictionary with the name of the subregion and paths for its associated
1319 | VRT and OVR files.
1320 | """
1321 | cursor = conn.cursor()
1322 | cursor.execute(
1323 | """UPDATE vrt_subregion
1324 | SET res_2_subdataset1_vrt = ?, res_2_subdataset1_ovr = ?,
1325 | res_2_subdataset2_vrt = ?, res_2_subdataset2_ovr = ?,
1326 | res_4_subdataset1_vrt = ?, res_4_subdataset1_ovr = ?,
1327 | res_4_subdataset2_vrt = ?, res_4_subdataset2_ovr = ?,
1328 | res_8_subdataset1_vrt = ?, res_8_subdataset1_ovr = ?,
1329 | res_8_subdataset2_vrt = ?, res_8_subdataset2_ovr = ?,
1330 | complete_subdataset1_vrt = ?, complete_subdataset1_ovr = ?,
1331 | complete_subdataset2_vrt = ?, complete_subdataset2_ovr = ?,
1332 | built_subdataset1 = 1,
1333 | built_subdataset2 = 1
1334 | WHERE region = ?""",
1335 | (
1336 | fields["res_2_subdataset1_vrt"],
1337 | fields["res_2_subdataset1_ovr"],
1338 | fields["res_2_subdataset2_vrt"],
1339 | fields["res_2_subdataset2_ovr"],
1340 | fields["res_4_subdataset1_vrt"],
1341 | fields["res_4_subdataset1_ovr"],
1342 | fields["res_4_subdataset2_vrt"],
1343 | fields["res_4_subdataset2_ovr"],
1344 | fields["res_8_subdataset1_vrt"],
1345 | fields["res_8_subdataset1_ovr"],
1346 | fields["res_8_subdataset2_vrt"],
1347 | fields["res_8_subdataset2_ovr"],
1348 |
1349 | fields["complete_subdataset1_vrt"],
1350 | fields["complete_subdataset1_ovr"],
1351 |
1352 | fields["complete_subdataset2_vrt"],
1353 | fields["complete_subdataset2_ovr"],
1354 |
1355 | fields["region"],
1356 | ),
1357 | )
1358 | conn.commit()
1359 |
1360 |
1361 | def update_subregion(conn: sqlite3.Connection, fields: dict) -> None:
1362 | """
1363 | Update subregion records with given path values.
1364 |
1365 | Parameters
1366 | ----------
1367 | conn : sqlite3.Connection
1368 | database connection object.
1369 | fields : dict
1370 | dictionary with the name of the subregion and paths for its associated
1371 | VRT and OVR files.
1372 | """
1373 | cursor = conn.cursor()
1374 | cursor.execute(
1375 | """UPDATE vrt_subregion
1376 | SET res_2_vrt = ?, res_2_ovr = ?, res_4_vrt = ?,
1377 | res_4_ovr = ?, res_8_vrt = ?, res_8_ovr = ?,
1378 | complete_vrt = ?, complete_ovr = ?, built = 1
1379 | WHERE region = ?""",
1380 | (
1381 | fields["res_2_vrt"],
1382 | fields["res_2_ovr"],
1383 | fields["res_4_vrt"],
1384 | fields["res_4_ovr"],
1385 | fields["res_8_vrt"],
1386 | fields["res_8_ovr"],
1387 | fields["complete_vrt"],
1388 | fields["complete_ovr"],
1389 | fields["region"],
1390 | ),
1391 | )
1392 | conn.commit()
1393 |
1394 |
1395 | def update_utm_pmn(conn: sqlite3.Connection, fields: dict) -> None:
1396 | """
1397 | Update utm records with given path values.
1398 |
1399 | Parameters
1400 | ----------
1401 | conn : sqlite3.Connection
1402 | database connection object.
1403 | fields : dict
1404 | dictionary with the name of the UTM zone and paths for its associated
1405 | VRT and OVR files.
1406 | """
1407 | cursor = conn.cursor()
1408 | cursor.execute(
1409 | """UPDATE vrt_utm
1410 | SET
1411 | utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?,
1412 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?,
1413 | utm_combined_vrt = ?,
1414 | built_subdataset1 = 1,
1415 | built_subdataset2 = 1,
1416 | built_combined = 1
1417 | WHERE utm = ?""",
1418 | (
1419 | fields["utm_subdataset1_vrt"],
1420 | fields["utm_subdataset1_ovr"],
1421 | fields["utm_subdataset2_vrt"],
1422 | fields["utm_subdataset2_ovr"],
1423 | fields["utm_combined_vrt"],
1424 | fields["utm"],
1425 | ),
1426 | )
1427 | conn.commit()
1428 |
1429 |
1430 | def update_utm(conn: sqlite3.Connection, fields: dict) -> None:
1431 | """
1432 | Update utm records with given path values.
1433 |
1434 | Parameters
1435 | ----------
1436 | conn : sqlite3.Connection
1437 | database connection object.
1438 | fields : dict
1439 | dictionary with the name of the UTM zone and paths for its associated
1440 | VRT and OVR files.
1441 | """
1442 | cursor = conn.cursor()
1443 | cursor.execute(
1444 | """UPDATE vrt_utm
1445 | SET utm_vrt = ?, utm_ovr = ?, built = 1
1446 | WHERE utm = ?""",
1447 | (
1448 | fields["utm_vrt"],
1449 | fields["utm_ovr"],
1450 | fields["utm"],
1451 | ),
1452 | )
1453 | conn.commit()
1454 |
1455 |
1456 | def missing_subregions_pmn(project_dir: str, conn: sqlite3.Connection) -> int:
1457 | """
1458 | Confirm built subregions's associated VRT and OVR files exists.
1459 | If the files do not exist, then change the subregion record to unbuilt.
1460 |
1461 | Parameters
1462 | ----------
1463 | project_dir
1464 | destination directory for project.
1465 | conn : sqlite3.Connection
1466 | database connection object.
1467 |
1468 | Returns
1469 | -------
1470 | missing_subregion_count : int
1471 | count of subregions with missing files.
1472 | """
1473 | cursor = conn.cursor()
1474 | cursor.execute("SELECT * FROM vrt_subregion WHERE built_subdataset1 = 1 or built_subdataset2 = 1")
1475 | subregions = [dict(row) for row in cursor.fetchall()]
1476 | missing_subregion_count = 0
1477 | # todo comparison against tiles table to know res vrts exist where expected
1478 | for s in subregions:
1479 | if (
1480 | (s["res_2_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_vrt"])))
1481 | or (s["res_2_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset1_ovr"])))
1482 | or (s["res_2_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_vrt"])))
1483 | or (s["res_2_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_subdataset2_ovr"])))
1484 | or (s["res_4_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_vrt"])))
1485 | or (s["res_4_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset1_ovr"])))
1486 | or (s["res_4_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_vrt"])))
1487 | or (s["res_4_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_subdataset2_ovr"])))
1488 | or (s["res_8_subdataset1_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_vrt"])))
1489 | or (s["res_8_subdataset1_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset1_ovr"])))
1490 | or (s["res_8_subdataset2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_vrt"])))
1491 | or (s["res_8_subdataset2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_subdataset2_ovr"])))
1492 | or (s["complete_subdataset1_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_vrt"])))
1493 | or (s["complete_subdataset1_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset1_ovr"])))
1494 | or (s["complete_subdataset2_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_vrt"])))
1495 | or (s["complete_subdataset2_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_subdataset2_ovr"])))
1496 | ):
1497 | missing_subregion_count += 1
1498 | cursor.execute(
1499 | """UPDATE vrt_subregion
1500 | SET res_2_subdataset1_vrt = ?, res_2_subdataset1_ovr = ?,
1501 | res_2_subdataset2_vrt = ?, res_2_subdataset2_ovr = ?,
1502 |
1503 | res_4_subdataset1_vrt = ?, res_4_subdataset1_ovr = ?,
1504 | res_4_subdataset2_vrt = ?, res_4_subdataset2_ovr = ?,
1505 |
1506 | res_8_subdataset1_vrt = ?, res_8_subdataset1_ovr = ?,
1507 | res_8_subdataset2_vrt = ?, res_8_subdataset2_ovr = ?,
1508 |
1509 | complete_subdataset1_vrt = ?, complete_subdataset1_ovr = ?,
1510 | complete_subdataset2_vrt = ?, complete_subdataset2_ovr = ?,
1511 |
1512 | built_subdataset1 = 0,
1513 | built_subdataset2 = 0
1514 |
1515 | WHERE region = ?""",
1516 | (
1517 | None,
1518 | None,
1519 | None,
1520 | None,
1521 | None,
1522 | None,
1523 | None,
1524 | None,
1525 | None,
1526 | None,
1527 | None,
1528 | None,
1529 | None,
1530 | None,
1531 | None,
1532 | None,
1533 | s["region"],
1534 | ),
1535 | )
1536 | cursor.execute(
1537 | """UPDATE vrt_utm
1538 | SET utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?,
1539 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?,
1540 |
1541 | utm_combined_vrt = ?,
1542 |
1543 | built_subdataset1 = 0,
1544 | built_subdataset2 = 0,
1545 | built_combined = 0
1546 |
1547 | WHERE utm = ?""",
1548 | (
1549 | None,
1550 | None,
1551 | None,
1552 | None,
1553 | None,
1554 | s["utm"],
1555 | ),
1556 | )
1557 | conn.commit()
1558 | return missing_subregion_count
1559 |
1560 |
1561 |
1562 | def missing_subregions(project_dir: str, conn: sqlite3.Connection) -> int:
1563 | """
1564 | Confirm built subregions's associated VRT and OVR files exists.
1565 | If the files do not exist, then change the subregion record to unbuilt.
1566 |
1567 | Parameters
1568 | ----------
1569 | project_dir
1570 | destination directory for project.
1571 | conn : sqlite3.Connection
1572 | database connection object.
1573 |
1574 | Returns
1575 | -------
1576 | missing_subregion_count : int
1577 | count of subregions with missing files.
1578 | """
1579 | cursor = conn.cursor()
1580 | cursor.execute("SELECT * FROM vrt_subregion WHERE built = 1")
1581 | subregions = [dict(row) for row in cursor.fetchall()]
1582 | missing_subregion_count = 0
1583 | # todo comparison against tiles table to know res vrts exist where expected
1584 | for s in subregions:
1585 | if (
1586 | (s["res_2_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_2_vrt"])))
1587 | or (s["res_2_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_2_ovr"])))
1588 | or (s["res_4_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_4_vrt"])))
1589 | or (s["res_4_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_4_ovr"])))
1590 | or (s["res_8_vrt"] and not os.path.isfile(os.path.join(project_dir, s["res_8_vrt"])))
1591 | or (s["res_8_ovr"] and not os.path.isfile(os.path.join(project_dir, s["res_8_ovr"])))
1592 | or (s["complete_vrt"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_vrt"])))
1593 | or (s["complete_ovr"] is None or not os.path.isfile(os.path.join(project_dir, s["complete_ovr"])))
1594 | ):
1595 | missing_subregion_count += 1
1596 | cursor.execute(
1597 | """UPDATE vrt_subregion
1598 | SET res_2_vrt = ?, res_2_ovr = ?, res_4_vrt = ?,
1599 | res_4_ovr = ?, res_8_vrt = ?, res_8_ovr = ?,
1600 | complete_vrt = ?, complete_ovr = ?, built = 0
1601 | WHERE region = ?""",
1602 | (
1603 | None,
1604 | None,
1605 | None,
1606 | None,
1607 | None,
1608 | None,
1609 | None,
1610 | None,
1611 | s["region"],
1612 | ),
1613 | )
1614 | cursor.execute(
1615 | """UPDATE vrt_utm
1616 | SET utm_vrt = ?, utm_ovr = ?, built = 0
1617 | WHERE utm = ?""",
1618 | (
1619 | None,
1620 | None,
1621 | s["utm"],
1622 | ),
1623 | )
1624 | conn.commit()
1625 | return missing_subregion_count
1626 |
1627 |
1628 | def missing_utms_pmn(project_dir: str, conn: sqlite3.Connection) -> int:
1629 | """
1630 | Confirm built utm's associated VRT and OVR files exists.
1631 | If the files do not exist, then change the utm record to unbuilt.
1632 |
1633 | Parameters
1634 | ----------
1635 | project_dir
1636 | destination directory for project.
1637 | conn : sqlite3.Connection
1638 | database connection object.
1639 |
1640 | Returns
1641 | -------
1642 | missing_utm_count : int
1643 | count of UTM zones with missing files.
1644 | """
1645 | cursor = conn.cursor()
1646 | cursor.execute("SELECT * FROM vrt_utm WHERE built_subdataset1 = 1 or built_subdataset2 = 1")
1647 | utms = [dict(row) for row in cursor.fetchall()]
1648 | missing_utm_count = 0
1649 | for utm in utms:
1650 | if (utm["utm_subdataset1_vrt"] is None or utm["utm_subdataset1_ovr"] is None
1651 | or utm["utm_subdataset2_vrt"] is None or utm["utm_subdataset2_ovr"] is None
1652 | or utm["utm_combined_vrt"] is None
1653 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset1_vrt"])) == False
1654 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset1_ovr"])) == False
1655 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset2_vrt"])) == False
1656 | or os.path.isfile(os.path.join(project_dir, utm["utm_subdataset2_ovr"])) == False
1657 | or os.path.isfile(os.path.join(project_dir, utm["utm_combined_vrt"])) == False):
1658 | missing_utm_count += 1
1659 | cursor.execute(
1660 | """UPDATE vrt_utm
1661 | SET
1662 | utm_subdataset1_vrt = ?, utm_subdataset1_ovr = ?,
1663 | utm_subdataset2_vrt = ?, utm_subdataset2_ovr = ?,
1664 | utm_combined_vrt = ?,
1665 | built_subdataset1 = 0,
1666 | built_subdataset2 = 0,
1667 | built_combined = 0
1668 | WHERE utm = ?""",
1669 | (
1670 | None,
1671 | None,
1672 | None,
1673 | None,
1674 | None,
1675 | utm["utm"],
1676 | ),
1677 | )
1678 | conn.commit()
1679 | return missing_utm_count
1680 |
1681 |
1682 | def missing_utms(project_dir: str, conn: sqlite3.Connection) -> int:
1683 | """
1684 | Confirm built utm's associated VRT and OVR files exists.
1685 | If the files do not exist, then change the utm record to unbuilt.
1686 |
1687 | Parameters
1688 | ----------
1689 | project_dir
1690 | destination directory for project.
1691 | conn : sqlite3.Connection
1692 | database connection object.
1693 |
1694 | Returns
1695 | -------
1696 | missing_utm_count : int
1697 | count of UTM zones with missing files.
1698 | """
1699 | cursor = conn.cursor()
1700 | cursor.execute("SELECT * FROM vrt_utm WHERE built = 1")
1701 | utms = [dict(row) for row in cursor.fetchall()]
1702 | missing_utm_count = 0
1703 | for utm in utms:
1704 | if utm["utm_vrt"] is None or utm["utm_ovr"] is None or os.path.isfile(os.path.join(project_dir, utm["utm_vrt"])) == False or os.path.isfile(os.path.join(project_dir, utm["utm_ovr"])) == False:
1705 | missing_utm_count += 1
1706 | cursor.execute(
1707 | """UPDATE vrt_utm
1708 | SET utm_vrt = ?, utm_ovr = ?, built = 0
1709 | WHERE utm = ?""",
1710 | (
1711 | None,
1712 | None,
1713 | utm["utm"],
1714 | ),
1715 | )
1716 | conn.commit()
1717 | return missing_utm_count
1718 |
1719 |
1720 | def main(project_dir: str, data_source: str = None, relative_to_vrt: bool = True) -> None:
1721 | """
1722 | Build a gdal VRT for all available tiles.
1723 | This VRT is a collection of smaller areas described as VRTs.
1724 | Nominally 2 meter, 4 meter, and 8 meter data are collected with overviews.
1725 | These data are then added to 16 meter data for the subregion.
1726 | The subregions are then collected into a UTM zone VRT where higher level
1727 | overviews are made.
1728 |
1729 | Parameters
1730 | ----------
1731 | project_dir
1732 | The directory path to use. Will create if it does not currently exist.
1733 | Required argument.
1734 | data_source : str
1735 | The NBS offers various products to different end-users. Some are available publicly.
1736 | Use this argument to identify which product you want. BlueTopo is the default.
1737 | relative_to_vrt : bool
1738 | Use this argument to set paths of referenced files inside the VRT as relative or absolute paths.
1739 |
1740 | """
1741 | project_dir = os.path.expanduser(project_dir)
1742 | if os.path.isabs(project_dir) is False:
1743 | print("Please use an absolute path for your project folder.")
1744 | if "windows" not in platform.system().lower():
1745 | print("Typically for non windows systems this means starting with '/'")
1746 | sys.exit(1)
1747 |
1748 | if int(gdal.VersionInfo()) < 3040000:
1749 | raise RuntimeError("Please update GDAL to >=3.4 to run build_vrt. \n" "Some users have encountered issues with " "conda's installation of GDAL 3.4. " "Try more recent versions of GDAL if you also " "encounter issues in your conda environment.")
1750 |
1751 | if data_source is None or data_source.lower() == "bluetopo":
1752 | data_source = "BlueTopo"
1753 |
1754 | elif data_source.lower() == "modeling":
1755 | data_source = "Modeling"
1756 |
1757 | elif data_source.lower() == "bag":
1758 | data_source = "BAG"
1759 |
1760 | elif data_source.lower() == "s102v21":
1761 | if int(gdal.VersionInfo()) < 3090000:
1762 | raise RuntimeError("Please update GDAL to >=3.8 to run build_vrt for S102V22.")
1763 | data_source = "S102V21"
1764 |
1765 | elif data_source.lower() == "s102v22":
1766 | if int(gdal.VersionInfo()) < 3090000:
1767 | raise RuntimeError("Please update GDAL to >=3.8 to run build_vrt for S102V22.")
1768 | data_source = "S102V22"
1769 |
1770 | elif os.path.isdir(data_source):
1771 | files = os.listdir(data_source)
1772 | files = [file for file in files if file.endswith(".gpkg") and "Tile_Scheme" in file]
1773 | files.sort(reverse=True)
1774 | data_source = None
1775 | for file in files:
1776 | ds_basefile = os.path.basename(file)
1777 | data_source = ds_basefile.split("_")[0]
1778 | break
1779 | if data_source is None:
1780 | raise ValueError(f"Please pass in directory which contains a tile scheme file if you're using a local data source.")
1781 |
1782 | if not os.path.isdir(project_dir):
1783 | raise ValueError(f"Folder path not found: {project_dir}")
1784 |
1785 | if not os.path.isfile(os.path.join(project_dir, f"{data_source.lower()}_registry.db")):
1786 | raise ValueError(f"SQLite database not found. Confirm correct folder. " "Note: fetch_tiles must be run at least once prior " "to build_vrt")
1787 |
1788 | if not os.path.isdir(os.path.join(project_dir, data_source)):
1789 | raise ValueError(f"Tile downloads folder not found for {data_source}. Confirm correct folder. " "Note: fetch_tiles must be run at least once prior " "to build_vrt")
1790 |
1791 | start = datetime.datetime.now()
1792 | print(f"[{start.strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Beginning work in project folder: {project_dir}\n")
1793 | if data_source.lower() in ("bag", "s102v21"):
1794 | conn = connect_to_survey_registry_pmn1(project_dir, data_source)
1795 | elif data_source.lower() in ("s102v22"):
1796 | conn = connect_to_survey_registry_pmn2(project_dir, data_source)
1797 | else:
1798 | conn = connect_to_survey_registry(project_dir, data_source)
1799 | # subregions missing files
1800 | if data_source.lower() in ("s102v22"):
1801 | missing_subregion_count = missing_subregions_pmn(project_dir, conn)
1802 | else:
1803 | missing_subregion_count = missing_subregions(project_dir, conn)
1804 |
1805 | if missing_subregion_count:
1806 | print(f"{missing_subregion_count} subregion vrts files missing. " "Added to build list.")
1807 |
1808 | # build subregion vrts
1809 | if data_source.lower() not in ("s102v22"):
1810 | unbuilt_subregions = select_unbuilt_subregions(conn)
1811 | if len(unbuilt_subregions) > 0:
1812 | print(f"Building {len(unbuilt_subregions)} subregion vrt(s). This may " "take minutes or hours depending on the amount of tiles.")
1813 | for ub_sr in unbuilt_subregions:
1814 | if data_source.lower() in ("bag", "s102v21"):
1815 | sr_tiles = select_tiles_by_subregion_pmn(project_dir, conn, ub_sr["region"])
1816 | else:
1817 | sr_tiles = select_tiles_by_subregion(project_dir, conn, ub_sr["region"])
1818 | if len(sr_tiles) < 1:
1819 | continue
1820 | if data_source.lower() in ("bag", "s102v21"):
1821 | fields = build_sub_vrts_pmn1(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt)
1822 | else:
1823 | fields = build_sub_vrts(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt)
1824 | update_subregion(conn, fields)
1825 | else:
1826 | print("Subregion vrt(s) appear up to date with the most recently " "fetched tiles.")
1827 | else:
1828 | unbuilt_subregions = select_unbuilt_subregions_pmn(conn)
1829 | if len(unbuilt_subregions) > 0:
1830 | print(f"Building {len(unbuilt_subregions)} subregion vrt(s). This may " "take minutes or hours depending on the amount of tiles.")
1831 | for ub_sr in unbuilt_subregions:
1832 | sr_tiles = select_tiles_by_subregion_pmn(project_dir, conn, ub_sr["region"])
1833 | if len(sr_tiles) < 1:
1834 | continue
1835 | fields = build_sub_vrts_pmn(ub_sr, sr_tiles, project_dir, data_source, relative_to_vrt)
1836 | update_subregion_pmn(conn, fields)
1837 | else:
1838 | print("Subregion vrt(s) appear up to date with the most recently " "fetched tiles.")
1839 |
1840 | # utms missing files
1841 | if data_source.lower() in ("s102v22"):
1842 | missing_utm_count = missing_utms_pmn(project_dir, conn)
1843 | if missing_utm_count:
1844 | print(f"{missing_utm_count} utm vrts files missing. Added to build list.")
1845 | else:
1846 | missing_utm_count = missing_utms(project_dir, conn)
1847 | if missing_utm_count:
1848 | print(f"{missing_utm_count} utm vrts files missing. Added to build list.")
1849 |
1850 | # build utm vrts
1851 | if data_source.lower() not in ("s102v22"):
1852 | unbuilt_utms = select_unbuilt_utms(conn)
1853 | if len(unbuilt_utms) > 0:
1854 | print(f"Building {len(unbuilt_utms)} utm vrt(s). This may take minutes " "or hours depending on the amount of tiles.")
1855 | for ub_utm in unbuilt_utms:
1856 | utm_start = datetime.datetime.now()
1857 | subregions = select_subregions_by_utm(project_dir, conn, ub_utm["utm"])
1858 | vrt_list = [os.path.join(project_dir, subregion["complete_vrt"]) for subregion in subregions]
1859 | if len(vrt_list) < 1:
1860 | continue
1861 | rel_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}.vrt")
1862 | utm_vrt = os.path.join(project_dir, rel_path)
1863 | print(f"Building utm{ub_utm['utm']}...")
1864 | if data_source.lower() in ("bag", "s102v21"):
1865 | create_vrt_pmn1(vrt_list, utm_vrt, [32, 64], relative_to_vrt)
1866 | else:
1867 | create_vrt(vrt_list, utm_vrt, [32, 64], relative_to_vrt)
1868 | add_vrt_rat(conn, ub_utm["utm"], project_dir, utm_vrt, data_source)
1869 | fields = {"utm_vrt": rel_path, "utm_ovr": None, "utm": ub_utm["utm"]}
1870 | if os.path.isfile(os.path.join(project_dir, rel_path + ".ovr")):
1871 | fields["utm_ovr"] = rel_path + ".ovr"
1872 | else:
1873 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. " "If error persists, please contact NBS.")
1874 | update_utm(conn, fields)
1875 | print(f"utm{ub_utm['utm']} complete after {datetime.datetime.now() - utm_start}")
1876 | else:
1877 | print("UTM vrt(s) appear up to date with the most recently " f"fetched tiles.\nNote: deleting the {data_source}_VRT folder will " "allow you to recreate from scratch if necessary")
1878 | else:
1879 | unbuilt_utms = select_unbuilt_utms_pmn(conn)
1880 | if len(unbuilt_utms) > 0:
1881 | print(f"Building {len(unbuilt_utms)} utm vrt(s). This may take minutes " "or hours depending on the amount of tiles.")
1882 | for ub_utm in unbuilt_utms:
1883 | utm_start = datetime.datetime.now()
1884 | subregions = select_subregions_by_utm_pmn(project_dir, conn, ub_utm["utm"])
1885 | vrt_subdataset1_list = [os.path.join(project_dir, subregion["complete_subdataset1_vrt"]) for subregion in subregions]
1886 | vrt_subdataset2_list = [os.path.join(project_dir, subregion["complete_subdataset2_vrt"]) for subregion in subregions]
1887 | if len(vrt_subdataset1_list) < 1 or len(vrt_subdataset2_list) < 1:
1888 | continue
1889 |
1890 | rel_subdataset1_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}_BathymetryCoverage.vrt")
1891 | utm_subdataset1_vrt = os.path.join(project_dir, rel_subdataset1_path)
1892 |
1893 | rel_subdataset2_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}_QualityOfSurvey.vrt")
1894 | utm_subdataset2_vrt = os.path.join(project_dir, rel_subdataset2_path)
1895 |
1896 | rel_combined_path = os.path.join(f"{data_source}_VRT", f"{data_source}_Fetched_UTM{ub_utm['utm']}.vrt")
1897 | utm_combined_vrt = os.path.join(project_dir, rel_combined_path)
1898 |
1899 | print(f"Building utm{ub_utm['utm']}...")
1900 | create_vrt_pmn(vrt_subdataset1_list, utm_subdataset1_vrt, [32, 64], relative_to_vrt, subdataset = 1)
1901 |
1902 | create_vrt_pmn(vrt_subdataset2_list, utm_subdataset2_vrt, [32, 64], relative_to_vrt, subdataset = 2)
1903 |
1904 | fields = {"utm_subdataset1_vrt": rel_subdataset1_path,
1905 | "utm_subdataset2_vrt": rel_subdataset2_path,
1906 | "utm_subdataset1_ovr": None,
1907 | "utm_subdataset2_ovr": None,
1908 | "utm_combined_vrt": utm_combined_vrt,
1909 | "utm": ub_utm["utm"]}
1910 |
1911 | if os.path.isfile(os.path.join(project_dir, rel_subdataset1_path + ".ovr")):
1912 | fields["utm_subdataset1_ovr"] = rel_subdataset1_path + ".ovr"
1913 | else:
1914 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. " "If error persists, please contact NBS.")
1915 |
1916 | if os.path.isfile(os.path.join(project_dir, rel_subdataset2_path + ".ovr")):
1917 | fields["utm_subdataset2_ovr"] = rel_subdataset2_path + ".ovr"
1918 | else:
1919 | raise RuntimeError("Overview failed to create for " f"utm{ub_utm['utm']}. Please try again. " "If error persists, please contact NBS.")
1920 |
1921 | combine_vrts([utm_subdataset1_vrt, utm_subdataset2_vrt], utm_combined_vrt, relative_to_vrt)
1922 |
1923 | if data_source.lower() not in ("bag", "s102v21"):
1924 | if data_source.lower() in ('s102v22'):
1925 | add_vrt_rat_pmn(conn, ub_utm["utm"], project_dir, utm_combined_vrt, data_source)
1926 | else:
1927 | add_vrt_rat(conn, ub_utm["utm"], project_dir, utm_combined_vrt, data_source)
1928 |
1929 | update_utm_pmn(conn, fields)
1930 | print(f"utm{ub_utm['utm']} complete after {datetime.datetime.now() - utm_start}")
1931 |
1932 | else:
1933 | print("UTM vrt(s) appear up to date with the most recently " f"fetched tiles.\nNote: deleting the {data_source}_VRT folder will " "allow you to recreate from scratch if necessary")
1934 |
1935 | print(f"[{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} {datetime.datetime.now().astimezone().tzname()}] {data_source}: Operation complete after {datetime.datetime.now() - start}")
--------------------------------------------------------------------------------