├── .gitignore
├── .spyproject
│   ├── codestyle.ini
│   ├── encoding.ini
│   ├── vcs.ini
│   └── workspace.ini
├── LICENSE
├── README.md
├── images
│   ├── compVizApp.png
│   └── new
├── notebooks
│   ├── UNET_G4G_2019_Parking.ipynb
│   └── UNET_G4G_2019_solar.ipynb
└── utils
    ├── array_tools.py
    ├── calibration.py
    ├── ee_tools.py
    ├── model_tools.py
    ├── pc_tools.py
    ├── prediction_tools.py
    ├── processing.py
    ├── raster_tools.py
    └── stats.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .httr-oauth
2 |
3 | # Byte-compiled / optimized / DLL files
4 | *__pycache__/
5 | *.py[cod]
6 | *$py.class
7 | *.pyc
8 |
9 | # Data directories
10 | data/
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Azure stuff
16 | *.amlignore
17 | *.amltmp
18 | .ipynb_aml_checkpoints
19 |
20 | # Distribution / packaging
21 | .Python
22 | build/
23 | develop-eggs/
24 | dist/
25 | downloads/
26 | eggs/
27 | .eggs/
28 | lib/
29 | lib64/
30 | parts/
31 | sdist/
32 | var/
33 | wheels/
34 | pip-wheel-metadata/
35 | share/python-wheels/
36 | *.egg-info/
37 | .installed.cfg
38 | *.egg
39 | MANIFEST
40 |
41 | # PyInstaller
42 | # Usually these files are written by a python script from a template
43 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
44 | *.manifest
45 | *.spec
46 |
47 | # Installer logs
48 | pip-log.txt
49 | pip-delete-this-directory.txt
50 |
51 | # Unit test / coverage reports
52 | htmlcov/
53 | .tox/
54 | .nox/
55 | .coverage
56 | .coverage.*
57 | .cache
58 | nosetests.xml
59 | coverage.xml
60 | *.cover
61 | *.py,cover
62 | .hypothesis/
63 | .pytest_cache/
64 |
65 | # Translations
66 | *.mo
67 | *.pot
68 |
69 | # Django stuff:
70 | *.log
71 | local_settings.py
72 | db.sqlite3
73 | db.sqlite3-journal
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # IPython
92 | profile_default/
93 | ipython_config.py
94 |
95 | # pyenv
96 | .python-version
97 |
98 | # pipenv
99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
102 | # install all needed dependencies.
103 | #Pipfile.lock
104 |
105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
106 | __pypackages__/
107 |
108 | # Celery stuff
109 | celerybeat-schedule
110 | celerybeat.pid
111 |
112 | # SageMath parsed files
113 | *.sage.py
114 |
115 | # Environments
116 | .env
117 | .venv
118 | env/
119 | venv/
120 | ENV/
121 | env.bak/
122 | venv.bak/
123 |
124 | # Spyder project settings
125 | .spyderproject
126 | .spyproject
127 |
128 | # Rope project settings
129 | .ropeproject
130 |
131 | # mkdocs documentation
132 | /site
133 |
134 | # mypy
135 | .mypy_cache/
136 | .dmypy.json
137 | dmypy.json
138 |
139 | # Pyre type checker
140 | .pyre
141 |
--------------------------------------------------------------------------------
/.spyproject/codestyle.ini:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 |
4 | [main]
5 | version = 0.1.0
6 |
7 |
--------------------------------------------------------------------------------
/.spyproject/encoding.ini:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 |
4 | [main]
5 | version = 0.1.0
6 |
7 |
--------------------------------------------------------------------------------
/.spyproject/vcs.ini:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system =
4 |
5 | [main]
6 | version = 0.1.0
7 |
8 |
--------------------------------------------------------------------------------
/.spyproject/workspace.ini:
--------------------------------------------------------------------------------
1 | [workspace]
2 | restore_data_on_startup = True
3 | save_data_on_exit = True
4 | save_history = True
5 | save_non_project_files = False
6 |
7 | [main]
8 | version = 0.1.0
9 | recent_files = ['C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\prediction_tools.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\model_tools.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\processing.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\azure\\train_wetland.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\azure\\train_landcover.py']
10 |
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2019, mjevans26
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Computer Vision with Free Satellite Data
2 | This repository contains code used to produce computer vision models that can identify infrastructure in publicly available satellite imagery.
3 |
4 | ## Organization
5 | The bulk of the useful code in this repository lives in the 'utils' directory. These Python files are importable modules, organized broadly by the libraries they rely on and the kinds of functions they contain. For instance, utils/pc_tools.py imports the Planetary Computer ecosystem of packages and contains functions and classes for working with data from the Microsoft Planetary Computer (MPC). Similarly, model_tools.py imports the TensorFlow and Keras libraries and contains functions and classes for constructing and training deep learning models with those libraries. A minimal usage sketch is shown below.
6 |
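As a minimal sketch (assuming the repository root is on your Python path; these calls use functions visible in utils/array_tools.py rather than a documented API), a training patch might be standardized and augmented like this:

```python
import numpy as np
from utils.array_tools import normalize_array, aug_array_morph

# a 256 x 256 patch with four channels (e.g. R, G, B, NIR)
patch = np.random.rand(256, 256, 4).astype('float32')

# standardize each channel, then apply random flips and rotations
normed = normalize_array(patch, axes=[0, 1])
augmented = aug_array_morph(normed)
```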
7 | ## Parking lots
8 | As part of the [Long Island Solar Roadmap](https://solarroadmap.org), we are testing the ability of computer vision models to automate the detection and delineation of parking lots in NAIP aerial imagery. This analysis uses the DeepLab v3 model with a pre-trained ResNet backbone.
9 |
10 | ## Solar arrays
11 | Ground-mounted solar arrays are prominent features on the landscape, and their rapid proliferation can be hard to track. The Chesapeake Conservancy trained a computer vision model to detect and delineate solar arrays in Sentinel-2 data. This UNET model can be used to rapidly update the map of solar energy in DE, MD, PA, NY, VA, WV, and other eastern states. These outputs were recently published in a [Biological Conservation](https://www.sciencedirect.com/science/article/pii/S0006320723001751) paper.
12 |
13 | ### App
14 | The outputs are available for interactive inspection through a [Google Earth Engine App](https://mevans-cic.users.earthengine.app/view/cpksolar).
15 | 
16 |
--------------------------------------------------------------------------------
/images/compVizApp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mjevans26/Satellite_ComputerVision/9753cedf4403a529503e4bfea3f6f3b9ee68f740/images/compVizApp.png
--------------------------------------------------------------------------------
/images/new:
--------------------------------------------------------------------------------
1 | k
2 |
--------------------------------------------------------------------------------
/notebooks/UNET_G4G_2019_Parking.ipynb:
--------------------------------------------------------------------------------
1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"UNET_G4G_2019_Parking.ipynb","provenance":[],"private_outputs":true,"collapsed_sections":[],"toc_visible":true,"machine_shape":"hm"},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"view-in-github","colab_type":"text"},"source":["
"]},{"cell_type":"code","metadata":{"id":"esIMGVxhDI0f","colab_type":"code","colab":{}},"source":["#@title Copyright 2019 Google LLC. { display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"_SHAc5qbiR8l","colab_type":"text"},"source":["# Introduction\n","\n","This is a Google Colab notebook demonstrating the process used to export training, evaluation, and prediction data from Google Earth Engine used to develop a [Deeplab V3](https://arxiv.org/abs/1706.05587) convolutional neural network that delineates ground mounted solar arrays in [NAIP](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/). Model training and predictions are accomplished in a separate notebook."]},{"cell_type":"markdown","metadata":{"id":"_MJ4kW1pEhwP","colab_type":"text"},"source":["# Setup software libraries\n","\n","Install needed libraries to the notebook VM. Authenticate as necessary."]},{"cell_type":"code","metadata":{"id":"neIa46CpciXq","colab_type":"code","colab":{}},"source":["# Cloud authentication.\n","from google.colab import auth\n","auth.authenticate_user()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"4D6ArFWrckmS","colab_type":"code","colab":{}},"source":["# Earth Engine install to notebook VM.\n","!pip install earthengine-api"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"jat01FEoUMqg","colab_type":"code","colab":{}},"source":["# Import, authenticate and initialize the Earth Engine library.\n","import ee\n","ee.Authenticate()\n","ee.Initialize()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"8RnZzcYhcpsQ","colab_type":"code","colab":{}},"source":["# Tensorflow setup.\n","import tensorflow as tf\n","\n","tf.enable_eager_execution()\n","print(tf.__version__)\n","\n","%load_ext tensorboard"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"n1hFdpBQfyhN","colab_type":"code","colab":{}},"source":["# Folium setup.\n","import folium\n","print(folium.__version__)\n","\n","# Define the URL format used for Earth Engine generated map tiles.\n","EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WjUgYcsAs9Ed","colab_type":"text"},"source":["##Mount Google Drive"]},{"cell_type":"code","metadata":{"id":"JKDKpX4FtQA1","colab_type":"code","colab":{}},"source":["# Attach specified google drive directory to this notebook\n","from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"M6pVAfdDIJ-a","colab_type":"code","colab":{}},"source":["%cd '/content/drive/My Drive/repos/Satellite_ComputerVision'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"iT8ycmzClYwf","colab_type":"text"},"source":["# Variables\n","\n","Declare the 
variables that will be in use throughout the notebook."]},{"cell_type":"markdown","metadata":{"id":"qKs6HuxOzjMl","colab_type":"text"},"source":["## Specify your Cloud Storage Bucket\n","You must have write access to a bucket to run this demo! To run it read-only, use the demo bucket below, but note that writes to this bucket will not work."]},{"cell_type":"code","metadata":{"id":"obDDH1eDzsch","colab_type":"code","colab":{}},"source":["# This is read-only:\n","BUCKET = 'cvod-203614-mlengine'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmfKLl9XcnGJ","colab_type":"text"},"source":["## Set other global variables"]},{"cell_type":"code","metadata":{"id":"psz7wJKalaoj","colab_type":"code","colab":{}},"source":["# Specify names locations for outputs in Cloud Storage. \n","FOLDER = 'LI_parking'\n","PRED_BASE = 'data/predict'\n","TRAINING_BASE = 'data/training'\n","EVAL_BASE = 'data/eval'\n","MODEL_BASE = 'models'\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","# Specify inputs (Landsat bands) to the model and the response variable.\n","opticalBands = ['R', 'G', 'B']\n","thermalBands = ['B8', 'B11', 'B12']\n","pcaBands = ['pc1', 'pc2', 'pc3']\n","BANDS = opticalBands# + thermalBands# + pcaBands\n","RESPONSE = 'landcover'\n","FEATURES = BANDS + [RESPONSE]\n","\n","# Specify the size and shape of patches expected by the model.\n","KERNEL_SIZE = 512\n","KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n","COLUMNS = [\n"," tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n","]\n","FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n","\n","# Sizes of the training and evaluation datasets.\n","TRAIN_SIZE = 8000\n","EVAL_SIZE = 5000\n","\n","# Specify model training parameters.\n","BATCH_SIZE = 16\n","EPOCHS = 20\n","BUFFER_SIZE = 8000\n","OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)\n","LOSS = 'binary_crossentropy'\n","METRICS = [tf.keras.metrics.categorical_accuracy, tf.keras.metrics.MeanIoU(num_classes=2)]"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hgoDc7Hilfc4","colab_type":"text"},"source":["# Imagery\n","\n","Gather and setup the imagery to use for inputs (predictors). This is a three-year, cloud-free, Landsat 8 composite. Display it in the notebook for a sanity check."]},{"cell_type":"code","metadata":{"id":"-IlgXu-vcUEY","colab_type":"code","colab":{}},"source":["# Use Landsat 8 surface reflectance data.\n","NAIP = ee.ImageCollection(\"USDA/NAIP/DOQQ\")\n","towns = ee.FeatureCollection(\"users/defendersofwildlifeGIS/LongIsland/towns\")\n","\n","begin = '2017-01-01'\n","end = '2017-12-30'\n","\n","# The image input data is a cloud-masked median composite.\n","image = NAIP.filterDate(begin, end)\\\n",".filterBounds(towns)\\\n",".filterDate(begin, end)\\\n",".median()\\\n",".select(BANDS)\\\n",".clip(towns)\n","\n","# Use folium to visualize the imagery.\n","mapid = image.getMapId({'bands': ['R', 'G', 'B'], 'min': 0, 'max': 256})\n","map = folium.Map(location=[40.8175, -73.195])\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='median composite',\n"," ).add_to(map)\n","\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gHznnctkJsZJ","colab_type":"text"},"source":["Prepare the response (what we want to predict). This is impervious surface area (in fraction of a pixel) from the 2016 NLCD dataset. 
Display to check."]},{"cell_type":"code","metadata":{"id":"5Wxz9BPYHBwh","colab_type":"code","colab":{}},"source":["def set_landcover(ft):\n"," return ft.set('label', 1)\n","\n","nassauParkingFootprints = ee.FeatureCollection(\"users/defendersofwildlifeGIS/LongIsland/NassauParking\")\n","suffolkParkingFootprints = ee.FeatureCollection('users/defendersofwildlifeGIS/LongIsland/SuffolkParking')\n","parkingFootprints = nassauParkingFootprints.merge(suffolkParkingFootprints)\n","parking = parkingFootprints.map(set_landcover)\n","blankimg = ee.Image.constant(0)\n","parking_footprint = parking.reduceToImage(['label'], ee.Reducer.first())\n","labelimg = blankimg.where(parking_footprint, parking_footprint).rename('landcover')\n","\n","mapid = labelimg.getMapId({'bands': 'landcover', 'min':0, 'max': 1})\n","print(mapid)\n","map = folium.Map(location = [40.8175, -73.195])\n","folium.TileLayer(\n"," tiles = EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay = True,\n"," name = 'parking lots',\n",").add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CTS7_ZzPDhhg","colab_type":"text"},"source":["Stack the 2D images (Landsat composite and NLCD impervious surface) to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores 256x256 patches of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points."]},{"cell_type":"code","metadata":{"id":"eGHYsdAOipa4","colab_type":"code","colab":{}},"source":["featureStack = ee.Image.cat([\n"," image.select(BANDS),\n"," labelimg.select(RESPONSE)\n","]).float()\n","\n","print(featureStack.bandNames().getInfo())\n","\n","list = ee.List.repeat(1, KERNEL_SIZE)\n","lists = ee.List.repeat(list, KERNEL_SIZE)\n","kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n","\n","arrays = featureStack.neighborhoodToArray(kernel)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"F4djSxBRG2el","colab_type":"text"},"source":["Use some pre-made geometries to sample the stack in strategic locations. Specifically, these are hand-made polygons in which to take the 256x256 samples. 
Display the sampling polygons on a map, red for training polygons, blue for evaluation."]},{"cell_type":"code","metadata":{"id":"ure_WaD0itQY","colab_type":"code","colab":{}},"source":["import re\n","towns = towns.randomColumn('random', 52.0)\n","townList = ee.List(towns.aggregate_array('TOWN')).distinct()\n","townList = [townList.get(town).getInfo() for town in range(townList.size().getInfo())]\n","townList = [town for town in townList if not re.search(r\"City|Water|Indian\", town)]\n","trainList = townList[0:(len(townList)//10) * 8]\n","evalList = townList[(len(townList)//10) * 8:]\n","\n","trainFilter = ee.Filter.inList('TOWN', ee.List(trainList))\n","evalFilter = ee.Filter.inList(\"TOWN\", ee.List(evalList))\n","\n","trainingPolys = towns.filter(trainFilter)\n","print('training size', len(trainList))\n","\n","evalPolys = towns.filter(evalFilter)\n","print('eval size', len(evalList))\n","\n","polyImage = ee.Image(0).byte().paint(trainingPolys, 1).paint(evalPolys, 2)\n","polyImage = polyImage.updateMask(polyImage)\n","\n","mapid = polyImage.getMapId({'min': 1, 'max': 2, 'palette': ['red', 'blue']})\n","map = folium.Map(location=[40.8175, -73.195], zoom_start=8)\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='training polygons',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ZV890gPHeZqz","colab_type":"text"},"source":["# Sampling\n","\n","The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. 
Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export."]},{"cell_type":"code","metadata":{"id":"FyRpvwENxE-A","colab_type":"code","cellView":"code","colab":{}},"source":["#@title Don't run\n","# Convert the feature collections to lists for iteration.\n","#trainingPolysList = trainingPolys.toList(trainingPolys.size())\n","#evalPolysList = trainingPolys.toList(trainingPolys.size())\n","\n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each town.\n","N = 1000 # Total sample size in each town.\n","\n","for town in trainList:\n"," geomSample = ee.FeatureCollection([])\n"," for i in range (n):\n"," sample = arrays.sample(\n"," region = trainingPolys.filterMetadata('TOWN', 'equals', town),\n"," scale = 1,\n"," numPixels = N/n,\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n"," \n"," desc = 'DeepLab_' + str(KERNEL_SIZE) + '_NAIP_' + town\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + TRAINING_BASE + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n"," \n","for town in evalList:\n"," geomSample = ee.FeatureCollection([])\n"," for i in range(n):\n"," sample = arrays.sample(\n"," region = evalPolys.filterMetadata('TOWN', 'equals', town),\n"," scale = 1,\n"," numPixels = N/n,\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n"," \n"," desc = 'DeepLab_' + str(KERNEL_SIZE) + 'NAIP_' + town\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + EVAL_BASE + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n"," \n","#Export all the training data (in many pieces), with one task \n","#per geometry.\n","# for g in range(trainingPolys.size().getInfo()):\n","# geomSample = ee.FeatureCollection([])\n","# for i in range(n):\n","# sample = arrays.sample(\n","# region = ee.Feature(trainingPolysList.get(g)).geometry(), \n","# scale = 30, \n","# numPixels = N / n, # Size of the shard.\n","# seed = i,\n","# tileScale = 8\n","# )\n","# geomSample = geomSample.merge(sample)\n"," \n","# desc = TRAINING_BASE + '_g' + str(g)\n","# task = ee.batch.Export.table.toCloudStorage(\n","# collection = geomSample,\n","# description = desc, \n","# bucket = BUCKET, \n","# fileNamePrefix = FOLDER + '/' + desc,\n","# fileFormat = 'TFRecord',\n","# selectors = BANDS + [RESPONSE]\n","# )\n","# task.start()\n","\n","# # Export all the evaluation data.\n","# for g in range(evalPolys.size().getInfo()):\n","# geomSample = ee.FeatureCollection([])\n","# for i in range(n):\n","# sample = arrays.sample(\n","# region = ee.Feature(evalPolysList.get(g)).geometry(), \n","# scale = 30, \n","# numPixels = N / n,\n","# seed = i,\n","# tileScale = 8\n","# )\n","# geomSample = geomSample.merge(sample)\n"," \n","# desc = EVAL_BASE + '_g' + str(g)\n","# task = ee.batch.Export.table.toCloudStorage(\n","# collection = geomSample,\n","# description = desc, \n","# bucket = BUCKET, \n","# fileNamePrefix = FOLDER + '/' + desc,\n","# fileFormat = 'TFRecord',\n","# selectors = BANDS + [RESPONSE]\n","# )\n","# 
task.start()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"dk51-l7MH2Sa","colab_type":"text"},"source":["##Preprocessing\n","Define functions that apply random manipulations to our training data"]},{"cell_type":"code","metadata":{"id":"ajyp48-vINuy","colab_type":"code","colab":{}},"source":["def augColor(x):\n"," \"\"\"Color augmentation\n","\n"," Args:\n"," x: Image\n","\n"," Returns:\n"," Augmented image\n"," \"\"\"\n"," x = tf.image.random_hue(x, 0.08)\n"," x = tf.image.random_saturation(x, 0.6, 1.6)\n"," x = tf.image.random_brightness(x, 0.05)\n"," x = tf.image.random_contrast(x, 0.7, 1.3)\n"," return x\n"," \n"," \n","def augImg(img):\n"," outDims = tf.shape(img)[0:1]\n"," x = tf.image.random_flip_left_right(img)\n"," x = tf.image.random_flip_up_down(x)\n"," x = rotated = tf.image.rot90(x, tf.random_uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))\n"," #x = zoom(x, outDims)\n"," #since were gonna map_fn this on a 4d image, output must be 3d, so squeeze the artificial 'sample' dimension\n"," return tf.squeeze(x)\n","\n","def preprocess(img, labels):\n"," dims = tf.shape(img)\n"," #need to combine labels and bands for morphological transformations\n"," comb = tf.concat([img, tf.expand_dims(labels, axis = 2)], axis = 2)\n"," aug = aug_img(comb)\n"," #aug = tf.map_fn(fn = aug_img, elems = comb)\n"," labels = tf.squeeze(aug[:, :, -1])\n"," band_stack = color(aug[:, :, 0:dims[2]])\n"," return band_stack, labels"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rWXrvBE4607G","colab_type":"text"},"source":["# Training data\n","\n","Load the data exported from Earth Engine into a `tf.data.Dataset`. The following are helper functions for that."]},{"cell_type":"code","metadata":{"id":"WWZ0UXCVMyJP","colab_type":"code","colab":{}},"source":["def parse_tfrecord(example_proto):\n"," \"\"\"The parsing function.\n"," Read a serialized example into the structure defined by FEATURES_DICT.\n"," Args:\n"," example_proto: a serialized Example.\n"," Returns: \n"," A dictionary of tensors, keyed by feature name.\n"," \"\"\"\n"," return tf.io.parse_single_example(example_proto, FEATURES_DICT)\n","\n","\n","def to_tuple(inputs):\n"," \"\"\"Function to convert a dictionary of tensors to a tuple of (inputs, outputs).\n"," Turn the tensors returned by parse_tfrecord into a stack in HWC shape.\n"," Args:\n"," inputs: A dictionary of tensors, keyed by feature name.\n"," Returns: \n"," A dtuple of (inputs, outputs).\n"," \"\"\"\n"," inputsList = [inputs.get(key) for key in FEATURES]\n"," stacked = tf.stack(inputsList, axis=0)\n"," # Convert from CHW to HWC\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," stacked = augImg(stacked)\n"," return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]\n","\n","\n","def get_dataset(pattern):\n"," \"\"\"Function to read, parse and format to tuple a set of input tfrecord files.\n"," Get all the files matching the pattern, parse and convert to tuple.\n"," Args:\n"," pattern: A file pattern to match in a Cloud Storage bucket.\n"," Returns: \n"," A tf.data.Dataset\n"," \"\"\"\n"," glob = tf.gfile.Glob(pattern)\n"," dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')\n"," dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)\n"," dataset = dataset.map(to_tuple, num_parallel_calls=5)\n"," return dataset"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Xg1fa18336D2","colab_type":"text"},"source":["Use the helpers to read in the training dataset. 
Print the first record to check."]},{"cell_type":"code","metadata":{"id":"rm0qRF0fAYcC","colab_type":"code","colab":{}},"source":["def get_training_dataset():\n","\t\"\"\"Get the preprocessed training dataset\n"," Returns: \n"," A tf.data.Dataset of training data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '/*'\n","\tprint(glob)\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.shuffle(8000).batch(BATCH_SIZE).repeat()\n","\treturn dataset\n","\n","training = get_training_dataset()\n","\n","print(iter(training.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"7CRGG26bYWQZ","colab_type":"code","colab":{}},"source":["print(iter(training.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"j-cQO5RL6vob","colab_type":"text"},"source":["# Evaluation data\n","\n","Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled."]},{"cell_type":"code","metadata":{"id":"ieKTCGiJ6xzo","colab_type":"code","colab":{}},"source":["def get_eval_dataset():\n","\t\"\"\"Get the preprocessed evaluation dataset\n"," Returns: \n"," A tf.data.Dataset of evaluation data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + EVAL_BASE + '/*'\n","\tprint(glob)\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.batch(1).repeat()\n","\treturn dataset\n","\n","evaluation = get_eval_dataset()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"keoalUvBbSkh","colab_type":"code","colab":{}},"source":["print(iter(evaluation.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"9JIE7Yl87lgU","colab_type":"text"},"source":["# Model\n","\n","Here we use the Keras implementation of the U-Net model as found [in the TensorFlow examples](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb). The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probability, label or a continuous output. We can implement the model essentially unmodified, but will use mean squared error loss on the sigmoidal output since we are treating this as a regression problem, rather than a classification problem. 
Since impervious surface fraction is constrained to [0,1], with many values close to zero or one, a saturating activation function is suitable here."]},{"cell_type":"markdown","metadata":{"id":"Xh2EZyyPu84H","colab_type":"text"},"source":["##Metrics"]},{"cell_type":"code","metadata":{"id":"mISCOXUHu7G_","colab_type":"code","colab":{}},"source":["def weighted_bce(y_true, y_pred):\n"," bce = tf.nn.weighted_cross_entropy_with_logits(labels = y_true, logits = y_pred, pos_weight = 20)\n"," return tf.reduce_mean(bce)\n","\n","def iou(true, pred):\n","\n"," intersection = true * pred\n","\n"," notTrue = 1 - true\n"," union = true + (notTrue * pred)\n","\n"," return tf.reduce_sum(intersection)/tf.reduce_sum(union)\n","\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wsnnnz56yS3l","colab_type":"code","colab":{}},"source":["from tensorflow.python.keras import layers\n","from tensorflow.python.keras import losses\n","from tensorflow.python.keras import models\n","from tensorflow.python.keras import metrics\n","from tensorflow.python.keras import optimizers\n","\n","def conv_block(input_tensor, num_filters):\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\treturn encoder\n","\n","def encoder_block(input_tensor, num_filters):\n","\tencoder = conv_block(input_tensor, num_filters)\n","\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n","\treturn encoder_pool, encoder\n","\n","def decoder_block(input_tensor, concat_tensor, num_filters):\n","\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n","\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\treturn decoder\n","\n","def get_model():\n","\tinputs = layers.Input(shape=[None, None, len(BANDS)]) # 256\n","\tencoder0_pool, encoder0 = encoder_block(inputs, 32) # 128\n","\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64\n","\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32\n","\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16\n","\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8\n","\tcenter = conv_block(encoder4_pool, 1024) # center\n","\tdecoder4 = decoder_block(center, encoder4, 512) # 16\n","\tdecoder3 = decoder_block(decoder4, encoder3, 256) # 32\n","\tdecoder2 = decoder_block(decoder3, encoder2, 128) # 64\n","\tdecoder1 = decoder_block(decoder2, encoder1, 64) # 128\n","\tdecoder0 = decoder_block(decoder1, encoder0, 32) # 256\n","\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n","\n","\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n","\n","\tmodel.compile(\n","\t\toptimizer=OPTIMIZER, \n"," loss = 
weighted_bce,\n","\t\t#loss=losses.get(LOSS),\n","\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n","\n","\treturn model\n","\n","\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," log_dir+'best_weights.hdf5',\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uu_E7OTDBCoS","colab_type":"text"},"source":["# Training the model\n","\n","You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)."]},{"cell_type":"code","metadata":{"id":"NzzaWxOhSxBy","colab_type":"code","colab":{}},"source":["m = get_model()\n","\n","\n","m.fit(\n"," x=training, \n"," epochs=EPOCHS, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=EVAL_SIZE/BATCH_SIZE,\n"," callbacks = [checkpoint]\n"," )\n","\n","m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')\n","\n","#!gsutil cp best_weights.hdf5 gs://cvod-203614-mlengine/NC_solar/models/UNET256/best_weights.hdf5\n","#!gsutil cp UNET256.h5 gs://cvod-203614-mlengine/NC_solar/models/UNET256/UNET256.h5"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"U2XrwZHp66j4","colab_type":"text"},"source":["Note that the notebook VM is sometimes not heavy-duty enough to get through a whole training job, especially if you have a large buffer size or a large number of epochs. You can still use this notebook for training, but may need to set up an alternative VM ([learn more](https://research.google.com/colaboratory/local-runtimes.html)) for production use. Alternatively, you can package your code for running large training jobs on Google's AI Platform [as described here](https://cloud.google.com/ml-engine/docs/tensorflow/trainer-considerations). 
The following code loads a pre-trained model, which you can use for predictions right away."]},{"cell_type":"markdown","metadata":{"id":"zvIqqpNXqJSE","colab_type":"text"},"source":["##Load model and resume training"]},{"cell_type":"code","metadata":{"id":"q0xgBhsaqInV","colab_type":"code","colab":{}},"source":["#bring in the architecture and best weights from GCS\n","m = models.load_model('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5', custom_objects={'weighted_bce': weighted_bce})\n","m.load_weights('drive/My Drive/Tensorflow/models/UNET256/best_weights.hdf5') \n","\n","#lets see where were at\n","evalMetrics = m.evaluate(x=evaluation, steps = EVAL_SIZE, verbose = 1)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"xlsFciElxOUA","colab_type":"code","colab":{}},"source":["#set the monitored value (val_mean_io_u) to current evaluation output\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," log_dir+'best_weights.hdf5',\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )\n","\n","checkpoint.best = evalMetrics[2]\n","print(checkpoint.__dict__)\n","print(checkpoint.best)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7eq0aLlw864A","colab_type":"text"},"source":["## Set up tensorboard"]},{"cell_type":"code","metadata":{"id":"PA2gJENE8-J1","colab_type":"code","colab":{}},"source":["tensorboard = tf.keras.callbacks.TensorBoard(log_dir= 'drive/My Drive/Tensorflow/models/UNET256')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Ty8wCxDtqWBM","colab_type":"code","colab":{}},"source":["#Now keep training!\n","m.fit(\n"," x=training, \n"," epochs= 10, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=EVAL_SIZE/BATCH_SIZE,\n"," callbacks = [checkpoint, tensorboard]\n"," )\n","#m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"tyhWcGHJ82e8","colab_type":"code","colab":{}},"source":["m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"i9OM5BiS1xYQ","colab_type":"code","colab":{}},"source":["%tensorboard --logdir 'drive/My Drive/Tensorflow/models/UNET256'"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-RJpNfEUS1qp","colab_type":"code","colab":{}},"source":["# Load a trained model. 50 epochs. 25 hours. Final RMSE ~0.08.\n","MODEL_DIR = BUCKET + '/' + FOLDER + '/' + 'models/UNET256'\n","m = tf.contrib.saved_model.load_keras_model(MODEL_DIR)\n","m.summary()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"J1ySNup0xCqN","colab_type":"text"},"source":["# Prediction\n","\n","The prediction pipeline is:\n","\n","1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storge bucket.\n","2. Use the trained model to make the predictions.\n","3. Write the predictions to a TFRecord file in a Cloud Storage.\n","4. Upload the predictions TFRecord file to Earth Engine.\n","\n","The following functions handle this process. 
It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time."]},{"cell_type":"code","metadata":{"id":"lv6nb0ShH4_T","colab_type":"code","colab":{}},"source":["#Inspect the prediction outputs\n","predictions = m.predict(evaluation, steps=1, verbose=1)\n","for prediction in predictions:\n"," print(predictions)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"M3WDAa-RUpXP","colab_type":"code","colab":{}},"source":["def doExport(image, out_image_base, kernel_buffer, region):\n"," \"\"\"Run the image export task. Block until complete.\n"," \"\"\"\n"," task = ee.batch.Export.image.toCloudStorage(\n"," image = image.select(BANDS+[RESPONSE]), \n"," description = out_image_base, \n"," bucket = BUCKET, \n"," fileNamePrefix = FOLDER + '/' + PRED_BASE + '/' + out_image_base, \n"," region = region.getInfo()['coordinates'], \n"," scale = 1, \n"," fileFormat = 'TFRecord', \n"," maxPixels = 1e10,\n"," formatOptions = { \n"," 'patchDimensions': KERNEL_SHAPE,\n"," 'kernelSize': kernel_buffer,\n"," 'compressed': True,\n"," 'maxFileSize': 104857600\n"," }\n"," )\n"," task.start()\n","\n"," # Block until the task completes.\n"," print('Running image export to Cloud Storage...')\n"," import time\n"," while task.active():\n"," time.sleep(30)\n","\n"," # Error condition\n"," if task.status()['state'] != 'COMPLETED':\n"," print('Error with image export.')\n"," else:\n"," print('Image export completed.')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zb_9_FflygVw","colab_type":"code","colab":{}},"source":["def doPrediction(out_image_base, user_folder, kernel_buffer, region):\n"," \"\"\"Perform inference on exported imagery, upload to Earth Engine.\n"," \"\"\"\n","\n"," print('Looking for TFRecord files...')\n"," \n"," # Get a list of all the files in the output bucket.\n"," filesList = !gsutil ls 'gs://'{BUCKET}'/'{FOLDER}'/'{PRED_BASE}\n"," # Get only the files generated by the image export.\n"," exportFilesList = [s for s in filesList if out_image_base in s]\n","\n"," # Get the list of image files and the JSON mixer file.\n"," imageFilesList = []\n"," jsonFile = None\n"," for f in exportFilesList:\n"," if f.endswith('.tfrecord.gz'):\n"," imageFilesList.append(f)\n"," elif f.endswith('.json'):\n"," jsonFile = f\n","\n"," # Make sure the files are in the right order.\n"," imageFilesList.sort()\n","\n"," from pprint import pprint\n"," pprint(imageFilesList)\n"," print(jsonFile)\n"," \n"," import json\n"," # Load the contents of the mixer file to a JSON object.\n"," jsonText = !gsutil cat {jsonFile}\n"," # Get a single string w/ newlines from the IPython.utils.text.SList\n"," mixer = json.loads(jsonText.nlstr)\n"," pprint(mixer)\n"," patches = mixer['totalPatches']\n"," \n"," # Get set up for prediction.\n"," x_buffer = int(kernel_buffer[0] / 2)\n"," y_buffer = int(kernel_buffer[1] / 2)\n","\n"," buffered_shape = [\n"," KERNEL_SHAPE[0] + kernel_buffer[0],\n"," KERNEL_SHAPE[1] + kernel_buffer[1]]\n","\n"," imageColumns = [\n"," tf.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) \n"," for k in BANDS\n"," ]\n","\n"," imageFeaturesDict = dict(zip(BANDS, imageColumns))\n","\n"," def parse_image(example_proto):\n"," return tf.parse_single_example(example_proto, imageFeaturesDict)\n","\n"," def toTupleImage(dict):\n"," inputsList = [dict.get(key) for key in BANDS]\n"," stacked = tf.stack(inputsList, axis=0)\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," return stacked\n"," 
\n"," # Create a dataset from the TFRecord file(s) in Cloud Storage.\n"," imageDataset = tf.data.TFRecordDataset(imageFilesList, compression_type='GZIP')\n"," imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n"," imageDataset = imageDataset.map(toTupleImage).batch(1)\n"," \n"," # Perform inference.\n"," print('Running predictions...')\n"," predictions = m.predict(imageDataset, steps=patches, verbose=1)\n"," # print(predictions[0])\n","\n"," print('Writing predictions...')\n"," out_image_file = 'gs://' + BUCKET + '/' + FOLDER + '/' + PRED_BASE + '/outputs/' + out_image_base + '.TFRecord'\n"," writer = tf.python_io.TFRecordWriter(out_image_file)\n"," patches = 0\n"," for predictionPatch in predictions:\n"," print('Writing patch ' + str(patches) + '...')\n"," predictionPatch = predictionPatch[\n"," x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n","\n"," # Create an example.\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'probability': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=predictionPatch.flatten()))\n"," }\n"," )\n"," )\n"," # Write the example.\n"," writer.write(example.SerializeToString())\n"," patches += 1\n","\n"," writer.close()\n","\n"," # Start the upload.\n"," out_image_asset = user_folder + '/' + out_image_base\n"," !earthengine upload image --asset_id={out_image_asset} {out_image_file} {jsonFile}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"LZqlymOehnQO","colab_type":"text"},"source":["Now there's all the code needed to run the prediction pipeline, all that remains is to specify the output region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of the shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half the size of which will extend beyond the kernel buffer. For example, specifying a 128x128 kernel will append 64 pixels on each side of the patch, to ensure that the pixels in the output are taken from inputs completely covered by the kernel. 
"]},{"cell_type":"code","metadata":{"id":"FPANwc7B1-TS","colab_type":"code","colab":{}},"source":["# This has a read-only asset in it:\n","user_folder = 'users/defendersofwildlifeGIS'\n","\n","# Base file name to use for TFRecord files and assets.\n","li_image_base = 'li_parking_deeplab512Pred'\n","# Half this will extend on the sides of each patch.\n","li_kernel_buffer = [256, 256]\n","# Huntington\n","li_region = ee.Feature(towns.filterMetadata(\"TOWN\", 'equals', 'Huntington').first()).geometry()\n","print(li_region.area().getInfo())"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lLNEOLkXWvSi","colab_type":"code","cellView":"both","colab":{}},"source":["#@title Don't run\n","# Run the export.\n","doExport(featureStack, li_image_base, li_kernel_buffer, li_region)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"KxACnxKFrQ_J","colab_type":"code","cellView":"both","colab":{}},"source":["#@title Don't run\n","# Run the prediction.\n","doPrediction(nc_image_base, user_folder, nc_kernel_buffer, nc_region)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uj_G9OZ1xH6K","colab_type":"text"},"source":["# Display the output\n","\n","One the data has been exported, the model has made predictions and the predictions have been written to a file, and the image imported to Earth Engine, it's possible to display the resultant Earth Engine asset. Here, display the impervious area predictions over Beijing, China."]},{"cell_type":"code","metadata":{"id":"Jgco6HJ4R5p2","colab_type":"code","colab":{}},"source":["out_image = ee.Image(user_folder + '/' + bj_image_base)\n","mapid = out_image.getMapId({'min': 0, 'max': 1})\n","map = folium.Map(location=[39.898, 116.5097])\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='predicted impervious',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]}]}
2 |
--------------------------------------------------------------------------------
/utils/array_tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Mar 226 10:50:44 2023
4 |
5 | @author: MEvans
6 | """
7 |
8 | import numpy as np
9 | import math
10 | from random import shuffle, randint, uniform
11 |
12 | def make_harmonics(times: np.ndarray, timesteps, dims):
13 | """Create arrays of sin and cos representations of time
14 | Parameters:
15 | times (np.ndarray): 1D array of start times
16 | timesteps (int): number of annual timesteps
17 | dims (tpl): H, W dimensions of output data
18 | Returns:
19 |         np.ndarray: 4D array (B, H, W, 2) holding the sin and cos encoding of each start time
20 | """
21 | xys = [sin_cos(time, timesteps) for time in times] # use the T dimension to get number of intervals
22 | # r = deg_to_radians(lat) # convert latitude to radians
23 | out = np.stack([np.stack([np.full(dims, x), np.full(dims, y)], axis = -1) for x,y in xys], axis = 0)
24 | return out
25 |
26 | def merge_classes(cond_array, trans, out_array):
27 | """Reclassify categorical array values
28 | Parameters
29 | ---
30 | cond_array: np.ndarray
31 | array with values to be evaluated by conditional expression
32 | trans: list[tpl]
33 | tuples containing condition and value to return where true
34 |     out_array: np.ndarray
35 |         array whose values are returned where the condition is false
36 | Returns
37 | ---
38 |     np.ndarray
39 | reclassified array same shape and size as input
40 | """
41 | output = np.copy(out_array)
42 | for x,y in trans:
43 | output[cond_array == x] = y
44 | return output
45 |
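# Illustrative usage (not part of the original module; `classed` and `base` are hypothetical
# arrays of the same shape): return 1 where `classed` equals 11 and 2 where it equals 21,
# keeping the values of `base` everywhere else.
# >>> reclassified = merge_classes(classed, [(11, 1), (21, 2)], base)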
46 |
47 | def normalize_array(img, axes=[2], epsilon=1e-8, moments = None, splits = None):
48 | """
49 | Standardize incoming image patches by mean and variance.
50 |
51 | Moments can be calculated based on patch data by providing axes:
52 | To standardize each pixel use axes = [2]
53 | To standardize each channel use axes = [0, 1]
54 | To standardize globally use axes = [0, 1, 2]
55 |
56 |     To standardize by global or per-channel moments, supply a list of (mean, std) tuples.
57 | To standardize groups of channels separately, identify the size of each group. Groups of
58 | channels must be stacked contiguously and group sizes must sum to the total # of channels
59 |
60 | Parameters
61 | ---
62 | img: np.ndarray
63 | nD image (usually 3d) to be normalized
64 | axes: list: int
65 | Array of ints. Axes along which to compute mean and variance, usually length n-1
66 | epsilon: float
67 | small number to avoid dividing by zero
68 | moments: list:tpl:int
69 | list of global mean, std tuples for standardization
70 | splits: list:int
71 | size(s) of groups of features to be kept together
72 | Return:
73 | tensor: nD image tensor normalized by channels
74 | """
75 |
76 | # define a basic function to normalize a 3d tensor
77 | def normalize(img):
78 | # shape = tf.shape(x).numpy()
79 | # if we've defined global or per-channel moments...
80 | if moments:
81 | # cast moments to arrays for mean and variance
82 | mean = np.array([tpl[0] for tpl in moments], dtype = 'float32')
83 | std = np.array([tpl[1] for tpl in moments], dtype = 'float32')
84 | # otherwise, calculate moments along provided axes
85 | else:
86 | mean = np.nanmean(img, axes, keepdims = True)
87 | std = np.nanstd(img, axes, keepdims = True)
88 | # keepdims = True to ensure compatibility with input tensor
89 |
90 | # normalize the input tensor
91 | normed = (img - mean)/(std + epsilon)
92 | return normed
93 |
94 | # if splits are given, apply tensor normalization to each split
95 | if splits:
96 | splitLen = sum(splits)
97 | toNorm = img[:,:,0:splitLen]
98 | dontNorm = img[:,:,splitLen:]
99 | arrays = np.split(toNorm, splits, axis = -1)
100 | normed = [normalize(array) for array in arrays]
101 | normed.append(dontNorm)
102 | # gather normalized splits into single tensor
103 | img_normed = np.concatenate(normed, axis = -1)
104 | else:
105 | img_normed = normalize(img)
106 |
107 | return img_normed
108 |
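# Illustrative usage (not part of the original module): standardize a channels-last patch
# >>> patch = np.random.rand(256, 256, 6).astype('float32')
# >>> per_channel = normalize_array(patch, axes=[0, 1])     # one mean/std per channel
# >>> global_norm = normalize_array(patch, axes=[0, 1, 2])  # a single global mean/std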
109 | def rescale_array(img, axes = -1, epsilon=1e-8, moments = None, splits = None):
110 | """
111 | Rescale incoming image patch to [0,1] based on min and max values
112 |
113 | Min, max can be calculated based on patch data by providing axes:
114 | To rescale each pixel use axes = [2]
115 | To rescale each channel use axes = [0, 1]
116 | To rescale globally use axes = [0, 1, 2]
117 |
118 |     To rescale by global or per-channel extremes, supply a list of (min, max) tuples.
119 | To rescale groups of channels separately, identify the size of each group. Groups of
120 | channels must be stacked contiguously and group sizes must sum to the total # of channels
121 |
122 | Parameters
123 | ---
124 | img: np.ndarray
125 | array to be rescaled, usually 3D (H,W,C)
126 | axes: list: int
127 | Array of ints. Axes along which to compute mean and variance, usually length n-1
128 | epsilon: float
129 | small number to avoid dividing by zero
130 | moments: list:tpl:int
131 |         optional, list of global (min, max) tuples for rescaling
132 | splits: list:int
133 | optional, size(s) of groups of features to be kept together
134 | Return:
135 | tensor: 3D tensor of same shape as input, with values [0,1]
136 | """
137 | def rescale(img):
138 | if moments:
139 | minimum = np.array([tpl[0] for tpl in moments], dtype = 'float32')
140 | maximum = np.array([tpl[1] for tpl in moments], dtype = 'float32')
141 | else:
142 | minimum = np.nanmin(img, axis = axes, keepdims = True)
143 | maximum = np.nanmax(img, axis = axes, keepdims = True)
144 | scaled = (img - minimum)/((maximum - minimum) + epsilon)
145 | # scaled = tf.divide(tf.subtract(img, minimum), tf.add(tf.subtract(maximum, minimum))
146 | return scaled
147 |
148 | # if splits are given, apply tensor normalization to each split
149 | if splits:
150 | arrays = np.split(img, splits, axis = -1)
151 | rescaled = [rescale(array) for array in arrays]
152 | # gather normalized splits into single tensor
153 |         img_rescaled = np.concatenate(rescaled, axis = -1)
154 | else:
155 | img_rescaled = rescale(img)
156 |
157 | return img_rescaled
158 |
159 | def aug_array_color(img: np.ndarray) -> np.ndarray:
160 | """Randomly change the brightness and contrast of an image
161 | Parameters
162 | ---
163 | img: np.ndarray
164 | image to be adjusted
165 |
166 | Return
167 | ---
168 | np.ndarray: input array with brightness and contrast adjusted
169 | """
170 | dims = len(img.shape)
171 | n_ch = img.shape[-1]
172 | axes = (0,1) if dims == 3 else (1,2)
173 |
174 | contra_adj = 0.05
175 | bright_adj = 0.05
176 |
177 | ch_mean = np.nanmean(img, axis = axes, keepdims = True)
178 | # print('channel means', ch_mean)
179 | contra_mul = uniform(a = 1-contra_adj, b = 1+contra_adj)
180 |
181 | bright_mul = uniform(a = 1 - bright_adj, b = 1+bright_adj)
182 |
183 | recolored = (img - ch_mean) * contra_mul + (ch_mean * bright_mul)
184 | return recolored
185 |
186 | def aug_array_morph(img: np.ndarray, v_rand:bool = None, h_rand:bool = None, r_rand:int = None, return_tuple:bool = False) -> np.ndarray:
187 | """
188 | Perform morphological image augmentation on image array
189 | Parameters:
190 | img (np.ndarray): 4D or 3D channels last image array
191 | Returns:
192 |         np.ndarray: channels-last image array of the same shape, randomly flipped and/or rotated
193 | """
194 | dims = list(range(len(img.shape)))
195 | v_axis = dims[-3] # channels last, vertical axis is always third last
196 | h_axis = dims[-2] # channels last, horizontal axis is always second last
197 |
198 | if v_rand is None:
199 | v_rand = uniform(0,1) < 0.5
200 | if h_rand is None:
201 | h_rand = uniform(0,1) < 0.5
202 | if r_rand is None:
203 | r_rand = randint(0,3)
204 |
205 | # flip array up/down
206 | x = np.flip(img, axis = v_axis) if v_rand else img
207 | x = np.flip(x, axis = h_axis) if h_rand else x
208 | x = np.rot90(x, r_rand, axes = (v_axis, h_axis))
209 |
210 | if return_tuple:
211 | return x, v_rand, h_rand, r_rand
212 | else:
213 | return x
214 |
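# Illustrative usage (not part of the original module; `img` and `label` are hypothetical
# channels-last arrays): apply the same random flips and rotation to an image and its
# label mask by reusing the sampled augmentation values
# >>> img_aug, v, h, r = aug_array_morph(img, return_tuple=True)
# >>> lbl_aug = aug_array_morph(label, v_rand=v, h_rand=h, r_rand=r)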
215 | def normalize_timeseries(arr, maxval = 10000, minval = 0, axis = -1, e = 0.00001):
216 | # normalize band values across timesteps
217 | normalized = (arr-minval)/(maxval-minval+e)
218 | # mn = np.nanmean(arr, axis = axis, keepdims = True)
219 | # std = np.nanstd(arr, axis = axis, keepdims = True)
220 | # normalized = (arr - mn)/(std+e)
221 | # replace nans with zeros?
222 | finite = np.where(np.isnan(normalized), 0.0, normalized)
223 | return finite
224 |
225 | def rearrange_timeseries(arr: np.ndarray, nbands: int) -> np.ndarray:
226 | """ Randomly rearange 3d images in a timeseries
227 |
228 |     Changes the start point of a temporal sequence of 3D images stored in a 5D array
229 | while maintaining relative order.
230 |
231 | Parameters
232 | ---
233 | arr: np.ndarray
234 | 5D (B, T, H, W, C) array to be rearranged
235 | nbands: int
236 | size of the last array dimension corresponding to image bands/channels
237 |
238 | Returns
239 | ---
240 | np.ndarray
241 | 5D array of same size/shape as input
242 | """
243 |
244 | # the number of time steps is in the 1st dimension if our data is (B, T, H, W, C)
245 | timesteps = arr.shape[1]
246 | # randomly pick one of the timesteps as the starting time
247 | starttime = randint(0, timesteps-1)
248 | # print('start', starttime)
249 | # grab all timesteps leading up to the timestep corresponding to our random first
250 | last = arr[:,0:starttime,:,:,:]
251 | print('last shape', last.shape)
252 | first = arr[:,starttime:timesteps,:,:,:]
253 | print('start shape', first.shape)
254 | rearranged = np.concatenate([first, last], axis = 1)
255 |     assert rearranged.shape == arr.shape
256 | return(rearranged)
257 |
258 | def split_timeseries(arr: np.ndarray, nbands: int) -> tuple:
259 |     """Divide a timeseries of 3D images into feature images and labels
260 | 
261 |     Parameters
262 |     ---
263 |     arr: np.ndarray
264 |         5D (B, T, H, W, C) array to be split
265 |     nbands: int
266 |         number of label channels to retain from the final timestep
267 | 
268 |     Returns
269 |     ---
270 |     tuple: 5D (B, T-1, H, W, C) features and 4D (B, H, W, nbands) labels
271 |     """
272 | feats = arr[:,0:-1,:,:,:]
273 | labels = arr[:,-1,:,:,0:nbands]
274 |
275 | # confirm there are no all-nan images in labels
276 | batch_sums = np.sum(labels, axis = (1,2,3))
277 |     if 0.0 in batch_sums:
278 |         print('all nan labels, reshuffling')
279 |         feats, labels = split_timeseries(rearrange_timeseries(arr, nbands), nbands)
280 | 
281 |     return(feats, labels)
282 |
283 | def sin_cos(t:int, freq:int = 6) -> tuple:
284 |     x = t/freq  # fraction of a full cycle completed at timestep t
285 | theta = 2*math.pi * x
286 | return (math.sin(theta), math.cos(theta))
287 |
288 | def add_harmonic(timeseries: np.ndarray):
289 |     """ Add harmonic (sin/cos of timestep) bands to a (B, T, H, W, C) imagery timeseries.
290 |     Currently assumes the first image is the start of the annual cycle.
291 |     """
292 | in_shape = timeseries.shape
293 | timesteps = in_shape[1]
294 | tpls = [sin_cos(t, timesteps) for t in range(timesteps)]
295 | xys = [np.stack([np.full((in_shape[0], in_shape[2], in_shape[3]), x), np.full((in_shape[0], in_shape[2], in_shape[3]), y)], axis = -1) for x,y in tpls]
296 | harmonics = np.stack(xys, axis = 1)
297 | harmonic_timeseries = np.concatenate([timeseries, harmonics], axis = -1)
298 | return harmonic_timeseries
299 |
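# Usage sketch (not part of the original module; shapes are illustrative): prepare a
# (B, T, H, W, C) timeseries batch with harmonic time bands, a random cyclic shift,
# and a feature/label split.
# batch = np.random.rand(8, 6, 128, 128, 4)               # B, T, H, W, C
# batch = add_harmonic(batch)                             # adds sin/cos bands -> C = 6
# batch = rearrange_timeseries(batch, nbands = 4)         # random new start point along T
# feats, labels = split_timeseries(batch, nbands = 4)     # (8, 5, 128, 128, 6), (8, 128, 128, 4)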
--------------------------------------------------------------------------------
/utils/calibration.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Mar 16 17:44:19 2020
4 |
5 | @author: MEvans
6 | """
7 |
8 | import math
9 | import ee
10 | from stats import normalize
11 |
12 | def clamp_and_scale(img, bands, p, AOI):
13 | """
14 | clip the upper range of an image based on percentile
15 |
16 | This function is similar to ee.Image().clip() and ee.Image().unitScale(),
17 | but operates on multiple bands with potentially different upper limits.
18 |
19 | Parameters:
20 | img (ee.Image): the image to modify
21 |         bands (ee.List): band names to clamp and rescale
22 | p (int): upper percentile above which to truncate values
23 | AOI (ee.Geometry): area within which to calculate percentile
24 |
25 | Returns:
26 | ee.Image: rescaled image with band values [0, 1]
27 | """
28 |     # create a list of the pth percentile value for all bands
29 |     percentiles = img.select(bands).reduceRegion(
30 |         reducer = ee.Reducer.percentile([p]).repeat(ee.List(bands).size()),
31 |         geometry = AOI,
32 |         scale = 100,
33 |         maxPixels = 1e13,
34 |         tileScale = 12
35 |     ).get('p{}'.format(p))
36 | 
37 |     # turn the list of pth percentiles into a constant image
38 | upperImg = ee.Image.constant(percentiles).rename(bands)
39 |
40 | #clip the upper range of extreme values where sensors get washed out
41 | normImage = img.where(img.gte(upperImg), upperImg)
42 |
43 | # rescale the truncated image to [0, 1]
44 | rescaled = normalize(normImage, upperImg, ee.Image.constant(0))
45 | return ee.Image(rescaled)
46 |
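# Usage sketch (the asset ID and geometry are placeholders, not from this repo):
# aoi = ee.Geometry.Rectangle([-77.1, 38.8, -76.9, 39.0])
# s2 = ee.Image('COPERNICUS/S2_SR/20190610T154911_20190610T160118_T18SUJ')
# scaled = clamp_and_scale(s2, ['B2', 'B3', 'B4', 'B8'], 99, aoi)   # bands rescaled to [0, 1]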
47 | def scene_median(imgCol, bands, sceneID):
48 | """
49 | Create median images for each unique scene in an image collection
50 | Parameters:
51 | imgCol (ee.ImageCollection):
52 | bands (list): image bands on which to calculate medians
53 | sceneID (str): metadata field storing unique scene ID values
54 | Returns:
55 | ee.ImageCollection: composed of median images per scene
56 | """
57 | # first get list of all scene IDs
58 | scenes = ee.List(imgCol.aggregate_array(sceneID)).distinct()
59 | # define function to filter by scene id and take median
60 |
61 | medians = scenes.map(lambda str: imgCol.filter(ee.Filter.eq(sceneID, str)).median().set(sceneID, str))
62 | return ee.ImageCollection(medians).select(bands)
63 |
64 | def get_overlap(imgCol1, imgCol2):
65 | """
66 | Calculate the area of overlap between two image collections
67 | Parameters:
68 | imgCol1 (ee.ImageCollection): first image collection
69 | imgCol2 (ee.ImageCollection): second image collection
70 | Returns:
71 | ee.Geometry: area of overlap
72 | """
73 | geom1 = imgCol1.geometry(5).dissolve()
74 | geom2 = imgCol2.geometry(5).dissolve()
75 | intersect = geom1.intersection(geom2, 5)
76 | return intersect
77 |
78 | def hist_to_FC(hist, band):
79 | """
80 | convert a histogram of band values to a feature collection
81 |
82 | Args:
83 | hist (ee.Dictionary): output of histogram reducer on an image
84 | band (str): band name
85 |
86 | Return:
87 |         ee.FeatureCollection: one feature per histogram bin, with properties 'dn'
88 |             (bucket mean) and 'probability' (normalized cumulative probability)
89 |     """
90 | valsList = ee.List(ee.Dictionary(ee.Dictionary(hist).get(band)).get('bucketMeans'))
91 | freqsList = ee.List(ee.Dictionary(ee.Dictionary(hist).get(band)).get('histogram'))
92 | cdfArray = ee.Array(freqsList).accum(0)
93 | total = cdfArray.get([-1])
94 | normalizedCdf = cdfArray.divide(total)
95 |
96 | # create 2D array with histogram bucket means and normalized cdf values
97 | array = ee.Array.cat([valsList, normalizedCdf], 1)
98 |
99 |     # define function to create a feature collection with properties determined by list
100 | def fxn(ls):
101 | return ee.Feature(None, {'dn': ee.List(ls).get(0), 'probability': ee.List(ls).get(1)})
102 |
103 | output = ee.FeatureCollection(array.toList().map(fxn))
104 | return output
105 |
106 | def make_FC(image, AOI):
107 | """
108 |     create a feature collection from the histograms of an image's bands
109 | 
110 |     Parameters:
111 |         image (ee.Image): input image
112 |         AOI (ee.Feature): area within which to compute band histograms
113 | Returns:
114 | ee.List: list of feature collections returned by hist_to_FC
115 | """
116 | # Histogram equalization start:
117 | bands = image.bandNames()
118 | histo = image.reduceRegion(
119 | reducer = ee.Reducer.histogram(
120 | maxBuckets = math.pow(2, 12)
121 | ),
122 | geometry = AOI,
123 | scale = 100,
124 | maxPixels = 1e13,
125 | tileScale = 12
126 | )
127 |
128 | def fxn(band):
129 | return hist_to_FC(histo, band)
130 |
131 | # map hist -> FC conversion fxn across bands
132 | output = bands.map(fxn)
133 |
134 | return output
135 |
136 | def equalize(image1, image2, AOI):
137 | """
138 | use histogram matching to calibrate two images
139 |
140 | Parameters:
141 | image1 (ee.Image): reference image
142 | image2 (ee.Image): image to be calibrated
143 | AOI (ee.Geometry): area of overlap between the two images
144 |
145 | Returns:
146 | ee.Image: image2 with bands calibrated to the histogram(s) of image1 bands
147 | """
148 | bands = image1.bandNames()
149 | nBands = bands.size().subtract(1)
150 |
151 | # These are lists of feature collections
152 | fc1 = make_FC(image1, AOI)
153 | fc2 = make_FC(image2, AOI)
154 |
155 | def fxn(i):
156 | band = bands.get(i)
157 | classifier1 = ee.Classifier.randomForest(100)\
158 | .setOutputMode('REGRESSION')\
159 | .train(
160 | features = ee.FeatureCollection(ee.List(fc1).get(i)),
161 | classProperty = 'dn',
162 | inputProperties = ['probability']
163 | )
164 |
165 | classifier2 = ee.Classifier.randomForest(100)\
166 | .setOutputMode('REGRESSION')\
167 | .train(
168 | features = ee.FeatureCollection(ee.List(fc2).get(i)),
169 | classProperty = 'probability',
170 | inputProperties = ['dn']
171 | )
172 |
173 | # Do the shuffle: DN -> probability -> DN. Return the result.
174 | b = image2.select([band]).rename('dn');
175 | # DN -> probability -> DN
176 | output = b.classify(classifier2, 'probability')\
177 | .classify(classifier1, band)
178 |
179 | return output
180 |
181 | imgList = ee.List.sequence(0, nBands).map(fxn)
182 | return ee.ImageCollection(imgList).toBands().rename(bands)
183 |
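# Usage sketch (the collections are placeholders): histogram-match the second scene median to
# the first over their area of overlap.
# overlap = get_overlap(imgCol1, imgCol2)
# calibrated = equalize(imgCol1.median(), imgCol2.median(), overlap)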
184 | def equalize_collection(imgCol, bands, sceneID):
185 | """
186 | histogram equalize images in a collection by unique orbit path
187 |
188 | Parameters:
189 | imgCol (ee.ImageCollection): collection storing images to equalize
190 | bands (list): list of band names to be calibrated
191 | sceneID (str): property by which images will be grouped
192 |
193 | Returns:
194 | ee.ImageCollection: median images per scene equalized to the westernmost path
195 | """
196 | # first get list of all scene IDs
197 | scenes = ee.List(imgCol.aggregate_array(sceneID)).distinct()
198 | # create an image collection of scene medians
199 | medians = scene_median(imgCol, bands, sceneID)
200 | # define a function to return the centroid longitude of each scene
201 | def get_coord_min(str):
202 | centroids = imgCol.filter(ee.Filter.eq(sceneID, str)).geometry(1).centroid(1)
203 | longs = centroids.coordinates().get(0)
204 | return longs
205 | # create a list of centroid longitudes
206 | coords = scenes.map(get_coord_min)
207 | # sort the scenes by increasing longitude
208 | scenes = scenes.sort(coords)
209 | # define a function that will equalize the list of scenes in succession
210 | def iterate_equalize(scene, prev):
211 | # take the previous median image
212 | prev = ee.List(prev)
213 | img1 = ee.Image(prev.get(-1))
214 | # take the next median image
215 | img2 = ee.Image(medians.filter(ee.Filter.eq(sceneID, scene)).first())
216 | # filter image collection to the previous scene
217 | index = scenes.indexOf(scene).subtract(1)
218 | imgCol1 = imgCol.filter(ee.Filter.eq(sceneID, scenes.get(index)))
219 | #imgCol1 = imgCol.filter(ee.Filter.eq(sceneID, prev))
220 | # filter image collection to the next scene
221 | imgCol2 = imgCol.filter(ee.Filter.eq(sceneID, scene))
222 | overlap = get_overlap(imgCol1, imgCol2)
223 | # if there is overlap between collections, equalize (returns image)
224 | # otherwise return the current image
225 | equalized = ee.Algorithms.If(overlap.area(5).gt(0), equalize(img1, img2, overlap), img2)
226 | update = ee.List(prev).add(equalized)
227 | return update
228 | # create a list of successively equalized scenes
229 | # initial value for iterate is the first median scene
230 | first = ee.Image(medians.filter(ee.Filter.eq(sceneID, scenes.get(0))).first())
231 | # take all but the first scene median and iteratively equalize
232 | output = scenes.slice(1).iterate(iterate_equalize, ee.List([first]))
233 | return ee.ImageCollection.fromImages(output)
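# Usage sketch (the collection, bands, and grouping property are assumptions, not from this repo):
# s2 = ee.ImageCollection('COPERNICUS/S2_SR').filterBounds(aoi).filterDate('2020-06-01', '2020-09-01')
# equalized = equalize_collection(s2, ['B2', 'B3', 'B4', 'B8'], 'SENSING_ORBIT_NUMBER')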
--------------------------------------------------------------------------------
/utils/ee_tools.py:
--------------------------------------------------------------------------------
1 | import ee
2 |
3 | # Initialize Earth Engine
4 | ee.Initialize()
5 |
6 | # Initialize Earth Engine
7 | JRC = ee.ImageCollection("JRC/GSW1_1/YearlyHistory")
8 |
9 | def norm_p(z):
10 | """
11 |     Calculate (approximately) the p-value for a standard normal distribution
12 |
13 | Parameters:
14 | z (ee.Image): image containing z-scores
15 |
16 | Returns:
17 | ee.Image: image containing p-values
18 | """
19 | return ee.Image.constant(1).subtract(z.multiply(-1.65451).exp().add(1).pow(-1))
20 |
21 | def chi_p(chi, df):
22 |     """ Calculate the CDF probability of a chi-square statistic
23 | Parameters:
24 | chi (ee.Image): single band image with observations from a chi-squared dist
25 | df (int): degrees of freedom
26 | Returns:
27 | ee.Image: single band image of probabilities
28 | """
29 | cdf = ee.Image(chi.divide(2)).gammainc(ee.Number(df).divide(2))
30 | return cdf.rename(['p'])
31 |
32 | def gamma_p(stat, df):
33 | shape = ee.Image(1)
34 | scale = ee.Image(df)
35 | denom = shape.gamma()
36 | num = shape.gammainc(stat.divide(scale))
37 | return num.divide(denom).rename(['p'])
38 |
39 | def normalize(img, maxImg, minImg):
40 | """
41 | Scale an image from 0 to 1
42 |
43 | Parameters:
44 | img (ee.Image): image to be rescaled
45 | maxImg (ee.Image): image storing the maximum value of the image
46 | minImg (ee.Image): image storing the minimum value of the image
47 | Returns:
48 |         ee.Image: input image rescaled to [0, 1]
49 | """
50 | return img.subtract(minImg).divide(maxImg.subtract(minImg))
51 |
52 | def standardize(img):
53 | """
54 | Standardize an image to z-scores using mean and sd
55 |
56 | Parameters:
57 |         img (ee.Image): image to be standardized
58 |
59 | Returns:
60 | ee.Image: image containing z-scores per band
61 | """
62 | bands = img.bandNames()
63 | mean = img.reduceRegion(
64 | reducer= ee.Reducer.mean(),
65 | scale= 300).toImage()
66 | sd = img.reduceRegion(
67 | reducer= ee.Reducer.stdDev(),
68 | scale= 300
69 | ).toImage(bands)
70 | return img.subtract(mean).divide(sd)
71 |
72 |
73 | def ldaScore(img, inter, xbands, coefficients):
74 | """
75 | Function converting multiband image into single band image of LDA scores
76 |
77 | Parameters:
78 | img (ee.Image): multiband image
79 |         inter (float): intercept parameter from LDA analysis
80 | xbands (ee.List): string list of n band names
81 | coefficients (ee.List): numeric list of length n containing LDA coefficients
82 | Returns:
83 | ee.Image: image with one band containing LDA scores based on provided coefficients
84 | """
85 | bands = img.select(xbands)
86 | coeffs = ee.Dictionary.fromLists(xbands, coefficients).toImage(xbands)
87 | score = bands.multiply(coeffs).addBands(ee.Image(inter)).reduce(ee.Reducer.sum())
88 | return score
89 |
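# Worked example (intercept, bands, and coefficients are illustrative, not from this repo):
# the score is intercept + sum(coefficient_i * band_i) per pixel, e.g.
# score = ldaScore(img, inter = -2.1, xbands = ['B3', 'B8', 'B11'], coefficients = [0.4, -1.2, 0.8])
# computes -2.1 + 0.4*B3 - 1.2*B8 + 0.8*B11 as a single-band image.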
90 | def sentinel2toa(img):
91 | """
92 |     Convert raw Sentinel-2 digital numbers to top-of-atmosphere reflectance, and extract azimuth / zenith metadata
93 |
94 | Parameters:
95 | img (ee.Image): Sentinel-2 image to convert
96 |
97 | Returns:
98 |         ee.Image: QA60 band plus rescaled reflectance bands, with solar/viewing geometry set as properties
99 | """
100 | toa = img.select(['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12']) \
101 | .divide(10000)\
102 | .set('solar_azimuth', img.get('MEAN_SOLAR_AZIMUTH_ANGLE')) \
103 | .set('solar_zenith', img.get('MEAN_SOLAR_ZENITH_ANGLE')) \
104 | .set('viewing_azimuth', img.get('MEAN_INCIDENCE_AZIMUTH_ANGLE_B8')) \
105 | .set('viewing_zenith', img.get('MEAN_INCIDENCE_ZENITH_ANGLE_B8')) \
106 | .set('CLOUDY_PIXEL_PERCENTAGE', img.get('CLOUDY_PIXEL_PERCENTAGE')) \
107 | #.set('system:time_start', img.get('system:time_start'));
108 | return img.select(['QA60']).addBands(toa);
109 |
110 | def rescale(img, exp, thresholds):
111 | #print('rescale:', img, exp, thresholds)
112 | #return img.subtract(thresholds[0]).divide(thresholds[1]-thresholds[0])
113 | return img.expression(exp, {'img': img}).subtract(thresholds[0]).divide(thresholds[1] - thresholds[0])
114 |
115 | def waterScore(img):
116 | """
117 | Calculate a water likelihood score [0, 1]
118 |
119 | Parameters:
120 | img (ee.Image): Sentinel-2 image
121 |
122 | Returns:
123 | ee.Image: image with single ['waterscore'] band
124 | """
125 | img = sentinel2toa(img)
126 | # Compute several indicators of water and take the minimum of them.
127 | score = ee.Image(1.0)
128 |
129 | # Set up some params
130 | darkBands = ['B3', 'B4', 'B8', 'B11', 'B12']
131 | brightBand = 'B2'
132 | shadowSumBands = ['B8', 'B11', 'B12']
133 | # Water tends to be dark
134 | sum = img.select(shadowSumBands).reduce(ee.Reducer.sum())
135 | #sum = rescale(sum, [0.35, 0.2]).clamp(0, 1)
136 | sum = rescale(sum, 'img', [0.35, 0.2]).clamp(0, 1)
137 | score = score.min(sum)
138 |
139 | # It also tends to be relatively bright in the blue band
140 | mean = img.select(darkBands).reduce(ee.Reducer.mean())
141 | std = img.select(darkBands).reduce(ee.Reducer.stdDev())
142 | z = (img.select([brightBand]).subtract(std)).divide(mean)
143 | z = rescale(z, 'img', [0, 1]).clamp(0, 1)
144 | #z = rescale(z, [0,1]).clamp(0,1)
145 | score = score.min(z)
146 |
147 | # Water is at or above freezing
148 | # score = score.min(rescale(img, 'img.temp', [273, 275]));
149 |
150 |     # Water is high in ndsi (aka mndwi)
151 | ndsi = img.normalizedDifference(['B3', 'B11'])
152 | ndsi = rescale(ndsi, 'img', [0.3, 0.8])
153 | #ndsi = rescale(ndsi, [0.3, 0.8])
154 |
155 | score = score.min(ndsi)
156 |
157 | return score.clamp(0, 1).rename(['waterScore'])
158 |
159 | def basicQA(img):
160 | """
161 |     Mask clouds in a Sentinel-2 image using the built-in quality assurance band
162 | Parameters:
163 | img (ee.Image): Sentinel-2 image with QA band
164 | Returns:
165 | ee.Image: original image masked for clouds and cirrus
166 | """
167 | #print('basicQA:', img)
168 | qa = img.select('QA60').int16()
169 | # print('qa:', type(qa))
170 | # qa = img.select(['QA60']).int16()
171 | #print('qa:', qa.getInfo())
172 | # Bits 10 and 11 are clouds and cirrus, respectively.
173 | cloudBitMask = 1024 # math.pow(2, 10)
174 | cirrusBitMask = 2048 #math.pow(2, 11)
175 | # Both flags should be set to zero, indicating clear conditions.
176 | #mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0))
177 | mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0))
178 | dated = img.updateMask(mask)
179 | #dated = img.addBands(img.metadata('system:time_start', 'date')).updateMask(mask)
180 | return dated
181 |
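# Worked example of the bitmask arithmetic above: bit 10 (clouds) is 2**10 = 1024 and
# bit 11 (cirrus) is 2**11 = 2048. A QA60 value of 3072 has both bits set and is masked out,
# while a value of 0 passes both bitwiseAnd(...).eq(0) tests and is kept.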
182 | # Function to cloud mask from the Fmask band of Landsat 8 SR data.
183 | def maskL8sr(image):
184 | # Bits 3 and 5 are cloud shadow and cloud, respectively.
185 | cloudShadowBitMask = ee.Number(2).pow(3).int()
186 | cloudsBitMask = ee.Number(2).pow(5).int()
187 |
188 | # Get the pixel QA band.
189 | qa = image.select('pixel_qa')
190 |
191 | # Both flags should be set to zero, indicating clear conditions.
192 | mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(qa.bitwiseAnd(cloudsBitMask).eq(0))
193 |
194 | # Return the masked image, scaled to [0, 1].
195 | return image.updateMask(mask)
196 |
197 |
198 | def cloudBands(img):
199 | ndmi = img.normalizedDifference(['B8', 'B11']).rename(['ndmi'])
200 | ndsi = img.normalizedDifference(['B3', 'B11']).rename(['ndsi'])
201 | cirrus = img.select(['B1', 'B10']).reduce(ee.Reducer.sum()).rename(['cirrus'])
202 | vis = img.select(['B4', 'B3', 'B2']).reduce(ee.Reducer.sum()).rename(['vis'])
203 | return img.addBands(ndmi).addBands(ndsi).addBands(cirrus).addBands(vis)
204 |
205 |
206 | def darkC (img, R, G, B):
207 | R = img.select(R)
208 | G = img.select(G)
209 | B = img.select(B)
210 | maxRB = R.max(B)
211 | maxGB = G.max(B)
212 | maxRG = R.max(G)
213 | C1 = G.divide(maxRB).atan().rename(['C1'])
214 | C2 = R.divide(maxGB).atan().rename(['C2'])
215 | C3 = B.divide(maxRG).atan().rename(['C3'])
216 | return img.addBands(C1).addBands(C2).addBands(C3)
217 |
218 | def sentinelCloudScore(img):
219 | """
220 | Compute a custom cloud likelihood score for Sentinel-2 imagery
221 | Parameters:
222 | img (ee.Image): Sentinel-2 image
223 | Returns:
224 | ee.Image: original image with added ['cloudScore'] band
225 | """
226 | im = sentinel2toa(img)
227 | # Compute several indicators of cloudyness and take the minimum of them.
228 | score = ee.Image(1)
229 |
230 | # Clouds are reasonably bright in the blue and cirrus bands.
231 | #score = score.min(rescale(im.select(['B2']), [0.1, 0.5]))
232 | score = score.min(rescale(im, 'img.B2', [0.1, 0.5]))
233 | #score = score.min(rescale(im.select(['B1']), [0.1, 0.3]))
234 | score = score.min(rescale(im, 'img.B1', [0.1, 0.3]))
235 | #score = score.min(rescale(im.select(['B1']).add(im.select(['B10'])), [0.15, 0.2]))
236 | score = score.min(rescale(im, 'img.B1 + img.B10', [0.15, 0.2]))
237 |
238 | # Clouds are reasonably bright in all visible bands.
239 | #score = score.min(rescale(im.select('B4').add(im.select('B3')).add(im.select('B2')), [0.2, 0.8]))
240 | score = score.min(rescale(im, 'img.B4 + img.B3 + img.B2', [0.2, 0.8]))
241 |
242 | # Clouds are moist
243 | ndmi = im.normalizedDifference(['B8','B11'])
244 | #score=score.min(rescale(ndmi, [-0.1, 0.1]))
245 | score=score.min(rescale(ndmi, 'img', [-0.1, 0.1]))
246 |
247 | # However, clouds are not snow.
248 | ndsi = im.normalizedDifference(['B3', 'B11'])
249 | #score=score.min(rescale(ndsi, [0.8, 0.6]))
250 | score=score.min(rescale(ndsi, 'img', [0.8, 0.6]))
251 |
252 | score = score.multiply(100).byte()
253 | #print('score:', type(score))
254 |
255 | return img.addBands(score.rename(['cloudScore']))
256 |
257 | def mask(img):
258 | date = img.date()
259 | year = date.get('year')
260 | month = date.get('month')
261 | cdi = ee.Algorithms.Sentinel2.CDI(img)
262 | scored = basicQA(img)
263 | clouds = sentinelCloudScore(scored).lte(15).Or(cdi.gte(-0.2))
264 | water = waterScore(img).select('waterScore').lte(0.25)
265 | jrc = ee.Image(JRC.filterMetadata('month', 'equals', month).filterMetadata('year', 'equals', year).first())
266 | waterMask = jrc.focal_max(1, 'square', 'pixels').neq(2).And(water)
267 | shadowMask = img.select('B11').gt(900)
268 | return scored.updateMask(clouds.And(shadowMask).And(waterMask))
269 |
270 | def maskSR(img):
271 | """
272 | Apply built in masks to Sentinel-2 surface reflectance imagery
273 | Parameters:
274 |         img (ee.Image): Sentinel-2 level 2A surface reflectance image
275 | Returns:
276 | ee.Image: masked image
277 | """
278 | # jrc = ee.Image('JRC/GSW1_1/YearlyHistory/2018')
279 | scored = basicQA(img);
280 | maskBand = img.select('SCL')
281 | cloudMask = maskBand.neq(8).And(maskBand.neq(9))
282 | # waterMask = maskBand.neq(6).where(jrc.gte(2), 0)
283 | cirrusMask = maskBand.neq(10)
284 | snowMask = maskBand.neq(11)
285 | darkMask = maskBand.neq(2).And(maskBand.neq(3))
286 | return scored.updateMask(cloudMask.And(cirrusMask).And(snowMask).And(darkMask))
287 |
288 | def maskTOA(img):
289 | """
290 | Mask Sentinel-2 1C top of atmosphere imagery for clouds, water, shadow
291 | Parameters:
292 | img (ee.Image): Sentinel-2 level 1C image
293 | Returns:
294 | ee.Image: masked image
295 | """
296 | # date = img.date()
297 | # year = date.get('year')
298 | #month = date.get('month')
299 | #cdi = ee.Algorithms.Sentinel2.CDI(img)
300 | scored = basicQA(img)
301 | cloudMask = sentinelCloudScore(scored).select('cloudScore').lte(15)#.Or(cdi.gte(-0.2))
302 | # water = waterScore(img).select('waterScore').lte(0.25)
303 | # jrc = ee.Image(JRC.filterMetadata('year', 'equals', year).first())
304 | # watermask = water.where(jrc.gte(2), 0)
305 | # shadowMask = img.select('B11').gt(900)
306 | return scored.updateMask(cloudMask)#.And(shadowMask))#.And(watermask))
307 |
308 |
--------------------------------------------------------------------------------
/utils/pc_tools.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | from pathlib import Path
4 | from importlib import reload
5 | import numpy as np
6 | import os
7 | import sys
8 | from os.path import join
9 | from glob import glob
10 | import io
11 | from datetime import datetime
12 | import xml.etree.ElementTree
13 |
14 | from osgeo import gdal
15 | import xarray as xr
16 | import rasterio as rio
17 | from rasterio.vrt import WarpedVRT
18 | from rioxarray.merge import merge_arrays
19 | import rioxarray
20 | 
21 | from pyproj import CRS
22 |
23 | import planetary_computer
24 | from dask_gateway import GatewayCluster
25 | from dask.distributed import wait, Client
26 | import pystac_client
27 | import pystac
28 | import stackstac
29 | import stac_vrt
30 |
31 | FILE = Path(__file__).resolve() # full path to the current file, including extension
32 | print('filepath', FILE)
33 | ROOT = FILE.parents[0] # list of upstream directories containing file
34 | print('root', ROOT)
35 | REL = Path(os.path.relpath(ROOT, Path.cwd()))
36 | print('rel', REL)
37 | if str(ROOT) not in sys.path:
38 | sys.path.append(str(ROOT))
39 | if str(REL) not in sys.path:
40 | sys.path.append(str(REL)) # add REL to PATH
41 |
42 | from azure.storage.blob import ContainerClient, BlobClient
43 |
44 | def recursive_api_try(search):
45 | try:
46 | signed = planetary_computer.sign(search.get_all_items())
47 | # collection = search.item_collection()
48 | # print(len(collection), 'assets')
49 | # signed = [planetary_computer.sign(item).to_dict() for item in collection]
50 | except pystac_client.exceptions.APIError as error:
51 | print('APIError, trying again')
52 | signed = recursive_api_try(search)
53 | return signed
54 |
55 | def resign_vrt(filename, element_tag):
56 | """Update the authentication token on previously created VRT items
57 | Params
58 | ---
59 | filename: str
60 | element_tag: str
61 | xml tag containing asset url to be signed
62 | """
63 | tree = xml.etree.ElementTree.parse(filename)
64 | root = tree._root
65 | p = Path(filename)
66 | sub_vrt_list = []
67 | for item in root.iter(element_tag):
68 | text = item.text
69 | # if item.attrib['relativeToVRT'] == '0':
70 | if text.startswith('http'):
71 | newtext = planetary_computer.sign(text.split('?')[0])
72 | item.text = newtext
73 | elif '.vrt' in text:
74 | sub_vrt_list.append(text)
75 | newtext = text[:-4]+'_resigned.vrt'
76 | item.text = newtext
77 | for file in sub_vrt_list:
78 | etag = 'SourceDataset' if 'warped' in file else element_tag
79 | resign_vrt(file, etag)
80 | tree.write(str(p.parent)+'/'+str(p.stem)+'_resigned.vrt')
81 |
82 | def export_blob(data: np.ndarray, container_client: ContainerClient, blobUrl: str) -> None:
83 | with io.BytesIO() as buffer:
84 | np.save(buffer, data)
85 | buffer.seek(0)
86 | blob_client = container_client.get_blob_client(blobUrl)
87 | blob_client.upload_blob(buffer, overwrite=True)
88 |
89 | def normalize_dataArray(da: xr.DataArray, dim: str) -> xr.DataArray:
90 | """Normalize (mean = 0, sd = 1) values in a xarray DataArray along given axis
91 |
92 | Parameters
93 | ---
94 | da: xarray.DataArray
95 | array to be normalized
96 | dim: str
97 | name of dimension along which to calculate mean and standard deviation (e.g. 'band')
98 |
99 | Return
100 | ---
101 | xarray.DataArray: input array with values scaled to mean = 0 and sd = 1
102 | """
103 | mean = da.mean(dim = dim, skipna = True)
104 | sd = da.std(dim = dim, skipna = True)
105 | normalized = (da - mean)/(sd+0.000001)
106 | return normalized
107 |
108 | def trim_dataArray(da: xr.DataArray, size: int) -> xr.DataArray:
109 | """Trim the remainder from x and y dimensions of a DataArray
110 |
111 | Parameters
112 | ---
113 | da: xarray:DataArray
114 | input array to be trimmed
115 | size: int
116 | size of chunks in x and y dimension. remaining array x&y size will be evenly divisible by this value
117 |
118 | Return:
119 | xarray:DataArray: resized input array with x & y dimensions evenly divisible by 'size'
120 | """
121 | slices = {}
122 | for coord in ["y", "x"]:
123 | remainder = len(da.coords[coord]) % size
124 | slice_ = slice(-remainder) if remainder else slice(None)
125 | slices[coord] = slice_
126 |
127 | trimmed = da.isel(**slices)
128 | return trimmed
129 |
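# Usage sketch (the DataArray is a placeholder): prepare any (band, y, x) DataArray for
# 256 x 256 chunked prediction.
# da = normalize_dataArray(da, dim = 'band')     # mean 0 / sd 1 across bands at each pixel
# da = trim_dataArray(da, 256)                   # y and x now evenly divisible by 256
# chunked = da.chunk({'x': 256, 'y': 256})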
130 | def get_naip_stac(aoi, dates):
131 |
132 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
133 | collections = ['naip']
134 |
135 | search = catalog.search(
136 | intersects = aoi,
137 | datetime = dates,
138 | collections = collections,
139 | limit = 500
140 | )
141 |
142 | items = planetary_computer.sign(search.item_collection_as_dict())
143 | # items is a pystac ItemCollection
144 | # items2 = items.to_dict()
145 | features = items['features']
146 | dates = [x['properties']['datetime'] for x in features]
147 | years = [date[0:4] for date in dates]
148 | years.sort()
149 | filtered = [x for x in features if x['properties']['datetime'][0:4] == years[-1]]
150 | urls = [item['assets']['image']['href'] for item in filtered]
151 | # organize all naip images overlapping box into a vrt stac
152 | crs_list = np.array([item['properties']['proj:epsg'] for item in filtered])
153 | crss = np.unique(crs_list)
154 | crs_counts = [len(crs_list[crs_list == crs]) for crs in crss]
155 | print('naip crss', crss)
156 | if len(crss) > 1:
157 | # rioxrs = []
158 | minority_idx = np.argmin(crs_counts)
159 | majority_idx = np.argmax(crs_counts)
160 | majority_urls = [url for i, url in enumerate(urls) if crs_list[i] == crss[majority_idx]]
161 | minority_urls = [url for i, url in enumerate(urls) if crs_list[i] == crss[minority_idx]]
162 | print('minority urls', minority_urls)
163 | minority_vrt = gdal.BuildVRT("./minority.vrt", minority_urls)
164 | majority_vrt = gdal.BuildVRT("./majority.vrt", majority_urls)
165 | warped_vrt = gdal.Warp("./warped.vrt", minority_vrt, format = 'vrt', dstSRS = f'EPSG:{crss[majority_idx]}')
166 | naipVRT = gdal.BuildVRT('./naiptmp.vrt', [warped_vrt, majority_vrt])
167 | # naipVRT = None
168 | # for i, url in enumerate(urls):
169 | # rioxr = rioxarray.open_rasterio(url)
170 | # if crs_list[i] == crss[minority_idx]:
171 | # reprojected = rioxr.rio.reproject(f'EPSG:{crss[majority_idx]}')
172 | # rioxrs.append(reprojected)
173 | # else:
174 | # rioxrs.append(rioxr)
175 | # merged = merge_arrays(rioxrs)
176 | # return merged
177 | else:
178 | # rioxrs = [rioxarray.open_rasterio(url, lock = False) for url in urls]
179 | # merged = merge_arrays(rioxrs)
180 | # vrt = stac_vrt.build_vrt(filtered, block_width=512, block_height=512, data_type="Byte")
181 | # naipImg = rioxarray.open_rasterio(vrt, lock = False)
182 | naipVRT = gdal.BuildVRT('./naiptmp.vrt', urls)
183 | naipVRT = None
184 | naipImg = rioxarray.open_rasterio('./naiptmp.vrt', lock = False)
185 | return naipImg
186 |
187 | def get_dem_stac(aoi, dates, crs = None, resolution = None):
188 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
189 | search = catalog.search(
190 | intersects = aoi,
191 | collections = ["3dep-seamless"]
192 | )
193 |
194 | # items is a pystac ItemCollection
195 | items = list(planetary_computer.sign(search.item_collection()))
196 | dems = [item for item in items if item.properties['gsd'] == 10] # we only want 10 m data
197 | return dems
198 | # # hagUrl = hag[0]['assets']['data']['href']
199 | # demProperties = dems[0].properties
200 | # if crs:
201 | # demCrs = crs
202 | # else:
203 | # demCrs = demProperties['proj:epsg']
204 | # # demTransform = demProperties['proj:transform']
205 | # # if resolution:
206 | # # demRes = resolution
207 | # # else:
208 | # # demRes = demProperties['gsd']
209 |
210 | # demStac = stackstac.stack(
211 | # dems,
212 | # epsg = demCrs,
213 | # resolution = 10)
214 | # # sortby_date = False,
215 | # # assets = ['data'])
216 | # print('3dep transform', demStac.rio.transform())
217 | # demMedian = demStac.median(dim = 'time')
218 | # projected = demMedian.rio.set_crs(demCrs)
219 | # # reprojected = projected.rio.reproject(hagCrs)
220 |
221 | # return projected
222 |
223 | def get_hag_stac(aoi, dates, crs = None, resolution = None):
224 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
225 | search = catalog.search(
226 | intersects = aoi,
227 | datetime = dates,
228 | collections = ['3dep-lidar-hag']
229 | )
230 |
231 | items = recursive_api_try(search)
232 | # items is a pystac ItemCollection
233 | items2 = items.to_dict()
234 | hag = items2['features']
235 |
236 | # hagUrl = hag[0]['assets']['data']['href']
237 | hagProperties = hag[0]['properties']
238 | hagCrs = hagProperties['proj:projjson']['components'][0]['id']['code']
239 | hagTransform = hagProperties['proj:transform']
240 | if resolution:
241 | hagRes = resolution
242 | else:
243 | hagRes = hagTransform[0]
244 |
245 | # hagSide = 360//hagRes
246 | # hagZoom = round(600/hagSide, 4)
247 |
248 | # hagCrs = [asset['properties']['proj:projjson']['components'][0]['id']['code'] for asset in hag]
249 | # print('hag CRS', hagCrs[0])
250 | hagStac = stackstac.stack(
251 | hag,
252 | epsg = hagCrs,
253 | resolution = hagRes,
254 | sortby_date = False,
255 | assets = ['data'])
256 |
257 | hagMedian = hagStac.median(dim = 'time')
258 | projected = hagMedian.rio.set_crs(hagCrs)
259 | # reprojected = projected.rio.reproject(hagCrs)
260 |
261 | return projected
262 |
263 | def naip_mosaic(naips: list, crs: int):
264 | """ mosaic a list of naip stac items into a single xarray DataArray
265 | Parameters
266 | --------
267 | naips: list:
268 | list of naip image items in stac format
269 | crs: int
270 | epsg code specifying the common crs to project naip images
271 | Return
272 | ---
273 | xr.DataArray: single array of mosaicd naip images
274 | """
275 | data = [item for item in naips if item['properties']['proj:epsg'] == crs]
276 | crs = CRS.from_user_input(26918)
277 | naipStac = stac_vrt.build_vrt(
278 | data, block_width=512, block_height=512, data_type="Byte", crs = crs)
279 | naipImage = rioxarray.open_rasterio(naipStac, chunks = (4, 8192, 8192), lock = False)
280 | # reprojected = naipImage.rio.reproject('EPSG:4326')
281 | return(naipImage)
282 |
283 | def harmonize_to_old(data):
284 | """
285 | Harmonize new Sentinel-2 data to the old baseline.
286 |
287 | Parameters
288 | ----------
289 | data: xarray.DataArray
290 | A DataArray with four dimensions: time, band, y, x
291 |
292 | Returns
293 | -------
294 | harmonized: xarray.DataArray
295 | A DataArray with all values harmonized to the old
296 | processing baseline.
297 | """
298 | cutoff = datetime(2022, 1, 25)
299 | offset = 1000
300 | bands = [
301 | "B01",
302 | "B02",
303 | "B03",
304 | "B04",
305 | "B05",
306 | "B06",
307 | "B07",
308 | "B08",
309 | "B8A",
310 | "B09",
311 | "B10",
312 | "B11",
313 | "B12",
314 | ]
315 |
316 | old = data.sel(time=slice(cutoff))
317 |
318 | to_process = list(set(bands) & set(data.band.data.tolist()))
319 | new = data.sel(time=slice(cutoff, None)).drop_sel(band=to_process)
320 |
321 | new_harmonized = data.sel(time=slice(cutoff, None), band=to_process).clip(offset)
322 | new_harmonized -= offset
323 |
324 | new = xr.concat([new, new_harmonized], "band").sel(band=data.band.data.tolist())
325 | return xr.concat([old, new], dim="time")
326 |
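# Worked example of the offset handling above (values are illustrative): a reflectance pixel
# acquired after 2022-01-25 and stored as 1432 under the new baseline is clipped to
# max(1432, 1000) = 1432 and shifted to 1432 - 1000 = 432, matching the old baseline;
# values below 1000 clip to 1000 and become 0.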
327 | def get_s2_stac(dates, aoi, cloud_thresh = 10, bands = ["B02", "B03", "B04", "B08"], epsg = None):
328 | """from a pystac client return a stac of s2 imagery
329 |
330 | Parameters
331 | ----
332 | dates: str
333 | start/end dates
334 | aoi: shapely.geometry.Polygon
335 | polygon defining area of search
336 | cloud_thresh: int
337 | maximum cloudy pixel percentage of s2 images to return
338 | bands: list
339 | asset (band) names to return and stack
340 | epsg: int
341 | epsg coordinate system to reproject s2 data to
342 |
343 | Return
344 | ---
345 | stackstac.stac()
346 | """
347 | # connect to the planetary computer catalog
348 | catalog = pystac_client.Client.open(
349 | "https://planetarycomputer.microsoft.com/api/stac/v1",
350 | modifier = planetary_computer.sign_inplace)
351 |
352 | search = catalog.search(
353 | collections = ['sentinel-2-l2a'],
354 | datetime = dates,
355 | intersects = aoi,
356 | query={"eo:cloud_cover": {"lt": cloud_thresh}}
357 | )
358 |
359 | s2items = [item.to_dict() for item in list(search.get_items())]
360 | if len(s2items) > 0:
361 | s2 = s2items[0]
362 | if epsg:
363 | s2epsg = epsg
364 | else:
365 | s2epsg = s2['properties']['proj:epsg']
366 |
367 | s2Stac = (
368 | stackstac.stack(
369 | s2items,
370 | epsg = s2epsg,
371 | assets=bands, # red, green, blue, nir
372 | # chunksize=4096,
373 | resolution=10,
374 | )
375 | .where(lambda x: x > 0, other=np.nan) # sentinel-2 uses 0 as nodata
376 | )
377 |
378 | harmonized = harmonize_to_old(s2Stac)
379 |
380 | s2crs = s2Stac.attrs['crs']
381 | s2projected = harmonized.rio.set_crs(s2crs)
382 | else:
383 | # clipped = s2projected.rio.clip(geometries = [aoi], crs = epsg)
384 | harmonized = None
385 | return harmonized
386 |
387 | def get_s1_stac(dates, aoi, epsg = None, bands = ["vv", "vh"]):
388 |     """Return a stackstac stack of Sentinel-1 RTC imagery from the Planetary Computer
389 | 
390 |     Parameters
391 |     ----
392 |     dates: str
393 |         start/end dates
394 |     aoi: shapely.geometry.Polygon
395 |         polygon defining area of search
396 |     epsg: int
397 |         epsg code to project the stack to (defaults to the first item's native crs)
398 |     bands: list
399 |         asset (polarization) names to stack
400 | 
401 |     Return: stackstac.stac()
402 |     """
403 | # connect to the planetary computer catalog
404 | catalog = pystac_client.Client.open(
405 | "https://planetarycomputer.microsoft.com/api/stac/v1",
406 | modifier = planetary_computer.sign_inplace)
407 |
408 | search = catalog.search(
409 | datetime = dates,
410 | intersects = aoi,
411 | collections=["sentinel-1-rtc"],
412 | query={"sar:polarizations": {"eq": ['VV', 'VH']},
413 | 'sar:instrument_mode': {"eq": 'IW'},
414 | 'sat:orbit_state': {"eq": 'ascending'}
415 | }
416 | )
417 |
418 | s1items = search.item_collection()
419 | if not epsg:
420 | s1 = s1items[0]
421 | epsg = s1.properties['proj:epsg']
422 | s1Stac = stackstac.stack(
423 | s1items,
424 | epsg = epsg,
425 | assets=bands,
426 | resolution=10,
427 | gdal_env=stackstac.DEFAULT_GDAL_ENV.updated(
428 | always=dict(GDAL_HTTP_MAX_RETRY=5, GDAL_HTTP_RETRY_DELAY=1)
429 | )
430 | )
431 |
432 | # # get spatial reference info
433 | # s1crs = s1Stac.attrs['crs']
434 | # s1transform = s1Stac.attrs['transform']
435 | # s1res = s1transform[0]
436 |
437 | # s1projected = s1Stac.rio.set_crs(s1crs)
438 | # clipped = s1projected.rio.clip(geometries = [aoi], crs = 4326)
439 | return s1Stac
440 |
495 | def get_ssurgo_stac(aoi, epsg)-> np.ndarray:
496 | """Sample ssurgo data in raster format
497 |
498 | Parameters
499 | ---
500 | aoi: shapely.geometry.Polygon
501 | polygon coordinates defining search aoi
502 | epsg: int
503 |         coordinate reference system epsg code to reproject ssurgo data to
504 |
505 | Returns
506 | ---
507 | np.ndarray: 3-dimensional raster (window_size, window_size, 4) containing ssurgo data
508 | """
509 | # connect to the PC STAC catalog
510 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
511 |
512 | # get the gnatsco raster, which has 'mukey' values per pixel
513 | search = catalog.search(
514 | collections=["gnatsgo-rasters"],
515 | intersects=aoi
516 | )
517 | surgoitems = planetary_computer.sign(search.get_all_items())
518 | return surgoitems
519 | # surgoitems = [planetary_computer.sign(item).to_dict() for item in list(search.items())]
520 | # surgo = surgoitems[0]
521 |
522 | # surgowkt = surgo['properties']['proj:wkt2']
523 | # if epsg:
524 | # surgoEPSG = epsg #surgoCrs.to_epsg()
525 | # else:
526 | # surgoEPSG = CRS.from_wkt(surgowkt).to_epsg()
527 |
528 | # print(surgoEPSG)
529 | # # surgoepsg = surgo['properties']['proj:epsg']
530 | # surgoStac = stackstac.stack(
531 | # surgoitems,
532 | # # epsg = surgoEPSG,
533 | # epsg = surgoEPSG,
534 | # assets=['mukey'])
535 |
536 | # surgoTransform = surgoStac.attrs['transform']
537 | # # surgores = 10 #surgoTransform[0] TODO: COnfirm ssurgo is always 10 m resolution
538 | # # print('resolution', surgores)
539 |
540 | # temporal = surgoStac.median(dim = 'time')
541 | # return temporal, surgoTransform, surgoEPSG
542 |
543 | def join_ssurgo(ssurgo_table, ssurgo_raster:np.ndarray):
544 | C,H,W = ssurgo_raster.shape
545 | # get the unique values and their indices from the raster so we can join to table data
546 | unique_mukeys, inverse = np.unique(ssurgo_raster, return_inverse=True)
547 | # print('\t\tJoining SSURGO Arrays. Unique mukeys', unique_mukeys)
548 | rearranged = ssurgo_table[['mukey', 'hydclprs', 'drclassdcd', 'flodfreqdcd', 'wtdepannmin']].groupby('mukey').first().reindex(unique_mukeys, fill_value=np.nan).astype(np.float64)
549 | rearranged.loc[rearranged['wtdepannmin'] > 200.0, 'wtdepannmin'] = 200.0 # anything above 200 should be clipped to 200
550 | rearranged['wtdepannmin'] = rearranged['wtdepannmin'].fillna(200.0) # missing values are above 200 cm deep
551 | rearranged['wtdepannmin'] = rearranged['wtdepannmin']/200.0 # 200 cm is the max measured value
552 |
553 | rearranged['flodfreqdcd'] = rearranged['flodfreqdcd'].fillna(0.0) # missing values mean no flooding
554 |
555 | rearranged['drclassdcd'] = rearranged['drclassdcd'].fillna(0.0) # missing values mean no soil e.g. excessively drained
556 |
557 | rearranged['hydclprs'] = rearranged['hydclprs'].fillna(0.0) # missing values mean no soil e.g. not hydric
558 | rearranged['hydclprs'] = rearranged['hydclprs']/100.0 # 100 percent hydric is max
559 |     # join tabular data to ssurgo raster based on mukey
560 | ssurgo_hwc = rearranged.to_numpy()[inverse].reshape((H, W, 4)) # HWC
561 | return ssurgo_hwc
562 |
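# Usage sketch (column values are placeholders): join tabular gSSURGO attributes to a mukey
# raster, producing an (H, W, 4) float array with roughly [0, 1]-scaled soil variables.
# import pandas as pd
# table = pd.DataFrame({'mukey': [...], 'hydclprs': [...], 'drclassdcd': [...],
#                       'flodfreqdcd': [...], 'wtdepannmin': [...]})
# hwc = join_ssurgo(table, mukey_raster)    # mukey_raster is a (1, H, W) ndarray of mukeys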
563 | def get_pc_imagery(aoi, dates, crs):
564 | """Get S2 imagery from Planetary Computer. REQUIRES a valid API token be added to the os environment
565 | Args:
566 | aoi: POLYGON geometry json
567 | dates (tpl): four YYYY-MM-DD date strings defining before and after
568 | crs (int): 4-digit epsg code representing coordinate reference system
569 | """
570 | # Creates the Dask Scheduler. Might take a minute.
571 | cluster = GatewayCluster(
572 | address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway",
573 | proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80",
574 | auth = 'jupyterhub',
575 | worker_cores = 4
576 | )
577 |
578 | client = cluster.get_client()
579 |
580 | # allow our dask cluster to adaptively scale from 2 to 24 nodes
581 | cluster.adapt(minimum=2, maximum=24)
582 |
583 | # extract before and after dates from input in format required by PC
584 | before_dates = f'{dates[0]}/{dates[1]}'
585 | after_dates = f'{dates[2]}/{dates[3]}'
586 |
587 | # connect to the planetary computer catalog
588 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1")
589 | # sentinel = catalog.get_child('sentinel-2-l2a')
590 |
591 |     before_data = get_s2_stac(before_dates, aoi) # get_s2_stac (defined above) opens its own catalog
592 |     after_data = get_s2_stac(after_dates, aoi)
593 |
594 | # convert provided coordinates into appropriate format for clipping xarray imagery
595 | xs = [x for x,y in aoi['coordinates'][0]]
596 | ys = [y for x,y in aoi['coordinates'][0]]
597 | bounds = [min(xs), min(ys), max(xs), max(ys)]
598 |
599 | # reduce the before and after image collections to a single image using median value per pixel
600 | before = before_data.median(dim="time")
601 | after = after_data.median(dim="time")
602 |
603 | # compute the result and load to local machine
604 |     bef_clip = before.rio.clip([aoi], crs).compute()
605 |     aft_clip = after.rio.clip([aoi], crs).compute()
606 |
607 | # This non-distributed method seems to be working but timing out
608 | # TODO: try changing chunk dimensions, try increasing timeout time of Webservice
609 | # bd, ad = dask.compute(bef_clip, aft_clip)
610 |
611 | # result_dict = wait([bef_clip, aft_clip], return_when = 'ALL_COMPLETED')
612 |
613 | # close our cluster
614 | client.close()
615 | cluster.shutdown()
616 | # return the before and after images as numpy arrays
617 | return bef_clip.data, aft_clip.data
618 |
619 | def run_local(aoi, dates, m, buff = 128, kernel = 256):
620 | """Retrieve Sentinel-2 imagery from Microsoft Planetary Computer and run change detection
621 | Arguments:
622 | aoi (dict): GeoJson like dictionary defining area of interest
623 | crs (int): 4-digit epsg code representing coordinate reference system of the aoi
624 | dates (tpl): Four YYYY-MM-DD strings defining the before and after periods
625 | m (keras.Model): model to be used to make predictions
626 | buff (int): buffer to strip from prediction patches
627 | kernel (int): size of side of prediction patches
628 | Return:
629 | numpy.ndarray: 3D array with per-pixel change probabilities
630 | """
631 | # extract before and after dates from input in format required by PC
632 | before_dates = f'{dates[0]}/{dates[1]}'
633 | after_dates = f'{dates[2]}/{dates[3]}'
634 |
635 | # get our before and after stacs
636 | print('retrieving s2 data')
637 |     bef_stac = get_s2_stac(before_dates, aoi)
638 |     aft_stac = get_s2_stac(after_dates, aoi) # projected rioxarrays; get_s2_stac returns a single DataArray
639 |
640 | # create median composites
641 | bef_median = bef_stac.median(dim="time")
642 | aft_median = aft_stac.median(dim="time")
643 |
644 | #normalize
645 | bef_norm = normalize_dataArray(bef_median, 'band')
646 | aft_norm = normalize_dataArray(aft_median, 'band')
647 |
648 | # concatenate
649 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']})
650 |
651 | C,H,W = ds.shape
652 | print('data shape:', ds.shape) # from planetary computer this is C, H, W
653 | rearranged = ds.transpose('y','x','band')
654 | print('rearranged shape', rearranged.shape)
655 | indices = prediction_tools.generate_chip_indices(rearranged, buff, kernel)
656 | print(len(indices), 'indices generated')
657 | template = np.zeros((H, W))
658 | print('template shape:', template.shape)
659 | # print('generating chips')
660 | # chips, chip_indices = extract_chips(ds)
661 | # print(len(chip_indices), 'chips generated')
662 | dat = rearranged.values
663 | print('running predictions')
664 | output = predict_chips(dat, indices, template, m, kernel = kernel, buff = buff)
665 |
666 | # print(f'returning array of {output.shape}')
667 |     return output, bef_median, aft_median, aft_stac.rio.transform() # affine transform recovered from the rioxarray
668 |
669 | def run_dask(model_blob_url, weights_blob_url, custom_objects, dates, aoi):
670 | # # create a dask cluster
671 | # print('spinning up Dask Cluster')
672 | # cluster = GatewayCluster(
673 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway",
674 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80",
675 | # auth = 'jupyterhub',
676 | # worker_cores = 4
677 | # )
678 |
679 | # client = cluster.get_client()
680 | # client.upload_file(f'{str(ROOT)}/model_tools.py', load = True)
681 |
682 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes
683 | # cluster.adapt(minimum=4, maximum=24)
684 | # print('cluster created', cluster.dashboard_link)
685 |
686 | # extract before and after dates from input in format required by PC
687 | before_dates = f'{dates[0]}/{dates[1]}'
688 | after_dates = f'{dates[2]}/{dates[3]}'
689 |
690 | # get our before and after stacs
691 | print('retrieving s2 data')
692 | bef_stac = get_s2_stac(before_dates, aoi)
693 | aft_stac = get_s2_stac(after_dates, aoi) # these are projected rioxarrays
694 |
695 | # create median composites
696 | bef_median = bef_stac.median(dim="time")
697 | aft_median = aft_stac.median(dim="time")
698 |
699 | #normalize
700 | bef_norm = normalize_dataArray(bef_median, 'band')
701 | aft_norm = normalize_dataArray(aft_median, 'band')
702 |
703 | # concatenate
704 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']})
705 |
706 | trimmed = trim_dataArray(ds, 256)
707 | chunked = trimmed.chunk({'x':256, 'y':256})
708 |
709 | print('running chunked predictions')
710 | meta = np.array([[]], dtype="float32")
711 | predictions_array = chunked.data.map_overlap(
712 | lambda x: predict_chunk(x, model_blob_url, weights_blob_url, custom_objects),
713 | depth = (0, 64, 64),
714 | boundary = 0,
715 | meta=meta,
716 | drop_axis=0
717 | )
718 |
719 | # predictions = predictions_array
720 |
721 | # # to restore spatial reference, cast back to Xarray
722 | # out = xr.DataArray(
723 | # predictions,
724 | # coords=trimmed.drop_vars("band").coords,
725 | # dims=("y", "x"),
726 | # )
727 |
728 | return(predictions_array)
729 |
730 |
731 | # def test_PC_connection():
732 | # """Test our ability to retrieve satellite imagery from Planetary Computer
733 |
734 | # Without any processing, return the first Sentinel-2 image from a date range at
735 | # a known location
736 | # """
737 | # # Creates the Dask Scheduler. Might take a minute.
738 | # cluster = GatewayCluster(
739 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway",
740 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80",
741 | # auth = 'jupyterhub',
742 | # worker_cores = 4
743 | # )
744 |
745 | # client = cluster.get_client()
746 |
747 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes
748 | # cluster.adapt(minimum=2, maximum=24)
749 |
750 | # # define fixed start and end date for summer 2021
751 | # before_dates = '2021-05-01/2021-08-01'
752 |
753 | # # connect to the planetary computer catalog
754 | # catalog = pcClient.open("https://planetarycomputer.microsoft.com/api/stac/v1")
755 | # sentinel = catalog.get_child('sentinel-2-l2a')
756 |
757 | # search = catalog.search(
758 | # collections = ['sentinel-2-l2a'],
759 | # datetime=before_dates,
760 | # intersects=aoi
761 | # )
762 |
763 | # search_list = list(search_before.get_items())
764 |
765 | # least_cloudy = [item for item in search_list if item.properties['eo:cloud_cover'] <= 10]
766 |
767 | # items = [pc.sign_item(i).to_dict() for i in least_cloudy]
768 |
769 | # # sanity check to make sure we have retrieved and authenticated items fro planetary computer
770 | # ilen = len(items)
771 | # print(f'{ilen} images in collection')
772 |
773 | # # convert provided coordinates into appropriate format for clipping xarray imagery
774 | # bounds = [-76.503778, 38.988321, -76.530776, 38.988322]
775 |
776 | # # create an
777 | # data = (
778 | # stackstac.stack(
779 | # items[0],
780 | # epsg = 32617,
781 | # bounds_latlon = bounds,
782 | # sortby_date = 'desc',
783 | # # resolution=10,
784 | # assets=['B02', 'B03', 'B04', 'B08'], # blue, green, red, nir
785 | # # chunks is for parallel computing on Dask cluster, only refers to spatial dimension
786 | # chunksize= 'auto' # don't make smaller than native S2 tiles (100x100km)
787 | # )
788 | # .where(lambda x: x > 0, other=np.nan) # sentinel-2 uses 0 as nodata
789 | # .assign_coords(band = lambda x: x.common_name.rename("band")) # use common names
790 | # )
791 |
792 | # # reduce the before and after image collections to a single image using first valid pixel
793 | # before = data.mosaic(dim="time")
794 |
795 | # # assign the native sentinel-2 crs the resulting xarrays
796 | # bef = before.rio.set_crs(32617)
797 |
798 | # # compute the result and load to local machine
799 | # bef_local = bef.compute()
800 |
801 | # # This non-distributed method seems to be working but timing out
802 | # # TODO: try changing chunk dimensions, try increasing timeout time of Webservice
803 | # # bd, ad = dask.compute(bef_clip, aft_clip)
804 |
805 | # # result_dict = wait([bef_clip, aft_clip], return_when = 'ALL_COMPLETED')
806 |
807 | # # close our cluster
808 | # client.close()
809 | # cluster.shutdown()
810 | # # return the image as numpy arrays
811 | # return bef_local.data
812 |
813 |
--------------------------------------------------------------------------------
/utils/prediction_tools.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Dec 4 19:24:42 2020
4 |
5 | @author: MEvans
6 | """
7 | import os
8 | from os.path import join
9 | import sys
10 | from sys import path
11 | from pathlib import Path
12 |
13 | # import ee
14 | import json
15 | import numpy as np
16 | import tensorflow as tf
17 | from matplotlib import pyplot as plt
18 | #import gsutil
19 | import rasterio as rio
20 | from rasterio.crs import CRS
21 | from rasterio.warp import transform_bounds
22 | from rasterio.transform import array_bounds
23 |
24 | FILE = Path(__file__).resolve() # full path to the current file, including extension
25 | print('filepath', FILE)
26 | ROOT = FILE.parents[0] # list of upstream directories containing file
27 | print('root', ROOT)
28 | REL = Path(os.path.relpath(ROOT, Path.cwd()))
29 | print('rel', REL)
30 | if str(ROOT) not in sys.path:
31 | path.append(str(ROOT))
32 | if str(REL) not in sys.path:
33 | path.append(str(REL)) # add REL to PATH
34 |
35 | from processing import normalize_tensor, rescale_tensor
36 | # TODO: automate spliting of full GEE path
37 | # def doExport(image, features, scale, bucket, pred_base, pred_path, region, kernel_shape = [256, 256], kernel_buffer = [128,128]):
38 | # """
39 | # Run an image export task on which to run predictions. Block until complete.
40 | # Parameters:
41 | # image (ee.Image): image to be exported for prediction
42 | # features (list): list of band names to include in export
43 | # scale (int): pixel scale
44 | # bucket (str): name of GCS bucket to write files
45 | # pred_path (str): relative google cloud directory path for export
46 | # pred_base (str): base filename of exported image
47 | # kernel_shape (array): size of image patch in pixels
48 | # kernel_buffer (array): pixels to buffer the prediction patch. half added to each side
49 | # region (ee.Geometry):
50 | # """
51 | # task = ee.batch.Export.image.toCloudStorage(
52 | # image = image.select(features),
53 | # description = pred_base,
54 | # bucket = bucket,
55 | # fileNamePrefix = join(pred_path, pred_base),
56 | # region = region,#.getInfo()['coordinates'],
57 | # scale = scale,
58 | # fileFormat = 'TFRecord',
59 | # maxPixels = 1e13,
60 | # formatOptions = {
61 | # 'patchDimensions': kernel_shape,
62 | # 'kernelSize': kernel_buffer,
63 | # 'compressed': True,
64 | # 'maxFileSize': 104857600
65 | # }
66 | # )
67 | # task.start()
68 |
69 | # # Block until the task completes.
70 | # print('Running image export to Cloud Storage...')
71 | # import time
72 | # while task.active():
73 | # time.sleep(30)
74 |
75 | # # Error condition
76 | # if task.status()['state'] != 'COMPLETED':
77 | # print('Error with image export.')
78 | # else:
79 | # print('Image export completed.')
80 |
81 | # # Error condition
82 | # if task.status()['state'] != 'COMPLETED':
83 | # print('Error with image export.')
84 | # else:
85 | # print('Image export completed.')
86 |
87 | def generate_chip_indices(arr, buff = 128, kernel = 256):
88 |     """Generate the upper-left (y, x) indices of prediction chips covering an array
89 |     Parameters
90 | ---
91 | arr: np.ndarray
92 | 3D array (H, W, C) for which indices should be generated
93 | buff: int
94 | size of pixels to be trimmed from chips
95 | kernel: int
96 | size of contiguous image chips
97 | Return
98 | ---
99 | list::np.ndarray: list containing (y,x) index of chips upper left corner
100 | """
101 | H, W, C = arr.shape
102 | side = buff + kernel
103 | x_buff = y_buff = buff//2
104 |
105 | y_indices = list(range(y_buff, H - side, kernel))
106 | x_indices = list(range(x_buff, W - side, kernel))
107 |
108 | indices = [(y_index, x_index) for y_index in y_indices for x_index in x_indices]
109 | return indices
110 |
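# Worked example of the arithmetic above (assumed sizes): with H = W = 1000, buff = 128 and
# kernel = 256, side = 384 and x_buff = y_buff = 64, so indices run 64, 320, 576 along each
# dimension and every 256 x 256 core chip can be read with a 64-pixel halo on all sides
# without stepping outside the array.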
111 | def extract_chips(arr, buff = 128, kernel = 256):
112 | """Break an array into (potentially) overlapping chips for analysis
113 | Arguments:
114 | arr (ndarray): 3D array to run predictions on
115 | buff (int): size of pixels to be trimmed from chips
116 | kernel (int): size of contiguous image chips
117 | Return:
118 | list::np.ndarray: list containing image chips of size (kernel+buff, kernel+buff)
119 | """
120 | H, W, C = arr.shape
121 | side = buff + kernel
122 | x_buff = y_buff = buff//2
123 | chips = []
124 |
125 | chip_indices = generate_chip_indices(arr, buff, kernel)
126 |
127 |     for y, x in chip_indices:  # indices are (row, col) of each chip's upper-left corner
128 | chip = arr[y-y_buff:y+kernel+y_buff, x-x_buff:x+kernel+x_buff, :]
129 | chips.append(chip)
130 |
131 | return chips
132 |
133 | def predict_chips(arr, chip_indices, template, m, kernel = 256, buff = 128):
134 | """Predict changes in image chips
135 | Arguments:
136 |         arr (np.ndarray): 3D channels-last array from which kernel+buff pixel chips are read and fed to the model
137 | chip_indices (list): list of (y,x) tuples marking position of chip upper-left corner in output array
138 | m (keras.Model): model to be used to make predictions
139 | template (ndarray): 2D all-zero array to which predictions will be written
140 | buff (int): total number of pixels to be trimmed from output chips in x and y direction
141 | kernel (int): number of pixels in x and y retained in prediction chips
142 | Return:
143 | ndarray: 3D array of size output.shape containing change probabilities
144 | """
145 | y_buff = x_buff = buff//2
146 | if len(chip_indices) >= 1:
147 | for y, x in chip_indices:
148 | print(y,x)
149 | chip = arr[y - y_buff:y+kernel+y_buff, x - x_buff:x + kernel + x_buff, :]
150 | print(chip.shape)
151 | # preds = m.predict(np.array([chips[i]]), verbose = 0)
152 | preds = m.predict(np.array([chip]), verbose = 0)
153 | print(preds.shape)
154 | template[y:y+kernel, x:x+kernel] += preds[0, y_buff:(kernel + y_buff), x_buff:(kernel+x_buff), 0]
155 |
156 | return template
157 |
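# --- Illustrative example (hypothetical model `m`; not part of the original module) ---
# A sketch of the chip-based prediction workflow, assuming `m` maps
# (1, 384, 384, C) inputs to (1, 384, 384, n) outputs:
#
#   H, W, C = arr.shape
#   indices = generate_chip_indices(arr, buff=128, kernel=256)
#   template = np.zeros((H, W), dtype='float32')
#   probs = predict_chips(arr, indices, template, m, kernel=256, buff=128)
#   # `probs` is the template with per-pixel probabilities written into the
#   # chip interiors; the buffered margins remain zero.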
158 | #def makePredDataset(bucket, pred_path, pred_image_base, kernel_buffer, features, raw = None):
159 | def make_pred_dataset(file_list, features, kernel_shape = [256, 256], kernel_buffer = [128, 128], axes = [2], splits = None, moments = None, one_hot = None, **kwargs):
160 | """ Make a TFRecord Dataset that can be used for predictions
161 | Parameters:
162 |         file_list (list): complete pathnames of the prediction .tfrecord files
163 |         features (list): names of features in incoming data
164 |         kernel_shape (tpl): size of image patch in pixels
165 |         kernel_buffer (tpl): pixels to trim from H, W dimensions of prediction
166 |         axes (list): axes for normalization
167 |         splits (list): size(s) of groups of features to be rescaled together
168 |         moments (list): list of [min, max] tuples used by rescale_tensor
169 |         one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth
170 | Return:
171 | TFRecord Dataset
172 | """
173 |
174 | # Make sure the files are in the right order.
175 | file_list.sort()
176 |
177 | # Get set up for prediction.
178 | x_buffer = int(kernel_buffer[0] / 2)
179 | y_buffer = int(kernel_buffer[1] / 2)
180 |
181 | buffered_shape = [
182 | kernel_shape[0] + kernel_buffer[0],
183 | kernel_shape[1] + kernel_buffer[1]]
184 |
185 | imageColumns = [
186 | tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32)
187 | for k in features
188 | ]
189 |
190 | imageFeaturesDict = dict(zip(features, imageColumns))
191 |
192 | def parse_image(example_proto):
193 | return tf.io.parse_single_example(example_proto, imageFeaturesDict)
194 |
195 | def toTupleImage(dic):
196 |
197 |         # stack the bands and any optional one-hot tensors
198 | if one_hot:
199 | featList = [dic.get(key) for key in features if key not in one_hot.keys()]
200 | hotList = [tf.one_hot(tf.cast(dic.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items()]
201 | else:
202 | featList = [dic.get(key) for key in features]
203 |
204 | bands = tf.transpose(tf.stack(featList, axis = 0), [1,2,0])
205 | bands = rescale_tensor(bands, axes = axes, moments = moments, splits = splits)
206 | # If custom preprocessing functions are specified add respective bands
207 |
208 | for fxn in kwargs.values():
209 | der = fxn(dic)
210 | der = tf.expand_dims(der, axis = 2)
211 | bands = tf.concat([bands, der], axis = 2)
212 |
213 | if one_hot:
214 | hotStack = tf.concat(hotList, axis = 2)
215 | stacked = tf.concat([bands, hotStack], axis =2)
216 | else:
217 | stacked = tf.concat([bands], axis = 2)
218 |
219 | return stacked
220 |
221 | # Create a dataset(s) from the TFRecord file(s) in Cloud Storage.
222 |
223 | imageDataset = tf.data.TFRecordDataset(file_list, compression_type='GZIP')
224 | imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)
225 | imageDataset = imageDataset.map(toTupleImage).batch(1)
226 | return imageDataset
227 |
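# --- Illustrative example (file paths and band names are placeholders) ---
#   files = ['gs://my-bucket/preds/site_00000.tfrecord.gz',
#            'gs://my-bucket/preds/site_00001.tfrecord.gz']
#   bands = ['B2', 'B3', 'B4', 'B8', 'B2_1', 'B3_1', 'B4_1', 'B8_1']
#   pred_ds = make_pred_dataset(files, features=bands,
#                               kernel_shape=[256, 256], kernel_buffer=[128, 128])
#   # each element is a (1, 384, 384, len(bands)) tensor ready for model.predict()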
228 | def plot_to_image(figure):
229 | """Converts the matplotlib plot specified by 'figure' to a PNG image and
230 | returns it. The supplied figure is closed and inaccessible after this call."""
231 | # Save the plot to a PNG in memory.
232 | import io
233 | buf = io.BytesIO()
234 | plt.savefig(buf, format='png')
235 | # Closing the figure prevents it from being displayed directly inside
236 | # the notebook.
237 | plt.close(figure)
238 | buf.seek(0)
239 | # Convert PNG buffer to TF image
240 | image = tf.image.decode_png(buf.getvalue(), channels=4)
241 | # Add the batch dimension
242 | image = tf.expand_dims(image, 0)
243 | return image
244 |
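# --- Illustrative example (the log directory is a placeholder) ---
# Typical use is logging a matplotlib figure to TensorBoard from a training callback:
#
#   import matplotlib.pyplot as plt
#   fig = plt.figure()
#   plt.plot([0, 1], [0, 1])
#   img = plot_to_image(fig)                          # (1, H, W, 4) uint8 tensor
#   writer = tf.summary.create_file_writer('logs/fit')
#   with writer.as_default():
#       tf.summary.image('diagnostic_plot', img, step=0)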
245 | def callback_predictions(imageDataset, model, mixer, kernel_shape = [256, 256], kernel_buffer = [128, 128]):
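    """Reconstruct patch predictions into a single 2D array of target-class probabilities.

    Mirrors make_array_predictions() below, but takes an already-loaded mixer dict
    rather than a path to the mixer json file, and keeps only the probability of the
    target class (channel 1) from each prediction patch.
    """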
246 | patches = mixer['totalPatches']
247 | cols = mixer['patchesPerRow']
248 | rows = patches//cols
249 |
250 | # Perform inference.
251 | predictions = model.predict(imageDataset, steps=patches, verbose=1)
252 |
253 |     # some models will output probs (B, H, W, NCLASSES) and classes (B, H, W) as a list
254 |     if type(predictions) == list:
255 |         # in this case let's just grab the probabilities
256 | predictions = predictions[0]
257 |
258 | x_buffer = int(kernel_buffer[0] / 2)
259 | y_buffer = int(kernel_buffer[1] / 2)
260 | x_size = kernel_shape[0]+y_buffer
261 | y_size = kernel_shape[1]+x_buffer
262 |
263 | x = 1
264 | for prediction in predictions:
265 | print('Writing patch ' + str(x) + '...')
266 |         # write the probability of the target class (channel 1); classes can be calculated in post-processing if not already present
267 | patch = prediction[y_buffer:y_size, x_buffer:x_size, 1]
268 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1)
269 | # probPatch = np.max(prediction, axis = 2)
270 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
271 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
272 | # # stack probabilities and classes along channel dimension
273 | # patch = np.stack([predPatch, probPatch], axis = 2)
274 |
275 | ## NOTE: Predictions come out with y as 0 dimension (ie. rows), x as 1 dimension (ie. columns)
276 | # if we're at the beginning of a row
277 | if x%cols == 1:
278 | row = patch
279 | else:
280 | row = np.append(row, patch, axis = 1)
281 | # if we reached the end of a row start a new one
282 | if x%cols == 0:
283 | # for the first row, create single row rows object
284 | if x <= cols:
285 | rows = row
286 | else:
287 | # add current row to previous rows along y axis
288 | rows = np.append(rows, row, axis = 0)
289 | x += 1
290 |
291 | return rows
292 |
293 | def make_array_predictions(imageDataset, model, jsonFile, kernel_shape = [256, 256], kernel_buffer = [128,128]):
294 | """Create a 3D array of prediction outputs from TFRecord dataset
295 |
296 | Given a set of TFRecords representing image patches on which to run model predictions,
297 | and a json file specifying the spatial reference system and arrangement of patches,
298 |     this function writes predictions to a single, reconstructed numpy array of shape
299 |     (H, W, C), where C is the number of channels in the model output (e.g. class probabilities).
300 |
301 | Parameters:
302 | imageDataset (tf.Dataset): image patch tensors on which to run predictions
303 | model (keras Model): model used to make predictions
304 | jsonFile (str): complete GCS filepath to json file
305 |         kernel_shape (tpl): size of image patch in pixels
306 | kernel_buffer (tpl): pixels to trim from H, W, dimensions of each output patch
307 | Return:
308 | ndarray: 3D array of prediction outputs.
309 | """
310 | # we need metadata from the json file to reconstruct prediction patches
311 | # Load the contents of the mixer file to a JSON object.
312 | # jsonFile = '/'.join(jsonFile.split(sep = '/')[3:])
313 | # blob = bucket.get_blob(jsonFile) #23Mar21 update to use google-cloud-storage library
314 | # jsonText = blob.download_as_string().decode('utf-8')
315 | # mixer = json.loads(jsonText)
316 |
317 |     with open(jsonFile) as file:
318 | mixer = json.load(file)
319 |
320 | # # Load the contents of the mixer file to a JSON object.
321 | # jsonText = !gsutil cat {jsonFile}
322 | #
323 | # # Get a single string w/ newlines from the IPython.utils.text.SList
324 | # mixer = json.loads(jsonText.nlstr)
325 |
326 | print(mixer)
327 | patches = mixer['totalPatches']
328 | cols = mixer['patchesPerRow']
329 | rows = patches//cols
330 |
331 | # Perform inference.
332 | print('Running predictions...')
333 | predictions = model.predict(imageDataset, steps=patches, verbose=1)
334 |
335 |     # some models will output probs and classes as a list
336 |     if type(predictions) == list:
337 |         # in this case, concatenate list elements into a single 4d array along last dimension
338 | predictions = np.concatenate(predictions, axis = 3)
339 |
340 | x_buffer = int(kernel_buffer[0] / 2)
341 | y_buffer = int(kernel_buffer[1] / 2)
342 | x_size = kernel_shape[0]+y_buffer
343 | y_size = kernel_shape[1]+x_buffer
344 |
345 | x = 1
346 | for prediction in predictions:
347 | print('Writing patch ' + str(x) + '...')
348 |         # let's just write probabilities; classes can be calculated in post-processing if not already present
349 | patch = prediction[y_buffer:y_size, x_buffer:x_size, :]
350 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1)
351 | # probPatch = np.max(prediction, axis = 2)
352 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
353 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
354 | # # stack probabilities and classes along channel dimension
355 | # patch = np.stack([predPatch, probPatch], axis = 2)
356 |
357 | ## NOTE: Predictions come out with y as 0 dimension (ie. rows), x as 1 dimension (ie. columns)
358 | # if we're at the beginning of a row
359 | if x%cols == 1:
360 | row = patch
361 | else:
362 | row = np.append(row, patch, axis = 1)
363 | # if we reached the end of a row start a new one
364 | if x%cols == 0:
365 | # for the first row, create single row rows object
366 | if x <= cols:
367 | rows = row
368 | else:
369 | # add current row to previous rows along y axis
370 | rows = np.append(rows, row, axis = 0)
371 | x += 1
372 |
373 | return rows
374 |
375 | def write_tfrecord_predictions(predictions, pred_path, out_image_base, kernel_shape = [256, 256], kernel_buffer = [128,128]):
376 |     """Save an array of model predictions as TFRecords
377 |     Parameters:
378 |         predictions (ndarray or list): model prediction patches to be written
379 | pred_path (str): full path to output directory
380 | out_image_base (str): file basename for input and output files
381 | kernel_shape (tpl): [y, x] size of image patch in pixels
382 | kernel_buffer (tpl): [y, x] size of buffer to be trimmed from predictions
383 |
384 | Return:
385 | empty: Writes TFRecord files to specified destination
386 | """
387 | # Perform inference.
388 | # print('Running predictions...')
389 | # predictions = model.predict(imageDataset, steps=None, verbose=1)
390 | # print(predictions[0])
391 |
392 |     # some models will output probs and classes as a list
393 |     if type(predictions) == list:
394 |         # in this case, concatenate list elements into a single 4d array along last dimension
395 | predictions = np.concatenate(predictions, axis = 3)
396 |
397 | # get the number of bands (should usually be one or two)
398 | C = predictions.shape[-1]
399 |
400 | out_image_file = join(pred_path, f'{out_image_base}.tfrecords')
401 |
402 | print('Writing predictions to ' + out_image_file + '...')
403 | writer = tf.io.TFRecordWriter(out_image_file)
404 |
405 | patches = 1
406 |
407 | x_buffer = int(kernel_buffer[0] / 2)
408 | y_buffer = int(kernel_buffer[1] / 2)
409 | x_size = x_buffer + kernel_shape[1]
410 | y_size = y_buffer + kernel_shape[0]
411 |
412 | for prediction in predictions:
413 | print('Writing patch ' + str(patches) + '...')
414 |         # let's just write probabilities; classes can be calculated in post-processing if not already present
415 | patch = prediction[y_buffer:y_size, x_buffer:x_size, :]
416 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1)
417 | # probPatch = np.max(prediction, axis = 2)
418 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
419 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
420 |
421 | # for each band in prediction, create a tf train feature
422 | feature = {}
423 | for i in range(C):
424 | feat = tf.train.Feature(float_list = tf.train.FloatList(value = np.ndarray.flatten(patch[:,:,i])))
425 | feature['b{}'.format(i+1)] = feat
426 |
427 | # Create an example.
428 | example = tf.train.Example(
429 | features=tf.train.Features(
430 | feature = feature
431 | # feature={
432 | # 'class': tf.train.Feature(
433 | # int64_list=tf.train.Int64List(
434 | # value = np.ndarray.flatten(predPatch))),
435 | # 'prob': tf.train.Feature(
436 | # float_list = tf.train.FloatList(
437 | # value = np.ndarray.flatten(probPatch)))
438 | # }
439 | )
440 | )
441 | # Write the example.
442 | writer.write(example.SerializeToString())
443 | patches += 1
444 |
445 | writer.close()
446 |
447 | def write_geotiff_prediction(image, jsonFile, aoi):
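    """Write a prediction array to a GeoTIFF georeferenced by the mixer file
    Parameters:
        image (ndarray): 2D (H, W) or 3D (H, W, C) array of prediction values
        jsonFile (str): path to the json mixer file defining the affine transform and crs
        aoi (str): basename for the output file; the image is written to '{aoi}.tif'
    """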
448 |     with open(jsonFile) as file:
449 | mixer = json.load(file)
450 |
451 | transform = mixer['projection']['affine']['doubleMatrix']
452 | crs = mixer['projection']['crs']
453 | ppr = mixer['patchesPerRow']
454 | tp = mixer['totalPatches']
455 | rows = int(tp/ppr)
456 |
457 | if image.ndim < 3:
458 | image = np.expand_dims(image, axis = -1)
459 |
460 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5])
461 |
462 | with rio.open(
463 | f'{aoi}.tif',
464 | 'w',
465 | driver = 'GTiff',
466 | width = image.shape[1],
467 | height = image.shape[0],
468 | count = image.shape[2],
469 | dtype = image.dtype,
470 | crs = crs,
471 | transform = affine) as dst:
472 | dst.write(np.transpose(image, (2,0,1)))
473 |
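# --- Illustrative example (model, file names and paths are placeholders) ---
# Reconstructing predictions into an array and georeferencing them with the same mixer file:
#
#   pred_ds = make_pred_dataset(files, features=bands)
#   probs = make_array_predictions(pred_ds, model, 'site_mixer.json')
#   write_geotiff_prediction(probs[:, :, 0], 'site_mixer.json', aoi='site')
#   # -> writes 'site.tif' to the working directory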
474 | # TODO: re-calculate n and write files not strictly based on rows
475 | def write_geotiff_predictions(imageDataset, model, jsonFile, outImgBase, outImgPath, kernel_buffer = [128,128]):
476 | """Run predictions on a TFRecord dataset and save as a GeoTIFF
477 | Parameters:
478 | imageDataset (tf.Dataset): data on which to run predictions
479 | model (tf.keras.Model): trained model
480 | jsonFile (str): filename of json mixer file
481 | outImgPath (str): directory in which to write predictions
482 | outImgBase (str): file basename
483 | kernel_buffer (tpl): x and y padding around patches
484 | Return:
485 | empty: writes geotiff records temporarily to working directory
486 | """
487 |     with open(jsonFile) as file:
488 | mixer = json.load(file)
489 | transform = mixer['projection']['affine']['doubleMatrix']
490 | crs = mixer['projection']['crs']
491 | ppr = mixer['patchesPerRow']
492 | tp = mixer['totalPatches']
493 | rows = int(tp/ppr)
494 | kernel_shape = mixer['patchDimensions']
495 |
496 | H = rows*kernel_shape[0]
497 | W = ppr*kernel_shape[1]
498 | y_indices = list(range(0, H, kernel_shape[0]))
499 | x_indices = list(range(0, W, kernel_shape[1]))
500 | indices = [(y,x) for y in y_indices for x in x_indices]
501 | out_array = np.zeros((H, W, 1), dtype = np.float32)
502 | print('out array', out_array.shape)
503 | x_buffer = int(kernel_buffer[0]/2)
504 | y_buffer = int(kernel_buffer[1]/2)
505 | x_size = x_buffer + kernel_shape[1]
506 | y_size = y_buffer + kernel_shape[0]
507 |
508 | # prediction = model.predict(imageDataset, steps = tp, verbose = 1)
509 | # if type(predictions) == list:
510 | # predictions = np.concatenate(predictions, axis = 3)
511 |
512 | iterator = iter(imageDataset)
513 |
514 | for i, (y,x) in enumerate (indices):
515 | prediction = model.predict(iterator.next(), steps = 1, verbose = 1)
516 | if type(prediction) == list:
517 | prediction = np.concatenate(prediction, axis = 3)
518 | # prediction = predictions[i]
519 | print('prediction', prediction.shape)
520 | out_array[y:y+kernel_shape[0], x:x+kernel_shape[1], 0] += prediction[0, y_buffer:y_size, x_buffer:x_size, 0]
521 |
522 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5])
523 |
524 | out_image_file = join(outImgPath, f'{outImgBase}.tif')
525 | print(f'writing image to {out_image_file}')
526 | with rio.open(
527 | out_image_file,
528 | 'w',
529 | driver = 'GTiff',
530 | width = W,
531 | height = H,
532 | count = 1,
533 | dtype = out_array.dtype,
534 | crs = crs,
535 | transform = affine) as dst:
536 | dst.write(np.transpose(out_array, (2,0,1)))
537 |
538 | #def ingest_predictions(pred_path, out_image_base, user_folder):
539 | # """
540 | # Upload prediction image(s) to Earth Engine.
541 | # Parameters:
542 | # pred_path (str): Google cloud (or Drive) path storing prediction image files
543 | # pred_image_base (str):
544 | # user_folder (str): GEE directory to store asset
545 | # out_image_base (str): base filename for GEE asset
546 | # """
547 | # blob = bucket.get_blob(join(pred_path, out_image_base + '_mixer.json'))
548 | # jsonFile = blob.name
549 | #
550 | ## jsonFile = !gsutil ls {join('gs://', pred_path, out_image_base + '*.json')}
551 | # print(jsonFile)
552 | # blobs = bucket.list_blobs(join(pred_path, 'outputs', out_image_base + ))
553 | # predFiles = !gsutil ls {join('gs://', pred_path, 'outputs', out_image_base + '*TFRecord')}
554 | # print(predFiles)
555 | # out_image_files = ' '.join(predFiles)
556 | # # Start the upload.
557 | # out_image_asset = join(user_folder, out_image_base)
558 | # !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile[0]}
559 |
560 | def get_img_bounds(img, jsonFile, dst_crs = None):
561 | """Get the projected top left and bottom right coordinates of an image
562 | Parameters:
563 |         img (ndarray or str): image array, or path to a GeoTIFF, for which to generate bounding coordinates
564 | jsonFile (str): path to json file defining crs and image size
565 | dst_crs (str): epsg code for output crs
566 | Return:
567 | tpl: [[lat min, lon min],[lat max, lon max]]
568 | """
569 |     # Load the contents of the mixer file to a JSON object
570 |     with open(jsonFile) as f:
571 | mixer = json.load(f)
572 | # mixer = json.loads(jsonText.nlstr)
573 | transform = mixer['projection']['affine']['doubleMatrix']
574 | print(transform)
575 | src_crs = CRS.from_string(mixer['projection']['crs'])
576 | print(src_crs)
577 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5])
578 | H,W = [0,0]
579 |
580 | if type(img) == np.ndarray:
581 | print('input image is numpy')
582 | H,W = img.shape
583 | print('image shape is ', H, W)
584 | bounds = array_bounds(H, W, affine)
585 |
586 | elif type(img) == str:
587 | print('input image is geotiff')
588 | with rio.open(img) as src:
589 | bounds = src.bounds
590 | # H, W = src.shape
591 |
592 | print(bounds)
593 | lon_min, lat_min, lon_max, lat_max = bounds
594 | # if we need to transform the bounds, such as for folium ('EPSG:3857')
595 | if dst_crs:
596 | dst_crs = CRS.from_string(dst_crs)
597 | out_bounds = transform_bounds(src_crs, dst_crs, left = lon_min, bottom = lat_min, right = lon_max, top = lat_max, densify_pts=21)
598 | lon_min, lat_min, lon_max, lat_max = out_bounds
599 | print(out_bounds)
600 | return [[lat_min, lon_min], [lat_max, lon_max]]
601 |
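# --- Illustrative example (assumes the optional `folium` package; file names and `probs` are placeholders) ---
#   bounds = get_img_bounds('site.tif', 'site_mixer.json', dst_crs='EPSG:4326')
#   import folium
#   fmap = folium.Map(location=bounds[0], zoom_start=12)
#   folium.raster_layers.ImageOverlay(image=probs[:, :, 0], bounds=bounds,
#                                     opacity=0.6).add_to(fmap)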
602 | def doPrediction(bucket, pred_path, pred_image_base, features, one_hot, out_image_base, kernel_shape, kernel_buffer):
603 | """
604 | Given a bucket and path to prediction images, create a prediction dataset, make predictions
605 | and write tfrecords to GCS
606 | Parameters:
607 |         bucket (Bucket): google-cloud-storage bucket object
608 |         pred_path (str): relative GCS path storing prediction image files
609 |         pred_image_base (str): base filename of prediction files
610 |         features (list): feature names in incoming data; one_hot (dict): one-hot variable names and depths
611 |         out_image_base (str): base filename for output files and GEE asset
612 |         kernel_shape, kernel_buffer (Array): length-2 patch size and buffer in pixels
613 | Return:
614 |         str: space-separated string of written image filenames to be used in earthengine upload
615 | """
616 |
617 | print('Looking for TFRecord files...')
618 |
619 | # Get a list of all the files in the output bucket.
620 | blobs = bucket.list_blobs(prefix = join(pred_path, pred_image_base))
621 | filesList = [file.name for file in blobs if pred_image_base in file.name]
622 | # filesList = !gsutil ls {pred_path}
623 | # Get only the files generated by the image export.
624 | # exportFilesList = [s for s in filesList if pred_image_base in s]
625 |
626 | # Get the list of image files and the JSON mixer file.
627 | imageFilesList = []
628 | jsonFile = None
629 | for f in filesList:
630 | if f.endswith('.tfrecord.gz'):
631 | imageFilesList.append(f)
632 | elif f.endswith('.json'):
633 | jsonFile = f
634 |
635 | # Make sure the files are in the right order.
636 | imageFilesList.sort()
637 |
638 | from pprint import pprint
639 |     print('image files:', imageFilesList)
640 | print('json file:', jsonFile)
641 |
642 | # make a prediction dataset from the given files
643 |
644 | # Load the contents of the mixer file to a JSON object.
645 | blob = bucket.get_blob(jsonFile)
646 | jsonText = blob.download_as_string().decode('utf-8')
647 | mixer = json.loads(jsonText)
648 | # jsonText = !gsutil cat {jsonFile}
649 | # Get a single string w/ newlines from the IPython.utils.text.SList
650 | # mixer = json.loads(jsonText.nlstr)
651 | pprint(mixer)
652 | patches = mixer['totalPatches']
653 |
654 | # # Get set up for prediction.
655 | # x_buffer = int(kernel_buffer[0] / 2)
656 | # y_buffer = int(kernel_buffer[1] / 2)
657 | #
658 | # buffered_shape = [
659 | # KERNEL_SHAPE[0] + kernel_buffer[0],
660 | # KERNEL_SHAPE[1] + kernel_buffer[1]]
661 | #
662 | # imageColumns = [
663 | # tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32)
664 | # for k in BANDS
665 | # ]
666 | #
667 | # imageFeaturesDict = dict(zip(BANDS, imageColumns))
668 | #
669 | # def parse_image(example_proto):
670 | # return tf.io.parse_single_example(example_proto, imageFeaturesDict)
671 | #
672 | # def toTupleImage(dic):
673 | # inputsList = [dic.get(key) for key in BANDS]
674 | # stacked = tf.stack(inputsList, axis=0)
675 | # stacked = tf.transpose(stacked, [1, 2, 0])
676 | # stacked = normalize(stacked, [0, 1])
677 | # return stacked
678 |
679 | # Create a dataset(s) from the TFRecord file(s) in Cloud Storage.
680 | i = 0
681 | patches = 0
682 | written_files = []
683 | while i < len(imageFilesList):
684 | imageDataset = make_pred_dataset(file_list = imageFilesList[i:i+100], kernel_shape = kernel_shape, kernel_buffer = kernel_buffer, features = features, one_hot = one_hot)
685 | # imageDataset = tf.data.TFRecordDataset(imageFilesList[i:i+100], compression_type='GZIP')
686 | # imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)
687 | # imageDataset = imageDataset.map(toTupleImage).batch(1)
688 |
689 |         out_base = '{}{:04d}'.format(out_image_base, i)  # local name so the suffix does not compound across iterations
690 |         out_image_file = join('gs://', bucket.name, pred_path, 'outputs/tfrecord', out_base + '.TFRecord')
691 |         write_tfrecord_predictions(imageDataset, pred_path = pred_path, out_image_base = out_base, kernel_buffer = kernel_buffer)
692 | # # Perform inference.
693 | # print('Running predictions...')
694 | # predictions = m.predict(imageDataset, steps=None, verbose=1)
695 | # # print(predictions[0])
696 | #
697 | #
698 | #
699 | # print('Writing predictions to ' + out_image_file + '...')
700 | # writer = tf.io.TFRecordWriter(out_image_file)
701 | # for predictionPatch in predictions:
702 | # print('Writing patch ' + str(patches) + '...')
703 | # predictionPatch = predictionPatch[
704 | # x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]
705 | #
706 | # # Create an example.
707 | # example = tf.train.Example(
708 | # features=tf.train.Features(
709 | # feature={
710 | # 'probability': tf.train.Feature(
711 | # float_list=tf.train.FloatList(
712 | # value=predictionPatch.flatten()))
713 | # }
714 | # )
715 | # )
716 | # # Write the example.
717 | # writer.write(example.SerializeToString())
718 | # patches += 1
719 | #
720 | # writer.close()
721 | i += 100
722 | written_files.append(out_image_file)
723 |
724 | out_image_files = ' '.join(written_files)
725 | # Start the upload.
726 | # out_image_asset = join(user_folder, out_image_base)
727 | # !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile}
728 |     # return a space-separated string of written image files for use in gee upload
729 | return out_image_files
730 |
731 | def predict_pc_local(aoi, dates, m, buff = 128, kernel = 256):
732 | """Retrieve Sentinel-2 imagery from Microsoft Planetary Computer and run change detection
733 | Arguments:
734 | aoi (dict): GeoJson like dictionary defining area of interest
735 | crs (int): 4-digit epsg code representing coordinate reference system of the aoi
736 | dates (tpl): Four YYYY-MM-DD strings defining the before and after periods
737 | m (keras.Model): model to be used to make predictions
738 | buff (int): buffer to strip from prediction patches
739 | kernel (int): size of side of prediction patches
740 | Return:
741 | numpy.ndarray: 3D array with per-pixel change probabilities
742 | """
743 | # extract before and after dates from input in format required by PC
744 | before_dates = f'{dates[0]}/{dates[1]}'
745 | after_dates = f'{dates[2]}/{dates[3]}'
746 |
747 | # get our before and after stacs
748 | print('retrieving s2 data')
749 | bef_stac, bef_transform = get_s2_stac(before_dates, aoi)
750 | aft_stac, aft_transform = get_s2_stac(after_dates, aoi) # these are projected rioxarrays
751 |
752 | # create median composites
753 | bef_median = bef_stac.median(dim="time")
754 | aft_median = aft_stac.median(dim="time")
755 |
756 | #normalize
757 | bef_norm = normalize_dataArray(bef_median, 'band')
758 | aft_norm = normalize_dataArray(aft_median, 'band')
759 |
760 | # concatenate
761 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']})
762 |
763 | C,H,W = ds.shape
764 | print('data shape:', ds.shape) # from planetary computer this is C, H, W
765 | rearranged = ds.transpose('y','x','band')
766 | print('rearranged shape', rearranged.shape)
767 | indices = prediction_tools.generate_chip_indices(rearranged, buff, kernel)
768 | print(len(indices), 'indices generated')
769 | template = np.zeros((H, W))
770 | print('template shape:', template.shape)
771 | # print('generating chips')
772 | # chips, chip_indices = extract_chips(ds)
773 | # print(len(chip_indices), 'chips generated')
774 | dat = rearranged.values
775 | print('running predictions')
776 | output = predict_chips(dat, indices, template, m, kernel = kernel, buff = buff)
777 |
778 | # print(f'returning array of {output.shape}')
779 | return output, bef_median, aft_median, aft_transform
780 |
781 | def predict_pc_dask(model_blob_url, weights_blob_url, custom_objects, dates, aoi):
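    """Run chunked change-detection predictions on Planetary Computer Sentinel-2 data with dask.

    Builds before/after median composites with get_s2_stac(), normalizes and concatenates
    them along the band dimension, then maps the model over 256x256 chunks (with 64 pixels
    of overlap) using dask's map_overlap; the model and weights are loaded inside each chunk
    from the given blob URLs. Returns a dask array of per-pixel change probabilities.
    """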
782 | # # create a dask cluster
783 | # print('spinning up Dask Cluster')
784 | # cluster = GatewayCluster(
785 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway",
786 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80",
787 | # auth = 'jupyterhub',
788 | # worker_cores = 4
789 | # )
790 |
791 | # client = cluster.get_client()
792 | # client.upload_file(f'{str(ROOT)}/model_tools.py', load = True)
793 |
794 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes
795 | # cluster.adapt(minimum=4, maximum=24)
796 | # print('cluster created', cluster.dashboard_link)
797 |
798 | # extract before and after dates from input in format required by PC
799 | before_dates = f'{dates[0]}/{dates[1]}'
800 | after_dates = f'{dates[2]}/{dates[3]}'
801 |
802 | # get our before and after stacs
803 | print('retrieving s2 data')
804 | bef_stac = get_s2_stac(before_dates, aoi)
805 | aft_stac = get_s2_stac(after_dates, aoi) # these are projected rioxarrays
806 |
807 | # create median composites
808 | bef_median = bef_stac.median(dim="time")
809 | aft_median = aft_stac.median(dim="time")
810 |
811 | #normalize
812 | bef_norm = normalize_dataArray(bef_median, 'band')
813 | aft_norm = normalize_dataArray(aft_median, 'band')
814 |
815 | # concatenate
816 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']})
817 |
818 | trimmed = trim_dataArray(ds, 256)
819 | chunked = trimmed.chunk({'x':256, 'y':256})
820 |
821 | print('running chunked predictions')
822 | meta = np.array([[]], dtype="float32")
823 | predictions_array = chunked.data.map_overlap(
824 | lambda x: predict_chunk(x, model_blob_url, weights_blob_url, custom_objects),
825 | depth = (0, 64, 64),
826 | boundary = 0,
827 | meta=meta,
828 | drop_axis=0
829 | )
830 |
831 | # predictions = predictions_array
832 |
833 | # # to restore spatial reference, cast back to Xarray
834 | # out = xr.DataArray(
835 | # predictions,
836 | # coords=trimmed.drop_vars("band").coords,
837 | # dims=("y", "x"),
838 | # )
839 |
840 | return(predictions_array)
841 |
--------------------------------------------------------------------------------
/utils/processing.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Fri Mar 20 10:50:44 2020
3 |
4 | @author: MEvans
5 | """
6 | import tensorflow as tf
7 | import numpy as np
8 | import math
9 | import os
10 | import copy
11 | import sys
12 | import requests
13 | import io
14 | from random import shuffle, randint, uniform
15 | from pathlib import Path
16 |
17 | FILE = Path(__file__).resolve()
18 | ROOT = FILE.parents[0]
19 | DIR = Path(os.path.relpath(ROOT, Path.cwd()))
20 |
21 | if str(DIR) not in sys.path:
22 | sys.path.append(str(DIR))
23 |
24 | import array_tools
25 |
26 | def get_file_id(f:str, delim:str = '_', parts:slice = slice(3,5), flag=False):
27 |     """Return a unique identifier from a file name
28 |
29 | Params
30 | ---
31 | f: str
32 | file basename
33 | delim: str
34 | delimiter optionally splitting filename into parts
35 | parts: slice
36 | slice identifying the parts to return
37 |
38 | Returns
39 | ---
40 | tuple: tuple of filename pieces
41 | """
42 | stem = str(Path(f).stem)
43 | splits = stem.split(delim)
44 | ids = splits[parts]
45 | return tuple(ids)
46 |
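# --- Illustrative example (the filename pattern is hypothetical) ---
#   get_file_id('naip_2019_patch_042_017.npy', delim='_', parts=slice(3, 5))
#   # -> ('042', '017')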
47 | def match_files(urls, vars, delim:str = '_', parts:slice = slice(3,5), subset: set = None, flatdirectory:bool = False):
48 | """Align files by unique id among variables
49 | Params
50 | ---
51 | urls: list:str
52 | unordered list of all filepaths to be sorted and aligned by variable
53 | vars: dict
54 | key, value pairs with variable names as keys (e.g., 'naip'). value = None will skip that variable
55 | delim: str
56 | delimiter optionally splitting filename into parts
57 | parts: slice
58 | slice identifying the parts to return
59 | subset: set
60 | optional. unique ids with which to further subset the returned files
61 |
62 | Returns
63 | ---
64 | dict: key, value pairs for each valid key in vars. variable names are key (e.g. 'naip') and values are corresponding list of files
65 | """
66 |
67 | #print(len(subset))
68 | vars_copy = copy.deepcopy(vars)
69 |
70 | if flatdirectory:
71 | files_dic = {key:[url for url in urls if f'_{key}_' in url] for key in vars_copy.keys() if vars_copy[key]['files'] is not None}
72 | else:
73 | files_dic = {key:[url for url in urls if f'/{key}/' in url] for key in vars_copy.keys() if vars_copy[key]['files'] is not None}
74 |
75 | ids = [set([get_file_id(f, delim, parts) for f in files]) for files in files_dic.values()] # list of sets per var
76 |
77 | intersection = set.intersection(*ids)
78 |
79 | if subset:
80 | intx = intersection.intersection(subset)
81 | else:
82 | intx = intersection
83 |
84 | for var, ls in files_dic.items():
85 | subset = [f for f in ls if get_file_id(f, delim, parts) in intx]
86 | subset.sort()
87 | vars_copy[var].update({"files": subset})
88 |
89 | return vars_copy
90 |
91 | def split_files(files, labels = ['label', 'lu', 'naip', 'lidar', 's2'], delim = '_', parts = slice(3,5)):
92 | """Divide list of .npy arrays into separate lists by source data (e.g. NAIP, S2, etc.)
93 |
94 | Params
95 | ---
96 | files: list(str)
97 | list of files to be split
98 | labels: list(str)
99 | list of prefixes identifying subsets of files to return
100 |
101 | Return
102 | ---
103 | list, list, list: tuple of lists per file subset
104 | """
105 | def get_file_id(f, parts):
106 | stem = str(Path(f).stem)
107 | splits = stem.split(delim)
108 | ids = splits[parts]
109 | return tuple(ids)
110 |
111 | indices = [set([get_file_id(f, parts) for f in files if label in Path(f).parts]) for label in labels]
112 | intersection = set.intersection(*indices)
113 | out_files = [[f for f in files if label in Path(f).parts and get_file_id(f, parts) in intersection] for label in labels]
114 | return out_files
115 |
116 | def calc_ndvi(input):
117 |     """Calculate NDVI from Sentinel-2 data
118 | Parameters:
119 | input (dict): dictionary of incoming tensors
120 | Returns:
121 | tensor
122 | """
123 | epsilon = 1e-8
124 | nir = input.get('B8')
125 | red = input.get('B4')
126 | ndvi = tf.divide(tf.subtract(nir, red), tf.add(epsilon, tf.add(nir,red)))
127 | return ndvi
128 |
129 | def aug_tensor_color(img):
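    """Randomly perturb the per-channel brightness and contrast of an image tensor.

    Contrast and brightness multipliers are drawn per channel from U(0.95, 1.05)
    (contra_adj = bright_adj = 0.05). Returns a tensor with the same shape as `img`.
    """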
130 | n_ch = tf.shape(img)[-1]
131 | contra_adj = 0.05
132 | bright_adj = 0.05
133 |
134 | ch_mean = tf.math.reduce_mean(img, axis = (0,1), keepdims = True)
135 | #ch_mean = np.mean(img, axis=(0, 1), keepdims=True).astype(np.float32)
136 |
137 | contra_mul = tf.random.uniform(shape = (1, 1, n_ch),
138 | minval = 1-contra_adj,
139 | maxval = 1+contra_adj)
140 | # contra_mul = np.random.uniform(1 - contra_adj, 1 + contra_adj, (1, 1, n_ch)).astype(
141 | # np.float32
142 | # )
143 |
144 | bright_mul = tf.random.uniform(shape = (1, 1, n_ch),
145 | minval = 1 - bright_adj,
146 | maxval = 1+bright_adj)
147 | # bright_mul = np.random.uniform(1 - bright_adj, 1 + bright_adj, (1, 1, n_ch)).astype(
148 | # np.float32
149 | # )
150 |
151 | recolored = (img - ch_mean) * contra_mul + ch_mean * bright_mul
152 | return recolored
153 |
154 | def augColor(x, contra_adj = 0.05, bright_adj = 0.05):
155 | """Color augmentation
156 |
157 | Args:
158 | x: Image
159 |
160 | Returns:
161 | Augmented image
162 | """
163 | x = tf.image.random_hue(x, 0.05)
164 | x = tf.image.random_saturation(x, 0.6, 1.6)
165 | x = tf.image.random_brightness(x, 0.05)
166 | x = tf.image.random_contrast(x, 0.7, 1.3)
167 | return x
168 |
169 | def aug_tensor_morph(img):
170 |     """
171 |     Perform random flip and rotation augmentation on an image tensor
172 |     Parameters:
173 |         img (tensor): 4D image tensor
174 |     Returns:
175 |         3D tensor: augmented image with singleton dimensions squeezed
176 |     """
177 | outDims = tf.shape(img)[0:1]
178 | x = tf.image.random_flip_left_right(img)
179 | x = tf.image.random_flip_up_down(x)
180 | x = tf.image.rot90(x, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))
181 | #x = zoom(x, outDims)
182 | #since were gonna map_fn this on a 4d image, output must be 3d, so squeeze the artificial 'sample' dimension
183 | return tf.squeeze(x)
184 |
185 | def normalize_timeseries(arr, maxval = 10000, axis = -1, e = 0.00001):
186 | # normalize band values across timesteps
187 | normalized = arr/maxval
188 | # mn = np.nanmean(arr, axis = axis, keepdims = True)
189 | # std = np.nanstd(arr, axis = axis, keepdims = True)
190 | # normalized = (arr - mn)/(std+e)
191 | # replace nans with zeros?
192 | finite = np.where(np.isnan(normalized), 0.0, normalized)
193 | return finite
194 |
195 | def rearrange_timeseries(arr, nbands, time_dim = 1):
196 | # the number of time steps is in the 1st dimension if our data is (B, T, H, W, C)
197 | timesteps = arr.shape[time_dim]
198 | # randomly pick one of the timesteps as the starting time
199 | starttime = randint(0, timesteps-1)
200 | # print('start', starttime)
201 | # grab all timesteps leading up to the timestep corresponding to our random first
202 | last = arr[:,0:starttime,:,:,:]
203 | # print('last shape', last.shape)
204 | first = arr[:,starttime:timesteps,:,:,:]
205 | # print('start shape', first.shape)
206 | rearranged = np.concatenate([first, last], axis = 1)
207 |     assert rearranged.shape == arr.shape
208 |
209 | feats = rearranged[:,0:-1,:,:,:]
210 | labels = rearranged[:,-1,:,:,0:nbands]
211 |
212 | # confirm there are no all-nan images in labels
213 | batch_sums = np.sum(labels, axis = (1,2,3))
214 | if 0.0 in batch_sums:
215 | print('all nan labels, reshuffling')
216 | feats, labels, starttime = rearrange_timeseries(arr, nbands)
217 |
218 | return(feats, labels, starttime)
219 |
220 | def sin_cos(t:int, freq:int = 6) -> tuple:
221 | x = t/freq
222 | theta = 2*math.pi * x
223 | return (math.sin(theta), math.cos(theta))
224 |
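# --- Illustrative example ---
# Cyclical encoding of a periodic index (e.g. a 6-step series): adjacent steps map to
# nearby points on the unit circle, and step 0 coincides with step `freq`.
#
#   sin_cos(0, freq=6)   # -> (0.0, 1.0)
#   sin_cos(3, freq=6)   # -> (~0.0, -1.0), i.e. half way around the circle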
225 | def normalize_tensor(x, axes=[2], epsilon=1e-8, moments = None, splits = None):
226 | """
227 | Standardize incoming image patches by mean and variance.
228 |
229 | Moments can be calculated based on patch data by providing axes:
230 | To standardize each pixel use axes = [2]
231 | To standardize each channel use axes = [0, 1]
232 | To standardize globally use axes = [0, 1, 2]
233 |
234 | To standardize by global, or per-channel moments supply a list of [mean, variance] tuples.
235 | To standardize groups of channels separately, identify the size of each group. Groups of
236 | channels must be stacked contiguously and group sizes must sum to the total # of channels
237 |
238 | Parameters:
239 | x (tensor): nD image tensor
240 | axes (array): Array of ints. Axes along which to compute mean and variance, usually length n-1
241 | epsilon (float): small number to avoid dividing by zero
242 | moments (list): list of global mean, variance tuples for standardization
243 | splits (list): size(s) of groups of features to be kept together
244 | Return:
245 | tensor: nD image tensor normalized by channels
246 | """
247 |
248 | # define a basic function to normalize a 3d tensor
249 | def normalize(x):
250 | # shape = tf.shape(x).numpy()
251 | # if we've defined global or per-channel moments...
252 | if moments:
253 | # cast moments to arrays for mean and variance
254 | mean = np.array([tpl[0] for tpl in moments], dtype = 'float32')
255 | variance = np.array([tpl[1] for tpl in moments], dtype = 'float32')
256 | # otherwise, calculate moments along provided axes
257 | else:
258 | mean, variance = tf.nn.moments(x, axes, keepdims = True)
259 | # keepdims = True to ensure compatibility with input tensor
260 |
261 | # normalize the input tensor
262 | normed = (x - mean)/tf.sqrt(variance + epsilon)
263 | return normed
264 |
265 |
266 | # if splits are given, apply tensor normalization to each split
267 | if splits:
268 | splitLen = sum(splits)
269 | toNorm = x[:,:,0:splitLen]
270 | dontNorm = x[:,:,splitLen:]
271 | tensors = tf.split(toNorm, splits, axis = 2)
272 | normed = [normalize(tensor) for tensor in tensors]
273 | normed.append(dontNorm)
274 | # gather normalized splits into single tensor
275 | x_normed = tf.concat(normed, axis = 2)
276 | else:
277 | x_normed = normalize(x)
278 |
279 | return x_normed
280 |
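# --- Illustrative example (the band grouping is a placeholder) ---
# Standardize an 8-band (H, W, C) patch where the first 4 and last 4 bands (e.g.
# "before" and "after" imagery) are each standardized by their own global mean and
# variance computed over all axes of the group:
#
#   patch = tf.random.uniform((256, 256, 8))
#   normed = normalize_tensor(patch, axes=[0, 1, 2], splits=[4, 4])
#   # or, with precomputed global (mean, variance) pairs, one per channel:
#   # normed = normalize_tensor(patch, moments=[(0.3, 0.02)] * 8)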
281 | def rescale_tensor(img, axes = [2], epsilon=1e-8, moments = None, splits = None):
282 | """
283 | Rescale incoming image patch to [0,1] based on min and max values
284 |
285 | Min, max can be calculated based on patch data by providing axes:
286 | To rescale each pixel use axes = [2]
287 | To rescale each channel use axes = [0, 1]
288 | To rescale globally use axes = [0, 1, 2]
289 |
290 |     To rescale by global or per-channel extrema, supply a list of [min, max] tuples.
291 | To rescale groups of channels separately, identify the size of each group. Groups of
292 | channels must be stacked contiguously and group sizes must sum to the total # of channels
293 |
294 | Args:
295 | img (tensor): 3D (H,W,C) image tensor
296 | axes (list): axes along which to calculate min/max for rescaling
297 | moments (list): list of [min, max] tuples for standardization
298 | splits (list): size(s) of groups of features to be kept together
299 | Return:
300 | tensor: 3D tensor of same shape as input, with values [0,1]
301 | """
302 | def rescale(img):
303 | if moments:
304 | minimum = np.array([tpl[0] for tpl in moments], dtype = 'float32')
305 | maximum = np.array([tpl[1] for tpl in moments], dtype = 'float32')
306 | else:
307 | minimum = tf.math.reduce_min(img, axis = axes, keepdims = True)
308 | maximum = tf.math.reduce_max(img, axis = axes, keepdims = True)
309 | scaled = (img - minimum)/((maximum - minimum) + epsilon)
310 | # scaled = tf.divide(tf.subtract(img, minimum), tf.add(tf.subtract(maximum, minimum))
311 | return scaled
312 |
313 | # if splits are given, apply tensor normalization to each split
314 | if splits:
315 | tensors = tf.split(img, splits, axis = 2)
316 | rescaled = [rescale(tensor) for tensor in tensors]
317 | # gather normalized splits into single tensor
318 | img_rescaled = tf.concat(rescaled, axis = 2)
319 | else:
320 | img_rescaled = rescale(img)
321 |
322 | return img_rescaled
323 |
324 | #def parse_tfrecord(example_proto, ftDict):
325 | # """The parsing function.
326 | # Read a serialized example into the structure defined by FEATURES_DICT.
327 | # Args:
328 | # example_proto: a serialized Example.
329 | # Returns:
330 | # A dictionary of tensors, keyed by feature name.
331 | # """
332 | # return tf.io.parse_single_example(example_proto, ftDict)
333 |
334 |
335 | def to_tuple(inputs, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs):
336 | """Function to convert a dictionary of tensors to a tuple of (inputs, outputs).
337 | Turn the tensors returned by parse_tfrecord into a stack in HWC shape.
338 | Args:
339 | inputs (dict): A dictionary of tensors, keyed by feature name. Response
340 | variable must be the last item.
341 | features (list): List of input feature names
342 |         response (str or dict): response name(s)
343 | axes (list): axes along which to calculate moments for rescaling
344 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth
345 | splits (list): size(s) of groups of features to be kept together
346 | moments (list): list of [mean, var] tuples for standardization
347 | Returns:
348 |         A tuple of (inputs, outputs).
349 | """
350 | # one_hot = kwargs.get('one_hot')
351 | # splits = kwargs.get('splits')
352 | # moments = kwargs.get('moments')
353 |
354 | # If custom preprocessing functions are specified add respective bands
355 | for fxn in kwargs.values():
356 | der = fxn(inputs)
357 | inputs = der
358 |
359 | # inputsList = [inputs.get(key) for key in features + [response]]
360 | if type(response) == dict:
361 | depth = list(response.values())[0]
362 | key = list(response.keys())[0]
363 | res = tf.squeeze(tf.one_hot(tf.cast(inputs.get(key), tf.uint8), depth = depth))
364 | else:
365 | res = tf.expand_dims(inputs.get(response), axis = 2)
366 |
367 | # stack the augmented bands, optional one-hot tensors, and response variable
368 | if one_hot:
369 | featList = [inputs.get(key) for key in features if key not in one_hot.keys()]
370 | hotList= [tf.one_hot(tf.cast(inputs.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items() if key in features]
371 | # hotList = [tf.one_hot(tf.cast(inputs.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items()]
372 | else:
373 | featList = [inputs.get(key) for key in features]
374 |
375 | # stack, transpose, augment, and normalize continuous bands
376 | bands = tf.transpose(tf.stack(featList, axis = 0), [1,2,0])
377 | bands = aug_tensor_color(bands)
378 | bands = rescale_tensor(bands, axes = axes, moments = moments, splits = splits)
379 |
380 | if one_hot:
381 | hotStack = tf.concat(hotList, axis = 2)
382 | stacked = tf.concat([bands, hotStack, res], axis =2)
383 | else:
384 | stacked = tf.concat([bands, res], axis = 2)
385 |
386 | # perform morphological augmentation
387 | stacked = aug_tensor_morph(stacked)
388 |
389 | feats = stacked[:, :, :-res.shape[2]]
390 | labels = stacked[:, :, -res.shape[2]:]
391 | labels = tf.where(tf.greater(labels, 1.0), 1.0, labels)
392 | return feats, labels
393 |
394 | def get_dataset(files, ftDict, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs):
395 | """Function to read, parse and format to tuple a set of input tfrecord files.
396 | Get all the files matching the pattern, parse and convert to tuple.
397 | Args:
398 | files (list): A list of filenames storing tfrecords
399 |         ftDict (dict): Dictionary of input features in tfrecords
400 |         features (list): List of input feature names
401 |         response (str or dict): response name(s)
402 | axes (list): axes along which to calculate moments for rescaling
403 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth
404 | splits (list): size(s) of groups of features to be kept together
405 | moments (list): list of [mean, var] tuples for standardization
406 | Returns:
407 | A tf.data.Dataset
408 | """
409 |
410 | def parse_tfrecord(example_proto):
411 | return tf.io.parse_single_example(example_proto, ftDict)
412 |
413 | def tupelize(ftDict):
414 | return to_tuple(ftDict, features, response, axes, splits, one_hot, moments, **kwargs)
415 |
416 | dataset = tf.data.TFRecordDataset(files, compression_type='GZIP')
417 | dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)
418 | dataset = dataset.map(tupelize, num_parallel_calls=5)
419 | return dataset
420 |
421 | def get_training_dataset(files, ftDict, features, response, buff, batch = 16, repeat = True, axes = [2], splits = None, one_hot = None, moments = None, **kwargs):
422 | """
423 | Get the preprocessed training dataset
424 | Args:
425 | files (list): list of tfrecord files to be used for training
426 |         ftDict (dict): Dictionary of input features in tfrecords
427 |         features (list): List of input feature names
428 |         response (str or dict): response name(s)
429 |         axes (list): axes along which to calculate moments for rescaling
430 |         buff (int): buffer size for shuffle
431 | batch (int): batch size for training
432 | repeat (bool): should the dataset be repeated
433 | Returns:
434 | A tf.data.Dataset of training data.
435 | """
436 | dataset = get_dataset(files, ftDict, features, response, axes, splits, one_hot, moments, **kwargs)
437 | if repeat:
438 | dataset = dataset.shuffle(buff).batch(batch).repeat()
439 | else:
440 | dataset = dataset.shuffle(buff).batch(batch)
441 | return dataset
442 |
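# --- Illustrative example (band names, shapes and file paths are placeholders) ---
#   bands = ['B2', 'B3', 'B4', 'B8']
#   response = 'landcover'
#   columns = [tf.io.FixedLenFeature(shape=[256, 256], dtype=tf.float32)
#              for _ in bands + [response]]
#   ft_dict = dict(zip(bands + [response], columns))
#   train_files = tf.io.gfile.glob('gs://my-bucket/training/*.tfrecord.gz')
#   training = get_training_dataset(train_files, ft_dict, bands, response,
#                                   buff=500, batch=16, repeat=True)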
443 | def get_eval_dataset(files, ftDict, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs):
444 | """
445 | Get the preprocessed evaluation dataset
446 | Args:
447 | files (list): list of tfrecords to be used for evaluation
448 | Returns:
449 | A tf.data.Dataset of evaluation data.
450 | """
451 |
452 | dataset = get_dataset(files, ftDict, features, response, axes, splits, one_hot, moments, **kwargs)
453 | dataset = dataset.batch(1)
454 | return dataset
455 |
456 | class UNETDataGenerator(tf.keras.utils.Sequence):
457 | """Generates data for Keras
458 | Sequence based data generator. Suitable for building data generator for training and prediction.
459 | """
460 | def __init__(self, labelfiles = None, s2files = None, naipfiles = None,
461 | hagfiles = None, lidarfiles = None, lufiles = None,
462 | demfiles = None, ssurgofiles = None,
463 | to_fit=True, batch_size=32, unet_dim=(256, 256),
464 | n_channels=4, n_classes = 8, shuffle=True,
465 | splits = None, moments = None,
466 | lc_transitions = [(12,3), (11,3), (10,3), (9,8), (255, 0)],
467 | lu_transitions = [(82,9), (84,10)]):
468 | """Initialization
469 |
470 | :param files: list of all files to use in the generator
471 | :param to_fit: True to return X and y, False to return X only
472 | :param batch_size: batch size at each iteration
473 | :param dim: tuple indicating image dimension
474 | :param n_channels: number of image channels
475 | :param n_classes: number of output masks
476 | :param n_timesteps: number of multi-channel images
477 | :param shuffle: True to shuffle label indexes after every epoch
478 | """
479 | self.s2files = s2files
480 | self.naipfiles = naipfiles
481 | self.hagfiles = hagfiles
482 | self.demfiles = demfiles
483 | self.ssurgofiles = ssurgofiles
484 | self.lidarfiles = lidarfiles
485 | self.labelfiles = labelfiles
486 | self.lufiles = lufiles
487 | self.to_fit = to_fit
488 | self.batch_size = batch_size
489 | self.unet_dim = unet_dim
490 | self.n_channels = n_channels
491 | self.n_classes = n_classes
492 | self.shuffle = shuffle
493 | self.splits = splits
494 | self.moments = moments
495 | self.lc_trans = lc_transitions
496 | self.lu_trans = lu_transitions
497 | self.indexes = np.arange(len(self.labelfiles))
498 | self.mask = False
499 | self.on_epoch_end()
500 |
501 | # do an initial shuffle for cases where the generator is called fresh at the start of each epoch
502 | if self.shuffle == True:
503 | print('shuffling')
504 | np.random.shuffle(self.indexes)
505 |
506 | if self.to_fit == True:
507 | print('masking on')
508 | self.mask = True
509 |
510 | def __len__(self):
511 | """Denotes the number of batches per epoch
512 |
513 | :return: number of batches per epoch
514 | """
515 | return int(np.floor(len(self.indexes) / self.batch_size))
516 |
517 | def on_epoch_end(self):
518 | """Updates indexes after each epoch
519 |
520 | """
521 | print('the generator knows the epoch ended')
522 | self.indexes = np.arange(len(self.indexes))
523 | if self.shuffle == True:
524 | print('shuffling')
525 | np.random.shuffle(self.indexes)
526 |
527 | @staticmethod
528 | def load_numpy_url(url):
529 |
530 | if os.path.exists(url):
531 | data = np.load(url)
532 | else:
533 | response = requests.get(url)
534 | response.raise_for_status()
535 | data = np.load(io.BytesIO(response.content))
536 |
537 | return(data)
538 |
539 | def _load_numpy_data(self, files_temp):
540 | arrays = [UNETDataGenerator.load_numpy_url(f) for f in files_temp]
541 | return(arrays)
542 |
543 | def _get_unet_data(self, files_temp, add_nan_mask = False,rescale_val=False):
544 | # arrays come from PC in (C, H, W) format
545 | arrays = self._load_numpy_data(files_temp)
546 | try:
547 | assert len(arrays) > 0
548 | assert all([len(x.shape) == 3 for x in arrays]), 'all arrays not 3D'
549 | # ensure all arrays are C, H, W to start
550 | chw = [np.moveaxis(x, source = -1, destination = 0) if x.shape[-1] < x.shape[0] else x for x in arrays]
551 | if rescale_val is not False:
552 | chw = [x/rescale_val for x in chw]
553 | if add_nan_mask == True:
554 | chw_new = []
555 | for cur_array in chw:
556 |
557 | mask_channel = np.zeros([cur_array.shape[1], cur_array.shape[2]])
558 | # Create a random array to be used to replace the original data
559 | if self.to_fit:
560 | for arr_2d in cur_array:
561 | nans = np.isnan(arr_2d)
562 | bads = arr_2d < -5000
563 | mask_channel[nans==True] = 1
564 | mask_channel[bads==True] = 1
565 | arr_2d[mask_channel==1] = np.random.randn((mask_channel==1).sum())
566 | # arr_2d[nans==True] = np.random.uniform()
567 | #arr_2d[np.isnan(arr_2d)] = np.random.randn(len(arr_2d[np.isnan(arr_2d)]))
568 | #print("AFTER FIX:",np.isnan(cur_array).sum())
569 | #cur_array = np.vstack((cur_array, mask[None,:,:]))
570 |
571 |
572 | """randarr = np.random.uniform(size=cur_array.shape)*cur_array.max()
573 | # Build a mask layer to use in the replacement
574 | n_cols = cur_array.shape[2]
575 | n_rows = cur_array.shape[1]
576 | mask_channel = np.ones((n_rows, n_cols), dtype=np.int8)
577 | np.any(cur_array == np.nan, axis=0, out=mask_channel)
578 | # Replace the values in any of the channels where the mask_channel is 0 with the values from the random array
579 | cur_array[:, mask_channel == 1] = randarr[:, mask_channel == 1]
580 | cur_array[:, mask_channel == 1] = randarr[:, mask_channel == 1] """
581 | cur_array = np.append(cur_array, mask_channel[np.newaxis, :, :], axis=0)
582 | #print("AFTER:",np.isnan(cur_array).sum())
583 | chw_new.append(cur_array)
584 | chw = chw_new
585 | batch = np.stack(chw, axis = 0)
586 | assert np.isnan(batch).sum() < 1, 'nans in batch, skipping'
587 | in_shape = batch.shape
588 | # in case our incoming data is of different size than we want, define a trim amount
589 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2)
590 | # If necessary, trim data to (-1, dims[0], dims[1])
591 | array = batch[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]]
592 | # rearrange arrays from (B, C, H, W) -> (B, H, W, C) expected by model
593 |
594 | reshaped = np.moveaxis(array, source = 1, destination = 3)
595 | return reshaped
596 | except AssertionError as msg:
597 | print(msg)
598 | return None
599 | def _get_naip_data(self, indexes):
600 | files_temp = [self.naipfiles[k] for k in indexes]
601 | naip = self._get_unet_data(files_temp,rescale_val=255.0)
602 | if type(naip) == np.ndarray:
603 |
604 | if self.to_fit:
605 | recolored = array_tools.aug_array_color(naip)
606 | return recolored
607 | return naip
608 | #else:
609 | #return naip
610 |
611 | def _get_s2_data(self, indexes):
612 | files_temp = [self.s2files[k] for k in indexes]
613 | s2 = self._get_unet_data(files_temp,rescale_val=10000.0)
614 | if type(s2) == np.ndarray:
615 | if self.to_fit:
616 | recolored = array_tools.aug_array_color(s2)
617 | return recolored
618 | else:
619 | return s2
620 | #else:
621 | #return s2
622 |
623 | def _get_lidar_data(self, indexes):
624 | files_temp = [self.lidarfiles[k] for k in indexes]
625 | lidar = self._get_unet_data(files_temp,self.mask,rescale_val=100)
626 | if type(lidar) == np.ndarray:
627 | return lidar
628 |
629 | def _get_hag_data(self, indexes):
630 | files_temp = [self.hagfiles[k] for k in indexes]
631 | hag = self._get_unet_data(files_temp, self.mask, rescale_val=100)
632 | if type(hag) == np.ndarray:
633 | return hag
634 | #else:
635 | # return hag
636 |
637 | def _get_dem_data(self, indexes):
638 | files_temp = [self.demfiles[k] for k in indexes]
639 | dem = self._get_unet_data(files_temp,self.mask,rescale_val=2000.0)
640 | if type(dem) == np.ndarray:
641 | # we are going to use the min and max elevations across the chesapeake
642 | return dem
643 | #else:
644 | # return dem
645 |
646 | def _get_ssurgo_data(self, indexes):
647 | files_temp = [self.ssurgofiles[k] for k in indexes]
648 | ssurgo = self._get_unet_data(files_temp)
649 | if type(ssurgo) == np.ndarray:
650 | return ssurgo
651 |
652 | def _process_y(self, indexes):
653 | # get label files for current batch
654 | lc_files = [self.labelfiles[k] for k in indexes]
655 | # lc_arrays = [np.load(file) for file in lc_files]
656 | lc_arrays = self._load_numpy_data(lc_files)
657 |
658 | try:
659 | assert len(lc_arrays) == self.batch_size
660 | assert all([x.shape == (1, self.unet_dim[0], self.unet_dim[1]) for x in lc_arrays])
661 | lc = np.stack(lc_arrays, axis = 0) #(B, C, H, W)
662 | int_labels = lc.astype(int)
663 |
664 | # optionally reduce the number of classes
665 | if self.lc_trans:
666 | merged_labels = array_tools.merge_classes(cond_array = int_labels, trans = self.lc_trans, out_array = int_labels)
667 | else:
668 | merged_labels = int_labels
669 |
670 | if self.lufiles:
671 | lu_files = [self.lufiles[k] for k in indexes]
672 | # lu_arrays = [np.load(file) for file in lu_files]
673 | lu_arrays = self._load_numpy_data(lu_files)
674 | try:
675 | assert len(lu_arrays) == self.batch_size
676 | assert all([x.shape == (1, self.unet_dim[0], self.unet_dim[1]) for x in lu_arrays])
677 | lu = np.stack(lu_arrays, axis = 0) #(B, C, H, W)
678 | y = array_tools.merge_classes(cond_array = lu, trans = self.lu_trans, out_array = merged_labels)
679 | except AssertionError:
680 | return None
681 | else:
682 | y = merged_labels
683 |
684 | # If necessary, trim data to (-1, dims[0], dims[1])
685 | in_shape = y.shape
686 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2)
687 | array = y[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]]
688 |
689 |         # note: labels are used as-is here (no shift is applied before one-hot encoding)
690 |         zeroed = array
691 | # create one-hot representation of classes
692 | one_hot = tf.one_hot(zeroed, self.n_classes)
693 | # one_hot = to_one_hot(zeroed, self.n_classes)
694 | return tf.squeeze(one_hot)
695 |
696 | except AssertionError:
697 | return None
698 |
699 | def __getitem__(self, index):
700 | """Generate one batch of data
701 |
702 | :param index: index of the batch
703 | :return: X and y when fitting. X only when predicting
704 | """
705 | # Generate indexes of the batch
706 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
707 |
708 | datasets = []
709 |
710 | if self.s2files:
711 | s2Data = self._get_s2_data(indexes)
712 | datasets.append(s2Data)
713 |
714 | if self.naipfiles:
715 | naipData = self._get_naip_data(indexes)
716 | #print("appending Naip data",type(naipData))
717 | datasets.append(naipData)
718 |
719 | if self.hagfiles:
720 | hagData = self._get_hag_data(indexes)
721 | datasets.append(hagData)
722 |
723 | if self.demfiles:
724 | demData = self._get_dem_data(indexes)
725 | # print('dem', demData.shape)
726 |             #print("appending DEM data",type(demData))
727 | datasets.append(demData)
728 |
729 | if self.ssurgofiles:
730 | ssurgoData = self._get_ssurgo_data(indexes)
731 |             # print('ssurgo', ssurgoData.shape)
732 | #print("appending ssurgoData",type(ssurgoData))
733 | datasets.append(ssurgoData)
734 |
735 | if self.lidarfiles:
736 | lidarData = self._get_lidar_data(indexes)
737 | datasets.append(lidarData)
738 |
739 | if any([type(dat) != np.ndarray for dat in datasets]):
740 | pass
741 | else:
742 | xData = np.concatenate(datasets, axis = -1)
743 |
744 | if self.to_fit:
745 | labels = self._process_y(indexes)
746 |             # perform morphological augmentation on the stacked (B, H, W, C) batch
747 | stacked = np.concatenate([xData, labels], axis = -1)
748 | morphed = array_tools.aug_array_morph(stacked)
749 | # print('augmented max', np.nanmax(augmented, axis = (0,1,2)))
750 |
751 | feats = morphed[:,:,:,0:self.n_channels]
752 | labels = morphed[:,:,:,self.n_channels:]
753 | return feats, labels
754 | else:
755 | return xData
756 |
757 | class SiameseDataGenerator(UNETDataGenerator):
758 | def __init__(self, beforefiles, afterfiles, add_nan_mask: bool, *args, **kwargs):
759 | super().__init__(*args, **kwargs)
760 | self.beforefiles = beforefiles
761 | self.afterfiles = afterfiles
762 | self.mask = add_nan_mask
763 |
764 | # do an initial shuffle for cases where the generator is called fresh at the start of each epoch
765 | if self.shuffle == True:
766 | print('shuffling')
767 | np.random.shuffle(self.indexes)
768 | print(self.batch_size)
769 | def __len__(self):
770 | """Denotes the number of batches per epoch
771 |
772 | :return: number of batches per epoch
773 | """
774 | return UNETDataGenerator.__len__(self)
775 |
776 | def on_epoch_end(self):
777 | """Updates indexes after each epoch
778 |
779 | """
780 | UNETDataGenerator.on_epoch_end(self)
781 |
782 | def _get_unet_data(self, files_temp, add_nan_mask = False,rescale_val=None):
783 | # arrays come from PC in (C, H, W) format
784 | arrays = self._load_numpy_data(files_temp)
785 | try:
786 | assert len(arrays) > 0
787 | assert all([len(x.shape) == 3 for x in arrays]), 'all arrays not 3D'
788 | # ensure all arrays are C, H, W to start
789 | chw = [np.moveaxis(x, source = -1, destination = 0) if x.shape[-1] < x.shape[0] else x for x in arrays]
790 | if rescale_val is not None:
791 | chw = [x/rescale_val for x in chw]
792 | batch = np.stack(chw, axis = 0)
793 | # assert np.isnan(batch).sum() < 1, 'nans in batch, skipping'
794 | in_shape = batch.shape
795 | # in case our incoming data is of different size than we want, define a trim amount
796 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2)
797 | # If necessary, trim data to (-1, dims[0], dims[1])
798 | array = batch[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]]
799 | # rearrange arrays from (B, C, H, W) -> (B, H, W, C) expected by model
800 |
801 | reshaped = np.moveaxis(array, source = 1, destination = 3)
802 | nans = np.isnan(reshaped)
803 | if add_nan_mask:
804 | mask = np.ones(shape = reshaped.shape) # create a mask with all valid pixels by default
805 | mask[nans] = 0 # inject zeros into mask at invalid pixels
806 | mask[reshaped < -1] = 0
807 |                 reduced_mask = mask.min(axis = -1, keepdims = True) # reduce mask along channels -> (B, H, W, 1)
808 |                 reshaped[nans] = np.random.random(nans.sum()) # replace nan values with random val from [0,1)
809 |                 # the mask is returned as a separate array rather than concatenated onto the batch
810 |                 return reshaped, reduced_mask
811 | else:
812 | assert np.isnan(reshaped).sum() < 1, 'nans in batch, skipping'
813 | return reshaped, None
814 |
815 | except AssertionError as msg:
816 | print(msg)
817 | return None, None
818 |
819 | def _process_y(self, indexes):
820 | # get label files for current batch
821 | files_temp = [self.labelfiles[k] for k in indexes]
822 |         lc_files = self._load_numpy_data(files_temp)
823 | lc_arrays = [np.squeeze(f) for f in lc_files] # make all labels 2D to start
824 | try:
825 | assert len(lc_arrays) == self.batch_size
826 | lc = np.stack(lc_arrays, axis = 0) #(B, H, W)
827 | int_labels = lc.astype(int)
828 | binary = np.where(int_labels > 1, 1, int_labels)
829 | # If necessary, trim data to (-1, dims[0], dims[1])
830 | in_shape = binary.shape # -> (B, H, W)
831 | trim = ((in_shape[1] - self.unet_dim[0])//2, (in_shape[2] - self.unet_dim[1])//2)
832 | array = binary[:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]]
833 |
834 | # add channel dimension (B, H, W) -> (B, H, W, C) expected by model
835 | reshaped = np.expand_dims(array, -1)
836 | return reshaped
837 | except AssertionError:
838 | return None
839 |
840 | def _get_before_data(self, indexes, rescale_val):
841 | files_temp = [self.beforefiles[k] for k in indexes]
842 | s2, bef_mask = self._get_unet_data(files_temp, add_nan_mask = self.mask, rescale_val=rescale_val)
843 | if type(s2) == np.ndarray:
844 | if self.to_fit:
845 | recolored = array_tools.aug_array_color(s2)
846 | return recolored, bef_mask
847 | else:
848 | return s2, bef_mask
849 |
850 | def _get_after_data(self, indexes, rescale_val):
851 | files_temp = [self.afterfiles[k] for k in indexes]
852 | s2, aft_mask = self._get_unet_data(files_temp, add_nan_mask = self.mask, rescale_val=rescale_val)
853 | if type(s2) == np.ndarray:
854 | if self.to_fit:
855 | recolored = array_tools.aug_array_color(s2)
856 | return recolored, aft_mask
857 | else:
858 | return s2, aft_mask
859 |
860 | def __getitem__(self, index):
861 | """Generate one batch of data
862 |
863 | :param index: index of the batch
864 | :return: X and y when fitting. X only when predicting
865 | """
866 | # Generate indexes of the batch
867 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
868 |
869 | befData, befMask = self._get_before_data(indexes, rescale_val = 10000.0)
870 |
871 | aftData, aftMask = self._get_after_data(indexes, rescale_val = 10000.0)
872 |
873 | labels = self._process_y(indexes)
874 |
875 |         # perform morphological augmentation on the stacked (B, H, W, C) batch
876 | # if all([befData is not None, aftData is not None, labels is not None]):
877 | if self.mask:
878 | mask = np.concatenate([befMask, aftMask], axis = -1).min(axis = -1, keepdims= True)
879 | labels = labels * mask
880 |
881 | stacked = np.concatenate([befData, aftData, labels], axis = -1)
882 |
883 | # print('augmented max', np.nanmax(augmented, axis = (0,1,2)))
884 |
885 | if self.to_fit:
886 | morphed = array_tools.aug_array_morph(stacked)
887 | feats_b = morphed[:,:,:,0:self.n_channels]
888 | feats_a = morphed[:,:,:,self.n_channels:2*(self.n_channels)]
889 | labels = morphed[:,:,:,-1:]
890 | return [feats_b, feats_a], labels
891 | else:
892 | return [befData, aftData]
893 |
894 |
895 | class LSTMDataGenerator(tf.keras.utils.Sequence):
896 | """Generates data for Keras
897 | Sequence based data generator. Suitable for building data generator for training and prediction.
898 | """
899 | def __init__(self, files = None,
900 | to_fit=True, batch_size=32, dim=(256, 256),
901 | n_channels=4, n_timesteps = 6, shuffle=True):
902 | """Initialization
903 |
904 | :param files: list of all files to use in the generator
905 | :param to_fit: True to return X and y, False to return X only
906 | :param batch_size: batch size at each iteration
907 | :param dim: tuple indicating image dimension
908 | :param n_channels: number of image channels
909 | :param n_classes: number of output masks
910 | :param n_timesteps: number of multi-channel images
911 | :param shuffle: True to shuffle label indexes after every epoch
912 | """
913 | self.files = files
914 | self.to_fit = to_fit
915 | self.batch_size = batch_size
916 | self.dim = dim
917 | self.n_channels = n_channels
918 | self.n_timesteps = n_timesteps
919 | self.shuffle = shuffle
920 | self.on_epoch_end()
921 |
922 | def __len__(self):
923 | """Denotes the number of batches per epoch
924 |
925 | :return: number of batches per epoch
926 | """
927 | return int(np.floor(len(self.files) / self.batch_size))
928 |
929 | def on_epoch_end(self):
930 | """Updates indexes after each epoch
931 |
932 | """
933 | self.indexes = np.arange(len(self.files))
934 | if self.shuffle == True:
935 | np.random.shuffle(self.indexes)
936 |
937 | def _load_numpy_data(self, files_temp):
938 | arrays = [UNETDataGenerator.load_numpy_url(f) for f in files_temp]
939 |         return arrays
940 |
941 | def __getitem__(self, index):
942 | """Generate one batch of data
943 |
944 | :param index: index of the batch
945 | :return: X and y when fitting. X only when predicting
946 | """
947 | # Generate indexes of the batch
948 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
949 |
950 | # Find list of IDs
951 | files_temp = [self.files[k] for k in indexes]
952 | # arrays come from PC in (T, C, H, W) format
953 | arrays = self._load_numpy_data(files_temp)
954 |
955 | trim = ((arrays[0].shape[2] - self.dim[0])//2, (arrays[0].shape[3] - self.dim[1])//2)
956 |         # TEMPORARY FIX: drop the last image to give us a series of 5
957 |         array = [arr[0:self.n_timesteps,:,trim[0]:self.dim[0]+trim[0],trim[1]:self.dim[1]+trim[1]] for arr in arrays]
958 | 
959 |         # create a single (B, T, C, H, W) array
960 | batch = np.stack(array, axis = 0)
961 | # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model
962 | reshaped = np.moveaxis(batch, source = 2, destination = 4)
963 | normalized = normalize_timeseries(reshaped, axis = 1)
964 | # harmonized = add_harmonic(normalized)
965 | if self.to_fit:
966 | rearranged = rearrange_timeseries(normalized, self.n_channels)
967 | feats, labels = array_tools.split_timeseries(rearranged)
968 | # we can't have nans in label
969 | return feats, labels
970 | else:
971 | print('normalized dims', normalized.shape)
972 | return normalized
973 |
974 | class LSTMAutoencoderGenerator(LSTMDataGenerator):
975 | """Generates data for Keras
976 | Sequence based data generator. Suitable for building data generator for training and prediction.
977 | """
978 | def __init__(
979 | self, harmonics = True, sample_weights = False, *args, **kwargs):
980 | """Initialization
981 |
982 | :param files: list of all files to use in the generator
983 | :param to_fit: True to return X and y, False to return X only
984 | :param batch_size: batch size at each iteration
985 | :param dim: tuple indicating image dimension
986 | :param n_channels: number of image channels
987 | :param n_classes: number of output masks
988 | :param n_timesteps: number of multi-channel images
989 | :param shuffle: True to shuffle label indexes after every epoch
990 | """
991 | super().__init__(*args, **kwargs)
992 | self.add_harmonics = harmonics
993 | self.sample_weights = sample_weights
994 | self.on_epoch_end()
995 |
996 | def __len__(self):
997 | return LSTMDataGenerator.__len__(self)
998 |
999 | def on_epoch_end(self):
1000 | LSTMDataGenerator.on_epoch_end(self)
1001 |
1002 | def __getitem__(self, index):
1003 | """Generate one batch of data
1004 |
1005 | :param index: index of the batch
1006 | :return: X and y when fitting. X only when predicting
1007 | """
1008 | # Generate indexes of the batch
1009 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
1010 |
1011 | # Find list of IDs
1012 | files_temp = [self.files[k] for k in indexes]
1013 |
1014 | # arrays come from PC in (T, C, H, W) format
1015 | arrays = self._load_numpy_data(files_temp)
1016 |
1017 |         # create a single (B, T, C, H, W) array
1018 | batch = np.stack(arrays, axis = 0)
1019 |
1020 | # in case our incoming data is of different size than we want, define a trim amount
1021 | trim = ((batch.shape[3] - self.dim[0])//2, (batch.shape[4] - self.dim[1])//2)
1022 |
1023 | # n_timesteps + 1 to account for the fact that the sequence includes the next image as target
1024 | array = batch[:, 0:self.n_timesteps+1,:,trim[0]:self.dim[0]+trim[0],trim[1]:self.dim[1]+trim[1]]
1025 |
1026 | # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model
1027 | reshaped = np.moveaxis(array, source = 2, destination = 4)
1028 |
1029 | normalized = normalize_timeseries(reshaped, axis = 1)
1030 |
1031 | # harmonized = add_harmonic(normalized)
1032 | if self.add_harmonics:
1033 | # get start dates for each file
1034 | starts = [int(Path(f).stem.split('_')[2]) for f in files_temp]
1035 | else:
1036 | harmonics = None
1037 |
1038 | if self.to_fit:
1039 | feats, y, start = rearrange_timeseries(normalized, self.n_channels)
1040 | temporal_y = np.flip(feats, axis = 1) # reverse images along time dimension
1041 | weights = [None, abs(feats[:,-1,:,:,:] - y)/(feats[:,-1,:,:,:] + y)] if self.sample_weights else None
1042 | if self.add_harmonics:
1043 | starts = [x + start - self.n_timesteps for x in starts]
1044 | harmonics = array_tools.make_harmonics(starts, self.n_timesteps, self.dim)
1045 | return [feats, harmonics], [temporal_y, y], weights
1046 | else:
1047 | if self.add_harmonics:
1048 | harmonics = array_tools.make_harmonics(starts, self.n_timesteps, self.dim)
1049 | return [normalized, harmonics]
1050 |
1051 | class HybridDataGenerator(UNETDataGenerator):
1052 | """Generates data for Keras model with U-Net and LSTM branches
1053 | Sequence based data generator. Suitable for building data generator for training and prediction.
1054 | """
1055 |
1056 | def __init__(self, s1files,
1057 | lstm_dim = (6, 32, 32, 6),
1058 | lc_transitions = [(12,3), (11,3), (10,3), (9,8), (255, 0)],
1059 | lu_transitions = [(82,9), (84,10)],
1060 | unet_dim = (600,600),
1061 | *args, **kwargs):
1062 | """Class Initialization
1063 |
1064 | Params
1065 | ---
1066 | unet_dim: tuple
1067 | desired unet image H, W dimensions
1068 | lstm_dim: tuple
1069 | desired lstm image T, H, W, C dimensions
1070 | lc_transitions: list
1071 |             list of ('from', 'to') tuples defining optional categorical reclassifications for lc data
1072 |         lu_transitions: list
1073 |             list of ('from', 'to') tuples defining optional categorical reclassifications for lu data
1074 |
1075 | Return
1076 | ---
1077 | tuple: three arrays containing batch of corresponding sentinel-2, naip, and label data
1078 | """
1079 | super().__init__(*args, **kwargs)
1080 | self.s1files = s1files
1081 | self.lc_trans = lc_transitions
1082 | self.lu_trans = lu_transitions
1083 | self.lstm_dim = lstm_dim
1084 | self.unet_dim = unet_dim
1085 | self.n_timesteps = lstm_dim[0]
1086 | self.on_epoch_end()
1087 |
1088 | def _get_lstm_data(self, files_temp, rescale_val = 1.0, mask = False):
1089 | arrays = self._load_numpy_data(files_temp)
1090 | try:
1091 | assert len(arrays) > 0, "No Array Found"
1092 | assert all([x.shape == (self.lstm_dim[0], self.lstm_dim[3], self.lstm_dim[1], self.lstm_dim[2]) for x in arrays]), [x.shape for x in arrays]
1093 |
1094 |         # create a single (B, T, C, H, W) array
1095 | batch = np.stack(arrays, axis = 0)
1096 | # in case our incoming data is of different size than we want, define a trim amount
1097 | trim = ((batch.shape[3] - self.lstm_dim[1])//2, (batch.shape[4] - self.lstm_dim[2])//2)
1098 |
1099 | array = batch[:, 0:self.n_timesteps,:,trim[0]:self.lstm_dim[1]+trim[0],trim[1]:self.lstm_dim[2]+trim[1]]
1100 |
1101 | # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model
1102 | reshaped = np.moveaxis(array, source = 2, destination = 4)
1103 | normalized = normalize_timeseries(reshaped, maxval = rescale_val, axis = 1)
1104 | return normalized
1105 | except AssertionError as msg:
1106 | print(msg)
1107 | sys.exit()
1108 | return None
1109 |
1110 | def _get_s2_data(self, indexes):
1111 | files_temp = [self.s2files[k] for k in indexes]
1112 | normalized = self._get_lstm_data(files_temp, rescale_val = 10000.0)
1113 | if type(normalized) == np.ndarray:
1114 | if self.to_fit:
1115 | recolored = array_tools.aug_array_color(normalized)
1116 | return recolored
1117 | else:
1118 | return normalized
1119 |
1120 | def _get_s1_data(self, indexes):
1121 | files_temp = [self.s1files[k] for k in indexes]
1122 | normalized = self._get_lstm_data(files_temp, rescale_val = -50.0)
1123 | if type(normalized) == np.ndarray:
1124 | return normalized
1125 |
1126 | def __getitem__(self, index):
1127 | """Generate one batch of data
1128 |
1129 | :param index: index of the batch
1130 | :return: X and y when fitting. X only when predicting
1131 | """
1132 | # Generate indexes of the batch
1133 |
1134 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
1135 |
1136 | unetDatasets = []
1137 | lstmDatasets = []
1138 | if self.s2files:
1139 | s2Data = self._get_s2_data(indexes)
1140 | lstmDatasets.append(s2Data)
1141 | if self.s1files:
1142 | s1Data = self._get_s1_data(indexes)
1143 | lstmDatasets.append(s1Data)
1144 | if self.naipfiles:
1145 | naipData = self._get_naip_data(indexes)
1146 | unetDatasets.append(naipData)
1147 | if self.demfiles:
1148 | demData = self._get_dem_data(indexes)
1149 | unetDatasets.append(demData)
1150 | if self.hagfiles:
1151 | hagData = self._get_hag_data(indexes)
1152 | unetDatasets.append(hagData)
1153 | if self.lidarfiles:
1154 | lidarData = self._get_lidar_data(indexes)
1155 | unetDatasets.append(lidarData)
1156 | if self.ssurgofiles:
1157 | ssurgoData = self._get_ssurgo_data(indexes)
1158 | unetDatasets.append(ssurgoData)
1159 |
1160 | if any([type(dat) != np.ndarray for dat in unetDatasets + lstmDatasets]):
1161 | pass
1162 | else:
1163 | unetData = np.concatenate(unetDatasets, axis = -1)
1164 | lstmData = np.concatenate(lstmDatasets, axis = -1)
1165 | feats = [unetData, lstmData]
1166 | # if type(lidarData) == np.ndarray:
1167 | # unetData = np.concatenate([naipData, lidarData], axis = -1)
1168 | # else:
1169 | # unetData = naipData
1170 |
1171 | # feats = [unetData, s2Data]
1172 | # if any([type(dat) == type(None) for dat in feats]):
1173 | # return self.__getitem__(randint(0, len(self.indexes) - self.batch_size))
1174 |
1175 | if self.to_fit:
1176 | labels = self._process_y(indexes)
1177 | if type(labels) == type(None):
1178 | pass
1179 | # feats, labels = split_timeseries(rearranged)
1180 | # we can't have nans in label
1181 | else:
1182 | return feats, labels
1183 | else:
1184 | return feats
1185 |
--------------------------------------------------------------------------------
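Note (illustrative addition, not a repository file): the U-Net style generators above all prepare a batch the same way: load (C, H, W) numpy chips, center-trim them to the target height and width, move the channel axis last, and, when fitting, one-hot encode the integer labels. The following minimal, self-contained sketch reproduces those steps outside the classes; unet_dim, n_classes, and the random chips are stand-in values, not values taken from the repository.

import numpy as np
import tensorflow as tf

unet_dim = (256, 256)   # target (H, W); illustrative only
n_classes = 6

# pretend we loaded a batch of two (C, H, W) chips slightly larger than unet_dim
chips = [np.random.rand(4, 264, 264) for _ in range(2)]
batch = np.stack(chips, axis=0)                                            # (B, C, H, W)

# center-trim to the target size, as in _get_unet_data / _process_y
trim = ((batch.shape[2] - unet_dim[0]) // 2, (batch.shape[3] - unet_dim[1]) // 2)
trimmed = batch[:, :, trim[0]:unet_dim[0] + trim[0], trim[1]:unet_dim[1] + trim[1]]

# rearrange (B, C, H, W) -> (B, H, W, C) as expected by the models
feats = np.moveaxis(trimmed, source=1, destination=3)

# labels arrive as (1, H, W) integer arrays; trim the same way, then one-hot encode
labels = np.random.randint(0, n_classes, size=(2, 1, 264, 264))
lab = labels[:, :, trim[0]:unet_dim[0] + trim[0], trim[1]:unet_dim[1] + trim[1]]
one_hot = tf.squeeze(tf.one_hot(lab, n_classes))                           # (B, H, W, n_classes)

print(feats.shape, one_hot.shape)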
/utils/raster_tools.py:
--------------------------------------------------------------------------------
1 | """
2 | Created on Wed Jun 29 15:07:52 2022
3 |
4 | @author: mevans
5 | """
6 |
7 | import os
8 | from os.path import join
9 | import rasterio as rio
10 | from rasterio.windows import Window
11 | from rasterio.transform import Affine
12 | from rasterio.merge import merge
13 | import shapely
14 | from shapely.geometry import box
15 | import geopandas as gpd
16 | import numpy as np
17 | from matplotlib.pyplot import imsave
18 | import warnings
19 | import random
20 | from osgeo import gdal
21 | rio.Env(CHECK_DISK_FREE_SPACE=False)
22 |
23 | def generate_chip_indices(H, W, buff = 128, kernel = 256):
24 | """
25 | Parameters
26 | ---
27 | H: int
28 | height dimension in pixels over which indices should be generated
29 | W: int
30 | width dimension in pixels over which indices should be generated
31 | buff: int
32 |         number of buffer pixels trimmed from each side of a chip
33 |     kernel: int
34 |         size of contiguous image chips
35 |     Return
36 |     ---
37 |     list[tuple]: (y, x) index of each chip's upper left corner
38 | """
39 | side = (2*buff) + kernel
40 | x_buff = y_buff = buff
41 |
42 | y_indices = list(range(y_buff, H - (kernel+buff) +1, kernel))
43 | x_indices = list(range(x_buff, W - (kernel+buff) +1, kernel))
44 |
45 | indices = [(y_index, x_index) for y_index in y_indices for x_index in x_indices]
46 | return indices
47 |
48 | def extract_chips(arr, buff = 128, kernel = 256):
49 | """Break an array into (potentially) overlapping chips for analysis
50 | Arguments:
51 | arr (ndarray): 3D array to run predictions on
52 | buff (int): size of pixels to be trimmed from chips
53 | kernel (int): size of contiguous image chips
54 | Return:
55 | list::np.ndarray: list containing image chips of size (kernel+buff, kernel+buff)
56 | """
57 | H, W, C = arr.shape
58 | side = buff + kernel
59 | x_buff = y_buff = buff//2
60 | chips = []
61 |
62 |     chip_indices = generate_chip_indices(H, W, buff, kernel)
63 | 
64 |     for y, x in chip_indices:
65 |         chip = arr[y-y_buff:y+kernel+y_buff, x-x_buff:x+kernel+x_buff, :]
66 | chips.append(chip)
67 |
68 | return chips
69 |
70 | def convert(size, box):
71 | """
72 | Convert coordinates of a bounding box given in image pixels to
73 | normalized [0,1] yolo coordinates
74 |
75 | Parameters
76 | ---
77 | size: tpl
78 | height, width of image in pixels
79 | box: list[x0, y0, x1, y1]
80 | corners of box in pixels
81 |
82 | Return
83 | ---
84 |     tpl(float, float, float, float): normalized x,y centroid and width, height of box
85 | """
86 | dw = 1./size[1]
87 | dh = 1./size[0]
88 | xmid = (box[0] + box[2])/2.0
89 | ymid = (box[1] + box[3])/2.0
90 | w0 = box[2] - box[0]
91 | h0 = box[3] - box[1]
92 | x = xmid*dw
93 | y = ymid*dh
94 | w = w0*dw
95 | h = h0*dh
96 | return (x,y,w,h)
97 |
98 | def make_window(cx: int, cy:int, window_size: int) -> tuple:
99 | """Create an array window around a centroid
100 |
101 | Parameters
102 | ---
103 | cx: int
104 | centroid x-coord
105 | cy: int
106 | centroid y-coord
107 | window_size: int
108 | size of window in pixels
109 |
110 | Return
111 | ---
112 | tpl: coordinates of top left (x0, y0) and bottom right (x1, y1) window points
113 | """
114 | x0 = round(cx - window_size//2)
115 | y0 = round(cy - window_size//2)
116 | x1 = round(cx + window_size//2)
117 | y1 = round(cy + window_size//2)
118 | return (x0, y0, x1, y1)
119 |
120 | def get_geo_transform(raster_src):
121 | """Get the geotransform for a raster image source.
122 | Arguments
123 | ---------
124 | raster_src : str, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset`
125 | Path to a raster image with georeferencing data to apply to `geom`.
126 | Alternatively, an opened :class:`rasterio.Band` object or
127 | :class:`osgeo.gdal.Dataset` object can be provided. Required if not
128 | using `affine_obj`.
129 | Returns
130 | -------
131 | transform : :class:`affine.Affine`
132 | An affine transformation object to the image's location in its CRS.
133 | """
134 |
135 | if isinstance(raster_src, str):
136 | with rio.Env(CHECK_DISK_FREE_SPACE=False):
137 | with rio.open(raster_src) as src:
138 | affine_obj = src.transform
139 | elif isinstance(raster_src, rio.DatasetReader):
140 | affine_obj = raster_src.transform
141 |
142 | return affine_obj
143 |
144 | def convert_poly_coords(geom, raster_src=None, affine_obj=None, inverse=False,
145 | precision=None):
146 | """Georegister geometry objects currently in pixel coords or vice versa.
147 | Params
148 | ---------
149 | geom : :class:`shapely.geometry.shape` or str
150 | A :class:`shapely.geometry.shape`, or WKT string-formatted geometry
151 | object currently in pixel coordinates.
152 | raster_src : str, optional
153 | Path to a raster image with georeferencing data to apply to `geom`.
154 | Alternatively, an opened :class:`rasterio.Band` object or
155 | :class:`osgeo.gdal.Dataset` object can be provided. Required if not
156 | using `affine_obj`.
157 | affine_obj: list or :class:`affine.Affine`
158 | An affine transformation to apply to `geom` in the form of an
159 | ``[a, b, d, e, xoff, yoff]`` list or an :class:`affine.Affine` object.
160 | Required if not using `raster_src`.
161 | inverse : bool, optional
162 | If true, will perform the inverse affine transformation, going from
163 | geospatial coordinates to pixel coordinates.
164 | precision : int, optional
165 | Decimal precision for the polygon output. If not provided, rounding
166 | is skipped.
167 | Returns
168 | -------
169 | out_geom
170 | A geometry in the same format as the input with its coordinate system
171 | transformed to match the destination object.
172 | """
173 |
174 | if not raster_src and not affine_obj:
175 | raise ValueError("Either raster_src or affine_obj must be provided.")
176 |
177 | if raster_src is not None:
178 | affine_xform = get_geo_transform(raster_src)
179 | else:
180 | if isinstance(affine_obj, Affine):
181 | affine_xform = affine_obj
182 | else:
183 | # assume it's a list in either gdal or "standard" order
184 | # (list_to_affine checks which it is)
185 | if len(affine_obj) == 9: # if it's straight from rasterio
186 | affine_obj = affine_obj[0:6]
187 | affine_xform = Affine(*affine_obj)
188 |
189 | if inverse: # geo->px transform
190 | affine_xform = ~affine_xform
191 |
192 | if isinstance(geom, str):
193 | # get the polygon out of the wkt string
194 | g = shapely.wkt.loads(geom)
195 | elif isinstance(geom, shapely.geometry.base.BaseGeometry):
196 | g = geom
197 | else:
198 | raise TypeError('The provided geometry is not an accepted format. '
199 | 'This function can only accept WKT strings and '
200 | 'shapely geometries.')
201 |
202 | xformed_g = shapely.affinity.affine_transform(g, [affine_xform.a,
203 | affine_xform.b,
204 | affine_xform.d,
205 | affine_xform.e,
206 | affine_xform.xoff,
207 | affine_xform.yoff])
208 | if isinstance(geom, str):
209 | # restore to wkt string format
210 | xformed_g = shapely.wkt.dumps(xformed_g)
211 | if precision is not None:
212 | xformed_g = _reduce_geom_precision(xformed_g, precision=precision)
213 |
214 | return xformed_g
215 |
216 | def convert_pt(geometry: gpd.GeoSeries, out_crs: int, src_transform: list) -> tuple:
217 | """ Change a point to another crs
218 |
219 | Parameters
220 | ---
221 |     geometry: gpd.GeoSeries
222 |         geoseries of points
223 |     out_crs: int
224 |         epsg code for the desired crs
225 |     src_transform: list or affine.Affine transform of the source raster, used to convert to pixel coords
226 | Return
227 | ---
228 | tpl: (x,y) coordinates of point in new crs
229 | """
230 | pt = geometry.to_crs(out_crs)
231 | coords = convert_poly_coords(pt.iloc[0], affine_obj = src_transform, inverse = True, precision = None)
232 | x, y = np.rint(coords.x), np.rint(coords.y)
233 | return (x,y)
234 |
235 | def win_jitter(window_size, jitter_frac=0.1):
236 | '''get x and y jitter
237 | Parameters
238 | ---------
239 |     window_size (int): window size in pixels
240 |     jitter_frac (float): proportion of window_size by which to jitter
241 |     Return
242 |     ---------
243 |     dx, dy (int): x and y jitter in pixels
244 |     '''
245 |     val = int(np.rint(jitter_frac * window_size))
246 | dx = np.random.randint(-val, val)
247 | dy = np.random.randint(-val, val)
248 |
249 | return dx, dy
250 |
251 | def get_centroid(geom_pix, verbose = True):
252 | """
253 | Get the centroid of a polygon
254 |
255 | Parameters
256 | ----------
257 | geom_pix : shapely POLYGON
258 | verbose : bool, optional
259 | Return print statements? The default is True.
260 |
261 | Returns
262 | -------
263 | cx : float
264 | centroid x coordinate in input crs.
265 | cy : float
266 | centroid y coordinate in input crs.
267 |
268 | """
269 | bounds = geom_pix.bounds
270 | area = geom_pix.area
271 | (minx, miny, maxx, maxy) = bounds
272 | dx, dy = maxx-minx, maxy-miny
273 |
274 | # get centroid
275 | centroid = geom_pix.centroid
276 |
277 | cx_tmp, cy_tmp = list(centroid.coords)[0]
278 | cx, cy = np.rint(cx_tmp), np.rint(cy_tmp)
279 | if verbose:
280 | print (" bounds:", bounds )
281 | print (" dx, dy:", dx, dy )
282 | print (" area:", area )
283 | print("centroid:", centroid)
284 |
285 | return cx, cy
286 |
287 | def make_jittered_window(cx, cy, image_h, image_w, window_size = 1280, jitter_frac = 0.1):
288 | """
289 | Create a jittered image window from and input image and geometry centroid
290 |
291 | Parameters
292 | ----------
293 | cx : float
294 | x-coordinate of centroid around which to jitter window.
295 | cy : float
296 | y-coordinate of centroid around which to jitter window.
297 | image_h : int
298 | height in pixels of input image.
299 | image_w : int
300 | width in pixels of input image.
301 | window_size : int, optional
302 | desired dimension of output window. The default is 1280.
303 | jitter_frac : float, optional
304 |         proportion of window size to move window. The default is 0.1.
305 |
306 | Returns
307 | -------
308 | x0 : int
309 | minx coordinate of jittered window
310 | y0 : int
311 | miny coordinate of jittered window.
312 | x1 : int
313 | maxx coordinate of jittered window.
314 | y1 : int
315 | maxy coordinate of jittered window.
316 |
317 | """
318 | # number of pixels in x and y directions to shift window
319 | jx, jy = win_jitter(window_size, jitter_frac=jitter_frac)
320 | x0 = cx - window_size/2 + jx
321 | y0 = cy - window_size/2 + jy
322 | # ensure window does not extend outside larger image
323 | x0 = max(x0, 0)
324 | x0 = int(min(x0, image_w - window_size))
325 | y0 = max(y0, 0)
326 | y0 = int(min(y0, image_h - window_size))
327 | # set other side of square
328 | x1 = x0 + window_size
329 | y1 = y0 + window_size
330 | print('x0', x0, 'y0', y0, 'x1', x1, 'y1', y1)
331 | return x0, y0, x1, y1
332 |
333 | def rasterio_to_img(array, out_path, nbands = 3, ext = None):
334 | """
335 | Write an array read by rasterio to an 8-bit integer image file
336 |
337 | Parameters
338 | ----------
339 | array : numpy.ndarray
340 | image array read by rasterio.
341 | out_path : str
342 | out image file path.
343 | nbands : int, optional
344 | number of image bands to write. The default is 3.
345 | ext : str, optional
346 |         image file format extension (e.g., 'png'). The default is None, in which case out_path is used as-is.
347 |
348 | Returns
349 | -------
350 | None.
351 |
352 | """
353 | # convert from CHW to HWC and cast as unsigned 8-band int for saving
354 | t = array.transpose((1,2,0)).astype('uint8')
355 | print('array shape', t.shape)
356 | print('array min', t.min())
357 | print('array max', t.max())
358 | print('array type', t.dtype)
359 | # to use pre-trained YOLO weights, only grab RGB bands
360 | if ext:
361 | out_file = f"{out_path}.{ext}"
362 | else:
363 | out_file = out_path
364 | print('writing image to', out_file)
365 | imsave(out_file, t[:,:,:nbands], vmin = 0, vmax = 255)
366 |
367 | def numpy_to_raster(arr: np.ndarray, mixer: dict, out_file: str, dtype:str):
368 | """
369 | Params
370 | ---
371 | arr: np.ndarray
372 |         input (C, H, W) array to be converted to raster
373 |     mixer: dict
374 | dictionary containing image dimension and spatial reference metadata required by rasterio.write
375 | out_file: str
376 | file path to destination raster file
377 | dtype: str
378 | output dtype accepted by rasterio.write (e.g., 'uint16', 'int32', 'float32', 'float64')
379 |
380 | Return
381 | ---
382 | None: writes raster data to destination file
383 | """
384 | C = arr.shape[0]
385 | meta = {
386 | 'driver':'GTiff',
387 | 'width':mixer['cols'],
388 | 'height':mixer['rows'],
389 |         'count':C,
390 | 'dtype':dtype,
391 | 'transform':rio.Affine(*mixer['transform'][0:6]),
392 | 'crs':mixer['crs'],
393 | 'nodata':255
394 | }
395 | band_list = list(range(1,C+1))
396 | temp_file = out_file.replace(".tif","_temp.tif")
397 | with rio.Env(CHECK_DISK_FREE_SPACE=False):
398 | with rio.open(temp_file, mode = 'w', **meta) as src:
399 | src.write(arr, band_list)
400 | # src.write(arr, 1)
401 | src.close()
402 |
403 | ds = gdal.Open(temp_file)
404 |
405 | options = gdal.TranslateOptions(format = 'COG',creationOptions = ["COMPRESS=LZW"])
406 | ds = gdal.Translate(destName=out_file, srcDS=ds, options=options)
407 | ds = None
408 |
409 | os.remove(temp_file)
410 |
411 | def arrays_to_cog(arrs: list, coords: list, mixer: dict, out_file: str, dtype:str):
412 | """
413 | Params
414 | ---
415 |     arrs: list
416 |         paths to .npy files containing (H, W, C) chips to be mosaicked into the output raster
417 |     mixer: dict
418 | dictionary containing image dimension and spatial reference metadata required by rasterio.write
419 | out_file: str
420 | file path to destination raster file
421 | dtype: str
422 | output dtype accepted by rasterio.write (e.g., 'uint16', 'int32', 'float32', 'float64')
423 |
424 | Return
425 | ---
426 | None: writes raster data to destination file
427 | """
428 | C = np.load(arrs[0]).shape[-1]
429 | meta = {
430 | 'driver':'GTiff',
431 | 'width':round(mixer['cols']),
432 | 'height':round(mixer['rows']),
433 | 'count':C,
434 | 'dtype':dtype,
435 |         'transform':rio.Affine(*mixer['transform'][0:6]),
436 | 'crs':mixer['crs'],
437 | 'nodata':255
438 | }
439 | band_list = list(range(1,C+1))
440 | temp_file = out_file.replace(".tif","_temp.tif")
441 | with rio.Env(CHECK_DISK_FREE_SPACE=False):
442 | with rio.open(temp_file, mode = 'w', **meta) as dst:
443 | for f in arrs[0:4]:
444 | arr = np.moveaxis(np.load(f), -1, 0)
445 |                 indices = os.path.splitext(os.path.basename(f))[0].split('_') # X,Y tuple from file name
446 | window = Window(
447 | row_off = int(indices[1]), #Y
448 | col_off = int(indices[0]), #X
449 | width = mixer['size'],
450 | height = mixer['size'])
451 |                 dst.write(arr, window = window, indexes = band_list)
452 |
453 | ds = gdal.Open(temp_file)
454 |
455 | options = gdal.TranslateOptions(format = 'COG',creationOptions = ["COMPRESS=LZW"])
456 | # if we want to write straight to blob, use /vsiaz/container/path
457 | # after setting environmental AZURE_STORAGE_CONNECTION_STRING variable
458 | ds = gdal.Translate(destName=out_file, srcDS=ds, options=options)
459 | ds = None
460 |
461 | os.remove(temp_file)
462 |
463 |
--------------------------------------------------------------------------------
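Note (illustrative addition, not a repository file): a quick usage sketch for two of the helpers defined in raster_tools.py above. It assumes the repository root is on PYTHONPATH so that utils.raster_tools is importable and that its dependencies (rasterio, gdal, geopandas) are installed; the image size and bounding box below are arbitrary.

from utils.raster_tools import generate_chip_indices, convert

# upper-left (y, x) corners of 256 x 256 chips inside a 1024 x 1024 image with a 128 px buffer
indices = generate_chip_indices(H=1024, W=1024, buff=128, kernel=256)
print(len(indices), indices[:2])   # 9 chips, starting at (128, 128) and (128, 384)

# pixel-space box [x0, y0, x1, y1] -> normalized YOLO (x_center, y_center, width, height)
x, y, w, h = convert(size=(1024, 1024), box=[100, 200, 300, 400])
print(round(x, 3), round(y, 3), round(w, 3), round(h, 3))   # ~0.195 0.293 0.195 0.195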
/utils/stats.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.special import gamma
3 |
4 | def gamma_pdf(x, a, b):
5 | """calculate the pdf of a gamma distribution defined by shape a and scale b
6 | Params
7 | ---
8 | x: float or array
9 | values at which to evaluate the gamma pdf
10 | a: float or array
11 | shape parameter of the gamma distribution
12 | b: float or array
13 | scale parameter of the gamma distribution
14 |
15 | Return
16 | ---
17 | float or array:
18 | probability of x under the gamma distribution with shape a and scale b
19 | """
20 | denom = gamma(a)*(b**a)
21 | num = (x**(a-1))*(np.exp(-1*x/b))
22 | pd = num/denom
23 | return pd
24 |
25 | def lognormal_pdf(x, u, v):
26 | """calculate the pdf of a lognormal distribution defined by mean u and variance v
27 | Params
28 | ---
29 | x: float or array
30 | values at which to evaluate the lognormal pdf
31 | u: float or array
32 | mean of the lognormal distribution
33 | v: float or array
34 | variance of the lognormal distribution
35 |
36 | Return
37 | ---
38 | float or array:
39 | probability of x under the lognormal distribution with mean u and variance v
40 | """
41 | sd = np.sqrt(v)
42 |     const = (np.pi*2)**0.5
43 | first = 1/(sd*const)
44 | edenom = v*2
45 | enum = ((np.log(x) - u)**2)*-1
46 | second = np.exp(enum/edenom)/x
47 | pd = first*second
48 | return pd
49 |
50 |
--------------------------------------------------------------------------------
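Note (illustrative addition, not a repository file): a small sanity check comparing the hand-written densities in stats.py with their scipy.stats equivalents under the same parameterization (gamma with shape a and scale b; lognormal with u and v the mean and variance of log x). It assumes utils.stats is importable from the repository root.

import numpy as np
from scipy import stats
from utils.stats import gamma_pdf, lognormal_pdf

x = np.linspace(0.1, 10.0, 50)

# gamma(shape=a, scale=b)
a, b = 2.0, 1.5
assert np.allclose(gamma_pdf(x, a, b), stats.gamma.pdf(x, a, scale=b))

# lognormal where u and v are the mean and variance of log(x)
u, v = 0.5, 0.25
assert np.allclose(lognormal_pdf(x, u, v), stats.lognorm.pdf(x, s=np.sqrt(v), scale=np.exp(u)))

print('pdfs match scipy.stats')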