├── .gitignore ├── .spyproject ├── codestyle.ini ├── encoding.ini ├── vcs.ini └── workspace.ini ├── LICENSE ├── README.md ├── images ├── compVizApp.png └── new ├── notebooks ├── UNET_G4G_2019_Parking.ipynb └── UNET_G4G_2019_solar.ipynb └── utils ├── array_tools.py ├── calibration.py ├── ee_tools.py ├── model_tools.py ├── pc_tools.py ├── prediction_tools.py ├── processing.py ├── raster_tools.py └── stats.py /.gitignore: -------------------------------------------------------------------------------- 1 | .httr-oauth 2 | 3 | # Byte-compiled / optimized / DLL files 4 | *__pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.pyc 8 | 9 | # Data directories 10 | data/ 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Azure stuff 16 | *.amlignore 17 | *.amltmp 18 | .ipynb_aml_checkpoints 19 | 20 | # Distribution / packaging 21 | .Python 22 | build/ 23 | develop-eggs/ 24 | dist/ 25 | downloads/ 26 | eggs/ 27 | .eggs/ 28 | lib/ 29 | lib64/ 30 | parts/ 31 | sdist/ 32 | var/ 33 | wheels/ 34 | pip-wheel-metadata/ 35 | share/python-wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | *.py,cover 62 | .hypothesis/ 63 | .pytest_cache/ 64 | 65 | # Translations 66 | *.mo 67 | *.pot 68 | 69 | # Django stuff: 70 | *.log 71 | local_settings.py 72 | db.sqlite3 73 | db.sqlite3-journal 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre 141 | -------------------------------------------------------------------------------- /.spyproject/codestyle.ini: -------------------------------------------------------------------------------- 1 | [codestyle] 2 | indentation = True 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/encoding.ini: -------------------------------------------------------------------------------- 1 | [encoding] 2 | text_encoding = utf-8 3 | 4 | [main] 5 | version = 0.1.0 6 | 7 | -------------------------------------------------------------------------------- /.spyproject/vcs.ini: -------------------------------------------------------------------------------- 1 | [vcs] 2 | use_version_control = False 3 | version_control_system = 4 | 5 | [main] 6 | version = 0.1.0 7 | 8 | -------------------------------------------------------------------------------- /.spyproject/workspace.ini: -------------------------------------------------------------------------------- 1 | [workspace] 2 | restore_data_on_startup = True 3 | save_data_on_exit = True 4 | save_history = True 5 | save_non_project_files = False 6 | 7 | [main] 8 | version = 0.1.0 9 | recent_files = ['C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\prediction_tools.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\model_tools.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\utils\\processing.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\azure\\train_wetland.py', 'C:\\Users\\mevans\\OneDrive - Defenders of Wildlife\\repos\\Satellite_ComputerVision\\azure\\train_landcover.py'] 10 | 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, mjevans26 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Computer Vision with Free Satellite Data 2 | This repository contains code used to produce computer vision models that can identify infrastructure in publicly available satellite imagery. 3 | 4 | ## Organization 5 | The bulk of the useful code in this repository is contained in the 'utils' directory. These Python files are importable modules, organized generally by the packages they rely on and the kinds of functions they contain. For instance, utils/pc_tools.py imports the Planetary Computer ecosystem of packages and contains functions and classes for working with data from the Microsoft Planetary Computer (MPC). Similarly, model_tools.py imports TensorFlow and Keras libraries and contains functions and classes for constructing and training deep learning models with those libraries. 6 | 7 | ## Parking lots 8 | As part of the [Long Island Solar Roadmap](https://solarroadmap.org), we are testing the ability of computer vision models to automate the detection and delineation of parking lots in NAIP aerial imagery. This analysis uses the Deeplab v3 model with a pre-trained ResNet backbone. 9 | 10 | ## Solar arrays 11 | Ground-mounted solar arrays are prominent features on the landscape, and their rapid proliferation can be difficult to track. The Chesapeake Conservancy trained a computer vision model to detect and delineate solar arrays from Sentinel-2 data. This UNET model can be used to rapidly update maps of solar energy development in DE, MD, PA, NY, VA, WV, and other eastern states. These outputs were recently published in a [Biological Conservation](https://www.sciencedirect.com/science/article/pii/S0006320723001751) paper.
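The array utilities in utils/array_tools.py (reproduced later in this repository) include functions for standardizing and augmenting image patches of the kind used to train both models. The sketch below is a minimal, hypothetical example of how `normalize_array` and `aug_array_morph` might be applied to a single patch; the patch itself is random placeholder data, and the import assumes the repository root is on your `PYTHONPATH`.

```python
import numpy as np

# assumes the repository root is on PYTHONPATH so `utils` is importable
from utils.array_tools import normalize_array, aug_array_morph

# placeholder 256x256, 4-band (H, W, C) patch standing in for a NAIP chip
patch = np.random.rand(256, 256, 4).astype('float32')

# standardize each channel by its own mean and standard deviation (axes = [0, 1])
normed = normalize_array(patch, axes=[0, 1])

# apply random vertical/horizontal flips and 90-degree rotations
augmented = aug_array_morph(normed)
print(augmented.shape)  # (256, 256, 4)
```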
12 | 13 | ### App 14 | The outputs are available for interactive inspection through a [Google Earth Engine App](https://mevans-cic.users.earthengine.app/view/cpksolar). 15 | ![App image](/images/compVizApp.png) 16 | -------------------------------------------------------------------------------- /images/compVizApp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mjevans26/Satellite_ComputerVision/9753cedf4403a529503e4bfea3f6f3b9ee68f740/images/compVizApp.png -------------------------------------------------------------------------------- /images/new: -------------------------------------------------------------------------------- 1 | k 2 | -------------------------------------------------------------------------------- /notebooks/UNET_G4G_2019_Parking.ipynb: -------------------------------------------------------------------------------- 1 | {"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"UNET_G4G_2019_Parking.ipynb","provenance":[],"private_outputs":true,"collapsed_sections":[],"toc_visible":true,"machine_shape":"hm"},"kernelspec":{"name":"python3","display_name":"Python 3"},"accelerator":"GPU"},"cells":[{"cell_type":"markdown","metadata":{"id":"view-in-github","colab_type":"text"},"source":["\"Open"]},{"cell_type":"code","metadata":{"id":"esIMGVxhDI0f","colab_type":"code","colab":{}},"source":["#@title Copyright 2019 Google LLC. { display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"_SHAc5qbiR8l","colab_type":"text"},"source":["# Introduction\n","\n","This is a Google Colab notebook demonstrating the process used to export training, evaluation, and prediction data from Google Earth Engine for a [Deeplab V3](https://arxiv.org/abs/1706.05587) convolutional neural network that delineates parking lots in [NAIP](https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/) imagery. Model training and prediction are also demonstrated below."]},{"cell_type":"markdown","metadata":{"id":"_MJ4kW1pEhwP","colab_type":"text"},"source":["# Setup software libraries\n","\n","Install needed libraries to the notebook VM.
Authenticate as necessary."]},{"cell_type":"code","metadata":{"id":"neIa46CpciXq","colab_type":"code","colab":{}},"source":["# Cloud authentication.\n","from google.colab import auth\n","auth.authenticate_user()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"4D6ArFWrckmS","colab_type":"code","colab":{}},"source":["# Earth Engine install to notebook VM.\n","!pip install earthengine-api"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"jat01FEoUMqg","colab_type":"code","colab":{}},"source":["# Import, authenticate and initialize the Earth Engine library.\n","import ee\n","ee.Authenticate()\n","ee.Initialize()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"8RnZzcYhcpsQ","colab_type":"code","colab":{}},"source":["# Tensorflow setup.\n","import tensorflow as tf\n","\n","tf.enable_eager_execution()\n","print(tf.__version__)\n","\n","%load_ext tensorboard"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"n1hFdpBQfyhN","colab_type":"code","colab":{}},"source":["# Folium setup.\n","import folium\n","print(folium.__version__)\n","\n","# Define the URL format used for Earth Engine generated map tiles.\n","EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"WjUgYcsAs9Ed","colab_type":"text"},"source":["##Mount Google Drive"]},{"cell_type":"code","metadata":{"id":"JKDKpX4FtQA1","colab_type":"code","colab":{}},"source":["# Attach specified google drive directory to this notebook\n","from google.colab import drive\n","drive.mount('/content/drive')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"M6pVAfdDIJ-a","colab_type":"code","colab":{}},"source":["%cd '/content/drive/My Drive/repos/Satellite_ComputerVision'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"iT8ycmzClYwf","colab_type":"text"},"source":["# Variables\n","\n","Declare the variables that will be in use throughout the notebook."]},{"cell_type":"markdown","metadata":{"id":"qKs6HuxOzjMl","colab_type":"text"},"source":["## Specify your Cloud Storage Bucket\n","You must have write access to a bucket to run this demo! To run it read-only, use the demo bucket below, but note that writes to this bucket will not work."]},{"cell_type":"code","metadata":{"id":"obDDH1eDzsch","colab_type":"code","colab":{}},"source":["# This is read-only:\n","BUCKET = 'cvod-203614-mlengine'"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"wmfKLl9XcnGJ","colab_type":"text"},"source":["## Set other global variables"]},{"cell_type":"code","metadata":{"id":"psz7wJKalaoj","colab_type":"code","colab":{}},"source":["# Specify names locations for outputs in Cloud Storage. 
\n","FOLDER = 'LI_parking'\n","PRED_BASE = 'data/predict'\n","TRAINING_BASE = 'data/training'\n","EVAL_BASE = 'data/eval'\n","MODEL_BASE = 'models'\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","# Specify inputs (Landsat bands) to the model and the response variable.\n","opticalBands = ['R', 'G', 'B']\n","thermalBands = ['B8', 'B11', 'B12']\n","pcaBands = ['pc1', 'pc2', 'pc3']\n","BANDS = opticalBands# + thermalBands# + pcaBands\n","RESPONSE = 'landcover'\n","FEATURES = BANDS + [RESPONSE]\n","\n","# Specify the size and shape of patches expected by the model.\n","KERNEL_SIZE = 512\n","KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n","COLUMNS = [\n"," tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n","]\n","FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n","\n","# Sizes of the training and evaluation datasets.\n","TRAIN_SIZE = 8000\n","EVAL_SIZE = 5000\n","\n","# Specify model training parameters.\n","BATCH_SIZE = 16\n","EPOCHS = 20\n","BUFFER_SIZE = 8000\n","OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)\n","LOSS = 'binary_crossentropy'\n","METRICS = [tf.keras.metrics.categorical_accuracy, tf.keras.metrics.MeanIoU(num_classes=2)]"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hgoDc7Hilfc4","colab_type":"text"},"source":["# Imagery\n","\n","Gather and setup the imagery to use for inputs (predictors). This is a three-year, cloud-free, Landsat 8 composite. Display it in the notebook for a sanity check."]},{"cell_type":"code","metadata":{"id":"-IlgXu-vcUEY","colab_type":"code","colab":{}},"source":["# Use Landsat 8 surface reflectance data.\n","NAIP = ee.ImageCollection(\"USDA/NAIP/DOQQ\")\n","towns = ee.FeatureCollection(\"users/defendersofwildlifeGIS/LongIsland/towns\")\n","\n","begin = '2017-01-01'\n","end = '2017-12-30'\n","\n","# The image input data is a cloud-masked median composite.\n","image = NAIP.filterDate(begin, end)\\\n",".filterBounds(towns)\\\n",".filterDate(begin, end)\\\n",".median()\\\n",".select(BANDS)\\\n",".clip(towns)\n","\n","# Use folium to visualize the imagery.\n","mapid = image.getMapId({'bands': ['R', 'G', 'B'], 'min': 0, 'max': 256})\n","map = folium.Map(location=[40.8175, -73.195])\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='median composite',\n"," ).add_to(map)\n","\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"gHznnctkJsZJ","colab_type":"text"},"source":["Prepare the response (what we want to predict). This is impervious surface area (in fraction of a pixel) from the 2016 NLCD dataset. 
Display to check."]},{"cell_type":"code","metadata":{"id":"5Wxz9BPYHBwh","colab_type":"code","colab":{}},"source":["def set_landcover(ft):\n"," return ft.set('label', 1)\n","\n","nassauParkingFootprints = ee.FeatureCollection(\"users/defendersofwildlifeGIS/LongIsland/NassauParking\")\n","suffolkParkingFootprints = ee.FeatureCollection('users/defendersofwildlifeGIS/LongIsland/SuffolkParking')\n","parkingFootprints = nassauParkingFootprints.merge(suffolkParkingFootprints)\n","parking = parkingFootprints.map(set_landcover)\n","blankimg = ee.Image.constant(0)\n","parking_footprint = parking.reduceToImage(['label'], ee.Reducer.first())\n","labelimg = blankimg.where(parking_footprint, parking_footprint).rename('landcover')\n","\n","mapid = labelimg.getMapId({'bands': 'landcover', 'min':0, 'max': 1})\n","print(mapid)\n","map = folium.Map(location = [40.8175, -73.195])\n","folium.TileLayer(\n"," tiles = EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay = True,\n"," name = 'parking lots',\n",").add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"CTS7_ZzPDhhg","colab_type":"text"},"source":["Stack the 2D images (Landsat composite and NLCD impervious surface) to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores 256x256 patches of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points."]},{"cell_type":"code","metadata":{"id":"eGHYsdAOipa4","colab_type":"code","colab":{}},"source":["featureStack = ee.Image.cat([\n"," image.select(BANDS),\n"," labelimg.select(RESPONSE)\n","]).float()\n","\n","print(featureStack.bandNames().getInfo())\n","\n","list = ee.List.repeat(1, KERNEL_SIZE)\n","lists = ee.List.repeat(list, KERNEL_SIZE)\n","kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n","\n","arrays = featureStack.neighborhoodToArray(kernel)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"F4djSxBRG2el","colab_type":"text"},"source":["Use some pre-made geometries to sample the stack in strategic locations. Specifically, these are hand-made polygons in which to take the 256x256 samples. 
Display the sampling polygons on a map, red for training polygons, blue for evaluation."]},{"cell_type":"code","metadata":{"id":"ure_WaD0itQY","colab_type":"code","colab":{}},"source":["import re\n","towns = towns.randomColumn('random', 52.0)\n","townList = ee.List(towns.aggregate_array('TOWN')).distinct()\n","townList = [townList.get(town).getInfo() for town in range(townList.size().getInfo())]\n","townList = [town for town in townList if not re.search(r\"City|Water|Indian\", town)]\n","trainList = townList[0:(len(townList)//10) * 8]\n","evalList = townList[(len(townList)//10) * 8:]\n","\n","trainFilter = ee.Filter.inList('TOWN', ee.List(trainList))\n","evalFilter = ee.Filter.inList(\"TOWN\", ee.List(evalList))\n","\n","trainingPolys = towns.filter(trainFilter)\n","print('training size', len(trainList))\n","\n","evalPolys = towns.filter(evalFilter)\n","print('eval size', len(evalList))\n","\n","polyImage = ee.Image(0).byte().paint(trainingPolys, 1).paint(evalPolys, 2)\n","polyImage = polyImage.updateMask(polyImage)\n","\n","mapid = polyImage.getMapId({'min': 1, 'max': 2, 'palette': ['red', 'blue']})\n","map = folium.Map(location=[40.8175, -73.195], zoom_start=8)\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='training polygons',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ZV890gPHeZqz","colab_type":"text"},"source":["# Sampling\n","\n","The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. 
Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export."]},{"cell_type":"code","metadata":{"id":"FyRpvwENxE-A","colab_type":"code","cellView":"code","colab":{}},"source":["#@title Don't run\n","# Convert the feature collections to lists for iteration.\n","#trainingPolysList = trainingPolys.toList(trainingPolys.size())\n","#evalPolysList = trainingPolys.toList(trainingPolys.size())\n","\n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each town.\n","N = 1000 # Total sample size in each town.\n","\n","for town in trainList:\n"," geomSample = ee.FeatureCollection([])\n"," for i in range (n):\n"," sample = arrays.sample(\n"," region = trainingPolys.filterMetadata('TOWN', 'equals', town),\n"," scale = 1,\n"," numPixels = N/n,\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n"," \n"," desc = 'DeepLab_' + str(KERNEL_SIZE) + '_NAIP_' + town\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + TRAINING_BASE + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n"," \n","for town in evalList:\n"," geomSample = ee.FeatureCollection([])\n"," for i in range(n):\n"," sample = arrays.sample(\n"," region = evalPolys.filterMetadata('TOWN', 'equals', town),\n"," scale = 1,\n"," numPixels = N/n,\n"," seed = i,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n"," \n"," desc = 'DeepLab_' + str(KERNEL_SIZE) + 'NAIP_' + town\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = geomSample,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = FOLDER + '/' + EVAL_BASE + '/' + desc,\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n"," \n","#Export all the training data (in many pieces), with one task \n","#per geometry.\n","# for g in range(trainingPolys.size().getInfo()):\n","# geomSample = ee.FeatureCollection([])\n","# for i in range(n):\n","# sample = arrays.sample(\n","# region = ee.Feature(trainingPolysList.get(g)).geometry(), \n","# scale = 30, \n","# numPixels = N / n, # Size of the shard.\n","# seed = i,\n","# tileScale = 8\n","# )\n","# geomSample = geomSample.merge(sample)\n"," \n","# desc = TRAINING_BASE + '_g' + str(g)\n","# task = ee.batch.Export.table.toCloudStorage(\n","# collection = geomSample,\n","# description = desc, \n","# bucket = BUCKET, \n","# fileNamePrefix = FOLDER + '/' + desc,\n","# fileFormat = 'TFRecord',\n","# selectors = BANDS + [RESPONSE]\n","# )\n","# task.start()\n","\n","# # Export all the evaluation data.\n","# for g in range(evalPolys.size().getInfo()):\n","# geomSample = ee.FeatureCollection([])\n","# for i in range(n):\n","# sample = arrays.sample(\n","# region = ee.Feature(evalPolysList.get(g)).geometry(), \n","# scale = 30, \n","# numPixels = N / n,\n","# seed = i,\n","# tileScale = 8\n","# )\n","# geomSample = geomSample.merge(sample)\n"," \n","# desc = EVAL_BASE + '_g' + str(g)\n","# task = ee.batch.Export.table.toCloudStorage(\n","# collection = geomSample,\n","# description = desc, \n","# bucket = BUCKET, \n","# fileNamePrefix = FOLDER + '/' + desc,\n","# fileFormat = 'TFRecord',\n","# selectors = BANDS + [RESPONSE]\n","# )\n","# 
task.start()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"dk51-l7MH2Sa","colab_type":"text"},"source":["##Preprocessing\n","Define functions that apply random manipulations to our training data"]},{"cell_type":"code","metadata":{"id":"ajyp48-vINuy","colab_type":"code","colab":{}},"source":["def augColor(x):\n"," \"\"\"Color augmentation\n","\n"," Args:\n"," x: Image\n","\n"," Returns:\n"," Augmented image\n"," \"\"\"\n"," x = tf.image.random_hue(x, 0.08)\n"," x = tf.image.random_saturation(x, 0.6, 1.6)\n"," x = tf.image.random_brightness(x, 0.05)\n"," x = tf.image.random_contrast(x, 0.7, 1.3)\n"," return x\n"," \n"," \n","def augImg(img):\n"," outDims = tf.shape(img)[0:1]\n"," x = tf.image.random_flip_left_right(img)\n"," x = tf.image.random_flip_up_down(x)\n"," x = rotated = tf.image.rot90(x, tf.random_uniform(shape=[], minval=0, maxval=4, dtype=tf.int32))\n"," #x = zoom(x, outDims)\n"," #since were gonna map_fn this on a 4d image, output must be 3d, so squeeze the artificial 'sample' dimension\n"," return tf.squeeze(x)\n","\n","def preprocess(img, labels):\n"," dims = tf.shape(img)\n"," #need to combine labels and bands for morphological transformations\n"," comb = tf.concat([img, tf.expand_dims(labels, axis = 2)], axis = 2)\n"," aug = aug_img(comb)\n"," #aug = tf.map_fn(fn = aug_img, elems = comb)\n"," labels = tf.squeeze(aug[:, :, -1])\n"," band_stack = color(aug[:, :, 0:dims[2]])\n"," return band_stack, labels"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rWXrvBE4607G","colab_type":"text"},"source":["# Training data\n","\n","Load the data exported from Earth Engine into a `tf.data.Dataset`. The following are helper functions for that."]},{"cell_type":"code","metadata":{"id":"WWZ0UXCVMyJP","colab_type":"code","colab":{}},"source":["def parse_tfrecord(example_proto):\n"," \"\"\"The parsing function.\n"," Read a serialized example into the structure defined by FEATURES_DICT.\n"," Args:\n"," example_proto: a serialized Example.\n"," Returns: \n"," A dictionary of tensors, keyed by feature name.\n"," \"\"\"\n"," return tf.io.parse_single_example(example_proto, FEATURES_DICT)\n","\n","\n","def to_tuple(inputs):\n"," \"\"\"Function to convert a dictionary of tensors to a tuple of (inputs, outputs).\n"," Turn the tensors returned by parse_tfrecord into a stack in HWC shape.\n"," Args:\n"," inputs: A dictionary of tensors, keyed by feature name.\n"," Returns: \n"," A dtuple of (inputs, outputs).\n"," \"\"\"\n"," inputsList = [inputs.get(key) for key in FEATURES]\n"," stacked = tf.stack(inputsList, axis=0)\n"," # Convert from CHW to HWC\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," stacked = augImg(stacked)\n"," return stacked[:,:,:len(BANDS)], stacked[:,:,len(BANDS):]\n","\n","\n","def get_dataset(pattern):\n"," \"\"\"Function to read, parse and format to tuple a set of input tfrecord files.\n"," Get all the files matching the pattern, parse and convert to tuple.\n"," Args:\n"," pattern: A file pattern to match in a Cloud Storage bucket.\n"," Returns: \n"," A tf.data.Dataset\n"," \"\"\"\n"," glob = tf.gfile.Glob(pattern)\n"," dataset = tf.data.TFRecordDataset(glob, compression_type='GZIP')\n"," dataset = dataset.map(parse_tfrecord, num_parallel_calls=5)\n"," dataset = dataset.map(to_tuple, num_parallel_calls=5)\n"," return dataset"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"Xg1fa18336D2","colab_type":"text"},"source":["Use the helpers to read in the training dataset. 
Print the first record to check."]},{"cell_type":"code","metadata":{"id":"rm0qRF0fAYcC","colab_type":"code","colab":{}},"source":["def get_training_dataset():\n","\t\"\"\"Get the preprocessed training dataset\n"," Returns: \n"," A tf.data.Dataset of training data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + TRAINING_BASE + '/*'\n","\tprint(glob)\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.shuffle(8000).batch(BATCH_SIZE).repeat()\n","\treturn dataset\n","\n","training = get_training_dataset()\n","\n","print(iter(training.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"7CRGG26bYWQZ","colab_type":"code","colab":{}},"source":["print(iter(training.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"j-cQO5RL6vob","colab_type":"text"},"source":["# Evaluation data\n","\n","Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled."]},{"cell_type":"code","metadata":{"id":"ieKTCGiJ6xzo","colab_type":"code","colab":{}},"source":["def get_eval_dataset():\n","\t\"\"\"Get the preprocessed evaluation dataset\n"," Returns: \n"," A tf.data.Dataset of evaluation data.\n"," \"\"\"\n","\tglob = 'gs://' + BUCKET + '/' + FOLDER + '/' + EVAL_BASE + '/*'\n","\tprint(glob)\n","\tdataset = get_dataset(glob)\n","\tdataset = dataset.batch(1).repeat()\n","\treturn dataset\n","\n","evaluation = get_eval_dataset()"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"keoalUvBbSkh","colab_type":"code","colab":{}},"source":["print(iter(evaluation.take(1)).next())"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"9JIE7Yl87lgU","colab_type":"text"},"source":["# Model\n","\n","Here we use the Keras implementation of the U-Net model as found [in the TensorFlow examples](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb). The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probability, label or a continuous output. We can implement the model essentially unmodified, but will use mean squared error loss on the sigmoidal output since we are treating this as a regression problem, rather than a classification problem. 
Since impervious surface fraction is constrained to [0,1], with many values close to zero or one, a saturating activation function is suitable here."]},{"cell_type":"markdown","metadata":{"id":"Xh2EZyyPu84H","colab_type":"text"},"source":["##Metrics"]},{"cell_type":"code","metadata":{"id":"mISCOXUHu7G_","colab_type":"code","colab":{}},"source":["def weighted_bce(y_true, y_pred):\n"," bce = tf.nn.weighted_cross_entropy_with_logits(labels = y_true, logits = y_pred, pos_weight = 20)\n"," return tf.reduce_mean(bce)\n","\n","def iou(true, pred):\n","\n"," intersection = true * pred\n","\n"," notTrue = 1 - true\n"," union = true + (notTrue * pred)\n","\n"," return tf.reduce_sum(intersection)/tf.reduce_sum(union)\n","\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"wsnnnz56yS3l","colab_type":"code","colab":{}},"source":["from tensorflow.python.keras import layers\n","from tensorflow.python.keras import losses\n","from tensorflow.python.keras import models\n","from tensorflow.python.keras import metrics\n","from tensorflow.python.keras import optimizers\n","\n","def conv_block(input_tensor, num_filters):\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\treturn encoder\n","\n","def encoder_block(input_tensor, num_filters):\n","\tencoder = conv_block(input_tensor, num_filters)\n","\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n","\treturn encoder_pool, encoder\n","\n","def decoder_block(input_tensor, concat_tensor, num_filters):\n","\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n","\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\treturn decoder\n","\n","def get_model():\n","\tinputs = layers.Input(shape=[None, None, len(BANDS)]) # 256\n","\tencoder0_pool, encoder0 = encoder_block(inputs, 32) # 128\n","\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64) # 64\n","\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128) # 32\n","\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256) # 16\n","\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512) # 8\n","\tcenter = conv_block(encoder4_pool, 1024) # center\n","\tdecoder4 = decoder_block(center, encoder4, 512) # 16\n","\tdecoder3 = decoder_block(decoder4, encoder3, 256) # 32\n","\tdecoder2 = decoder_block(decoder3, encoder2, 128) # 64\n","\tdecoder1 = decoder_block(decoder2, encoder1, 64) # 128\n","\tdecoder0 = decoder_block(decoder1, encoder0, 32) # 256\n","\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n","\n","\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n","\n","\tmodel.compile(\n","\t\toptimizer=OPTIMIZER, \n"," loss = 
weighted_bce,\n","\t\t#loss=losses.get(LOSS),\n","\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n","\n","\treturn model\n","\n","\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," log_dir+'best_weights.hdf5',\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uu_E7OTDBCoS","colab_type":"text"},"source":["# Training the model\n","\n","You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)."]},{"cell_type":"code","metadata":{"id":"NzzaWxOhSxBy","colab_type":"code","colab":{}},"source":["m = get_model()\n","\n","\n","m.fit(\n"," x=training, \n"," epochs=EPOCHS, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=EVAL_SIZE/BATCH_SIZE,\n"," callbacks = [checkpoint]\n"," )\n","\n","m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')\n","\n","#!gsutil cp best_weights.hdf5 gs://cvod-203614-mlengine/NC_solar/models/UNET256/best_weights.hdf5\n","#!gsutil cp UNET256.h5 gs://cvod-203614-mlengine/NC_solar/models/UNET256/UNET256.h5"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"U2XrwZHp66j4","colab_type":"text"},"source":["Note that the notebook VM is sometimes not heavy-duty enough to get through a whole training job, especially if you have a large buffer size or a large number of epochs. You can still use this notebook for training, but may need to set up an alternative VM ([learn more](https://research.google.com/colaboratory/local-runtimes.html)) for production use. Alternatively, you can package your code for running large training jobs on Google's AI Platform [as described here](https://cloud.google.com/ml-engine/docs/tensorflow/trainer-considerations). 
The following code loads a pre-trained model, which you can use for predictions right away."]},{"cell_type":"markdown","metadata":{"id":"zvIqqpNXqJSE","colab_type":"text"},"source":["##Load model and resume training"]},{"cell_type":"code","metadata":{"id":"q0xgBhsaqInV","colab_type":"code","colab":{}},"source":["#bring in the architecture and best weights from GCS\n","m = models.load_model('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5', custom_objects={'weighted_bce': weighted_bce})\n","m.load_weights('drive/My Drive/Tensorflow/models/UNET256/best_weights.hdf5') \n","\n","#lets see where were at\n","evalMetrics = m.evaluate(x=evaluation, steps = EVAL_SIZE, verbose = 1)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"xlsFciElxOUA","colab_type":"code","colab":{}},"source":["#set the monitored value (val_mean_io_u) to current evaluation output\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," log_dir+'best_weights.hdf5',\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )\n","\n","checkpoint.best = evalMetrics[2]\n","print(checkpoint.__dict__)\n","print(checkpoint.best)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"7eq0aLlw864A","colab_type":"text"},"source":["## Set up tensorboard"]},{"cell_type":"code","metadata":{"id":"PA2gJENE8-J1","colab_type":"code","colab":{}},"source":["tensorboard = tf.keras.callbacks.TensorBoard(log_dir= 'drive/My Drive/Tensorflow/models/UNET256')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"Ty8wCxDtqWBM","colab_type":"code","colab":{}},"source":["#Now keep training!\n","m.fit(\n"," x=training, \n"," epochs= 10, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=EVAL_SIZE/BATCH_SIZE,\n"," callbacks = [checkpoint, tensorboard]\n"," )\n","#m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"tyhWcGHJ82e8","colab_type":"code","colab":{}},"source":["m.save('drive/My Drive/Tensorflow/models/UNET256/UNET256.h5')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"i9OM5BiS1xYQ","colab_type":"code","colab":{}},"source":["%tensorboard --logdir 'drive/My Drive/Tensorflow/models/UNET256'"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"-RJpNfEUS1qp","colab_type":"code","colab":{}},"source":["# Load a trained model. 50 epochs. 25 hours. Final RMSE ~0.08.\n","MODEL_DIR = BUCKET + '/' + FOLDER + '/' + 'models/UNET256'\n","m = tf.contrib.saved_model.load_keras_model(MODEL_DIR)\n","m.summary()"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"J1ySNup0xCqN","colab_type":"text"},"source":["# Prediction\n","\n","The prediction pipeline is:\n","\n","1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storge bucket.\n","2. Use the trained model to make the predictions.\n","3. Write the predictions to a TFRecord file in a Cloud Storage.\n","4. Upload the predictions TFRecord file to Earth Engine.\n","\n","The following functions handle this process. 
It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time."]},{"cell_type":"code","metadata":{"id":"lv6nb0ShH4_T","colab_type":"code","colab":{}},"source":["#Inspect the prediction outputs\n","predictions = m.predict(evaluation, steps=1, verbose=1)\n","for prediction in predictions:\n"," print(predictions)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"M3WDAa-RUpXP","colab_type":"code","colab":{}},"source":["def doExport(image, out_image_base, kernel_buffer, region):\n"," \"\"\"Run the image export task. Block until complete.\n"," \"\"\"\n"," task = ee.batch.Export.image.toCloudStorage(\n"," image = image.select(BANDS+[RESPONSE]), \n"," description = out_image_base, \n"," bucket = BUCKET, \n"," fileNamePrefix = FOLDER + '/' + PRED_BASE + '/' + out_image_base, \n"," region = region.getInfo()['coordinates'], \n"," scale = 1, \n"," fileFormat = 'TFRecord', \n"," maxPixels = 1e10,\n"," formatOptions = { \n"," 'patchDimensions': KERNEL_SHAPE,\n"," 'kernelSize': kernel_buffer,\n"," 'compressed': True,\n"," 'maxFileSize': 104857600\n"," }\n"," )\n"," task.start()\n","\n"," # Block until the task completes.\n"," print('Running image export to Cloud Storage...')\n"," import time\n"," while task.active():\n"," time.sleep(30)\n","\n"," # Error condition\n"," if task.status()['state'] != 'COMPLETED':\n"," print('Error with image export.')\n"," else:\n"," print('Image export completed.')"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"zb_9_FflygVw","colab_type":"code","colab":{}},"source":["def doPrediction(out_image_base, user_folder, kernel_buffer, region):\n"," \"\"\"Perform inference on exported imagery, upload to Earth Engine.\n"," \"\"\"\n","\n"," print('Looking for TFRecord files...')\n"," \n"," # Get a list of all the files in the output bucket.\n"," filesList = !gsutil ls 'gs://'{BUCKET}'/'{FOLDER}'/'{PRED_BASE}\n"," # Get only the files generated by the image export.\n"," exportFilesList = [s for s in filesList if out_image_base in s]\n","\n"," # Get the list of image files and the JSON mixer file.\n"," imageFilesList = []\n"," jsonFile = None\n"," for f in exportFilesList:\n"," if f.endswith('.tfrecord.gz'):\n"," imageFilesList.append(f)\n"," elif f.endswith('.json'):\n"," jsonFile = f\n","\n"," # Make sure the files are in the right order.\n"," imageFilesList.sort()\n","\n"," from pprint import pprint\n"," pprint(imageFilesList)\n"," print(jsonFile)\n"," \n"," import json\n"," # Load the contents of the mixer file to a JSON object.\n"," jsonText = !gsutil cat {jsonFile}\n"," # Get a single string w/ newlines from the IPython.utils.text.SList\n"," mixer = json.loads(jsonText.nlstr)\n"," pprint(mixer)\n"," patches = mixer['totalPatches']\n"," \n"," # Get set up for prediction.\n"," x_buffer = int(kernel_buffer[0] / 2)\n"," y_buffer = int(kernel_buffer[1] / 2)\n","\n"," buffered_shape = [\n"," KERNEL_SHAPE[0] + kernel_buffer[0],\n"," KERNEL_SHAPE[1] + kernel_buffer[1]]\n","\n"," imageColumns = [\n"," tf.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) \n"," for k in BANDS\n"," ]\n","\n"," imageFeaturesDict = dict(zip(BANDS, imageColumns))\n","\n"," def parse_image(example_proto):\n"," return tf.parse_single_example(example_proto, imageFeaturesDict)\n","\n"," def toTupleImage(dict):\n"," inputsList = [dict.get(key) for key in BANDS]\n"," stacked = tf.stack(inputsList, axis=0)\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," return stacked\n"," 
\n"," # Create a dataset from the TFRecord file(s) in Cloud Storage.\n"," imageDataset = tf.data.TFRecordDataset(imageFilesList, compression_type='GZIP')\n"," imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n"," imageDataset = imageDataset.map(toTupleImage).batch(1)\n"," \n"," # Perform inference.\n"," print('Running predictions...')\n"," predictions = m.predict(imageDataset, steps=patches, verbose=1)\n"," # print(predictions[0])\n","\n"," print('Writing predictions...')\n"," out_image_file = 'gs://' + BUCKET + '/' + FOLDER + '/' + PRED_BASE + '/outputs/' + out_image_base + '.TFRecord'\n"," writer = tf.python_io.TFRecordWriter(out_image_file)\n"," patches = 0\n"," for predictionPatch in predictions:\n"," print('Writing patch ' + str(patches) + '...')\n"," predictionPatch = predictionPatch[\n"," x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n","\n"," # Create an example.\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'probability': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=predictionPatch.flatten()))\n"," }\n"," )\n"," )\n"," # Write the example.\n"," writer.write(example.SerializeToString())\n"," patches += 1\n","\n"," writer.close()\n","\n"," # Start the upload.\n"," out_image_asset = user_folder + '/' + out_image_base\n"," !earthengine upload image --asset_id={out_image_asset} {out_image_file} {jsonFile}"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"LZqlymOehnQO","colab_type":"text"},"source":["Now there's all the code needed to run the prediction pipeline, all that remains is to specify the output region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of the shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half the size of which will extend beyond the kernel buffer. For example, specifying a 128x128 kernel will append 64 pixels on each side of the patch, to ensure that the pixels in the output are taken from inputs completely covered by the kernel. 
"]},{"cell_type":"code","metadata":{"id":"FPANwc7B1-TS","colab_type":"code","colab":{}},"source":["# This has a read-only asset in it:\n","user_folder = 'users/defendersofwildlifeGIS'\n","\n","# Base file name to use for TFRecord files and assets.\n","li_image_base = 'li_parking_deeplab512Pred'\n","# Half this will extend on the sides of each patch.\n","li_kernel_buffer = [256, 256]\n","# Huntington\n","li_region = ee.Feature(towns.filterMetadata(\"TOWN\", 'equals', 'Huntington').first()).geometry()\n","print(li_region.area().getInfo())"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"lLNEOLkXWvSi","colab_type":"code","cellView":"both","colab":{}},"source":["#@title Don't run\n","# Run the export.\n","doExport(featureStack, li_image_base, li_kernel_buffer, li_region)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"KxACnxKFrQ_J","colab_type":"code","cellView":"both","colab":{}},"source":["#@title Don't run\n","# Run the prediction.\n","doPrediction(nc_image_base, user_folder, nc_kernel_buffer, nc_region)"],"execution_count":0,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"uj_G9OZ1xH6K","colab_type":"text"},"source":["# Display the output\n","\n","One the data has been exported, the model has made predictions and the predictions have been written to a file, and the image imported to Earth Engine, it's possible to display the resultant Earth Engine asset. Here, display the impervious area predictions over Beijing, China."]},{"cell_type":"code","metadata":{"id":"Jgco6HJ4R5p2","colab_type":"code","colab":{}},"source":["out_image = ee.Image(user_folder + '/' + bj_image_base)\n","mapid = out_image.getMapId({'min': 0, 'max': 1})\n","map = folium.Map(location=[39.898, 116.5097])\n","folium.TileLayer(\n"," tiles=EE_TILES.format(**mapid),\n"," attr='Google Earth Engine',\n"," overlay=True,\n"," name='predicted impervious',\n"," ).add_to(map)\n","map.add_child(folium.LayerControl())\n","map"],"execution_count":0,"outputs":[]}]} 2 | -------------------------------------------------------------------------------- /utils/array_tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Mar 226 10:50:44 2023 4 | 5 | @author: MEvans 6 | """ 7 | 8 | import numpy as np 9 | import math 10 | from random import shuffle, randint, uniform 11 | 12 | def make_harmonics(times: np.ndarray, timesteps, dims): 13 | """Create arrays of sin and cos representations of time 14 | Parameters: 15 | times (np.ndarray): 1D array of start times 16 | timesteps (int): number of annual timesteps 17 | dims (tpl): H, W dimensions of output data 18 | Returns: 19 | np.ndarray: 4D array (B, (dims), 2) with 20 | """ 21 | xys = [sin_cos(time, timesteps) for time in times] # use the T dimension to get number of intervals 22 | # r = deg_to_radians(lat) # convert latitude to radians 23 | out = np.stack([np.stack([np.full(dims, x), np.full(dims, y)], axis = -1) for x,y in xys], axis = 0) 24 | return out 25 | 26 | def merge_classes(cond_array, trans, out_array): 27 | """Reclassify categorical array values 28 | Parameters 29 | --- 30 | cond_array: np.ndarray 31 | array with values to be evaluated by conditional expression 32 | trans: list[tpl] 33 | tuples containing condition and value to return where true 34 | array: np.ndarray 35 | array to be returned where condition false 36 | Returns 37 | --- 38 | np.darray 39 | reclassified array same shape and size as input 40 | """ 41 | output = 
np.copy(out_array) 42 | for x,y in trans: 43 | output[cond_array == x] = y 44 | return output 45 | 46 | 47 | def normalize_array(img, axes=[2], epsilon=1e-8, moments = None, splits = None): 48 | """ 49 | Standardize incoming image patches by mean and variance. 50 | 51 | Moments can be calculated based on patch data by providing axes: 52 | To standardize each pixel use axes = [2] 53 | To standardize each channel use axes = [0, 1] 54 | To standardize globally use axes = [0, 1, 2] 55 | 56 | To standardize by global, or per-channel moments supply a list of [mean, variance] tuples. 57 | To standardize groups of channels separately, identify the size of each group. Groups of 58 | channels must be stacked contiguously and group sizes must sum to the total # of channels 59 | 60 | Parameters 61 | --- 62 | img: np.ndarray 63 | nD image (usually 3d) to be normalized 64 | axes: list: int 65 | Array of ints. Axes along which to compute mean and variance, usually length n-1 66 | epsilon: float 67 | small number to avoid dividing by zero 68 | moments: list:tpl:int 69 | list of global mean, std tuples for standardization 70 | splits: list:int 71 | size(s) of groups of features to be kept together 72 | Return: 73 | tensor: nD image tensor normalized by channels 74 | """ 75 | 76 | # define a basic function to normalize a 3d tensor 77 | def normalize(img): 78 | # shape = tf.shape(x).numpy() 79 | # if we've defined global or per-channel moments... 80 | if moments: 81 | # cast moments to arrays for mean and variance 82 | mean = np.array([tpl[0] for tpl in moments], dtype = 'float32') 83 | std = np.array([tpl[1] for tpl in moments], dtype = 'float32') 84 | # otherwise, calculate moments along provided axes 85 | else: 86 | mean = np.nanmean(img, axes, keepdims = True) 87 | std = np.nanstd(img, axes, keepdims = True) 88 | # keepdims = True to ensure compatibility with input tensor 89 | 90 | # normalize the input tensor 91 | normed = (img - mean)/(std + epsilon) 92 | return normed 93 | 94 | # if splits are given, apply tensor normalization to each split 95 | if splits: 96 | splitLen = sum(splits) 97 | toNorm = img[:,:,0:splitLen] 98 | dontNorm = img[:,:,splitLen:] 99 | arrays = np.split(toNorm, splits, axis = -1) 100 | normed = [normalize(array) for array in arrays] 101 | normed.append(dontNorm) 102 | # gather normalized splits into single tensor 103 | img_normed = np.concatenate(normed, axis = -1) 104 | else: 105 | img_normed = normalize(img) 106 | 107 | return img_normed 108 | 109 | def rescale_array(img, axes = -1, epsilon=1e-8, moments = None, splits = None): 110 | """ 111 | Rescale incoming image patch to [0,1] based on min and max values 112 | 113 | Min, max can be calculated based on patch data by providing axes: 114 | To rescale each pixel use axes = [2] 115 | To rescale each channel use axes = [0, 1] 116 | To rescale globally use axes = [0, 1, 2] 117 | 118 | To rescale by global, or per-channel moments supply a list of [mean, variance] tuples. 119 | To rescale groups of channels separately, identify the size of each group. Groups of 120 | channels must be stacked contiguously and group sizes must sum to the total # of channels 121 | 122 | Parameters 123 | --- 124 | img: np.ndarray 125 | array to be rescaled, usually 3D (H,W,C) 126 | axes: list: int 127 | Array of ints. 
Axes along which to compute mean and variance, usually length n-1 128 | epsilon: float 129 | small number to avoid dividing by zero 130 | moments: list:tpl:int 131 | optional, list of global mean, std tuples for standardization 132 | splits: list:int 133 | optional, size(s) of groups of features to be kept together 134 | Return: 135 | tensor: 3D tensor of same shape as input, with values [0,1] 136 | """ 137 | def rescale(img): 138 | if moments: 139 | minimum = np.array([tpl[0] for tpl in moments], dtype = 'float32') 140 | maximum = np.array([tpl[1] for tpl in moments], dtype = 'float32') 141 | else: 142 | minimum = np.nanmin(img, axis = axes, keepdims = True) 143 | maximum = np.nanmax(img, axis = axes, keepdims = True) 144 | scaled = (img - minimum)/((maximum - minimum) + epsilon) 145 | # scaled = tf.divide(tf.subtract(img, minimum), tf.add(tf.subtract(maximum, minimum)) 146 | return scaled 147 | 148 | # if splits are given, apply tensor normalization to each split 149 | if splits: 150 | arrays = np.split(img, splits, axis = -1) 151 | rescaled = [rescale(array) for array in arrays] 152 | # gather normalized splits into single tensor 153 | img_rescaled = np.concat(rescaled, axis = -1) 154 | else: 155 | img_rescaled = rescale(img) 156 | 157 | return img_rescaled 158 | 159 | def aug_array_color(img: np.ndarray) -> np.ndarray: 160 | """Randomly change the brightness and contrast of an image 161 | Parameters 162 | --- 163 | img: np.ndarray 164 | image to be adjusted 165 | 166 | Return 167 | --- 168 | np.ndarray: input array with brightness and contrast adjusted 169 | """ 170 | dims = len(img.shape) 171 | n_ch = img.shape[-1] 172 | axes = (0,1) if dims == 3 else (1,2) 173 | 174 | contra_adj = 0.05 175 | bright_adj = 0.05 176 | 177 | ch_mean = np.nanmean(img, axis = axes, keepdims = True) 178 | # print('channel means', ch_mean) 179 | contra_mul = uniform(a = 1-contra_adj, b = 1+contra_adj) 180 | 181 | bright_mul = uniform(a = 1 - bright_adj, b = 1+bright_adj) 182 | 183 | recolored = (img - ch_mean) * contra_mul + (ch_mean * bright_mul) 184 | return recolored 185 | 186 | def aug_array_morph(img: np.ndarray, v_rand:bool = None, h_rand:bool = None, r_rand:int = None, return_tuple:bool = False) -> np.ndarray: 187 | """ 188 | Perform morphological image augmentation on image array 189 | Parameters: 190 | img (np.ndarray): 4D or 3D channels last image array 191 | Returns: 192 | np.ndarray: 3D channels last image array 193 | """ 194 | dims = list(range(len(img.shape))) 195 | v_axis = dims[-3] # channels last, vertical axis is always third last 196 | h_axis = dims[-2] # channels last, horizontal axis is always second last 197 | 198 | if v_rand is None: 199 | v_rand = uniform(0,1) < 0.5 200 | if h_rand is None: 201 | h_rand = uniform(0,1) < 0.5 202 | if r_rand is None: 203 | r_rand = randint(0,3) 204 | 205 | # flip array up/down 206 | x = np.flip(img, axis = v_axis) if v_rand else img 207 | x = np.flip(x, axis = h_axis) if h_rand else x 208 | x = np.rot90(x, r_rand, axes = (v_axis, h_axis)) 209 | 210 | if return_tuple: 211 | return x, v_rand, h_rand, r_rand 212 | else: 213 | return x 214 | 215 | def normalize_timeseries(arr, maxval = 10000, minval = 0, axis = -1, e = 0.00001): 216 | # normalize band values across timesteps 217 | normalized = (arr-minval)/(maxval-minval+e) 218 | # mn = np.nanmean(arr, axis = axis, keepdims = True) 219 | # std = np.nanstd(arr, axis = axis, keepdims = True) 220 | # normalized = (arr - mn)/(std+e) 221 | # replace nans with zeros? 
222 | finite = np.where(np.isnan(normalized), 0.0, normalized) 223 | return finite 224 | 225 | def rearrange_timeseries(arr: np.ndarray, nbands: int) -> np.ndarray: 226 | """ Randomly rearrange 3D images in a timeseries 227 | 228 | Changes the startpoint of a temporal sequence of 3D images stored in a 5D array 229 | while maintaining relative order. 230 | 231 | Parameters 232 | --- 233 | arr: np.ndarray 234 | 5D (B, T, H, W, C) array to be rearranged 235 | nbands: int 236 | size of the last array dimension corresponding to image bands/channels 237 | 238 | Returns 239 | --- 240 | np.ndarray 241 | 5D array of same size/shape as input 242 | """ 243 | 244 | # the number of time steps is in the 1st dimension if our data is (B, T, H, W, C) 245 | timesteps = arr.shape[1] 246 | # randomly pick one of the timesteps as the starting time 247 | starttime = randint(0, timesteps-1) 248 | # print('start', starttime) 249 | # grab all timesteps leading up to the timestep corresponding to our random first 250 | last = arr[:,0:starttime,:,:,:] 251 | print('last shape', last.shape) 252 | first = arr[:,starttime:timesteps,:,:,:] 253 | print('start shape', first.shape) 254 | rearranged = np.concatenate([first, last], axis = 1) 255 | assert rearranged.shape == arr.shape 256 | return(rearranged) 257 | 258 | def split_timeseries(arr: np.ndarray, nbands: int) -> tuple: 259 | """Divide a timeseries of 3D images into a series of images and labels 260 | 261 | Parameters 262 | --- 263 | arr: np.ndarray 264 | 5D (B, T, H, W, C) array to be split 265 | nbands: int, number of bands/channels retained in the label image 266 | Returns 267 | --- 268 | tuple 269 | a 5D array of image features and a 4D array of labels 270 | """ 271 | 272 | feats = arr[:,0:-1,:,:,:] 273 | labels = arr[:,-1,:,:,0:nbands] 274 | 275 | # confirm there are no all-nan images in labels 276 | batch_sums = np.sum(labels, axis = (1,2,3)) 277 | if 0.0 in batch_sums: 278 | print('all nan labels, reshuffling') 279 | feats, labels = split_timeseries(rearrange_timeseries(arr, nbands), nbands) 280 | 281 | return(feats, labels) 282 | 283 | def sin_cos(t:int, freq:int = 6) -> tuple: 284 | x = t/freq 285 | theta = 2*math.pi * x 286 | return (math.sin(theta), math.cos(theta)) 287 | 288 | def add_harmonic(timeseries: np.ndarray): 289 | """ add harmonic variables to an imagery timeseries. currently assumes first image is start of year 290 | B, T, H, W, C 291 | """ 292 | in_shape = timeseries.shape 293 | timesteps = in_shape[1] 294 | tpls = [sin_cos(t, timesteps) for t in range(timesteps)] 295 | xys = [np.stack([np.full((in_shape[0], in_shape[2], in_shape[3]), x), np.full((in_shape[0], in_shape[2], in_shape[3]), y)], axis = -1) for x,y in tpls] 296 | harmonics = np.stack(xys, axis = 1) 297 | harmonic_timeseries = np.concatenate([timeseries, harmonics], axis = -1) 298 | return harmonic_timeseries 299 | -------------------------------------------------------------------------------- /utils/calibration.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Mar 16 17:44:19 2020 4 | 5 | @author: MEvans 6 | """ 7 | 8 | import math 9 | import ee 10 | from stats import normalize 11 | 12 | def clamp_and_scale(img, bands, p, AOI): 13 | """ 14 | clip the upper range of an image based on percentile 15 | 16 | This function is similar to ee.Image().clip() and ee.Image().unitScale(), 17 | but operates on multiple bands with potentially different upper limits.
18 | 19 | Parameters: 20 | img (ee.Image): the image to modify 21 | bands (ee.List): 22 | p (int): upper percentile above which to truncate values 23 | AOI (ee.Geometry): area within which to calculate percentile 24 | 25 | Returns: 26 | ee.Image: rescaled image with band values [0, 1] 27 | """ 28 | #create a list of the 99th percentile value for all bands 29 | percentiles = img.select(bands).reduceRegion( 30 | reducer = ee.Reducer.percentile([99]).repeat(ee.List(bands).size()), 31 | geometry = AOI, 32 | scale = 100, 33 | maxPixels = 1e13, 34 | tileScale = 12 35 | ).get('p99') 36 | 37 | #turn list of 99th percentiles into constant image 38 | upperImg = ee.Image.constant(percentiles).rename(bands) 39 | 40 | #clip the upper range of extreme values where sensors get washed out 41 | normImage = img.where(img.gte(upperImg), upperImg) 42 | 43 | # rescale the truncated image to [0, 1] 44 | rescaled = normalize(normImage, upperImg, ee.Image.constant(0)) 45 | return ee.Image(rescaled) 46 | 47 | def scene_median(imgCol, bands, sceneID): 48 | """ 49 | Create median images for each unique scene in an image collection 50 | Parameters: 51 | imgCol (ee.ImageCollection): 52 | bands (list): image bands on which to calculate medians 53 | sceneID (str): metadata field storing unique scene ID values 54 | Returns: 55 | ee.ImageCollection: composed of median images per scene 56 | """ 57 | # first get list of all scene IDs 58 | scenes = ee.List(imgCol.aggregate_array(sceneID)).distinct() 59 | # define function to filter by scene id and take median 60 | 61 | medians = scenes.map(lambda str: imgCol.filter(ee.Filter.eq(sceneID, str)).median().set(sceneID, str)) 62 | return ee.ImageCollection(medians).select(bands) 63 | 64 | def get_overlap(imgCol1, imgCol2): 65 | """ 66 | Calculate the area of overlap between two image collections 67 | Parameters: 68 | imgCol1 (ee.ImageCollection): first image collection 69 | imgCol2 (ee.ImageCollection): second image collection 70 | Returns: 71 | ee.Geometry: area of overlap 72 | """ 73 | geom1 = imgCol1.geometry(5).dissolve() 74 | geom2 = imgCol2.geometry(5).dissolve() 75 | intersect = geom1.intersection(geom2, 5) 76 | return intersect 77 | 78 | def hist_to_FC(hist, band): 79 | """ 80 | convert a histogram of band values to a feature collection 81 | 82 | Args: 83 | hist (ee.Dictionary): output of histogram reducer on an image 84 | band (str): band name 85 | 86 | Return: 87 | ee.FeatureCollection: one feature for each histogram bin with 88 | """ 89 | # properties 'bucketMeans' and 'probability' (normalized cummulative probability). 90 | valsList = ee.List(ee.Dictionary(ee.Dictionary(hist).get(band)).get('bucketMeans')) 91 | freqsList = ee.List(ee.Dictionary(ee.Dictionary(hist).get(band)).get('histogram')) 92 | cdfArray = ee.Array(freqsList).accum(0) 93 | total = cdfArray.get([-1]) 94 | normalizedCdf = cdfArray.divide(total) 95 | 96 | # create 2D array with histogram bucket means and normalized cdf values 97 | array = ee.Array.cat([valsList, normalizedCdf], 1) 98 | 99 | # define function to create a feature colleciton with properties determined by list 100 | def fxn(ls): 101 | return ee.Feature(None, {'dn': ee.List(ls).get(0), 'probability': ee.List(ls).get(1)}) 102 | 103 | output = ee.FeatureCollection(array.toList().map(fxn)) 104 | return output 105 | 106 | def make_FC(image, AOI): 107 | """ 108 | create a feature colleciton from the histograms of an images bands 109 | 110 | Parameters: 111 | image (ee.Image): input image 112 | AOI (ee.Feaure): area within which to... 
113 | Returns: 114 | ee.List: list of feature collections returned by hist_to_FC 115 | """ 116 | # Histogram equalization start: 117 | bands = image.bandNames() 118 | histo = image.reduceRegion( 119 | reducer = ee.Reducer.histogram( 120 | maxBuckets = math.pow(2, 12) 121 | ), 122 | geometry = AOI, 123 | scale = 100, 124 | maxPixels = 1e13, 125 | tileScale = 12 126 | ) 127 | 128 | def fxn(band): 129 | return hist_to_FC(histo, band) 130 | 131 | # map hist -> FC conversion fxn across bands 132 | output = bands.map(fxn) 133 | 134 | return output 135 | 136 | def equalize(image1, image2, AOI): 137 | """ 138 | use histogram matching to calibrate two images 139 | 140 | Parameters: 141 | image1 (ee.Image): reference image 142 | image2 (ee.Image): image to be calibrated 143 | AOI (ee.Geometry): area of overlap between the two images 144 | 145 | Returns: 146 | ee.Image: image2 with bands calibrated to the histogram(s) of image1 bands 147 | """ 148 | bands = image1.bandNames() 149 | nBands = bands.size().subtract(1) 150 | 151 | # These are lists of feature collections 152 | fc1 = make_FC(image1, AOI) 153 | fc2 = make_FC(image2, AOI) 154 | 155 | def fxn(i): 156 | band = bands.get(i) 157 | classifier1 = ee.Classifier.randomForest(100)\ 158 | .setOutputMode('REGRESSION')\ 159 | .train( 160 | features = ee.FeatureCollection(ee.List(fc1).get(i)), 161 | classProperty = 'dn', 162 | inputProperties = ['probability'] 163 | ) 164 | 165 | classifier2 = ee.Classifier.randomForest(100)\ 166 | .setOutputMode('REGRESSION')\ 167 | .train( 168 | features = ee.FeatureCollection(ee.List(fc2).get(i)), 169 | classProperty = 'probability', 170 | inputProperties = ['dn'] 171 | ) 172 | 173 | # Do the shuffle: DN -> probability -> DN. Return the result. 174 | b = image2.select([band]).rename('dn'); 175 | # DN -> probability -> DN 176 | output = b.classify(classifier2, 'probability')\ 177 | .classify(classifier1, band) 178 | 179 | return output 180 | 181 | imgList = ee.List.sequence(0, nBands).map(fxn) 182 | return ee.ImageCollection(imgList).toBands().rename(bands) 183 | 184 | def equalize_collection(imgCol, bands, sceneID): 185 | """ 186 | histogram equalize images in a collection by unique orbit path 187 | 188 | Parameters: 189 | imgCol (ee.ImageCollection): collection storing images to equalize 190 | bands (list): list of band names to be calibrated 191 | sceneID (str): property by which images will be grouped 192 | 193 | Returns: 194 | ee.ImageCollection: median images per scene equalized to the westernmost path 195 | """ 196 | # first get list of all scene IDs 197 | scenes = ee.List(imgCol.aggregate_array(sceneID)).distinct() 198 | # create an image collection of scene medians 199 | medians = scene_median(imgCol, bands, sceneID) 200 | # define a function to return the centroid longitude of each scene 201 | def get_coord_min(str): 202 | centroids = imgCol.filter(ee.Filter.eq(sceneID, str)).geometry(1).centroid(1) 203 | longs = centroids.coordinates().get(0) 204 | return longs 205 | # create a list of centroid longitudes 206 | coords = scenes.map(get_coord_min) 207 | # sort the scenes by increasing longitude 208 | scenes = scenes.sort(coords) 209 | # define a function that will equalize the list of scenes in succession 210 | def iterate_equalize(scene, prev): 211 | # take the previous median image 212 | prev = ee.List(prev) 213 | img1 = ee.Image(prev.get(-1)) 214 | # take the next median image 215 | img2 = ee.Image(medians.filter(ee.Filter.eq(sceneID, scene)).first()) 216 | # filter image collection to the previous scene 
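        # Aside (added note): ee.List.iterate() passes each scene plus the running
        # accumulator (here, the list of already-equalized images) to this function,
        # so prev.get(-1) above is always the most recently equalized scene. A
        # minimal, self-contained analogue of the accumulator pattern:
        #   running = ee.List.sequence(1, 5).iterate(
        #       lambda x, prev: ee.Number(prev).add(x), 0)
        #   # ee.Number(running).getInfo() == 15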
217 | index = scenes.indexOf(scene).subtract(1) 218 | imgCol1 = imgCol.filter(ee.Filter.eq(sceneID, scenes.get(index))) 219 | #imgCol1 = imgCol.filter(ee.Filter.eq(sceneID, prev)) 220 | # filter image collection to the next scene 221 | imgCol2 = imgCol.filter(ee.Filter.eq(sceneID, scene)) 222 | overlap = get_overlap(imgCol1, imgCol2) 223 | # if there is overlap between collections, equalize (returns image) 224 | # otherwise return the current image 225 | equalized = ee.Algorithms.If(overlap.area(5).gt(0), equalize(img1, img2, overlap), img2) 226 | update = ee.List(prev).add(equalized) 227 | return update 228 | # create a list of successively equalized scenes 229 | # initial value for iterate is the first median scene 230 | first = ee.Image(medians.filter(ee.Filter.eq(sceneID, scenes.get(0))).first()) 231 | # take all but the first scene median and iteratively equalize 232 | output = scenes.slice(1).iterate(iterate_equalize, ee.List([first])) 233 | return ee.ImageCollection.fromImages(output) -------------------------------------------------------------------------------- /utils/ee_tools.py: -------------------------------------------------------------------------------- 1 | import ee 2 | 3 | # Initialize Earth Engine 4 | ee.Initialize() 5 | 6 | # Initialize Earth Engine 7 | JRC = ee.ImageCollection("JRC/GSW1_1/YearlyHistory") 8 | 9 | def norm_p(z): 10 | """ 11 | Caclulate (approx) the p-value for a standard normal distribution 12 | 13 | Parameters: 14 | z (ee.Image): image containing z-scores 15 | 16 | Returns: 17 | ee.Image: image containing p-values 18 | """ 19 | return ee.Image.constant(1).subtract(z.multiply(-1.65451).exp().add(1).pow(-1)) 20 | 21 | def chi_p(chi, df): 22 | """ Caclulate the CDF probability of a chi-square statistic 23 | Parameters: 24 | chi (ee.Image): single band image with observations from a chi-squared dist 25 | df (int): degrees of freedom 26 | Returns: 27 | ee.Image: single band image of probabilities 28 | """ 29 | cdf = ee.Image(chi.divide(2)).gammainc(ee.Number(df).divide(2)) 30 | return cdf.rename(['p']) 31 | 32 | def gamma_p(stat, df): 33 | shape = ee.Image(1) 34 | scale = ee.Image(df) 35 | denom = shape.gamma() 36 | num = shape.gammainc(stat.divide(scale)) 37 | return num.divide(denom).rename(['p']) 38 | 39 | def normalize(img, maxImg, minImg): 40 | """ 41 | Scale an image from 0 to 1 42 | 43 | Parameters: 44 | img (ee.Image): image to be rescaled 45 | maxImg (ee.Image): image storing the maximum value of the image 46 | minImg (ee.Image): image storing the minimum value of the image 47 | Returns: 48 | ee.Image: 49 | """ 50 | return img.subtract(minImg).divide(maxImg.subtract(minImg)) 51 | 52 | def standardize(img): 53 | """ 54 | Standardize an image to z-scores using mean and sd 55 | 56 | Parameters: 57 | img (ee.Image): image to be rescaled standardized 58 | 59 | Returns: 60 | ee.Image: image containing z-scores per band 61 | """ 62 | bands = img.bandNames() 63 | mean = img.reduceRegion( 64 | reducer= ee.Reducer.mean(), 65 | scale= 300).toImage() 66 | sd = img.reduceRegion( 67 | reducer= ee.Reducer.stdDev(), 68 | scale= 300 69 | ).toImage(bands) 70 | return img.subtract(mean).divide(sd) 71 | 72 | 73 | def ldaScore(img, inter, xbands, coefficients): 74 | """ 75 | Function converting multiband image into single band image of LDA scores 76 | 77 | Parameters: 78 | img (ee.Image): multiband image 79 | int (float): intercept parameter from LDA analysis 80 | xbands (ee.List): string list of n band names 81 | coefficients (ee.List): numeric list of length n 
containing LDA coefficients 82 | Returns: 83 | ee.Image: image with one band containing LDA scores based on provided coefficients 84 | """ 85 | bands = img.select(xbands) 86 | coeffs = ee.Dictionary.fromLists(xbands, coefficients).toImage(xbands) 87 | score = bands.multiply(coeffs).addBands(ee.Image(inter)).reduce(ee.Reducer.sum()) 88 | return score 89 | 90 | def sentinel2toa(img): 91 | """ 92 | Convert processed sentinel toa reflectance to raw values, and extract azimuth / zenith metadata 93 | 94 | Parameters: 95 | img (ee.Image): Sentinel-2 image to convert 96 | 97 | Returns: 98 | ee.Image: 99 | """ 100 | toa = img.select(['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B10', 'B11', 'B12']) \ 101 | .divide(10000)\ 102 | .set('solar_azimuth', img.get('MEAN_SOLAR_AZIMUTH_ANGLE')) \ 103 | .set('solar_zenith', img.get('MEAN_SOLAR_ZENITH_ANGLE')) \ 104 | .set('viewing_azimuth', img.get('MEAN_INCIDENCE_AZIMUTH_ANGLE_B8')) \ 105 | .set('viewing_zenith', img.get('MEAN_INCIDENCE_ZENITH_ANGLE_B8')) \ 106 | .set('CLOUDY_PIXEL_PERCENTAGE', img.get('CLOUDY_PIXEL_PERCENTAGE')) \ 107 | #.set('system:time_start', img.get('system:time_start')); 108 | return img.select(['QA60']).addBands(toa); 109 | 110 | def rescale(img, exp, thresholds): 111 | #print('rescale:', img, exp, thresholds) 112 | #return img.subtract(thresholds[0]).divide(thresholds[1]-thresholds[0]) 113 | return img.expression(exp, {'img': img}).subtract(thresholds[0]).divide(thresholds[1] - thresholds[0]) 114 | 115 | def waterScore(img): 116 | """ 117 | Calculate a water likelihood score [0, 1] 118 | 119 | Parameters: 120 | img (ee.Image): Sentinel-2 image 121 | 122 | Returns: 123 | ee.Image: image with single ['waterscore'] band 124 | """ 125 | img = sentinel2toa(img) 126 | # Compute several indicators of water and take the minimum of them. 127 | score = ee.Image(1.0) 128 | 129 | # Set up some params 130 | darkBands = ['B3', 'B4', 'B8', 'B11', 'B12'] 131 | brightBand = 'B2' 132 | shadowSumBands = ['B8', 'B11', 'B12'] 133 | # Water tends to be dark 134 | sum = img.select(shadowSumBands).reduce(ee.Reducer.sum()) 135 | #sum = rescale(sum, [0.35, 0.2]).clamp(0, 1) 136 | sum = rescale(sum, 'img', [0.35, 0.2]).clamp(0, 1) 137 | score = score.min(sum) 138 | 139 | # It also tends to be relatively bright in the blue band 140 | mean = img.select(darkBands).reduce(ee.Reducer.mean()) 141 | std = img.select(darkBands).reduce(ee.Reducer.stdDev()) 142 | z = (img.select([brightBand]).subtract(std)).divide(mean) 143 | z = rescale(z, 'img', [0, 1]).clamp(0, 1) 144 | #z = rescale(z, [0,1]).clamp(0,1) 145 | score = score.min(z) 146 | 147 | # Water is at or above freezing 148 | # score = score.min(rescale(img, 'img.temp', [273, 275])); 149 | 150 | # Water is nigh in ndsi(aka mndwi) 151 | ndsi = img.normalizedDifference(['B3', 'B11']) 152 | ndsi = rescale(ndsi, 'img', [0.3, 0.8]) 153 | #ndsi = rescale(ndsi, [0.3, 0.8]) 154 | 155 | score = score.min(ndsi) 156 | 157 | return score.clamp(0, 1).rename(['waterScore']) 158 | 159 | def basicQA(img): 160 | """ 161 | Mask clouds in a Sentinel-2 image using builg in quality assurance band 162 | Parameters: 163 | img (ee.Image): Sentinel-2 image with QA band 164 | Returns: 165 | ee.Image: original image masked for clouds and cirrus 166 | """ 167 | #print('basicQA:', img) 168 | qa = img.select('QA60').int16() 169 | # print('qa:', type(qa)) 170 | # qa = img.select(['QA60']).int16() 171 | #print('qa:', qa.getInfo()) 172 | # Bits 10 and 11 are clouds and cirrus, respectively. 
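    # Added aside: the constants below are simply bit flags, 1 << 10 == 1024 and
    # 1 << 11 == 2048. bitwiseAnd() isolates each flag, and both must equal zero
    # for a clear pixel. Plain-Python check of the same logic (illustrative only):
    #   qa_clear, qa_cloudy = 0b0000000000000000, 0b0000010000000000
    #   assert qa_clear & (1 << 10) == 0
    #   assert qa_cloudy & (1 << 10) != 0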
173 | cloudBitMask = 1024 # math.pow(2, 10) 174 | cirrusBitMask = 2048 #math.pow(2, 11) 175 | # Both flags should be set to zero, indicating clear conditions. 176 | #mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0)) 177 | mask = qa.bitwiseAnd(cloudBitMask).eq(0).And(qa.bitwiseAnd(cirrusBitMask).eq(0)) 178 | dated = img.updateMask(mask) 179 | #dated = img.addBands(img.metadata('system:time_start', 'date')).updateMask(mask) 180 | return dated 181 | 182 | # Function to cloud mask from the Fmask band of Landsat 8 SR data. 183 | def maskL8sr(image): 184 | # Bits 3 and 5 are cloud shadow and cloud, respectively. 185 | cloudShadowBitMask = ee.Number(2).pow(3).int() 186 | cloudsBitMask = ee.Number(2).pow(5).int() 187 | 188 | # Get the pixel QA band. 189 | qa = image.select('pixel_qa') 190 | 191 | # Both flags should be set to zero, indicating clear conditions. 192 | mask = qa.bitwiseAnd(cloudShadowBitMask).eq(0).And(qa.bitwiseAnd(cloudsBitMask).eq(0)) 193 | 194 | # Return the masked image, scaled to [0, 1]. 195 | return image.updateMask(mask) 196 | 197 | 198 | def cloudBands(img): 199 | ndmi = img.normalizedDifference(['B8', 'B11']).rename(['ndmi']) 200 | ndsi = img.normalizedDifference(['B3', 'B11']).rename(['ndsi']) 201 | cirrus = img.select(['B1', 'B10']).reduce(ee.Reducer.sum()).rename(['cirrus']) 202 | vis = img.select(['B4', 'B3', 'B2']).reduce(ee.Reducer.sum()).rename(['vis']) 203 | return img.addBands(ndmi).addBands(ndsi).addBands(cirrus).addBands(vis) 204 | 205 | 206 | def darkC (img, R, G, B): 207 | R = img.select(R) 208 | G = img.select(G) 209 | B = img.select(B) 210 | maxRB = R.max(B) 211 | maxGB = G.max(B) 212 | maxRG = R.max(G) 213 | C1 = G.divide(maxRB).atan().rename(['C1']) 214 | C2 = R.divide(maxGB).atan().rename(['C2']) 215 | C3 = B.divide(maxRG).atan().rename(['C3']) 216 | return img.addBands(C1).addBands(C2).addBands(C3) 217 | 218 | def sentinelCloudScore(img): 219 | """ 220 | Compute a custom cloud likelihood score for Sentinel-2 imagery 221 | Parameters: 222 | img (ee.Image): Sentinel-2 image 223 | Returns: 224 | ee.Image: original image with added ['cloudScore'] band 225 | """ 226 | im = sentinel2toa(img) 227 | # Compute several indicators of cloudyness and take the minimum of them. 228 | score = ee.Image(1) 229 | 230 | # Clouds are reasonably bright in the blue and cirrus bands. 231 | #score = score.min(rescale(im.select(['B2']), [0.1, 0.5])) 232 | score = score.min(rescale(im, 'img.B2', [0.1, 0.5])) 233 | #score = score.min(rescale(im.select(['B1']), [0.1, 0.3])) 234 | score = score.min(rescale(im, 'img.B1', [0.1, 0.3])) 235 | #score = score.min(rescale(im.select(['B1']).add(im.select(['B10'])), [0.15, 0.2])) 236 | score = score.min(rescale(im, 'img.B1 + img.B10', [0.15, 0.2])) 237 | 238 | # Clouds are reasonably bright in all visible bands. 239 | #score = score.min(rescale(im.select('B4').add(im.select('B3')).add(im.select('B2')), [0.2, 0.8])) 240 | score = score.min(rescale(im, 'img.B4 + img.B3 + img.B2', [0.2, 0.8])) 241 | 242 | # Clouds are moist 243 | ndmi = im.normalizedDifference(['B8','B11']) 244 | #score=score.min(rescale(ndmi, [-0.1, 0.1])) 245 | score=score.min(rescale(ndmi, 'img', [-0.1, 0.1])) 246 | 247 | # However, clouds are not snow. 
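    # Added note: the descending thresholds below are intentional. rescale(x, 'img',
    # [0.8, 0.6]) computes (x - 0.8) / (0.6 - 0.8), so a snow-like NDSI of 0.8 maps
    # to 0 while 0.6 maps to 1, letting snowy pixels pull the cloud score down.
    # Quick arithmetic check: (0.7 - 0.8) / (0.6 - 0.8) == 0.5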
248 | ndsi = im.normalizedDifference(['B3', 'B11']) 249 | #score=score.min(rescale(ndsi, [0.8, 0.6])) 250 | score=score.min(rescale(ndsi, 'img', [0.8, 0.6])) 251 | 252 | score = score.multiply(100).byte() 253 | #print('score:', type(score)) 254 | 255 | return img.addBands(score.rename(['cloudScore'])) 256 | 257 | def mask(img): 258 | date = img.date() 259 | year = date.get('year') 260 | month = date.get('month') 261 | cdi = ee.Algorithms.Sentinel2.CDI(img) 262 | scored = basicQA(img) 263 | clouds = sentinelCloudScore(scored).lte(15).Or(cdi.gte(-0.2)) 264 | water = waterScore(img).select('waterScore').lte(0.25) 265 | jrc = ee.Image(JRC.filterMetadata('month', 'equals', month).filterMetadata('year', 'equals', year).first()) 266 | waterMask = jrc.focal_max(1, 'square', 'pixels').neq(2).And(water) 267 | shadowMask = img.select('B11').gt(900) 268 | return scored.updateMask(clouds.And(shadowMask).And(waterMask)) 269 | 270 | def maskSR(img): 271 | """ 272 | Apply built in masks to Sentinel-2 surface reflectance imagery 273 | Parameters: 274 | img (ee.Image): Sentinel-2 level 2A surface reflectange image 275 | Returns: 276 | ee.Image: masked image 277 | """ 278 | # jrc = ee.Image('JRC/GSW1_1/YearlyHistory/2018') 279 | scored = basicQA(img); 280 | maskBand = img.select('SCL') 281 | cloudMask = maskBand.neq(8).And(maskBand.neq(9)) 282 | # waterMask = maskBand.neq(6).where(jrc.gte(2), 0) 283 | cirrusMask = maskBand.neq(10) 284 | snowMask = maskBand.neq(11) 285 | darkMask = maskBand.neq(2).And(maskBand.neq(3)) 286 | return scored.updateMask(cloudMask.And(cirrusMask).And(snowMask).And(darkMask)) 287 | 288 | def maskTOA(img): 289 | """ 290 | Mask Sentinel-2 1C top of atmosphere imagery for clouds, water, shadow 291 | Parameters: 292 | img (ee.Image): Sentinel-2 level 1C image 293 | Returns: 294 | ee.Image: masked image 295 | """ 296 | # date = img.date() 297 | # year = date.get('year') 298 | #month = date.get('month') 299 | #cdi = ee.Algorithms.Sentinel2.CDI(img) 300 | scored = basicQA(img) 301 | cloudMask = sentinelCloudScore(scored).select('cloudScore').lte(15)#.Or(cdi.gte(-0.2)) 302 | # water = waterScore(img).select('waterScore').lte(0.25) 303 | # jrc = ee.Image(JRC.filterMetadata('year', 'equals', year).first()) 304 | # watermask = water.where(jrc.gte(2), 0) 305 | # shadowMask = img.select('B11').gt(900) 306 | return scored.updateMask(cloudMask)#.And(shadowMask))#.And(watermask)) 307 | 308 | -------------------------------------------------------------------------------- /utils/pc_tools.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | from pathlib import Path 4 | from importlib import reload 5 | import numpy as np 6 | import os 7 | import sys 8 | from os.path import join 9 | from glob import glob 10 | import io 11 | from datetime import datetime 12 | import xml 13 | 14 | from osgeo import gdal 15 | import xarray as xr 16 | import rasterio as rio 17 | from rasterio.vrt import WarpedVRT 18 | from rioxarray.merge import merge_arrays 19 | import rioxarray 20 | from rioxarray.merge import merge_arrays 21 | from pyproj import CRS 22 | 23 | import planetary_computer 24 | from dask_gateway import GatewayCluster 25 | from dask.distributed import wait, Client 26 | import pystac_client 27 | import pystac 28 | import stackstac 29 | import stac_vrt 30 | 31 | FILE = Path(__file__).resolve() # full path to the current file, including extension 32 | print('filepath', FILE) 33 | ROOT = FILE.parents[0] # list of upstream directories containing file 34 | 
print('root', ROOT) 35 | REL = Path(os.path.relpath(ROOT, Path.cwd())) 36 | print('rel', REL) 37 | if str(ROOT) not in sys.path: 38 | sys.path.append(str(ROOT)) 39 | if str(REL) not in sys.path: 40 | sys.path.append(str(REL)) # add REL to PATH 41 | 42 | from azure.storage.blob import ContainerClient, BlobClient 43 | 44 | def recursive_api_try(search): 45 | try: 46 | signed = planetary_computer.sign(search.get_all_items()) 47 | # collection = search.item_collection() 48 | # print(len(collection), 'assets') 49 | # signed = [planetary_computer.sign(item).to_dict() for item in collection] 50 | except pystac_client.exceptions.APIError as error: 51 | print('APIError, trying again') 52 | signed = recursive_api_try(search) 53 | return signed 54 | 55 | def resign_vrt(filename, element_tag): 56 | """Update the authentication token on previously created VRT items 57 | Params 58 | --- 59 | filename: str 60 | element_tag: str 61 | xml tag containing asset url to be signed 62 | """ 63 | tree = xml.etree.ElementTree.parse(filename) 64 | root = tree._root 65 | p = Path(filename) 66 | sub_vrt_list = [] 67 | for item in root.iter(element_tag): 68 | text = item.text 69 | # if item.attrib['relativeToVRT'] == '0': 70 | if text.startswith('http'): 71 | newtext = planetary_computer.sign(text.split('?')[0]) 72 | item.text = newtext 73 | elif '.vrt' in text: 74 | sub_vrt_list.append(text) 75 | newtext = text[:-4]+'_resigned.vrt' 76 | item.text = newtext 77 | for file in sub_vrt_list: 78 | etag = 'SourceDataset' if 'warped' in file else element_tag 79 | resign_vrt(file, etag) 80 | tree.write(str(p.parent)+'/'+str(p.stem)+'_resigned.vrt') 81 | 82 | def export_blob(data: np.ndarray, container_client: ContainerClient, blobUrl: str) -> None: 83 | with io.BytesIO() as buffer: 84 | np.save(buffer, data) 85 | buffer.seek(0) 86 | blob_client = container_client.get_blob_client(blobUrl) 87 | blob_client.upload_blob(buffer, overwrite=True) 88 | 89 | def normalize_dataArray(da: xr.DataArray, dim: str) -> xr.DataArray: 90 | """Normalize (mean = 0, sd = 1) values in a xarray DataArray along given axis 91 | 92 | Parameters 93 | --- 94 | da: xarray.DataArray 95 | array to be normalized 96 | dim: str 97 | name of dimension along which to calculate mean and standard deviation (e.g. 'band') 98 | 99 | Return 100 | --- 101 | xarray.DataArray: input array with values scaled to mean = 0 and sd = 1 102 | """ 103 | mean = da.mean(dim = dim, skipna = True) 104 | sd = da.std(dim = dim, skipna = True) 105 | normalized = (da - mean)/(sd+0.000001) 106 | return normalized 107 | 108 | def trim_dataArray(da: xr.DataArray, size: int) -> xr.DataArray: 109 | """Trim the remainder from x and y dimensions of a DataArray 110 | 111 | Parameters 112 | --- 113 | da: xarray:DataArray 114 | input array to be trimmed 115 | size: int 116 | size of chunks in x and y dimension. 
remaining array x&y size will be evenly divisible by this value 117 | 118 | Return: 119 | xarray:DataArray: resized input array with x & y dimensions evenly divisible by 'size' 120 | """ 121 | slices = {} 122 | for coord in ["y", "x"]: 123 | remainder = len(da.coords[coord]) % size 124 | slice_ = slice(-remainder) if remainder else slice(None) 125 | slices[coord] = slice_ 126 | 127 | trimmed = da.isel(**slices) 128 | return trimmed 129 | 130 | def get_naip_stac(aoi, dates): 131 | 132 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 133 | collections = ['naip'] 134 | 135 | search = catalog.search( 136 | intersects = aoi, 137 | datetime = dates, 138 | collections = collections, 139 | limit = 500 140 | ) 141 | 142 | items = planetary_computer.sign(search.item_collection_as_dict()) 143 | # items is a pystac ItemCollection 144 | # items2 = items.to_dict() 145 | features = items['features'] 146 | dates = [x['properties']['datetime'] for x in features] 147 | years = [date[0:4] for date in dates] 148 | years.sort() 149 | filtered = [x for x in features if x['properties']['datetime'][0:4] == years[-1]] 150 | urls = [item['assets']['image']['href'] for item in filtered] 151 | # organize all naip images overlapping box into a vrt stac 152 | crs_list = np.array([item['properties']['proj:epsg'] for item in filtered]) 153 | crss = np.unique(crs_list) 154 | crs_counts = [len(crs_list[crs_list == crs]) for crs in crss] 155 | print('naip crss', crss) 156 | if len(crss) > 1: 157 | # rioxrs = [] 158 | minority_idx = np.argmin(crs_counts) 159 | majority_idx = np.argmax(crs_counts) 160 | majority_urls = [url for i, url in enumerate(urls) if crs_list[i] == crss[majority_idx]] 161 | minority_urls = [url for i, url in enumerate(urls) if crs_list[i] == crss[minority_idx]] 162 | print('minority urls', minority_urls) 163 | minority_vrt = gdal.BuildVRT("./minority.vrt", minority_urls) 164 | majority_vrt = gdal.BuildVRT("./majority.vrt", majority_urls) 165 | warped_vrt = gdal.Warp("./warped.vrt", minority_vrt, format = 'vrt', dstSRS = f'EPSG:{crss[majority_idx]}') 166 | naipVRT = gdal.BuildVRT('./naiptmp.vrt', [warped_vrt, majority_vrt]) 167 | # naipVRT = None 168 | # for i, url in enumerate(urls): 169 | # rioxr = rioxarray.open_rasterio(url) 170 | # if crs_list[i] == crss[minority_idx]: 171 | # reprojected = rioxr.rio.reproject(f'EPSG:{crss[majority_idx]}') 172 | # rioxrs.append(reprojected) 173 | # else: 174 | # rioxrs.append(rioxr) 175 | # merged = merge_arrays(rioxrs) 176 | # return merged 177 | else: 178 | # rioxrs = [rioxarray.open_rasterio(url, lock = False) for url in urls] 179 | # merged = merge_arrays(rioxrs) 180 | # vrt = stac_vrt.build_vrt(filtered, block_width=512, block_height=512, data_type="Byte") 181 | # naipImg = rioxarray.open_rasterio(vrt, lock = False) 182 | naipVRT = gdal.BuildVRT('./naiptmp.vrt', urls) 183 | naipVRT = None 184 | naipImg = rioxarray.open_rasterio('./naiptmp.vrt', lock = False) 185 | return naipImg 186 | 187 | def get_dem_stac(aoi, dates, crs = None, resolution = None): 188 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 189 | search = catalog.search( 190 | intersects = aoi, 191 | collections = ["3dep-seamless"] 192 | ) 193 | 194 | # items is a pystac ItemCollection 195 | items = list(planetary_computer.sign(search.item_collection())) 196 | dems = [item for item in items if item.properties['gsd'] == 10] # we only want 10 m data 197 | return dems 198 | # # hagUrl = 
hag[0]['assets']['data']['href'] 199 | # demProperties = dems[0].properties 200 | # if crs: 201 | # demCrs = crs 202 | # else: 203 | # demCrs = demProperties['proj:epsg'] 204 | # # demTransform = demProperties['proj:transform'] 205 | # # if resolution: 206 | # # demRes = resolution 207 | # # else: 208 | # # demRes = demProperties['gsd'] 209 | 210 | # demStac = stackstac.stack( 211 | # dems, 212 | # epsg = demCrs, 213 | # resolution = 10) 214 | # # sortby_date = False, 215 | # # assets = ['data']) 216 | # print('3dep transform', demStac.rio.transform()) 217 | # demMedian = demStac.median(dim = 'time') 218 | # projected = demMedian.rio.set_crs(demCrs) 219 | # # reprojected = projected.rio.reproject(hagCrs) 220 | 221 | # return projected 222 | 223 | def get_hag_stac(aoi, dates, crs = None, resolution = None): 224 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 225 | search = catalog.search( 226 | intersects = aoi, 227 | datetime = dates, 228 | collections = ['3dep-lidar-hag'] 229 | ) 230 | 231 | items = recursive_api_try(search) 232 | # items is a pystac ItemCollection 233 | items2 = items.to_dict() 234 | hag = items2['features'] 235 | 236 | # hagUrl = hag[0]['assets']['data']['href'] 237 | hagProperties = hag[0]['properties'] 238 | hagCrs = hagProperties['proj:projjson']['components'][0]['id']['code'] 239 | hagTransform = hagProperties['proj:transform'] 240 | if resolution: 241 | hagRes = resolution 242 | else: 243 | hagRes = hagTransform[0] 244 | 245 | # hagSide = 360//hagRes 246 | # hagZoom = round(600/hagSide, 4) 247 | 248 | # hagCrs = [asset['properties']['proj:projjson']['components'][0]['id']['code'] for asset in hag] 249 | # print('hag CRS', hagCrs[0]) 250 | hagStac = stackstac.stack( 251 | hag, 252 | epsg = hagCrs, 253 | resolution = hagRes, 254 | sortby_date = False, 255 | assets = ['data']) 256 | 257 | hagMedian = hagStac.median(dim = 'time') 258 | projected = hagMedian.rio.set_crs(hagCrs) 259 | # reprojected = projected.rio.reproject(hagCrs) 260 | 261 | return projected 262 | 263 | def naip_mosaic(naips: list, crs: int): 264 | """ mosaic a list of naip stac items into a single xarray DataArray 265 | Parameters 266 | -------- 267 | naips: list: 268 | list of naip image items in stac format 269 | crs: int 270 | epsg code specifying the common crs to project naip images 271 | Return 272 | --- 273 | xr.DataArray: single array of mosaicd naip images 274 | """ 275 | data = [item for item in naips if item['properties']['proj:epsg'] == crs] 276 | crs = CRS.from_user_input(26918) 277 | naipStac = stac_vrt.build_vrt( 278 | data, block_width=512, block_height=512, data_type="Byte", crs = crs) 279 | naipImage = rioxarray.open_rasterio(naipStac, chunks = (4, 8192, 8192), lock = False) 280 | # reprojected = naipImage.rio.reproject('EPSG:4326') 281 | return(naipImage) 282 | 283 | def harmonize_to_old(data): 284 | """ 285 | Harmonize new Sentinel-2 data to the old baseline. 286 | 287 | Parameters 288 | ---------- 289 | data: xarray.DataArray 290 | A DataArray with four dimensions: time, band, y, x 291 | 292 | Returns 293 | ------- 294 | harmonized: xarray.DataArray 295 | A DataArray with all values harmonized to the old 296 | processing baseline. 
297 | """ 298 | cutoff = datetime(2022, 1, 25) 299 | offset = 1000 300 | bands = [ 301 | "B01", 302 | "B02", 303 | "B03", 304 | "B04", 305 | "B05", 306 | "B06", 307 | "B07", 308 | "B08", 309 | "B8A", 310 | "B09", 311 | "B10", 312 | "B11", 313 | "B12", 314 | ] 315 | 316 | old = data.sel(time=slice(cutoff)) 317 | 318 | to_process = list(set(bands) & set(data.band.data.tolist())) 319 | new = data.sel(time=slice(cutoff, None)).drop_sel(band=to_process) 320 | 321 | new_harmonized = data.sel(time=slice(cutoff, None), band=to_process).clip(offset) 322 | new_harmonized -= offset 323 | 324 | new = xr.concat([new, new_harmonized], "band").sel(band=data.band.data.tolist()) 325 | return xr.concat([old, new], dim="time") 326 | 327 | def get_s2_stac(dates, aoi, cloud_thresh = 10, bands = ["B02", "B03", "B04", "B08"], epsg = None): 328 | """from a pystac client return a stac of s2 imagery 329 | 330 | Parameters 331 | ---- 332 | dates: str 333 | start/end dates 334 | aoi: shapely.geometry.Polygon 335 | polygon defining area of search 336 | cloud_thresh: int 337 | maximum cloudy pixel percentage of s2 images to return 338 | bands: list 339 | asset (band) names to return and stack 340 | epsg: int 341 | epsg coordinate system to reproject s2 data to 342 | 343 | Return 344 | --- 345 | stackstac.stac() 346 | """ 347 | # connect to the planetary computer catalog 348 | catalog = pystac_client.Client.open( 349 | "https://planetarycomputer.microsoft.com/api/stac/v1", 350 | modifier = planetary_computer.sign_inplace) 351 | 352 | search = catalog.search( 353 | collections = ['sentinel-2-l2a'], 354 | datetime = dates, 355 | intersects = aoi, 356 | query={"eo:cloud_cover": {"lt": cloud_thresh}} 357 | ) 358 | 359 | s2items = [item.to_dict() for item in list(search.get_items())] 360 | if len(s2items) > 0: 361 | s2 = s2items[0] 362 | if epsg: 363 | s2epsg = epsg 364 | else: 365 | s2epsg = s2['properties']['proj:epsg'] 366 | 367 | s2Stac = ( 368 | stackstac.stack( 369 | s2items, 370 | epsg = s2epsg, 371 | assets=bands, # red, green, blue, nir 372 | # chunksize=4096, 373 | resolution=10, 374 | ) 375 | .where(lambda x: x > 0, other=np.nan) # sentinel-2 uses 0 as nodata 376 | ) 377 | 378 | harmonized = harmonize_to_old(s2Stac) 379 | 380 | s2crs = s2Stac.attrs['crs'] 381 | s2projected = harmonized.rio.set_crs(s2crs) 382 | else: 383 | # clipped = s2projected.rio.clip(geometries = [aoi], crs = epsg) 384 | harmonized = None 385 | return harmonized 386 | 387 | def get_s1_stac(dates, aoi, epsg = None, bands = ["vv", "vh"]): 388 | """from a pystac client return a stac of s2 imagery 389 | 390 | Parameters 391 | ---- 392 | client: pystac_client.Client() 393 | pystac catalog from which to retrieve assets 394 | dates: str 395 | start/end dates 396 | bbox: tpl 397 | [xmin, ymin, xmax, ymax] 398 | 399 | Return 400 | --- 401 | stackstac.stac() 402 | """ 403 | # connect to the planetary computer catalog 404 | catalog = pystac_client.Client.open( 405 | "https://planetarycomputer.microsoft.com/api/stac/v1", 406 | modifier = planetary_computer.sign_inplace) 407 | 408 | search = catalog.search( 409 | datetime = dates, 410 | intersects = aoi, 411 | collections=["sentinel-1-rtc"], 412 | query={"sar:polarizations": {"eq": ['VV', 'VH']}, 413 | 'sar:instrument_mode': {"eq": 'IW'}, 414 | 'sat:orbit_state': {"eq": 'ascending'} 415 | } 416 | ) 417 | 418 | s1items = search.item_collection() 419 | if not epsg: 420 | s1 = s1items[0] 421 | epsg = s1.properties['proj:epsg'] 422 | s1Stac = stackstac.stack( 423 | s1items, 424 | epsg = epsg, 425 | 
assets=bands, 426 | resolution=10, 427 | gdal_env=stackstac.DEFAULT_GDAL_ENV.updated( 428 | always=dict(GDAL_HTTP_MAX_RETRY=5, GDAL_HTTP_RETRY_DELAY=1) 429 | ) 430 | ) 431 | 432 | # # get spatial reference info 433 | # s1crs = s1Stac.attrs['crs'] 434 | # s1transform = s1Stac.attrs['transform'] 435 | # s1res = s1transform[0] 436 | 437 | # s1projected = s1Stac.rio.set_crs(s1crs) 438 | # clipped = s1projected.rio.clip(geometries = [aoi], crs = 4326) 439 | return s1Stac 440 | 441 | def get_s1_stac(dates, aoi, epsg = None, bands = ["vv", "vh"]): 442 | """from a pystac client return a stac of s2 imagery 443 | 444 | Parameters 445 | ---- 446 | client: pystac_client.Client() 447 | pystac catalog from which to retrieve assets 448 | dates: str 449 | start/end dates 450 | bbox: tpl 451 | [xmin, ymin, xmax, ymax] 452 | 453 | Return 454 | --- 455 | stackstac.stac() 456 | """ 457 | # connect to the planetary computer catalog 458 | catalog = pystac_client.Client.open( 459 | "https://planetarycomputer.microsoft.com/api/stac/v1", 460 | modifier = planetary_computer.sign_inplace) 461 | 462 | search = catalog.search( 463 | datetime = dates, 464 | intersects = aoi, 465 | collections=["sentinel-1-rtc"], 466 | query={"sar:polarizations": {"eq": ['VV', 'VH']}, 467 | 'sar:instrument_mode': {"eq": 'IW'}, 468 | 'sat:orbit_state': {"eq": 'ascending'} 469 | } 470 | ) 471 | 472 | s1items = search.item_collection() 473 | if not epsg: 474 | s1 = s1items[0] 475 | epsg = s1.properties['proj:epsg'] 476 | s1Stac = stackstac.stack( 477 | s1items, 478 | epsg = epsg, 479 | assets=bands, 480 | resolution=10, 481 | gdal_env=stackstac.DEFAULT_GDAL_ENV.updated( 482 | always=dict(GDAL_HTTP_MAX_RETRY=5, GDAL_HTTP_RETRY_DELAY=1) 483 | ) 484 | ) 485 | 486 | # # get spatial reference info 487 | # s1crs = s1Stac.attrs['crs'] 488 | # s1transform = s1Stac.attrs['transform'] 489 | # s1res = s1transform[0] 490 | 491 | # s1projected = s1Stac.rio.set_crs(s1crs) 492 | # clipped = s1projected.rio.clip(geometries = [aoi], crs = 4326) 493 | return s1Stac 494 | 495 | def get_ssurgo_stac(aoi, epsg)-> np.ndarray: 496 | """Sample ssurgo data in raster format 497 | 498 | Parameters 499 | --- 500 | aoi: shapely.geometry.Polygon 501 | polygon coordinates defining search aoi 502 | epsg: int 503 | cooridnate reference system epsg code to reproject ssurgo data to 504 | 505 | Returns 506 | --- 507 | np.ndarray: 3-dimensional raster (window_size, window_size, 4) containing ssurgo data 508 | """ 509 | # connect to the PC STAC catalog 510 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 511 | 512 | # get the gnatsco raster, which has 'mukey' values per pixel 513 | search = catalog.search( 514 | collections=["gnatsgo-rasters"], 515 | intersects=aoi 516 | ) 517 | surgoitems = planetary_computer.sign(search.get_all_items()) 518 | return surgoitems 519 | # surgoitems = [planetary_computer.sign(item).to_dict() for item in list(search.items())] 520 | # surgo = surgoitems[0] 521 | 522 | # surgowkt = surgo['properties']['proj:wkt2'] 523 | # if epsg: 524 | # surgoEPSG = epsg #surgoCrs.to_epsg() 525 | # else: 526 | # surgoEPSG = CRS.from_wkt(surgowkt).to_epsg() 527 | 528 | # print(surgoEPSG) 529 | # # surgoepsg = surgo['properties']['proj:epsg'] 530 | # surgoStac = stackstac.stack( 531 | # surgoitems, 532 | # # epsg = surgoEPSG, 533 | # epsg = surgoEPSG, 534 | # assets=['mukey']) 535 | 536 | # surgoTransform = surgoStac.attrs['transform'] 537 | # # surgores = 10 #surgoTransform[0] TODO: COnfirm ssurgo is always 10 m resolution 
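    # Added aside: join_ssurgo() below joins the gNATSGO 'mukey' raster to tabular
    # soil attributes via np.unique(..., return_inverse=True). A toy illustration
    # (names and values are made up for the example):
    #   import numpy as np, pandas as pd
    #   mukeys = np.array([[3, 3], [7, 3]])                 # 2x2 raster of map-unit keys
    #   table = pd.DataFrame({'mukey': [3, 7], 'hydclprs': [10., 90.]})
    #   uniq, inv = np.unique(mukeys, return_inverse=True)  # uniq=[3, 7], inv=[0, 0, 1, 0]
    #   joined = table.set_index('mukey').reindex(uniq).to_numpy()[inv].reshape(2, 2, 1)
    #   # joined[:, :, 0] -> [[10., 10.], [90., 10.]]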
538 | # # print('resolution', surgores) 539 | 540 | # temporal = surgoStac.median(dim = 'time') 541 | # return temporal, surgoTransform, surgoEPSG 542 | 543 | def join_ssurgo(ssurgo_table, ssurgo_raster:np.ndarray): 544 | C,H,W = ssurgo_raster.shape 545 | # get the unique values and their indices from the raster so we can join to table data 546 | unique_mukeys, inverse = np.unique(ssurgo_raster, return_inverse=True) 547 | # print('\t\tJoining SSURGO Arrays. Unique mukeys', unique_mukeys) 548 | rearranged = ssurgo_table[['mukey', 'hydclprs', 'drclassdcd', 'flodfreqdcd', 'wtdepannmin']].groupby('mukey').first().reindex(unique_mukeys, fill_value=np.nan).astype(np.float64) 549 | rearranged.loc[rearranged['wtdepannmin'] > 200.0, 'wtdepannmin'] = 200.0 # anything above 200 should be clipped to 200 550 | rearranged['wtdepannmin'] = rearranged['wtdepannmin'].fillna(200.0) # missing values are above 200 cm deep 551 | rearranged['wtdepannmin'] = rearranged['wtdepannmin']/200.0 # 200 cm is the max measured value 552 | 553 | rearranged['flodfreqdcd'] = rearranged['flodfreqdcd'].fillna(0.0) # missing values mean no flooding 554 | 555 | rearranged['drclassdcd'] = rearranged['drclassdcd'].fillna(0.0) # missing values mean no soil e.g. excessively drained 556 | 557 | rearranged['hydclprs'] = rearranged['hydclprs'].fillna(0.0) # missing values mean no soil e.g. not hydric 558 | rearranged['hydclprs'] = rearranged['hydclprs']/100.0 # 100 percent hydric is max 559 | # join tabluar data to ssurgo raster based on mukey 560 | ssurgo_hwc = rearranged.to_numpy()[inverse].reshape((H, W, 4)) # HWC 561 | return ssurgo_hwc 562 | 563 | def get_pc_imagery(aoi, dates, crs): 564 | """Get S2 imagery from Planetary Computer. REQUIRES a valid API token be added to the os environment 565 | Args: 566 | aoi: POLYGON geometry json 567 | dates (tpl): four YYYY-MM-DD date strings defining before and after 568 | crs (int): 4-digit epsg code representing coordinate reference system 569 | """ 570 | # Creates the Dask Scheduler. Might take a minute. 
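    # Added note: the gateway addresses below are specific to the Planetary Computer
    # Hub and generally will not resolve elsewhere. For local experimentation, a
    # plain Dask client is a reasonable stand-in (illustrative, not original code):
    #   from dask.distributed import Client
    #   client = Client(processes = False)   # local threads, no gateway required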
571 | cluster = GatewayCluster( 572 | address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway", 573 | proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80", 574 | auth = 'jupyterhub', 575 | worker_cores = 4 576 | ) 577 | 578 | client = cluster.get_client() 579 | 580 | # allow our dask cluster to adaptively scale from 2 to 24 nodes 581 | cluster.adapt(minimum=2, maximum=24) 582 | 583 | # extract before and after dates from input in format required by PC 584 | before_dates = f'{dates[0]}/{dates[1]}' 585 | after_dates = f'{dates[2]}/{dates[3]}' 586 | 587 | # connect to the planetary computer catalog 588 | catalog = pystac_client.Client.open("https://planetarycomputer.microsoft.com/api/stac/v1") 589 | # sentinel = catalog.get_child('sentinel-2-l2a') 590 | 591 | before_data = get_s2_stack(catalog, before_dates, aoi) 592 | after_data = get_s2_stack(catalog, after_dates, aoi) 593 | 594 | # convert provided coordinates into appropriate format for clipping xarray imagery 595 | xs = [x for x,y in aoi['coordinates'][0]] 596 | ys = [y for x,y in aoi['coordinates'][0]] 597 | bounds = [min(xs), min(ys), max(xs), max(ys)] 598 | 599 | # reduce the before and after image collections to a single image using median value per pixel 600 | before = before_data.median(dim="time") 601 | after = after_data.median(dim="time") 602 | 603 | # compute the result and load to local machine 604 | bef_clip = bef.rio.clip([aoi], crs).compute() 605 | aft_clip = aft.rio.clip([aoi], crs).compute() 606 | 607 | # This non-distributed method seems to be working but timing out 608 | # TODO: try changing chunk dimensions, try increasing timeout time of Webservice 609 | # bd, ad = dask.compute(bef_clip, aft_clip) 610 | 611 | # result_dict = wait([bef_clip, aft_clip], return_when = 'ALL_COMPLETED') 612 | 613 | # close our cluster 614 | client.close() 615 | cluster.shutdown() 616 | # return the before and after images as numpy arrays 617 | return bef_clip.data, aft_clip.data 618 | 619 | def run_local(aoi, dates, m, buff = 128, kernel = 256): 620 | """Retrieve Sentinel-2 imagery from Microsoft Planetary Computer and run change detection 621 | Arguments: 622 | aoi (dict): GeoJson like dictionary defining area of interest 623 | crs (int): 4-digit epsg code representing coordinate reference system of the aoi 624 | dates (tpl): Four YYYY-MM-DD strings defining the before and after periods 625 | m (keras.Model): model to be used to make predictions 626 | buff (int): buffer to strip from prediction patches 627 | kernel (int): size of side of prediction patches 628 | Return: 629 | numpy.ndarray: 3D array with per-pixel change probabilities 630 | """ 631 | # extract before and after dates from input in format required by PC 632 | before_dates = f'{dates[0]}/{dates[1]}' 633 | after_dates = f'{dates[2]}/{dates[3]}' 634 | 635 | # get our before and after stacs 636 | print('retrieving s2 data') 637 | bef_stac, bef_transform = get_s2_stac(before_dates, aoi) 638 | aft_stac, aft_transform = get_s2_stac(after_dates, aoi) # these are projected rioxarrays 639 | 640 | # create median composites 641 | bef_median = bef_stac.median(dim="time") 642 | aft_median = aft_stac.median(dim="time") 643 | 644 | #normalize 645 | bef_norm = normalize_dataArray(bef_median, 'band') 646 | aft_norm = normalize_dataArray(aft_median, 'band') 647 | 648 | # concatenate 649 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']}) 650 | 651 | C,H,W = 
ds.shape 652 | print('data shape:', ds.shape) # from planetary computer this is C, H, W 653 | rearranged = ds.transpose('y','x','band') 654 | print('rearranged shape', rearranged.shape) 655 | indices = prediction_tools.generate_chip_indices(rearranged, buff, kernel) 656 | print(len(indices), 'indices generated') 657 | template = np.zeros((H, W)) 658 | print('template shape:', template.shape) 659 | # print('generating chips') 660 | # chips, chip_indices = extract_chips(ds) 661 | # print(len(chip_indices), 'chips generated') 662 | dat = rearranged.values 663 | print('running predictions') 664 | output = predict_chips(dat, indices, template, m, kernel = kernel, buff = buff) 665 | 666 | # print(f'returning array of {output.shape}') 667 | return output, bef_median, aft_median, aft_transform 668 | 669 | def run_dask(model_blob_url, weights_blob_url, custom_objects, dates, aoi): 670 | # # create a dask cluster 671 | # print('spinning up Dask Cluster') 672 | # cluster = GatewayCluster( 673 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway", 674 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80", 675 | # auth = 'jupyterhub', 676 | # worker_cores = 4 677 | # ) 678 | 679 | # client = cluster.get_client() 680 | # client.upload_file(f'{str(ROOT)}/model_tools.py', load = True) 681 | 682 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes 683 | # cluster.adapt(minimum=4, maximum=24) 684 | # print('cluster created', cluster.dashboard_link) 685 | 686 | # extract before and after dates from input in format required by PC 687 | before_dates = f'{dates[0]}/{dates[1]}' 688 | after_dates = f'{dates[2]}/{dates[3]}' 689 | 690 | # get our before and after stacs 691 | print('retrieving s2 data') 692 | bef_stac = get_s2_stac(before_dates, aoi) 693 | aft_stac = get_s2_stac(after_dates, aoi) # these are projected rioxarrays 694 | 695 | # create median composites 696 | bef_median = bef_stac.median(dim="time") 697 | aft_median = aft_stac.median(dim="time") 698 | 699 | #normalize 700 | bef_norm = normalize_dataArray(bef_median, 'band') 701 | aft_norm = normalize_dataArray(aft_median, 'band') 702 | 703 | # concatenate 704 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']}) 705 | 706 | trimmed = trim_dataArray(ds, 256) 707 | chunked = trimmed.chunk({'x':256, 'y':256}) 708 | 709 | print('running chunked predictions') 710 | meta = np.array([[]], dtype="float32") 711 | predictions_array = chunked.data.map_overlap( 712 | lambda x: predict_chunk(x, model_blob_url, weights_blob_url, custom_objects), 713 | depth = (0, 64, 64), 714 | boundary = 0, 715 | meta=meta, 716 | drop_axis=0 717 | ) 718 | 719 | # predictions = predictions_array 720 | 721 | # # to restore spatial reference, cast back to Xarray 722 | # out = xr.DataArray( 723 | # predictions, 724 | # coords=trimmed.drop_vars("band").coords, 725 | # dims=("y", "x"), 726 | # ) 727 | 728 | return(predictions_array) 729 | 730 | 731 | # def test_PC_connection(): 732 | # """Test our ability to retrieve satellite imagery from Planetary Computer 733 | 734 | # Without any processing, return the first Sentinel-2 image from a date range at 735 | # a known location 736 | # """ 737 | # # Creates the Dask Scheduler. Might take a minute. 
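# Added aside: run_dask() above uses dask's map_overlap so each 256x256 chunk is
# processed with a 64-pixel halo on the y and x axes, avoiding seams at chip edges.
# A minimal, self-contained analogue (array sizes are arbitrary for the example):
#   import dask.array as da
#   arr = da.ones((8, 512, 512), chunks = (8, 256, 256), dtype = 'float32')
#   out = arr.map_overlap(lambda block: block * 2, depth = (0, 64, 64), boundary = 0)
#   out.compute().shape  # (8, 512, 512); run_dask additionally drops the band axis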
738 | # cluster = GatewayCluster( 739 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway", 740 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80", 741 | # auth = 'jupyterhub', 742 | # worker_cores = 4 743 | # ) 744 | 745 | # client = cluster.get_client() 746 | 747 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes 748 | # cluster.adapt(minimum=2, maximum=24) 749 | 750 | # # define fixed start and end date for summer 2021 751 | # before_dates = '2021-05-01/2021-08-01' 752 | 753 | # # connect to the planetary computer catalog 754 | # catalog = pcClient.open("https://planetarycomputer.microsoft.com/api/stac/v1") 755 | # sentinel = catalog.get_child('sentinel-2-l2a') 756 | 757 | # search = catalog.search( 758 | # collections = ['sentinel-2-l2a'], 759 | # datetime=before_dates, 760 | # intersects=aoi 761 | # ) 762 | 763 | # search_list = list(search_before.get_items()) 764 | 765 | # least_cloudy = [item for item in search_list if item.properties['eo:cloud_cover'] <= 10] 766 | 767 | # items = [pc.sign_item(i).to_dict() for i in least_cloudy] 768 | 769 | # # sanity check to make sure we have retrieved and authenticated items fro planetary computer 770 | # ilen = len(items) 771 | # print(f'{ilen} images in collection') 772 | 773 | # # convert provided coordinates into appropriate format for clipping xarray imagery 774 | # bounds = [-76.503778, 38.988321, -76.530776, 38.988322] 775 | 776 | # # create an 777 | # data = ( 778 | # stackstac.stack( 779 | # items[0], 780 | # epsg = 32617, 781 | # bounds_latlon = bounds, 782 | # sortby_date = 'desc', 783 | # # resolution=10, 784 | # assets=['B02', 'B03', 'B04', 'B08'], # blue, green, red, nir 785 | # # chunks is for parallel computing on Dask cluster, only refers to spatial dimension 786 | # chunksize= 'auto' # don't make smaller than native S2 tiles (100x100km) 787 | # ) 788 | # .where(lambda x: x > 0, other=np.nan) # sentinel-2 uses 0 as nodata 789 | # .assign_coords(band = lambda x: x.common_name.rename("band")) # use common names 790 | # ) 791 | 792 | # # reduce the before and after image collections to a single image using first valid pixel 793 | # before = data.mosaic(dim="time") 794 | 795 | # # assign the native sentinel-2 crs the resulting xarrays 796 | # bef = before.rio.set_crs(32617) 797 | 798 | # # compute the result and load to local machine 799 | # bef_local = bef.compute() 800 | 801 | # # This non-distributed method seems to be working but timing out 802 | # # TODO: try changing chunk dimensions, try increasing timeout time of Webservice 803 | # # bd, ad = dask.compute(bef_clip, aft_clip) 804 | 805 | # # result_dict = wait([bef_clip, aft_clip], return_when = 'ALL_COMPLETED') 806 | 807 | # # close our cluster 808 | # client.close() 809 | # cluster.shutdown() 810 | # # return the image as numpy arrays 811 | # return bef_local.data 812 | 813 | -------------------------------------------------------------------------------- /utils/prediction_tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Fri Dec 4 19:24:42 2020 4 | 5 | @author: MEvans 6 | """ 7 | import os 8 | from os.path import join 9 | import sys 10 | from sys import path 11 | from pathlib import Path 12 | 13 | # import ee 14 | import json 15 | import numpy as np 16 | import tensorflow as tf 17 | from matplotlib import pyplot as plt 18 | #import gsutil 19 | import rasterio as rio 20 | from rasterio.crs import 
CRS 21 | from rasterio.warp import transform_bounds 22 | from rasterio.transform import array_bounds 23 | 24 | FILE = Path(__file__).resolve() # full path to the current file, including extension 25 | print('filepath', FILE) 26 | ROOT = FILE.parents[0] # list of upstream directories containing file 27 | print('root', ROOT) 28 | REL = Path(os.path.relpath(ROOT, Path.cwd())) 29 | print('rel', REL) 30 | if str(ROOT) not in sys.path: 31 | path.append(str(ROOT)) 32 | if str(REL) not in sys.path: 33 | path.append(str(REL)) # add REL to PATH 34 | 35 | from processing import normalize_tensor, rescale_tensor 36 | # TODO: automate spliting of full GEE path 37 | # def doExport(image, features, scale, bucket, pred_base, pred_path, region, kernel_shape = [256, 256], kernel_buffer = [128,128]): 38 | # """ 39 | # Run an image export task on which to run predictions. Block until complete. 40 | # Parameters: 41 | # image (ee.Image): image to be exported for prediction 42 | # features (list): list of band names to include in export 43 | # scale (int): pixel scale 44 | # bucket (str): name of GCS bucket to write files 45 | # pred_path (str): relative google cloud directory path for export 46 | # pred_base (str): base filename of exported image 47 | # kernel_shape (array): size of image patch in pixels 48 | # kernel_buffer (array): pixels to buffer the prediction patch. half added to each side 49 | # region (ee.Geometry): 50 | # """ 51 | # task = ee.batch.Export.image.toCloudStorage( 52 | # image = image.select(features), 53 | # description = pred_base, 54 | # bucket = bucket, 55 | # fileNamePrefix = join(pred_path, pred_base), 56 | # region = region,#.getInfo()['coordinates'], 57 | # scale = scale, 58 | # fileFormat = 'TFRecord', 59 | # maxPixels = 1e13, 60 | # formatOptions = { 61 | # 'patchDimensions': kernel_shape, 62 | # 'kernelSize': kernel_buffer, 63 | # 'compressed': True, 64 | # 'maxFileSize': 104857600 65 | # } 66 | # ) 67 | # task.start() 68 | 69 | # # Block until the task completes. 
70 | # print('Running image export to Cloud Storage...') 71 | # import time 72 | # while task.active(): 73 | # time.sleep(30) 74 | 75 | # # Error condition 76 | # if task.status()['state'] != 'COMPLETED': 77 | # print('Error with image export.') 78 | # else: 79 | # print('Image export completed.') 80 | 81 | # # Error condition 82 | # if task.status()['state'] != 'COMPLETED': 83 | # print('Error with image export.') 84 | # else: 85 | # print('Image export completed.') 86 | 87 | def generate_chip_indices(arr, buff = 128, kernel = 256): 88 | """ 89 | Parameters 90 | --- 91 | arr: np.ndarray 92 | 3D array (H, W, C) for which indices should be generated 93 | buff: int 94 | size of pixels to be trimmed from chips 95 | kernel: int 96 | size of contiguous image chips 97 | Return 98 | --- 99 | list::np.ndarray: list containing (y,x) index of chips upper left corner 100 | """ 101 | H, W, C = arr.shape 102 | side = buff + kernel 103 | x_buff = y_buff = buff//2 104 | 105 | y_indices = list(range(y_buff, H - side, kernel)) 106 | x_indices = list(range(x_buff, W - side, kernel)) 107 | 108 | indices = [(y_index, x_index) for y_index in y_indices for x_index in x_indices] 109 | return indices 110 | 111 | def extract_chips(arr, buff = 128, kernel = 256): 112 | """Break an array into (potentially) overlapping chips for analysis 113 | Arguments: 114 | arr (ndarray): 3D array to run predictions on 115 | buff (int): size of pixels to be trimmed from chips 116 | kernel (int): size of contiguous image chips 117 | Return: 118 | list::np.ndarray: list containing image chips of size (kernel+buff, kernel+buff) 119 | """ 120 | H, W, C = arr.shape 121 | side = buff + kernel 122 | x_buff = y_buff = buff//2 123 | chips = [] 124 | 125 | chip_indices = generate_chip_indices(arr, buff, kernel) 126 | 127 | for x, y in chip_indices: 128 | chip = arr[y-y_buff:y+kernel+y_buff, x-x_buff:x+kernel+x_buff, :] 129 | chips.append(chip) 130 | 131 | return chips 132 | 133 | def predict_chips(arr, chip_indices, template, m, kernel = 256, buff = 128): 134 | """Predict changes in image chips 135 | Arguments: 136 | chips (list): kernel+buff x kernel+buff pixel chips to be fed to U-Net model 137 | chip_indices (list): list of (y,x) tuples marking position of chip upper-left corner in output array 138 | m (keras.Model): model to be used to make predictions 139 | template (ndarray): 2D all-zero array to which predictions will be written 140 | buff (int): total number of pixels to be trimmed from output chips in x and y direction 141 | kernel (int): number of pixels in x and y retained in prediction chips 142 | Return: 143 | ndarray: 3D array of size output.shape containing change probabilities 144 | """ 145 | y_buff = x_buff = buff//2 146 | if len(chip_indices) >= 1: 147 | for y, x in chip_indices: 148 | print(y,x) 149 | chip = arr[y - y_buff:y+kernel+y_buff, x - x_buff:x + kernel + x_buff, :] 150 | print(chip.shape) 151 | # preds = m.predict(np.array([chips[i]]), verbose = 0) 152 | preds = m.predict(np.array([chip]), verbose = 0) 153 | print(preds.shape) 154 | template[y:y+kernel, x:x+kernel] += preds[0, y_buff:(kernel + y_buff), x_buff:(kernel+x_buff), 0] 155 | 156 | return template 157 | 158 | #def makePredDataset(bucket, pred_path, pred_image_base, kernel_buffer, features, raw = None): 159 | def make_pred_dataset(file_list, features, kernel_shape = [256, 256], kernel_buffer = [128, 128], axes = [2], splits = None, moments = None, one_hot = None, **kwargs): 160 | """ Make a TFRecord Dataset that can be used for predictions 161 | 
Parameters: 162 | file_list: list of complete pathnames for prediction data files 163 | pred_path (str): path to .tfrecord files 164 | pred_image_base (str): pattern matching basename of file(s) 165 | kernel_shape (tpl): size of image patch in pixels 166 | kernel_buffer (tpl): pixels to trim from H, W dimensions of prediction 167 | features (list): names of features in incoming data 168 | axes (list): axes for normalization 169 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth 170 | Return: 171 | TFRecord Dataset 172 | """ 173 | 174 | # Make sure the files are in the right order. 175 | file_list.sort() 176 | 177 | # Get set up for prediction. 178 | x_buffer = int(kernel_buffer[0] / 2) 179 | y_buffer = int(kernel_buffer[1] / 2) 180 | 181 | buffered_shape = [ 182 | kernel_shape[0] + kernel_buffer[0], 183 | kernel_shape[1] + kernel_buffer[1]] 184 | 185 | imageColumns = [ 186 | tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) 187 | for k in features 188 | ] 189 | 190 | imageFeaturesDict = dict(zip(features, imageColumns)) 191 | 192 | def parse_image(example_proto): 193 | return tf.io.parse_single_example(example_proto, imageFeaturesDict) 194 | 195 | def toTupleImage(dic): 196 | 197 | # stack the augmented bands, optional one-hot tensors, and response variable 198 | if one_hot: 199 | featList = [dic.get(key) for key in features if key not in one_hot.keys()] 200 | hotList = [tf.one_hot(tf.cast(dic.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items()] 201 | else: 202 | featList = [dic.get(key) for key in features] 203 | 204 | bands = tf.transpose(tf.stack(featList, axis = 0), [1,2,0]) 205 | bands = rescale_tensor(bands, axes = axes, moments = moments, splits = splits) 206 | # If custom preprocessing functions are specified add respective bands 207 | 208 | for fxn in kwargs.values(): 209 | der = fxn(dic) 210 | der = tf.expand_dims(der, axis = 2) 211 | bands = tf.concat([bands, der], axis = 2) 212 | 213 | if one_hot: 214 | hotStack = tf.concat(hotList, axis = 2) 215 | stacked = tf.concat([bands, hotStack], axis =2) 216 | else: 217 | stacked = tf.concat([bands], axis = 2) 218 | 219 | return stacked 220 | 221 | # Create a dataset(s) from the TFRecord file(s) in Cloud Storage. 222 | 223 | imageDataset = tf.data.TFRecordDataset(file_list, compression_type='GZIP') 224 | imageDataset = imageDataset.map(parse_image, num_parallel_calls=5) 225 | imageDataset = imageDataset.map(toTupleImage).batch(1) 226 | return imageDataset 227 | 228 | def plot_to_image(figure): 229 | """Converts the matplotlib plot specified by 'figure' to a PNG image and 230 | returns it. The supplied figure is closed and inaccessible after this call.""" 231 | # Save the plot to a PNG in memory. 232 | import io 233 | buf = io.BytesIO() 234 | plt.savefig(buf, format='png') 235 | # Closing the figure prevents it from being displayed directly inside 236 | # the notebook. 237 | plt.close(figure) 238 | buf.seek(0) 239 | # Convert PNG buffer to TF image 240 | image = tf.image.decode_png(buf.getvalue(), channels=4) 241 | # Add the batch dimension 242 | image = tf.expand_dims(image, 0) 243 | return image 244 | 245 | def callback_predictions(imageDataset, model, mixer, kernel_shape = [256, 256], kernel_buffer = [128, 128]): 246 | patches = mixer['totalPatches'] 247 | cols = mixer['patchesPerRow'] 248 | rows = patches//cols 249 | 250 | # Perform inference. 
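    # Added note: the mixer metadata drives how the flat prediction patches are
    # tiled back into a 2D array below. For example, with totalPatches = 6 and
    # patchesPerRow = 3, patches 1-3 are appended into the first row and 4-6 into
    # the second, so after the buffers are trimmed the output shape is
    # (2 * kernel, 3 * kernel) = (512, 768) for the default 256-pixel kernel.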
251 | predictions = model.predict(imageDataset, steps=patches, verbose=1) 252 | 253 | # some models will outputs probs (B, H, W, NCLASSES) and classes (B, H, W) as a list 254 | if type(predictions) == list: 255 | # in this case lets just grab the probabilities 256 | predictions = predictions[0] 257 | 258 | x_buffer = int(kernel_buffer[0] / 2) 259 | y_buffer = int(kernel_buffer[1] / 2) 260 | x_size = kernel_shape[0]+y_buffer 261 | y_size = kernel_shape[1]+x_buffer 262 | 263 | x = 1 264 | for prediction in predictions: 265 | print('Writing patch ' + str(x) + '...') 266 | # write probability of target class (i.e. 1), classes can be calculated post processing if not present already 267 | patch = prediction[y_buffer:y_size, x_buffer:x_size, 1] 268 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1) 269 | # probPatch = np.max(prediction, axis = 2) 270 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 271 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 272 | # # stack probabilities and classes along channel dimension 273 | # patch = np.stack([predPatch, probPatch], axis = 2) 274 | 275 | ## NOTE: Predictions come out with y as 0 dimension (ie. rows), x as 1 dimension (ie. columns) 276 | # if we're at the beginning of a row 277 | if x%cols == 1: 278 | row = patch 279 | else: 280 | row = np.append(row, patch, axis = 1) 281 | # if we reached the end of a row start a new one 282 | if x%cols == 0: 283 | # for the first row, create single row rows object 284 | if x <= cols: 285 | rows = row 286 | else: 287 | # add current row to previous rows along y axis 288 | rows = np.append(rows, row, axis = 0) 289 | x += 1 290 | 291 | return rows 292 | 293 | def make_array_predictions(imageDataset, model, jsonFile, kernel_shape = [256, 256], kernel_buffer = [128,128]): 294 | """Create a 3D array of prediction outputs from TFRecord dataset 295 | 296 | Given a set of TFRecords representing image patches on which to run model predictions, 297 | and a json file specifying the spatial reference system and arrangement of patches, 298 | this function writes predictions to a single, reconstructed numpy array of shape 299 | (?,?,2). Dimension 2 holds class probabilities and most likely class. 300 | 301 | Parameters: 302 | imageDataset (tf.Dataset): image patch tensors on which to run predictions 303 | model (keras Model): model used to make predictions 304 | jsonFile (str): complete GCS filepath to json file 305 | kernel_size(tpl): size of image patch in pixels 306 | kernel_buffer (tpl): pixels to trim from H, W, dimensions of each output patch 307 | Return: 308 | ndarray: 3D array of prediction outputs. 309 | """ 310 | # we need metadata from the json file to reconstruct prediction patches 311 | # Load the contents of the mixer file to a JSON object. 312 | # jsonFile = '/'.join(jsonFile.split(sep = '/')[3:]) 313 | # blob = bucket.get_blob(jsonFile) #23Mar21 update to use google-cloud-storage library 314 | # jsonText = blob.download_as_string().decode('utf-8') 315 | # mixer = json.loads(jsonText) 316 | 317 | with open(jsonFile,) as file: 318 | mixer = json.load(file) 319 | 320 | # # Load the contents of the mixer file to a JSON object. 
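# Usage sketch (editorial, hedged): reassembling per-patch predictions into a single mosaic
# with callback_predictions() above. The mixer filename, the prediction dataset, and the
# trained model are illustrative assumptions.
def _example_callback_predictions(pred_dataset, model):
    with open('unet256_pred-mixer.json') as f:  # hypothetical mixer file
        mixer = json.load(f)
    # returns a 2D mosaic of class-1 probabilities, rebuilt row by row
    return callback_predictions(pred_dataset, model, mixer,
                                kernel_shape=[256, 256],
                                kernel_buffer=[128, 128])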
321 | # jsonText = !gsutil cat {jsonFile} 322 | # 323 | # # Get a single string w/ newlines from the IPython.utils.text.SList 324 | # mixer = json.loads(jsonText.nlstr) 325 | 326 | print(mixer) 327 | patches = mixer['totalPatches'] 328 | cols = mixer['patchesPerRow'] 329 | rows = patches//cols 330 | 331 | # Perform inference. 332 | print('Running predictions...') 333 | predictions = model.predict(imageDataset, steps=patches, verbose=1) 334 | 335 | # some models will outputs probs and classes as a list 336 | if type(predictions) == list: 337 | # in this case, concatenate list elments into a single 4d array along last dimension 338 | predictions = np.concatenate(predictions, axis = 3) 339 | 340 | x_buffer = int(kernel_buffer[0] / 2) 341 | y_buffer = int(kernel_buffer[1] / 2) 342 | x_size = kernel_shape[0]+y_buffer 343 | y_size = kernel_shape[1]+x_buffer 344 | 345 | x = 1 346 | for prediction in predictions: 347 | print('Writing patch ' + str(x) + '...') 348 | # lets just write probabilities, classes can be calculated post processing if not present already 349 | patch = prediction[y_buffer:y_size, x_buffer:x_size, :] 350 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1) 351 | # probPatch = np.max(prediction, axis = 2) 352 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 353 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 354 | # # stack probabilities and classes along channel dimension 355 | # patch = np.stack([predPatch, probPatch], axis = 2) 356 | 357 | ## NOTE: Predictions come out with y as 0 dimension (ie. rows), x as 1 dimension (ie. columns) 358 | # if we're at the beginning of a row 359 | if x%cols == 1: 360 | row = patch 361 | else: 362 | row = np.append(row, patch, axis = 1) 363 | # if we reached the end of a row start a new one 364 | if x%cols == 0: 365 | # for the first row, create single row rows object 366 | if x <= cols: 367 | rows = row 368 | else: 369 | # add current row to previous rows along y axis 370 | rows = np.append(rows, row, axis = 0) 371 | x += 1 372 | 373 | return rows 374 | 375 | def write_tfrecord_predictions(predictions, pred_path, out_image_base, kernel_shape = [256, 256], kernel_buffer = [128,128]): 376 | """Generate predictions and save as TFRecords to Cloud Storage 377 | Parameters: 378 | imageDataset (tf.Dataset): data on which to run predictions 379 | pred_path (str): full path to output directory 380 | out_image_base (str): file basename for input and output files 381 | kernel_shape (tpl): [y, x] size of image patch in pixels 382 | kernel_buffer (tpl): [y, x] size of buffer to be trimmed from predictions 383 | 384 | Return: 385 | empty: Writes TFRecord files to specified destination 386 | """ 387 | # Perform inference. 
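# Usage sketch (editorial, hedged): reconstructing a full prediction array with
# make_array_predictions() above and saving it locally. The model file and output names are
# illustrative assumptions (this repo's models may also require custom_objects to load).
def _example_make_array_predictions(pred_dataset):
    model = tf.keras.models.load_model('unet256.h5')  # hypothetical model file
    preds = make_array_predictions(pred_dataset, model, 'unet256_pred-mixer.json')
    np.save('unet256_predictions.npy', preds)
    return preds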
388 | # print('Running predictions...') 389 | # predictions = model.predict(imageDataset, steps=None, verbose=1) 390 | # print(predictions[0]) 391 | 392 | # some models will outputs probs and classes as a list 393 | if type(predictions) == list: 394 | # in this case, concatenate list elments into a single 4d array along last dimension 395 | predictions = np.concatenate(predictions, axis = 3) 396 | 397 | # get the number of bands (should usually be one or two) 398 | C = predictions.shape[-1] 399 | 400 | out_image_file = join(pred_path, f'{out_image_base}.tfrecords') 401 | 402 | print('Writing predictions to ' + out_image_file + '...') 403 | writer = tf.io.TFRecordWriter(out_image_file) 404 | 405 | patches = 1 406 | 407 | x_buffer = int(kernel_buffer[0] / 2) 408 | y_buffer = int(kernel_buffer[1] / 2) 409 | x_size = x_buffer + kernel_shape[1] 410 | y_size = y_buffer + kernel_shape[0] 411 | 412 | for prediction in predictions: 413 | print('Writing patch ' + str(patches) + '...') 414 | # lets just write probabilities, classes can be calculated post processing if not present already 415 | patch = prediction[y_buffer:y_size, x_buffer:x_size, :] 416 | # predPatch = np.add(np.argmax(prediction, axis = 2), 1) 417 | # probPatch = np.max(prediction, axis = 2) 418 | # predPatch = predPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 419 | # probPatch = probPatch[x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 420 | 421 | # for each band in prediction, create a tf train feature 422 | feature = {} 423 | for i in range(C): 424 | feat = tf.train.Feature(float_list = tf.train.FloatList(value = np.ndarray.flatten(patch[:,:,i]))) 425 | feature['b{}'.format(i+1)] = feat 426 | 427 | # Create an example. 428 | example = tf.train.Example( 429 | features=tf.train.Features( 430 | feature = feature 431 | # feature={ 432 | # 'class': tf.train.Feature( 433 | # int64_list=tf.train.Int64List( 434 | # value = np.ndarray.flatten(predPatch))), 435 | # 'prob': tf.train.Feature( 436 | # float_list = tf.train.FloatList( 437 | # value = np.ndarray.flatten(probPatch))) 438 | # } 439 | ) 440 | ) 441 | # Write the example. 
442 | writer.write(example.SerializeToString()) 443 | patches += 1 444 | 445 | writer.close() 446 | 447 | def write_geotiff_prediction(image, jsonFile, aoi): 448 | with open(jsonFile,) as file: 449 | mixer = json.load(file) 450 | 451 | transform = mixer['projection']['affine']['doubleMatrix'] 452 | crs = mixer['projection']['crs'] 453 | ppr = mixer['patchesPerRow'] 454 | tp = mixer['totalPatches'] 455 | rows = int(tp/ppr) 456 | 457 | if image.ndim < 3: 458 | image = np.expand_dims(image, axis = -1) 459 | 460 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5]) 461 | 462 | with rio.open( 463 | f'{aoi}.tif', 464 | 'w', 465 | driver = 'GTiff', 466 | width = image.shape[1], 467 | height = image.shape[0], 468 | count = image.shape[2], 469 | dtype = image.dtype, 470 | crs = crs, 471 | transform = affine) as dst: 472 | dst.write(np.transpose(image, (2,0,1))) 473 | 474 | # TODO: re-calculate n and write files not strictly based on rows 475 | def write_geotiff_predictions(imageDataset, model, jsonFile, outImgBase, outImgPath, kernel_buffer = [128,128]): 476 | """Run predictions on a TFRecord dataset and save as a GeoTIFF 477 | Parameters: 478 | imageDataset (tf.Dataset): data on which to run predictions 479 | model (tf.keras.Model): trained model 480 | jsonFile (str): filename of json mixer file 481 | outImgPath (str): directory in which to write predictions 482 | outImgBase (str): file basename 483 | kernel_buffer (tpl): x and y padding around patches 484 | Return: 485 | empty: writes geotiff records temporarily to working directory 486 | """ 487 | with open(jsonFile, ) as file: 488 | mixer = json.load(file) 489 | transform = mixer['projection']['affine']['doubleMatrix'] 490 | crs = mixer['projection']['crs'] 491 | ppr = mixer['patchesPerRow'] 492 | tp = mixer['totalPatches'] 493 | rows = int(tp/ppr) 494 | kernel_shape = mixer['patchDimensions'] 495 | 496 | H = rows*kernel_shape[0] 497 | W = ppr*kernel_shape[1] 498 | y_indices = list(range(0, H, kernel_shape[0])) 499 | x_indices = list(range(0, W, kernel_shape[1])) 500 | indices = [(y,x) for y in y_indices for x in x_indices] 501 | out_array = np.zeros((H, W, 1), dtype = np.float32) 502 | print('out array', out_array.shape) 503 | x_buffer = int(kernel_buffer[0]/2) 504 | y_buffer = int(kernel_buffer[1]/2) 505 | x_size = x_buffer + kernel_shape[1] 506 | y_size = y_buffer + kernel_shape[0] 507 | 508 | # prediction = model.predict(imageDataset, steps = tp, verbose = 1) 509 | # if type(predictions) == list: 510 | # predictions = np.concatenate(predictions, axis = 3) 511 | 512 | iterator = iter(imageDataset) 513 | 514 | for i, (y,x) in enumerate (indices): 515 | prediction = model.predict(iterator.next(), steps = 1, verbose = 1) 516 | if type(prediction) == list: 517 | prediction = np.concatenate(prediction, axis = 3) 518 | # prediction = predictions[i] 519 | print('prediction', prediction.shape) 520 | out_array[y:y+kernel_shape[0], x:x+kernel_shape[1], 0] += prediction[0, y_buffer:y_size, x_buffer:x_size, 0] 521 | 522 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5]) 523 | 524 | out_image_file = join(outImgPath, f'{outImgBase}.tif') 525 | print(f'writing image to {out_image_file}') 526 | with rio.open( 527 | out_image_file, 528 | 'w', 529 | driver = 'GTiff', 530 | width = W, 531 | height = H, 532 | count = 1, 533 | dtype = out_array.dtype, 534 | crs = crs, 535 | transform = affine) as dst: 536 | dst.write(np.transpose(out_array, (2,0,1))) 537 | 
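# Usage sketch (editorial, hedged): exporting a prediction mosaic as a GeoTIFF with
# write_geotiff_prediction() above, then retrieving display bounds with get_img_bounds()
# (defined below). The array, file names, and the folium use case are illustrative
# assumptions.
def _example_geotiff_export(pred_array):
    # writes 'example_aoi.tif' to the working directory using the mixer's CRS and transform
    write_geotiff_prediction(pred_array, 'unet256_pred-mixer.json', 'example_aoi')
    # bounds come back as [[lat_min, lon_min], [lat_max, lon_max]], the order expected by
    # folium.Map.fit_bounds()
    bounds = get_img_bounds('example_aoi.tif', 'unet256_pred-mixer.json',
                            dst_crs='EPSG:4326')
    return bounds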
538 | #def ingest_predictions(pred_path, out_image_base, user_folder): 539 | # """ 540 | # Upload prediction image(s) to Earth Engine. 541 | # Parameters: 542 | # pred_path (str): Google cloud (or Drive) path storing prediction image files 543 | # pred_image_base (str): 544 | # user_folder (str): GEE directory to store asset 545 | # out_image_base (str): base filename for GEE asset 546 | # """ 547 | # blob = bucket.get_blob(join(pred_path, out_image_base + '_mixer.json')) 548 | # jsonFile = blob.name 549 | # 550 | ## jsonFile = !gsutil ls {join('gs://', pred_path, out_image_base + '*.json')} 551 | # print(jsonFile) 552 | # blobs = bucket.list_blobs(join(pred_path, 'outputs', out_image_base + )) 553 | # predFiles = !gsutil ls {join('gs://', pred_path, 'outputs', out_image_base + '*TFRecord')} 554 | # print(predFiles) 555 | # out_image_files = ' '.join(predFiles) 556 | # # Start the upload. 557 | # out_image_asset = join(user_folder, out_image_base) 558 | # !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile[0]} 559 | 560 | def get_img_bounds(img, jsonFile, dst_crs = None): 561 | """Get the projected top left and bottom right coordinates of an image 562 | Parameters: 563 | img (ndarray): image to generate bounding coordinates for 564 | jsonFile (str): path to json file defining crs and image size 565 | dst_crs (str): epsg code for output crs 566 | Return: 567 | tpl: [[lat min, lon min],[lat max, lon max]] 568 | """ 569 | # Get a single string w/ newlines from the IPython.utils.text.SList 570 | with open(jsonFile,) as f: 571 | mixer = json.load(f) 572 | # mixer = json.loads(jsonText.nlstr) 573 | transform = mixer['projection']['affine']['doubleMatrix'] 574 | print(transform) 575 | src_crs = CRS.from_string(mixer['projection']['crs']) 576 | print(src_crs) 577 | affine = rio.Affine(transform[0], transform[1], transform[2], transform[3], transform[4], transform[5]) 578 | H,W = [0,0] 579 | 580 | if type(img) == np.ndarray: 581 | print('input image is numpy') 582 | H,W = img.shape 583 | print('image shape is ', H, W) 584 | bounds = array_bounds(H, W, affine) 585 | 586 | elif type(img) == str: 587 | print('input image is geotiff') 588 | with rio.open(img) as src: 589 | bounds = src.bounds 590 | # H, W = src.shape 591 | 592 | print(bounds) 593 | lon_min, lat_min, lon_max, lat_max = bounds 594 | # if we need to transform the bounds, such as for folium ('EPSG:3857') 595 | if dst_crs: 596 | dst_crs = CRS.from_string(dst_crs) 597 | out_bounds = transform_bounds(src_crs, dst_crs, left = lon_min, bottom = lat_min, right = lon_max, top = lat_max, densify_pts=21) 598 | lon_min, lat_min, lon_max, lat_max = out_bounds 599 | print(out_bounds) 600 | return [[lat_min, lon_min], [lat_max, lon_max]] 601 | 602 | def doPrediction(bucket, pred_path, pred_image_base, features, one_hot, out_image_base, kernel_shape, kernel_buffer): 603 | """ 604 | Given a bucket and path to prediction images, create a prediction dataset, make predictions 605 | and write tfrecords to GCS 606 | Parameters: 607 | bucket: (Bucket): google-cloud-storage bucket object 608 | pred_path (str): relative GCS path storing prediction image files 609 | pred_image_base (str): base filename of prediction files 610 | user_folder (str): GEE directory to store asset 611 | out_image_base (str): base filename for GEE asset 612 | kernel_buffer (Array): length 2 array 613 | Return: 614 | list: list of written image filenames to be used in earthengine upload 615 | """ 616 | 617 | print('Looking for TFRecord files...') 618 | 619 
| # Get a list of all the files in the output bucket. 620 | blobs = bucket.list_blobs(prefix = join(pred_path, pred_image_base)) 621 | filesList = [file.name for file in blobs if pred_image_base in file.name] 622 | # filesList = !gsutil ls {pred_path} 623 | # Get only the files generated by the image export. 624 | # exportFilesList = [s for s in filesList if pred_image_base in s] 625 | 626 | # Get the list of image files and the JSON mixer file. 627 | imageFilesList = [] 628 | jsonFile = None 629 | for f in filesList: 630 | if f.endswith('.tfrecord.gz'): 631 | imageFilesList.append(f) 632 | elif f.endswith('.json'): 633 | jsonFile = f 634 | 635 | # Make sure the files are in the right order. 636 | imageFilesList.sort() 637 | 638 | from pprint import pprint 639 | pprint('image files:', imageFilesList) 640 | print('json file:', jsonFile) 641 | 642 | # make a prediction dataset from the given files 643 | 644 | # Load the contents of the mixer file to a JSON object. 645 | blob = bucket.get_blob(jsonFile) 646 | jsonText = blob.download_as_string().decode('utf-8') 647 | mixer = json.loads(jsonText) 648 | # jsonText = !gsutil cat {jsonFile} 649 | # Get a single string w/ newlines from the IPython.utils.text.SList 650 | # mixer = json.loads(jsonText.nlstr) 651 | pprint(mixer) 652 | patches = mixer['totalPatches'] 653 | 654 | # # Get set up for prediction. 655 | # x_buffer = int(kernel_buffer[0] / 2) 656 | # y_buffer = int(kernel_buffer[1] / 2) 657 | # 658 | # buffered_shape = [ 659 | # KERNEL_SHAPE[0] + kernel_buffer[0], 660 | # KERNEL_SHAPE[1] + kernel_buffer[1]] 661 | # 662 | # imageColumns = [ 663 | # tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32) 664 | # for k in BANDS 665 | # ] 666 | # 667 | # imageFeaturesDict = dict(zip(BANDS, imageColumns)) 668 | # 669 | # def parse_image(example_proto): 670 | # return tf.io.parse_single_example(example_proto, imageFeaturesDict) 671 | # 672 | # def toTupleImage(dic): 673 | # inputsList = [dic.get(key) for key in BANDS] 674 | # stacked = tf.stack(inputsList, axis=0) 675 | # stacked = tf.transpose(stacked, [1, 2, 0]) 676 | # stacked = normalize(stacked, [0, 1]) 677 | # return stacked 678 | 679 | # Create a dataset(s) from the TFRecord file(s) in Cloud Storage. 680 | i = 0 681 | patches = 0 682 | written_files = [] 683 | while i < len(imageFilesList): 684 | imageDataset = make_pred_dataset(file_list = imageFilesList[i:i+100], kernel_shape = kernel_shape, kernel_buffer = kernel_buffer, features = features, one_hot = one_hot) 685 | # imageDataset = tf.data.TFRecordDataset(imageFilesList[i:i+100], compression_type='GZIP') 686 | # imageDataset = imageDataset.map(parse_image, num_parallel_calls=5) 687 | # imageDataset = imageDataset.map(toTupleImage).batch(1) 688 | 689 | out_image_base = out_image_base + '{:04d}'.format(i) 690 | out_image_file = join('gs://', bucket.name, pred_path, 'outputs/tfrecord', out_image_base + '.TFRecord') 691 | write_tfrecord_predictions(imageDataset, pred_path = pred_path, out_image_base = out_image_base, kernel_buffer = kernel_buffer) 692 | # # Perform inference. 
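# Usage sketch (editorial, hedged): the inference-and-write flow, similar to the commented-out
# block that follows. A trained model is assumed here because write_tfrecord_predictions()
# expects precomputed predictions rather than a dataset; paths and names are illustrative
# assumptions.
def _example_write_tfrecord_predictions(pred_dataset, total_patches):
    model = tf.keras.models.load_model('unet256.h5')  # hypothetical model file
    predictions = model.predict(pred_dataset, steps=total_patches, verbose=1)
    write_tfrecord_predictions(predictions,
                               pred_path='outputs/tfrecord',  # hypothetical directory
                               out_image_base='unet256_pred0000',
                               kernel_shape=[256, 256],
                               kernel_buffer=[128, 128])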
693 | # print('Running predictions...') 694 | # predictions = m.predict(imageDataset, steps=None, verbose=1) 695 | # # print(predictions[0]) 696 | # 697 | # 698 | # 699 | # print('Writing predictions to ' + out_image_file + '...') 700 | # writer = tf.io.TFRecordWriter(out_image_file) 701 | # for predictionPatch in predictions: 702 | # print('Writing patch ' + str(patches) + '...') 703 | # predictionPatch = predictionPatch[ 704 | # x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE] 705 | # 706 | # # Create an example. 707 | # example = tf.train.Example( 708 | # features=tf.train.Features( 709 | # feature={ 710 | # 'probability': tf.train.Feature( 711 | # float_list=tf.train.FloatList( 712 | # value=predictionPatch.flatten())) 713 | # } 714 | # ) 715 | # ) 716 | # # Write the example. 717 | # writer.write(example.SerializeToString()) 718 | # patches += 1 719 | # 720 | # writer.close() 721 | i += 100 722 | written_files.append(out_image_file) 723 | 724 | out_image_files = ' '.join(written_files) 725 | # Start the upload. 726 | # out_image_asset = join(user_folder, out_image_base) 727 | # !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile} 728 | # return list of written image files for use in gee upload 729 | return out_image_files 730 | 731 | def predict_pc_local(aoi, dates, m, buff = 128, kernel = 256): 732 | """Retrieve Sentinel-2 imagery from Microsoft Planetary Computer and run change detection 733 | Arguments: 734 | aoi (dict): GeoJson like dictionary defining area of interest 735 | crs (int): 4-digit epsg code representing coordinate reference system of the aoi 736 | dates (tpl): Four YYYY-MM-DD strings defining the before and after periods 737 | m (keras.Model): model to be used to make predictions 738 | buff (int): buffer to strip from prediction patches 739 | kernel (int): size of side of prediction patches 740 | Return: 741 | numpy.ndarray: 3D array with per-pixel change probabilities 742 | """ 743 | # extract before and after dates from input in format required by PC 744 | before_dates = f'{dates[0]}/{dates[1]}' 745 | after_dates = f'{dates[2]}/{dates[3]}' 746 | 747 | # get our before and after stacs 748 | print('retrieving s2 data') 749 | bef_stac, bef_transform = get_s2_stac(before_dates, aoi) 750 | aft_stac, aft_transform = get_s2_stac(after_dates, aoi) # these are projected rioxarrays 751 | 752 | # create median composites 753 | bef_median = bef_stac.median(dim="time") 754 | aft_median = aft_stac.median(dim="time") 755 | 756 | #normalize 757 | bef_norm = normalize_dataArray(bef_median, 'band') 758 | aft_norm = normalize_dataArray(aft_median, 'band') 759 | 760 | # concatenate 761 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']}) 762 | 763 | C,H,W = ds.shape 764 | print('data shape:', ds.shape) # from planetary computer this is C, H, W 765 | rearranged = ds.transpose('y','x','band') 766 | print('rearranged shape', rearranged.shape) 767 | indices = prediction_tools.generate_chip_indices(rearranged, buff, kernel) 768 | print(len(indices), 'indices generated') 769 | template = np.zeros((H, W)) 770 | print('template shape:', template.shape) 771 | # print('generating chips') 772 | # chips, chip_indices = extract_chips(ds) 773 | # print(len(chip_indices), 'chips generated') 774 | dat = rearranged.values 775 | print('running predictions') 776 | output = predict_chips(dat, indices, template, m, kernel = kernel, buff = buff) 777 | 778 | # print(f'returning array of 
{output.shape}') 779 | return output, bef_median, aft_median, aft_transform 780 | 781 | def predict_pc_dask(model_blob_url, weights_blob_url, custom_objects, dates, aoi): 782 | # # create a dask cluster 783 | # print('spinning up Dask Cluster') 784 | # cluster = GatewayCluster( 785 | # address = "https://pccompute.westeurope.cloudapp.azure.com/compute/services/dask-gateway", 786 | # proxy_address = "gateway://pccompute-dask.westeurope.cloudapp.azure.com:80", 787 | # auth = 'jupyterhub', 788 | # worker_cores = 4 789 | # ) 790 | 791 | # client = cluster.get_client() 792 | # client.upload_file(f'{str(ROOT)}/model_tools.py', load = True) 793 | 794 | # # allow our dask cluster to adaptively scale from 2 to 24 nodes 795 | # cluster.adapt(minimum=4, maximum=24) 796 | # print('cluster created', cluster.dashboard_link) 797 | 798 | # extract before and after dates from input in format required by PC 799 | before_dates = f'{dates[0]}/{dates[1]}' 800 | after_dates = f'{dates[2]}/{dates[3]}' 801 | 802 | # get our before and after stacs 803 | print('retrieving s2 data') 804 | bef_stac = get_s2_stac(before_dates, aoi) 805 | aft_stac = get_s2_stac(after_dates, aoi) # these are projected rioxarrays 806 | 807 | # create median composites 808 | bef_median = bef_stac.median(dim="time") 809 | aft_median = aft_stac.median(dim="time") 810 | 811 | #normalize 812 | bef_norm = normalize_dataArray(bef_median, 'band') 813 | aft_norm = normalize_dataArray(aft_median, 'band') 814 | 815 | # concatenate 816 | ds = xr.concat([bef_norm, aft_norm], dim="band").assign_coords({'band':['B2', 'B3', 'B4', 'B8','B2_1', 'B3_1', 'B4_1', 'B8_1']}) 817 | 818 | trimmed = trim_dataArray(ds, 256) 819 | chunked = trimmed.chunk({'x':256, 'y':256}) 820 | 821 | print('running chunked predictions') 822 | meta = np.array([[]], dtype="float32") 823 | predictions_array = chunked.data.map_overlap( 824 | lambda x: predict_chunk(x, model_blob_url, weights_blob_url, custom_objects), 825 | depth = (0, 64, 64), 826 | boundary = 0, 827 | meta=meta, 828 | drop_axis=0 829 | ) 830 | 831 | # predictions = predictions_array 832 | 833 | # # to restore spatial reference, cast back to Xarray 834 | # out = xr.DataArray( 835 | # predictions, 836 | # coords=trimmed.drop_vars("band").coords, 837 | # dims=("y", "x"), 838 | # ) 839 | 840 | return(predictions_array) 841 | -------------------------------------------------------------------------------- /utils/processing.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Fri Mar 20 10:50:44 2020 3 | 4 | @author: MEvans 5 | """ 6 | import tensorflow as tf 7 | import numpy as np 8 | import math 9 | import os 10 | import copy 11 | import sys 12 | import requests 13 | import io 14 | from random import shuffle, randint, uniform 15 | from pathlib import Path 16 | 17 | FILE = Path(__file__).resolve() 18 | ROOT = FILE.parents[0] 19 | DIR = Path(os.path.relpath(ROOT, Path.cwd())) 20 | 21 | if str(DIR) not in sys.path: 22 | sys.path.append(str(DIR)) 23 | 24 | import array_tools 25 | 26 | def get_file_id(f:str, delim:str = '_', parts:slice = slice(3,5), flag=False): 27 | """Return a unique identifyier from a file name 28 | 29 | Params 30 | --- 31 | f: str 32 | file basename 33 | delim: str 34 | delimiter optionally splitting filename into parts 35 | parts: slice 36 | slice identifying the parts to return 37 | 38 | Returns 39 | --- 40 | tuple: tuple of filename pieces 41 | """ 42 | stem = str(Path(f).stem) 43 | splits = stem.split(delim) 44 | ids = splits[parts] 45 | return 
tuple(ids) 46 | 47 | def match_files(urls, vars, delim:str = '_', parts:slice = slice(3,5), subset: set = None, flatdirectory:bool = False): 48 | """Align files by unique id among variables 49 | Params 50 | --- 51 | urls: list:str 52 | unordered list of all filepaths to be sorted and aligned by variable 53 | vars: dict 54 | key, value pairs with variable names as keys (e.g., 'naip'). value = None will skip that variable 55 | delim: str 56 | delimiter optionally splitting filename into parts 57 | parts: slice 58 | slice identifying the parts to return 59 | subset: set 60 | optional. unique ids with which to further subset the returned files 61 | 62 | Returns 63 | --- 64 | dict: key, value pairs for each valid key in vars. variable names are key (e.g. 'naip') and values are corresponding list of files 65 | """ 66 | 67 | #print(len(subset)) 68 | vars_copy = copy.deepcopy(vars) 69 | 70 | if flatdirectory: 71 | files_dic = {key:[url for url in urls if f'_{key}_' in url] for key in vars_copy.keys() if vars_copy[key]['files'] is not None} 72 | else: 73 | files_dic = {key:[url for url in urls if f'/{key}/' in url] for key in vars_copy.keys() if vars_copy[key]['files'] is not None} 74 | 75 | ids = [set([get_file_id(f, delim, parts) for f in files]) for files in files_dic.values()] # list of sets per var 76 | 77 | intersection = set.intersection(*ids) 78 | 79 | if subset: 80 | intx = intersection.intersection(subset) 81 | else: 82 | intx = intersection 83 | 84 | for var, ls in files_dic.items(): 85 | subset = [f for f in ls if get_file_id(f, delim, parts) in intx] 86 | subset.sort() 87 | vars_copy[var].update({"files": subset}) 88 | 89 | return vars_copy 90 | 91 | def split_files(files, labels = ['label', 'lu', 'naip', 'lidar', 's2'], delim = '_', parts = slice(3,5)): 92 | """Divide list of .npy arrays into separate lists by source data (e.g. NAIP, S2, etc.) 
93 | 94 | Params 95 | --- 96 | files: list(str) 97 | list of files to be split 98 | labels: list(str) 99 | list of prefixes identifying subsets of files to return 100 | 101 | Return 102 | --- 103 | list, list, list: tuple of lists per file subset 104 | """ 105 | def get_file_id(f, parts): 106 | stem = str(Path(f).stem) 107 | splits = stem.split(delim) 108 | ids = splits[parts] 109 | return tuple(ids) 110 | 111 | indices = [set([get_file_id(f, parts) for f in files if label in Path(f).parts]) for label in labels] 112 | intersection = set.intersection(*indices) 113 | out_files = [[f for f in files if label in Path(f).parts and get_file_id(f, parts) in intersection] for label in labels] 114 | return out_files 115 | 116 | def calc_ndvi(input): 117 | """Caclulate NDVI from Sentinel-2 data 118 | Parameters: 119 | input (dict): dictionary of incoming tensors 120 | Returns: 121 | tensor 122 | """ 123 | epsilon = 1e-8 124 | nir = input.get('B8') 125 | red = input.get('B4') 126 | ndvi = tf.divide(tf.subtract(nir, red), tf.add(epsilon, tf.add(nir,red))) 127 | return ndvi 128 | 129 | def aug_tensor_color(img): 130 | n_ch = tf.shape(img)[-1] 131 | contra_adj = 0.05 132 | bright_adj = 0.05 133 | 134 | ch_mean = tf.math.reduce_mean(img, axis = (0,1), keepdims = True) 135 | #ch_mean = np.mean(img, axis=(0, 1), keepdims=True).astype(np.float32) 136 | 137 | contra_mul = tf.random.uniform(shape = (1, 1, n_ch), 138 | minval = 1-contra_adj, 139 | maxval = 1+contra_adj) 140 | # contra_mul = np.random.uniform(1 - contra_adj, 1 + contra_adj, (1, 1, n_ch)).astype( 141 | # np.float32 142 | # ) 143 | 144 | bright_mul = tf.random.uniform(shape = (1, 1, n_ch), 145 | minval = 1 - bright_adj, 146 | maxval = 1+bright_adj) 147 | # bright_mul = np.random.uniform(1 - bright_adj, 1 + bright_adj, (1, 1, n_ch)).astype( 148 | # np.float32 149 | # ) 150 | 151 | recolored = (img - ch_mean) * contra_mul + ch_mean * bright_mul 152 | return recolored 153 | 154 | def augColor(x, contra_adj = 0.05, bright_adj = 0.05): 155 | """Color augmentation 156 | 157 | Args: 158 | x: Image 159 | 160 | Returns: 161 | Augmented image 162 | """ 163 | x = tf.image.random_hue(x, 0.05) 164 | x = tf.image.random_saturation(x, 0.6, 1.6) 165 | x = tf.image.random_brightness(x, 0.05) 166 | x = tf.image.random_contrast(x, 0.7, 1.3) 167 | return x 168 | 169 | def aug_tensor_morph(img): 170 | """ 171 | Perform image augmentation on tfRecords 172 | Parameters: 173 | img (TFRecord): 4D tensor 174 | Returns: 175 | 3D tensor: 176 | """ 177 | outDims = tf.shape(img)[0:1] 178 | x = tf.image.random_flip_left_right(img) 179 | x = tf.image.random_flip_up_down(x) 180 | x = tf.image.rot90(x, tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)) 181 | #x = zoom(x, outDims) 182 | #since were gonna map_fn this on a 4d image, output must be 3d, so squeeze the artificial 'sample' dimension 183 | return tf.squeeze(x) 184 | 185 | def normalize_timeseries(arr, maxval = 10000, axis = -1, e = 0.00001): 186 | # normalize band values across timesteps 187 | normalized = arr/maxval 188 | # mn = np.nanmean(arr, axis = axis, keepdims = True) 189 | # std = np.nanstd(arr, axis = axis, keepdims = True) 190 | # normalized = (arr - mn)/(std+e) 191 | # replace nans with zeros? 
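# Usage sketch (editorial, hedged): pairing chips across sources with get_file_id() and
# match_files() above. The URLs and variable dictionary are illustrative assumptions; with
# the default delim='_' and parts=slice(3, 5), a name like
# 'chips/naip/naip_chip_md_0012_2018.npy' yields the id ('0012', '2018').
def _example_match_files():
    urls = ['chips/naip/naip_chip_md_0012_2018.npy',
            'chips/label/label_chip_md_0012_2018.npy']
    variables = {'naip': {'files': []}, 'label': {'files': []}}
    matched = match_files(urls, variables)
    # each variable now holds a sorted list of files sharing the same chip ids
    return matched['naip']['files'], matched['label']['files']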
192 | finite = np.where(np.isnan(normalized), 0.0, normalized) 193 | return finite 194 | 195 | def rearrange_timeseries(arr, nbands, time_dim = 1): 196 | # the number of time steps is in the 1st dimension if our data is (B, T, H, W, C) 197 | timesteps = arr.shape[time_dim] 198 | # randomly pick one of the timesteps as the starting time 199 | starttime = randint(0, timesteps-1) 200 | # print('start', starttime) 201 | # grab all timesteps leading up to the timestep corresponding to our random first 202 | last = arr[:,0:starttime,:,:,:] 203 | # print('last shape', last.shape) 204 | first = arr[:,starttime:timesteps,:,:,:] 205 | # print('start shape', first.shape) 206 | rearranged = np.concatenate([first, last], axis = 1) 207 | rearranged.shape == arr.shape 208 | 209 | feats = rearranged[:,0:-1,:,:,:] 210 | labels = rearranged[:,-1,:,:,0:nbands] 211 | 212 | # confirm there are no all-nan images in labels 213 | batch_sums = np.sum(labels, axis = (1,2,3)) 214 | if 0.0 in batch_sums: 215 | print('all nan labels, reshuffling') 216 | feats, labels, starttime = rearrange_timeseries(arr, nbands) 217 | 218 | return(feats, labels, starttime) 219 | 220 | def sin_cos(t:int, freq:int = 6) -> tuple: 221 | x = t/freq 222 | theta = 2*math.pi * x 223 | return (math.sin(theta), math.cos(theta)) 224 | 225 | def normalize_tensor(x, axes=[2], epsilon=1e-8, moments = None, splits = None): 226 | """ 227 | Standardize incoming image patches by mean and variance. 228 | 229 | Moments can be calculated based on patch data by providing axes: 230 | To standardize each pixel use axes = [2] 231 | To standardize each channel use axes = [0, 1] 232 | To standardize globally use axes = [0, 1, 2] 233 | 234 | To standardize by global, or per-channel moments supply a list of [mean, variance] tuples. 235 | To standardize groups of channels separately, identify the size of each group. Groups of 236 | channels must be stacked contiguously and group sizes must sum to the total # of channels 237 | 238 | Parameters: 239 | x (tensor): nD image tensor 240 | axes (array): Array of ints. Axes along which to compute mean and variance, usually length n-1 241 | epsilon (float): small number to avoid dividing by zero 242 | moments (list): list of global mean, variance tuples for standardization 243 | splits (list): size(s) of groups of features to be kept together 244 | Return: 245 | tensor: nD image tensor normalized by channels 246 | """ 247 | 248 | # define a basic function to normalize a 3d tensor 249 | def normalize(x): 250 | # shape = tf.shape(x).numpy() 251 | # if we've defined global or per-channel moments... 
252 | if moments: 253 | # cast moments to arrays for mean and variance 254 | mean = np.array([tpl[0] for tpl in moments], dtype = 'float32') 255 | variance = np.array([tpl[1] for tpl in moments], dtype = 'float32') 256 | # otherwise, calculate moments along provided axes 257 | else: 258 | mean, variance = tf.nn.moments(x, axes, keepdims = True) 259 | # keepdims = True to ensure compatibility with input tensor 260 | 261 | # normalize the input tensor 262 | normed = (x - mean)/tf.sqrt(variance + epsilon) 263 | return normed 264 | 265 | 266 | # if splits are given, apply tensor normalization to each split 267 | if splits: 268 | splitLen = sum(splits) 269 | toNorm = x[:,:,0:splitLen] 270 | dontNorm = x[:,:,splitLen:] 271 | tensors = tf.split(toNorm, splits, axis = 2) 272 | normed = [normalize(tensor) for tensor in tensors] 273 | normed.append(dontNorm) 274 | # gather normalized splits into single tensor 275 | x_normed = tf.concat(normed, axis = 2) 276 | else: 277 | x_normed = normalize(x) 278 | 279 | return x_normed 280 | 281 | def rescale_tensor(img, axes = [2], epsilon=1e-8, moments = None, splits = None): 282 | """ 283 | Rescale incoming image patch to [0,1] based on min and max values 284 | 285 | Min, max can be calculated based on patch data by providing axes: 286 | To rescale each pixel use axes = [2] 287 | To rescale each channel use axes = [0, 1] 288 | To rescale globally use axes = [0, 1, 2] 289 | 290 | To rescale by global, or per-channel moments supply a list of [mean, variance] tuples. 291 | To rescale groups of channels separately, identify the size of each group. Groups of 292 | channels must be stacked contiguously and group sizes must sum to the total # of channels 293 | 294 | Args: 295 | img (tensor): 3D (H,W,C) image tensor 296 | axes (list): axes along which to calculate min/max for rescaling 297 | moments (list): list of [min, max] tuples for standardization 298 | splits (list): size(s) of groups of features to be kept together 299 | Return: 300 | tensor: 3D tensor of same shape as input, with values [0,1] 301 | """ 302 | def rescale(img): 303 | if moments: 304 | minimum = np.array([tpl[0] for tpl in moments], dtype = 'float32') 305 | maximum = np.array([tpl[1] for tpl in moments], dtype = 'float32') 306 | else: 307 | minimum = tf.math.reduce_min(img, axis = axes, keepdims = True) 308 | maximum = tf.math.reduce_max(img, axis = axes, keepdims = True) 309 | scaled = (img - minimum)/((maximum - minimum) + epsilon) 310 | # scaled = tf.divide(tf.subtract(img, minimum), tf.add(tf.subtract(maximum, minimum)) 311 | return scaled 312 | 313 | # if splits are given, apply tensor normalization to each split 314 | if splits: 315 | tensors = tf.split(img, splits, axis = 2) 316 | rescaled = [rescale(tensor) for tensor in tensors] 317 | # gather normalized splits into single tensor 318 | img_rescaled = tf.concat(rescaled, axis = 2) 319 | else: 320 | img_rescaled = rescale(img) 321 | 322 | return img_rescaled 323 | 324 | #def parse_tfrecord(example_proto, ftDict): 325 | # """The parsing function. 326 | # Read a serialized example into the structure defined by FEATURES_DICT. 327 | # Args: 328 | # example_proto: a serialized Example. 329 | # Returns: 330 | # A dictionary of tensors, keyed by feature name. 331 | # """ 332 | # return tf.io.parse_single_example(example_proto, ftDict) 333 | 334 | 335 | def to_tuple(inputs, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 336 | """Function to convert a dictionary of tensors to a tuple of (inputs, outputs). 
337 | Turn the tensors returned by parse_tfrecord into a stack in HWC shape. 338 | Args: 339 | inputs (dict): A dictionary of tensors, keyed by feature name. Response 340 | variable must be the last item. 341 | features (list): List of input feature names 342 | respones (str): response name(s) 343 | axes (list): axes along which to calculate moments for rescaling 344 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth 345 | splits (list): size(s) of groups of features to be kept together 346 | moments (list): list of [mean, var] tuples for standardization 347 | Returns: 348 | A dtuple of (inputs, outputs). 349 | """ 350 | # one_hot = kwargs.get('one_hot') 351 | # splits = kwargs.get('splits') 352 | # moments = kwargs.get('moments') 353 | 354 | # If custom preprocessing functions are specified add respective bands 355 | for fxn in kwargs.values(): 356 | der = fxn(inputs) 357 | inputs = der 358 | 359 | # inputsList = [inputs.get(key) for key in features + [response]] 360 | if type(response) == dict: 361 | depth = list(response.values())[0] 362 | key = list(response.keys())[0] 363 | res = tf.squeeze(tf.one_hot(tf.cast(inputs.get(key), tf.uint8), depth = depth)) 364 | else: 365 | res = tf.expand_dims(inputs.get(response), axis = 2) 366 | 367 | # stack the augmented bands, optional one-hot tensors, and response variable 368 | if one_hot: 369 | featList = [inputs.get(key) for key in features if key not in one_hot.keys()] 370 | hotList= [tf.one_hot(tf.cast(inputs.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items() if key in features] 371 | # hotList = [tf.one_hot(tf.cast(inputs.get(key), tf.uint8), val, axis = 2) for key, val in one_hot.items()] 372 | else: 373 | featList = [inputs.get(key) for key in features] 374 | 375 | # stack, transpose, augment, and normalize continuous bands 376 | bands = tf.transpose(tf.stack(featList, axis = 0), [1,2,0]) 377 | bands = aug_tensor_color(bands) 378 | bands = rescale_tensor(bands, axes = axes, moments = moments, splits = splits) 379 | 380 | if one_hot: 381 | hotStack = tf.concat(hotList, axis = 2) 382 | stacked = tf.concat([bands, hotStack, res], axis =2) 383 | else: 384 | stacked = tf.concat([bands, res], axis = 2) 385 | 386 | # perform morphological augmentation 387 | stacked = aug_tensor_morph(stacked) 388 | 389 | feats = stacked[:, :, :-res.shape[2]] 390 | labels = stacked[:, :, -res.shape[2]:] 391 | labels = tf.where(tf.greater(labels, 1.0), 1.0, labels) 392 | return feats, labels 393 | 394 | def get_dataset(files, ftDict, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 395 | """Function to read, parse and format to tuple a set of input tfrecord files. 396 | Get all the files matching the pattern, parse and convert to tuple. 
397 | Args: 398 | files (list): A list of filenames storing tfrecords 399 | FtDict (dic): Dictionary of input features in tfrecords 400 | features (list): List of input feature names 401 | respones (str): response name(s) 402 | axes (list): axes along which to calculate moments for rescaling 403 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth 404 | splits (list): size(s) of groups of features to be kept together 405 | moments (list): list of [mean, var] tuples for standardization 406 | Returns: 407 | A tf.data.Dataset 408 | """ 409 | 410 | def parse_tfrecord(example_proto): 411 | return tf.io.parse_single_example(example_proto, ftDict) 412 | 413 | def tupelize(ftDict): 414 | return to_tuple(ftDict, features, response, axes, splits, one_hot, moments, **kwargs) 415 | 416 | dataset = tf.data.TFRecordDataset(files, compression_type='GZIP') 417 | dataset = dataset.map(parse_tfrecord, num_parallel_calls=5) 418 | dataset = dataset.map(tupelize, num_parallel_calls=5) 419 | return dataset 420 | 421 | def get_training_dataset(files, ftDict, features, response, buff, batch = 16, repeat = True, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 422 | """ 423 | Get the preprocessed training dataset 424 | Args: 425 | files (list): list of tfrecord files to be used for training 426 | FtDict (dic): Dictionary of input features in tfrecords 427 | features (list): List of input feature names 428 | respones (str): response name(s) 429 | axes (list): axes along which to calculate moments for rescaling 430 | buffer (int): buffer size for shuffle 431 | batch (int): batch size for training 432 | repeat (bool): should the dataset be repeated 433 | Returns: 434 | A tf.data.Dataset of training data. 435 | """ 436 | dataset = get_dataset(files, ftDict, features, response, axes, splits, one_hot, moments, **kwargs) 437 | if repeat: 438 | dataset = dataset.shuffle(buff).batch(batch).repeat() 439 | else: 440 | dataset = dataset.shuffle(buff).batch(batch) 441 | return dataset 442 | 443 | def get_eval_dataset(files, ftDict, features, response, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 444 | """ 445 | Get the preprocessed evaluation dataset 446 | Args: 447 | files (list): list of tfrecords to be used for evaluation 448 | Returns: 449 | A tf.data.Dataset of evaluation data. 450 | """ 451 | 452 | dataset = get_dataset(files, ftDict, features, response, axes, splits, one_hot, moments, **kwargs) 453 | dataset = dataset.batch(1) 454 | return dataset 455 | 456 | class UNETDataGenerator(tf.keras.utils.Sequence): 457 | """Generates data for Keras 458 | Sequence based data generator. Suitable for building data generator for training and prediction. 
459 | """ 460 | def __init__(self, labelfiles = None, s2files = None, naipfiles = None, 461 | hagfiles = None, lidarfiles = None, lufiles = None, 462 | demfiles = None, ssurgofiles = None, 463 | to_fit=True, batch_size=32, unet_dim=(256, 256), 464 | n_channels=4, n_classes = 8, shuffle=True, 465 | splits = None, moments = None, 466 | lc_transitions = [(12,3), (11,3), (10,3), (9,8), (255, 0)], 467 | lu_transitions = [(82,9), (84,10)]): 468 | """Initialization 469 | 470 | :param files: list of all files to use in the generator 471 | :param to_fit: True to return X and y, False to return X only 472 | :param batch_size: batch size at each iteration 473 | :param dim: tuple indicating image dimension 474 | :param n_channels: number of image channels 475 | :param n_classes: number of output masks 476 | :param n_timesteps: number of multi-channel images 477 | :param shuffle: True to shuffle label indexes after every epoch 478 | """ 479 | self.s2files = s2files 480 | self.naipfiles = naipfiles 481 | self.hagfiles = hagfiles 482 | self.demfiles = demfiles 483 | self.ssurgofiles = ssurgofiles 484 | self.lidarfiles = lidarfiles 485 | self.labelfiles = labelfiles 486 | self.lufiles = lufiles 487 | self.to_fit = to_fit 488 | self.batch_size = batch_size 489 | self.unet_dim = unet_dim 490 | self.n_channels = n_channels 491 | self.n_classes = n_classes 492 | self.shuffle = shuffle 493 | self.splits = splits 494 | self.moments = moments 495 | self.lc_trans = lc_transitions 496 | self.lu_trans = lu_transitions 497 | self.indexes = np.arange(len(self.labelfiles)) 498 | self.mask = False 499 | self.on_epoch_end() 500 | 501 | # do an initial shuffle for cases where the generator is called fresh at the start of each epoch 502 | if self.shuffle == True: 503 | print('shuffling') 504 | np.random.shuffle(self.indexes) 505 | 506 | if self.to_fit == True: 507 | print('masking on') 508 | self.mask = True 509 | 510 | def __len__(self): 511 | """Denotes the number of batches per epoch 512 | 513 | :return: number of batches per epoch 514 | """ 515 | return int(np.floor(len(self.indexes) / self.batch_size)) 516 | 517 | def on_epoch_end(self): 518 | """Updates indexes after each epoch 519 | 520 | """ 521 | print('the generator knows the epoch ended') 522 | self.indexes = np.arange(len(self.indexes)) 523 | if self.shuffle == True: 524 | print('shuffling') 525 | np.random.shuffle(self.indexes) 526 | 527 | @staticmethod 528 | def load_numpy_url(url): 529 | 530 | if os.path.exists(url): 531 | data = np.load(url) 532 | else: 533 | response = requests.get(url) 534 | response.raise_for_status() 535 | data = np.load(io.BytesIO(response.content)) 536 | 537 | return(data) 538 | 539 | def _load_numpy_data(self, files_temp): 540 | arrays = [UNETDataGenerator.load_numpy_url(f) for f in files_temp] 541 | return(arrays) 542 | 543 | def _get_unet_data(self, files_temp, add_nan_mask = False,rescale_val=False): 544 | # arrays come from PC in (C, H, W) format 545 | arrays = self._load_numpy_data(files_temp) 546 | try: 547 | assert len(arrays) > 0 548 | assert all([len(x.shape) == 3 for x in arrays]), 'all arrays not 3D' 549 | # ensure all arrays are C, H, W to start 550 | chw = [np.moveaxis(x, source = -1, destination = 0) if x.shape[-1] < x.shape[0] else x for x in arrays] 551 | if rescale_val is not False: 552 | chw = [x/rescale_val for x in chw] 553 | if add_nan_mask == True: 554 | chw_new = [] 555 | for cur_array in chw: 556 | 557 | mask_channel = np.zeros([cur_array.shape[1], cur_array.shape[2]]) 558 | # Create a random array to be used 
to replace the original data 559 | if self.to_fit: 560 | for arr_2d in cur_array: 561 | nans = np.isnan(arr_2d) 562 | bads = arr_2d < -5000 563 | mask_channel[nans==True] = 1 564 | mask_channel[bads==True] = 1 565 | arr_2d[mask_channel==1] = np.random.randn((mask_channel==1).sum()) 566 | # arr_2d[nans==True] = np.random.uniform() 567 | #arr_2d[np.isnan(arr_2d)] = np.random.randn(len(arr_2d[np.isnan(arr_2d)])) 568 | #print("AFTER FIX:",np.isnan(cur_array).sum()) 569 | #cur_array = np.vstack((cur_array, mask[None,:,:])) 570 | 571 | 572 | """randarr = np.random.uniform(size=cur_array.shape)*cur_array.max() 573 | # Build a mask layer to use in the replacement 574 | n_cols = cur_array.shape[2] 575 | n_rows = cur_array.shape[1] 576 | mask_channel = np.ones((n_rows, n_cols), dtype=np.int8) 577 | np.any(cur_array == np.nan, axis=0, out=mask_channel) 578 | # Replace the values in any of the channels where the mask_channel is 0 with the values from the random array 579 | cur_array[:, mask_channel == 1] = randarr[:, mask_channel == 1] 580 | cur_array[:, mask_channel == 1] = randarr[:, mask_channel == 1] """ 581 | cur_array = np.append(cur_array, mask_channel[np.newaxis, :, :], axis=0) 582 | #print("AFTER:",np.isnan(cur_array).sum()) 583 | chw_new.append(cur_array) 584 | chw = chw_new 585 | batch = np.stack(chw, axis = 0) 586 | assert np.isnan(batch).sum() < 1, 'nans in batch, skipping' 587 | in_shape = batch.shape 588 | # in case our incoming data is of different size than we want, define a trim amount 589 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2) 590 | # If necessary, trim data to (-1, dims[0], dims[1]) 591 | array = batch[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]] 592 | # rearrange arrays from (B, C, H, W) -> (B, H, W, C) expected by model 593 | 594 | reshaped = np.moveaxis(array, source = 1, destination = 3) 595 | return reshaped 596 | except AssertionError as msg: 597 | print(msg) 598 | return None 599 | def _get_naip_data(self, indexes): 600 | files_temp = [self.naipfiles[k] for k in indexes] 601 | naip = self._get_unet_data(files_temp,rescale_val=255.0) 602 | if type(naip) == np.ndarray: 603 | 604 | if self.to_fit: 605 | recolored = array_tools.aug_array_color(naip) 606 | return recolored 607 | return naip 608 | #else: 609 | #return naip 610 | 611 | def _get_s2_data(self, indexes): 612 | files_temp = [self.s2files[k] for k in indexes] 613 | s2 = self._get_unet_data(files_temp,rescale_val=10000.0) 614 | if type(s2) == np.ndarray: 615 | if self.to_fit: 616 | recolored = array_tools.aug_array_color(s2) 617 | return recolored 618 | else: 619 | return s2 620 | #else: 621 | #return s2 622 | 623 | def _get_lidar_data(self, indexes): 624 | files_temp = [self.lidarfiles[k] for k in indexes] 625 | lidar = self._get_unet_data(files_temp,self.mask,rescale_val=100) 626 | if type(lidar) == np.ndarray: 627 | return lidar 628 | 629 | def _get_hag_data(self, indexes): 630 | files_temp = [self.hagfiles[k] for k in indexes] 631 | hag = self._get_unet_data(files_temp, self.mask, rescale_val=100) 632 | if type(hag) == np.ndarray: 633 | return hag 634 | #else: 635 | # return hag 636 | 637 | def _get_dem_data(self, indexes): 638 | files_temp = [self.demfiles[k] for k in indexes] 639 | dem = self._get_unet_data(files_temp,self.mask,rescale_val=2000.0) 640 | if type(dem) == np.ndarray: 641 | # we are going to use the min and max elevations across the chesapeake 642 | return dem 643 | #else: 644 | # return dem 645 | 646 | def _get_ssurgo_data(self, 
indexes): 647 | files_temp = [self.ssurgofiles[k] for k in indexes] 648 | ssurgo = self._get_unet_data(files_temp) 649 | if type(ssurgo) == np.ndarray: 650 | return ssurgo 651 | 652 | def _process_y(self, indexes): 653 | # get label files for current batch 654 | lc_files = [self.labelfiles[k] for k in indexes] 655 | # lc_arrays = [np.load(file) for file in lc_files] 656 | lc_arrays = self._load_numpy_data(lc_files) 657 | 658 | try: 659 | assert len(lc_arrays) == self.batch_size 660 | assert all([x.shape == (1, self.unet_dim[0], self.unet_dim[1]) for x in lc_arrays]) 661 | lc = np.stack(lc_arrays, axis = 0) #(B, C, H, W) 662 | int_labels = lc.astype(int) 663 | 664 | # optionally reduce the number of classes 665 | if self.lc_trans: 666 | merged_labels = array_tools.merge_classes(cond_array = int_labels, trans = self.lc_trans, out_array = int_labels) 667 | else: 668 | merged_labels = int_labels 669 | 670 | if self.lufiles: 671 | lu_files = [self.lufiles[k] for k in indexes] 672 | # lu_arrays = [np.load(file) for file in lu_files] 673 | lu_arrays = self._load_numpy_data(lu_files) 674 | try: 675 | assert len(lu_arrays) == self.batch_size 676 | assert all([x.shape == (1, self.unet_dim[0], self.unet_dim[1]) for x in lu_arrays]) 677 | lu = np.stack(lu_arrays, axis = 0) #(B, C, H, W) 678 | y = array_tools.merge_classes(cond_array = lu, trans = self.lu_trans, out_array = merged_labels) 679 | except AssertionError: 680 | return None 681 | else: 682 | y = merged_labels 683 | 684 | # If necessary, trim data to (-1, dims[0], dims[1]) 685 | in_shape = y.shape 686 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2) 687 | array = y[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]] 688 | 689 | # shift range of categorical labels from [1, n_classes] to [0, n_classes] 690 | zeroed = array 691 | # create one-hot representation of classes 692 | one_hot = tf.one_hot(zeroed, self.n_classes) 693 | # one_hot = to_one_hot(zeroed, self.n_classes) 694 | return tf.squeeze(one_hot) 695 | 696 | except AssertionError: 697 | return None 698 | 699 | def __getitem__(self, index): 700 | """Generate one batch of data 701 | 702 | :param index: index of the batch 703 | :return: X and y when fitting. 
X only when predicting 704 | """ 705 | # Generate indexes of the batch 706 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] 707 | 708 | datasets = [] 709 | 710 | if self.s2files: 711 | s2Data = self._get_s2_data(indexes) 712 | datasets.append(s2Data) 713 | 714 | if self.naipfiles: 715 | naipData = self._get_naip_data(indexes) 716 | #print("appending Naip data",type(naipData)) 717 | datasets.append(naipData) 718 | 719 | if self.hagfiles: 720 | hagData = self._get_hag_data(indexes) 721 | datasets.append(hagData) 722 | 723 | if self.demfiles: 724 | demData = self._get_dem_data(indexes) 725 | # print('dem', demData.shape) 726 | #print("appening DEM data",type(demData)) 727 | datasets.append(demData) 728 | 729 | if self.ssurgofiles: 730 | ssurgoData = self._get_ssurgo_data(indexes) 731 | # print('ssurgo', ssurgoData.shape 732 | #print("appending ssurgoData",type(ssurgoData)) 733 | datasets.append(ssurgoData) 734 | 735 | if self.lidarfiles: 736 | lidarData = self._get_lidar_data(indexes) 737 | datasets.append(lidarData) 738 | 739 | if any([type(dat) != np.ndarray for dat in datasets]): 740 | pass 741 | else: 742 | xData = np.concatenate(datasets, axis = -1) 743 | 744 | if self.to_fit: 745 | labels = self._process_y(indexes) 746 | # perform morphological augmentation - expects a 3D (H, W, C) image array 747 | stacked = np.concatenate([xData, labels], axis = -1) 748 | morphed = array_tools.aug_array_morph(stacked) 749 | # print('augmented max', np.nanmax(augmented, axis = (0,1,2))) 750 | 751 | feats = morphed[:,:,:,0:self.n_channels] 752 | labels = morphed[:,:,:,self.n_channels:] 753 | return feats, labels 754 | else: 755 | return xData 756 | 757 | class SiameseDataGenerator(UNETDataGenerator): 758 | def __init__(self, beforefiles, afterfiles, add_nan_mask: bool, *args, **kwargs): 759 | super().__init__(*args, **kwargs) 760 | self.beforefiles = beforefiles 761 | self.afterfiles = afterfiles 762 | self.mask = add_nan_mask 763 | 764 | # do an initial shuffle for cases where the generator is called fresh at the start of each epoch 765 | if self.shuffle == True: 766 | print('shuffling') 767 | np.random.shuffle(self.indexes) 768 | print(self.batch_size) 769 | def __len__(self): 770 | """Denotes the number of batches per epoch 771 | 772 | :return: number of batches per epoch 773 | """ 774 | return UNETDataGenerator.__len__(self) 775 | 776 | def on_epoch_end(self): 777 | """Updates indexes after each epoch 778 | 779 | """ 780 | UNETDataGenerator.on_epoch_end(self) 781 | 782 | def _get_unet_data(self, files_temp, add_nan_mask = False,rescale_val=None): 783 | # arrays come from PC in (C, H, W) format 784 | arrays = self._load_numpy_data(files_temp) 785 | try: 786 | assert len(arrays) > 0 787 | assert all([len(x.shape) == 3 for x in arrays]), 'all arrays not 3D' 788 | # ensure all arrays are C, H, W to start 789 | chw = [np.moveaxis(x, source = -1, destination = 0) if x.shape[-1] < x.shape[0] else x for x in arrays] 790 | if rescale_val is not None: 791 | chw = [x/rescale_val for x in chw] 792 | batch = np.stack(chw, axis = 0) 793 | # assert np.isnan(batch).sum() < 1, 'nans in batch, skipping' 794 | in_shape = batch.shape 795 | # in case our incoming data is of different size than we want, define a trim amount 796 | trim = ((in_shape[2] - self.unet_dim[0])//2, (in_shape[3] - self.unet_dim[1])//2) 797 | # If necessary, trim data to (-1, dims[0], dims[1]) 798 | array = batch[:,:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]] 799 | # rearrange arrays from (B, 
C, H, W) -> (B, H, W, C) expected by model 800 | 801 | reshaped = np.moveaxis(array, source = 1, destination = 3) 802 | nans = np.isnan(reshaped) 803 | if add_nan_mask: 804 | mask = np.ones(shape = reshaped.shape) # create a mask with all valid pixels by default 805 | mask[nans] = 0 # inject zeros into mask at invalid pixels 806 | mask[reshaped < -1] = 0 807 | reduced_mask = mask.min(axis = -1, keepdims = True) # reduce mask along channels -> (B, C, H, 1) 808 | reshaped[nans] = np.random.random(nans.sum()) # replace nan values with random val from [0,1) 809 | masked = np.concatenate([reshaped, reduced_mask], axis = -1) # add mask to batch as additional channel 810 | return reshaped, reduced_mask 811 | else: 812 | assert np.isnan(reshaped).sum() < 1, 'nans in batch, skipping' 813 | return reshaped, None 814 | 815 | except AssertionError as msg: 816 | print(msg) 817 | return None, None 818 | 819 | def _process_y(self, indexes): 820 | # get label files for current batch 821 | files_temp = [self.labelfiles[k] for k in indexes] 822 | lc_files = self.load_numpy_data(files_temp) 823 | lc_arrays = [np.squeeze(f) for f in lc_files] # make all labels 2D to start 824 | try: 825 | assert len(lc_arrays) == self.batch_size 826 | lc = np.stack(lc_arrays, axis = 0) #(B, H, W) 827 | int_labels = lc.astype(int) 828 | binary = np.where(int_labels > 1, 1, int_labels) 829 | # If necessary, trim data to (-1, dims[0], dims[1]) 830 | in_shape = binary.shape # -> (B, H, W) 831 | trim = ((in_shape[1] - self.unet_dim[0])//2, (in_shape[2] - self.unet_dim[1])//2) 832 | array = binary[:,trim[0]:self.unet_dim[0]+trim[0], trim[1]:self.unet_dim[1]+trim[1]] 833 | 834 | # add channel dimension (B, H, W) -> (B, H, W, C) expected by model 835 | reshaped = np.expand_dims(array, -1) 836 | return reshaped 837 | except AssertionError: 838 | return None 839 | 840 | def _get_before_data(self, indexes, rescale_val): 841 | files_temp = [self.beforefiles[k] for k in indexes] 842 | s2, bef_mask = self._get_unet_data(files_temp, add_nan_mask = self.mask, rescale_val=rescale_val) 843 | if type(s2) == np.ndarray: 844 | if self.to_fit: 845 | recolored = array_tools.aug_array_color(s2) 846 | return recolored, bef_mask 847 | else: 848 | return s2, bef_mask 849 | 850 | def _get_after_data(self, indexes, rescale_val): 851 | files_temp = [self.afterfiles[k] for k in indexes] 852 | s2, aft_mask = self._get_unet_data(files_temp, add_nan_mask = self.mask, rescale_val=rescale_val) 853 | if type(s2) == np.ndarray: 854 | if self.to_fit: 855 | recolored = array_tools.aug_array_color(s2) 856 | return recolored, aft_mask 857 | else: 858 | return s2, aft_mask 859 | 860 | def __getitem__(self, index): 861 | """Generate one batch of data 862 | 863 | :param index: index of the batch 864 | :return: X and y when fitting. 
X only when predicting 865 | """ 866 | # Generate indexes of the batch 867 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] 868 | 869 | befData, befMask = self._get_before_data(indexes, rescale_val = 10000.0) 870 | 871 | aftData, aftMask = self._get_after_data(indexes, rescale_val = 10000.0) 872 | 873 | labels = self._process_y(indexes) 874 | 875 | # perform morphological augmentation - expects a 3D (H, W, C) image array 876 | # if all([befData is not None, aftData is not None, labels is not None]): 877 | if self.mask: 878 | mask = np.concatenate([befMask, aftMask], axis = -1).min(axis = -1, keepdims= True) 879 | labels = labels * mask 880 | 881 | stacked = np.concatenate([befData, aftData, labels], axis = -1) 882 | 883 | # print('augmented max', np.nanmax(augmented, axis = (0,1,2))) 884 | 885 | if self.to_fit: 886 | morphed = array_tools.aug_array_morph(stacked) 887 | feats_b = morphed[:,:,:,0:self.n_channels] 888 | feats_a = morphed[:,:,:,self.n_channels:2*(self.n_channels)] 889 | labels = morphed[:,:,:,-1:] 890 | return [feats_b, feats_a], labels 891 | else: 892 | return [befData, aftData] 893 | 894 | 895 | class LSTMDataGenerator(tf.keras.utils.Sequence): 896 | """Generates data for Keras 897 | Sequence based data generator. Suitable for building data generator for training and prediction. 898 | """ 899 | def __init__(self, files = None, 900 | to_fit=True, batch_size=32, dim=(256, 256), 901 | n_channels=4, n_timesteps = 6, shuffle=True): 902 | """Initialization 903 | 904 | :param files: list of all files to use in the generator 905 | :param to_fit: True to return X and y, False to return X only 906 | :param batch_size: batch size at each iteration 907 | :param dim: tuple indicating image dimension 908 | :param n_channels: number of image channels 909 | :param n_classes: number of output masks 910 | :param n_timesteps: number of multi-channel images 911 | :param shuffle: True to shuffle label indexes after every epoch 912 | """ 913 | self.files = files 914 | self.to_fit = to_fit 915 | self.batch_size = batch_size 916 | self.dim = dim 917 | self.n_channels = n_channels 918 | self.n_timesteps = n_timesteps 919 | self.shuffle = shuffle 920 | self.on_epoch_end() 921 | 922 | def __len__(self): 923 | """Denotes the number of batches per epoch 924 | 925 | :return: number of batches per epoch 926 | """ 927 | return int(np.floor(len(self.files) / self.batch_size)) 928 | 929 | def on_epoch_end(self): 930 | """Updates indexes after each epoch 931 | 932 | """ 933 | self.indexes = np.arange(len(self.files)) 934 | if self.shuffle == True: 935 | np.random.shuffle(self.indexes) 936 | 937 | def _load_numpy_data(self, files_temp): 938 | arrays = [UNETDataGenerator.load_numpy_url(f) for f in files_temp] 939 | return(arrays) 940 | 941 | def __getitem__(self, index): 942 | """Generate one batch of data 943 | 944 | :param index: index of the batch 945 | :return: X and y when fitting. 
X only when predicting
946 |         """
947 |         # Generate indexes of the batch
948 |         indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
949 | 
950 |         # Find list of IDs
951 |         files_temp = [self.files[k] for k in indexes]
952 |         # arrays come from PC in (T, C, H, W) format
953 |         arrays = self._load_numpy_data(files_temp)
954 | 
955 |         trim = ((arrays[0].shape[2] - self.dim[0])//2, (arrays[0].shape[3] - self.dim[1])//2)
956 |         # TEMPORARY FIX: drop the last image to give us a series of 5
957 |         array = [arr[0:self.n_timesteps,:,trim[0]:self.dim[0]+trim[0],trim[1]:self.dim[1]+trim[1]] for arr in arrays]
958 | 
959 |         # create a single (B, T, C, H, W) array
960 |         batch = np.stack(array, axis = 0)
961 |         # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model
962 |         reshaped = np.moveaxis(batch, source = 2, destination = 4)
963 |         normalized = normalize_timeseries(reshaped, axis = 1)
964 |         # harmonized = add_harmonic(normalized)
965 |         if self.to_fit:
966 |             rearranged = rearrange_timeseries(normalized, self.n_channels)
967 |             feats, labels = array_tools.split_timeseries(rearranged)
968 |             # we can't have nans in label
969 |             return feats, labels
970 |         else:
971 |             print('normalized dims', normalized.shape)
972 |             return normalized
973 | 
974 | class LSTMAutoencoderGenerator(LSTMDataGenerator):
975 |     """Generates data for Keras
976 |     Sequence based data generator. Suitable for building data generator for training and prediction.
977 |     """
978 |     def __init__(
979 |             self, harmonics = True, sample_weights = False, *args, **kwargs):
980 |         """Initialization
981 | 
982 |         :param harmonics: True to generate harmonic time features for each batch
983 |         :param sample_weights: True to return per-pixel sample weights alongside X and y
984 |         :param batch_size: batch size at each iteration
985 |         :param dim: tuple indicating image dimension
986 |         :param n_channels: number of image channels
987 |         :param n_classes: number of output masks
988 |         :param n_timesteps: number of multi-channel images
989 |         :param shuffle: True to shuffle label indexes after every epoch
990 |         """
991 |         super().__init__(*args, **kwargs)
992 |         self.add_harmonics = harmonics
993 |         self.sample_weights = sample_weights
994 |         self.on_epoch_end()
995 | 
996 |     def __len__(self):
997 |         return LSTMDataGenerator.__len__(self)
998 | 
999 |     def on_epoch_end(self):
1000 |         LSTMDataGenerator.on_epoch_end(self)
1001 | 
1002 |     def __getitem__(self, index):
1003 |         """Generate one batch of data
1004 | 
1005 |         :param index: index of the batch
1006 |         :return: X and y when fitting. 
X only when predicting 1007 | """ 1008 | # Generate indexes of the batch 1009 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] 1010 | 1011 | # Find list of IDs 1012 | files_temp = [self.files[k] for k in indexes] 1013 | 1014 | # arrays come from PC in (T, C, H, W) format 1015 | arrays = self._load_numpy_data(files_temp) 1016 | 1017 | # creat a single (B, T, C, H, W) array 1018 | batch = np.stack(arrays, axis = 0) 1019 | 1020 | # in case our incoming data is of different size than we want, define a trim amount 1021 | trim = ((batch.shape[3] - self.dim[0])//2, (batch.shape[4] - self.dim[1])//2) 1022 | 1023 | # n_timesteps + 1 to account for the fact that the sequence includes the next image as target 1024 | array = batch[:, 0:self.n_timesteps+1,:,trim[0]:self.dim[0]+trim[0],trim[1]:self.dim[1]+trim[1]] 1025 | 1026 | # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model 1027 | reshaped = np.moveaxis(array, source = 2, destination = 4) 1028 | 1029 | normalized = normalize_timeseries(reshaped, axis = 1) 1030 | 1031 | # harmonized = add_harmonic(normalized) 1032 | if self.add_harmonics: 1033 | # get start dates for each file 1034 | starts = [int(Path(f).stem.split('_')[2]) for f in files_temp] 1035 | else: 1036 | harmonics = None 1037 | 1038 | if self.to_fit: 1039 | feats, y, start = rearrange_timeseries(normalized, self.n_channels) 1040 | temporal_y = np.flip(feats, axis = 1) # reverse images along time dimension 1041 | weights = [None, abs(feats[:,-1,:,:,:] - y)/(feats[:,-1,:,:,:] + y)] if self.sample_weights else None 1042 | if self.add_harmonics: 1043 | starts = [x + start - self.n_timesteps for x in starts] 1044 | harmonics = array_tools.make_harmonics(starts, self.n_timesteps, self.dim) 1045 | return [feats, harmonics], [temporal_y, y], weights 1046 | else: 1047 | if self.add_harmonics: 1048 | harmonics = array_tools.make_harmonics(starts, self.n_timesteps, self.dim) 1049 | return [normalized, harmonics] 1050 | 1051 | class HybridDataGenerator(UNETDataGenerator): 1052 | """Generates data for Keras model with U-Net and LSTM branches 1053 | Sequence based data generator. Suitable for building data generator for training and prediction. 
1054 | """ 1055 | 1056 | def __init__(self, s1files, 1057 | lstm_dim = (6, 32, 32, 6), 1058 | lc_transitions = [(12,3), (11,3), (10,3), (9,8), (255, 0)], 1059 | lu_transitions = [(82,9), (84,10)], 1060 | unet_dim = (600,600), 1061 | *args, **kwargs): 1062 | """Class Initialization 1063 | 1064 | Params 1065 | --- 1066 | unet_dim: tuple 1067 | desired unet image H, W dimensions 1068 | lstm_dim: tuple 1069 | desired lstm image T, H, W, C dimensions 1070 | lc_transitions: list 1071 | list of ('from', to') tuples defining optional categorical reclassifications for lc data 1072 | lu_transitions: list 1073 | list of ('from', 'to') tuples defining optional categorical reclassificaitons for lu data 1074 | 1075 | Return 1076 | --- 1077 | tuple: three arrays containing batch of corresponding sentinel-2, naip, and label data 1078 | """ 1079 | super().__init__(*args, **kwargs) 1080 | self.s1files = s1files 1081 | self.lc_trans = lc_transitions 1082 | self.lu_trans = lu_transitions 1083 | self.lstm_dim = lstm_dim 1084 | self.unet_dim = unet_dim 1085 | self.n_timesteps = lstm_dim[0] 1086 | self.on_epoch_end() 1087 | 1088 | def _get_lstm_data(self, files_temp, rescale_val = 1.0, mask = False): 1089 | arrays = self._load_numpy_data(files_temp) 1090 | try: 1091 | assert len(arrays) > 0, "No Array Found" 1092 | assert all([x.shape == (self.lstm_dim[0], self.lstm_dim[3], self.lstm_dim[1], self.lstm_dim[2]) for x in arrays]), [x.shape for x in arrays] 1093 | 1094 | # creat a single (B, T, C, H, W) array 1095 | batch = np.stack(arrays, axis = 0) 1096 | # in case our incoming data is of different size than we want, define a trim amount 1097 | trim = ((batch.shape[3] - self.lstm_dim[1])//2, (batch.shape[4] - self.lstm_dim[2])//2) 1098 | 1099 | array = batch[:, 0:self.n_timesteps,:,trim[0]:self.lstm_dim[1]+trim[0],trim[1]:self.lstm_dim[2]+trim[1]] 1100 | 1101 | # rearrange arrays from (B, T, C, H, W) -> (B, T, H, W, C) expected by model 1102 | reshaped = np.moveaxis(array, source = 2, destination = 4) 1103 | normalized = normalize_timeseries(reshaped, maxval = rescale_val, axis = 1) 1104 | return normalized 1105 | except AssertionError as msg: 1106 | print(msg) 1107 | sys.exit() 1108 | return None 1109 | 1110 | def _get_s2_data(self, indexes): 1111 | files_temp = [self.s2files[k] for k in indexes] 1112 | normalized = self._get_lstm_data(files_temp, rescale_val = 10000.0) 1113 | if type(normalized) == np.ndarray: 1114 | if self.to_fit: 1115 | recolored = array_tools.aug_array_color(normalized) 1116 | return recolored 1117 | else: 1118 | return normalized 1119 | 1120 | def _get_s1_data(self, indexes): 1121 | files_temp = [self.s1files[k] for k in indexes] 1122 | normalized = self._get_lstm_data(files_temp, rescale_val = -50.0) 1123 | if type(normalized) == np.ndarray: 1124 | return normalized 1125 | 1126 | def __getitem__(self, index): 1127 | """Generate one batch of data 1128 | 1129 | :param index: index of the batch 1130 | :return: X and y when fitting. 
X only when predicting 1131 | """ 1132 | # Generate indexes of the batch 1133 | 1134 | indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] 1135 | 1136 | unetDatasets = [] 1137 | lstmDatasets = [] 1138 | if self.s2files: 1139 | s2Data = self._get_s2_data(indexes) 1140 | lstmDatasets.append(s2Data) 1141 | if self.s1files: 1142 | s1Data = self._get_s1_data(indexes) 1143 | lstmDatasets.append(s1Data) 1144 | if self.naipfiles: 1145 | naipData = self._get_naip_data(indexes) 1146 | unetDatasets.append(naipData) 1147 | if self.demfiles: 1148 | demData = self._get_dem_data(indexes) 1149 | unetDatasets.append(demData) 1150 | if self.hagfiles: 1151 | hagData = self._get_hag_data(indexes) 1152 | unetDatasets.append(hagData) 1153 | if self.lidarfiles: 1154 | lidarData = self._get_lidar_data(indexes) 1155 | unetDatasets.append(lidarData) 1156 | if self.ssurgofiles: 1157 | ssurgoData = self._get_ssurgo_data(indexes) 1158 | unetDatasets.append(ssurgoData) 1159 | 1160 | if any([type(dat) != np.ndarray for dat in unetDatasets + lstmDatasets]): 1161 | pass 1162 | else: 1163 | unetData = np.concatenate(unetDatasets, axis = -1) 1164 | lstmData = np.concatenate(lstmDatasets, axis = -1) 1165 | feats = [unetData, lstmData] 1166 | # if type(lidarData) == np.ndarray: 1167 | # unetData = np.concatenate([naipData, lidarData], axis = -1) 1168 | # else: 1169 | # unetData = naipData 1170 | 1171 | # feats = [unetData, s2Data] 1172 | # if any([type(dat) == type(None) for dat in feats]): 1173 | # return self.__getitem__(randint(0, len(self.indexes) - self.batch_size)) 1174 | 1175 | if self.to_fit: 1176 | labels = self._process_y(indexes) 1177 | if type(labels) == type(None): 1178 | pass 1179 | # feats, labels = split_timeseries(rearranged) 1180 | # we can't have nans in label 1181 | else: 1182 | return feats, labels 1183 | else: 1184 | return feats 1185 | -------------------------------------------------------------------------------- /utils/raster_tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Created on Wed Jun 29 15:07:52 2022 3 | 4 | @author: mevans 5 | """ 6 | 7 | import os 8 | from os.path import join 9 | import rasterio as rio 10 | from rasterio.windows import Window 11 | from rasterio.transform import Affine 12 | from rasterio.merge import merge 13 | import shapely 14 | from shapely.geometry import box 15 | import geopandas as gpd 16 | import numpy as np 17 | from matplotlib.pyplot import imsave 18 | import warnings 19 | import random 20 | from osgeo import gdal 21 | rio.Env(CHECK_DISK_FREE_SPACE=False) 22 | 23 | def generate_chip_indices(H, W, buff = 128, kernel = 256): 24 | """ 25 | Parameters 26 | --- 27 | H: int 28 | height dimension in pixels over which indices should be generated 29 | W: int 30 | width dimension in pixels over which indices should be generated 31 | buff: int 32 | size of pixels to be trimmed from each side of chip 33 | kernel: int 34 | size of contiguous image chips 35 | Return 36 | --- 37 | list::np.ndarray: list containing (y,x) index of chips upper left corner 38 | """ 39 | side = (2*buff) + kernel 40 | x_buff = y_buff = buff 41 | 42 | y_indices = list(range(y_buff, H - (kernel+buff) +1, kernel)) 43 | x_indices = list(range(x_buff, W - (kernel+buff) +1, kernel)) 44 | 45 | indices = [(y_index, x_index) for y_index in y_indices for x_index in x_indices] 46 | return indices 47 | 48 | def extract_chips(arr, buff = 128, kernel = 256): 49 | """Break an array into (potentially) overlapping chips for analysis 50 | 
Arguments:
51 |         arr (ndarray): 3D array to run predictions on
52 |         buff (int): size of pixels to be trimmed from chips
53 |         kernel (int): size of contiguous image chips
54 |     Return:
55 |         list::np.ndarray: list containing image chips of size (kernel+buff, kernel+buff)
56 |     """
57 |     H, W, C = arr.shape
58 |     side = buff + kernel
59 |     x_buff = y_buff = buff//2
60 |     chips = []
61 | 
62 |     chip_indices = generate_chip_indices(H, W, buff, kernel)
63 | 
64 |     for y, x in chip_indices: # indices are (y, x) upper-left corners
65 |         chip = arr[y-y_buff:y+kernel+y_buff, x-x_buff:x+kernel+x_buff, :]
66 |         chips.append(chip)
67 | 
68 |     return chips
69 | 
70 | def convert(size, box):
71 |     """
72 |     Convert coordinates of a bounding box given in image pixels to
73 |     normalized [0,1] yolo coordinates
74 | 
75 |     Parameters
76 |     ---
77 |     size: tpl
78 |         height, width of image in pixels
79 |     box: list[x0, y0, x1, y1]
80 |         corners of box in pixels
81 | 
82 |     Return
83 |     ---
84 |     tpl(float, float, float, float): normalized x,y centroid and width, height of box
85 |     """
86 |     dw = 1./size[1]
87 |     dh = 1./size[0]
88 |     xmid = (box[0] + box[2])/2.0
89 |     ymid = (box[1] + box[3])/2.0
90 |     w0 = box[2] - box[0]
91 |     h0 = box[3] - box[1]
92 |     x = xmid*dw
93 |     y = ymid*dh
94 |     w = w0*dw
95 |     h = h0*dh
96 |     return (x,y,w,h)
97 | 
98 | def make_window(cx: int, cy:int, window_size: int) -> tuple:
99 |     """Create an array window around a centroid
100 | 
101 |     Parameters
102 |     ---
103 |     cx: int
104 |         centroid x-coord
105 |     cy: int
106 |         centroid y-coord
107 |     window_size: int
108 |         size of window in pixels
109 | 
110 |     Return
111 |     ---
112 |     tpl: coordinates of top left (x0, y0) and bottom right (x1, y1) window points
113 |     """
114 |     x0 = round(cx - window_size//2)
115 |     y0 = round(cy - window_size//2)
116 |     x1 = round(cx + window_size//2)
117 |     y1 = round(cy + window_size//2)
118 |     return (x0, y0, x1, y1)
119 | 
120 | def get_geo_transform(raster_src):
121 |     """Get the geotransform for a raster image source.
122 |     Arguments
123 |     ---------
124 |     raster_src : str, :class:`rasterio.DatasetReader`, or `osgeo.gdal.Dataset`
125 |         Path to a raster image with georeferencing data to apply to `geom`.
126 |         Alternatively, an opened :class:`rasterio.Band` object or
127 |         :class:`osgeo.gdal.Dataset` object can be provided. Required if not
128 |         using `affine_obj`.
129 |     Returns
130 |     -------
131 |     transform : :class:`affine.Affine`
132 |         An affine transformation object to the image's location in its CRS.
133 |     """
134 | 
135 |     if isinstance(raster_src, str):
136 |         with rio.Env(CHECK_DISK_FREE_SPACE=False):
137 |             with rio.open(raster_src) as src:
138 |                 affine_obj = src.transform
139 |     elif isinstance(raster_src, rio.DatasetReader):
140 |         affine_obj = raster_src.transform
141 | 
142 |     return affine_obj
143 | 
144 | def convert_poly_coords(geom, raster_src=None, affine_obj=None, inverse=False,
145 |                         precision=None):
146 |     """Georegister geometry objects currently in pixel coords or vice versa.
147 |     Params
148 |     ---------
149 |     geom : :class:`shapely.geometry.shape` or str
150 |         A :class:`shapely.geometry.shape`, or WKT string-formatted geometry
151 |         object currently in pixel coordinates.
152 |     raster_src : str, optional
153 |         Path to a raster image with georeferencing data to apply to `geom`.
154 |         Alternatively, an opened :class:`rasterio.Band` object or
155 |         :class:`osgeo.gdal.Dataset` object can be provided. Required if not
156 |         using `affine_obj`.
157 | affine_obj: list or :class:`affine.Affine` 158 | An affine transformation to apply to `geom` in the form of an 159 | ``[a, b, d, e, xoff, yoff]`` list or an :class:`affine.Affine` object. 160 | Required if not using `raster_src`. 161 | inverse : bool, optional 162 | If true, will perform the inverse affine transformation, going from 163 | geospatial coordinates to pixel coordinates. 164 | precision : int, optional 165 | Decimal precision for the polygon output. If not provided, rounding 166 | is skipped. 167 | Returns 168 | ------- 169 | out_geom 170 | A geometry in the same format as the input with its coordinate system 171 | transformed to match the destination object. 172 | """ 173 | 174 | if not raster_src and not affine_obj: 175 | raise ValueError("Either raster_src or affine_obj must be provided.") 176 | 177 | if raster_src is not None: 178 | affine_xform = get_geo_transform(raster_src) 179 | else: 180 | if isinstance(affine_obj, Affine): 181 | affine_xform = affine_obj 182 | else: 183 | # assume it's a list in either gdal or "standard" order 184 | # (list_to_affine checks which it is) 185 | if len(affine_obj) == 9: # if it's straight from rasterio 186 | affine_obj = affine_obj[0:6] 187 | affine_xform = Affine(*affine_obj) 188 | 189 | if inverse: # geo->px transform 190 | affine_xform = ~affine_xform 191 | 192 | if isinstance(geom, str): 193 | # get the polygon out of the wkt string 194 | g = shapely.wkt.loads(geom) 195 | elif isinstance(geom, shapely.geometry.base.BaseGeometry): 196 | g = geom 197 | else: 198 | raise TypeError('The provided geometry is not an accepted format. ' 199 | 'This function can only accept WKT strings and ' 200 | 'shapely geometries.') 201 | 202 | xformed_g = shapely.affinity.affine_transform(g, [affine_xform.a, 203 | affine_xform.b, 204 | affine_xform.d, 205 | affine_xform.e, 206 | affine_xform.xoff, 207 | affine_xform.yoff]) 208 | if isinstance(geom, str): 209 | # restore to wkt string format 210 | xformed_g = shapely.wkt.dumps(xformed_g) 211 | if precision is not None: 212 | xformed_g = _reduce_geom_precision(xformed_g, precision=precision) 213 | 214 | return xformed_g 215 | 216 | def convert_pt(geometry: gpd.GeoSeries, out_crs: int, src_transform: list) -> tuple: 217 | """ Change a point to another crs 218 | 219 | Parameters 220 | --- 221 | geomegry: gpd.GeoSeries 222 | geoseries of points 223 | out_crs: int 224 | epsg for the desired crs 225 | 226 | Return 227 | --- 228 | tpl: (x,y) coordinates of point in new crs 229 | """ 230 | pt = geometry.to_crs(out_crs) 231 | coords = convert_poly_coords(pt.iloc[0], affine_obj = src_transform, inverse = True, precision = None) 232 | x, y = np.rint(coords.x), np.rint(coords.y) 233 | return (x,y) 234 | 235 | def win_jitter(window_size, jitter_frac=0.1): 236 | '''get x and y jitter 237 | Parameters 238 | --------- 239 | window_size (tpl: dx, dy in pixels 244 | ''' 245 | val = np.rint(jitter_frac * window_size) 246 | dx = np.random.randint(-val, val) 247 | dy = np.random.randint(-val, val) 248 | 249 | return dx, dy 250 | 251 | def get_centroid(geom_pix, verbose = True): 252 | """ 253 | Get the centroid of a polygon 254 | 255 | Parameters 256 | ---------- 257 | geom_pix : shapely POLYGON 258 | verbose : bool, optional 259 | Return print statements? The default is True. 260 | 261 | Returns 262 | ------- 263 | cx : float 264 | centroid x coordinate in input crs. 265 | cy : float 266 | centroid y coordinate in input crs. 
267 | 268 | """ 269 | bounds = geom_pix.bounds 270 | area = geom_pix.area 271 | (minx, miny, maxx, maxy) = bounds 272 | dx, dy = maxx-minx, maxy-miny 273 | 274 | # get centroid 275 | centroid = geom_pix.centroid 276 | 277 | cx_tmp, cy_tmp = list(centroid.coords)[0] 278 | cx, cy = np.rint(cx_tmp), np.rint(cy_tmp) 279 | if verbose: 280 | print (" bounds:", bounds ) 281 | print (" dx, dy:", dx, dy ) 282 | print (" area:", area ) 283 | print("centroid:", centroid) 284 | 285 | return cx, cy 286 | 287 | def make_jittered_window(cx, cy, image_h, image_w, window_size = 1280, jitter_frac = 0.1): 288 | """ 289 | Create a jittered image window from and input image and geometry centroid 290 | 291 | Parameters 292 | ---------- 293 | cx : float 294 | x-coordinate of centroid around which to jitter window. 295 | cy : float 296 | y-coordinate of centroid around which to jitter window. 297 | image_h : int 298 | height in pixels of input image. 299 | image_w : int 300 | width in pixels of input image. 301 | window_size : int, optional 302 | desired dimension of output window. The default is 1280. 303 | jitter_frac : float, optional 304 | proportion of window size to move window. The default is 0.2. 305 | 306 | Returns 307 | ------- 308 | x0 : int 309 | minx coordinate of jittered window 310 | y0 : int 311 | miny coordinate of jittered window. 312 | x1 : int 313 | maxx coordinate of jittered window. 314 | y1 : int 315 | maxy coordinate of jittered window. 316 | 317 | """ 318 | # number of pixels in x and y directions to shift window 319 | jx, jy = win_jitter(window_size, jitter_frac=jitter_frac) 320 | x0 = cx - window_size/2 + jx 321 | y0 = cy - window_size/2 + jy 322 | # ensure window does not extend outside larger image 323 | x0 = max(x0, 0) 324 | x0 = int(min(x0, image_w - window_size)) 325 | y0 = max(y0, 0) 326 | y0 = int(min(y0, image_h - window_size)) 327 | # set other side of square 328 | x1 = x0 + window_size 329 | y1 = y0 + window_size 330 | print('x0', x0, 'y0', y0, 'x1', x1, 'y1', y1) 331 | return x0, y0, x1, y1 332 | 333 | def rasterio_to_img(array, out_path, nbands = 3, ext = None): 334 | """ 335 | Write an array read by rasterio to an 8-bit integer image file 336 | 337 | Parameters 338 | ---------- 339 | array : numpy.ndarray 340 | image array read by rasterio. 341 | out_path : str 342 | out image file path. 343 | nbands : int, optional 344 | number of image bands to write. The default is 3. 345 | ext : str, optional 346 | image file format extension. The default is 'png'. 347 | 348 | Returns 349 | ------- 350 | None. 
351 | 
352 |     """
353 |     # convert from CHW to HWC and cast as unsigned 8-bit int for saving
354 |     t = array.transpose((1,2,0)).astype('uint8')
355 |     print('array shape', t.shape)
356 |     print('array min', t.min())
357 |     print('array max', t.max())
358 |     print('array type', t.dtype)
359 |     # to use pre-trained YOLO weights, only grab RGB bands
360 |     if ext:
361 |         out_file = f"{out_path}.{ext}"
362 |     else:
363 |         out_file = out_path
364 |     print('writing image to', out_file)
365 |     imsave(out_file, t[:,:,:nbands], vmin = 0, vmax = 255)
366 | 
367 | def numpy_to_raster(arr: np.ndarray, mixer: dict, out_file: str, dtype:str):
368 |     """
369 |     Params
370 |     ---
371 |     arr: np.ndarray
372 |         input (C, H, W) array to be converted to raster
373 |     mixer: dict
374 |         dictionary containing image dimension and spatial reference metadata required by rasterio.write
375 |     out_file: str
376 |         file path to destination raster file
377 |     dtype: str
378 |         output dtype accepted by rasterio.write (e.g., 'uint16', 'int32', 'float32', 'float64')
379 | 
380 |     Return
381 |     ---
382 |     None: writes raster data to destination file
383 |     """
384 |     C = arr.shape[0]
385 |     meta = {
386 |         'driver':'GTiff',
387 |         'width':mixer['cols'],
388 |         'height':mixer['rows'],
389 |         'count':C,
390 |         'dtype':dtype,
391 |         'transform':rio.Affine(*mixer['transform'][0:6]),
392 |         'crs':mixer['crs'],
393 |         'nodata':255
394 |     }
395 |     band_list = list(range(1,C+1))
396 |     temp_file = out_file.replace(".tif","_temp.tif")
397 |     with rio.Env(CHECK_DISK_FREE_SPACE=False):
398 |         with rio.open(temp_file, mode = 'w', **meta) as dst:
399 |             dst.write(arr, band_list)
400 |             # dst.write(arr, 1)
401 |             # the dataset is closed automatically when the with block exits
402 | 
403 |     ds = gdal.Open(temp_file)
404 | 
405 |     options = gdal.TranslateOptions(format = 'COG',creationOptions = ["COMPRESS=LZW"])
406 |     ds = gdal.Translate(destName=out_file, srcDS=ds, options=options)
407 |     ds = None
408 | 
409 |     os.remove(temp_file)
410 | 
411 | def arrays_to_cog(arrs: list, coords: list, mixer: dict, out_file: str, dtype:str):
412 |     """
413 |     Params
414 |     ---
415 |     arrs: list
416 |         list of .npy file paths, each an (H, W, C) chip named '<col>_<row>...' by its upper-left pixel offsets
417 |     mixer: dict
418 |         dictionary containing image dimension and spatial reference metadata required by rasterio.write
419 |     out_file: str
420 |         file path to destination raster file
421 |     dtype: str
422 |         output dtype accepted by rasterio.write (e.g., 'uint16', 'int32', 'float32', 'float64')
423 | 
424 |     Return
425 |     ---
426 |     None: writes raster data to destination file
427 |     """
428 |     C = np.load(arrs[0]).shape[-1]
429 |     meta = {
430 |         'driver':'GTiff',
431 |         'width':round(mixer['cols']),
432 |         'height':round(mixer['rows']),
433 |         'count':C,
434 |         'dtype':dtype,
435 |         'transform':rio.Affine(*mixer['transform'][0:6]),
436 |         'crs':mixer['crs'],
437 |         'nodata':255
438 |     }
439 |     band_list = list(range(1,C+1))
440 |     temp_file = out_file.replace(".tif","_temp.tif")
441 |     with rio.Env(CHECK_DISK_FREE_SPACE=False):
442 |         with rio.open(temp_file, mode = 'w', **meta) as dst:
443 |             for f in arrs: # write every chip into its window of the output raster
444 |                 arr = np.moveaxis(np.load(f), -1, 0)
445 |                 indices = os.path.splitext(os.path.basename(f))[0].split('_') # (X, Y) offsets from the file name
446 |                 window = Window(
447 |                     row_off = int(indices[1]), #Y
448 |                     col_off = int(indices[0]), #X
449 |                     width = mixer['size'],
450 |                     height = mixer['size'])
451 |                 dst.write(arr, window = window, indexes = band_list)
452 | 
453 |     ds = gdal.Open(temp_file)
454 | 
455 |     options = gdal.TranslateOptions(format = 'COG',creationOptions = ["COMPRESS=LZW"])
456 |     # if we want to write straight to blob, use /vsiaz/container/path
457 |     # after
setting the AZURE_STORAGE_CONNECTION_STRING environment variable
458 |     ds = gdal.Translate(destName=out_file, srcDS=ds, options=options)
459 |     ds = None
460 | 
461 |     os.remove(temp_file)
462 | 
463 | 
-------------------------------------------------------------------------------- /utils/stats.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from scipy.special import gamma
3 | 
4 | def gamma_pdf(x, a, b):
5 |     """calculate the pdf of a gamma distribution defined by shape a and scale b
6 |     Params
7 |     ---
8 |     x: float or array
9 |         values at which to evaluate the gamma pdf
10 |     a: float or array
11 |         shape parameter of the gamma distribution
12 |     b: float or array
13 |         scale parameter of the gamma distribution
14 | 
15 |     Return
16 |     ---
17 |     float or array:
18 |         probability of x under the gamma distribution with shape a and scale b
19 |     """
20 |     denom = gamma(a)*(b**a)
21 |     num = (x**(a-1))*(np.exp(-1*x/b))
22 |     pd = num/denom
23 |     return pd
24 | 
25 | def lognormal_pdf(x, u, v):
26 |     """calculate the pdf of a lognormal distribution defined by the mean u and variance v of log(x)
27 |     Params
28 |     ---
29 |     x: float or array
30 |         values at which to evaluate the lognormal pdf
31 |     u: float or array
32 |         mean of the underlying normal distribution (i.e., of log(x))
33 |     v: float or array
34 |         variance of the underlying normal distribution (i.e., of log(x))
35 | 
36 |     Return
37 |     ---
38 |     float or array:
39 |         probability of x under the lognormal distribution with parameters u and v
40 |     """
41 |     sd = np.sqrt(v)
42 |     const = (np.pi*2)**0.5
43 |     first = 1/(sd*const)
44 |     edenom = v*2
45 |     enum = ((np.log(x) - u)**2)*-1
46 |     second = np.exp(enum/edenom)/x
47 |     pd = first*second
48 |     return pd
49 | 
50 | 
--------------------------------------------------------------------------------
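A quick sanity check on utils/stats.py (a hypothetical snippet, not part of the repository; it assumes the repo root is on PYTHONPATH so `utils.stats` is importable): gamma_pdf uses the same shape/scale parameterization as scipy.stats.gamma, and lognormal_pdf takes the mean and variance of log(x), which maps to scipy's lognorm via s = sqrt(v) and scale = exp(u).

import numpy as np
from scipy.stats import gamma as gamma_dist, lognorm
from utils.stats import gamma_pdf, lognormal_pdf

x = np.linspace(0.1, 10.0, 50)

# gamma_pdf(x, a, b): shape a, scale b -> same parameterization as scipy.stats.gamma
assert np.allclose(gamma_pdf(x, a=2.0, b=1.5), gamma_dist.pdf(x, a=2.0, scale=1.5))

# lognormal_pdf(x, u, v): u, v are the mean and variance of log(x)
assert np.allclose(lognormal_pdf(x, u=0.2, v=0.5), lognorm.pdf(x, s=np.sqrt(0.5), scale=np.exp(0.2)))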