├── .gitmodules
├── README.md
├── azure
│   ├── NY_geotiff.json
│   ├── VA_geotiff.json
│   ├── DE_geotiff.json
│   ├── MD_geotiff.json
│   ├── PA_geotiff.json
│   ├── DE_tfrecord.json
│   ├── subset.py
│   ├── predict_solar.py
│   ├── predict_solar_terminal.py
│   ├── train_solar.py
│   └── train_autoencoder.py
├── envs
│   ├── conda_env.yml
│   ├── requirements.txt
│   ├── solar_training.yml
│   └── conda_env_jan29.yml
├── conda_env-copy.yml
├── LICENSE
├── .gitignore
├── Tensorboard.ipynb
├── predict_solar.py
├── demos
│   ├── Training.ipynb
│   ├── Prediction.ipynb
│   ├── Extract_Data_GEE.ipynb
│   └── SampleUNETdata_GEE.ipynb
├── re-train.ipynb
└── Setup.ipynb

/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "azure/scv"]
2 | 	path = azure/scv
3 | 	url = https://github.com/mjevans26/Satellite_ComputerVision.git
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Solar_UNet
2 | Repository containing code to train U-Net models delineating solar arrays in Sentinel-2 imagery
3 | 
4 | # Use
5 | The https://github.com/mjevans26/Satellite_ComputerVision.git repository contains utility tools and functions for training U-Net models, and is included as a git submodule in this directory. Demonstration notebooks for sampling training data and running predictions are in the 'demos' directory.
6 | 
--------------------------------------------------------------------------------
/azure/NY_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob",
13 |         "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/NewYork/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-10T17:36:46Z&se=2022-02-11T01:36:46Z&sv=2020-08-04&sr=c&sig=zL2WPTgN8ks729VcvTz84vjHKVQAhP3aa7AvtJIDhgY%3D"
14 |     },
15 |     "data":"CPK_solar/data/predict/{}/{}",
16 |     "model": "solar_Jun21"
17 | }
--------------------------------------------------------------------------------
/azure/VA_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob",
13 |         "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Virginia/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-04T17:43:12Z&se=2022-02-05T01:43:12Z&sv=2020-08-04&sr=c&sig=e1WIPuD63XQ6c0tBtgpgWiEjgf2FyPTalufcaIaryuw%3D"
14 |     },
15 |     "data":"CPK_solar/data/predict/{}/{}",
16 |     "model": "solar_Jun21"
17 | }
--------------------------------------------------------------------------------
/azure/DE_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob", 
13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/MD_geotiff.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Maryland/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/PA_geotiff.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Pennsylvania/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-02T20:12:00Z&se=2022-02-03T04:12:00Z&sv=2020-08-04&sr=c&sig=A4H2NpKBa58Rc5vqGm%2F0l6sZpPxgQom7jf%2FDmBxVbEQ%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/DE_tfrecord.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/{}_{}_{}.TFRecord?sp=racw&st=2022-01-27T18:38:10Z&se=2022-01-29T02:38:10Z&sv=2020-08-04&sr=c&sig=vrHeB7LHAc2R2B6rhS%2BwRLqYM4xY5v1%2B9SlGyj8TTIY%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /envs/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: solar_training 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | dependencies: 6 | - python=3.7 7 | - pip=20.2.4 8 | - pip: 9 | - matplotlib>=3.3,<3.4 10 | - psutil>=5.8,<5.9 11 | - tqdm>=4.59,<4.60 12 | - pandas>=1.1,<1.2 13 | - scipy>=1.5,<1.6 14 | - numpy>=1.10,<1.20 15 | - rasterio==1.2.10 16 | - ipykernel~=6.0 17 | - azureml-core==1.37.0 18 | - azure-storage-blob==12.9.0 19 | - azureml-defaults==1.37.0 20 | - azureml-mlflow==1.37.0 21 | - azureml-telemetry==1.37.0 22 | - tensorboard==2.4.0 23 | - 
tensorflow-gpu==2.4.1
24 |     - tensorflow-datasets==4.3.0
25 |     - onnxruntime-gpu>=1.7,<1.8
26 |     - horovod[tensorflow-gpu]==0.21.3
27 | 
28 | 
--------------------------------------------------------------------------------
/conda_env-copy.yml:
--------------------------------------------------------------------------------
1 | name: tf_training
2 | channels:
3 |   - anaconda
4 |   - conda-forge
5 | dependencies:
6 |   - python=3.8
7 |   - pip=20.2.4
8 |   - pip:
9 |     - matplotlib~=3.5.0
10 |     - psutil~=5.8.0
11 |     - tqdm~=4.62.0
12 |     - pandas>=1.1,<1.2
13 |     - scipy~=1.7.0
14 |     - numpy~=1.21.0
15 |     - rasterio==1.2.10
16 |     - ipykernel~=6.0
17 |     - azureml-core==1.47.0
18 |     - azure-storage-blob==12.9.0
19 |     - azureml-defaults==1.47.0
20 |     - azureml-mlflow==1.47.0
21 |     - azureml-telemetry==1.47.0
22 |     - tensorboard~=2.11.0
23 |     - tensorflow-gpu~=2.11.0
24 |     - tensorflow-datasets~=4.5.0
25 |     - onnxruntime-gpu~=1.9.0
26 |     - 'horovod[tensorflow-gpu]~=0.23.0'
27 |     - debugpy~=1.6.3
28 |     - protobuf~=3.20
--------------------------------------------------------------------------------
/azure/subset.py:
--------------------------------------------------------------------------------
1 | import rasterio as rio
2 | import json
3 | from rasterio.windows import Window
4 | import numpy as np
5 | 
6 | with rio.open('./outputs/raw_unet256_Virginia_solar_Jun21.tif') as src:
7 |     H,W = src.shape
8 |     crs = src.crs
9 |     windows = [Window(0,0, W//2, H//2), Window(0, H//2, W//2, H-(H//2)), Window(W//2, 0, W-(W//2), H//2), Window(W//2, H//2, W-(W//2), H-(H//2))]
10 |     for i, window in enumerate(windows):
11 |         subset = src.read(window = window)
12 |         print(subset.shape)
13 |         transform = src.window_transform(window)
14 |         with rio.open(
15 |             f'./outputs/VA2021_Jun21preds{i}.tif',
16 |             'w',
17 |             driver = 'GTiff',
18 |             width = window.width,   # each quadrant tile takes the window's dimensions, not the full raster's
19 |             height = window.height,
20 |             count = 1,
21 |             dtype = subset.dtype,
22 |             crs = crs,
23 |             transform = transform) as dst:
24 |             dst.write(subset)
25 | 
26 | 
--------------------------------------------------------------------------------
/envs/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib>=3.3,<3.4
2 | psutil>=5.8,<5.9
3 | tqdm>=4.59,<4.60
4 | pandas>=1.1,<1.2
5 | scipy>=1.5,<1.6
6 | numpy>=1.10,<1.20
7 | ipykernel~=6.0
8 | azure-common==1.1.27
9 | azureml-core==1.37.0.post1
10 | azureml-defaults==1.37.0
11 | azure-graphrbac==0.61.1
12 | azure-identity==1.7.0
13 | azure-mgmt-authorization==0.61.0
14 | azure-mgmt-containerregistry==9.0.0
15 | azure-mgmt-core==1.3.0
16 | azure-mgmt-keyvault==9.3.0
17 | azure-mgmt-resource==19.0.0
18 | azure-mgmt-storage==19.0.0
19 | azure-storage-blob==12.9.0
20 | azureml-dataprep==2.25.2
21 | azureml-dataprep-native==38.0.0
22 | azureml-dataprep-rslex==2.1.1
23 | azureml-dataset-runtime==1.37.0
24 | azureml-inference-server-http==0.4.2
25 | azureml-mlflow==1.37.0
26 | azureml-telemetry==1.37.0
27 | tensorboard==2.4.0
28 | tensorflow-gpu==2.4.1
29 | tensorflow-datasets==4.3.0
30 | onnxruntime-gpu>=1.7,<1.8
31 | horovod[tensorflow-gpu]==0.21.3
32 | rasterio==1.2.10
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 mjevans26
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .httr-oauth 2 | 3 | # Byte-compiled / optimized / DLL files 4 | *__pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.pyc 8 | 9 | # Data directories 10 | data/ 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Azure stuff 16 | *.amlignore 17 | *.amltmp 18 | *azureml-models/ 19 | azure/outputs/ 20 | .ipynb_aml_checkpoints/ 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | pip-wheel-metadata/ 37 | share/python-wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | .ipynb_aml_checkpoints/ 93 | 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # pipenv 103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 105 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 106 | # install all needed dependencies. 107 | #Pipfile.lock 108 | 109 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
110 | __pypackages__/
111 | 
112 | # Celery stuff
113 | celerybeat-schedule
114 | celerybeat.pid
115 | 
116 | # SageMath parsed files
117 | *.sage.py
118 | 
119 | # Environments
120 | .env
121 | .venv
122 | env/
123 | venv/
124 | ENV/
125 | env.bak/
126 | venv.bak/
127 | 
128 | # Spyder project settings
129 | .spyderproject
130 | .spyproject
131 | 
132 | # Rope project settings
133 | .ropeproject
134 | 
135 | # mkdocs documentation
136 | /site
137 | 
138 | # mypy
139 | .mypy_cache/
140 | .dmypy.json
141 | dmypy.json
142 | 
143 | # Pyre type checker
144 | .pyre
145 | 
--------------------------------------------------------------------------------
/azure/predict_solar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | 
8 | from utils import model_tools, processing
9 | from utils.prediction_tools import makePredDataset, write_tfrecord_predictions
10 | from matplotlib import pyplot as plt
11 | import argparse
12 | import os
13 | import glob
14 | import json
15 | import math
16 | import tensorflow as tf
17 | from datetime import datetime
18 | from azureml.core import Run, Workspace, Model
19 | from azure.storage.blob import BlobClient
20 | 
21 | 
22 | # Set Global variables
23 | 
24 | parser = argparse.ArgumentParser()
25 | 
26 | parser.add_argument('--pred_data', type = str, required = True, help = 'directory containing test image(s) and mixer')
27 | parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
28 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
29 | parser.add_argument('--bands', type = str, nargs = '+', required = False, default = ["B2", "B3", "B4", "B8", "B11", "B12"])
30 | parser.add_argument('--blob_url', type = str, required = True, help = 'blob url for upload to blob storage')
31 | 
32 | args = parser.parse_args()
33 | 
34 | # get the run context
35 | run = Run.get_context()
36 | exp = run.experiment
37 | ws = exp.workspace
38 | 
39 | BANDS = args.bands
40 | # BANDS = json.loads(args.bands)
41 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
42 | 
43 | METRICS = {
44 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
45 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
46 |     }
47 | 
48 | def get_weighted_bce(y_true, y_pred):
49 |     return model_tools.weighted_bce(y_true, y_pred, 1)
50 | 
51 | # if a model directory is provided, we will reload the previously trained model and weights
52 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
53 | model_dir = Model.get_model_path(args.model_id, _workspace = ws)
54 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
55 | 
56 | # load our previously trained model and weights
57 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
58 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
59 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
60 | m.load_weights(weights_file)
61 | 
62 | # Specify the size and shape of patches expected by the model. 
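# --- [editor's note: illustrative sketch, not part of the original script] ---
# How KERNEL_SIZE and the kernel_buffer of [128, 128] used further down typically
# interact, assuming the Earth-Engine-style overlap-tile convention for U-Net
# prediction: each exported patch carries a half-buffer halo on every side, and
# only the central kernel is kept when tiles are stitched back together:
#
#   buffered_shape = [256 + 128, 256 + 128]    # 384 x 384 input tiles
#   trim = 128 // 2                            # 64 px discarded per side
#   kept = patch[trim:trim + 256, trim:trim + 256]
#
# The actual trimming lives in utils.prediction_tools.write_tfrecord_predictions;
# the arithmetic above is an assumed sketch of that scheme, not its verbatim code.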
63 | KERNEL_SIZE = args.kernel_size 64 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE] 65 | 66 | 67 | # create special folders './outputs' and './logs' which automatically get saved 68 | os.makedirs('outputs', exist_ok = True) 69 | os.makedirs('logs', exist_ok = True) 70 | out_dir = './outputs' 71 | log_dir = './logs' 72 | 73 | testFiles = [] 74 | 75 | for root, dirs, files in os.walk(args.pred_data): 76 | for f in files: 77 | testFiles.append(os.path.join(root, f)) 78 | 79 | 80 | predFiles = [x for x in testFiles if '.gz' in x] 81 | jsonFiles = [x for x in testFiles if '.json' in x] 82 | jsonFile = jsonFiles[0] 83 | predData = makePredDataset(predFiles, BANDS, one_hot = None) 84 | 85 | write_tfrecord_predictions( 86 | imageDataset = predData, 87 | model = m, 88 | pred_path = './', 89 | out_image_base = f'{jsonFile[:-10]}_{args.model_id}', 90 | kernel_shape = KERNEL_SHAPE, 91 | kernel_buffer = [128,128]) 92 | 93 | # get the current time 94 | now = datetime.now() 95 | date = now.strftime("%d%b%y") 96 | date 97 | 98 | blob_url = args.blob_url 99 | blob_client = BlobClient.from_blob_url(blob_url) 100 | with open(f'{jsonFile[:-10]}_{args.model_id}.TFRecord', 'rb') as f: 101 | blob_client.upload_blob(f) 102 | 103 | 104 | -------------------------------------------------------------------------------- /Tensorboard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "from azureml.tensorboard import Tensorboard\r\n", 7 | "from azureml.core import Experiment, Environment, Workspace, Datastore, Dataset, Model, ScriptRunConfig\r\n", 8 | "import os\r\n", 9 | "import glob\r\n", 10 | "# get the current workspace\r\n", 11 | "ws = Workspace.from_config()" 12 | ], 13 | "outputs": [], 14 | "execution_count": 1, 15 | "metadata": { 16 | "gather": { 17 | "logged": 1648739190271 18 | } 19 | } 20 | }, 21 | { 22 | "cell_type": "code", 23 | "source": [ 24 | "model = run.register_model(model_name='acd-unet-geeOnera',\r\n", 25 | " tags=run.tags,\r\n", 26 | " description = 'UNET model delineating anthropogenic land cover change in S2 imagery. 
Trained on GEE + Onera data.',\r\n", 27 | " model_path='outputs/',\r\n", 28 | " model_framework = 'Tensorflow',\r\n", 29 | " model_framework_version= '2.0',\r\n", 30 | " datasets = [('training', gee_train_dataset), ('evaluation', eval_dataset), ('testing', test_dataset)])\r\n", 31 | "print(model.name, model.id, model.version, sep='\\t')" 32 | ], 33 | "outputs": [], 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": true, 37 | "jupyter": { 38 | "source_hidden": false, 39 | "outputs_hidden": false 40 | }, 41 | "nteract": { 42 | "transient": { 43 | "deleting": false 44 | } 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "# run_id = 'solar-nc-cpk_1638381140_a602c63e'\r\n", 52 | "\r\n", 53 | "# run = ws.get_run(run_id)\r\n", 54 | "\r\n", 55 | "# run.get_file_names()\r\n", 56 | "# The TensorBoard constructor takes an array of runs, so be sure and pass it in as a single-element array here\r\n", 57 | "# tb = Tensorboard([run])\r\n", 58 | "tb = Tensorboard([], local_root = f'{os.getcwd()}/logs', port = 6006)\r\n", 59 | "\r\n", 60 | "# If successful, start() returns a string with the URI of the instance.\r\n", 61 | "tb.start()" 62 | ], 63 | "outputs": [ 64 | { 65 | "output_type": "stream", 66 | "name": "stdout", 67 | "text": "https://mevans1-6006.eastus.instances.azureml.ms\n" 68 | }, 69 | { 70 | "output_type": "execute_result", 71 | "execution_count": 8, 72 | "data": { 73 | "text/plain": "'https://mevans1-6006.eastus.instances.azureml.ms'" 74 | }, 75 | "metadata": {} 76 | } 77 | ], 78 | "execution_count": 8, 79 | "metadata": { 80 | "collapsed": true, 81 | "jupyter": { 82 | "source_hidden": false, 83 | "outputs_hidden": false 84 | }, 85 | "nteract": { 86 | "transient": { 87 | "deleting": false 88 | } 89 | }, 90 | "gather": { 91 | "logged": 1648739374862 92 | } 93 | } 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "tb.stop()" 99 | ], 100 | "outputs": [], 101 | "execution_count": 7, 102 | "metadata": { 103 | "collapsed": true, 104 | "jupyter": { 105 | "source_hidden": false, 106 | "outputs_hidden": false 107 | }, 108 | "nteract": { 109 | "transient": { 110 | "deleting": false 111 | } 112 | }, 113 | "gather": { 114 | "logged": 1648739371516 115 | } 116 | } 117 | } 118 | ], 119 | "metadata": { 120 | "kernelspec": { 121 | "name": "python3-azureml", 122 | "language": "python", 123 | "display_name": "Python 3.6 - AzureML" 124 | }, 125 | "language_info": { 126 | "name": "python", 127 | "version": "3.6.9", 128 | "mimetype": "text/x-python", 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "pygments_lexer": "ipython3", 134 | "nbconvert_exporter": "python", 135 | "file_extension": ".py" 136 | }, 137 | "kernel_info": { 138 | "name": "python3-azureml" 139 | }, 140 | "nteract": { 141 | "version": "nteract-front-end@1.0.0" 142 | }, 143 | "microsoft": { 144 | "host": { 145 | "AzureML": { 146 | "notebookHasBeenCompleted": true 147 | } 148 | } 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 2 153 | } -------------------------------------------------------------------------------- /envs/solar_training.yml: -------------------------------------------------------------------------------- 1 | name: solar-training 2 | channels: 3 | - anaconda 4 | - defaults 5 | dependencies: 6 | - ca-certificates=2020.10.14=0 7 | - certifi=2020.6.20=py37_0 8 | - ld_impl_linux-64=2.33.1=h53a641e_7 9 | - libedit=3.1.20191231=h14c3975_1 10 | - libffi=3.3=he6710b0_2 11 | - libgcc-ng=9.1.0=hdf63c60_0 12 | - 
libstdcxx-ng=9.1.0=hdf63c60_0 13 | - ncurses=6.2=he6710b0_1 14 | - openssl=1.1.1h=h7b6447c_0 15 | - pip=20.2.4=py37_0 16 | - python=3.7.9=h7579374_0 17 | - readline=8.0=h7b6447c_0 18 | - setuptools=50.3.0=py37hb0f4dca_1 19 | - sqlite=3.33.0=h62c20be_0 20 | - tk=8.6.10=hbc83047_0 21 | - wheel=0.35.1=py_0 22 | - xz=5.2.5=h7b6447c_0 23 | - zlib=1.2.11=h7b6447c_3 24 | - pip: 25 | - absl-py==1.0.0 26 | - adal==1.2.7 27 | - affine==2.3.0 28 | - applicationinsights==0.11.10 29 | - argcomplete==1.12.3 30 | - astunparse==1.6.3 31 | - attrs==21.4.0 32 | - azure-common==1.1.27 33 | - azure-core==1.20.1 34 | - azure-graphrbac==0.61.1 35 | - azure-identity==1.7.0 36 | - azure-mgmt-authorization==0.61.0 37 | - azure-mgmt-containerregistry==9.0.0 38 | - azure-mgmt-core==1.3.0 39 | - azure-mgmt-keyvault==9.3.0 40 | - azure-mgmt-resource==19.0.0 41 | - azure-mgmt-storage==19.0.0 42 | - azure-storage-blob==12.9.0 43 | - azureml-core==1.37.0.post1 44 | - azureml-dataprep==2.25.2 45 | - azureml-dataprep-native==38.0.0 46 | - azureml-dataprep-rslex==2.1.1 47 | - azureml-dataset-runtime==1.37.0 48 | - azureml-defaults==1.37.0 49 | - azureml-inference-server-http==0.4.2 50 | - azureml-mlflow==1.37.0 51 | - azureml-telemetry==1.37.0 52 | - backcall==0.2.0 53 | - backports-tempfile==1.0 54 | - backports-weakref==1.0.post1 55 | - bcrypt==3.2.0 56 | - cachetools==4.2.4 57 | - cffi==1.15.0 58 | - charset-normalizer==2.0.10 59 | - click==8.0.3 60 | - click-plugins==1.1.1 61 | - cligj==0.7.2 62 | - cloudpickle==2.0.0 63 | - colorama==0.4.4 64 | - configparser==3.7.4 65 | - contextlib2==21.6.0 66 | - cryptography==3.4.8 67 | - cycler==0.11.0 68 | - databricks-cli==0.16.2 69 | - debugpy==1.5.1 70 | - decorator==5.1.1 71 | - dill==0.3.4 72 | - distro==1.6.0 73 | - docker==5.0.3 74 | - dotnetcore2==2.1.22 75 | - entrypoints==0.3 76 | - flask==1.0.3 77 | - flatbuffers==1.12 78 | - fusepy==3.0.1 79 | - future==0.18.2 80 | - gast==0.3.3 81 | - gitdb==4.0.9 82 | - gitpython==3.1.26 83 | - google-auth==1.35.0 84 | - google-auth-oauthlib==0.4.6 85 | - google-pasta==0.2.0 86 | - googleapis-common-protos==1.54.0 87 | - grpcio==1.43.0 88 | - gunicorn==20.1.0 89 | - h5py==2.10.0 90 | - horovod==0.21.3 91 | - humanfriendly==9.2 92 | - idna==3.3 93 | - importlib-metadata==4.10.1 94 | - importlib-resources==5.4.0 95 | - inference-schema==1.3.0 96 | - ipykernel==6.7.0 97 | - ipython==7.31.1 98 | - isodate==0.6.1 99 | - itsdangerous==2.0.1 100 | - jedi==0.18.1 101 | - jeepney==0.7.1 102 | - jinja2==3.0.3 103 | - jmespath==0.10.0 104 | - json-logging-py==0.2 105 | - jsonpickle==2.1.0 106 | - jupyter-client==7.1.2 107 | - jupyter-core==4.9.1 108 | - keras-preprocessing==1.1.2 109 | - kiwisolver==1.3.2 110 | - knack==0.8.2 111 | - markdown==3.3.6 112 | - markupsafe==2.0.1 113 | - matplotlib==3.3.4 114 | - matplotlib-inline==0.1.3 115 | - mlflow-skinny==1.23.0 116 | - msal==1.16.0 117 | - msal-extensions==0.3.1 118 | - msrest==0.6.21 119 | - msrestazure==0.6.4 120 | - ndg-httpsclient==0.5.1 121 | - nest-asyncio==1.5.4 122 | - numpy==1.19.5 123 | - oauthlib==3.1.1 124 | - onnxruntime-gpu==1.7.0 125 | - opt-einsum==3.3.0 126 | - packaging==21.3 127 | - pandas==1.1.5 128 | - paramiko==2.9.2 129 | - parso==0.8.3 130 | - pathspec==0.9.0 131 | - pexpect==4.8.0 132 | - pickleshare==0.7.5 133 | - pillow==9.0.0 134 | - pkginfo==1.8.2 135 | - portalocker==2.3.2 136 | - promise==2.3 137 | - prompt-toolkit==3.0.24 138 | - protobuf==3.19.3 139 | - psutil==5.8.0 140 | - ptyprocess==0.7.0 141 | - pyarrow==3.0.0 142 | - pyasn1==0.4.8 143 | - 
pyasn1-modules==0.2.8
144 |     - pycparser==2.21
145 |     - pygments==2.11.2
146 |     - pyjwt==2.3.0
147 |     - pynacl==1.5.0
148 |     - pyopenssl==21.0.0
149 |     - pyparsing==3.0.7
150 |     - pysocks==1.7.1
151 |     - python-dateutil==2.8.2
152 |     - pytz==2021.3
153 |     - pyyaml==6.0
154 |     - pyzmq==22.3.0
155 |     - rasterio==1.2.10
156 |     - requests==2.27.1
157 |     - requests-oauthlib==1.3.0
158 |     - rsa==4.8
159 |     - scipy==1.5.4
160 |     - secretstorage==3.3.1
161 |     - six==1.16.0
162 |     - smmap==5.0.0
163 |     - snuggs==1.4.7
164 |     - tabulate==0.8.9
165 |     - tensorboard==2.4.0
166 |     - tensorboard-plugin-wit==1.8.1
167 |     - tensorflow-datasets==4.3.0
168 |     - tensorflow-estimator==2.4.0
169 |     - tensorflow-gpu==2.4.1
170 |     - tensorflow-metadata==1.6.0
171 |     - termcolor==1.1.0
172 |     - tornado==6.1
173 |     - tqdm==4.59.0
174 |     - traitlets==5.1.1
175 |     - typing-extensions==3.7.4.3
176 |     - urllib3==1.26.7
177 |     - wcwidth==0.2.5
178 |     - websocket-client==1.2.3
179 |     - werkzeug==2.0.2
180 |     - wrapt==1.12.1
181 |     - zipp==3.7.0
182 | prefix: /anaconda/envs/solar-training
183 | 
--------------------------------------------------------------------------------
/predict_solar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | 
8 | from utils import model_tools, processing
9 | from utils.prediction_tools import makePredDataset, write_tfrecord_predictions
10 | from matplotlib import pyplot as plt
11 | import argparse
12 | import os
13 | import glob
14 | import json
15 | import math
16 | import tensorflow as tf
17 | from datetime import datetime
18 | from azureml.core import Run, Workspace, Model, Datastore, Dataset
19 | from azure.storage.blob import BlobClient
20 | 
21 | 
22 | # Set Global variables
23 | 
24 | parser = argparse.ArgumentParser()
25 | 
26 | # parser.add_argument('--pred_data', type = str, default = True, help = 'directory containing test image(s) and mixer')
27 | # parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
28 | 
29 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
30 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
31 | parser.add_argument('-c', type=str, help='The path to the job config file')
32 | parser.add_argument('--aoi', type=str, required = True, default = 'Delaware', help = 'Name of the area to run predictions')
33 | parser.add_argument('--year', type=str, required = True, default = 'Summer2020', help = 'Season and year subdirectory')
34 | 
35 | args = parser.parse_args()
36 | 
37 | # # get the run context
38 | # run = Run.get_context()
39 | # exp = run.experiment
40 | # read annual config file
41 | with open(args.c, 'r') as f:
42 |     config = json.load(f)
43 | 
44 | # access relevant key values
45 | blob = config['blobContainer']
46 | wksp = config['workspace']
47 | model = config['model']
48 | 
49 | # connect to the Azure ML workspace specified in the job config file. 
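# --- [editor's note: shape of the job config read above] ---
# This mirrors the azure/*_geotiff.json and azure/DE_tfrecord.json files shown
# earlier in this repository dump:
#
# {
#   "workspace":     {"workspace_name": "solar", "subscription_id": "...", "resource_group": "cic_ai"},
#   "blobContainer": {"container_name": "solar", "account_name": "aiprojects",
#                     "datastore_name": "solardatablob",
#                     "blob_url": ".../{}_{}_{}.TFRecord?<sas-token>"},
#   "data":  "CPK_solar/data/predict/{}/{}",   # formatted with --aoi and --year below
#   "model": "solar_Jun21"
# }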
50 | ws = Workspace(subscription_id = wksp["subscription_id"], workspace_name = wksp["workspace_name"], resource_group = wksp["resource_group"])
51 | 
52 | # access our registered data share containing image data in this workspace
53 | datastore = Datastore.get(workspace = ws, datastore_name = blob['datastore_name'])
54 | pred_path = (datastore, config['data'].format(args.aoi, args.year))
55 | # pred_path = (datastore, 'CPK_solar/data/predict/testpred6')
56 | blob_files = Dataset.File.from_files(path = [pred_path])
57 | 
58 | # BANDS = args.bands
59 | BANDS = json.loads(args.bands)
60 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
61 | 
62 | METRICS = {
63 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
64 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
65 |     }
66 | 
67 | def get_weighted_bce(y_true, y_pred):
68 |     return model_tools.weighted_bce(y_true, y_pred, 1)
69 | 
70 | print(f'Loading model {config["model"]}')
71 | # if a model directory is provided, we will reload the previously trained model and weights
72 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
73 | model_dir = Model.get_model_path(model, _workspace = ws)
74 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
75 | 
76 | # load our previously trained model and weights
77 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
78 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
79 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
80 | m.load_weights(weights_file)
81 | 
82 | print('found model file:', model_file, '\n weights file:', weights_file)
83 | 
84 | # Specify the size and shape of patches expected by the model. 
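# --- [editor's note: example invocation (hypothetical paths; flags are those
# defined in the argparse block above)] ---
#
#   python predict_solar.py -c azure/DE_tfrecord.json --aoi Delaware --year Summer2020
#
# --kernel_size and --bands fall back to 256 and the six Sentinel-2 bands when omitted.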
85 | KERNEL_SIZE = args.kernel_size
86 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
87 | 
88 | 
89 | # create special folders './outputs' and './logs' which automatically get saved
90 | os.makedirs('outputs', exist_ok = True)
91 | os.makedirs('logs', exist_ok = True)
92 | out_dir = './outputs'
93 | log_dir = './logs'
94 | 
95 | testFiles = []
96 | 
97 | print('loading prediction data')
98 | with blob_files.mount() as mount:
99 |     mount_point = mount.mount_point
100 |     for root, dirs, files in os.walk(mount_point):
101 |         for f in files:
102 |             testFiles.append(os.path.join(root, f))
103 | 
104 | predFiles = [x for x in testFiles if '.gz' in x]
105 | jsonFiles = [x for x in testFiles if '.json' in x]
106 | jsonFile = jsonFiles[0]
107 | predData = makePredDataset(predFiles, BANDS, one_hot = None)
108 | 
109 | predictions = m.predict(predData, steps=None, verbose=1)
110 | 
111 | base = os.path.basename(jsonFile)[:-10]
112 | write_tfrecord_predictions(
113 |     predictions = predictions,
114 |     pred_path = out_dir,
115 |     # pred_path = '.',
116 |     # out_image_base = 'raw_unet256_testpred_solar_Jun21',
117 |     out_image_base = f'{base}_{model}',
118 |     kernel_shape = KERNEL_SHAPE,
119 |     kernel_buffer = [128,128])
120 | 
121 | # get the current time
122 | now = datetime.now()
123 | date = now.strftime("%d%b%y")
124 | date
125 | 
126 | print('moving predictions to blob')
127 | # blob_url = "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/testpred.tfrecords?sp=racw&st=2022-01-27T18:38:10Z&se=2022-01-29T02:38:10Z&sv=2020-08-04&sr=c&sig=vrHeB7LHAc2R2B6rhS%2BwRLqYM4xY5v1%2B9SlGyj8TTIY%3D"
128 | blob_url = blob['blob_url'].format(args.aoi, args.year, model)
129 | blob_client = BlobClient.from_blob_url(blob_url)
130 | # with open(f'./raw_unet256_testpred_solar_Jun21.tfrecords', 'rb') as f:
131 | with open(f'{out_dir}/{base}_{model}.tfrecords', 'rb') as f:
132 |     blob_client.upload_blob(f)
133 | 
134 | 
--------------------------------------------------------------------------------
/azure/predict_solar_terminal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | import sys
8 | import argparse
9 | import os
10 | import glob
11 | import json
12 | import math
13 | 
14 | sys.path.append(os.path.join(sys.path[0], 'scv'))
15 | 
16 | from scv.utils import model_tools, processing
17 | from scv.utils.prediction_tools import makePredDataset, write_tfrecord_predictions, write_geotiff_predictions
18 | from matplotlib import pyplot as plt
19 | 
20 | import tensorflow as tf
21 | from datetime import datetime
22 | from azureml.core import Run, Workspace, Model, Datastore, Dataset
23 | from azure.storage.blob import BlobClient
24 | 
25 | 
26 | # Set Global variables
27 | 
28 | parser = argparse.ArgumentParser()
29 | 
30 | # parser.add_argument('--pred_data', type = str, default = True, help = 'directory containing test image(s) and mixer')
31 | # parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
32 | 
33 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
34 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
35 | parser.add_argument('-c', type=str, help='The path to the job config file')
36 | parser.add_argument('--aoi', type=str, 
required = True, default = 'Delaware', help = 'Name of the area to run predictions')
37 | parser.add_argument('--year', type=str, required = True, default = 'Summer2020', help = 'Season and year subdirectory')
38 | 
39 | args = parser.parse_args()
40 | 
41 | # # get the run context
42 | # run = Run.get_context()
43 | # exp = run.experiment
44 | # read annual config file
45 | with open(args.c, 'r') as f:
46 |     config = json.load(f)
47 | 
48 | # access relevant key values
49 | blob = config['blobContainer']
50 | wksp = config['workspace']
51 | model = config['model']
52 | 
53 | # connect to the Azure ML workspace specified in the job config file.
54 | ws = Workspace(subscription_id = wksp["subscription_id"], workspace_name = wksp["workspace_name"], resource_group = wksp["resource_group"])
55 | 
56 | # access our registered data share containing image data in this workspace
57 | datastore = Datastore.get(workspace = ws, datastore_name = blob['datastore_name'])
58 | pred_path = (datastore, config['data'].format(args.aoi, args.year))
59 | # pred_path = (datastore, 'CPK_solar/data/predict/testpred6')
60 | blob_files = Dataset.File.from_files(path = [pred_path])
61 | 
62 | # BANDS = args.bands
63 | BANDS = json.loads(args.bands)
64 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
65 | 
66 | METRICS = {
67 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
68 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
69 |     }
70 | 
71 | def get_weighted_bce(y_true, y_pred):
72 |     return model_tools.weighted_bce(y_true, y_pred, 1)
73 | 
74 | print(f'Loading model {config["model"]}')
75 | # if a model directory is provided, we will reload the previously trained model and weights
76 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
77 | model_dir = Model.get_model_path(model, _workspace = ws)
78 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
79 | 
80 | # load our previously trained model and weights
81 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
82 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
83 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
84 | m.load_weights(weights_file)
85 | 
86 | print('found model file:', model_file, '\n weights file:', weights_file)
87 | 
88 | # Specify the size and shape of patches expected by the model. 
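# --- [editor's note: how the output blob is named] ---
# Near the end of this script the destination is built as
# blob['blob_url'].format(args.aoi, args.year, model), so the {}_{}_{}.tif
# placeholders in the *_geotiff.json configs resolve to, for example,
#
#   Delaware_Summer2020_solar_Jun21.tif
#
# (illustrative values; the SAS-signed URL prefix comes from the config file).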
89 | KERNEL_SIZE = args.kernel_size
90 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
91 | 
92 | 
93 | # create special folders './outputs' and './logs' which automatically get saved
94 | os.makedirs('outputs', exist_ok = True)
95 | os.makedirs('logs', exist_ok = True)
96 | out_dir = './outputs'
97 | log_dir = './logs'
98 | 
99 | testFiles = []
100 | 
101 | print('loading prediction data')
102 | with blob_files.mount() as mount:
103 |     mount_point = mount.mount_point
104 |     for root, dirs, files in os.walk(mount_point):
105 |         for f in files:
106 |             testFiles.append(os.path.join(root, f))
107 | 
108 | predFiles = [x for x in testFiles if '.gz' in x]
109 | jsonFiles = [x for x in testFiles if '.json' in x]
110 | jsonFile = jsonFiles[0]
111 | base = os.path.basename(jsonFile)[:-10]
112 | predData = makePredDataset(predFiles, BANDS, one_hot = None)
113 | print('writing predictions to geotiff')
114 | write_geotiff_predictions(
115 |     imageDataset = predData,
116 |     model = m,
117 |     jsonFile = jsonFile,
118 |     outImgBase = f'{base}_{model}',
119 |     outImgPath = out_dir,
120 |     kernel_buffer = [128, 128]
121 |     )
122 | 
123 | # write_tfrecord_predictions(
124 | #     predictions = predictions,
125 | #     pred_path = out_dir,
126 | #     # pred_path = '.',
127 | #     # out_image_base = 'raw_unet256_testpred_solar_Jun21',
128 | #     out_image_base = f'{base}_{model}',
129 | #     kernel_shape = KERNEL_SHAPE,
130 | #     kernel_buffer = [128,128])
131 | 
132 | # write_geotiff_predictions(
133 | #     predictions = predictions,
134 | #     mixer = mixer,
135 | #     outImgBase = f'{base}_{model}',
136 | #     outImgPath = out_dir,
137 | #     kernel_buffer = [128,128]
138 | #     )
139 | 
140 | # get the current time
141 | now = datetime.now()
142 | date = now.strftime("%d%b%y")
143 | date
144 | 
145 | print('moving predictions to blob')
146 | # blob_url = "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/testpred.tfrecords?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D"
147 | blob_url = blob['blob_url'].format(args.aoi, args.year, model)
148 | blob_client = BlobClient.from_blob_url(blob_url)
149 | # with open(f'./raw_unet256_testpred_solar_Jun21.tfrecords', 'rb') as f:
150 | with open(f'{out_dir}/{base}_{model}.tif', 'rb') as f:
151 |     blob_client.upload_blob(f)
152 | 
153 | 
--------------------------------------------------------------------------------
/envs/conda_env_jan29.yml:
--------------------------------------------------------------------------------
1 | name: solar-training
2 | channels:
3 |   - anaconda
4 |   - conda-forge
5 |   - defaults
6 | dependencies:
7 |   - _libgcc_mutex=0.1=conda_forge
8 |   - _openmp_mutex=4.5=1_llvm
9 |   - _tflow_select=2.3.0=mkl
10 |   - abseil-cpp=20200923.3=h2531618_0
11 |   - aiohttp=3.7.3=py37h4abf009_0
12 |   - astor=0.8.1=pyh9f0ad1d_0
13 |   - astunparse=1.6.3=pyhd8ed1ab_0
14 |   - async-timeout=3.0.1=py_1000
15 |   - attrs=21.4.0=pyhd8ed1ab_0
16 |   - blas=2.17=openblas
17 |   - blinker=1.4=py_1
18 |   - brotlipy=0.7.0=py37hb5d75c8_1001
19 |   - c-ares=1.17.1=h36c2ea0_0
20 |   - ca-certificates=2021.10.8=ha878542_0
21 |   - cached-property=1.5.2=hd8ed1ab_1
22 |   - cached_property=1.5.2=pyha770c72_1
23 |   - cachetools=4.2.4=pyhd8ed1ab_0
24 |   - certifi=2021.10.8=py37h89c1867_1
25 |   - cffi=1.15.0=py37hd667e15_1
26 |   - chardet=3.0.4=py37he5f6b98_1008
27 |   - charset-normalizer=2.0.10=pyhd8ed1ab_0
28 |   - click=8.0.3=py37h89c1867_1
29 |   - dataclasses=0.8=pyhc8e2a94_3
30 |   - flatbuffers=1.12.0=h58526e2_0
31 |   - 
giflib=5.2.1=h36c2ea0_2 32 | - google-pasta=0.2.0=pyh8c360ce_0 33 | - hdf5=1.10.6=nompi_h7c3c948_1111 34 | - icu=68.1=h58526e2_0 35 | - idna=3.3=pyhd8ed1ab_0 36 | - importlib-metadata=4.10.1=py37h89c1867_0 37 | - jpeg=9d=h36c2ea0_0 38 | - keras-preprocessing=1.1.2=pyhd8ed1ab_0 39 | - krb5=1.17.2=h926e7f8_0 40 | - ld_impl_linux-64=2.33.1=h53a641e_7 41 | - libblas=3.8.0=17_openblas 42 | - libcblas=3.8.0=17_openblas 43 | - libcurl=7.71.1=hcdd3856_3 44 | - libedit=3.1.20191231=h14c3975_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=9.1.0=hdf63c60_0 47 | - libgfortran-ng=7.5.0=h14aa051_19 48 | - libgfortran4=7.5.0=h14aa051_19 49 | - liblapack=3.8.0=17_openblas 50 | - liblapacke=3.8.0=17_openblas 51 | - libopenblas=0.3.10=pthreads_hb3c22a3_5 52 | - libpng=1.6.37=h21135ba_2 53 | - libprotobuf=3.14.0=h8c45485_0 54 | - libssh2=1.9.0=hab1572f_5 55 | - libstdcxx-ng=9.1.0=hdf63c60_0 56 | - llvm-openmp=12.0.1=h4bd325d_1 57 | - markdown=3.3.6=pyhd8ed1ab_0 58 | - multidict=4.7.5=py37h8f50634_2 59 | - ncurses=6.2=he6710b0_1 60 | - oauthlib=3.1.1=pyhd8ed1ab_0 61 | - openssl=1.1.1m=h7f8727e_0 62 | - opt_einsum=3.3.0=pyhd8ed1ab_1 63 | - pip=20.2.4=py37_0 64 | - pyasn1=0.4.8=py_0 65 | - pycparser=2.21=pyhd8ed1ab_0 66 | - pyjwt=2.3.0=pyhd8ed1ab_1 67 | - pyopenssl=21.0.0=pyhd8ed1ab_0 68 | - pysocks=1.7.1=py37h89c1867_4 69 | - python=3.7.9=h7579374_0 70 | - python-flatbuffers=1.12=pyhd8ed1ab_1 71 | - python_abi=3.7=2_cp37m 72 | - pyu2f=0.1.5=pyhd8ed1ab_0 73 | - readline=8.0=h7b6447c_0 74 | - requests=2.27.1=pyhd8ed1ab_0 75 | - requests-oauthlib=1.3.0=pyh9f0ad1d_0 76 | - rsa=4.8=pyhd8ed1ab_0 77 | - setuptools=50.3.0=py37hb0f4dca_1 78 | - snappy=1.1.8=he1b5a44_3 79 | - sqlite=3.37.0=hc218d9a_0 80 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 81 | - tensorflow=2.5.0=mkl_py37h3e8b3f4_0 82 | - tensorflow-base=2.5.0=mkl_py37h35b2a3d_0 83 | - tk=8.6.10=hbc83047_0 84 | - werkzeug=2.0.2=pyhd8ed1ab_0 85 | - wheel=0.35.1=py_0 86 | - wrapt=1.12.1=py37h4abf009_2 87 | - xz=5.2.5=h7b6447c_0 88 | - yarl=1.6.3=py37h4abf009_0 89 | - zipp=3.7.0=pyhd8ed1ab_0 90 | - zlib=1.2.11=h7b6447c_3 91 | - pip: 92 | - absl-py==0.15.0 93 | - adal==1.2.7 94 | - affine==2.3.0 95 | - applicationinsights==0.11.10 96 | - argcomplete==1.12.3 97 | - azure-common==1.1.27 98 | - azure-core==1.20.1 99 | - azure-graphrbac==0.61.1 100 | - azure-identity==1.7.0 101 | - azure-mgmt-authorization==0.61.0 102 | - azure-mgmt-containerregistry==9.0.0 103 | - azure-mgmt-core==1.3.0 104 | - azure-mgmt-keyvault==9.3.0 105 | - azure-mgmt-resource==19.0.0 106 | - azure-mgmt-storage==19.0.0 107 | - azure-storage-blob==12.9.0 108 | - azureml-core==1.37.0.post1 109 | - azureml-dataprep==2.25.2 110 | - azureml-dataprep-native==38.0.0 111 | - azureml-dataprep-rslex==2.1.1 112 | - azureml-dataset-runtime==1.37.0 113 | - azureml-defaults==1.37.0 114 | - azureml-inference-server-http==0.4.2 115 | - azureml-mlflow==1.37.0 116 | - azureml-telemetry==1.37.0 117 | - backcall==0.2.0 118 | - backports-tempfile==1.0 119 | - backports-weakref==1.0.post1 120 | - bcrypt==3.2.0 121 | - click-plugins==1.1.1 122 | - cligj==0.7.2 123 | - cloudpickle==2.0.0 124 | - colorama==0.4.4 125 | - configparser==3.7.4 126 | - contextlib2==21.6.0 127 | - cryptography==3.4.8 128 | - cycler==0.11.0 129 | - databricks-cli==0.16.2 130 | - debugpy==1.5.1 131 | - decorator==5.1.1 132 | - dill==0.3.4 133 | - distro==1.6.0 134 | - docker==5.0.3 135 | - dotnetcore2==2.1.22 136 | - earthengine-api==0.1.296 137 | - entrypoints==0.3 138 | - flask==1.0.3 139 | - fusepy==3.0.1 140 | - future==0.18.2 141 | - 
gast==0.3.3 142 | - gitdb==4.0.9 143 | - gitpython==3.1.26 144 | - google-api-core==2.4.0 145 | - google-api-python-client==1.12.10 146 | - google-auth==1.35.0 147 | - google-auth-httplib2==0.1.0 148 | - google-auth-oauthlib==0.4.6 149 | - google-cloud-core==2.2.2 150 | - google-cloud-storage==2.1.0 151 | - google-crc32c==1.3.0 152 | - google-resumable-media==2.1.0 153 | - googleapis-common-protos==1.54.0 154 | - grpcio==1.34.1 155 | - gunicorn==20.1.0 156 | - h5py==2.10.0 157 | - horovod==0.21.3 158 | - httplib2==0.20.2 159 | - httplib2shim==0.0.3 160 | - humanfriendly==9.2 161 | - importlib-resources==5.4.0 162 | - inference-schema==1.3.0 163 | - ipykernel==6.7.0 164 | - ipython==7.31.1 165 | - isodate==0.6.1 166 | - itsdangerous==2.0.1 167 | - jedi==0.18.1 168 | - jeepney==0.7.1 169 | - jinja2==3.0.3 170 | - jmespath==0.10.0 171 | - json-logging-py==0.2 172 | - jsonpickle==2.1.0 173 | - jupyter-client==7.1.2 174 | - jupyter-core==4.9.1 175 | - keras-nightly==2.5.0.dev2021032900 176 | - kiwisolver==1.3.2 177 | - knack==0.8.2 178 | - markupsafe==2.0.1 179 | - matplotlib==3.3.4 180 | - matplotlib-inline==0.1.3 181 | - mlflow-skinny==1.23.0 182 | - msal==1.16.0 183 | - msal-extensions==0.3.1 184 | - msrest==0.6.21 185 | - msrestazure==0.6.4 186 | - ndg-httpsclient==0.5.1 187 | - nest-asyncio==1.5.4 188 | - numpy==1.19.5 189 | - onnxruntime-gpu==1.7.0 190 | - packaging==21.3 191 | - pandas==1.1.5 192 | - paramiko==2.9.2 193 | - parso==0.8.3 194 | - pathspec==0.9.0 195 | - pexpect==4.8.0 196 | - pickleshare==0.7.5 197 | - pillow==9.0.0 198 | - pkginfo==1.8.2 199 | - portalocker==2.3.2 200 | - promise==2.3 201 | - prompt-toolkit==3.0.24 202 | - protobuf==3.19.3 203 | - psutil==5.8.0 204 | - ptyprocess==0.7.0 205 | - pyarrow==3.0.0 206 | - pyasn1-modules==0.2.8 207 | - pygments==2.11.2 208 | - pynacl==1.5.0 209 | - pyparsing==3.0.7 210 | - python-dateutil==2.8.2 211 | - pytz==2021.3 212 | - pyyaml==6.0 213 | - pyzmq==22.3.0 214 | - rasterio==1.2.10 215 | - scipy==1.5.4 216 | - secretstorage==3.3.1 217 | - six==1.15.0 218 | - smmap==5.0.0 219 | - snuggs==1.4.7 220 | - tabulate==0.8.9 221 | - tensorboard==2.4.0 222 | - tensorboard-data-server==0.6.1 223 | - tensorflow-datasets==4.3.0 224 | - tensorflow-estimator==2.4.0 225 | - tensorflow-gpu==2.5.0 226 | - tensorflow-metadata==1.6.0 227 | - termcolor==1.1.0 228 | - tornado==6.1 229 | - tqdm==4.59.0 230 | - traitlets==5.1.1 231 | - typing-extensions==3.7.4.3 232 | - uritemplate==3.0.1 233 | - urllib3==1.26.7 234 | - wcwidth==0.2.5 235 | - websocket-client==1.2.3 236 | prefix: /anaconda/envs/solar-training 237 | -------------------------------------------------------------------------------- /azure/train_solar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jan 2 12:41:40 2021 4 | 5 | @author: MEvans 6 | """ 7 | 8 | from scv.utils import model_tools, processing 9 | from scv.utils.prediction_tools import make_pred_dataset, callback_predictions, plot_to_image 10 | from matplotlib import pyplot as plt 11 | import argparse 12 | import os 13 | import glob 14 | import json 15 | import math 16 | import tensorflow as tf 17 | from datetime import datetime 18 | from azureml.core import Run, Workspace, Model 19 | 20 | print(tf.__version__) 21 | # Set Global variables 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--train_data', type = str, required = True, help = 'Training datasets') 25 | parser.add_argument('--eval_data', type = str, required = 
True, help = 'Evaluation datasets')
26 | parser.add_argument('--test_data', type = str, default = None, help = 'directory containing test image(s) and mixer')
27 | parser.add_argument('--model_id', type = str, required = False, default = None, help = 'model id for continued training')
28 | parser.add_argument('-lr', '--learning_rate', type = float, default = 0.001, help = 'Initial learning rate')
29 | parser.add_argument('-w', '--weights', type = str, default = None, help = 'sample weight for classes in iou, bce, etc.')
30 | parser.add_argument('--bias', type = float, default = None, help = 'bias value for keras output layer initializer')
31 | parser.add_argument('-e', '--epochs', type = int, default = 10, help = 'Number of epochs to train the model for')
32 | parser.add_argument('-b', '--batch', type = int, default = 16, help = 'Training batch size')
33 | parser.add_argument('--size', type = int, default = 3000, help = 'Size of training dataset')
34 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
35 | parser.add_argument('--response', type = str, required = True, help = 'Name of the response variable in tfrecords')
36 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
37 | parser.add_argument('--splits', type = str, required = False, default = '[0]')
38 | parser.add_argument('--epoch_start', type = int, required = False, help = 'If re-training, the last epoch')
39 | 
40 | args = parser.parse_args()
41 | print('bands', args.bands)
42 | TRAIN_SIZE = args.size
43 | BATCH = args.batch
44 | EPOCHS = args.epochs
45 | LAST = args.epoch_start
46 | BIAS = args.bias
47 | WEIGHTS = json.loads(args.weights) if args.weights else [1.0, 1.0] # fall back to equal class weights when -w is omitted
48 | LR = args.learning_rate
49 | BANDS = json.loads(args.bands)
50 | DEPTH = len(BANDS)
51 | SPLITS = json.loads(args.splits)
52 | if sum(SPLITS) == 0:
53 |     SPLITS = None
54 | RESPONSE = dict({args.response:2})
55 | MOMENTS = [(0,10000),(0,10000),(0,10000),(0,10000),(0,10000),(0,10000) ]
56 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999)
57 | 
58 | METRICS = {
59 |     'logits':[tf.keras.metrics.CategoricalAccuracy()],
60 |     'classes':[tf.keras.metrics.MeanIoU(num_classes = 2, sparse_y_pred = True, sparse_y_true = False)]
61 |     }
62 | 
63 | def weighted_crossentropy(y_true, y_pred):
64 |     class_weights = tf.compat.v2.constant(WEIGHTS)
65 |     weights = tf.reduce_sum(class_weights * y_true, axis = -1)
66 |     print('weights shape', weights.shape)
67 |     unweighted_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
68 |     weighted_loss = weights * unweighted_loss
69 |     loss = tf.reduce_mean(weighted_loss)
70 |     return loss
71 | 
72 | 
73 | LOSSES = {
74 |     'logits':weighted_crossentropy
75 |     }
76 | 
77 | FEATURES = BANDS + [args.response]
78 | 
79 | # round the training data size up to nearest 100 to define buffer
80 | BUFFER = math.ceil(args.size/100)*100
81 | 
82 | # Specify the size and shape of patches expected by the model. 
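# --- [editor's worked example (sketch) of weighted_crossentropy above] ---
# With hypothetical WEIGHTS = [1.0, 10.0] and a one-hot solar pixel y_true = [0, 1]:
#
#   weights = reduce_sum([1.0, 10.0] * [0, 1]) = 10.0
#   loss    = mean(10.0 * categorical_crossentropy([0, 1], y_pred))
#
# i.e. errors on the rare solar class count ten times as much as background
# errors, which is the point of passing class weights via -w.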
83 | KERNEL_SIZE = args.kernel_size
84 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
85 | COLUMNS = [
86 |     tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES
87 |     ]
88 | FEATURES_DICT = dict(zip(FEATURES, COLUMNS))
89 | 
90 | # create special folders './outputs' and './logs' which automatically get saved
91 | os.makedirs('outputs', exist_ok = True)
92 | os.makedirs('logs', exist_ok = True)
93 | out_dir = './outputs'
94 | log_dir = './logs'
95 | 
96 | # create training dataset
97 | 
98 | # train_files = glob.glob(os.path.join(args.data_folder, 'training', 'UNET_256_[A-Z]*.gz'))
99 | # eval_files = glob.glob(os.path.join(args.data_folder, 'eval', 'UNET_256_[A-Z]*.gz'))
100 | 
101 | train_files = []
102 | for root, dirs, files in os.walk(args.train_data):
103 |     for f in files:
104 |         train_files.append(os.path.join(root, f))
105 | 
106 | eval_files = []
107 | for root, dirs, files in os.walk(args.eval_data):
108 |     for f in files:
109 |         eval_files.append(os.path.join(root, f))
110 | 
111 | # train_files = glob.glob(os.path.join(args.train_data, 'UNET_256_[A-Z]*.gz'))
112 | # eval_files = glob.glob(os.path.join(args.eval_data, 'UNET_256_[A-Z]*.gz'))
113 | 
114 | training = processing.get_training_dataset(
115 |     files = train_files,
116 |     ftDict = FEATURES_DICT,
117 |     features = BANDS,
118 |     response = RESPONSE,
119 |     moments = MOMENTS,
120 |     buff = BUFFER,
121 |     batch = BATCH,
122 |     axes = [2],
123 |     repeat = True,
124 |     splits = SPLITS)
125 | 
126 | evaluation = processing.get_eval_dataset(
127 |     files = eval_files,
128 |     ftDict = FEATURES_DICT,
129 |     features = BANDS,
130 |     response = RESPONSE,
131 |     moments = MOMENTS,
132 |     splits = SPLITS)
133 | 
134 | ## DEFINE CALLBACKS
135 | 
136 | # get the current time
137 | now = datetime.now()
138 | date = now.strftime("%d%b%y")
139 | date
140 | 
141 | # define a checkpoint callback to save best models during training
142 | checkpoint = tf.keras.callbacks.ModelCheckpoint(
143 |     os.path.join(out_dir, 'best_weights_'+date+'_{epoch:02d}.hdf5'),
144 |     monitor='val_classes_mean_io_u',
145 |     verbose=1,
146 |     save_best_only=True,
147 |     mode='max'
148 |     )
149 | 
150 | # define a tensorboard callback to write training logs
151 | tensorboard = tf.keras.callbacks.TensorBoard(log_dir = log_dir)
152 | 
153 | # get the run context
154 | run = Run.get_context()
155 | exp = run.experiment
156 | ws = exp.workspace
157 | 
158 | ## BUILD THE MODEL
159 | 
160 | # if a model directory is provided, we will reload the previously trained model and weights
161 | if args.model_id:
162 |     # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
163 |     model_dir = Model.get_model_path(args.model_id, _workspace = ws)
164 |     # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
165 | 
166 |     # load our previously trained model and weights
167 |     model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
168 |     weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
169 |     m, checkpoint = model_tools.retrain_model(
170 |         model_file = model_file,
171 |         checkpoint = checkpoint,
172 |         eval_data = evaluation,
173 |         metric = 'classes_mean_io_u',
174 |         weights_file = weights_file,
175 |         custom_objects = {'weighted_crossentropy': weighted_crossentropy},
176 |         lr = LR)
177 |     # TODO: make this dynamic
178 |     initial_epoch = LAST
179 | # otherwise build a model from scratch with provided specs
180 | else:
181 |     m = model_tools.get_unet_model(nclasses = 2, nchannels = DEPTH, optim = OPTIMIZER, loss = LOSSES, mets = METRICS, bias = 
BIAS) 182 | initial_epoch = 0 183 | 184 | # if test images provided, define an image saving callback 185 | if args.test_data: 186 | 187 | test_files = glob.glob(os.path.join(args.test_data, '*.gz')) 188 | mixer_file = glob.glob(os.path.join(args.test_data, '*.json')) 189 | 190 | # run predictions on a test image and log so we can see what the model is doing at each epoch 191 | jsonFile = mixer_file[0] 192 | with open(jsonFile,) as file: 193 | mixer = json.load(file) 194 | 195 | pred_data = make_pred_dataset(test_files, BANDS, moments = MOMENTS) 196 | file_writer = tf.summary.create_file_writer(log_dir + '/preds') 197 | 198 | def log_pred_image(epoch, logs): 199 | out_image = callback_predictions(pred_data, m, mixer) 200 | prob = out_image[:, :] 201 | figure = plt.figure(figsize=(10, 10)) 202 | plt.imshow(prob) 203 | image = plot_to_image(figure) 204 | 205 | with file_writer.as_default(): 206 | tf.summary.image("Predicted Image", image, step=epoch) 207 | 208 | pred_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end = log_pred_image) 209 | 210 | callbacks = [checkpoint, tensorboard, pred_callback] 211 | else: 212 | callbacks = [checkpoint, tensorboard] 213 | 214 | # train the model 215 | steps_per_epoch = int(TRAIN_SIZE//BATCH) 216 | m.fit( 217 | x = training, 218 | epochs = EPOCHS, 219 | steps_per_epoch = steps_per_epoch, 220 | validation_data = evaluation, 221 | callbacks = callbacks, 222 | initial_epoch = initial_epoch 223 | ) 224 | 225 | m.save(os.path.join(out_dir, f'solar_unet256_{date}.h5')) 226 | -------------------------------------------------------------------------------- /azure/train_autoencoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jan 2 12:41:40 2021 4 | 5 | @author: MEvans 6 | """ 7 | 8 | from Satellite_ComputerVision import model_tools, processing 9 | from Satellite_ComputerVision.prediction_tools import makePredDataset, callback_predictions, plot_to_image 10 | from matplotlib import pyplot as plt 11 | import argparse 12 | import os 13 | import glob 14 | import json 15 | import math 16 | import tensorflow as tf 17 | from datetime import datetime 18 | from azureml.core import Run, Workspace, Model 19 | 20 | 21 | # Set Global variables 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--train_data', type = str, required = True, help = 'Training datasets') 25 | parser.add_argument('--eval_data', type = str, required = True, help = 'Evaluation datasets') 26 | parser.add_argument('--test_data', type = str, default = None, help = 'directory containing test image(s) and mixer') 27 | parser.add_argument('--model_id', type = str, required = False, default = None, help = 'model id for continued training') 28 | parser.add_argument('-lr', '--learning_rate', type = float, default = 0.001, help = 'Initial learning rate') 29 | parser.add_argument('-e', '--epochs', type = int, default = 10, help = 'Number of epochs to train the model for') 30 | parser.add_argument('-b', '--batch', type = int, default = 16, help = 'Training batch size') 31 | parser.add_argument('--size', type = int, default = 3000, help = 'Size of training dataset') 32 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches') 33 | parser.add_argument('--bands', type = str, nargs = '+', required = False, default = ['B2', 'B3', 'B4', 'B8']) 34 | args = parser.parse_args() 35 | 36 | TRAIN_SIZE = args.size 37 | BATCH = 
args.batch 38 | EPOCHS = args.epochs 39 | LR = args.learning_rate 40 | BANDS = args.bands 41 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999) 42 | LOSS = 'mean_squared_error' 43 | METRICS = [tf.keras.metrics.MeanSquaredError(name = 'mse')] 44 | 45 | # set the shuffle buffer to a quarter of the training set size, rounded up to the nearest 100 46 | BUFFER = math.ceil(args.size//4/100)*100 47 | 48 | # Specify the size and shape of patches expected by the model. 49 | KERNEL_SIZE = args.kernel_size 50 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE] 51 | 52 | FEATURES = [tf.io.FixedLenFeature(shape = KERNEL_SHAPE, dtype = tf.float32) for band in BANDS] 53 | FEATURES_DICT = dict(zip(BANDS, FEATURES)) 54 | 55 | # create special folders './outputs' and './logs' which are automatically saved 56 | os.makedirs('outputs', exist_ok = True) 57 | os.makedirs('logs', exist_ok = True) 58 | out_dir = './outputs' 59 | log_dir = './logs' 60 | 61 | # create training dataset 62 | 63 | # train_files = glob.glob(os.path.join(args.data_folder, 'training', 'UNET_256_[A-Z]*.gz')) 64 | # eval_files = glob.glob(os.path.join(args.data_folder, 'eval', 'UNET_256_[A-Z]*.gz')) 65 | 66 | train_files = [] 67 | for root, dirs, files in os.walk(args.train_data): 68 | for f in files: 69 | train_files.append(os.path.join(root, f)) 70 | 71 | eval_files = [] 72 | for root, dirs, files in os.walk(args.eval_data): 73 | for f in files: 74 | eval_files.append(os.path.join(root, f)) 75 | print(f'number of train files = {len(train_files)}') 76 | print(f'first train file is {train_files[0]}') 77 | def to_tuple(inputs): 78 | """Function to convert a dictionary of tensors to a tuple of (inputs, outputs). 79 | Turn the tensors returned by parse_tfrecord into a stack in HWC shape. 80 | Args: 81 | inputs: A dictionary of tensors, keyed by feature name. 82 | Returns: 83 | A tuple of (inputs, outputs). 84 | """ 85 | # double up our bands to match the structure of before/after data 86 | inputsList = [inputs.get(key) for key in BANDS] 87 | stacked = tf.stack(inputsList, axis=0) 88 | # Convert from CHW to HWC 89 | stacked = tf.transpose(stacked, [1, 2, 0]) 90 | # Perform image augmentation 91 | stacked = processing.aug_img(stacked) 92 | normalized = processing.normalize(stacked, [2]) 93 | # do color augmentation on input features 94 | before = processing.aug_color(normalized) 95 | after = processing.aug_color(normalized) 96 | # concatenate the two augmented copies along the channel axis 97 | bands = tf.concat([before, after], axis = -1) 98 | response = bands 99 | return bands, response 100 | 101 | def get_dataset(files, ftDict, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 102 | """Function to read, parse and format to tuple a set of input tfrecord files. 103 | Get all the files matching the pattern, parse and convert to tuple.
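Example (illustrative; assumes the default four bands and 256x256 patches, so each record parses to eight stacked channels after the before/after doubling above):
    ds = get_dataset(train_files[:1], FEATURES_DICT)
    x, y = next(iter(ds))
    # x.shape == y.shape == (256, 256, 8) for this autoencoder setup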
104 | Args: 105 | files (list): A list of filenames storing tfrecords 106 | ftDict (dict): Dictionary of input features in tfrecords 107 | features (list): List of input feature names 108 | response (str): response name(s) 109 | axes (list): axes along which to calculate moments for rescaling 110 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth 111 | splits (list): size(s) of groups of features to be kept together 112 | moments (list): list of [mean, var] tuples for standardization 113 | Returns: 114 | A tf.data.Dataset 115 | """ 116 | 117 | def parse_tfrecord(example_proto): 118 | return tf.io.parse_single_example(example_proto, ftDict) 119 | 120 | def tupelize(feature_dict): 121 | return to_tuple(feature_dict) 122 | 123 | dataset = tf.data.TFRecordDataset(files, compression_type='GZIP') 124 | dataset = dataset.map(parse_tfrecord, num_parallel_calls=5) 125 | dataset = dataset.map(tupelize, num_parallel_calls=5) 126 | return dataset 127 | 128 | def get_training_dataset(files, ftDict, buff, batch = 16, repeat = True, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 129 | """ 130 | Get the preprocessed training dataset 131 | Args: 132 | files (list): list of tfrecord files to be used for training 133 | ftDict (dict): Dictionary of input features in tfrecords 134 | features (list): List of input feature names 135 | response (str): response name(s) 136 | axes (list): axes along which to calculate moments for rescaling 137 | buff (int): buffer size for shuffle 138 | batch (int): batch size for training 139 | repeat (bool): should the dataset be repeated 140 | Returns: 141 | A tf.data.Dataset of training data. 142 | """ 143 | dataset = get_dataset(files, ftDict, axes, splits, one_hot, moments, **kwargs) 144 | if repeat: 145 | dataset = dataset.shuffle(buff).batch(batch).repeat() 146 | else: 147 | dataset = dataset.shuffle(buff).batch(batch) 148 | return dataset 149 | 150 | def get_eval_dataset(files, ftDict, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 151 | """ 152 | Get the preprocessed evaluation dataset 153 | Args: 154 | files (list): list of tfrecords to be used for evaluation 155 | Returns: 156 | A tf.data.Dataset of evaluation data. 157 | """ 158 | 159 | dataset = get_dataset(files, ftDict, axes, splits, one_hot, moments, **kwargs) 160 | dataset = dataset.batch(1) 161 | return dataset 162 | 163 | training = get_training_dataset( 164 | files = train_files[:len(train_files)//2], 165 | ftDict = FEATURES_DICT, 166 | buff = BUFFER, 167 | batch = BATCH, 168 | repeat = True) 169 | 170 | evaluation = get_eval_dataset( 171 | files = eval_files[:len(eval_files)//2], 172 | ftDict = FEATURES_DICT, 173 | features = BANDS) 174 | 175 | ## DEFINE CALLBACKS 176 | 177 | # get the current time 178 | now = datetime.now() 179 | date = now.strftime("%d%b%y") 180 | date 181 | 182 | # define a checkpoint callback to save best models during training 183 | checkpoint = tf.keras.callbacks.ModelCheckpoint( 184 | os.path.join(out_dir, 'best_weights'+date+'.hdf5'), 185 | monitor='val_mse', 186 | verbose=1, 187 | save_best_only=True, 188 | mode='min' 189 | ) 190 | 191 | # define a tensorboard callback to write training logs 192 | tensorboard = tf.keras.callbacks.TensorBoard(log_dir = log_dir) 193 | 194 | callbacks = [checkpoint, tensorboard] 195 | 196 | # get the run context 197 | run = Run.get_context() 198 | exp = run.experiment 199 | ws = exp.workspace 200 | 201 | # # Create a MirroredStrategy.
202 | # strategy = tf.distribute.MirroredStrategy() 203 | # print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) 204 | 205 | # ## BUILD THE MODEL 206 | # with strategy.scope(): 207 | # METRICS = [tf.keras.metrics.MeanSquaredError(name = 'mse')] 208 | 209 | # OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999) 210 | m = model_tools.get_autoencoder(depth = len(BANDS)*2, optim = OPTIMIZER, loss = LOSS, mets = METRICS) 211 | # # if a model directory provided we will reload previously trained model and weights 212 | # if args.model_id: 213 | # # we will package the 'models' directory within the 'azure' dirrectory submitted with experiment run 214 | # model_dir = Model.get_model_path(args.model_id, _workspace = ws) 215 | # # model_dir = os.path.join('./models', args.model_id, '1', 'outputs') 216 | 217 | # # load our previously trained model and weights 218 | # model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0] 219 | # weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0] 220 | # m, checkpoint = model_tools.retrain_model(model_file, checkpoint, evaluation, 'classes_mean_iou', weights_file, weight = WEIGHT, lr = LR) 221 | # # TODO: make this dynamic 222 | # initial_epoch = 100 223 | # # otherwise build a model from scratch with provided specs 224 | # else: 225 | # m = model_tools.get_autoencoder(depth = len(BANDS)*2, optim = OPTIMIZER, loss = LOSS, mets = METRICS) 226 | # initial_epoch = 0 227 | 228 | # train the model 229 | steps_per_epoch = int(TRAIN_SIZE//BATCH//4) 230 | print('steps per epoch', steps_per_epoch) 231 | m.fit( 232 | x = training, 233 | epochs = EPOCHS, 234 | steps_per_epoch = steps_per_epoch, 235 | validation_data = evaluation, 236 | callbacks = callbacks 237 | ) 238 | 239 | m.save(os.path.join(out_dir, 'unet256_autoencoder_8band.h5')) 240 | -------------------------------------------------------------------------------- /demos/Training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Training.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "id": "xnHMtxTUavbx" 23 | }, 24 | "source": [ 25 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 26 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 27 | "# you may not use this file except in compliance with the License.\n", 28 | "# You may obtain a copy of the License at\n", 29 | "#\n", 30 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 31 | "#\n", 32 | "# Unless required by applicable law or agreed to in writing, software\n", 33 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 34 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 35 | "# See the License for the specific language governing permissions and\n", 36 | "# limitations under the License." 
37 | ], 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "8Ciecm6Ia2Xa" 45 | }, 46 | "source": [ 47 | "# Introduction\n", 48 | "\n", 49 | "This notebook demonstrates a workflow for training a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) on previously extracted remote sensing data using Tensorflow. In this example, we read 256x256 pixel image chips saved as zipped tfrecords in Google Cloud Storage (Note: the data can be read in from anywhere) containing the visible, infrared, and near infrared bands of Sentinel-2 imagery and a binary label band. This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "id": "Yla55CsQa2yw" 56 | }, 57 | "source": [ 58 | "import os\nfrom os.path import join\nfrom datetime import datetime\n", 59 | "from sys import path\n", 60 | "import json\n", 61 | "import numpy as np\n", 62 | "import tensorflow as tf" 63 | ], 64 | "execution_count": 1, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "jDBvGhTXa5II" 71 | }, 72 | "source": [ 73 | "## Clone repo containing preprocessing and prediction functions\n", 74 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 75 | ], 76 | "execution_count": null, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "Dl2DPfr9a8eW" 83 | }, 84 | "source": [ 85 | "# Load the necessary modules from repo\n", 86 | "path.append('/content/Satellite_ComputerVision')\n", 87 | "\n", 88 | "from utils.processing import get_training_dataset, get_eval_dataset\n", 89 | "from utils.model_tools import get_model, weighted_bce, make_confusion_matrix" 90 | ], 91 | "execution_count": null, 92 | "outputs": [] 93 | },
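{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# If running in Colab, authenticate first so the gsutil calls below can\n",
    "# reach your bucket (sketch; adapt to your own environment and credentials)\n",
    "from google.colab import auth\n",
    "auth.authenticate_user()"
  ],
  "execution_count": null,
  "outputs": []
},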
\n", 101 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 102 | "BUCKET_PATH = join('gs://', BUCKET)\n", 103 | "\n", 104 | "FOLDER = 'NC_solar'\n", 105 | "PRED_BASE = 'data/predict'\n", 106 | "TRAIN_BASE = 'data/training'\n", 107 | "EVAL_BASE = 'data/eval'\n", 108 | "\n", 109 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 110 | "opticalBands = ['B2', 'B3', 'B4']\n", 111 | "thermalBands = ['B8', 'B11', 'B12']\n", 112 | "\n", 113 | "BANDS = opticalBands + thermalBands# + pcaBands\n", 114 | "RESPONSE = 'landcover'\n", 115 | "FEATURES = BANDS + [RESPONSE]\n", 116 | "\n", 117 | "# Specify the size and shape of patches expected by the model.\n", 118 | "KERNEL_SIZE = 256\n", 119 | "KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n", 120 | "COLUMNS = [\n", 121 | " tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n", 122 | "]\n", 123 | "FEATURES_DICT = dict(zip(FEATURES, COLUMNS))" 124 | ], 125 | "execution_count": 2, 126 | "outputs": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "id": "WBQN2UdagYj6" 132 | }, 133 | "source": [ 134 | "## Training Data\n", 135 | "First, we will read previously exported training data fro GCS into TFRecordDatasets" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "metadata": { 141 | "id": "f7biGY7cbBSU" 142 | }, 143 | "source": [ 144 | "# make sure we have training records\n", 145 | "trainPattern = join(BUCKET_PATH, FOLDER, TRAIN_BASE, '*.tfrecord.gz')\n", 146 | "print(trainPattern)\n", 147 | "trainFiles = !gsutil ls {trainPattern}" 148 | ], 149 | "execution_count": null, 150 | "outputs": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "metadata": { 155 | "id": "6e3C9k5Ugm0R" 156 | }, 157 | "source": [ 158 | "# create training dataset with default arguments for batch (16), repeat (True), and normalization axis (0)\n", 159 | "training = get_training_dataset(trainFiles, FEATURES_DICT, BANDS, RESPONSE, 2000)" 160 | ], 161 | "execution_count": null, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "id": "zhylXyE4g2U2" 168 | }, 169 | "source": [ 170 | "# confirm the training dataset produces expected results\n", 171 | "iterator = iter(training)\n", 172 | "print(iterator.next())" 173 | ], 174 | "execution_count": null, 175 | "outputs": [] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "metadata": { 180 | "id": "OnpFtwj_g_lA" 181 | }, 182 | "source": [ 183 | "evalPattern = join(BUCKET_PATH, FOLDER, EVAL_BASE, '*.tfrecord.gz')\n", 184 | "print(evalPattern)\n", 185 | "evalFiles = !gsutil ls {evalPattern}" 186 | ], 187 | "execution_count": null, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "metadata": { 193 | "id": "Js6Dn2dshHYL" 194 | }, 195 | "source": [ 196 | "# create evaluation dataset\n", 197 | "evaluation = get_eval_dataset(evalFiles, FEATURES_DICT, BANDS, RESPONSE)" 198 | ], 199 | "execution_count": null, 200 | "outputs": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "RR06Y089jeSk" 206 | }, 207 | "source": [ 208 | "## Model" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "8Ww2Yq36kbJm" 215 | }, 216 | "source": [ 217 | "# Define Global variables for Model Training\n", 218 | "EPOCHS = 100\n", 219 | "LR = 0.0001\n", 220 | "BATCH = 16\n", 221 | "\n", 222 | "OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999)\n", 223 | "\n", 224 | "METRICS = {\n", 225 | " 'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), 
262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "id": "w7ouh97-9qP7" 266 | }, 267 | "source": [ 268 | "During model training we will save the best performing set of weights as calculated on evaluation data at the end of each epoch. The metric we track is the mean intersection over union." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "metadata": { 274 | "id": "WlRa0mR6kRwY" 275 | }, 276 | "source": [ 277 | "## DEFINE CALLBACKS\n", 278 | "\n", 279 | "def get_weighted_bce(y_true, y_pred):\n", 280 | " return weighted_bce(y_true, y_pred, WEIGHT)\n", 281 | "\n", 282 | "# get the current time\n", 283 | "now = datetime.now() \n", 284 | "date = now.strftime(\"%d%b%y\")\n", 285 | "date\n", 286 | "\n", 287 | "# define a checkpoint callback to save best models during training\n", 288 | "checkpoint = tf.keras.callbacks.ModelCheckpoint(\n", 289 | " os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),\n", 290 | " monitor='val_classes_mean_iou',\n", 291 | " verbose=1,\n", 292 | " save_best_only=True,\n", 293 | " mode='max'\n", 294 | " )" 295 | ], 296 | "execution_count": null, 297 | "outputs": [] 298 | },
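{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# The real weighted_bce lives in utils/model_tools.py and is not reproduced\n",
    "# in this repo; for intuition, a weighted binary cross-entropy looks roughly\n",
    "# like this sketch:\n",
    "def weighted_bce_sketch(y_true, y_pred, weight):\n",
    "    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)\n",
    "    pixel_weights = y_true[..., 0] * (weight - 1.0) + 1.0  # up-weight positive pixels\n",
    "    return tf.reduce_mean(bce * pixel_weights)"
  ],
  "execution_count": null,
  "outputs": []
},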
| "cell_type": "markdown", 341 | "metadata": { 342 | "id": "r73nInHK5HkZ" 343 | }, 344 | "source": [ 345 | "## Re-Training\n", 346 | " The code below will continue training an existing model. You may need to re-create your training and evaluation datasets if you intend to use new or different data from that on which the model was originally trained." 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "metadata": { 352 | "id": "dKhUr2BI5MjN" 353 | }, 354 | "source": [ 355 | "from tensorflow.python.keras import models" 356 | ], 357 | "execution_count": null, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "metadata": { 363 | "id": "Pfpl6-436ajg" 364 | }, 365 | "source": [ 366 | "# Define where pre-trained model files and weights will come from\n", 367 | "MODEL_FILE = '{PATH TO .h5 MODEL FILE}'\n", 368 | "WEIGHT_FILE = '{PATH TO .hdf5 WEIGHT FILE'\n", 369 | "EVAL_METRIC = 'val_classes_mean_iou'\n", 370 | "# optionally change the learning rate\n", 371 | "LR = 0.0001\n", 372 | "# optionally change the number of epochs to re-train\n", 373 | "EPOCHS = 100" 374 | ], 375 | "execution_count": null, 376 | "outputs": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "ujfWjVTc7DG7" 382 | }, 383 | "source": [ 384 | "# this non-keras native function was used during training so we need to supply it when re-instantiating the trained model\n", 385 | "def get_weighted_bce(y_true, y_pred):\n", 386 | " return weighted_bce(y_true, y_pred, weight)\n", 387 | "\n", 388 | "# get the current time\n", 389 | "now = datetime.now() \n", 390 | "date = now.strftime(\"%d%b%y\")\n", 391 | "date\n", 392 | "\n", 393 | "# define a checkpoint callback to save best models during training\n", 394 | "checkpoint = tf.keras.callbacks.ModelCheckpoint(\n", 395 | " os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),\n", 396 | " monitor='val_classes_mean_iou',\n", 397 | " verbose=1,\n", 398 | " save_best_only=True,\n", 399 | " mode='max'\n", 400 | " )" 401 | ], 402 | "execution_count": null, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "metadata": { 408 | "id": "fdLb1x9R6XO-" 409 | }, 410 | "source": [ 411 | "# load our trained model from the model and weights file\n", 412 | "custom_objects = {'get_weighted_bce': get_weighted_bce}\n", 413 | "m = models.load_model(MODEL_FILE, custom_objects = custom_objects)\n", 414 | "m.load_weights(WEIGHT_FILE)\n" 415 | ], 416 | "execution_count": null, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "Pk7X7tC66nlD" 423 | }, 424 | "source": [ 425 | "# set the initial evaluation metric for saving checkpoints to the previous best value\n", 426 | "evalMetrics = m.evaluate(x = eval_data, verbose = 1)\n", 427 | "metrics = m.metrics_names\n", 428 | "index = metrics.index(EVAL_METRIC)\n", 429 | "checkpoint.best = evalMetrics[index]\n" 430 | ], 431 | "execution_count": null, 432 | "outputs": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "metadata": { 437 | "id": "B1xL8CEZ7VNs" 438 | }, 439 | "source": [ 440 | "# OPTIONALLY set the learning rate for re-training\n", 441 | "lr = backend.eval(m.optimizer.learning_rate)\n", 442 | "print('current learning rate', lr)\n", 443 | "backend.set_value(m.optimizer.learning_rate, LR)\n", 444 | "print('new learning rate', LR)" 445 | ], 446 | "execution_count": null, 447 | "outputs": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "id": "QbSVDeJz7fem" 453 | }, 454 | "source": [ 455 | "# train the model\n", 456 | 
"m.fit(\n", 457 | " x = training,\n", 458 | " epochs = EPOCHS,\n", 459 | " steps_per_epoch = steps_per_epoch,\n", 460 | " validation_data = evaluation,\n", 461 | " callbacks = [checkpoint]\n", 462 | " )\n", 463 | "\n", 464 | "m.save(os.path.join(OUT_DIR, f'{date}_unet256.h5'))" 465 | ], 466 | "execution_count": null, 467 | "outputs": [] 468 | } 469 | ] 470 | } -------------------------------------------------------------------------------- /demos/Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Prediction.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "id": "ObJ-wgPO93nJ" 23 | }, 24 | "source": [ 25 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 26 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 27 | "# you may not use this file except in compliance with the License.\n", 28 | "# You may obtain a copy of the License at\n", 29 | "#\n", 30 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 31 | "#\n", 32 | "# Unless required by applicable law or agreed to in writing, software\n", 33 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 34 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 35 | "# See the License for the specific language governing permissions and\n", 36 | "# limitations under the License." 37 | ], 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "4h8d4KK-95sl" 45 | }, 46 | "source": [ 47 | "# Introduction\n", 48 | "\n", 49 | "This notebook demonstrates a workflow for generating a map of predicted solar array footprints using a trained [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) in Tensorflow. In this example, we create and export images that contain the same variables as used to train our model - the 3 visible, infrared, and 2 near-infrared bands of Sentinel-2 imagery from Google Earth Engine. We load the trained model structure and [weights](https://osf.io/eg35t/) and then run overlapping subsets of these images through the trained model to generate a 2-band output raster containing per-pixel probabilities and classes." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "id": "dKLeYISt4FWZ" 56 | }, 57 | "source": [ 58 | "!pip install rasterio" 59 | ], 60 | "execution_count": null, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "metadata": { 66 | "id": "foiZFwAhu5FY" 67 | }, 68 | "source": [ 69 | "import os\n", 70 | "import shutil\n", 71 | "import glob\n", 72 | "from os.path import join\n", 73 | "import ee\n", 74 | "import folium\n", 75 | "from tensorflow.python.keras import models\n", 76 | "from sys import path\n", 77 | "import numpy as np\n", 78 | "import rasterio as rio\n", 79 | "import json\n", 80 | "from matplotlib import pyplot as plt\n", 81 | "from matplotlib import colors\n", 82 | "" 83 | ], 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "metadata": { 90 | "id": "n-8MlFrmuycO" 91 | }, 92 | "source": [ 93 | "# Authenticate and initialize GEE Account\n", 94 | "ee.Authenticate()\n", 95 | "ee.Initialize()" 96 | ], 97 | "execution_count": null, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "L6tuEC1yvEdP" 104 | }, 105 | "source": [ 106 | "## Clone repo containing preprocessing and prediction functions\n", 107 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 108 | ], 109 | "execution_count": null, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "id": "A2UDJlJyvKT9" 116 | }, 117 | "source": [ 118 | "# Load the necessary modules from repo\n", 119 | "path.append('./Satellite_ComputerVision')" 120 | ], 121 | "execution_count": null, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "metadata": { 127 | "id": "Pm8LpdnwvHEm" 128 | }, 129 | "source": [ 130 | "from utils.model_tools import get_model, make_confusion_matrix, weighted_bce\n", 131 | "from utils.prediction_tools import doExport, makePredDataset, make_array_predictions, get_img_bounds, write_tfrecord_predictions, write_geotiff_prediction\n", 132 | "from utils.clouds import basicQA" 133 | ], 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "metadata": { 140 | "id": "XmWIhMaxxS0o" 141 | }, 142 | "source": [ 143 | "# Define a method for displaying Earth Engine image tiles to a folium map.\n", 144 | "def add_ee_layer(self, ee_image_object, vis_params, name):\n", 145 | " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", 146 | " folium.raster_layers.TileLayer(\n", 147 | " tiles = map_id_dict['tile_fetcher'].url_format,\n", 148 | " attr = \"Map Data © Google Earth Engine\",\n", 149 | " name = name,\n", 150 | " overlay = True,\n", 151 | " control = True\n", 152 | " ).add_to(self)\n", 153 | "\n", 154 | "# Add EE drawing method to folium.\n", 155 | "folium.Map.add_ee_layer = add_ee_layer" 156 | ], 157 | "execution_count": null, 158 | "outputs": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "metadata": { 163 | "id": "7fPiFxk_xWn7" 164 | }, 165 | "source": [ 166 | "# Specify names and locations for outputs in Cloud Storage.
\n", 167 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 168 | "BUCKET_PATH = join('gs://', BUCKET)\n", 169 | "FOLDER = '{YOUR PROJECT FOLDER HERE}'\n", 170 | "PRED_BASE = '{YOUR PROJECT SUBDIRECTORY FOR PREDICTION FILES HERE}'\n", 171 | "MODEL_PATH = '{PATH TO MODEL .h5 File}'\n", 172 | "MODEL_WEIGHTS = '{PATH TO MODEL WEIGHTS .hdf5 file}'\n", 173 | "\n", 174 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 175 | "opticalBands = ['B2', 'B3', 'B4']\n", 176 | "thermalBands = ['B8', 'B11', 'B12']\n", 177 | "\n", 178 | "BANDS = opticalBands + thermalBands" 179 | ], 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "IT7d4APrvjJG" 187 | }, 188 | "source": [ 189 | "## Test images\n", 190 | "We first need to create and export some images in GEE on which we can run predictions. This notebook uses a few test aois, but you can incorporate your own study areas in GEE or existing Sentinel-2 imagery" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "id": "JSaS7FOgvyco" 197 | }, 198 | "source": [ 199 | "# create several small aois to test predictions. These are all in NC\n", 200 | "aois = dict({\n", 201 | " 'Test1': ee.Geometry.Polygon(\n", 202 | " [[[-78.19610376358034, 35.086989862385884],\n", 203 | " [-78.19610376358034, 34.735631502732396],\n", 204 | " [-77.67974634170534, 34.735631502732396],\n", 205 | " [-77.67974634170534, 35.086989862385884]]], None, False),\n", 206 | " 'Test2': ee.Geometry.Polygon(\n", 207 | " [[[-81.59087915420534, 35.84308746418702],\n", 208 | " [-81.59087915420534, 35.47711130797561],\n", 209 | " [-81.03057641983034, 35.47711130797561],\n", 210 | " [-81.03057641983034, 35.84308746418702]]], None, False),\n", 211 | " 'Test3': ee.Geometry.Polygon(\n", 212 | " [[[-78.74447677513596, 36.4941960586897],\n", 213 | " [-78.74447677513596, 36.17115435938789],\n", 214 | " [-78.21713302513596, 36.17115435938789],\n", 215 | " [-78.21713302513596, 36.4941960586897]]], None, False),\n", 216 | " 'Test4': ee.Geometry.Polygon(\n", 217 | " [[[-76.62411544701096, 36.33505523381603],\n", 218 | " [-76.62411544701096, 36.03800955668766],\n", 219 | " [-76.16818282982346, 36.03800955668766],\n", 220 | " [-76.16818282982346, 36.33505523381603]]], None, False)\n", 221 | "})" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "metadata": { 229 | "id": "yA36Bcwfv1U_" 230 | }, 231 | "source": [ 232 | "# Choose the GEE folder in which to ingest prediction image:\n", 233 | "aoi = 'Test4'\n", 234 | "\n", 235 | "# prediction path\n", 236 | "test_path = join(FOLDER, PRED_BASE, aoi)\n", 237 | "\n", 238 | "# Base file name to use for TFRecord files and assets. 
The name structure includes:\n", 239 | "test_image_base = 'unet256_' + aoi\n", 240 | "\n", 241 | "# Half this will extend on the sides of each patch.\n", 242 | "kernel_buffer = [128, 128]\n", 243 | "\n", 244 | "test_region = aois[aoi]\n", 245 | "\n", 246 | "# find the center of our aoi for map visualization\n", 247 | "center = test_region.centroid(5).coordinates().getInfo()\n", 248 | "center.reverse()" 249 | ], 250 | "execution_count": null, 251 | "outputs": [] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "metadata": { 256 | "id": "s7xAe359wG8n" 257 | }, 258 | "source": [ 259 | "# Create a test image\n", 260 | "S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n", 261 | "\n", 262 | "## Change dates here\n", 263 | "######\n", 264 | "begin = '2020-05-01'\n", 265 | "end = '2020-08-30'\n", 266 | "######\n", 267 | "\n", 268 | "# The image input collection is cloud-masked.\n", 269 | "filtered = S2.filterDate(begin, end)\\\n", 270 | ".filterBounds(test_region)\\\n", 271 | ".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\\\n", 272 | ".map(basicQA)\n", 273 | "\n", 274 | "# Create a simple median composite to visualize\n", 275 | "## Change .clip to change test area \n", 276 | "test = filtered.median().select(BANDS).clip(test_region)\n", 277 | "\n", 278 | "# Use folium to visualize the imagery.\n", 279 | "#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", 280 | "rgbParams = {'bands': ['B4', 'B3', 'B2'],\n", 281 | " 'min': 0,\n", 282 | " 'max': 3000}\n", 283 | "\n", 284 | "nirParams = {'bands': ['B8', 'B11', 'B12'],\n", 285 | " 'min': 0,\n", 286 | " 'max': 3000}\n", 287 | "\n", 288 | "\n", 289 | "## Change coordinates to center map based on aoi used \n", 290 | "map = folium.Map(location=center)\n", 291 | "map.add_ee_layer(test, rgbParams, 'Color')\n", 292 | "map.add_ee_layer(test, nirParams, 'Thermal')\n", 293 | "\n", 294 | "map.add_child(folium.LayerControl())\n", 295 | "map" 296 | ], 297 | "execution_count": null, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "metadata": { 303 | "id": "CBW7l5xQxG1Y" 304 | }, 305 | "source": [ 306 | "# Run the export.\n", 307 | "## takes some time (~10 min) --> check GEE tasks to see when completed \n", 308 | "doExport(test, features = BANDS, pred_path = test_path, pred_base = test_image_base, scale = 10, bucket = BUCKET, region = test_region)" 309 | ], 310 | "execution_count": null, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "tfg9rxv4xHuS" 317 | }, 318 | "source": [ 319 | "## Predictions" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": { 325 | "id": "vYgzH4DpAw2o" 326 | }, 327 | "source": [ 328 | "First we load the model structure and weights" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "metadata": { 334 | "id": "jd7ysDkezBR1" 335 | }, 336 | "source": [ 337 | "def get_weighted_bce(y_true,y_pred):\n", 338 | " return weighted_bce(y_true, y_pred, 1)\n", 339 | "m = models.load_model(MODEL_PATH, custom_objects = {'get_weighted_bce': get_weighted_bce})\n", 340 | "# m = get_model(depth = DEPTH, optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)\n", 341 | "m.load_weights(MODEL_WEIGHTS)" 342 | ], 343 | "execution_count": null, 344 | "outputs": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": { 349 | "id": "sePpmbMPA0xG" 350 | }, 351 | "source": [ 352 | "Then generate a file list of our previously exported image data on which we want to make predictions. 
NOTE: This example reads from Google Cloud Storage, but any means of generating a list of filenames is sufficient" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "metadata": { 358 | "id": "OtgnbS3Q1xmy" 359 | }, 360 | "source": [ 361 | "predFiles = !gsutil ls {join(BUCKET_PATH, test_path, test_image_base + '*.tfrecord.gz')}\n", 362 | "jsonFile = !gsutil ls {join(BUCKET_PATH, test_path, test_image_base + '*.json')}\n", 363 | "jsonFile = jsonFile[0]" 364 | ], 365 | "execution_count": null, 366 | "outputs": [] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "metadata": { 371 | "id": "vgyohfwV1rqT" 372 | }, 373 | "source": [ 374 | "# load our predictions data into a Dataset and inspect the first one\n", 375 | "predData = makePredDataset(predFiles, BANDS, one_hot = None)\n", 376 | "iterator = iter(predData)\n", 377 | "print(iterator.next())" 378 | ], 379 | "execution_count": null, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": { 385 | "id": "FG9KksCBBG9F" 386 | }, 387 | "source": [ 388 | "Generate and plot the output predictions" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "metadata": { 394 | "id": "ebY2MYsv18HO" 395 | }, 396 | "source": [ 397 | "# generate prediction rasters\n", 398 | "preds = make_array_predictions(imageDataset = predData, model = m, jsonFile = jsonFile)" 399 | ], 400 | "execution_count": null, 401 | "outputs": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "metadata": { 406 | "id": "8F1da8OA2CEM" 407 | }, 408 | "source": [ 409 | "# We can quickly visualize the predictions to see if they look sensible\n", 410 | "figure = plt.figure(figsize = (12,12))\n", 411 | "\n", 412 | "prob = preds[:, :, 0]\n", 413 | "cls = preds[:, :, 1]\n", 414 | "\n", 415 | "plt.imshow(prob)" 416 | ], 417 | "execution_count": null, 418 | "outputs": [] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "metadata": { 423 | "id": "YBG2Ndga2MJr" 424 | }, 425 | "source": [ 426 | "# overlay the predicted outputs on the original satellite data map\n", 427 | "heatmap = folium.raster_layers.ImageOverlay(\n", 428 | " image=prob,\n", 429 | " bounds= get_img_bounds(prob, jsonFile),\n", 430 | " colormap=lambda x: (0.5, 0, 0.5, 1) if x >= 0.9 else (0, 0, 0, 0),\n", 431 | ")\n", 432 | "map.add_child(heatmap)\n", 433 | "map.add_child(folium.LayerControl())\n", 434 | "map" 435 | ], 436 | "execution_count": null, 437 | "outputs": [] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": { 442 | "id": "7ouKDMpcBJbL" 443 | }, 444 | "source": [ 445 | "Export and save predictions (optional)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "metadata": { 451 | "id": "8rYA_HkF2kMV" 452 | }, 453 | "source": [ 454 | "# optionally, write predictions to either tfrecord files (best for re-ingesting into GEE)...\n", 455 | "write_tfrecord_predictions(predData, m, test_path, test_image_base)\n", 456 | "#...or a geotiff\n", 457 | "write_geotiff_prediction(preds, jsonFile, '{OUTFILE}')" 458 | ], 459 | "execution_count": null, 460 | "outputs": [] 461 | } 462 | ] 463 | } -------------------------------------------------------------------------------- /demos/Extract_Data_GEE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Extract_Data_GEE.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }
16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "DzmS6y3XJGEl" 22 | }, 23 | "source": [ 24 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 25 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 26 | "# you may not use this file except in compliance with the License.\n", 27 | "# You may obtain a copy of the License at\n", 28 | "#\n", 29 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 30 | "#\n", 31 | "# Unless required by applicable law or agreed to in writing, software\n", 32 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 33 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 34 | "# See the License for the specific language governing permissions and\n", 35 | "# limitations under the License." 36 | ], 37 | "execution_count": null, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "TbdLwIXWJQMt" 44 | }, 45 | "source": [ 46 | "# Introduction\n", 47 | "\n", 48 | "This notebook demonstrates methods used to acquire training data from Google Earth Engine that can be used to train a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) using Tensorflow. In this example, we extract 256x256 pixel image chips containing the 3 visible, infrared, and 2 near infrared bands in Sentinel-2 imagery based on [hand-delineated solar array footprints in North Carolina](https://osf.io/ygbwj/). This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "metadata": { 54 | "id": "S0eAagvtJi2B" 55 | }, 56 | "source": [ 57 | "from os.path import join\nimport tensorflow as tf\n", 58 | "from google.cloud import storage\n", 59 | "import ee\n", 60 | "from sys import path\n", 61 | "import json\n", 62 | "import numpy as np\n", 63 | "import rasterio as rio\n", 64 | "import folium" 65 | ], 66 | "execution_count": null, 67 | "outputs": [] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "metadata": { 72 | "id": "Qp7doIHHJnys" 73 | }, 74 | "source": [ 75 | "## Clone repo containing preprocessing and prediction functions\n", 76 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 77 | ], 78 | "execution_count": null, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "0VxQa03hJufS" 85 | }, 86 | "source": [ 87 | "# Load the necessary modules from repo\n", 88 | "path.append('/content/Satellite_ComputerVision')\n", 89 | "from utils.clouds import basicQA, maskTOA, maskSR" 90 | ], 91 | "execution_count": null, 92 | "outputs": [] 93 | },
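{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# basicQA is defined in utils/clouds.py and not shown in this repo; for\n",
    "# intuition, a minimal Sentinel-2 QA60 cloud mask looks roughly like this\n",
    "# sketch:\n",
    "def qa60_sketch(img):\n",
    "    qa = img.select('QA60')\n",
    "    cloudy = qa.bitwiseAnd(1 << 10).Or(qa.bitwiseAnd(1 << 11))  # cloud and cirrus bits\n",
    "    return img.updateMask(cloudy.Not())"
  ],
  "execution_count": null,
  "outputs": []
},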
\n", 141 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 142 | "BUCKET_PATH = join('gs://', BUCKET)\n", 143 | "\n", 144 | "FOLDER = 'NC_solar'\n", 145 | "PRED_BASE = 'data/predict'\n", 146 | "TRAIN_BASE = 'data/training'\n", 147 | "EVAL_BASE = 'data/eval'\n", 148 | "\n", 149 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 150 | "opticalBands = ['B2', 'B3', 'B4']\n", 151 | "thermalBands = ['B8', 'B11', 'B12']\n", 152 | "\n", 153 | "BANDS = opticalBands + thermalBands\n", 154 | "RESPONSE = 'landcover'\n", 155 | "FEATURES = BANDS + [RESPONSE]\n", 156 | "SCENEID = 'SENSING_ORBIT_NUMBER'\n", 157 | "\n", 158 | "# Specify the size and shape of patches expected by the model.\n", 159 | "KERNEL_SIZE = 256\n", 160 | "KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n", 161 | "COLUMNS = [\n", 162 | " tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n", 163 | "]\n", 164 | "FEATURES_DICT = dict(zip(FEATURES, COLUMNS))" 165 | ], 166 | "execution_count": null, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "id": "r-5l_EfywDvK" 173 | }, 174 | "source": [ 175 | "# Imagery\n", 176 | "\n", 177 | "Access and process the imagery to use for predictor variables using Google Earth Engine. This is a three-month, cloud-free, Sentinel-2 composite corresponding to the latest date from which we have confirmed training data. Display it in the notebook for a sanity check." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "btEC3dluJfGq" 184 | }, 185 | "source": [ 186 | "# Use Sentinel-2 surface reflectance data.\n", 187 | "S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n", 188 | "# Grab a feature corresponding to our study area - North Carolina\n", 189 | "states = ee.FeatureCollection(\"TIGER/2016/States\")\n", 190 | "nc = states.filter(ee.Filter.eq('NAME', 'North Carolina')).geometry().buffer(2500)\n", 191 | "begin = '2019-01-01'\n", 192 | "end = '2020-03-01'\n", 193 | "\n", 194 | "# The image input collection is cloud-masked.\n", 195 | "filtered = S2.filterDate(begin, end)\\\n", 196 | ".filterBounds(nc)\\\n", 197 | ".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\n", 198 | "\n", 199 | "\n", 200 | "# Create a simple median composite to visualize\n", 201 | "winter = filtered.filterDate('2019-12-01', '2020-02-28').map(basicQA).median().select(BANDS).clip(nc)\n", 202 | "spring = filtered.filterDate('2019-03-01', '2019-05-31').map(basicQA).median().select(BANDS).clip(nc)\n", 203 | "summer = filtered.filterDate('2019-06-01', '2019-08-31').map(basicQA).median().select(BANDS).clip(nc)\n", 204 | "fall = filtered.filterDate('2019-09-01', '2019-11-30').map(basicQA).median().select(BANDS).clip(nc)\n", 205 | "\n", 206 | "# Use folium to visualize the imagery.\n", 207 | "#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", 208 | "rgbParams = {'bands': ['B4', 'B3', 'B2'],\n", 209 | " 'min': 0,\n", 210 | " 'max': 0.3}\n", 211 | "\n", 212 | "nirParams = {'bands': ['B8', 'B11', 'B12'],\n", 213 | " 'min': 0,\n", 214 | " 'max': 0.3}\n", 215 | "\n", 216 | "map = folium.Map(location=[35.402, -78.376])\n", 217 | "map.add_ee_layer(spring, rgbParams, 'Color')\n", 218 | "map.add_ee_layer(spring, nirParams, 'Thermal')\n", 219 | "\n", 220 | "map.add_child(folium.LayerControl())\n", 221 | "map" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "wpCLxJLVwTHw" 230 | }, 231 | "source": [ 232 | "Prepare the 
273 | { 274 | "cell_type": "markdown", 275 | "metadata": { 276 | "id": "fjxyM6Lswn0n" 277 | }, 278 | "source": [ 279 | "Use some pre-made geometries to sample the stack in strategic locations. We constrain sampling to occur within 10km of mapped solar arrays. Because our target features are small and sparse, relative to the landscape, we also guide sampling based on their centroids to ensure that we get training data for solar arrays." 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "id": "B-xg0yQXwmTJ" 286 | }, 287 | "source": [ 288 | "def buff(ft):\n", 289 | " return ft.buffer(10000)\n", 290 | "\n", 291 | "def centroid(ft):\n", 292 | " return ft.centroid()\n", 293 | "\n", 294 | "centroids = NC_solar_footprints.map(centroid)\n", 295 | "studyArea = NC_solar_footprints.map(buff).union()\n", 296 | "studyImage = ee.Image(0).byte().paint(studyArea, 1)\n", 297 | "studyImage = studyImage.updateMask(studyImage)\n", 298 | "centroids = centroids.randomColumn('random')\n", 299 | "\n", 300 | "aoiParams = {'min':0, 'max': 1, 'palette': ['red']}\n", 301 | "map = folium.Map(location=[35.402, -78.376], zoom_start=8)\n", 302 | "map.add_ee_layer(studyImage, aoiParams, 'Sampling area')\n", 303 | "map.add_child(folium.LayerControl())\n", 304 | "map" 305 | ], 306 | "execution_count": null, 307 | "outputs": [] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "id": "2_Ts4CAYwhv1" 313 | }, 314 | "source": [ 315 | "# Sampling\n", 316 | "\n", 317 | "If the mapped data look reasonable, we use a 2-stage approach to sample 256x256 pixel image 'chips' for use in model training.\n", 318 | "1.)
sample from the centroid of each polygon to create 'positive' examples.\n", 319 | "2.) sample the image at random points to generate 'negative' examples.\n", 320 | "\n", 321 | "To sample chips we create an array image in which each pixel contains a nested list of the surrounding 256x256 pixel values. We can sample this array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export." 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "metadata": { 327 | "id": "J8me8XqzzEWP" 328 | }, 329 | "source": [ 330 | "def make_array_image(features, labels, aoi):\n", 331 | " \"\"\"Combine predictor bands and label band into an array image\n", 332 | " Parameters:\n", 333 | " features (ee.Image): image containing bands to be used as predictor variables in model\n", 334 | " labels (ee.Image): binary[0,1], single-band image indicating presence (1) and absence (0) of target features\n", 335 | " aoi (ee.Geometry): bounds\n", 336 | " Return:\n", 337 | " ee.Image: array image\n", 338 | " \"\"\"\n", 339 | " \n", 340 | " featureStack = ee.Image.cat([features, labels]).clip(aoi)\n", 341 | "\n", 342 | " ls = ee.List.repeat(1, KERNEL_SIZE)\n", 343 | " lists = ee.List.repeat(ls, KERNEL_SIZE)\n", 344 | " kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n", 345 | "\n", 346 | " arrays = featureStack.neighborhoodToArray(kernel)\n", 347 | " return arrays" 348 | ], 349 | "execution_count": null, 350 | "outputs": [] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "id": "kD62TGagw3Im" 356 | }, 357 | "source": [ 358 | "First we'll collect image patches from the centroids of known solar array locations" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "metadata": { 364 | "id": "NT4YxEoMw0qK" 365 | }, 366 | "source": [ 367 | "# Add a random column to the centroids\n", 368 | "S = centroids.size().getInfo()\n", 369 | "centroidList = centroids.toList(S)" 370 | ], 371 | "execution_count": null, 372 | "outputs": [] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "metadata": { 377 | "id": "Jn-RyVA3xDEi" 378 | }, 379 | "source": [ 380 | "#@title Centroids slicing\n", 381 | "# Get samples from delineated features using slice() on a feature collection\n", 382 | "\n", 383 | "x = 0\n", 384 | "\n", 385 | "# set the number of samples to include in a single export. 
may need to experiment with this parameter to avoid memory issues\n", 386 | "n = 25\n", 387 | "\n", 388 | "while x < S:\n", 389 | " # select a subset of 25 centroids\n", 390 | " subset = ee.FeatureCollection(centroidList.slice(x, x+n))\n", 391 | " # buffer those\n", 392 | " studyArea = subset.map(buff).union()\n", 393 | " arrays = make_array_image(fall.select(BANDS), labelimg.select(RESPONSE), studyArea)\n", 394 | " sample = arrays.sampleRegions(\n", 395 | " collection = subset.geometry(),\n", 396 | " scale = 10,\n", 397 | " tileScale = 12\n", 398 | " )\n", 399 | " x += n\n", 400 | " \n", 401 | " # assign a random number to samples and create a 70/30 train/test split\n", 402 | " sample = sample.randomColumn('random')\n", 403 | " training = sample.filter(ee.Filter.gte('random', 0.3))\n", 404 | " testing = sample.filter(ee.Filter.lt('random', 0.3))\n", 405 | "\n", 406 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_centFall' + str(x)\n", 407 | " task = ee.batch.Export.table.toCloudStorage(\n", 408 | " collection = training,\n", 409 | " description = desc, \n", 410 | " bucket = BUCKET, \n", 411 | " fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n", 412 | " fileFormat = 'TFRecord',\n", 413 | " selectors = BANDS + [RESPONSE]\n", 414 | " )\n", 415 | " task.start()\n", 416 | "\n", 417 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_centFall' + str(x)\n", 418 | " task = ee.batch.Export.table.toCloudStorage(\n", 419 | " collection = testing,\n", 420 | " description = desc, \n", 421 | " bucket = BUCKET, \n", 422 | " fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n", 423 | " fileFormat = 'TFRecord',\n", 424 | " selectors = BANDS + [RESPONSE]\n", 425 | " )\n", 426 | " task.start()" 427 | ], 428 | "execution_count": null, 429 | "outputs": [] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": { 434 | "id": "gwHW6fKTxVk7" 435 | }, 436 | "source": [ 437 | "Generate random samples within the buffered area" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "metadata": { 443 | "id": "WIv_-Mc2xRZ8" 444 | }, 445 | "source": [ 446 | "#@title Random sampling\n", 447 | "\n", 448 | "# Define sample sizes for shards and chunks. 
\n", 449 | "# These numbers determined experimentally.\n", 450 | "n = 30 # Number of shards in each chunk.\n", 451 | "N = 300 # Total sample size in each chunk.\n", 452 | "C = 2# Number of chunks\n", 453 | "\n", 454 | "iterator = iter(range(N*C))\n", 455 | "arrays = make_array_image(fall.select(BANDS),\n", 456 | " labelimg.select(RESPONSE),\n", 457 | " studyArea)\n", 458 | "for c in range(C):\n", 459 | " geomSample = ee.FeatureCollection([])\n", 460 | "\n", 461 | " for i in range(n):\n", 462 | " seed = next(iterator)\n", 463 | " sample = arrays.sample(\n", 464 | " region = studyArea,\n", 465 | " scale = 10,\n", 466 | " numPixels = N/n,\n", 467 | " seed = seed,\n", 468 | " tileScale = 8\n", 469 | " )\n", 470 | " geomSample = geomSample.merge(sample)\n", 471 | "\n", 472 | " #divide samples into training and evaluation data\n", 473 | " geomSample = geomSample.randomColumn('random')\n", 474 | " training = geomSample.filter(ee.Filter.gte('random', 0.3))\n", 475 | " testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n", 476 | "\n", 477 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_randFall'+str(c)\n", 478 | " task = ee.batch.Export.table.toCloudStorage(\n", 479 | " collection = training,\n", 480 | " description = desc, \n", 481 | " bucket = BUCKET, \n", 482 | " fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n", 483 | " fileFormat = 'TFRecord',\n", 484 | " selectors = BANDS + [RESPONSE]\n", 485 | " )\n", 486 | " task.start()\n", 487 | "\n", 488 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_randFall' + str(c)\n", 489 | " task = ee.batch.Export.table.toCloudStorage(\n", 490 | " collection = testing,\n", 491 | " description = desc, \n", 492 | " bucket = BUCKET, \n", 493 | " fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n", 494 | " fileFormat = 'TFRecord',\n", 495 | " selectors = BANDS + [RESPONSE]\n", 496 | " )\n", 497 | " task.start() " 498 | ], 499 | "execution_count": null, 500 | "outputs": [] 501 | } 502 | ] 503 | } -------------------------------------------------------------------------------- /re-train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "from azureml.core import Experiment, Environment, Workspace, Datastore, Dataset, Model, ScriptRunConfig, Run\n", 7 | "import os\n", 8 | "import glob\n", 9 | "# get the current workspace\n", 10 | "ws = Workspace.from_config()" 11 | ], 12 | "outputs": [], 13 | "execution_count": 15, 14 | "metadata": { 15 | "gather": { 16 | "logged": 1684345750891 17 | } 18 | } 19 | }, 20 | { 21 | "cell_type": "code", 22 | "source": [ 23 | "%cd Satellite_ComputerVision\n", 24 | "!git pull\n", 25 | "%cd .." 
26 | ], 27 | "outputs": [ 28 | { 29 | "output_type": "stream", 30 | "name": "stdout", 31 | "text": "Already up-to-date.\r\n/mnt/batch/tasks/shared/LS_root/mounts/clusters/test-compute-instance/code/Users/mevans\n" 32 | } 33 | ], 34 | "execution_count": 18, 35 | "metadata": { 36 | "collapsed": true, 37 | "jupyter": { 38 | "source_hidden": false, 39 | "outputs_hidden": false 40 | }, 41 | "nteract": { 42 | "transient": { 43 | "deleting": false 44 | } 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "# access our registered data share containing image data in this workspace\n", 52 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\n", 53 | "\n", 54 | "cpk_train_path = (datastore, 'CPK_solar/data/training/')\n", 55 | "cpk_eval_path = (datastore, 'CPK_solar/data/eval/')\n", 56 | "\n", 57 | "nc_train_path = (datastore, 'NC_solar/data/training/')\n", 58 | "nc_eval_path = (datastore, 'NC_solar/data/eval/')\n", 59 | "\n", 60 | "test_path = (datastore, 'CPK_solar/data/predict/testpred5')\n", 61 | "\n", 62 | "# train_dataset = Dataset.File.from_files(path = [cpk_train_path])\n", 63 | "# eval_dataset = Dataset.File.from_files(path = [cpk_eval_path])\n", 64 | "\n", 65 | "# nc_train_dataset = Dataset.File.from_files(path = [nc_train_path])\n", 66 | "# nc_eval_dataset = Dataset.File.from_files(path = [nc_eval_path])\n", 67 | "\n", 68 | "# when we combine datasets the selected directories and relative paths to the datastore are brought in\n", 69 | "# mount folder\n", 70 | "# |-solardatablob\n", 71 | "# | |-CPK_solar\n", 72 | "# | | |-data/training\n", 73 | "# | | |-data/eval\n", 74 | "# | |-NC_solar\n", 75 | "# | | |-data/training\n", 76 | "# | | |-data/eval\n", 77 | "\n", 78 | "train_dataset = Dataset.File.from_files(path = [cpk_train_path, nc_train_path])\n", 79 | "eval_dataset = Dataset.File.from_files(path = [cpk_eval_path, nc_eval_path])\n", 80 | "test_dataset = Dataset.File.from_files(path = [test_path])" 81 | ], 82 | "outputs": [], 83 | "execution_count": 16, 84 | "metadata": { 85 | "collapsed": true, 86 | "jupyter": { 87 | "source_hidden": false, 88 | "outputs_hidden": false 89 | }, 90 | "nteract": { 91 | "transient": { 92 | "deleting": false 93 | } 94 | }, 95 | "gather": { 96 | "logged": 1684345752282 97 | } 98 | } 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "# Find the run corresponding to the model we want to register\n", 104 | "# run_id = 'solar-nc-cpk_1624989679_f59da7cf'\n", 105 | "run_id = 'solar-nc-cpk_1684259900_b71cc594'\n", 106 | "run = ws.get_run(run_id)" 107 | ], 108 | "outputs": [], 109 | "execution_count": 18, 110 | "metadata": { 111 | "collapsed": true, 112 | "jupyter": { 113 | "source_hidden": false, 114 | "outputs_hidden": false 115 | }, 116 | "nteract": { 117 | "transient": { 118 | "deleting": false 119 | } 120 | }, 121 | "gather": { 122 | "logged": 1684345784093 123 | } 124 | } 125 | }, 126 | { 127 | "cell_type": "code", 128 | "source": [ 129 | "model_name = 'solar_May23'" 130 | ], 131 | "outputs": [], 132 | "execution_count": 17, 133 | "metadata": { 134 | "collapsed": true, 135 | "jupyter": { 136 | "source_hidden": false, 137 | "outputs_hidden": false 138 | }, 139 | "nteract": { 140 | "transient": { 141 | "deleting": false 142 | } 143 | }, 144 | "gather": { 145 | "logged": 1684345762622 146 | } 147 | } 148 | }, 149 | { 150 | "cell_type": "code", 151 | "source": [ 152 | "model = run.register_model(model_name=model_name,\n", 153 | " tags=run.tags,\n", 154 | " description = 'UNET model delineating ground mounted solar arrays in 
S2 imagery. Trained on multi-season data from Chesapeake Bay and NC',\n", 155 | " model_path='outputs/',\n", 156 | " model_framework = 'Tensorflow',\n", 157 | " model_framework_version= '2.0',\n", 158 | " datasets = [('training', train_dataset), ('evaluation', eval_dataset), ('testing', test_dataset)])\n", 159 | "print(model.name, model.id, model.version, sep='\\t')" 160 | ], 161 | "outputs": [ 162 | { 163 | "output_type": "stream", 164 | "name": "stdout", 165 | "text": "solar_May23\tsolar_May23:2\t2\n" 166 | } 167 | ], 168 | "execution_count": 19, 169 | "metadata": { 170 | "collapsed": true, 171 | "jupyter": { 172 | "source_hidden": false, 173 | "outputs_hidden": false 174 | }, 175 | "nteract": { 176 | "transient": { 177 | "deleting": false 178 | } 179 | }, 180 | "gather": { 181 | "logged": 1684345790955 182 | } 183 | } 184 | }, 185 | { 186 | "cell_type": "code", 187 | "source": [ 188 | "# use the azure folder as our script folder\n", 189 | "source = 'Satellite_ComputerVision'\n", 190 | "util_folder = 'utils'\n", 191 | "script_folder = f'{source}/azure'\n", 192 | "script_file = 'train_solar.py'" 193 | ], 194 | "outputs": [], 195 | "execution_count": 17, 196 | "metadata": { 197 | "collapsed": true, 198 | "jupyter": { 199 | "source_hidden": false, 200 | "outputs_hidden": false 201 | }, 202 | "nteract": { 203 | "transient": { 204 | "deleting": false 205 | } 206 | }, 207 | "gather": { 208 | "logged": 1638378482125 209 | } 210 | } 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "# define the compute target\n", 216 | "ws.compute_targets\n", 217 | "mevansGPU = ws.compute_targets['mevansGPU']" 218 | ], 219 | "outputs": [], 220 | "execution_count": 20, 221 | "metadata": { 222 | "collapsed": true, 223 | "jupyter": { 224 | "source_hidden": false, 225 | "outputs_hidden": false 226 | }, 227 | "nteract": { 228 | "transient": { 229 | "deleting": false 230 | } 231 | }, 232 | "gather": { 233 | "logged": 1684345802021 234 | } 235 | } 236 | }, 237 | { 238 | "cell_type": "code", 239 | "source": [ 240 | "experiment_name = 'solar-nc-cpk'\n", 241 | "exp = Experiment(workspace = ws, name = experiment_name)" 242 | ], 243 | "outputs": [], 244 | "execution_count": 22, 245 | "metadata": { 246 | "collapsed": true, 247 | "jupyter": { 248 | "source_hidden": false, 249 | "outputs_hidden": false 250 | }, 251 | "nteract": { 252 | "transient": { 253 | "deleting": false 254 | } 255 | }, 256 | "gather": { 257 | "logged": 1684345807382 258 | } 259 | } 260 | }, 261 | { 262 | "cell_type": "code", 263 | "source": [ 264 | "custom_env = Environment.from_docker_image(\r\n", 265 | " name = 'tf_training',\r\n", 266 | " image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.2-cudnn8-ubuntu20.04:20221010.v1',\r\n", 267 | " container_registry=None,\r\n", 268 | " conda_specification='/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet/conda_env-copy.yml',\r\n", 269 | " pip_requirements=None)" 270 | ], 271 | "outputs": [], 272 | "execution_count": 21, 273 | "metadata": { 274 | "jupyter": { 275 | "source_hidden": false, 276 | "outputs_hidden": false 277 | }, 278 | "nteract": { 279 | "transient": { 280 | "deleting": false 281 | } 282 | }, 283 | "gather": { 284 | "logged": 1684345805116 285 | } 286 | } 287 | }, 288 | { 289 | "cell_type": "code", 290 | "source": [ 291 | "RESPONSE = 'landcover'\n", 292 | "args = [\n", 293 | " '--train_data', train_dataset.as_mount(),\n", 294 | " '--eval_data', eval_dataset.as_mount(),\n", 295 | " '--test_data', test_dataset.as_mount(),\n", 296 | " 
'--model_id', model_name,\n", 297 | " '--weights', '[1.0, 1.0]',\n", 298 | " '--bias', 0,\n", 299 | " '-lr', 0.0005,\n", 300 | " '--epochs', 200,\n", 301 | " '--epoch_start', 150,\n", 302 | " '--batch', 16,\n", 303 | " '--size', 11020,\n", 304 | " '--kernel_size', 256,\n", 305 | " '--response', f'{RESPONSE}',\n", 306 | " '--bands', '[\"B2\", \"B3\", \"B4\", \"B8\", \"B11\", \"B12\"]',\n", 307 | " '--splits', '[0]']\n", 308 | "\n", 309 | "src = ScriptRunConfig(source_directory='azure',\n", 310 | " script='train_solar.py', \n", 311 | " arguments=args,\n", 312 | " compute_target=mevansGPU,\n", 313 | " environment=custom_env)" 314 | ], 315 | "outputs": [], 316 | "execution_count": 23, 317 | "metadata": { 318 | "collapsed": true, 319 | "jupyter": { 320 | "source_hidden": false, 321 | "outputs_hidden": false 322 | }, 323 | "nteract": { 324 | "transient": { 325 | "deleting": false 326 | } 327 | }, 328 | "gather": { 329 | "logged": 1684345941382 330 | } 331 | } 332 | }, 333 | { 334 | "cell_type": "code", 335 | "source": [ 336 | "# run the training job\n", 337 | "run = exp.submit(config=src, tags = dict({'splits':'None', 'model':'Unet', 'dataset':'NC CPK S2', 'normalization':'S2 moments', 'epochs':'150-200'}))\n", 338 | "run" 339 | ], 340 | "outputs": [ 341 | { 342 | "output_type": "execute_result", 343 | "execution_count": 24, 344 | "data": { 345 | "text/plain": "Run(Experiment: solar-nc-cpk,\nId: solar-nc-cpk_1684345980_10f4f4d2,\nType: azureml.scriptrun,\nStatus: Starting)", 346 | "text/html": "
Experiment: solar-nc-cpk | Id: solar-nc-cpk_1684345980_10f4f4d2 | Type: azureml.scriptrun | Status: Starting | Details Page: Link to Azure Machine Learning studio | Docs Page: Link to Documentation
" 347 | }, 348 | "metadata": {} 349 | }, 350 | { 351 | "output_type": "stream", 352 | "name": "stderr", 353 | "text": "Bad pipe message: %s [b'8\\xde(\\xabdJN#r\\xf9\\x05\\xd7\\xe1h\\xee\\x83\\xac\\xda \\xe7\\x80\\x7fCu\\x10\\xe4\\xd9\\x94\\xc0\\xbdj\\xc4\\xd9\\xcb\\x18/\\xde\\x06M\\xfe\\xff\\xe6\\xd0\\x9a\\xf4m\\x08\\xcb\\x8f\\x93p\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x00\\x1e\\x00\\x1c\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\\x0b\\x08\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06\\x01\\x00+\\x00\\x03\\x02\\x03\\x04']\nBad pipe message: %s [b\"\\x86o~1o\\x1bu.\\xf9\\xdd\\xc4\\xde0\\x92\\xfd\\x18$\\x8d\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\\x0b\\x08\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06\\x01\\x03\\x03\\x02\"]\nBad pipe message: %s [b'\\x01\\x02', b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe message: %s [b'My\\xef\\x0c\\xa9\\x9eQ\\xc4)d6t\\xc2pc[y\\xe3\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0', b'\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0']\nBad pipe message: %s [b'\\x16\\x00\\x13\\x00\\x10\\x00\\r']\nBad pipe message: %s [b'\\xe4\\x12,{\\x15\\x94\\xb4\\x11\\xc9\\x13\\xb1\\xc4\\xb9\\xfa4\\x1a\\xebf\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x00']\nBad pipe message: %s [b'\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0']\nBad pipe message: %s [b'\\x16\\x00\\x18\\xc0\\x0c\\xc0']\nBad pipe message: %s [b'\\x05']\nBad pipe message: %s 
[b\"\\xdbvN\\x0c\\xe8D{\\x910\\x1c\\xd0V$B2\\x8d\\xd0\\xc0\\x00\\x00\\x86\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\\x00?\\x00>\\x003\\x002\\x001\\x000\\xc01\\xc0-\\xc0)\\xc0%\\xc0\\x0e\\xc0\\x04\\x00\\x9c\\x00<\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00g\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00\\t\\x00\\n\\x00#\\x00\\x00\\x00\\r\\x00 \\x00\\x1e\", b'\\x06\\x02\\x06\\x03\\x05', b'', b'\\x03', b'\\x04\\x02\\x04', b'\\x01\\x03', b'\\x03', b'\\x02', b'\\x03']\nBad pipe message: %s [b'\\x02\\xc2~\\xda1\\xee\\xd4\\x9c\\xf3\\x08\\xf6']\nBad pipe message: %s [b\"O\\x11L,\\x99\\x00\\x00\\xf4\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00\\xa7\\x00m\\x00:\\x00\\x89\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\x00\\x84\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\"]\nBad pipe message: %s [b\"Es\\xa6z1/\\x87s'\\xd5\\xe5I\\x176b#G\\x10 \\xa4\\xad\\xc7\\x1e\\xcfrs\\x19H\\xaf\\x01\\x0cJTm\\xbe\\xd3X\\xca\\x94\\xebc'\\x8f\\xf6\\x8f\\xa6>\\x99x\\x0eS\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x00\\x1e\\x00\\x1c\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\", b'\\x08\\x0b\\x08\\x04\\x08\\x05\\x08']\nBad pipe message: %s [b'\\x01\\x05\\x01\\x06\\x01']\nBad pipe message: %s [b'\\x0b\\x13\\xc5a>C\\x18I\\xcaF\\x19\\xad\\x9e\\xf8\\xf0\\xf2\\x98\\x1e\\x00\\x00\\xa6\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa', b\"\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\", b'\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07']\nBad pipe message: %s [b'\\x08\\t\\x08\\n\\x08\\x0b\\x08']\nBad pipe message: %s [b'\\x05\\x08\\x06']\nBad pipe message: %s [b'\\x05\\x01\\x06', b'', b'\\x03\\x03']\nBad pipe message: %s [b'']\nBad pipe message: %s [b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe 
message: %s [b'h\\x1f\\x8cg9\\x8bqQ^\\xdc\\xae\\x9b\\xdbz0\\xe6\\xc9\\x06\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\xc0\\x0f\\xc0\\x05\\x005\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00C\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00']\nBad pipe message: %s [b'\\n\\x00#\\x00\\x00\\x00\\x0f\\x00']\nBad pipe message: %s [b'\\xc6\\xe7\\xcf\\xd7\\xd58\\x8b\\x04\\xeee\\xce\\r\\x84\\xbc\\xdd\\xcf\\x99\\xe1\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0']\nBad pipe message: %s [b'\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00']\nBad pipe message: %s [b'\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00\\x06\\x00\\x17\\x00\\x03\\xc0\\x10\\xc0\\x06\\xc0\\x15\\xc0\\x0b\\xc0\\x01']\nBad pipe message: %s [b'\\x04o\\x98r\\x8d\\x9dmQ\\xac/\\xa1\\xd2\\x1f\\xa5\\xbe\\xed\\xb4\\xff\\x00\\x00\\xf4\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\x00\\x88']\nBad pipe message: %s [b\"\\xa0\\xf7\\x16S\\xd2\\x85\\xfa\\x11B+A\\x9b\\xe4>\\xd1\\n>\\xa7\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\", b'\\x08\\x08\\x08\\t\\x08\\n\\x08', b'\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06']\nBad pipe message: %s [b'', b'\\x03\\x03']\nBad pipe message: %s [b'']\nBad pipe message: %s [b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe message: %s [b'\\xe1\\xc9v\\x04\\x0eH\\xba\\xff\\xc3\\xb7\\x93\\x0c\\xf7v\\x18\\x0b~\\xae\\x00\\x00\\xa6\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]']\nBad pipe message: %s 
[b\"\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\", b'\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08']" 354 | } 355 | ], 356 | "execution_count": 24, 357 | "metadata": { 358 | "collapsed": true, 359 | "jupyter": { 360 | "source_hidden": false, 361 | "outputs_hidden": false 362 | }, 363 | "nteract": { 364 | "transient": { 365 | "deleting": false 366 | } 367 | }, 368 | "gather": { 369 | "logged": 1684345969907 370 | } 371 | } 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "name": "python38-azureml", 377 | "language": "python", 378 | "display_name": "Python 3.8 - AzureML" 379 | }, 380 | "language_info": { 381 | "name": "python", 382 | "version": "3.8.5", 383 | "mimetype": "text/x-python", 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "pygments_lexer": "ipython3", 389 | "nbconvert_exporter": "python", 390 | "file_extension": ".py" 391 | }, 392 | "kernel_info": { 393 | "name": "python38-azureml" 394 | }, 395 | "microsoft": { 396 | "host": { 397 | "AzureML": { 398 | "notebookHasBeenCompleted": true 399 | } 400 | }, 401 | "ms_spell_check": { 402 | "ms_spell_check_language": "en" 403 | } 404 | }, 405 | "nteract": { 406 | "version": "nteract-front-end@1.0.0" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } -------------------------------------------------------------------------------- /Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "import azureml.core\r\n", 7 | "from azureml.core import Experiment, Environment, Workspace, Dataset, Datastore, ScriptRunConfig\r\n", 8 | "from azureml.core.conda_dependencies import CondaDependencies\r\n", 9 | "import os\r\n", 10 | "import shutil\r\n", 11 | "\r\n", 12 | "# check core SDK version number\r\n", 13 | "\r\n", 14 | "print(\"Azure ML SDK Version: \", azureml.core.VERSION)" 15 | ], 16 | "outputs": [ 17 | { 18 | "output_type": "stream", 19 | "name": "stdout", 20 | "text": "Azure ML SDK Version: 1.44.0\n" 21 | } 22 | ], 23 | "execution_count": 4, 24 | "metadata": { 25 | "gather": { 26 | "logged": 1668617207337 27 | } 28 | } 29 | }, 30 | { 31 | "cell_type": "code", 32 | "source": [ 33 | "# load workspace configuration from the config.json file in the current folder.\r\n", 34 | "ws = Workspace.from_config()\r\n", 35 | "# get metadata about the workspace\r\n", 36 | "print(ws.name, ws.location, ws.resource_group, sep='\\t')\r\n", 37 | "# list the registered datastores\r\n", 38 | "ws.datastores" 39 | ], 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": "landcover-ai\teastus\tcic_ai\n" 45 | }, 46 | { 47 | "output_type": "execute_result", 48 | "execution_count": 6, 49 | 
"data": { 50 | "text/plain": "{'solardatablob': {\n \"name\": \"solardatablob\",\n \"container_name\": \"solar\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'animalopsblobstore2': {\n \"name\": \"animalopsblobstore2\",\n \"container_name\": \"animal-ops\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'animalopsblobstore': {\n \"name\": \"animalopsblobstore\",\n \"container_name\": \"animal-ops\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspacefilestore': {\n \"name\": \"workspacefilestore\",\n \"container_name\": \"azureml-filestore-0b767baf-fb3d-4e08-a2d6-663739db0e23\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceworkingdirectory': {\n \"name\": \"workspaceworkingdirectory\",\n \"container_name\": \"code-391ff5ac-6576-460f-ba4d-7e03433c68b6\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceartifactstore': {\n \"name\": \"workspaceartifactstore\",\n \"container_name\": \"azureml\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceblobstore': {\n \"name\": \"workspaceblobstore\",\n \"container_name\": \"azureml-blobstore-0b767baf-fb3d-4e08-a2d6-663739db0e23\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n }}" 51 | }, 52 | "metadata": {} 53 | } 54 | ], 55 | "execution_count": 6, 56 | "metadata": { 57 | "collapsed": true, 58 | "jupyter": { 59 | "source_hidden": false, 60 | "outputs_hidden": false 61 | }, 62 | "nteract": { 63 | "transient": { 64 | "deleting": false 65 | } 66 | }, 67 | "gather": { 68 | "logged": 1668617220821 69 | } 70 | } 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\r\n", 76 | "datastore.unregister()" 77 | ], 78 | "outputs": [], 79 | "execution_count": 5, 80 | "metadata": { 81 | "jupyter": { 82 | "source_hidden": false, 83 | "outputs_hidden": false 84 | }, 85 | "nteract": { 86 | "transient": { 87 | "deleting": false 88 | } 89 | }, 90 | "gather": { 91 | "logged": 1668463545304 92 | } 93 | } 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "# register our data share containing image data in this workspace\r\n", 99 | "Datastore.register_azure_blob_container(\r\n", 100 | " account_key = 'sZ/bw2Viouyp/C0Duhboamqx5VDXNtAm2fyYzrucLsUNk5nQXkvURAMnBeehMiL1xE+LEMTRBeaq+AStNkzzkQ==',\r\n", 101 | " workspace = ws,\r\n", 102 | " datastore_name = 'solarDataBlob',\r\n", 103 | " container_name = 'solar',\r\n", 104 | " account_name = 'aiprojects')" 105 | ], 106 | "outputs": [ 107 | { 108 | "output_type": "stream", 109 | "name": "stderr", 110 | "text": "Datastore name solarDataBlob contains capital letters. 
They will be converted to lowercase letters.\n" 111 | }, 112 | { 113 | "output_type": "execute_result", 114 | "execution_count": 8, 115 | "data": { 116 | "text/plain": "{\n \"name\": \"solardatablob\",\n \"container_name\": \"solar\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n}" 117 | }, 118 | "metadata": {} 119 | } 120 | ], 121 | "execution_count": 8, 122 | "metadata": { 123 | "collapsed": true, 124 | "jupyter": { 125 | "source_hidden": false, 126 | "outputs_hidden": false 127 | }, 128 | "nteract": { 129 | "transient": { 130 | "deleting": false 131 | } 132 | }, 133 | "gather": { 134 | "logged": 1668463968153 135 | } 136 | } 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# create a file dataset that can be used in training\r\n", 142 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\r\n", 143 | "datastore_paths = [(datastore, 'CPK_solar'), (datastore, 'NC_solar')]\r\n", 144 | "cpk_dataset = Dataset.File.from_files(path = datastore_paths[0])\r\n", 145 | "nc_dataset = Dataset.File.from_files(path = datastore_paths[1])\r\n", 146 | "\r\n", 147 | "cpk_dataset = cpk_dataset.register(\r\n", 148 | " workspace=ws,\r\n", 149 | " name='gee-cpk-solar-data',\r\n", 150 | " description='training and eval TFRecords for solar arrays exported from GEE',\r\n", 151 | " create_new_version=True)\r\n", 152 | "\r\n", 153 | "nc_dataset = nc_dataset.register(\r\n", 154 | " workspace=ws,\r\n", 155 | " name='gee-nc-solar-data',\r\n", 156 | " description='training and eval TFRecords for solar arrays exported from GEE',\r\n", 157 | " create_new_version=True)" 158 | ], 159 | "outputs": [], 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true, 163 | "jupyter": { 164 | "source_hidden": false, 165 | "outputs_hidden": false 166 | }, 167 | "nteract": { 168 | "transient": { 169 | "deleting": false 170 | } 171 | }, 172 | "gather": { 173 | "logged": 1642800766789 174 | } 175 | } 176 | }, 177 | { 178 | "cell_type": "code", 179 | "source": [], 180 | "outputs": [], 181 | "execution_count": null, 182 | "metadata": { 183 | "jupyter": { 184 | "source_hidden": false, 185 | "outputs_hidden": false 186 | }, 187 | "nteract": { 188 | "transient": { 189 | "deleting": false 190 | } 191 | } 192 | } 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "source": [ 197 | "## Environments" 198 | ], 199 | "metadata": { 200 | "nteract": { 201 | "transient": { 202 | "deleting": false 203 | } 204 | } 205 | } 206 | }, 207 | { 208 | "cell_type": "code", 209 | "source": [ 210 | "envs = Environment.list(workspace=ws)\r\n", 211 | "\r\n", 212 | "for env in envs:\r\n", 213 | " if env.startswith(\"AzureML\"):\r\n", 214 | " print(\"Name\",env)" 215 | ], 216 | "outputs": [ 217 | { 218 | "output_type": "stream", 219 | "name": "stdout", 220 | "text": "Name AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu\nName AzureML-responsibleai-0.21-ubuntu20.04-py38-cpu\nName AzureML-PTA-pytorch-1.11-py38-cuda11.3-gpu\nName AzureML-PTA-pytorch-1.11-py38-cuda11.5-gpu\nName AzureML-sklearn-1.0-ubuntu20.04-py38-cpu\nName AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu\nName AzureML-tensorflow-2.5-ubuntu20.04-py38-cuda11-gpu\nName AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu\nName AzureML-ACPT-pytorch-1.11-py38-cuda11.3-gpu\nName AzureML-ACPT-pytorch-1.11-py38-cuda11.5-gpu\nName AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu\nName AzureML-ACPT-pytorch-1.12-py39-cuda11.6-gpu\nName AzureML-ACPT-pytorch-1.12-py38-cuda11.6-gpu\nName 
AzureML-minimal-ubuntu18.04-py37-cuda11.0.3-gpu-inference\nName AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu\nName AzureML-sklearn-0.24-ubuntu18.04-py37-cpu\nName AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.7-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.8-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu\nName AzureML-minimal-ubuntu18.04-py37-cpu-inference\nName AzureML-VowpalWabbit-8.8.0\nName AzureML-PyTorch-1.3-CPU\nName AzureML-Triton\n" 221 | } 222 | ], 223 | "execution_count": 7, 224 | "metadata": { 225 | "jupyter": { 226 | "source_hidden": false, 227 | "outputs_hidden": false 228 | }, 229 | "nteract": { 230 | "transient": { 231 | "deleting": false 232 | } 233 | }, 234 | "gather": { 235 | "logged": 1668617225810 236 | } 237 | } 238 | }, 239 | { 240 | "cell_type": "code", 241 | "source": [ 242 | "base_env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 243 | "base_env" 244 | ], 245 | "outputs": [], 246 | "execution_count": null, 247 | "metadata": { 248 | "jupyter": { 249 | "source_hidden": false, 250 | "outputs_hidden": false 251 | }, 252 | "nteract": { 253 | "transient": { 254 | "deleting": false 255 | } 256 | }, 257 | "gather": { 258 | "logged": 1668617330232 259 | } 260 | } 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "!pwd" 266 | ], 267 | "outputs": [ 268 | { 269 | "output_type": "stream", 270 | "name": "stdout", 271 | "text": "/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet\r\n" 272 | } 273 | ], 274 | "execution_count": 27, 275 | "metadata": { 276 | "jupyter": { 277 | "source_hidden": false, 278 | "outputs_hidden": false 279 | }, 280 | "nteract": { 281 | "transient": { 282 | "deleting": false 283 | } 284 | } 285 | } 286 | }, 287 | { 288 | "cell_type": "code", 289 | "source": [ 290 | "env_docker_conda = Environment(\r\n", 291 | " image=\"mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20221010.v1\",\r\n", 292 | " conda_file=\"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet/envs/conda_env.yml\",\r\n", 293 | " name=\"solar-training\",\r\n", 294 | " description=\"Environment created from a Docker image plus Conda environment.\",\r\n", 295 | ")\r\n", 296 | "\r\n" 297 | ], 298 | "outputs": [], 299 | "execution_count": 34, 300 | "metadata": { 301 | "jupyter": { 302 | "source_hidden": false, 303 | "outputs_hidden": false 304 | }, 305 | "nteract": { 306 | "transient": { 307 | "deleting": false 308 | } 309 | }, 310 | "gather": { 311 | "logged": 1668620052885 312 | } 313 | } 314 | }, 315 | { 316 | "cell_type": "code", 317 | "source": [ 318 | "env_docker_conda" 319 | ], 320 | "outputs": [ 321 | { 322 | "output_type": "execute_result", 323 | "execution_count": 35, 324 | "data": { 325 | "text/plain": "{\n \"assetId\": null,\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": null,\n \"baseImage\": \"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220708.v1\",\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"buildContext\": null,\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": \"2g\"\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n 
\"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.8.13\",\n {\n \"pip\": [\n \"azureml-defaults\"\n ]\n }\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": false\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": null\n}" 326 | }, 327 | "metadata": {} 328 | } 329 | ], 330 | "execution_count": 35, 331 | "metadata": { 332 | "jupyter": { 333 | "source_hidden": false, 334 | "outputs_hidden": false 335 | }, 336 | "nteract": { 337 | "transient": { 338 | "deleting": false 339 | } 340 | }, 341 | "gather": { 342 | "logged": 1668620058108 343 | } 344 | } 345 | }, 346 | { 347 | "cell_type": "code", 348 | "source": [ 349 | "base_env.name = 'solar-training'\r\n", 350 | "base_env.register(ws)" 351 | ], 352 | "outputs": [ 353 | { 354 | "output_type": "stream", 355 | "name": "stderr", 356 | "text": "Environment version is set. Attempting to register desired version. To auto-version, reset version to None.\n" 357 | }, 358 | { 359 | "output_type": "execute_result", 360 | "execution_count": 21, 361 | "data": { 362 | "text/plain": "{\n \"assetId\": \"azureml://locations/eastus/workspaces/0b767baf-fb3d-4e08-a2d6-663739db0e23/environments/solar-training/versions/50\",\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20221010.v1\\n\\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n # upper bound azure-core to address typing-extensions conflict\\n 'azure-core<1.23.0' \\\\\\n 'azureml-core~=1.43.0' \\\\\\n 'azureml-defaults~=1.43.0' \\\\\\n 'azureml-mlflow~=1.43.0' \\\\\\n 'azureml-telemetry~=1.43.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'protobuf~=3.20' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3' \\\\\\n 'debugpy~=1.6.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\\n\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"buildContext\": null,\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n 
\"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"50\"\n}" 363 | }, 364 | "metadata": {} 365 | } 366 | ], 367 | "execution_count": 21, 368 | "metadata": { 369 | "jupyter": { 370 | "source_hidden": false, 371 | "outputs_hidden": false 372 | }, 373 | "nteract": { 374 | "transient": { 375 | "deleting": false 376 | } 377 | }, 378 | "gather": { 379 | "logged": 1668617840822 380 | } 381 | } 382 | }, 383 | { 384 | "cell_type": "code", 385 | "source": [ 386 | "# create an environment for the first time\r\n", 387 | "\r\n", 388 | "envs = Environment.list(workspace = ws)\r\n", 389 | "# well start with a pre-built tensorflow environment\r\n", 390 | "env = envs.get('AzureML-TensorFlow-2.3-GPU')\r\n", 391 | "env\r\n", 392 | "\r\n", 393 | "# define packages to be installed using CondaDependencies\r\n", 394 | "# get the packages that are already part of the pre-built environment\r\n", 395 | "conda_dep = env.python.conda_dependencies\r\n", 396 | "# list packages to install\r\n", 397 | "pip_packages = ['matplotlib', 'rasterio', 'tensorboard']\r\n", 398 | "\r\n", 399 | "# add each package to the existing conda dependencies\r\n", 400 | "for package in pip_packages:\r\n", 401 | " conda_dep.add_pip_package(package)\r\n", 402 | "\r\n", 403 | "# double check all the packages are there\r\n", 404 | "conda_dep.serialize_to_string()\r\n", 405 | "# conda_dep = CondaDependencies.create(\r\n", 406 | "# pip_packages=pip_packages)\r\n", 407 | "\r\n", 408 | "# Now update the conda dependencies of the python environment\r\n", 409 | "env.python.conda_dependencies=conda_dep\r\n", 410 | "\r\n", 411 | "# # Register environment to re-use later\r\n", 412 | "env.name = 'solar-training'\r\n", 413 | "env.register(workspace = ws)" 414 | ], 415 | "outputs": [], 416 | "execution_count": null, 417 | "metadata": { 418 | "collapsed": true, 419 | "jupyter": { 420 | "source_hidden": false, 421 | "outputs_hidden": false 422 | }, 423 | "nteract": { 424 | "transient": { 425 | "deleting": false 426 | } 427 | }, 428 | "gather": { 429 | "logged": 1622223971377 430 | } 431 | } 432 | }, 433 | { 434 | "cell_type": "code", 435 | "source": [ 436 | "envs = Environment.list(workspace = ws)\r\n", 437 | "# well start with a pre-built tensorflow environment\r\n", 438 | "env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 439 | "# env\r\n", 440 | "solar_env = env.clone('test')\r\n", 441 | "conda_dep = CondaDependencies()\r\n", 442 | "# list packages to install\r\n", 443 | "pip_packages = ['matplotlib', 'tensorboard']\r\n", 444 | "\r\n", 445 | "# add each package to the existing conda dependencies\r\n", 446 | "for package in pip_packages:\r\n", 447 | " conda_dep.add_pip_package(package)\r\n", 448 | "\r\n", 449 | "conda_dep.add_conda_package('rasterio')\r\n", 450 | "\r\n", 451 | "solar_env.python.conda_dependencies=conda_dep\r\n", 452 | "\r\n", 453 | "# # Register environment to re-use later\r\n", 454 | "solar_env.name = 'test'\r\n", 455 | "solar_env.register(workspace = ws)" 456 | ], 457 | "outputs": [ 458 | { 459 | "output_type": "execute_result", 460 | "execution_count": 25, 461 | "data": { 462 | "text/plain": "{\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20220113.v1\\n\\nENV 
AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n 'azureml-core==1.37.0.post1' \\\\\\n 'azureml-defaults==1.37.0' \\\\\\n 'azureml-mlflow==1.37.0' \\\\\\n 'azureml-telemetry==1.37.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"test\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.6.2\",\n {\n \"pip\": [\n \"azureml-defaults\",\n \"matplotlib\",\n \"tensorboard\"\n ]\n },\n \"rasterio\"\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"1\"\n}" 463 | }, 464 | "metadata": {} 465 | } 466 | ], 467 | "execution_count": 25, 468 | "metadata": { 469 | "jupyter": { 470 | "source_hidden": false, 471 | "outputs_hidden": false 472 | }, 473 | "nteract": { 474 | "transient": { 475 | "deleting": false 476 | } 477 | }, 478 | "gather": { 479 | "logged": 1643053056600 480 | } 481 | } 482 | }, 483 | { 484 | "cell_type": "code", 485 | "source": [ 486 | "# create an environment for the first time\r\n", 487 | "\r\n", 488 | "envs = Environment.list(workspace = ws)\r\n", 489 | "# well start with a pre-built tensorflow environment\r\n", 490 | "env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 491 | "# env\r\n", 492 | "solar_env = env.clone('test')\r\n", 493 | "# define packages to be installed using CondaDependencies\r\n", 494 | "# get the packages that are already part of the pre-built environment\r\n", 495 | "conda_dep = CondaDependencies()\r\n", 496 | "# list packages to install\r\n", 497 | "pip_packages = ['matplotlib', 'tensorboard']\r\n", 498 | "\r\n", 499 | "# add each package to the existing conda dependencies\r\n", 500 | "for package in pip_packages:\r\n", 501 | " conda_dep.add_pip_package(package)\r\n", 502 | "\r\n", 503 | "conda_dep.add_conda_package('rasterio')\r\n", 504 | "\r\n", 505 | "# double check all the packages are there\r\n", 506 | "conda_dep.serialize_to_string()\r\n", 507 | "# conda_dep = CondaDependencies.create(\r\n", 508 | "# pip_packages=pip_packages)\r\n", 509 | "\r\n", 510 | "# Now update the conda dependencies of the python environment\r\n", 511 | 
"solar_env.python.conda_dependencies=conda_dep\r\n", 512 | "\r\n", 513 | "# # Register environment to re-use later\r\n", 514 | "solar_env.name = 'solar-training'\r\n", 515 | "solar_env.register(workspace = ws)" 516 | ], 517 | "outputs": [ 518 | { 519 | "output_type": "execute_result", 520 | "execution_count": 18, 521 | "data": { 522 | "text/plain": "{\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20220113.v1\\n\\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n 'azureml-core==1.37.0.post1' \\\\\\n 'azureml-defaults==1.37.0' \\\\\\n 'azureml-mlflow==1.37.0' \\\\\\n 'azureml-telemetry==1.37.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.6.2\",\n {\n \"pip\": [\n \"azureml-defaults\",\n \"matplotlib\",\n \"tensorboard\"\n ]\n },\n \"rasterio\"\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"2\"\n}" 523 | }, 524 | "metadata": {} 525 | } 526 | ], 527 | "execution_count": 18, 528 | "metadata": { 529 | "jupyter": { 530 | "source_hidden": false, 531 | "outputs_hidden": false 532 | }, 533 | "nteract": { 534 | "transient": { 535 | "deleting": false 536 | } 537 | }, 538 | "gather": { 539 | "logged": 1643050589298 540 | } 541 | } 542 | }, 543 | { 544 | "cell_type": "code", 545 | "source": [ 546 | "iterator = iter(env.python.conda_dependencies.conda_packages)" 547 | ], 548 | "outputs": [], 549 | "execution_count": 13, 550 | "metadata": { 551 | "jupyter": { 552 | "source_hidden": false, 553 | "outputs_hidden": false 554 | }, 555 | "nteract": { 556 | "transient": { 557 | "deleting": false 558 | } 559 | }, 560 | "gather": { 561 | "logged": 1643049587175 562 | } 563 | } 564 | }, 565 | { 566 | "cell_type": "code", 567 | "source": [ 568 | "solar_env.name" 569 | ], 570 | "outputs": [ 571 | { 
572 | "output_type": "execute_result", 573 | "execution_count": 20, 574 | "data": { 575 | "text/plain": "'solar-training'" 576 | }, 577 | "metadata": {} 578 | } 579 | ], 580 | "execution_count": 20, 581 | "metadata": { 582 | "jupyter": { 583 | "source_hidden": false, 584 | "outputs_hidden": false 585 | }, 586 | "nteract": { 587 | "transient": { 588 | "deleting": false 589 | } 590 | }, 591 | "gather": { 592 | "logged": 1643050736169 593 | } 594 | } 595 | } 596 | ], 597 | "metadata": { 598 | "kernelspec": { 599 | "name": "python38-azureml", 600 | "language": "python", 601 | "display_name": "Python 3.8 - AzureML" 602 | }, 603 | "language_info": { 604 | "name": "python", 605 | "version": "3.8.5", 606 | "mimetype": "text/x-python", 607 | "codemirror_mode": { 608 | "name": "ipython", 609 | "version": 3 610 | }, 611 | "pygments_lexer": "ipython3", 612 | "nbconvert_exporter": "python", 613 | "file_extension": ".py" 614 | }, 615 | "kernel_info": { 616 | "name": "python38-azureml" 617 | }, 618 | "nteract": { 619 | "version": "nteract-front-end@1.0.0" 620 | }, 621 | "microsoft": { 622 | "host": { 623 | "AzureML": { 624 | "notebookHasBeenCompleted": true 625 | } 626 | } 627 | } 628 | }, 629 | "nbformat": 4, 630 | "nbformat_minor": 2 631 | } -------------------------------------------------------------------------------- /demos/SampleUNETdata_GEE.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"view-in-github"},"source":["\"Open"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"esIMGVxhDI0f"},"outputs":[],"source":["# @title Author: Michael Evans { display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."]},{"cell_type":"markdown","metadata":{"id":"_SHAc5qbiR8l"},"source":["# Introduction\n","\n","This notebook demonstrates methods used to extract data to train a U-Net model capable of delineating ground-mounted solar arrays using free satellite imagery. This workflow generates and exports satellite imagery data from Google Earth Engine for analysis in Tensorflow. This analysis predicts the probability of the presence of a solar array as a function of the visible, infrared, and near infrared bands in Sentinel-2 imagery. The model is a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597). This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs. This notebook shows:\n","\n","1. Exporting training/testing patches from Earth Engine, suitable for training an FCNN model.\n","2. Preprocessing.\n","3. Training and validating an FCNN model.\n","4. 
Making predictions with the trained model and importing them to Earth Engine."]},{"cell_type":"markdown","metadata":{"id":"_MJ4kW1pEhwP"},"source":["# Setup software libraries\n","\n","Install needed libraries to the notebook VM. Authenticate as necessary."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"neIa46CpciXq"},"outputs":[],"source":["# Cloud authentication.\n","from google.colab import auth\n","auth.authenticate_user()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jat01FEoUMqg"},"outputs":[],"source":["# Import, authenticate and initialize the Earth Engine library.\n","import ee\n","ee.Authenticate()\n","ee.Initialize()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"n1hFdpBQfyhN"},"outputs":[],"source":["# We use folium to visualize GEE imagery. TODO: Update to use new gee packages\n","import folium\n","print(folium.__version__)\n","\n","# Define a method for displaying Earth Engine image tiles to a folium map.\n","def add_ee_layer(self, ee_image_object, vis_params, name):\n"," map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n"," folium.raster_layers.TileLayer(\n"," tiles = map_id_dict['tile_fetcher'].url_format,\n"," attr = \"Map Data © Google Earth Engine\",\n"," name = name,\n"," overlay = True,\n"," control = True\n"," ).add_to(self)\n","\n","# Add EE drawing method to folium.\n","folium.Map.add_ee_layer = add_ee_layer\n","\n","# Define the URL format used for Earth Engine generated map tiles.\n","#EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'"]},{"cell_type":"markdown","metadata":{"id":"WjUgYcsAs9Ed"},"source":["## Mount Google Drive"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JKDKpX4FtQA1"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yMNfRopTcnYu"},"outputs":[],"source":["# clone repository with modules for computer vision analyses\n","!git clone https://github.com/mjevans26/Satellite_ComputerVision.git"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"c12hxNU2S89-"},"outputs":[],"source":["# add the cloned repo directory to the path so we can use our modules\n","import sys\n","sys.path.append('/content/Satellite_ComputerVision/utils')\n","from clouds import basicQA"]},{"cell_type":"markdown","metadata":{"id":"iT8ycmzClYwf"},"source":["# Variables\n","\n","Declare the variables that will be in use throughout the notebook."]},{"cell_type":"markdown","metadata":{"id":"qKs6HuxOzjMl"},"source":["Specify a cloud storage bucket to which you have read/write access."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"obDDH1eDzsch"},"outputs":[],"source":["from os.path import join\n","BUCKET = 'cvod-203614-mlengine'\n","BUCKET_PATH = join('gs://', BUCKET)"]},{"cell_type":"markdown","metadata":{"id":"wmfKLl9XcnGJ"},"source":["## Set other global variables"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"psz7wJKalaoj"},"outputs":[],"source":["# Specify names and locations for outputs in Cloud Storage. 
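The export tasks below combine these as gs://{BUCKET}/{FOLDER}/{TRAIN_BASE or EVAL_BASE}/{task description}.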
\n","FOLDER = 'CPK_solar'\n","PRED_BASE = 'data/predict'\n","TRAIN_BASE = 'data/training'\n","EVAL_BASE = 'data/eval'\n","MODEL_BASE = 'models/UNET256'\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","# Specify inputs (Sentinel bands) to the model and the response variable.\n","opticalBands = ['B2', 'B3', 'B4']\n","thermalBands = ['B8', 'B11', 'B12']\n","\n","# # We may want to run some experiments where we use pca components\n","# pcaBands = ['pc1', 'pc2', 'pc3']\n","\n","BANDS = opticalBands + thermalBands# + pcaBands\n","RESPONSE = 'landcover'\n","FEATURES = BANDS + [RESPONSE]\n","SCENEID = 'SENSING_ORBIT_NUMBER'\n","\n","# Specify the size and shape of patches expected by the model.\n","KERNEL_SIZE = 256\n","KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n"]},{"cell_type":"markdown","metadata":{"id":"hgoDc7Hilfc4"},"source":["# Imagery\n","\n","Process the imagery to use for predictor variables. This is a three-month, cloud-free, Sentinel-2 composite corresponding to the latest date from which we have confirmed training data. Display it in the notebook for a sanity check."]},{"cell_type":"markdown","metadata":{"id":"MjNmEImcGuMb"},"source":["## Create sample image"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-IlgXu-vcUEY"},"outputs":[],"source":["# Use Sentinel-2 surface reflectance data.\n","S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n","# Grab a feature corresponding to our study area - North Carolina\n","states = ee.FeatureCollection(\"TIGER/2016/States\")\n","nc = states.filter(ee.Filter.eq('NAME', 'Delaware')).geometry().buffer(2500)\n","begin = '2019-01-01'\n","end = '2020-03-01'\n","\n","# The image input collection is cloud-masked.\n","filtered = S2.filterDate(begin, end)\\\n",".filterBounds(nc)\\\n",".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\n","\n","\n","# Create a simple median composite per season to visualize\n","winter = filtered.filterDate('2019-12-01', '2020-02-28').map(basicQA).median().select(BANDS).clip(nc)\n","spring = filtered.filterDate('2019-03-01', '2019-05-31').map(basicQA).median().select(BANDS).clip(nc)\n","summer = filtered.filterDate('2019-06-01', '2019-08-31').map(basicQA).median().select(BANDS).clip(nc)\n","fall = filtered.filterDate('2019-09-01', '2019-11-30').map(basicQA).median().select(BANDS).clip(nc)\n","\n","# Use folium to visualize the imagery.\n","#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","rgbParams = {'bands': ['B4', 'B3', 'B2'],\n"," 'min': 250,\n"," 'max': 3000}\n","\n","nirParams = {'bands': ['B8', 'B11', 'B12'],\n"," 'min': 250,\n"," 'max': 3000}\n","\n","map = folium.Map(location=[38.9725, -75.5185])\n","map.add_ee_layer(spring, rgbParams, 'Color')\n","map.add_ee_layer(spring, nirParams, 'Thermal')\n","\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"gHznnctkJsZJ"},"source":["Prepare the response variable. This is the footprints of ground mounted solar arrays as of 2016, coded into a background class [0] and a target class [1]. 
Display on the map to verify."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5Wxz9BPYHBwh"},"outputs":[],"source":["def set_landcover(ft):\n"," \"\"\"\n"," Add a 'landcover' property to a feature and set it to 1\n"," Parameters\n"," ---\n"," ft:ee.Feature\n"," feature to have property added\n"," Returns\n"," ---\n"," ee.Feature: input feature with new 'landcover' property set to 1\n"," \"\"\"\n"," return ft.set('landcover', 1)\n","\n","# Get solar footprints data from our GEE Asset\n","DE_solar_footprints = ee.FeatureCollection(\"projects/mevans-cic-solar/assets/de_footprints\")\n","# Label each polygon with property 'landcover' equal to 1\n","DE_solar_footprints = DE_solar_footprints.map(set_landcover)\n","# Create an image with all pixels equal to 0\n","blankimg = ee.Image.constant(0)\n","# Convert solar footprints to an image (band value will be 1 based on 'landcover')\n","solar_footprint = DE_solar_footprints.reduceToImage(['landcover'], ee.Reducer.first())\n","# Convert pixels of blank image to 1 where the values of the footprint image are 1\n","# and rename to 'landcover'\n","labelimg = blankimg.where(solar_footprint, solar_footprint).rename('landcover')\n","\n","solarParams = {'bands': 'landcover', 'min':0, 'max': 1}\n","\n","map = folium.Map(location = [38.9725, -75.5185])\n","map.add_ee_layer(labelimg, solarParams, 'Solar footprint')\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"F4djSxBRG2el"},"source":["Use some pre-made geometries to sample the stack in strategic locations. We constrain sampling to occur within 10km of mapped solar arrays. Because our target features are small and sparse, relative to the landscape, we also guide sampling based on their centroids to ensure that we get training data for solar arrays."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ure_WaD0itQY"},"outputs":[],"source":["def buff(ft):\n"," return ft.buffer(10000)\n","\n","def centroid(ft):\n"," return ft.centroid()\n","\n","centroids = DE_solar_footprints.map(centroid)\n","studyArea = DE_solar_footprints.map(buff).union()\n","studyImage = ee.Image(0).byte().paint(studyArea, 1)\n","studyImage = studyImage.updateMask(studyImage)\n","centroids = centroids.randomColumn('random')\n","\n","aoiParams = {'min':0, 'max': 1, 'palette': ['red']}\n","map = folium.Map(location=[38.9725, -75.5185], zoom_start=8)\n","map.add_ee_layer(studyImage, aoiParams, 'Sampling area')\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"ZV890gPHeZqz"},"source":["# Sampling\n","\n","The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. 
Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export."]},{"cell_type":"markdown","metadata":{"id":"CTS7_ZzPDhhg"},"source":["Stack the Sentinel composite and the binary solar indicator image to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores a 256x256 patch of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"eGHYsdAOipa4"},"outputs":[],"source":["featureStack = ee.Image.cat([\n"," fall.select(BANDS),\n"," labelimg.select(RESPONSE)\n","])\n","\n","ls = ee.List.repeat(1, KERNEL_SIZE)\n","lists = ee.List.repeat(ls, KERNEL_SIZE)\n","kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n","\n","arrays = featureStack.neighborhoodToArray(kernel)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1T1cc6haU_oS"},"outputs":[],"source":["# Preview the destination path for calibrated training data\n","join(BUCKET_PATH, FOLDER, TRAIN_BASE, 'calibrated/')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2CqL0Y6iLQPP"},"outputs":[],"source":["!gsutil mv {join(BUCKET_PATH, FOLDER, TRAIN_BASE, '*')} {join(BUCKET_PATH, FOLDER, TRAIN_BASE, 'calibrated/')}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VXes-Ot17RGI"},"outputs":[],"source":["!gsutil ls gs://cvod-203614-mlengine/NC_solar/data/predict"]},{"cell_type":"markdown","metadata":{"id":"aJ4nGSvdYop6"},"source":["First we'll collect image patches from the centroids of known solar array locations."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"F1W2sVmmsv15"},"outputs":[],"source":["# Convert the centroids to a list so we can slice it into batches\n","S = centroids.size().getInfo()\n","centroidList = centroids.toList(S)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"FyRpvwENxE-A"},"outputs":[],"source":["#@title Centroids slicing\n","# Get samples from delineated features using slice() on a feature collection\n","# NOTE: this takes days to run - probably not the optimal approach\n","\n","x = 250\n","\n","while x < 700:\n"," # sampleRegions expects a FeatureCollection, not a Geometry\n"," region = ee.FeatureCollection(centroidList.slice(x, x+50))\n"," sample = arrays.sampleRegions(\n"," collection = region,\n"," scale = 10,\n"," tileScale = 12\n"," )\n"," x += 50\n"," \n"," # assign a random number to samples and create a 70/30 train/test split\n"," sample = sample.randomColumn('random')\n"," training = sample.filter(ee.Filter.gte('random', 0.3))\n"," testing = sample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_trainCentfall' + str(x)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_evalCentfall' + str(x)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()
task.start()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"YoJMncFKYwq2"},"outputs":[],"source":["#@title Centroids random sampling\n","\n","# Define sample sizes for shards and chunks. \n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each chunk.\n","N = 200 # Total sample size in each chunk.\n","C = 5 # Number of chunks\n","\n","iterator = iter(range(N*C))\n","\n","# for each 'chunk' - which defines 2 export tasks per chunk: 1 train, 1 eval\n","for c in range(C):\n"," geomSample = ee.FeatureCollection([])\n","\n"," # for each 'shard' - which defines a batch of samples of size N/n\n"," for i in range(n):\n"," # generate a different seed for this iteration\n"," seed = next(iterator)\n"," sample = arrays.sample(\n"," region = NC_solar_footprints,\n"," scale = 10,\n"," numPixels = N/n,\n"," seed = seed,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," #divide samples into training and evaluation data\n"," geomSample = geomSample.randomColumn('random')\n"," training = geomSample.filter(ee.Filter.gte('random', 0.3))\n"," testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_footprintTrain'+str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, TRAINING_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_footprintEval' + str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QuRyLGmOYmrR"},"outputs":[],"source":["#@title Random sampling\n","\n","# Define sample sizes for shards and chunks. 
\n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each chunk.\n","N = 1000 # Total sample size in each chunk.\n","C = 2# Number of chunks\n","\n","iterator = iter(range(N*C))\n","\n","for c in range(C):\n"," geomSample = ee.FeatureCollection([])\n","\n"," for i in range(n):\n"," seed = next(iterator)\n"," sample = arrays.sample(\n"," region = studyArea,\n"," scale = 10,\n"," numPixels = N/n,\n"," seed = seed,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," #divide samples into training and evaluation data\n"," geomSample = geomSample.randomColumn('random')\n"," training = geomSample.filter(ee.Filter.gte('random', 0.3))\n"," testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_trainfall'+str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_evalfall' + str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Nj1sFkUyYgnj"},"outputs":[],"source":[]},{"cell_type":"markdown","metadata":{"id":"dk51-l7MH2Sa"},"source":["# Model data"]},{"cell_type":"code","source":["# Tensorflow setup.\n","import tensorflow as tf\n","device_name = tf.test.gpu_device_name()\n","tf.executing_eagerly()\n","print(tf.__version__)\n","print(device_name)\n","%load_ext tensorboard"],"metadata":{"id":"DsCx4Q04f5bA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["COLUMNS = [\n"," tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n","]\n","FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n","\n","# Sizes of the training and evaluation datasets.\n","TRAIN_SIZE = 7700\n","EVAL_SIZE = 3300\n","\n","# Specify model training parameters.\n","BATCH_SIZE = 16\n","EPOCHS = 20\n","BUFFER_SIZE = 11000\n","OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.0009, beta_1=0.9, beta_2=0.999)\n","LOSS = 'binary_crossentropy'\n","METRICS = [tf.keras.metrics.categorical_accuracy, tf.keras.metrics.MeanIoU(num_classes=2)]"],"metadata":{"id":"-4pEYPG4gBuf"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rWXrvBE4607G"},"source":["# Training data\n","\n","Load the data exported from Earth Engine into a `tf.data.Dataset`. The following are helper functions for that."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ajyp48-vINuy"},"outputs":[],"source":["from utils import get_training_dataset, get_eval_dataset"]},{"cell_type":"markdown","metadata":{"id":"Xg1fa18336D2"},"source":["Use the helpers to read in the training dataset. 
Print the first record to check."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"bk9rFou0J_dZ"},"outputs":[],"source":["# make sure we have training records\n","ncPattern = join(BUCKET_PATH, 'NC_solar/data/training/UNET_256_*.tfrecord.gz')\n","ncFiles = tf.io.gfile.glob(ncPattern)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JzpG3kUwZ9J5"},"outputs":[],"source":["training = get_training_dataset(ncFiles)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cIueW4_Fs0ID"},"outputs":[],"source":["# check to make sure our records look like we expect\n","print(next(iter(training.take(1))))"]},{"cell_type":"markdown","metadata":{"id":"j-cQO5RL6vob"},"source":["# Evaluation data\n","\n","Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"fkU1JcYlK1s3"},"outputs":[],"source":["# make sure we have eval records\n","ncPattern = join(BUCKET_PATH, 'NC_solar/data/eval/UNET_256_neg*.tfrecord.gz')\n","print(ncPattern)\n","ncFiles = tf.io.gfile.glob(ncPattern)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"48aFseSgY-Mp"},"outputs":[],"source":["ncFiles"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NpcsljQeKzq7"},"outputs":[],"source":["evaluation = get_eval_dataset(ncFiles)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"TDXcbm8e_WyC"},"outputs":[],"source":["print(next(iter(evaluation.take(1))))"]},{"cell_type":"markdown","metadata":{"id":"9JIE7Yl87lgU"},"source":["# Model\n","\n","Here we use the Keras implementation of the U-Net model as found [in the TensorFlow examples](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb). The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probabilities. We can implement the model essentially unmodified, but we will use a weighted binary cross-entropy loss on the sigmoidal output, since we are treating this as a binary segmentation problem in which target pixels are sparse. The sigmoid activation is suitable here because the output is a per-pixel probability constrained to [0, 1]."]},{"cell_type":"markdown","metadata":{"id":"Xh2EZyyPu84H"},"source":["## Metrics"]},{"cell_type":"markdown","metadata":{"id":"HK6BKW_xMNqL"},"source":["We define a weighted binary cross entropy loss function because positive (solar) pixels are sparse in the training data. This also gives us greater control over the rates of omission and commission prediction errors.
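Concretely, `tf.nn.weighted_cross_entropy_with_logits` computes $$L = -\\frac{1}{N}\\sum_i \\left[w\\, y_i \\log\\sigma(x_i) + (1 - y_i)\\log(1 - \\sigma(x_i))\\right],$$ so a positive-class weight $w > 1$ penalizes omission (missed solar pixels) more heavily than commission.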
Because this is an image segmentation exercise, we may also be interested in the intersection over union as a loss measure."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"wsnnnz56yS3l"},"outputs":[],"source":["from tensorflow.python.keras import layers\n","from tensorflow.python.keras import losses\n","from tensorflow.python.keras import models\n","from tensorflow.python.keras import metrics\n","from tensorflow.python.keras import optimizers\n","# backend is needed by dice_coef below\n","from tensorflow.python.keras import backend as K\n","\n","def weighted_bce(y_true, y_pred):\n"," \"\"\"\n"," Compute the weighted binary cross entropy between predictions and observations\n"," Parameters:\n"," y_true (tf.Tensor): 2D tensor of labels\n"," y_pred (tf.Tensor): 2D tensor of predicted probabilities\n"," \n"," Returns:\n"," tf.Tensor: scalar mean weighted binary cross entropy\n"," \"\"\"\n"," # pos_weight > 1 up-weights the positive (solar) class to reduce omission;\n"," # pos_weight = 1 reduces to standard binary cross entropy\n"," bce = tf.nn.weighted_cross_entropy_with_logits(labels = y_true, logits = y_pred, pos_weight = 1)\n"," return tf.reduce_mean(bce)\n","\n","def dice_coef(y_true, y_pred, smooth=1, weight=0.5):\n"," \"\"\"\n"," https://github.com/daifeng2016/End-to-end-CD-for-VHR-satellite-image\n"," \"\"\"\n"," # y_true = y_true[:, :, :, -1] # if dim(3) = 1, equivalent to [8,256,256,1] ==> [8,256,256]\n"," # y_pred = y_pred[:, :, :, -1]\n"," intersection = K.sum(y_true * y_pred)\n"," union = K.sum(y_true) + weight * K.sum(y_pred)\n"," # K.mean((2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth))\n"," return ((2. * intersection + smooth) / (union + smooth)) # taking the mean did not work better\n","\n","def dice_coef_loss(y_true, y_pred):\n"," \"\"\"\n"," https://github.com/daifeng2016/End-to-end-CD-for-VHR-satellite-image\n"," \"\"\"\n"," return 1 - dice_coef(y_true, y_pred)\n","\n","def iou_loss(true, pred):\n"," \"\"\"\n"," Calculate the intersection over union loss (1 - IoU)\n"," \"\"\"\n"," intersection = true * pred\n","\n"," notTrue = 1 - true\n"," union = true + (notTrue * pred)\n","\n"," return tf.subtract(1.0, tf.reduce_sum(intersection)/tf.reduce_sum(union))\n","\n","def conv_block(input_tensor, num_filters):\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\treturn encoder\n","\n","def encoder_block(input_tensor, num_filters):\n","\tencoder = conv_block(input_tensor, num_filters)\n","\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n","\treturn encoder_pool, encoder\n","\n","def decoder_block(input_tensor, concat_tensor, num_filters):\n","\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n","\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\treturn decoder\n","\n","def get_model():\n","\tinputs = layers.Input(shape=[None, None, len(BANDS)])\n","\tencoder0_pool, encoder0 = encoder_block(inputs, 32)\n","\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)\n",
64)\n","\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)\n","\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)\n","\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)\n","\tcenter = conv_block(encoder4_pool, 1024)# center\n","\tdecoder4 = decoder_block(center, encoder4, 512)\n","\tdecoder3 = decoder_block(decoder4, encoder3, 256)\n","\tdecoder2 = decoder_block(decoder3, encoder2, 128)\n","\tdecoder1 = decoder_block(decoder2, encoder1, 64)\n","\tdecoder0 = decoder_block(decoder1, encoder0, 32)\n","\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n","\n","\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n","\n","\tmodel.compile(\n","\t\toptimizer=OPTIMIZER, \n"," loss = weighted_bce,\n","\t\t#loss=losses.get(LOSS),\n","\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n","\n","\treturn model\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PA2gJENE8-J1"},"outputs":[],"source":["# set up tensorboard and checkpoint callbacks\n","log_dir = 'drive/MyDrive/Tensorflow/NC_solar/models/UNET256/Uncalibrated/Seasonal'\n","\n","tensorboard = tf.keras.callbacks.TensorBoard(log_dir= log_dir)\n","\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," join(log_dir, 'best_weights.hdf5'),\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )"]},{"cell_type":"markdown","metadata":{"id":"uu_E7OTDBCoS"},"source":["# Training the model\n","\n","You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5yQPxgtISibx"},"outputs":[],"source":["m = get_model()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NzzaWxOhSxBy"},"outputs":[],"source":["m.fit(\n"," x=training, \n"," epochs=EPOCHS, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=int(EVAL_SIZE/BATCH_SIZE),\n"," callbacks = [checkpoint, tensorboard]\n"," )\n","\n","#We save the model definition and weights to google drive (free) \n","m.save(join(log_dir, 'UNET256.h5'))"]},{"cell_type":"markdown","metadata":{"id":"zvIqqpNXqJSE"},"source":["##Train from checkpoints\n","If we want to resume or continue training from a previous checkpoint we load the model and best weights from GDrive, check the current accuracy on the evaluation data, and resume training."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"q0xgBhsaqInV"},"outputs":[],"source":["#bring in the architecture and best weights from Drive\n","m = models.load_model(join(log_dir, 'UNET256.h5'), custom_objects={'weighted_bce': weighted_bce})\n","# m.load_weights(join(log_dir, 'best_weights.hdf5'))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"umZy0rBzs1Th"},"outputs":[],"source":["#lets see where were at\n","evalMetrics = m.evaluate(x=evaluation, verbose = 1)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xlsFciElxOUA"},"outputs":[],"source":["#set the monitored value (val_mean_io_u) to current evaluation output\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," join(log_dir, 'best_weights.hdf5'),\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )\n","\n","checkpoint.best = 
"print(checkpoint.__dict__)\n","print(checkpoint.best)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ty8wCxDtqWBM"},"outputs":[],"source":["# Now keep training!\n","m.fit(\n"," x=training,\n"," epochs=10,\n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE),\n"," validation_data=evaluation,\n"," validation_steps=int(EVAL_SIZE / BATCH_SIZE),\n"," callbacks = [checkpoint, tensorboard]\n"," )"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"tyhWcGHJ82e8"},"outputs":[],"source":["m.save(join(log_dir, 'UNET256.h5'))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"i9OM5BiS1xYQ"},"outputs":[],"source":["%tensorboard --logdir 'drive/My Drive/Tensorflow/models/UNET256'"]},{"cell_type":"markdown","metadata":{"id":"J1ySNup0xCqN"},"source":["# Prediction\n","\n","The prediction pipeline is:\n","\n","1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storage bucket.\n","2. Use the trained model to make the predictions.\n","3. Write the predictions to a TFRecord file in Cloud Storage.\n","4. Upload the prediction TFRecord file to Earth Engine.\n","\n","The following functions handle this process. It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lv6nb0ShH4_T"},"outputs":[],"source":["# Inspect the prediction outputs\n","predictions = m.predict(evaluation, steps=1, verbose=1)\n","for prediction in predictions:\n"," print(prediction)"]},{"cell_type":"markdown","metadata":{"id":"_FAgadEJcZoz"},"source":["### Functions"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"M3WDAa-RUpXP"},"outputs":[],"source":["def doExport(image, path, out_image_base, kernel_buffer, region):\n"," \"\"\"\n"," Run an image export task on which to run predictions. Block until complete.\n"," Parameters:\n"," image (ee.Image): image to be exported for prediction\n"," path (str): google cloud directory path for export\n"," out_image_base (str): base filename of exported image\n"," kernel_buffer (array): pixels to buffer the prediction patch;
half is added to each side\n"," region (ee.Geometry): region to export\n"," \"\"\"\n"," task = ee.batch.Export.image.toCloudStorage(\n"," image = image.select(BANDS),\n"," description = out_image_base,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(path, out_image_base),\n"," region = region,#.getInfo()['coordinates'],\n"," scale = 10,\n"," fileFormat = 'TFRecord',\n"," maxPixels = 1e13,\n"," formatOptions = {\n"," 'patchDimensions': KERNEL_SHAPE,\n"," 'kernelSize': kernel_buffer,\n"," 'compressed': True,\n"," 'maxFileSize': 104857600\n"," }\n"," )\n"," task.start()\n","\n"," # Block until the task completes.\n"," print('Running image export to Cloud Storage...')\n"," import time\n"," while task.active():\n"," time.sleep(30)\n","\n"," # Error condition\n"," if task.status()['state'] != 'COMPLETED':\n"," print('Error with image export.')\n"," else:\n"," print('Image export completed.')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zb_9_FflygVw"},"outputs":[],"source":["def doPrediction(pred_path, pred_image_base, user_folder, out_image_base, kernel_buffer, region):\n"," \"\"\"\n"," Perform inference on exported imagery, upload to Earth Engine.\n"," Parameters:\n"," pred_path (str): Google cloud (or Drive) path storing prediction image files\n"," pred_image_base (str): base filename of the exported prediction images\n"," user_folder (str): GEE directory to store asset\n"," out_image_base (str): base filename for GEE asset\n"," kernel_buffer (array): length-2 array of buffer pixels\n"," region (ee.Geometry): region that was exported\n"," \"\"\"\n","\n"," print('Looking for TFRecord files...')\n"," \n"," # Get a list of all the files in the output bucket.\n"," filesList = !gsutil ls {join(BUCKET_PATH, pred_path)}\n"," # Get only the files generated by the image export.\n"," exportFilesList = [s for s in filesList if pred_image_base in s]\n","\n"," # Get the list of image files and the JSON mixer file.\n"," imageFilesList = []\n"," jsonFile = None\n"," for f in exportFilesList:\n"," if f.endswith('.tfrecord.gz'):\n"," imageFilesList.append(f)\n"," elif f.endswith('.json'):\n"," jsonFile = f\n","\n"," # Make sure the files are in the right order.\n"," imageFilesList.sort()\n","\n"," from pprint import pprint\n"," pprint(imageFilesList)\n"," print(jsonFile)\n"," \n"," import json\n"," # Load the contents of the mixer file to a JSON object.\n"," jsonText = !gsutil cat {jsonFile}\n"," # Get a single string w/ newlines from the IPython.utils.text.SList\n"," mixer = json.loads(jsonText.nlstr)\n"," pprint(mixer)\n"," totalPatches = mixer['totalPatches']\n"," \n"," # Get set up for prediction.\n"," x_buffer = int(kernel_buffer[0] / 2)\n"," y_buffer = int(kernel_buffer[1] / 2)\n","\n"," buffered_shape = [\n"," KERNEL_SHAPE[0] + kernel_buffer[0],\n"," KERNEL_SHAPE[1] + kernel_buffer[1]]\n","\n"," imageColumns = [\n"," tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32)\n"," for k in BANDS\n"," ]\n","\n"," imageFeaturesDict = dict(zip(BANDS, imageColumns))\n","\n"," def parse_image(example_proto):\n"," return tf.io.parse_single_example(example_proto, imageFeaturesDict)\n","\n"," def toTupleImage(dic):\n"," inputsList = [dic.get(key) for key in BANDS]\n"," stacked = tf.stack(inputsList, axis=0)\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," # normalize() is assumed to come from the project's utils module,\n"," # alongside the dataset helpers imported above\n"," stacked = normalize(stacked, [0, 1])\n"," return stacked\n"," \n"," # Create a dataset(s) from the TFRecord file(s) in Cloud Storage.\n"," i = 0\n"," patches = 0\n"," written_files = []\n"," while i < len(imageFilesList):\n","\n"," imageDataset = tf.data.TFRecordDataset(imageFilesList[i:i+100], compression_type='GZIP')\n",
" imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n"," imageDataset = imageDataset.map(toTupleImage).batch(1)\n"," \n"," # Perform inference.\n"," print('Running predictions...')\n"," predictions = m.predict(imageDataset, steps=None, verbose=1)\n"," # print(predictions[0])\n","\n"," out_image_file = join(BUCKET_PATH,\n"," pred_path,\n"," 'outputs',\n"," '{}{}.TFRecord'.format(out_image_base, i))\n"," \n"," print('Writing predictions to ' + out_image_file + '...')\n"," writer = tf.io.TFRecordWriter(out_image_file)\n"," for predictionPatch in predictions:\n"," print('Writing patch ' + str(patches) + '...')\n"," # clip the buffered prediction back to the central KERNEL_SIZE patch\n"," predictionPatch = predictionPatch[\n"," x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n","\n"," # Create an example.\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'probability': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=predictionPatch.flatten()))\n"," }\n"," )\n"," )\n"," # Write the example.\n"," writer.write(example.SerializeToString())\n"," patches += 1\n","\n"," writer.close()\n"," i += 100\n"," written_files.append(out_image_file)\n"," \n"," out_image_files = ' '.join(written_files)\n"," # Start the upload.\n"," out_image_asset = join(user_folder, out_image_base)\n"," !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile}"]},{"cell_type":"markdown","metadata":{"id":"LZqlymOehnQO"},"source":["Now we have all the code needed to run the prediction pipeline; all that remains is to specify the region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half of which is added to each side of the patch. For example, specifying a 128x128 kernel buffer appends 64 pixels on each side of the patch, ensuring that the output pixels are taken from inputs completely covered by the kernel.
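"]},{"cell_type":"markdown","metadata":{},"source":["A quick worked example of the buffer arithmetic (this mirrors the slicing inside `doPrediction` above, assuming KERNEL_SIZE = 256 as defined at the top of the notebook):"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Worked example of the kernel buffer arithmetic used by doPrediction\n","kernel_buffer = [128, 128]\n","x_buffer = kernel_buffer[0] // 2 # 64 pixels trimmed from each side\n","y_buffer = kernel_buffer[1] // 2\n","buffered_shape = [KERNEL_SIZE + kernel_buffer[0],\n"," KERNEL_SIZE + kernel_buffer[1]] # [384, 384] pixels fed to the model\n","# each buffered prediction is clipped back to the central 256x256 patch:\n","# predictionPatch[x_buffer:x_buffer + KERNEL_SIZE, y_buffer:y_buffer + KERNEL_SIZE]"]},{"cell_type":"markdown","metadata":{},"source":["Note that the buffer dimensions should be even so the trim can be split equally between the two sides of each patch.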
"]},{"cell_type":"markdown","metadata":{"id":"G9UaJxPS3uZw"},"source":["### Test images"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"BqDRwb6j27w-"},"outputs":[],"source":["# create several small aois to test predictions\n","test_aoi_1 = ee.Geometry.Polygon(\n"," [[[-78.19610376358034, 35.086989862385884],\n"," [-78.19610376358034, 34.735631502732396],\n"," [-77.67974634170534, 34.735631502732396],\n"," [-77.67974634170534, 35.086989862385884]]], None, False)\n","test_aoi_2 = ee.Geometry.Polygon(\n"," [[[-81.59087915420534, 35.84308746418702],\n"," [-81.59087915420534, 35.47711130797561],\n"," [-81.03057641983034, 35.47711130797561],\n"," [-81.03057641983034, 35.84308746418702]]], None, False)\n","test_aoi_3 = ee.Geometry.Polygon(\n"," [[[-78.74447677513596, 36.4941960586897],\n"," [-78.74447677513596, 36.17115435938789],\n"," [-78.21713302513596, 36.17115435938789],\n"," [-78.21713302513596, 36.4941960586897]]], None, False)\n","test_aoi_4 = ee.Geometry.Polygon(\n"," [[[-76.62411544701096, 36.33505523381603],\n"," [-76.62411544701096, 36.03800955668766],\n"," [-76.16818282982346, 36.03800955668766],\n"," [-76.16818282982346, 36.33505523381603]]], None, False)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CQyLfPdt3TcA"},"outputs":[],"source":["# Create a prediciton image for the whole state\n","S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n","# Grab a feature corresponding to our study area - North Carolina\n","states = ee.FeatureCollection(\"TIGER/2016/States\")\n","nc = states.filter(ee.Filter.eq('NAME', 'North Carolina'))\n","begin = '2018-05-01'\n","end = '2018-08-30'\n","\n","# The image input collection is cloud-masked.\n","filtered = S2.filterDate(begin, end)\\\n",".filterBounds(nc)\\\n",".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\\\n",".map(basicQA)\n","\n","# Create a simple median composite to visualize\n","test = filtered.median().select(BANDS).clip(test_aoi_4)\n","\n","# Use folium to visualize the imagery.\n","#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","rgbParams = {'bands': ['B4', 'B3', 'B2'],\n"," 'min': 0,\n"," 'max': 0.3}\n","\n","nirParams = {'bands': ['B8', 'B11', 'B12'],\n"," 'min': 0,\n"," 'max': 0.3}\n","\n","map = folium.Map(location=[35.402, -78.376])\n","map.add_ee_layer(test, rgbParams, 'Color')\n","map.add_ee_layer(test, nirParams, 'Thermal')\n","\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1YMkpeS7cjec"},"outputs":[],"source":["# break up large images into smaller pieces\n","NC_coords = ee.Array(nc.bounds().coordinates())\n","mins = NC_coords.reduce(\n"," reducer= ee.Reducer.min(),\n"," axes= [1]\n",").project([2])\n","\n","maxs = NC_coords.reduce(\n"," reducer= ee.Reducer.max(),\n"," axes= [1]\n",").project([2])\n","\n","xs = ee.List.sequence(\n"," start= mins.get([0]),\n"," end= maxs.get([0]),\n"," count= 6)\n"," \n","ys = ee.List.sequence(\n"," start= mins.get([1]),\n"," end= maxs.get([1]),\n"," count= 4)\n","\n","ls = ee.List([])\n","xsize = xs.size().getInfo() - 1\n","ysize = ys.size().getInfo() - 1\n","\n","for x in range(xsize):\n"," xmin = xs.get(x)\n"," xmax = xs.get(x+1)\n"," for y in range(ysize):\n"," ymin = ys.get(y)\n"," ymax = ys.get(y+1)\n"," box = ee.Algorithms.GeometryConstructors.Rectangle([xmin, ymin, xmax, ymax])\n"," ft = ee.Feature(box, {'id': '{}.{}'.format(x,y)})\n"," ls = ls.add(ft)\n"," y += 1\n"," x += 1\n","\n","\n","boxes = 
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FPANwc7B1-TS"},"outputs":[],"source":["# Choose the GEE folder in which to ingest the prediction image:\n","user_folder = 'users/defendersofwildlifeGIS/NC'\n","# prediction path\n","nc_path = join(FOLDER, PRED_BASE)\n","# Base file name to use for TFRecord files and assets. The name structure includes:\n","# the image processing used ['raw', 'calibrated', 'normalized'] and the model\n","nc_image_base = 'raw_unet256_summerpred'\n","# Half of this will extend beyond each side of the patch.\n","nc_kernel_buffer = [128, 128]\n","# NC\n","nc_region = nc # boxes.filterMetadata('id', 'equals', '1.1')"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"both","id":"lLNEOLkXWvSi"},"outputs":[],"source":["# Run the export.\n","doExport(summer, nc_path, nc_image_base, nc_kernel_buffer, nc_region)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"both","id":"KxACnxKFrQ_J"},"outputs":[],"source":["# Run the prediction.\n","doPrediction(pred_path = nc_path,\n"," pred_image_base = nc_image_base,\n"," user_folder = user_folder,\n"," out_image_base = 'raw_unet256_30_summer',\n"," kernel_buffer = nc_kernel_buffer,\n"," region = nc_region)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9FDKc2ZwzODu"},"outputs":[],"source":["# Start the upload.\n","filesList = !gsutil ls {join(BUCKET_PATH, nc_path)}\n","\n","jsonFile = [s for s in filesList if nc_image_base+'mixer.json' in s][0]\n","print(jsonFile)\n","out_image_files = [join(BUCKET_PATH, nc_path, 'outputs','raw_unet256_30_summer{:02}.TFRecord'.format(i)) for i in range(0,17)]\n","files = ' '.join(out_image_files)\n","print(files)\n","asset_id = join(user_folder, 'raw_unet256_30_summer')\n","\n","!earthengine --no-use_cloud_api upload image --asset_id={asset_id} {files} {jsonFile}"]},{"cell_type":"markdown","metadata":{"id":"uj_G9OZ1xH6K"},"source":["# Display the output\n","\n","Once the data has been exported, the model has made predictions, the predictions have been written to a file, and the image has been imported to Earth Engine, it's possible to display the resultant Earth Engine asset. Here, display the solar array predictions over test areas in North Carolina."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Jgco6HJ4R5p2"},"outputs":[],"source":["out_image = ee.Image(user_folder + '/' + nc_image_base)\n","map = folium.Map(location=[35.402, -78.376])\n","map.add_ee_layer(out_image, {'min': 0, 'max': 1}, 'solar predictions')\n","map.add_child(folium.LayerControl())\n","map"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":["hgoDc7Hilfc4","9JIE7Yl87lgU","uu_E7OTDBCoS"],"machine_shape":"hm","private_outputs":true,"provenance":[]},"kernel_info":{"name":"python3"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.9"},"nteract":{"version":"nteract-front-end@1.0.0"}},"nbformat":4,"nbformat_minor":0} 2 | --------------------------------------------------------------------------------