├── .gitmodules
├── README.md
├── azure
│   ├── NY_geotiff.json
│   ├── VA_geotiff.json
│   ├── DE_geotiff.json
│   ├── MD_geotiff.json
│   ├── PA_geotiff.json
│   ├── DE_tfrecord.json
│   ├── subset.py
│   ├── predict_solar.py
│   ├── predict_solar_terminal.py
│   ├── train_solar.py
│   └── train_autoencoder.py
├── envs
│   ├── conda_env.yml
│   ├── requirements.txt
│   ├── solar_training.yml
│   └── conda_env_jan29.yml
├── conda_env-copy.yml
├── LICENSE
├── .gitignore
├── Tensorboard.ipynb
├── predict_solar.py
├── demos
│   ├── Training.ipynb
│   ├── Prediction.ipynb
│   ├── Extract_Data_GEE.ipynb
│   └── SampleUNETdata_GEE.ipynb
├── re-train.ipynb
└── Setup.ipynb

/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "azure/scv"]
2 | 	path = azure/scv
3 | 	url = https://github.com/mjevans26/Satellite_ComputerVision.git
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Solar_UNet
2 | Repository containing code to train U-Net models delineating solar arrays in Sentinel-2 imagery
3 | 
4 | # Use
5 | The https://github.com/mjevans26/Satellite_ComputerVision.git repository contains utility tools and functions for training U-Net models, and is included as a git submodule in this directory. Demonstration notebooks for sampling training data and running predictions are in the 'demos' directory.
6 | 
--------------------------------------------------------------------------------
/azure/NY_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob",
13 |         "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/NewYork/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-10T17:36:46Z&se=2022-02-11T01:36:46Z&sv=2020-08-04&sr=c&sig=zL2WPTgN8ks729VcvTz84vjHKVQAhP3aa7AvtJIDhgY%3D"
14 |     },
15 |     "data":"CPK_solar/data/predict/{}/{}",
16 |     "model": "solar_Jun21"
17 | }
--------------------------------------------------------------------------------
/azure/VA_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob",
13 |         "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Virginia/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-04T17:43:12Z&se=2022-02-05T01:43:12Z&sv=2020-08-04&sr=c&sig=e1WIPuD63XQ6c0tBtgpgWiEjgf2FyPTalufcaIaryuw%3D"
14 |     },
15 |     "data":"CPK_solar/data/predict/{}/{}",
16 |     "model": "solar_Jun21"
17 | }
--------------------------------------------------------------------------------
/azure/DE_geotiff.json:
--------------------------------------------------------------------------------
1 | {
2 |     "workspace":
3 |     {
4 |         "workspace_name" :"solar",
5 |         "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5",
6 |         "resource_group":"cic_ai"
7 |     },
8 |     "blobContainer":
9 |     {
10 |         "container_name" : "solar",
11 |         "account_name" : "aiprojects",
12 |         "datastore_name": "solardatablob", 
13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/MD_geotiff.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Maryland/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/PA_geotiff.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Pennsylvania/outputs/geotiff/{}_{}_{}.tif?sp=racwdl&st=2022-02-02T20:12:00Z&se=2022-02-03T04:12:00Z&sv=2020-08-04&sr=c&sig=A4H2NpKBa58Rc5vqGm%2F0l6sZpPxgQom7jf%2FDmBxVbEQ%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /azure/DE_tfrecord.json: -------------------------------------------------------------------------------- 1 | { 2 | "workspace": 3 | { 4 | "workspace_name" :"solar", 5 | "subscription_id":"d54a5b4d-fd0c-4790-9898-ad1092502ab5", 6 | "resource_group":"cic_ai" 7 | }, 8 | "blobContainer": 9 | { 10 | "container_name" : "solar", 11 | "account_name" : "aiprojects", 12 | "datastore_name": "solardatablob", 13 | "blob_url": "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/{}_{}_{}.TFRecord?sp=racw&st=2022-01-27T18:38:10Z&se=2022-01-29T02:38:10Z&sv=2020-08-04&sr=c&sig=vrHeB7LHAc2R2B6rhS%2BwRLqYM4xY5v1%2B9SlGyj8TTIY%3D" 14 | }, 15 | "data":"CPK_solar/data/predict/{}/{}", 16 | "model": "solar_Jun21" 17 | } -------------------------------------------------------------------------------- /envs/conda_env.yml: -------------------------------------------------------------------------------- 1 | name: solar_training 2 | channels: 3 | - anaconda 4 | - conda-forge 5 | dependencies: 6 | - python=3.7 7 | - pip=20.2.4 8 | - pip: 9 | - matplotlib>=3.3,<3.4 10 | - psutil>=5.8,<5.9 11 | - tqdm>=4.59,<4.60 12 | - pandas>=1.1,<1.2 13 | - scipy>=1.5,<1.6 14 | - numpy>=1.10,<1.20 15 | - rasterio==1.2.10 16 | - ipykernel~=6.0 17 | - azureml-core==1.37.0 18 | - azure-storage-blob==12.9.0 19 | - azureml-defaults==1.37.0 20 | - azureml-mlflow==1.37.0 21 | - azureml-telemetry==1.37.0 22 | - tensorboard==2.4.0 23 | - 
tensorflow-gpu==2.4.1
24 |     - tensorflow-datasets==4.3.0
25 |     - onnxruntime-gpu>=1.7,<1.8
26 |     - horovod[tensorflow-gpu]==0.21.3
27 | 
28 | 
--------------------------------------------------------------------------------
/conda_env-copy.yml:
--------------------------------------------------------------------------------
1 | name: tf_training
2 | channels:
3 |   - anaconda
4 |   - conda-forge
5 | dependencies:
6 |   - python=3.8
7 |   - pip=20.2.4
8 |   - pip:
9 |     - matplotlib~=3.5.0
10 |     - psutil~=5.8.0
11 |     - tqdm~=4.62.0
12 |     - pandas>=1.1,<1.2
13 |     - scipy~=1.7.0
14 |     - numpy~=1.21.0
15 |     - rasterio==1.2.10
16 |     - ipykernel~=6.0
17 |     - azureml-core==1.47.0
18 |     - azure-storage-blob==12.9.0
19 |     - azureml-defaults==1.47.0
20 |     - azureml-mlflow==1.47.0
21 |     - azureml-telemetry==1.47.0
22 |     - tensorboard~=2.11.0
23 |     - tensorflow-gpu~=2.11.0
24 |     - tensorflow-datasets~=4.5.0
25 |     - onnxruntime-gpu~=1.9.0
26 |     - 'horovod[tensorflow-gpu]~=0.23.0'
27 |     - debugpy~=1.6.3
28 |     - protobuf~=3.20
--------------------------------------------------------------------------------
/azure/subset.py:
--------------------------------------------------------------------------------
1 | import rasterio as rio
2 | import json
3 | from rasterio.windows import Window
4 | import numpy as np
5 | 
6 | with rio.open('./outputs/raw_unet256_Virginia_solar_Jun21.tif') as src:
7 |     H,W = src.shape
8 |     crs = src.crs
9 |     windows = [Window(0,0, W//2, H//2), Window(0, H//2, W//2, H-(H//2)), Window(W//2, 0, W-(W//2), H//2), Window(W//2, H//2, W-(W//2), H-(H//2))]
10 |     for i, window in enumerate(windows):
11 |         subset = src.read(window = window)
12 |         print(subset.shape)
13 |         transform = src.window_transform(window)
14 |         with rio.open(
15 |             f'./outputs/VA2021_Jun21preds{i}.tif',
16 |             'w',
17 |             driver = 'GTiff',
18 |             width = window.width,   # each quadrant tile takes the window's dimensions, not the full raster's
19 |             height = window.height,
20 |             count = 1,
21 |             dtype = subset.dtype,
22 |             crs = crs,
23 |             transform = transform) as dst:
24 |             dst.write(subset)
25 | 
26 | 
--------------------------------------------------------------------------------
/envs/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib>=3.3,<3.4
2 | psutil>=5.8,<5.9
3 | tqdm>=4.59,<4.60
4 | pandas>=1.1,<1.2
5 | scipy>=1.5,<1.6
6 | numpy>=1.10,<1.20
7 | ipykernel~=6.0
8 | azure-common==1.1.27
9 | azureml-core==1.37.0.post1
10 | azureml-defaults==1.37.0
11 | azure-graphrbac==0.61.1
12 | azure-identity==1.7.0
13 | azure-mgmt-authorization==0.61.0
14 | azure-mgmt-containerregistry==9.0.0
15 | azure-mgmt-core==1.3.0
16 | azure-mgmt-keyvault==9.3.0
17 | azure-mgmt-resource==19.0.0
18 | azure-mgmt-storage==19.0.0
19 | azure-storage-blob==12.9.0
20 | azureml-dataprep==2.25.2
21 | azureml-dataprep-native==38.0.0
22 | azureml-dataprep-rslex==2.1.1
23 | azureml-dataset-runtime==1.37.0
24 | azureml-inference-server-http==0.4.2
25 | azureml-mlflow==1.37.0
26 | azureml-telemetry==1.37.0
27 | tensorboard==2.4.0
28 | tensorflow-gpu==2.4.1
29 | tensorflow-datasets==4.3.0
30 | onnxruntime-gpu>=1.7,<1.8
31 | horovod[tensorflow-gpu]==0.21.3
32 | rasterio==1.2.10
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 mjevans26
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the 
"Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .httr-oauth 2 | 3 | # Byte-compiled / optimized / DLL files 4 | *__pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | *.pyc 8 | 9 | # Data directories 10 | data/ 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Azure stuff 16 | *.amlignore 17 | *.amltmp 18 | *azureml-models/ 19 | azure/outputs/ 20 | .ipynb_aml_checkpoints/ 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | pip-wheel-metadata/ 37 | share/python-wheels/ 38 | *.egg-info/ 39 | .installed.cfg 40 | *.egg 41 | MANIFEST 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .nox/ 57 | .coverage 58 | .coverage.* 59 | .cache 60 | nosetests.xml 61 | coverage.xml 62 | *.cover 63 | *.py,cover 64 | .hypothesis/ 65 | .pytest_cache/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | db.sqlite3 75 | db.sqlite3-journal 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | .ipynb_aml_checkpoints/ 93 | 94 | 95 | # IPython 96 | profile_default/ 97 | ipython_config.py 98 | 99 | # pyenv 100 | .python-version 101 | 102 | # pipenv 103 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 104 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 105 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 106 | # install all needed dependencies. 107 | #Pipfile.lock 108 | 109 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
110 | __pypackages__/
111 | 
112 | # Celery stuff
113 | celerybeat-schedule
114 | celerybeat.pid
115 | 
116 | # SageMath parsed files
117 | *.sage.py
118 | 
119 | # Environments
120 | .env
121 | .venv
122 | env/
123 | venv/
124 | ENV/
125 | env.bak/
126 | venv.bak/
127 | 
128 | # Spyder project settings
129 | .spyderproject
130 | .spyproject
131 | 
132 | # Rope project settings
133 | .ropeproject
134 | 
135 | # mkdocs documentation
136 | /site
137 | 
138 | # mypy
139 | .mypy_cache/
140 | .dmypy.json
141 | dmypy.json
142 | 
143 | # Pyre type checker
144 | .pyre
145 | 
--------------------------------------------------------------------------------
/azure/predict_solar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | 
8 | from utils import model_tools, processing
9 | from utils.prediction_tools import makePredDataset, write_tfrecord_predictions
10 | from matplotlib import pyplot as plt
11 | import argparse
12 | import os
13 | import glob
14 | import json
15 | import math
16 | import tensorflow as tf
17 | from datetime import datetime
18 | from azureml.core import Run, Workspace, Model
19 | from azure.storage.blob import BlobClient
20 | 
21 | 
22 | # Set Global variables
23 | 
24 | parser = argparse.ArgumentParser()
25 | 
26 | parser.add_argument('--pred_data', type = str, required = True, help = 'directory containing test image(s) and mixer')
27 | parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
28 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
29 | parser.add_argument('--bands', type = str, nargs = '+', required = False, default = ["B2", "B3", "B4", "B8", "B11", "B12"])
30 | parser.add_argument('--blob_url', type = str, required = True, help = 'blob url for upload to blob storage')
31 | 
32 | args = parser.parse_args()
33 | 
34 | # get the run context
35 | run = Run.get_context()
36 | exp = run.experiment
37 | ws = exp.workspace
38 | 
39 | BANDS = args.bands
40 | # BANDS = json.loads(args.bands)
41 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
42 | 
43 | METRICS = {
44 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
45 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
46 |     }
47 | 
48 | def get_weighted_bce(y_true, y_pred):
49 |     return model_tools.weighted_bce(y_true, y_pred, 1)
50 | 
51 | # if a model directory is provided, we will reload the previously trained model and weights
52 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
53 | model_dir = Model.get_model_path(args.model_id, _workspace = ws)
54 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
55 | 
56 | # load our previously trained model and weights
57 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
58 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
59 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
60 | m.load_weights(weights_file)
61 | 
62 | # Specify the size and shape of patches expected by the model. 
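# --- [editor's note: illustrative sketch, not part of the original script] ---
# How KERNEL_SIZE and the kernel_buffer of [128, 128] used further down typically
# interact, assuming the Earth-Engine-style overlap-tile convention for U-Net
# prediction: each exported patch carries a half-buffer halo on every side, and
# only the central kernel is kept when tiles are stitched back together:
#
#   buffered_shape = [256 + 128, 256 + 128]    # 384 x 384 input tiles
#   trim = 128 // 2                            # 64 px discarded per side
#   kept = patch[trim:trim + 256, trim:trim + 256]
#
# The actual trimming lives in utils.prediction_tools.write_tfrecord_predictions;
# the arithmetic above is an assumed sketch of that scheme, not its verbatim code.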
63 | KERNEL_SIZE = args.kernel_size 64 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE] 65 | 66 | 67 | # create special folders './outputs' and './logs' which automatically get saved 68 | os.makedirs('outputs', exist_ok = True) 69 | os.makedirs('logs', exist_ok = True) 70 | out_dir = './outputs' 71 | log_dir = './logs' 72 | 73 | testFiles = [] 74 | 75 | for root, dirs, files in os.walk(args.pred_data): 76 | for f in files: 77 | testFiles.append(os.path.join(root, f)) 78 | 79 | 80 | predFiles = [x for x in testFiles if '.gz' in x] 81 | jsonFiles = [x for x in testFiles if '.json' in x] 82 | jsonFile = jsonFiles[0] 83 | predData = makePredDataset(predFiles, BANDS, one_hot = None) 84 | 85 | write_tfrecord_predictions( 86 | imageDataset = predData, 87 | model = m, 88 | pred_path = './', 89 | out_image_base = f'{jsonFile[:-10]}_{args.model_id}', 90 | kernel_shape = KERNEL_SHAPE, 91 | kernel_buffer = [128,128]) 92 | 93 | # get the current time 94 | now = datetime.now() 95 | date = now.strftime("%d%b%y") 96 | date 97 | 98 | blob_url = args.blob_url 99 | blob_client = BlobClient.from_blob_url(blob_url) 100 | with open(f'{jsonFile[:-10]}_{args.model_id}.TFRecord', 'rb') as f: 101 | blob_client.upload_blob(f) 102 | 103 | 104 | -------------------------------------------------------------------------------- /Tensorboard.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "from azureml.tensorboard import Tensorboard\r\n", 7 | "from azureml.core import Experiment, Environment, Workspace, Datastore, Dataset, Model, ScriptRunConfig\r\n", 8 | "import os\r\n", 9 | "import glob\r\n", 10 | "# get the current workspace\r\n", 11 | "ws = Workspace.from_config()" 12 | ], 13 | "outputs": [], 14 | "execution_count": 1, 15 | "metadata": { 16 | "gather": { 17 | "logged": 1648739190271 18 | } 19 | } 20 | }, 21 | { 22 | "cell_type": "code", 23 | "source": [ 24 | "model = run.register_model(model_name='acd-unet-geeOnera',\r\n", 25 | " tags=run.tags,\r\n", 26 | " description = 'UNET model delineating anthropogenic land cover change in S2 imagery. 
Trained on GEE + Onera data.',\r\n", 27 | " model_path='outputs/',\r\n", 28 | " model_framework = 'Tensorflow',\r\n", 29 | " model_framework_version= '2.0',\r\n", 30 | " datasets = [('training', gee_train_dataset), ('evaluation', eval_dataset), ('testing', test_dataset)])\r\n", 31 | "print(model.name, model.id, model.version, sep='\\t')" 32 | ], 33 | "outputs": [], 34 | "execution_count": null, 35 | "metadata": { 36 | "collapsed": true, 37 | "jupyter": { 38 | "source_hidden": false, 39 | "outputs_hidden": false 40 | }, 41 | "nteract": { 42 | "transient": { 43 | "deleting": false 44 | } 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "# run_id = 'solar-nc-cpk_1638381140_a602c63e'\r\n", 52 | "\r\n", 53 | "# run = ws.get_run(run_id)\r\n", 54 | "\r\n", 55 | "# run.get_file_names()\r\n", 56 | "# The TensorBoard constructor takes an array of runs, so be sure and pass it in as a single-element array here\r\n", 57 | "# tb = Tensorboard([run])\r\n", 58 | "tb = Tensorboard([], local_root = f'{os.getcwd()}/logs', port = 6006)\r\n", 59 | "\r\n", 60 | "# If successful, start() returns a string with the URI of the instance.\r\n", 61 | "tb.start()" 62 | ], 63 | "outputs": [ 64 | { 65 | "output_type": "stream", 66 | "name": "stdout", 67 | "text": "https://mevans1-6006.eastus.instances.azureml.ms\n" 68 | }, 69 | { 70 | "output_type": "execute_result", 71 | "execution_count": 8, 72 | "data": { 73 | "text/plain": "'https://mevans1-6006.eastus.instances.azureml.ms'" 74 | }, 75 | "metadata": {} 76 | } 77 | ], 78 | "execution_count": 8, 79 | "metadata": { 80 | "collapsed": true, 81 | "jupyter": { 82 | "source_hidden": false, 83 | "outputs_hidden": false 84 | }, 85 | "nteract": { 86 | "transient": { 87 | "deleting": false 88 | } 89 | }, 90 | "gather": { 91 | "logged": 1648739374862 92 | } 93 | } 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "tb.stop()" 99 | ], 100 | "outputs": [], 101 | "execution_count": 7, 102 | "metadata": { 103 | "collapsed": true, 104 | "jupyter": { 105 | "source_hidden": false, 106 | "outputs_hidden": false 107 | }, 108 | "nteract": { 109 | "transient": { 110 | "deleting": false 111 | } 112 | }, 113 | "gather": { 114 | "logged": 1648739371516 115 | } 116 | } 117 | } 118 | ], 119 | "metadata": { 120 | "kernelspec": { 121 | "name": "python3-azureml", 122 | "language": "python", 123 | "display_name": "Python 3.6 - AzureML" 124 | }, 125 | "language_info": { 126 | "name": "python", 127 | "version": "3.6.9", 128 | "mimetype": "text/x-python", 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "pygments_lexer": "ipython3", 134 | "nbconvert_exporter": "python", 135 | "file_extension": ".py" 136 | }, 137 | "kernel_info": { 138 | "name": "python3-azureml" 139 | }, 140 | "nteract": { 141 | "version": "nteract-front-end@1.0.0" 142 | }, 143 | "microsoft": { 144 | "host": { 145 | "AzureML": { 146 | "notebookHasBeenCompleted": true 147 | } 148 | } 149 | } 150 | }, 151 | "nbformat": 4, 152 | "nbformat_minor": 2 153 | } -------------------------------------------------------------------------------- /envs/solar_training.yml: -------------------------------------------------------------------------------- 1 | name: solar-training 2 | channels: 3 | - anaconda 4 | - defaults 5 | dependencies: 6 | - ca-certificates=2020.10.14=0 7 | - certifi=2020.6.20=py37_0 8 | - ld_impl_linux-64=2.33.1=h53a641e_7 9 | - libedit=3.1.20191231=h14c3975_1 10 | - libffi=3.3=he6710b0_2 11 | - libgcc-ng=9.1.0=hdf63c60_0 12 | - 
libstdcxx-ng=9.1.0=hdf63c60_0 13 | - ncurses=6.2=he6710b0_1 14 | - openssl=1.1.1h=h7b6447c_0 15 | - pip=20.2.4=py37_0 16 | - python=3.7.9=h7579374_0 17 | - readline=8.0=h7b6447c_0 18 | - setuptools=50.3.0=py37hb0f4dca_1 19 | - sqlite=3.33.0=h62c20be_0 20 | - tk=8.6.10=hbc83047_0 21 | - wheel=0.35.1=py_0 22 | - xz=5.2.5=h7b6447c_0 23 | - zlib=1.2.11=h7b6447c_3 24 | - pip: 25 | - absl-py==1.0.0 26 | - adal==1.2.7 27 | - affine==2.3.0 28 | - applicationinsights==0.11.10 29 | - argcomplete==1.12.3 30 | - astunparse==1.6.3 31 | - attrs==21.4.0 32 | - azure-common==1.1.27 33 | - azure-core==1.20.1 34 | - azure-graphrbac==0.61.1 35 | - azure-identity==1.7.0 36 | - azure-mgmt-authorization==0.61.0 37 | - azure-mgmt-containerregistry==9.0.0 38 | - azure-mgmt-core==1.3.0 39 | - azure-mgmt-keyvault==9.3.0 40 | - azure-mgmt-resource==19.0.0 41 | - azure-mgmt-storage==19.0.0 42 | - azure-storage-blob==12.9.0 43 | - azureml-core==1.37.0.post1 44 | - azureml-dataprep==2.25.2 45 | - azureml-dataprep-native==38.0.0 46 | - azureml-dataprep-rslex==2.1.1 47 | - azureml-dataset-runtime==1.37.0 48 | - azureml-defaults==1.37.0 49 | - azureml-inference-server-http==0.4.2 50 | - azureml-mlflow==1.37.0 51 | - azureml-telemetry==1.37.0 52 | - backcall==0.2.0 53 | - backports-tempfile==1.0 54 | - backports-weakref==1.0.post1 55 | - bcrypt==3.2.0 56 | - cachetools==4.2.4 57 | - cffi==1.15.0 58 | - charset-normalizer==2.0.10 59 | - click==8.0.3 60 | - click-plugins==1.1.1 61 | - cligj==0.7.2 62 | - cloudpickle==2.0.0 63 | - colorama==0.4.4 64 | - configparser==3.7.4 65 | - contextlib2==21.6.0 66 | - cryptography==3.4.8 67 | - cycler==0.11.0 68 | - databricks-cli==0.16.2 69 | - debugpy==1.5.1 70 | - decorator==5.1.1 71 | - dill==0.3.4 72 | - distro==1.6.0 73 | - docker==5.0.3 74 | - dotnetcore2==2.1.22 75 | - entrypoints==0.3 76 | - flask==1.0.3 77 | - flatbuffers==1.12 78 | - fusepy==3.0.1 79 | - future==0.18.2 80 | - gast==0.3.3 81 | - gitdb==4.0.9 82 | - gitpython==3.1.26 83 | - google-auth==1.35.0 84 | - google-auth-oauthlib==0.4.6 85 | - google-pasta==0.2.0 86 | - googleapis-common-protos==1.54.0 87 | - grpcio==1.43.0 88 | - gunicorn==20.1.0 89 | - h5py==2.10.0 90 | - horovod==0.21.3 91 | - humanfriendly==9.2 92 | - idna==3.3 93 | - importlib-metadata==4.10.1 94 | - importlib-resources==5.4.0 95 | - inference-schema==1.3.0 96 | - ipykernel==6.7.0 97 | - ipython==7.31.1 98 | - isodate==0.6.1 99 | - itsdangerous==2.0.1 100 | - jedi==0.18.1 101 | - jeepney==0.7.1 102 | - jinja2==3.0.3 103 | - jmespath==0.10.0 104 | - json-logging-py==0.2 105 | - jsonpickle==2.1.0 106 | - jupyter-client==7.1.2 107 | - jupyter-core==4.9.1 108 | - keras-preprocessing==1.1.2 109 | - kiwisolver==1.3.2 110 | - knack==0.8.2 111 | - markdown==3.3.6 112 | - markupsafe==2.0.1 113 | - matplotlib==3.3.4 114 | - matplotlib-inline==0.1.3 115 | - mlflow-skinny==1.23.0 116 | - msal==1.16.0 117 | - msal-extensions==0.3.1 118 | - msrest==0.6.21 119 | - msrestazure==0.6.4 120 | - ndg-httpsclient==0.5.1 121 | - nest-asyncio==1.5.4 122 | - numpy==1.19.5 123 | - oauthlib==3.1.1 124 | - onnxruntime-gpu==1.7.0 125 | - opt-einsum==3.3.0 126 | - packaging==21.3 127 | - pandas==1.1.5 128 | - paramiko==2.9.2 129 | - parso==0.8.3 130 | - pathspec==0.9.0 131 | - pexpect==4.8.0 132 | - pickleshare==0.7.5 133 | - pillow==9.0.0 134 | - pkginfo==1.8.2 135 | - portalocker==2.3.2 136 | - promise==2.3 137 | - prompt-toolkit==3.0.24 138 | - protobuf==3.19.3 139 | - psutil==5.8.0 140 | - ptyprocess==0.7.0 141 | - pyarrow==3.0.0 142 | - pyasn1==0.4.8 143 | - 
pyasn1-modules==0.2.8
144 |     - pycparser==2.21
145 |     - pygments==2.11.2
146 |     - pyjwt==2.3.0
147 |     - pynacl==1.5.0
148 |     - pyopenssl==21.0.0
149 |     - pyparsing==3.0.7
150 |     - pysocks==1.7.1
151 |     - python-dateutil==2.8.2
152 |     - pytz==2021.3
153 |     - pyyaml==6.0
154 |     - pyzmq==22.3.0
155 |     - rasterio==1.2.10
156 |     - requests==2.27.1
157 |     - requests-oauthlib==1.3.0
158 |     - rsa==4.8
159 |     - scipy==1.5.4
160 |     - secretstorage==3.3.1
161 |     - six==1.16.0
162 |     - smmap==5.0.0
163 |     - snuggs==1.4.7
164 |     - tabulate==0.8.9
165 |     - tensorboard==2.4.0
166 |     - tensorboard-plugin-wit==1.8.1
167 |     - tensorflow-datasets==4.3.0
168 |     - tensorflow-estimator==2.4.0
169 |     - tensorflow-gpu==2.4.1
170 |     - tensorflow-metadata==1.6.0
171 |     - termcolor==1.1.0
172 |     - tornado==6.1
173 |     - tqdm==4.59.0
174 |     - traitlets==5.1.1
175 |     - typing-extensions==3.7.4.3
176 |     - urllib3==1.26.7
177 |     - wcwidth==0.2.5
178 |     - websocket-client==1.2.3
179 |     - werkzeug==2.0.2
180 |     - wrapt==1.12.1
181 |     - zipp==3.7.0
182 | prefix: /anaconda/envs/solar-training
183 | 
--------------------------------------------------------------------------------
/predict_solar.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | 
8 | from utils import model_tools, processing
9 | from utils.prediction_tools import makePredDataset, write_tfrecord_predictions
10 | from matplotlib import pyplot as plt
11 | import argparse
12 | import os
13 | import glob
14 | import json
15 | import math
16 | import tensorflow as tf
17 | from datetime import datetime
18 | from azureml.core import Run, Workspace, Model, Datastore, Dataset
19 | from azure.storage.blob import BlobClient
20 | 
21 | 
22 | # Set Global variables
23 | 
24 | parser = argparse.ArgumentParser()
25 | 
26 | # parser.add_argument('--pred_data', type = str, default = True, help = 'directory containing test image(s) and mixer')
27 | # parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
28 | 
29 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
30 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
31 | parser.add_argument('-c', type=str, help='The path to the job config file')
32 | parser.add_argument('--aoi', type=str, required = True, default = 'Delaware', help = 'Name of the area to run predictions')
33 | parser.add_argument('--year', type=str, required = True, default = 'Summer2020', help = 'Season and year subdirectory')
34 | 
35 | args = parser.parse_args()
36 | 
37 | # # get the run context
38 | # run = Run.get_context()
39 | # exp = run.experiment
40 | # read annual config file
41 | with open(args.c, 'r') as f:
42 |     config = json.load(f)
43 | 
44 | # access relevant key values
45 | blob = config['blobContainer']
46 | wksp = config['workspace']
47 | model = config['model']
48 | 
49 | # connect to the Azure ML workspace specified in the job config file. 
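# --- [editor's note: shape of the job config read above] ---
# This mirrors the azure/*_geotiff.json and azure/DE_tfrecord.json files shown
# earlier in this repository dump:
#
# {
#   "workspace":     {"workspace_name": "solar", "subscription_id": "...", "resource_group": "cic_ai"},
#   "blobContainer": {"container_name": "solar", "account_name": "aiprojects",
#                     "datastore_name": "solardatablob",
#                     "blob_url": ".../{}_{}_{}.TFRecord?<sas-token>"},
#   "data":  "CPK_solar/data/predict/{}/{}",   # formatted with --aoi and --year below
#   "model": "solar_Jun21"
# }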
50 | ws = Workspace(subscription_id = wksp["subscription_id"], workspace_name = wksp["workspace_name"], resource_group = wksp["resource_group"])
51 | 
52 | # access our registered data share containing image data in this workspace
53 | datastore = Datastore.get(workspace = ws, datastore_name = blob['datastore_name'])
54 | pred_path = (datastore, config['data'].format(args.aoi, args.year))
55 | # pred_path = (datastore, 'CPK_solar/data/predict/testpred6')
56 | blob_files = Dataset.File.from_files(path = [pred_path])
57 | 
58 | # BANDS = args.bands
59 | BANDS = json.loads(args.bands)
60 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
61 | 
62 | METRICS = {
63 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
64 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
65 |     }
66 | 
67 | def get_weighted_bce(y_true, y_pred):
68 |     return model_tools.weighted_bce(y_true, y_pred, 1)
69 | 
70 | print(f'Loading model {config["model"]}')
71 | # if a model directory is provided, we will reload the previously trained model and weights
72 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
73 | model_dir = Model.get_model_path(model, _workspace = ws)
74 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
75 | 
76 | # load our previously trained model and weights
77 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
78 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
79 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
80 | m.load_weights(weights_file)
81 | 
82 | print('found model file:', model_file, '\n weights file:', weights_file)
83 | 
84 | # Specify the size and shape of patches expected by the model. 
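# --- [editor's note: example invocation (hypothetical paths; flags are those
# defined in the argparse block above)] ---
#
#   python predict_solar.py -c azure/DE_tfrecord.json --aoi Delaware --year Summer2020
#
# --kernel_size and --bands fall back to 256 and the six Sentinel-2 bands when omitted.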
85 | KERNEL_SIZE = args.kernel_size
86 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
87 | 
88 | 
89 | # create special folders './outputs' and './logs' which automatically get saved
90 | os.makedirs('outputs', exist_ok = True)
91 | os.makedirs('logs', exist_ok = True)
92 | out_dir = './outputs'
93 | log_dir = './logs'
94 | 
95 | testFiles = []
96 | 
97 | print('loading prediction data')
98 | with blob_files.mount() as mount:
99 |     mount_point = mount.mount_point
100 |     for root, dirs, files in os.walk(mount_point):
101 |         for f in files:
102 |             testFiles.append(os.path.join(root, f))
103 | 
104 | predFiles = [x for x in testFiles if '.gz' in x]
105 | jsonFiles = [x for x in testFiles if '.json' in x]
106 | jsonFile = jsonFiles[0]
107 | predData = makePredDataset(predFiles, BANDS, one_hot = None)
108 | 
109 | predictions = m.predict(predData, steps=None, verbose=1)
110 | 
111 | base = os.path.basename(jsonFile)[:-10]
112 | write_tfrecord_predictions(
113 |     predictions = predictions,
114 |     pred_path = out_dir,
115 |     # pred_path = '.',
116 |     # out_image_base = 'raw_unet256_testpred_solar_Jun21',
117 |     out_image_base = f'{base}_{model}',
118 |     kernel_shape = KERNEL_SHAPE,
119 |     kernel_buffer = [128,128])
120 | 
121 | # get the current time
122 | now = datetime.now()
123 | date = now.strftime("%d%b%y")
124 | date
125 | 
126 | print('moving predictions to blob')
127 | # blob_url = "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/testpred.tfrecords?sp=racw&st=2022-01-27T18:38:10Z&se=2022-01-29T02:38:10Z&sv=2020-08-04&sr=c&sig=vrHeB7LHAc2R2B6rhS%2BwRLqYM4xY5v1%2B9SlGyj8TTIY%3D"
128 | blob_url = blob['blob_url'].format(args.aoi, args.year, model)
129 | blob_client = BlobClient.from_blob_url(blob_url)
130 | # with open(f'./raw_unet256_testpred_solar_Jun21.tfrecords', 'rb') as f:
131 | with open(f'{out_dir}/{base}_{model}.tfrecords', 'rb') as f:
132 |     blob_client.upload_blob(f)
133 | 
134 | 
--------------------------------------------------------------------------------
/azure/predict_solar_terminal.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 22 19:51:32 2021
4 | 
5 | @author: MEvans
6 | """
7 | import sys
8 | import argparse
9 | import os
10 | import glob
11 | import json
12 | import math
13 | 
14 | sys.path.append(os.path.join(sys.path[0], 'scv'))
15 | 
16 | from scv.utils import model_tools, processing
17 | from scv.utils.prediction_tools import makePredDataset, write_tfrecord_predictions, write_geotiff_predictions
18 | from matplotlib import pyplot as plt
19 | 
20 | import tensorflow as tf
21 | from datetime import datetime
22 | from azureml.core import Run, Workspace, Model, Datastore, Dataset
23 | from azure.storage.blob import BlobClient
24 | 
25 | 
26 | # Set Global variables
27 | 
28 | parser = argparse.ArgumentParser()
29 | 
30 | # parser.add_argument('--pred_data', type = str, default = True, help = 'directory containing test image(s) and mixer')
31 | # parser.add_argument('--model_id', type = str, required = True, default = None, help = 'model id for continued training')
32 | 
33 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
34 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
35 | parser.add_argument('-c', type=str, help='The path to the job config file')
36 | parser.add_argument('--aoi', type=str, 
required = True, default = 'Delaware', help = 'Name of the area to run predictions')
37 | parser.add_argument('--year', type=str, required = True, default = 'Summer2020', help = 'Season and year subdirectory')
38 | 
39 | args = parser.parse_args()
40 | 
41 | # # get the run context
42 | # run = Run.get_context()
43 | # exp = run.experiment
44 | # read annual config file
45 | with open(args.c, 'r') as f:
46 |     config = json.load(f)
47 | 
48 | # access relevant key values
49 | blob = config['blobContainer']
50 | wksp = config['workspace']
51 | model = config['model']
52 | 
53 | # connect to the Azure ML workspace specified in the job config file.
54 | ws = Workspace(subscription_id = wksp["subscription_id"], workspace_name = wksp["workspace_name"], resource_group = wksp["resource_group"])
55 | 
56 | # access our registered data share containing image data in this workspace
57 | datastore = Datastore.get(workspace = ws, datastore_name = blob['datastore_name'])
58 | pred_path = (datastore, config['data'].format(args.aoi, args.year))
59 | # pred_path = (datastore, 'CPK_solar/data/predict/testpred6')
60 | blob_files = Dataset.File.from_files(path = [pred_path])
61 | 
62 | # BANDS = args.bands
63 | BANDS = json.loads(args.bands)
64 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999)
65 | 
66 | METRICS = {
67 |     'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall')],
68 |     'classes':[tf.keras.metrics.MeanIoU(num_classes=2, name = 'mean_iou')]
69 |     }
70 | 
71 | def get_weighted_bce(y_true, y_pred):
72 |     return model_tools.weighted_bce(y_true, y_pred, 1)
73 | 
74 | print(f'Loading model {config["model"]}')
75 | # if a model directory is provided, we will reload the previously trained model and weights
76 | # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
77 | model_dir = Model.get_model_path(model, _workspace = ws)
78 | # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
79 | 
80 | # load our previously trained model and weights
81 | model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
82 | weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
83 | m = model_tools.get_binary_model(depth = len(BANDS), optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)
84 | m.load_weights(weights_file)
85 | 
86 | print('found model file:', model_file, '\n weights file:', weights_file)
87 | 
88 | # Specify the size and shape of patches expected by the model. 
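# --- [editor's note: how the output blob is named] ---
# Near the end of this script the destination is built as
# blob['blob_url'].format(args.aoi, args.year, model), so the {}_{}_{}.tif
# placeholders in the *_geotiff.json configs resolve to, for example,
#
#   Delaware_Summer2020_solar_Jun21.tif
#
# (illustrative values; the SAS-signed URL prefix comes from the config file).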
89 | KERNEL_SIZE = args.kernel_size
90 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
91 | 
92 | 
93 | # create special folders './outputs' and './logs' which automatically get saved
94 | os.makedirs('outputs', exist_ok = True)
95 | os.makedirs('logs', exist_ok = True)
96 | out_dir = './outputs'
97 | log_dir = './logs'
98 | 
99 | testFiles = []
100 | 
101 | print('loading prediction data')
102 | with blob_files.mount() as mount:
103 |     mount_point = mount.mount_point
104 |     for root, dirs, files in os.walk(mount_point):
105 |         for f in files:
106 |             testFiles.append(os.path.join(root, f))
107 | 
108 | predFiles = [x for x in testFiles if '.gz' in x]
109 | jsonFiles = [x for x in testFiles if '.json' in x]
110 | jsonFile = jsonFiles[0]
111 | base = os.path.basename(jsonFile)[:-10]
112 | predData = makePredDataset(predFiles, BANDS, one_hot = None)
113 | print('writing predictions to geotiff')
114 | write_geotiff_predictions(
115 |     imageDataset = predData,
116 |     model = m,
117 |     jsonFile = jsonFile,
118 |     outImgBase = f'{base}_{model}',
119 |     outImgPath = out_dir,
120 |     kernel_buffer = [128, 128]
121 |     )
122 | 
123 | # write_tfrecord_predictions(
124 | #     predictions = predictions,
125 | #     pred_path = out_dir,
126 | #     # pred_path = '.',
127 | #     # out_image_base = 'raw_unet256_testpred_solar_Jun21',
128 | #     out_image_base = f'{base}_{model}',
129 | #     kernel_shape = KERNEL_SHAPE,
130 | #     kernel_buffer = [128,128])
131 | 
132 | # write_geotiff_predictions(
133 | #     predictions = predictions,
134 | #     mixer = mixer,
135 | #     outImgBase = f'{base}_{model}',
136 | #     outImgPath = out_dir,
137 | #     kernel_buffer = [128,128]
138 | #     )
139 | 
140 | # get the current time
141 | now = datetime.now()
142 | date = now.strftime("%d%b%y")
143 | date
144 | 
145 | print('moving predictions to blob')
146 | # blob_url = "https://aiprojects.blob.core.windows.net/solar/CPK_solar/data/predict/Delaware/outputs/tfrecord/testpred.tfrecords?sp=racwdl&st=2022-02-01T15:56:08Z&se=2022-02-01T23:56:08Z&sv=2020-08-04&sr=c&sig=NKdC7QTH0x291Yn9cTnV5l0q%2BMMVrr%2F1EskLVNPwYI8%3D"
147 | blob_url = blob['blob_url'].format(args.aoi, args.year, model)
148 | blob_client = BlobClient.from_blob_url(blob_url)
149 | # with open(f'./raw_unet256_testpred_solar_Jun21.tfrecords', 'rb') as f:
150 | with open(f'{out_dir}/{base}_{model}.tif', 'rb') as f:
151 |     blob_client.upload_blob(f)
152 | 
153 | 
--------------------------------------------------------------------------------
/envs/conda_env_jan29.yml:
--------------------------------------------------------------------------------
1 | name: solar-training
2 | channels:
3 |   - anaconda
4 |   - conda-forge
5 |   - defaults
6 | dependencies:
7 |   - _libgcc_mutex=0.1=conda_forge
8 |   - _openmp_mutex=4.5=1_llvm
9 |   - _tflow_select=2.3.0=mkl
10 |   - abseil-cpp=20200923.3=h2531618_0
11 |   - aiohttp=3.7.3=py37h4abf009_0
12 |   - astor=0.8.1=pyh9f0ad1d_0
13 |   - astunparse=1.6.3=pyhd8ed1ab_0
14 |   - async-timeout=3.0.1=py_1000
15 |   - attrs=21.4.0=pyhd8ed1ab_0
16 |   - blas=2.17=openblas
17 |   - blinker=1.4=py_1
18 |   - brotlipy=0.7.0=py37hb5d75c8_1001
19 |   - c-ares=1.17.1=h36c2ea0_0
20 |   - ca-certificates=2021.10.8=ha878542_0
21 |   - cached-property=1.5.2=hd8ed1ab_1
22 |   - cached_property=1.5.2=pyha770c72_1
23 |   - cachetools=4.2.4=pyhd8ed1ab_0
24 |   - certifi=2021.10.8=py37h89c1867_1
25 |   - cffi=1.15.0=py37hd667e15_1
26 |   - chardet=3.0.4=py37he5f6b98_1008
27 |   - charset-normalizer=2.0.10=pyhd8ed1ab_0
28 |   - click=8.0.3=py37h89c1867_1
29 |   - dataclasses=0.8=pyhc8e2a94_3
30 |   - flatbuffers=1.12.0=h58526e2_0
31 |   - 
giflib=5.2.1=h36c2ea0_2 32 | - google-pasta=0.2.0=pyh8c360ce_0 33 | - hdf5=1.10.6=nompi_h7c3c948_1111 34 | - icu=68.1=h58526e2_0 35 | - idna=3.3=pyhd8ed1ab_0 36 | - importlib-metadata=4.10.1=py37h89c1867_0 37 | - jpeg=9d=h36c2ea0_0 38 | - keras-preprocessing=1.1.2=pyhd8ed1ab_0 39 | - krb5=1.17.2=h926e7f8_0 40 | - ld_impl_linux-64=2.33.1=h53a641e_7 41 | - libblas=3.8.0=17_openblas 42 | - libcblas=3.8.0=17_openblas 43 | - libcurl=7.71.1=hcdd3856_3 44 | - libedit=3.1.20191231=h14c3975_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=9.1.0=hdf63c60_0 47 | - libgfortran-ng=7.5.0=h14aa051_19 48 | - libgfortran4=7.5.0=h14aa051_19 49 | - liblapack=3.8.0=17_openblas 50 | - liblapacke=3.8.0=17_openblas 51 | - libopenblas=0.3.10=pthreads_hb3c22a3_5 52 | - libpng=1.6.37=h21135ba_2 53 | - libprotobuf=3.14.0=h8c45485_0 54 | - libssh2=1.9.0=hab1572f_5 55 | - libstdcxx-ng=9.1.0=hdf63c60_0 56 | - llvm-openmp=12.0.1=h4bd325d_1 57 | - markdown=3.3.6=pyhd8ed1ab_0 58 | - multidict=4.7.5=py37h8f50634_2 59 | - ncurses=6.2=he6710b0_1 60 | - oauthlib=3.1.1=pyhd8ed1ab_0 61 | - openssl=1.1.1m=h7f8727e_0 62 | - opt_einsum=3.3.0=pyhd8ed1ab_1 63 | - pip=20.2.4=py37_0 64 | - pyasn1=0.4.8=py_0 65 | - pycparser=2.21=pyhd8ed1ab_0 66 | - pyjwt=2.3.0=pyhd8ed1ab_1 67 | - pyopenssl=21.0.0=pyhd8ed1ab_0 68 | - pysocks=1.7.1=py37h89c1867_4 69 | - python=3.7.9=h7579374_0 70 | - python-flatbuffers=1.12=pyhd8ed1ab_1 71 | - python_abi=3.7=2_cp37m 72 | - pyu2f=0.1.5=pyhd8ed1ab_0 73 | - readline=8.0=h7b6447c_0 74 | - requests=2.27.1=pyhd8ed1ab_0 75 | - requests-oauthlib=1.3.0=pyh9f0ad1d_0 76 | - rsa=4.8=pyhd8ed1ab_0 77 | - setuptools=50.3.0=py37hb0f4dca_1 78 | - snappy=1.1.8=he1b5a44_3 79 | - sqlite=3.37.0=hc218d9a_0 80 | - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0 81 | - tensorflow=2.5.0=mkl_py37h3e8b3f4_0 82 | - tensorflow-base=2.5.0=mkl_py37h35b2a3d_0 83 | - tk=8.6.10=hbc83047_0 84 | - werkzeug=2.0.2=pyhd8ed1ab_0 85 | - wheel=0.35.1=py_0 86 | - wrapt=1.12.1=py37h4abf009_2 87 | - xz=5.2.5=h7b6447c_0 88 | - yarl=1.6.3=py37h4abf009_0 89 | - zipp=3.7.0=pyhd8ed1ab_0 90 | - zlib=1.2.11=h7b6447c_3 91 | - pip: 92 | - absl-py==0.15.0 93 | - adal==1.2.7 94 | - affine==2.3.0 95 | - applicationinsights==0.11.10 96 | - argcomplete==1.12.3 97 | - azure-common==1.1.27 98 | - azure-core==1.20.1 99 | - azure-graphrbac==0.61.1 100 | - azure-identity==1.7.0 101 | - azure-mgmt-authorization==0.61.0 102 | - azure-mgmt-containerregistry==9.0.0 103 | - azure-mgmt-core==1.3.0 104 | - azure-mgmt-keyvault==9.3.0 105 | - azure-mgmt-resource==19.0.0 106 | - azure-mgmt-storage==19.0.0 107 | - azure-storage-blob==12.9.0 108 | - azureml-core==1.37.0.post1 109 | - azureml-dataprep==2.25.2 110 | - azureml-dataprep-native==38.0.0 111 | - azureml-dataprep-rslex==2.1.1 112 | - azureml-dataset-runtime==1.37.0 113 | - azureml-defaults==1.37.0 114 | - azureml-inference-server-http==0.4.2 115 | - azureml-mlflow==1.37.0 116 | - azureml-telemetry==1.37.0 117 | - backcall==0.2.0 118 | - backports-tempfile==1.0 119 | - backports-weakref==1.0.post1 120 | - bcrypt==3.2.0 121 | - click-plugins==1.1.1 122 | - cligj==0.7.2 123 | - cloudpickle==2.0.0 124 | - colorama==0.4.4 125 | - configparser==3.7.4 126 | - contextlib2==21.6.0 127 | - cryptography==3.4.8 128 | - cycler==0.11.0 129 | - databricks-cli==0.16.2 130 | - debugpy==1.5.1 131 | - decorator==5.1.1 132 | - dill==0.3.4 133 | - distro==1.6.0 134 | - docker==5.0.3 135 | - dotnetcore2==2.1.22 136 | - earthengine-api==0.1.296 137 | - entrypoints==0.3 138 | - flask==1.0.3 139 | - fusepy==3.0.1 140 | - future==0.18.2 141 | - 
gast==0.3.3 142 | - gitdb==4.0.9 143 | - gitpython==3.1.26 144 | - google-api-core==2.4.0 145 | - google-api-python-client==1.12.10 146 | - google-auth==1.35.0 147 | - google-auth-httplib2==0.1.0 148 | - google-auth-oauthlib==0.4.6 149 | - google-cloud-core==2.2.2 150 | - google-cloud-storage==2.1.0 151 | - google-crc32c==1.3.0 152 | - google-resumable-media==2.1.0 153 | - googleapis-common-protos==1.54.0 154 | - grpcio==1.34.1 155 | - gunicorn==20.1.0 156 | - h5py==2.10.0 157 | - horovod==0.21.3 158 | - httplib2==0.20.2 159 | - httplib2shim==0.0.3 160 | - humanfriendly==9.2 161 | - importlib-resources==5.4.0 162 | - inference-schema==1.3.0 163 | - ipykernel==6.7.0 164 | - ipython==7.31.1 165 | - isodate==0.6.1 166 | - itsdangerous==2.0.1 167 | - jedi==0.18.1 168 | - jeepney==0.7.1 169 | - jinja2==3.0.3 170 | - jmespath==0.10.0 171 | - json-logging-py==0.2 172 | - jsonpickle==2.1.0 173 | - jupyter-client==7.1.2 174 | - jupyter-core==4.9.1 175 | - keras-nightly==2.5.0.dev2021032900 176 | - kiwisolver==1.3.2 177 | - knack==0.8.2 178 | - markupsafe==2.0.1 179 | - matplotlib==3.3.4 180 | - matplotlib-inline==0.1.3 181 | - mlflow-skinny==1.23.0 182 | - msal==1.16.0 183 | - msal-extensions==0.3.1 184 | - msrest==0.6.21 185 | - msrestazure==0.6.4 186 | - ndg-httpsclient==0.5.1 187 | - nest-asyncio==1.5.4 188 | - numpy==1.19.5 189 | - onnxruntime-gpu==1.7.0 190 | - packaging==21.3 191 | - pandas==1.1.5 192 | - paramiko==2.9.2 193 | - parso==0.8.3 194 | - pathspec==0.9.0 195 | - pexpect==4.8.0 196 | - pickleshare==0.7.5 197 | - pillow==9.0.0 198 | - pkginfo==1.8.2 199 | - portalocker==2.3.2 200 | - promise==2.3 201 | - prompt-toolkit==3.0.24 202 | - protobuf==3.19.3 203 | - psutil==5.8.0 204 | - ptyprocess==0.7.0 205 | - pyarrow==3.0.0 206 | - pyasn1-modules==0.2.8 207 | - pygments==2.11.2 208 | - pynacl==1.5.0 209 | - pyparsing==3.0.7 210 | - python-dateutil==2.8.2 211 | - pytz==2021.3 212 | - pyyaml==6.0 213 | - pyzmq==22.3.0 214 | - rasterio==1.2.10 215 | - scipy==1.5.4 216 | - secretstorage==3.3.1 217 | - six==1.15.0 218 | - smmap==5.0.0 219 | - snuggs==1.4.7 220 | - tabulate==0.8.9 221 | - tensorboard==2.4.0 222 | - tensorboard-data-server==0.6.1 223 | - tensorflow-datasets==4.3.0 224 | - tensorflow-estimator==2.4.0 225 | - tensorflow-gpu==2.5.0 226 | - tensorflow-metadata==1.6.0 227 | - termcolor==1.1.0 228 | - tornado==6.1 229 | - tqdm==4.59.0 230 | - traitlets==5.1.1 231 | - typing-extensions==3.7.4.3 232 | - uritemplate==3.0.1 233 | - urllib3==1.26.7 234 | - wcwidth==0.2.5 235 | - websocket-client==1.2.3 236 | prefix: /anaconda/envs/solar-training 237 | -------------------------------------------------------------------------------- /azure/train_solar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jan 2 12:41:40 2021 4 | 5 | @author: MEvans 6 | """ 7 | 8 | from scv.utils import model_tools, processing 9 | from scv.utils.prediction_tools import make_pred_dataset, callback_predictions, plot_to_image 10 | from matplotlib import pyplot as plt 11 | import argparse 12 | import os 13 | import glob 14 | import json 15 | import math 16 | import tensorflow as tf 17 | from datetime import datetime 18 | from azureml.core import Run, Workspace, Model 19 | 20 | print(tf.__version__) 21 | # Set Global variables 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--train_data', type = str, required = True, help = 'Training datasets') 25 | parser.add_argument('--eval_data', type = str, required = 
True, help = 'Evaluation datasets')
26 | parser.add_argument('--test_data', type = str, default = None, help = 'directory containing test image(s) and mixer')
27 | parser.add_argument('--model_id', type = str, required = False, default = None, help = 'model id for continued training')
28 | parser.add_argument('-lr', '--learning_rate', type = float, default = 0.001, help = 'Initial learning rate')
29 | parser.add_argument('-w', '--weights', type = str, default = None, help = 'sample weight for classes in iou, bce, etc.')
30 | parser.add_argument('--bias', type = float, default = None, help = 'bias value for keras output layer initializer')
31 | parser.add_argument('-e', '--epochs', type = int, default = 10, help = 'Number of epochs to train the model for')
32 | parser.add_argument('-b', '--batch', type = int, default = 16, help = 'Training batch size')
33 | parser.add_argument('--size', type = int, default = 3000, help = 'Size of training dataset')
34 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches')
35 | parser.add_argument('--response', type = str, required = True, help = 'Name of the response variable in tfrecords')
36 | parser.add_argument('--bands', type = str, required = False, default = '["B2", "B3", "B4", "B8", "B11", "B12"]')
37 | parser.add_argument('--splits', type = str, required = False, default = '[0]')
38 | parser.add_argument('--epoch_start', type = int, required = False, help = 'If re-training, the last epoch')
39 | 
40 | args = parser.parse_args()
41 | print('bands', args.bands)
42 | TRAIN_SIZE = args.size
43 | BATCH = args.batch
44 | EPOCHS = args.epochs
45 | LAST = args.epoch_start
46 | BIAS = args.bias
47 | WEIGHTS = json.loads(args.weights) if args.weights else [1.0, 1.0] # fall back to equal class weights when -w is omitted
48 | LR = args.learning_rate
49 | BANDS = json.loads(args.bands)
50 | DEPTH = len(BANDS)
51 | SPLITS = json.loads(args.splits)
52 | if sum(SPLITS) == 0:
53 |     SPLITS = None
54 | RESPONSE = dict({args.response:2})
55 | MOMENTS = [(0,10000),(0,10000),(0,10000),(0,10000),(0,10000),(0,10000) ]
56 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999)
57 | 
58 | METRICS = {
59 |     'logits':[tf.keras.metrics.CategoricalAccuracy()],
60 |     'classes':[tf.keras.metrics.MeanIoU(num_classes = 2, sparse_y_pred = True, sparse_y_true = False)]
61 |     }
62 | 
63 | def weighted_crossentropy(y_true, y_pred):
64 |     class_weights = tf.compat.v2.constant(WEIGHTS)
65 |     weights = tf.reduce_sum(class_weights * y_true, axis = -1)
66 |     print('weights shape', weights.shape)
67 |     unweighted_loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
68 |     weighted_loss = weights * unweighted_loss
69 |     loss = tf.reduce_mean(weighted_loss)
70 |     return loss
71 | 
72 | 
73 | LOSSES = {
74 |     'logits':weighted_crossentropy
75 |     }
76 | 
77 | FEATURES = BANDS + [args.response]
78 | 
79 | # round the training data size up to nearest 100 to define buffer
80 | BUFFER = math.ceil(args.size/100)*100
81 | 
82 | # Specify the size and shape of patches expected by the model. 
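# --- [editor's worked example (sketch) of weighted_crossentropy above] ---
# With hypothetical WEIGHTS = [1.0, 10.0] and a one-hot solar pixel y_true = [0, 1]:
#
#   weights = reduce_sum([1.0, 10.0] * [0, 1]) = 10.0
#   loss    = mean(10.0 * categorical_crossentropy([0, 1], y_pred))
#
# i.e. errors on the rare solar class count ten times as much as background
# errors, which is the point of passing class weights via -w.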
83 | KERNEL_SIZE = args.kernel_size
84 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
85 | COLUMNS = [
86 |     tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES
87 |     ]
88 | FEATURES_DICT = dict(zip(FEATURES, COLUMNS))
89 | 
90 | # create special folders './outputs' and './logs' which automatically get saved
91 | os.makedirs('outputs', exist_ok = True)
92 | os.makedirs('logs', exist_ok = True)
93 | out_dir = './outputs'
94 | log_dir = './logs'
95 | 
96 | # create training dataset
97 | 
98 | # train_files = glob.glob(os.path.join(args.data_folder, 'training', 'UNET_256_[A-Z]*.gz'))
99 | # eval_files = glob.glob(os.path.join(args.data_folder, 'eval', 'UNET_256_[A-Z]*.gz'))
100 | 
101 | train_files = []
102 | for root, dirs, files in os.walk(args.train_data):
103 |     for f in files:
104 |         train_files.append(os.path.join(root, f))
105 | 
106 | eval_files = []
107 | for root, dirs, files in os.walk(args.eval_data):
108 |     for f in files:
109 |         eval_files.append(os.path.join(root, f))
110 | 
111 | # train_files = glob.glob(os.path.join(args.train_data, 'UNET_256_[A-Z]*.gz'))
112 | # eval_files = glob.glob(os.path.join(args.eval_data, 'UNET_256_[A-Z]*.gz'))
113 | 
114 | training = processing.get_training_dataset(
115 |     files = train_files,
116 |     ftDict = FEATURES_DICT,
117 |     features = BANDS,
118 |     response = RESPONSE,
119 |     moments = MOMENTS,
120 |     buff = BUFFER,
121 |     batch = BATCH,
122 |     axes = [2],
123 |     repeat = True,
124 |     splits = SPLITS)
125 | 
126 | evaluation = processing.get_eval_dataset(
127 |     files = eval_files,
128 |     ftDict = FEATURES_DICT,
129 |     features = BANDS,
130 |     response = RESPONSE,
131 |     moments = MOMENTS,
132 |     splits = SPLITS)
133 | 
134 | ## DEFINE CALLBACKS
135 | 
136 | # get the current time
137 | now = datetime.now()
138 | date = now.strftime("%d%b%y")
139 | date
140 | 
141 | # define a checkpoint callback to save best models during training
142 | checkpoint = tf.keras.callbacks.ModelCheckpoint(
143 |     os.path.join(out_dir, 'best_weights_'+date+'_{epoch:02d}.hdf5'),
144 |     monitor='val_classes_mean_io_u',
145 |     verbose=1,
146 |     save_best_only=True,
147 |     mode='max'
148 |     )
149 | 
150 | # define a tensorboard callback to write training logs
151 | tensorboard = tf.keras.callbacks.TensorBoard(log_dir = log_dir)
152 | 
153 | # get the run context
154 | run = Run.get_context()
155 | exp = run.experiment
156 | ws = exp.workspace
157 | 
158 | ## BUILD THE MODEL
159 | 
160 | # if a model directory is provided, we will reload the previously trained model and weights
161 | if args.model_id:
162 |     # we will package the 'models' directory within the 'azure' directory submitted with the experiment run
163 |     model_dir = Model.get_model_path(args.model_id, _workspace = ws)
164 |     # model_dir = os.path.join('./models', args.model_id, '1', 'outputs')
165 | 
166 |     # load our previously trained model and weights
167 |     model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0]
168 |     weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0]
169 |     m, checkpoint = model_tools.retrain_model(
170 |         model_file = model_file,
171 |         checkpoint = checkpoint,
172 |         eval_data = evaluation,
173 |         metric = 'classes_mean_io_u',
174 |         weights_file = weights_file,
175 |         custom_objects = {'weighted_crossentropy': weighted_crossentropy},
176 |         lr = LR)
177 |     # TODO: make this dynamic
178 |     initial_epoch = LAST
179 | # otherwise build a model from scratch with provided specs
180 | else:
181 |     m = model_tools.get_unet_model(nclasses = 2, nchannels = DEPTH, optim = OPTIMIZER, loss = LOSSES, mets = METRICS, bias = 
BIAS) 182 | initial_epoch = 0 183 | 184 | # if test images provided, define an image saving callback 185 | if args.test_data: 186 | 187 | test_files = glob.glob(os.path.join(args.test_data, '*.gz')) 188 | mixer_file = glob.glob(os.path.join(args.test_data, '*.json')) 189 | 190 | # run predictions on a test image and log so we can see what the model is doing at each epoch 191 | jsonFile = mixer_file[0] 192 | with open(jsonFile,) as file: 193 | mixer = json.load(file) 194 | 195 | pred_data = make_pred_dataset(test_files, BANDS, moments = MOMENTS) 196 | file_writer = tf.summary.create_file_writer(log_dir + '/preds') 197 | 198 | def log_pred_image(epoch, logs): 199 | out_image = callback_predictions(pred_data, m, mixer) 200 | prob = out_image[:, :] 201 | figure = plt.figure(figsize=(10, 10)) 202 | plt.imshow(prob) 203 | image = plot_to_image(figure) 204 | 205 | with file_writer.as_default(): 206 | tf.summary.image("Predicted Image", image, step=epoch) 207 | 208 | pred_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end = log_pred_image) 209 | 210 | callbacks = [checkpoint, tensorboard, pred_callback] 211 | else: 212 | callbacks = [checkpoint, tensorboard] 213 | 214 | # train the model 215 | steps_per_epoch = int(TRAIN_SIZE//BATCH) 216 | m.fit( 217 | x = training, 218 | epochs = EPOCHS, 219 | steps_per_epoch = steps_per_epoch, 220 | validation_data = evaluation, 221 | callbacks = callbacks, 222 | initial_epoch = initial_epoch 223 | ) 224 | 225 | m.save(os.path.join(out_dir, f'solar_unet256_{date}.h5')) 226 | -------------------------------------------------------------------------------- /azure/train_autoencoder.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Jan 2 12:41:40 2021 4 | 5 | @author: MEvans 6 | """ 7 | 8 | from Satellite_ComputerVision import model_tools, processing 9 | from Satellite_ComputerVision.prediction_tools import makePredDataset, callback_predictions, plot_to_image 10 | from matplotlib import pyplot as plt 11 | import argparse 12 | import os 13 | import glob 14 | import json 15 | import math 16 | import tensorflow as tf 17 | from datetime import datetime 18 | from azureml.core import Run, Workspace, Model 19 | 20 | 21 | # Set Global variables 22 | 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument('--train_data', type = str, required = True, help = 'Training datasets') 25 | parser.add_argument('--eval_data', type = str, required = True, help = 'Evaluation datasets') 26 | parser.add_argument('--test_data', type = str, default = None, help = 'directory containing test image(s) and mixer') 27 | parser.add_argument('--model_id', type = str, required = False, default = None, help = 'model id for continued training') 28 | parser.add_argument('-lr', '--learning_rate', type = float, default = 0.001, help = 'Initial learning rate') 29 | parser.add_argument('-e', '--epochs', type = int, default = 10, help = 'Number of epochs to train the model for') 30 | parser.add_argument('-b', '--batch', type = int, default = 16, help = 'Training batch size') 31 | parser.add_argument('--size', type = int, default = 3000, help = 'Size of training dataset') 32 | parser.add_argument('--kernel_size', type = int, default = 256, dest = 'kernel_size', help = 'Size in pixels of incoming patches') 33 | parser.add_argument('--bands', type = str, nargs = '+', required = False, default = ['B2', 'B3', 'B4', 'B8']) 34 | args = parser.parse_args() 35 | 36 | TRAIN_SIZE = args.size 37 | BATCH = 
args.batch 38 | EPOCHS = args.epochs 39 | LR = args.learning_rate 40 | BANDS = args.bands 41 | OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999) 42 | LOSS = 'mean_squared_error' 43 | METRICS = [tf.keras.metrics.MeanSquaredError(name = 'mse')] 44 | 45 | # set the shuffle buffer to a quarter of the training set size, rounded up to the nearest 100 46 | BUFFER = math.ceil(args.size//4/100)*100 47 | 48 | # Specify the size and shape of patches expected by the model. 49 | KERNEL_SIZE = args.kernel_size 50 | KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE] 51 | 52 | FEATURES = [tf.io.FixedLenFeature(shape = KERNEL_SHAPE, dtype = tf.float32) for band in BANDS] 53 | FEATURES_DICT = dict(zip(BANDS, FEATURES)) 54 | 55 | # create special folders './outputs' and './logs' which are automatically saved 56 | os.makedirs('outputs', exist_ok = True) 57 | os.makedirs('logs', exist_ok = True) 58 | out_dir = './outputs' 59 | log_dir = './logs' 60 | 61 | # create training dataset 62 | 63 | # train_files = glob.glob(os.path.join(args.data_folder, 'training', 'UNET_256_[A-Z]*.gz')) 64 | # eval_files = glob.glob(os.path.join(args.data_folder, 'eval', 'UNET_256_[A-Z]*.gz')) 65 | 66 | train_files = [] 67 | for root, dirs, files in os.walk(args.train_data): 68 | for f in files: 69 | train_files.append(os.path.join(root, f)) 70 | 71 | eval_files = [] 72 | for root, dirs, files in os.walk(args.eval_data): 73 | for f in files: 74 | eval_files.append(os.path.join(root, f)) 75 | print(f'number of train files = {len(train_files)}') 76 | print(f'first train file is {train_files[0]}') 77 | def to_tuple(inputs): 78 | """Function to convert a dictionary of tensors to a tuple of (inputs, outputs). 79 | Turn the tensors returned by parse_tfrecord into a stack in HWC shape. 80 | Args: 81 | inputs: A dictionary of tensors, keyed by feature name. 82 | Returns: 83 | A tuple of (inputs, outputs). 84 | """ 85 | # double up our bands to match the structure of before/after data 86 | inputsList = [inputs.get(key) for key in BANDS] 87 | stacked = tf.stack(inputsList, axis=0) 88 | # Convert from CHW to HWC 89 | stacked = tf.transpose(stacked, [1, 2, 0]) 90 | # Perform image augmentation 91 | stacked = processing.aug_img(stacked) 92 | normalized = processing.normalize(stacked, [2]) 93 | # do color augmentation on input features 94 | before = processing.aug_color(normalized) 95 | after = processing.aug_color(normalized) 96 | # concatenate the two augmented copies along the channel axis 97 | bands = tf.concat([before, after], axis = -1) 98 | response = bands 99 | return bands, response 100 | 101 | def get_dataset(files, ftDict, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 102 | """Function to read, parse and format to tuple a set of input tfrecord files. 103 | Get all the files matching the pattern, parse and convert to tuple.
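Example (illustrative; assumes the default four bands and 256x256 patches, so each record parses to eight stacked channels after the before/after doubling above):
    ds = get_dataset(train_files[:1], FEATURES_DICT)
    x, y = next(iter(ds))
    # x.shape == y.shape == (256, 256, 8) for this autoencoder setup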
104 | Args: 105 | files (list): A list of filenames storing tfrecords 106 | ftDict (dict): Dictionary of input features in tfrecords 107 | features (list): List of input feature names 108 | response (str): response name(s) 109 | axes (list): axes along which to calculate moments for rescaling 110 | one_hot (dict): key:value pairs for name of one-hot variable and desired one-hot depth 111 | splits (list): size(s) of groups of features to be kept together 112 | moments (list): list of [mean, var] tuples for standardization 113 | Returns: 114 | A tf.data.Dataset 115 | """ 116 | 117 | def parse_tfrecord(example_proto): 118 | return tf.io.parse_single_example(example_proto, ftDict) 119 | 120 | def tupelize(feature_dict): 121 | return to_tuple(feature_dict) 122 | 123 | dataset = tf.data.TFRecordDataset(files, compression_type='GZIP') 124 | dataset = dataset.map(parse_tfrecord, num_parallel_calls=5) 125 | dataset = dataset.map(tupelize, num_parallel_calls=5) 126 | return dataset 127 | 128 | def get_training_dataset(files, ftDict, buff, batch = 16, repeat = True, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 129 | """ 130 | Get the preprocessed training dataset 131 | Args: 132 | files (list): list of tfrecord files to be used for training 133 | ftDict (dict): Dictionary of input features in tfrecords 134 | features (list): List of input feature names 135 | response (str): response name(s) 136 | axes (list): axes along which to calculate moments for rescaling 137 | buff (int): buffer size for shuffle 138 | batch (int): batch size for training 139 | repeat (bool): should the dataset be repeated 140 | Returns: 141 | A tf.data.Dataset of training data. 142 | """ 143 | dataset = get_dataset(files, ftDict, axes, splits, one_hot, moments, **kwargs) 144 | if repeat: 145 | dataset = dataset.shuffle(buff).batch(batch).repeat() 146 | else: 147 | dataset = dataset.shuffle(buff).batch(batch) 148 | return dataset 149 | 150 | def get_eval_dataset(files, ftDict, axes = [2], splits = None, one_hot = None, moments = None, **kwargs): 151 | """ 152 | Get the preprocessed evaluation dataset 153 | Args: 154 | files (list): list of tfrecords to be used for evaluation 155 | Returns: 156 | A tf.data.Dataset of evaluation data. 157 | """ 158 | 159 | dataset = get_dataset(files, ftDict, axes, splits, one_hot, moments, **kwargs) 160 | dataset = dataset.batch(1) 161 | return dataset 162 | 163 | training = get_training_dataset( 164 | files = train_files[:len(train_files)//2], 165 | ftDict = FEATURES_DICT, 166 | buff = BUFFER, 167 | batch = BATCH, 168 | repeat = True) 169 | 170 | evaluation = get_eval_dataset( 171 | files = eval_files[:len(eval_files)//2], 172 | ftDict = FEATURES_DICT, 173 | features = BANDS) 174 | 175 | ## DEFINE CALLBACKS 176 | 177 | # get the current time 178 | now = datetime.now() 179 | date = now.strftime("%d%b%y") 180 | date 181 | 182 | # define a checkpoint callback to save best models during training 183 | checkpoint = tf.keras.callbacks.ModelCheckpoint( 184 | os.path.join(out_dir, 'best_weights'+date+'.hdf5'), 185 | monitor='val_mse', 186 | verbose=1, 187 | save_best_only=True, 188 | mode='min' 189 | ) 190 | 191 | # define a tensorboard callback to write training logs 192 | tensorboard = tf.keras.callbacks.TensorBoard(log_dir = log_dir) 193 | 194 | callbacks = [checkpoint, tensorboard] 195 | 196 | # get the run context 197 | run = Run.get_context() 198 | exp = run.experiment 199 | ws = exp.workspace 200 | 201 | # # Create a MirroredStrategy.
202 | # strategy = tf.distribute.MirroredStrategy() 203 | # print('Number of devices: {}'.format(strategy.num_replicas_in_sync)) 204 | 205 | # ## BUILD THE MODEL 206 | # with strategy.scope(): 207 | # METRICS = [tf.keras.metrics.MeanSquaredError(name = 'mse')] 208 | 209 | # OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999) 210 | m = model_tools.get_autoencoder(depth = len(BANDS)*2, optim = OPTIMIZER, loss = LOSS, mets = METRICS) 211 | # # if a model directory provided we will reload previously trained model and weights 212 | # if args.model_id: 213 | # # we will package the 'models' directory within the 'azure' dirrectory submitted with experiment run 214 | # model_dir = Model.get_model_path(args.model_id, _workspace = ws) 215 | # # model_dir = os.path.join('./models', args.model_id, '1', 'outputs') 216 | 217 | # # load our previously trained model and weights 218 | # model_file = glob.glob(os.path.join(model_dir, '*.h5'))[0] 219 | # weights_file = glob.glob(os.path.join(model_dir, '*.hdf5'))[0] 220 | # m, checkpoint = model_tools.retrain_model(model_file, checkpoint, evaluation, 'classes_mean_iou', weights_file, weight = WEIGHT, lr = LR) 221 | # # TODO: make this dynamic 222 | # initial_epoch = 100 223 | # # otherwise build a model from scratch with provided specs 224 | # else: 225 | # m = model_tools.get_autoencoder(depth = len(BANDS)*2, optim = OPTIMIZER, loss = LOSS, mets = METRICS) 226 | # initial_epoch = 0 227 | 228 | # train the model 229 | steps_per_epoch = int(TRAIN_SIZE//BATCH//4) 230 | print('steps per epoch', steps_per_epoch) 231 | m.fit( 232 | x = training, 233 | epochs = EPOCHS, 234 | steps_per_epoch = steps_per_epoch, 235 | validation_data = evaluation, 236 | callbacks = callbacks 237 | ) 238 | 239 | m.save(os.path.join(out_dir, 'unet256_autoencoder_8band.h5')) 240 | -------------------------------------------------------------------------------- /demos/Training.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Training.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "id": "xnHMtxTUavbx" 23 | }, 24 | "source": [ 25 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 26 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 27 | "# you may not use this file except in compliance with the License.\n", 28 | "# You may obtain a copy of the License at\n", 29 | "#\n", 30 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 31 | "#\n", 32 | "# Unless required by applicable law or agreed to in writing, software\n", 33 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 34 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 35 | "# See the License for the specific language governing permissions and\n", 36 | "# limitations under the License." 
37 | ], 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "8Ciecm6Ia2Xa" 45 | }, 46 | "source": [ 47 | "# Introduction\n", 48 | "\n", 49 | "This notebook demonstrates a workflow for training a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) on previously extracted remote sensing data using Tensorflow. In this example, we read 256x256 pixel image chips saved as zipped tfrecords in Google Cloud Storage (Note: the data can be read in from anywhere) containing the visible, infrared, and near infrared bands of Sentinel-2 imagery and a binary label band. This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs." 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "id": "Yla55CsQa2yw" 56 | }, 57 | "source": [ 58 | "import os\nfrom os.path import join\nfrom datetime import datetime\n", 59 | "from sys import path\n", 60 | "import json\n", 61 | "import numpy as np\n", 62 | "import tensorflow as tf" 63 | ], 64 | "execution_count": 1, 65 | "outputs": [] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "jDBvGhTXa5II" 71 | }, 72 | "source": [ 73 | "## Clone repo containing preprocessing and prediction functions\n", 74 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 75 | ], 76 | "execution_count": null, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "Dl2DPfr9a8eW" 83 | }, 84 | "source": [ 85 | "# Load the necessary modules from repo\n", 86 | "path.append('/content/Satellite_ComputerVision')\n", 87 | "\n", 88 | "from utils.processing import get_training_dataset, get_eval_dataset\n", 89 | "from utils.model_tools import get_model, weighted_bce, make_confusion_matrix" 90 | ], 91 | "execution_count": null, 92 | "outputs": [] 93 | },
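{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# If running in Colab, authenticate first so the gsutil calls below can\n",
    "# reach your bucket (sketch; adapt to your own environment and credentials)\n",
    "from google.colab import auth\n",
    "auth.authenticate_user()"
  ],
  "execution_count": null,
  "outputs": []
},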
\n", 101 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 102 | "BUCKET_PATH = join('gs://', BUCKET)\n", 103 | "\n", 104 | "FOLDER = 'NC_solar'\n", 105 | "PRED_BASE = 'data/predict'\n", 106 | "TRAIN_BASE = 'data/training'\n", 107 | "EVAL_BASE = 'data/eval'\n", 108 | "\n", 109 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 110 | "opticalBands = ['B2', 'B3', 'B4']\n", 111 | "thermalBands = ['B8', 'B11', 'B12']\n", 112 | "\n", 113 | "BANDS = opticalBands + thermalBands# + pcaBands\n", 114 | "RESPONSE = 'landcover'\n", 115 | "FEATURES = BANDS + [RESPONSE]\n", 116 | "\n", 117 | "# Specify the size and shape of patches expected by the model.\n", 118 | "KERNEL_SIZE = 256\n", 119 | "KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n", 120 | "COLUMNS = [\n", 121 | " tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n", 122 | "]\n", 123 | "FEATURES_DICT = dict(zip(FEATURES, COLUMNS))" 124 | ], 125 | "execution_count": 2, 126 | "outputs": [] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": { 131 | "id": "WBQN2UdagYj6" 132 | }, 133 | "source": [ 134 | "## Training Data\n", 135 | "First, we will read previously exported training data fro GCS into TFRecordDatasets" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "metadata": { 141 | "id": "f7biGY7cbBSU" 142 | }, 143 | "source": [ 144 | "# make sure we have training records\n", 145 | "trainPattern = join(BUCKET_PATH, FOLDER, TRAIN_BASE, '*.tfrecord.gz')\n", 146 | "print(trainPattern)\n", 147 | "trainFiles = !gsutil ls {trainPattern}" 148 | ], 149 | "execution_count": null, 150 | "outputs": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "metadata": { 155 | "id": "6e3C9k5Ugm0R" 156 | }, 157 | "source": [ 158 | "# create training dataset with default arguments for batch (16), repeat (True), and normalization axis (0)\n", 159 | "training = get_training_dataset(trainFiles, FEATURES_DICT, BANDS, RESPONSE, 2000)" 160 | ], 161 | "execution_count": null, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "id": "zhylXyE4g2U2" 168 | }, 169 | "source": [ 170 | "# confirm the training dataset produces expected results\n", 171 | "iterator = iter(training)\n", 172 | "print(iterator.next())" 173 | ], 174 | "execution_count": null, 175 | "outputs": [] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "metadata": { 180 | "id": "OnpFtwj_g_lA" 181 | }, 182 | "source": [ 183 | "evalPattern = join(BUCKET_PATH, FOLDER, EVAL_BASE, '*.tfrecord.gz')\n", 184 | "print(evalPattern)\n", 185 | "evalFiles = !gsutil ls {evalPattern}" 186 | ], 187 | "execution_count": null, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "metadata": { 193 | "id": "Js6Dn2dshHYL" 194 | }, 195 | "source": [ 196 | "# create evaluation dataset\n", 197 | "evaluation = get_eval_dataset(evalFiles, FEATURES_DICT, BANDS, RESPONSE)" 198 | ], 199 | "execution_count": null, 200 | "outputs": [] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": { 205 | "id": "RR06Y089jeSk" 206 | }, 207 | "source": [ 208 | "## Model" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "metadata": { 214 | "id": "8Ww2Yq36kbJm" 215 | }, 216 | "source": [ 217 | "# Define Global variables for Model Training\n", 218 | "EPOCHS = 100\n", 219 | "LR = 0.0001\n", 220 | "BATCH = 16\n", 221 | "\n", 222 | "OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=LR, beta_1=0.9, beta_2=0.999)\n", 223 | "\n", 224 | "METRICS = {\n", 225 | " 'logits':[tf.keras.metrics.MeanSquaredError(name='mse'), 
262 | { 263 | "cell_type": "markdown", 264 | "metadata": { 265 | "id": "w7ouh97-9qP7" 266 | }, 267 | "source": [ 268 | "During model training we will save the best performing set of weights as calculated on evaluation data at the end of each epoch. The metric we track is the mean intersection over union." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "metadata": { 274 | "id": "WlRa0mR6kRwY" 275 | }, 276 | "source": [ 277 | "## DEFINE CALLBACKS\n", 278 | "\n", 279 | "def get_weighted_bce(y_true, y_pred):\n", 280 | " return weighted_bce(y_true, y_pred, WEIGHT)\n", 281 | "\n", 282 | "# get the current time\n", 283 | "now = datetime.now() \n", 284 | "date = now.strftime(\"%d%b%y\")\n", 285 | "date\n", 286 | "\n", 287 | "# define a checkpoint callback to save best models during training\n", 288 | "checkpoint = tf.keras.callbacks.ModelCheckpoint(\n", 289 | " os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),\n", 290 | " monitor='val_classes_mean_iou',\n", 291 | " verbose=1,\n", 292 | " save_best_only=True,\n", 293 | " mode='max'\n", 294 | " )" 295 | ], 296 | "execution_count": null, 297 | "outputs": [] 298 | },
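{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# The real weighted_bce lives in utils/model_tools.py and is not reproduced\n",
    "# in this repo; for intuition, a weighted binary cross-entropy looks roughly\n",
    "# like this sketch:\n",
    "def weighted_bce_sketch(y_true, y_pred, weight):\n",
    "    bce = tf.keras.losses.binary_crossentropy(y_true, y_pred)\n",
    "    pixel_weights = y_true[..., 0] * (weight - 1.0) + 1.0  # up-weight positive pixels\n",
    "    return tf.reduce_mean(bce * pixel_weights)"
  ],
  "execution_count": null,
  "outputs": []
},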
| "cell_type": "markdown", 341 | "metadata": { 342 | "id": "r73nInHK5HkZ" 343 | }, 344 | "source": [ 345 | "## Re-Training\n", 346 | " The code below will continue training an existing model. You may need to re-create your training and evaluation datasets if you intend to use new or different data from that on which the model was originally trained." 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "metadata": { 352 | "id": "dKhUr2BI5MjN" 353 | }, 354 | "source": [ 355 | "from tensorflow.python.keras import models" 356 | ], 357 | "execution_count": null, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "metadata": { 363 | "id": "Pfpl6-436ajg" 364 | }, 365 | "source": [ 366 | "# Define where pre-trained model files and weights will come from\n", 367 | "MODEL_FILE = '{PATH TO .h5 MODEL FILE}'\n", 368 | "WEIGHT_FILE = '{PATH TO .hdf5 WEIGHT FILE'\n", 369 | "EVAL_METRIC = 'val_classes_mean_iou'\n", 370 | "# optionally change the learning rate\n", 371 | "LR = 0.0001\n", 372 | "# optionally change the number of epochs to re-train\n", 373 | "EPOCHS = 100" 374 | ], 375 | "execution_count": null, 376 | "outputs": [] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "metadata": { 381 | "id": "ujfWjVTc7DG7" 382 | }, 383 | "source": [ 384 | "# this non-keras native function was used during training so we need to supply it when re-instantiating the trained model\n", 385 | "def get_weighted_bce(y_true, y_pred):\n", 386 | " return weighted_bce(y_true, y_pred, weight)\n", 387 | "\n", 388 | "# get the current time\n", 389 | "now = datetime.now() \n", 390 | "date = now.strftime(\"%d%b%y\")\n", 391 | "date\n", 392 | "\n", 393 | "# define a checkpoint callback to save best models during training\n", 394 | "checkpoint = tf.keras.callbacks.ModelCheckpoint(\n", 395 | " os.path.join(OUT_DIR, 'best_weights_' + date + '.hdf5'),\n", 396 | " monitor='val_classes_mean_iou',\n", 397 | " verbose=1,\n", 398 | " save_best_only=True,\n", 399 | " mode='max'\n", 400 | " )" 401 | ], 402 | "execution_count": null, 403 | "outputs": [] 404 | }, 405 | { 406 | "cell_type": "code", 407 | "metadata": { 408 | "id": "fdLb1x9R6XO-" 409 | }, 410 | "source": [ 411 | "# load our trained model from the model and weights file\n", 412 | "custom_objects = {'get_weighted_bce': get_weighted_bce}\n", 413 | "m = models.load_model(MODEL_FILE, custom_objects = custom_objects)\n", 414 | "m.load_weights(WEIGHT_FILE)\n" 415 | ], 416 | "execution_count": null, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "Pk7X7tC66nlD" 423 | }, 424 | "source": [ 425 | "# set the initial evaluation metric for saving checkpoints to the previous best value\n", 426 | "evalMetrics = m.evaluate(x = eval_data, verbose = 1)\n", 427 | "metrics = m.metrics_names\n", 428 | "index = metrics.index(EVAL_METRIC)\n", 429 | "checkpoint.best = evalMetrics[index]\n" 430 | ], 431 | "execution_count": null, 432 | "outputs": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "metadata": { 437 | "id": "B1xL8CEZ7VNs" 438 | }, 439 | "source": [ 440 | "# OPTIONALLY set the learning rate for re-training\n", 441 | "lr = backend.eval(m.optimizer.learning_rate)\n", 442 | "print('current learning rate', lr)\n", 443 | "backend.set_value(m.optimizer.learning_rate, LR)\n", 444 | "print('new learning rate', LR)" 445 | ], 446 | "execution_count": null, 447 | "outputs": [] 448 | }, 449 | { 450 | "cell_type": "code", 451 | "metadata": { 452 | "id": "QbSVDeJz7fem" 453 | }, 454 | "source": [ 455 | "# train the model\n", 456 | 
"m.fit(\n", 457 | " x = training,\n", 458 | " epochs = EPOCHS,\n", 459 | " steps_per_epoch = steps_per_epoch,\n", 460 | " validation_data = evaluation,\n", 461 | " callbacks = [checkpoint]\n", 462 | " )\n", 463 | "\n", 464 | "m.save(os.path.join(OUT_DIR, f'{date}_unet256.h5'))" 465 | ], 466 | "execution_count": null, 467 | "outputs": [] 468 | } 469 | ] 470 | } -------------------------------------------------------------------------------- /demos/Prediction.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Prediction.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [] 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "code", 21 | "metadata": { 22 | "id": "ObJ-wgPO93nJ" 23 | }, 24 | "source": [ 25 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 26 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 27 | "# you may not use this file except in compliance with the License.\n", 28 | "# You may obtain a copy of the License at\n", 29 | "#\n", 30 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 31 | "#\n", 32 | "# Unless required by applicable law or agreed to in writing, software\n", 33 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 34 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 35 | "# See the License for the specific language governing permissions and\n", 36 | "# limitations under the License." 37 | ], 38 | "execution_count": null, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "4h8d4KK-95sl" 45 | }, 46 | "source": [ 47 | "# Introduction\n", 48 | "\n", 49 | "This notebook demonstrates a workflow for generating a map of predicted solar array footprints using a trained [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) in Tensorflow. In this example, we create and export images that contain the same variables as used to train our model - the 3 visible, infrared, and 2 near-infrared bands of Sentinel-2 imagery from Google Earth Engine. We load the trained model structure and [weights](https://osf.io/eg35t/) and then run overlapping subsets of these images through the trained model to generate a 2-band output raster containing per-pixel probabilities and classes." 
50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "metadata": { 55 | "id": "dKLeYISt4FWZ" 56 | }, 57 | "source": [ 58 | "!pip install rasterio" 59 | ], 60 | "execution_count": null, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "metadata": { 66 | "id": "foiZFwAhu5FY" 67 | }, 68 | "source": [ 69 | "import os\n", 70 | "import shutil\n", 71 | "import glob\n", 72 | "from os.path import join\n", 73 | "import ee\n", 74 | "import folium\n", 75 | "from tensorflow.python.keras import models\n", 76 | "from sys import path\n", 77 | "import numpy as np\n", 78 | "import rasterio as rio\n", 79 | "import json\n", 80 | "from matplotlib import pyplot as plt\n", 81 | "from matplotlib import colors\n", 82 | "" 83 | ], 84 | "execution_count": null, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "metadata": { 90 | "id": "n-8MlFrmuycO" 91 | }, 92 | "source": [ 93 | "# Authenticate and initialize GEE Account\n", 94 | "ee.Authenticate()\n", 95 | "ee.Initialize()" 96 | ], 97 | "execution_count": null, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "L6tuEC1yvEdP" 104 | }, 105 | "source": [ 106 | "## Clone repo containing preprocessing and prediction functions\n", 107 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 108 | ], 109 | "execution_count": null, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "id": "A2UDJlJyvKT9" 116 | }, 117 | "source": [ 118 | "# Load the necessary modules from repo\n", 119 | "path.append('./Satellite_ComputerVision')" 120 | ], 121 | "execution_count": null, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "metadata": { 127 | "id": "Pm8LpdnwvHEm" 128 | }, 129 | "source": [ 130 | "from utils.model_tools import get_model, make_confusion_matrix, weighted_bce\n", 131 | "from utils.prediction_tools import doExport, makePredDataset, make_array_predictions, get_img_bounds, write_tfrecord_predictions, write_geotiff_prediction\n", 132 | "from utils.clouds import basicQA" 133 | ], 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "metadata": { 140 | "id": "XmWIhMaxxS0o" 141 | }, 142 | "source": [ 143 | "# Define a method for displaying Earth Engine image tiles to a folium map.\n", 144 | "def add_ee_layer(self, ee_image_object, vis_params, name):\n", 145 | " map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n", 146 | " folium.raster_layers.TileLayer(\n", 147 | " tiles = map_id_dict['tile_fetcher'].url_format,\n", 148 | " attr = \"Map Data © Google Earth Engine\",\n", 149 | " name = name,\n", 150 | " overlay = True,\n", 151 | " control = True\n", 152 | " ).add_to(self)\n", 153 | "\n", 154 | "# Add EE drawing method to folium.\n", 155 | "folium.Map.add_ee_layer = add_ee_layer" 156 | ], 157 | "execution_count": null, 158 | "outputs": [] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "metadata": { 163 | "id": "7fPiFxk_xWn7" 164 | }, 165 | "source": [ 166 | "# Specify names and locations for outputs in Cloud Storage.
\n", 167 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 168 | "BUCKET_PATH = join('gs://', BUCKET)\n", 169 | "FOLDER = '{YOUR PROJECT FOLDER HERE}'\n", 170 | "PRED_BASE = '{YOUR PROJECT SUBDIRECTORY FOR PREDICTION FILES HERE}'\n", 171 | "MODEL_PATH = '{PATH TO MODEL .h5 File}'\n", 172 | "MODEL_WEIGHTS = '{PATH TO MODEL WEIGHTS .hdf5 file}'\n", 173 | "\n", 174 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 175 | "opticalBands = ['B2', 'B3', 'B4']\n", 176 | "thermalBands = ['B8', 'B11', 'B12']\n", 177 | "\n", 178 | "BANDS = opticalBands + thermalBands" 179 | ], 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": { 186 | "id": "IT7d4APrvjJG" 187 | }, 188 | "source": [ 189 | "## Test images\n", 190 | "We first need to create and export some images in GEE on which we can run predictions. This notebook uses a few test aois, but you can incorporate your own study areas in GEE or existing Sentinel-2 imagery" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "metadata": { 196 | "id": "JSaS7FOgvyco" 197 | }, 198 | "source": [ 199 | "# create several small aois to test predictions. These are all in NC\n", 200 | "aois = dict({\n", 201 | " 'Test1': ee.Geometry.Polygon(\n", 202 | " [[[-78.19610376358034, 35.086989862385884],\n", 203 | " [-78.19610376358034, 34.735631502732396],\n", 204 | " [-77.67974634170534, 34.735631502732396],\n", 205 | " [-77.67974634170534, 35.086989862385884]]], None, False),\n", 206 | " 'Test2': ee.Geometry.Polygon(\n", 207 | " [[[-81.59087915420534, 35.84308746418702],\n", 208 | " [-81.59087915420534, 35.47711130797561],\n", 209 | " [-81.03057641983034, 35.47711130797561],\n", 210 | " [-81.03057641983034, 35.84308746418702]]], None, False),\n", 211 | " 'Test3': ee.Geometry.Polygon(\n", 212 | " [[[-78.74447677513596, 36.4941960586897],\n", 213 | " [-78.74447677513596, 36.17115435938789],\n", 214 | " [-78.21713302513596, 36.17115435938789],\n", 215 | " [-78.21713302513596, 36.4941960586897]]], None, False),\n", 216 | " 'Test4': ee.Geometry.Polygon(\n", 217 | " [[[-76.62411544701096, 36.33505523381603],\n", 218 | " [-76.62411544701096, 36.03800955668766],\n", 219 | " [-76.16818282982346, 36.03800955668766],\n", 220 | " [-76.16818282982346, 36.33505523381603]]], None, False)\n", 221 | "})" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "metadata": { 229 | "id": "yA36Bcwfv1U_" 230 | }, 231 | "source": [ 232 | "# Choose the GEE folder in which to ingest prediction image:\n", 233 | "aoi = 'Test4'\n", 234 | "\n", 235 | "# prediction path\n", 236 | "test_path = join(FOLDER, PRED_BASE, aoi)\n", 237 | "\n", 238 | "# Base file name to use for TFRecord files and assets. 
The name structure includes:\n", 239 | "test_image_base = 'unet256_' + aoi\n", 240 | "\n", 241 | "# Half this will extend on the sides of each patch.\n", 242 | "kernel_buffer = [128, 128]\n", 243 | "\n", 244 | "test_region = aois[aoi]\n", 245 | "\n", 246 | "# find the center of our aoi for map visualization\n", 247 | "center = test_region.centroid(5).coordinates().getInfo()\n", 248 | "center.reverse()" 249 | ], 250 | "execution_count": null, 251 | "outputs": [] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "metadata": { 256 | "id": "s7xAe359wG8n" 257 | }, 258 | "source": [ 259 | "# Create a test image\n", 260 | "S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n", 261 | "\n", 262 | "## Change dates here\n", 263 | "######\n", 264 | "begin = '2020-05-01'\n", 265 | "end = '2020-08-30'\n", 266 | "######\n", 267 | "\n", 268 | "# The image input collection is cloud-masked.\n", 269 | "filtered = S2.filterDate(begin, end)\\\n", 270 | ".filterBounds(test_region)\\\n", 271 | ".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\\\n", 272 | ".map(basicQA)\n", 273 | "\n", 274 | "# Create a simple median composite to visualize\n", 275 | "## Change .clip to change test area \n", 276 | "test = filtered.median().select(BANDS).clip(test_region)\n", 277 | "\n", 278 | "# Use folium to visualize the imagery.\n", 279 | "#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", 280 | "rgbParams = {'bands': ['B4', 'B3', 'B2'],\n", 281 | " 'min': 0,\n", 282 | " 'max': 3000}\n", 283 | "\n", 284 | "nirParams = {'bands': ['B8', 'B11', 'B12'],\n", 285 | " 'min': 0,\n", 286 | " 'max': 3000}\n", 287 | "\n", 288 | "\n", 289 | "## Change coordinates to center map based on aoi used \n", 290 | "map = folium.Map(location=center)\n", 291 | "map.add_ee_layer(test, rgbParams, 'Color')\n", 292 | "map.add_ee_layer(test, nirParams, 'Thermal')\n", 293 | "\n", 294 | "map.add_child(folium.LayerControl())\n", 295 | "map" 296 | ], 297 | "execution_count": null, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "metadata": { 303 | "id": "CBW7l5xQxG1Y" 304 | }, 305 | "source": [ 306 | "# Run the export.\n", 307 | "## takes some time (~10 min) --> check GEE tasks to see when completed \n", 308 | "doExport(test, features = BANDS, pred_path = test_path, pred_base = test_image_base, scale = 10, bucket = BUCKET, region = test_region)" 309 | ], 310 | "execution_count": null, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "tfg9rxv4xHuS" 317 | }, 318 | "source": [ 319 | "## Predictions" 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "metadata": { 325 | "id": "vYgzH4DpAw2o" 326 | }, 327 | "source": [ 328 | "First we load the model structure and weights" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "metadata": { 334 | "id": "jd7ysDkezBR1" 335 | }, 336 | "source": [ 337 | "def get_weighted_bce(y_true,y_pred):\n", 338 | " return weighted_bce(y_true, y_pred, 1)\n", 339 | "m = models.load_model(MODEL_PATH, custom_objects = {'get_weighted_bce': get_weighted_bce})\n", 340 | "# m = get_model(depth = DEPTH, optim = OPTIMIZER, loss = get_weighted_bce, mets = METRICS, bias = None)\n", 341 | "m.load_weights(MODEL_WEIGHTS)" 342 | ], 343 | "execution_count": null, 344 | "outputs": [] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": { 349 | "id": "sePpmbMPA0xG" 350 | }, 351 | "source": [ 352 | "Then generate a file list of our previously exported image data on which we want to make predictions. 
NOTE: This example reads from Google Cloud Storage, but any means of generating a list of filenames is sufficient" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "metadata": { 358 | "id": "OtgnbS3Q1xmy" 359 | }, 360 | "source": [ 361 | "predFiles = !gsutil ls {join(BUCKET_PATH, test_path, test_image_base + '*.tfrecord.gz')}\n", 362 | "jsonFile = !gsutil ls {join(BUCKET_PATH, test_path, test_image_base + '*.json')}\n", 363 | "jsonFile = jsonFile[0]" 364 | ], 365 | "execution_count": null, 366 | "outputs": [] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "metadata": { 371 | "id": "vgyohfwV1rqT" 372 | }, 373 | "source": [ 374 | "# load our predictions data into a Dataset and inspect the first one\n", 375 | "predData = makePredDataset(predFiles, BANDS, one_hot = None)\n", 376 | "iterator = iter(predData)\n", 377 | "print(iterator.next())" 378 | ], 379 | "execution_count": null, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": { 385 | "id": "FG9KksCBBG9F" 386 | }, 387 | "source": [ 388 | "Generate and plot the output predictions" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "metadata": { 394 | "id": "ebY2MYsv18HO" 395 | }, 396 | "source": [ 397 | "# generate prediction rasters\n", 398 | "preds = make_array_predictions(imageDataset = predData, model = m, jsonFile = jsonFile)" 399 | ], 400 | "execution_count": null, 401 | "outputs": [] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "metadata": { 406 | "id": "8F1da8OA2CEM" 407 | }, 408 | "source": [ 409 | "# We can quickly visualize the predictions to see if they look sensible\n", 410 | "figure = plt.figure(figsize = (12,12))\n", 411 | "\n", 412 | "prob = preds[:, :, 0]\n", 413 | "cls = preds[:, :, 1]\n", 414 | "\n", 415 | "plt.imshow(prob)" 416 | ], 417 | "execution_count": null, 418 | "outputs": [] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "metadata": { 423 | "id": "YBG2Ndga2MJr" 424 | }, 425 | "source": [ 426 | "# overlay the predicted outputs on the original satellite data map\n", 427 | "heatmap = folium.raster_layers.ImageOverlay(\n", 428 | " image=prob,\n", 429 | " bounds= get_img_bounds(prob, jsonFile),\n", 430 | " colormap=lambda x: (0.5, 0, 0.5, 1) if x >= 0.9 else (0, 0, 0, 0),\n", 431 | ")\n", 432 | "map.add_child(heatmap)\n", 433 | "map.add_child(folium.LayerControl())\n", 434 | "map" 435 | ], 436 | "execution_count": null, 437 | "outputs": [] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "metadata": { 442 | "id": "7ouKDMpcBJbL" 443 | }, 444 | "source": [ 445 | "Export and save predictions (optional)" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "metadata": { 451 | "id": "8rYA_HkF2kMV" 452 | }, 453 | "source": [ 454 | "# optionally, write predictions to either tfrecord files (best for re-ingesting into GEE)...\n", 455 | "write_tfrecord_predictions(predData, m, test_path, test_image_base)\n", 456 | "#...or a geotiff\n", 457 | "write_geotiff_prediction(preds, jsonFile, '{OUTFILE}')" 458 | ], 459 | "execution_count": null, 460 | "outputs": [] 461 | } 462 | ] 463 | } -------------------------------------------------------------------------------- /demos/Extract_Data_GEE.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Extract_Data_GEE.ipynb", 7 | "provenance": [] 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }
16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "code", 20 | "metadata": { 21 | "id": "DzmS6y3XJGEl" 22 | }, 23 | "source": [ 24 | "#@title Author: Michael Evans { display-mode: \"form\" }\n", 25 | "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", 26 | "# you may not use this file except in compliance with the License.\n", 27 | "# You may obtain a copy of the License at\n", 28 | "#\n", 29 | "# https://www.apache.org/licenses/LICENSE-2.0\n", 30 | "#\n", 31 | "# Unless required by applicable law or agreed to in writing, software\n", 32 | "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", 33 | "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", 34 | "# See the License for the specific language governing permissions and\n", 35 | "# limitations under the License." 36 | ], 37 | "execution_count": null, 38 | "outputs": [] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": { 43 | "id": "TbdLwIXWJQMt" 44 | }, 45 | "source": [ 46 | "# Introduction\n", 47 | "\n", 48 | "This notebook demonstrates methods used to acquire training data from Google Earth Engine that can be used to train a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597) using Tensorflow. In this example, we extract 256x256 pixel image chips containing the 3 visible, infrared, and 2 near infrared bands in Sentinel-2 imagery based on [hand-delineated solar array footprints in North Carolina](https://osf.io/ygbwj/). This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "metadata": { 54 | "id": "S0eAagvtJi2B" 55 | }, 56 | "source": [ 57 | "from os.path import join\nimport tensorflow as tf\n", 58 | "from google.cloud import storage\n", 59 | "import ee\n", 60 | "from sys import path\n", 61 | "import json\n", 62 | "import numpy as np\n", 63 | "import rasterio as rio\n", 64 | "import folium" 65 | ], 66 | "execution_count": null, 67 | "outputs": [] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "metadata": { 72 | "id": "Qp7doIHHJnys" 73 | }, 74 | "source": [ 75 | "## Clone repo containing preprocessing and prediction functions\n", 76 | "!git clone https://github.com/mjevans26/Satellite_ComputerVision.git" 77 | ], 78 | "execution_count": null, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "metadata": { 84 | "id": "0VxQa03hJufS" 85 | }, 86 | "source": [ 87 | "# Load the necessary modules from repo\n", 88 | "path.append('/content/Satellite_ComputerVision')\n", 89 | "from utils.clouds import basicQA, maskTOA, maskSR" 90 | ], 91 | "execution_count": null, 92 | "outputs": [] 93 | },
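{
  "cell_type": "code",
  "metadata": {},
  "source": [
    "# basicQA is defined in utils/clouds.py and not shown in this repo; for\n",
    "# intuition, a minimal Sentinel-2 QA60 cloud mask looks roughly like this\n",
    "# sketch:\n",
    "def qa60_sketch(img):\n",
    "    qa = img.select('QA60')\n",
    "    cloudy = qa.bitwiseAnd(1 << 10).Or(qa.bitwiseAnd(1 << 11))  # cloud and cirrus bits\n",
    "    return img.updateMask(cloudy.Not())"
  ],
  "execution_count": null,
  "outputs": []
},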
\n", 141 | "BUCKET = '{YOUR_GCS BUCKET HERE}'\n", 142 | "BUCKET_PATH = join('gs://', BUCKET)\n", 143 | "\n", 144 | "FOLDER = 'NC_solar'\n", 145 | "PRED_BASE = 'data/predict'\n", 146 | "TRAIN_BASE = 'data/training'\n", 147 | "EVAL_BASE = 'data/eval'\n", 148 | "\n", 149 | "# Specify inputs (Sentinel bands) to the model and the response variable.\n", 150 | "opticalBands = ['B2', 'B3', 'B4']\n", 151 | "thermalBands = ['B8', 'B11', 'B12']\n", 152 | "\n", 153 | "BANDS = opticalBands + thermalBands\n", 154 | "RESPONSE = 'landcover'\n", 155 | "FEATURES = BANDS + [RESPONSE]\n", 156 | "SCENEID = 'SENSING_ORBIT_NUMBER'\n", 157 | "\n", 158 | "# Specify the size and shape of patches expected by the model.\n", 159 | "KERNEL_SIZE = 256\n", 160 | "KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n", 161 | "COLUMNS = [\n", 162 | " tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n", 163 | "]\n", 164 | "FEATURES_DICT = dict(zip(FEATURES, COLUMNS))" 165 | ], 166 | "execution_count": null, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": { 172 | "id": "r-5l_EfywDvK" 173 | }, 174 | "source": [ 175 | "# Imagery\n", 176 | "\n", 177 | "Access and process the imagery to use for predictor variables using Google Earth Engine. This is a three-month, cloud-free, Sentinel-2 composite corresponding to the latest date from which we have confirmed training data. Display it in the notebook for a sanity check." 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "btEC3dluJfGq" 184 | }, 185 | "source": [ 186 | "# Use Sentinel-2 surface reflectance data.\n", 187 | "S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n", 188 | "# Grab a feature corresponding to our study area - North Carolina\n", 189 | "states = ee.FeatureCollection(\"TIGER/2016/States\")\n", 190 | "nc = states.filter(ee.Filter.eq('NAME', 'North Carolina')).geometry().buffer(2500)\n", 191 | "begin = '2019-01-01'\n", 192 | "end = '2020-03-01'\n", 193 | "\n", 194 | "# The image input collection is cloud-masked.\n", 195 | "filtered = S2.filterDate(begin, end)\\\n", 196 | ".filterBounds(nc)\\\n", 197 | ".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\n", 198 | "\n", 199 | "\n", 200 | "# Create a simple median composite to visualize\n", 201 | "winter = filtered.filterDate('2019-12-01', '2020-02-28').map(basicQA).median().select(BANDS).clip(nc)\n", 202 | "spring = filtered.filterDate('2019-03-01', '2019-05-31').map(basicQA).median().select(BANDS).clip(nc)\n", 203 | "summer = filtered.filterDate('2019-06-01', '2019-08-31').map(basicQA).median().select(BANDS).clip(nc)\n", 204 | "fall = filtered.filterDate('2019-09-01', '2019-11-30').map(basicQA).median().select(BANDS).clip(nc)\n", 205 | "\n", 206 | "# Use folium to visualize the imagery.\n", 207 | "#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n", 208 | "rgbParams = {'bands': ['B4', 'B3', 'B2'],\n", 209 | " 'min': 0,\n", 210 | " 'max': 0.3}\n", 211 | "\n", 212 | "nirParams = {'bands': ['B8', 'B11', 'B12'],\n", 213 | " 'min': 0,\n", 214 | " 'max': 0.3}\n", 215 | "\n", 216 | "map = folium.Map(location=[35.402, -78.376])\n", 217 | "map.add_ee_layer(spring, rgbParams, 'Color')\n", 218 | "map.add_ee_layer(spring, nirParams, 'Thermal')\n", 219 | "\n", 220 | "map.add_child(folium.LayerControl())\n", 221 | "map" 222 | ], 223 | "execution_count": null, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "wpCLxJLVwTHw" 230 | }, 231 | "source": [ 232 | "Prepare the 
273 | { 274 | "cell_type": "markdown", 275 | "metadata": { 276 | "id": "fjxyM6Lswn0n" 277 | }, 278 | "source": [ 279 | "Use some pre-made geometries to sample the stack in strategic locations. We constrain sampling to occur within 10km of mapped solar arrays. Because our target features are small and sparse, relative to the landscape, we also guide sampling based on their centroids to ensure that we get training data for solar arrays." 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "metadata": { 285 | "id": "B-xg0yQXwmTJ" 286 | }, 287 | "source": [ 288 | "def buff(ft):\n", 289 | " return ft.buffer(10000)\n", 290 | "\n", 291 | "def centroid(ft):\n", 292 | " return ft.centroid()\n", 293 | "\n", 294 | "centroids = NC_solar_footprints.map(centroid)\n", 295 | "studyArea = NC_solar_footprints.map(buff).union()\n", 296 | "studyImage = ee.Image(0).byte().paint(studyArea, 1)\n", 297 | "studyImage = studyImage.updateMask(studyImage)\n", 298 | "centroids = centroids.randomColumn('random')\n", 299 | "\n", 300 | "aoiParams = {'min':0, 'max': 1, 'palette': ['red']}\n", 301 | "map = folium.Map(location=[35.402, -78.376], zoom_start=8)\n", 302 | "map.add_ee_layer(studyImage, aoiParams, 'Sampling area')\n", 303 | "map.add_child(folium.LayerControl())\n", 304 | "map" 305 | ], 306 | "execution_count": null, 307 | "outputs": [] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": { 312 | "id": "2_Ts4CAYwhv1" 313 | }, 314 | "source": [ 315 | "# Sampling\n", 316 | "\n", 317 | "If the mapped data look reasonable, we use a 2-stage approach to sample 256x256 pixel image 'chips' for use in model training.\n", 318 | "1.)
sample from the centroid of each polygon to create 'positive' examples.\n", 319 | "2.) sample the image at random points to generate 'negative' examples.\n", 320 | "\n", 321 | "To sample chips we create an array image in which each pixel contains a nested list of the surrounding 256x256 pixel values. We can sample this array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export." 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "metadata": { 327 | "id": "J8me8XqzzEWP" 328 | }, 329 | "source": [ 330 | "def make_array_image(features, labels, aoi):\n", 331 | " \"\"\"Combine predictor bands and label band into an array image\n", 332 | " Parameters:\n", 333 | " features (ee.Image): image containing bands to be used as predictor variables in model\n", 334 | " labels (ee.Image): binary[0,1], single-band image indicating presence (1) and absence (0) of target features\n", 335 | " aoi (ee.Geometry): bounds\n", 336 | " Return:\n", 337 | " ee.Image: array image\n", 338 | " \"\"\"\n", 339 | " \n", 340 | " featureStack = ee.Image.cat([features, labels]).clip(aoi)\n", 341 | "\n", 342 | " ls = ee.List.repeat(1, KERNEL_SIZE)\n", 343 | " lists = ee.List.repeat(ls, KERNEL_SIZE)\n", 344 | " kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n", 345 | "\n", 346 | " arrays = featureStack.neighborhoodToArray(kernel)\n", 347 | " return arrays" 348 | ], 349 | "execution_count": null, 350 | "outputs": [] 351 | }, 352 | { 353 | "cell_type": "markdown", 354 | "metadata": { 355 | "id": "kD62TGagw3Im" 356 | }, 357 | "source": [ 358 | "First we'll collect image patches from the centroids of known solar array locations" 359 | ] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "metadata": { 364 | "id": "NT4YxEoMw0qK" 365 | }, 366 | "source": [ 367 | "# Add a random column to the centroids\n", 368 | "S = centroids.size().getInfo()\n", 369 | "centroidList = centroids.toList(S)" 370 | ], 371 | "execution_count": null, 372 | "outputs": [] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "metadata": { 377 | "id": "Jn-RyVA3xDEi" 378 | }, 379 | "source": [ 380 | "#@title Centroids slicing\n", 381 | "# Get samples from delineated features using slice() on a feature collection\n", 382 | "\n", 383 | "x = 0\n", 384 | "\n", 385 | "# set the number of samples to include in a single export. 
may need to experiment with this parameter to avoid memory issues\n", 386 | "n = 25\n", 387 | "\n", 388 | "while x < S:\n", 389 | " # select a subset of 25 centroids\n", 390 | " subset = ee.FeatureCollection(centroidList.slice(x, x+n))\n", 391 | " # buffer those\n", 392 | " studyArea = subset.map(buff).union()\n", 393 | " arrays = make_array_image(fall.select(BANDS), labelimg.select(RESPONSE), studyArea)\n", 394 | " sample = arrays.sampleRegions(\n", 395 | " collection = subset.geometry(),\n", 396 | " scale = 10,\n", 397 | " tileScale = 12\n", 398 | " )\n", 399 | " x += n\n", 400 | " \n", 401 | " # assign a random number to samples and create a 70/30 train/test split\n", 402 | " sample = sample.randomColumn('random')\n", 403 | " training = sample.filter(ee.Filter.gte('random', 0.3))\n", 404 | " testing = sample.filter(ee.Filter.lt('random', 0.3))\n", 405 | "\n", 406 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_centFall' + str(x)\n", 407 | " task = ee.batch.Export.table.toCloudStorage(\n", 408 | " collection = training,\n", 409 | " description = desc, \n", 410 | " bucket = BUCKET, \n", 411 | " fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n", 412 | " fileFormat = 'TFRecord',\n", 413 | " selectors = BANDS + [RESPONSE]\n", 414 | " )\n", 415 | " task.start()\n", 416 | "\n", 417 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_centFall' + str(x)\n", 418 | " task = ee.batch.Export.table.toCloudStorage(\n", 419 | " collection = testing,\n", 420 | " description = desc, \n", 421 | " bucket = BUCKET, \n", 422 | " fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n", 423 | " fileFormat = 'TFRecord',\n", 424 | " selectors = BANDS + [RESPONSE]\n", 425 | " )\n", 426 | " task.start()" 427 | ], 428 | "execution_count": null, 429 | "outputs": [] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": { 434 | "id": "gwHW6fKTxVk7" 435 | }, 436 | "source": [ 437 | "Generate random samples within the buffered area" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "metadata": { 443 | "id": "WIv_-Mc2xRZ8" 444 | }, 445 | "source": [ 446 | "#@title Random sampling\n", 447 | "\n", 448 | "# Define sample sizes for shards and chunks. 
\n", 449 | "# These numbers determined experimentally.\n", 450 | "n = 30 # Number of shards in each chunk.\n", 451 | "N = 300 # Total sample size in each chunk.\n", 452 | "C = 2# Number of chunks\n", 453 | "\n", 454 | "iterator = iter(range(N*C))\n", 455 | "arrays = make_array_image(fall.select(BANDS),\n", 456 | " labelimg.select(RESPONSE),\n", 457 | " studyArea)\n", 458 | "for c in range(C):\n", 459 | " geomSample = ee.FeatureCollection([])\n", 460 | "\n", 461 | " for i in range(n):\n", 462 | " seed = next(iterator)\n", 463 | " sample = arrays.sample(\n", 464 | " region = studyArea,\n", 465 | " scale = 10,\n", 466 | " numPixels = N/n,\n", 467 | " seed = seed,\n", 468 | " tileScale = 8\n", 469 | " )\n", 470 | " geomSample = geomSample.merge(sample)\n", 471 | "\n", 472 | " #divide samples into training and evaluation data\n", 473 | " geomSample = geomSample.randomColumn('random')\n", 474 | " training = geomSample.filter(ee.Filter.gte('random', 0.3))\n", 475 | " testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n", 476 | "\n", 477 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_randFall'+str(c)\n", 478 | " task = ee.batch.Export.table.toCloudStorage(\n", 479 | " collection = training,\n", 480 | " description = desc, \n", 481 | " bucket = BUCKET, \n", 482 | " fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n", 483 | " fileFormat = 'TFRecord',\n", 484 | " selectors = BANDS + [RESPONSE]\n", 485 | " )\n", 486 | " task.start()\n", 487 | "\n", 488 | " desc = 'UNET_' + str(KERNEL_SIZE) + '_randFall' + str(c)\n", 489 | " task = ee.batch.Export.table.toCloudStorage(\n", 490 | " collection = testing,\n", 491 | " description = desc, \n", 492 | " bucket = BUCKET, \n", 493 | " fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n", 494 | " fileFormat = 'TFRecord',\n", 495 | " selectors = BANDS + [RESPONSE]\n", 496 | " )\n", 497 | " task.start() " 498 | ], 499 | "execution_count": null, 500 | "outputs": [] 501 | } 502 | ] 503 | } -------------------------------------------------------------------------------- /re-train.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "from azureml.core import Experiment, Environment, Workspace, Datastore, Dataset, Model, ScriptRunConfig, Run\n", 7 | "import os\n", 8 | "import glob\n", 9 | "# get the current workspace\n", 10 | "ws = Workspace.from_config()" 11 | ], 12 | "outputs": [], 13 | "execution_count": 15, 14 | "metadata": { 15 | "gather": { 16 | "logged": 1684345750891 17 | } 18 | } 19 | }, 20 | { 21 | "cell_type": "code", 22 | "source": [ 23 | "%cd Satellite_ComputerVision\n", 24 | "!git pull\n", 25 | "%cd .." 
26 | ], 27 | "outputs": [ 28 | { 29 | "output_type": "stream", 30 | "name": "stdout", 31 | "text": "Already up-to-date.\r\n/mnt/batch/tasks/shared/LS_root/mounts/clusters/test-compute-instance/code/Users/mevans\n" 32 | } 33 | ], 34 | "execution_count": 18, 35 | "metadata": { 36 | "collapsed": true, 37 | "jupyter": { 38 | "source_hidden": false, 39 | "outputs_hidden": false 40 | }, 41 | "nteract": { 42 | "transient": { 43 | "deleting": false 44 | } 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "# access our registered data share containing image data in this workspace\n", 52 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\n", 53 | "\n", 54 | "cpk_train_path = (datastore, 'CPK_solar/data/training/')\n", 55 | "cpk_eval_path = (datastore, 'CPK_solar/data/eval/')\n", 56 | "\n", 57 | "nc_train_path = (datastore, 'NC_solar/data/training/')\n", 58 | "nc_eval_path = (datastore, 'NC_solar/data/eval/')\n", 59 | "\n", 60 | "test_path = (datastore, 'CPK_solar/data/predict/testpred5')\n", 61 | "\n", 62 | "# train_dataset = Dataset.File.from_files(path = [cpk_train_path])\n", 63 | "# eval_dataset = Dataset.File.from_files(path = [cpk_eval_path])\n", 64 | "\n", 65 | "# nc_train_dataset = Dataset.File.from_files(path = [nc_train_path])\n", 66 | "# nc_eval_dataset = Dataset.File.from_files(path = [nc_eval_path])\n", 67 | "\n", 68 | "# when we combine datasets the selected directories and relative paths to the datastore are brought in\n", 69 | "# mount folder\n", 70 | "# |-solardatablob\n", 71 | "# | |-CPK_solar\n", 72 | "# | | |-data/training\n", 73 | "# | | |-data/eval\n", 74 | "# | |-NC_solar\n", 75 | "# | | |-data/training\n", 76 | "# | | |-data/eval\n", 77 | "\n", 78 | "train_dataset = Dataset.File.from_files(path = [cpk_train_path, nc_train_path])\n", 79 | "eval_dataset = Dataset.File.from_files(path = [cpk_eval_path, nc_eval_path])\n", 80 | "test_dataset = Dataset.File.from_files(path = [test_path])" 81 | ], 82 | "outputs": [], 83 | "execution_count": 16, 84 | "metadata": { 85 | "collapsed": true, 86 | "jupyter": { 87 | "source_hidden": false, 88 | "outputs_hidden": false 89 | }, 90 | "nteract": { 91 | "transient": { 92 | "deleting": false 93 | } 94 | }, 95 | "gather": { 96 | "logged": 1684345752282 97 | } 98 | } 99 | }, 100 | { 101 | "cell_type": "code", 102 | "source": [ 103 | "# Find the run corresponding to the model we want to register\n", 104 | "# run_id = 'solar-nc-cpk_1624989679_f59da7cf'\n", 105 | "run_id = 'solar-nc-cpk_1684259900_b71cc594'\n", 106 | "run = ws.get_run(run_id)" 107 | ], 108 | "outputs": [], 109 | "execution_count": 18, 110 | "metadata": { 111 | "collapsed": true, 112 | "jupyter": { 113 | "source_hidden": false, 114 | "outputs_hidden": false 115 | }, 116 | "nteract": { 117 | "transient": { 118 | "deleting": false 119 | } 120 | }, 121 | "gather": { 122 | "logged": 1684345784093 123 | } 124 | } 125 | }, 126 | { 127 | "cell_type": "code", 128 | "source": [ 129 | "model_name = 'solar_May23'" 130 | ], 131 | "outputs": [], 132 | "execution_count": 17, 133 | "metadata": { 134 | "collapsed": true, 135 | "jupyter": { 136 | "source_hidden": false, 137 | "outputs_hidden": false 138 | }, 139 | "nteract": { 140 | "transient": { 141 | "deleting": false 142 | } 143 | }, 144 | "gather": { 145 | "logged": 1684345762622 146 | } 147 | } 148 | }, 149 | { 150 | "cell_type": "code", 151 | "source": [ 152 | "model = run.register_model(model_name=model_name,\n", 153 | " tags=run.tags,\n", 154 | " description = 'UNET model delineating ground mounted solar arrays in 
S2 imagery. Trained on multi-season data from Chesapeake Bay and NC',\n", 155 | " model_path='outputs/',\n", 156 | " model_framework = 'Tensorflow',\n", 157 | " model_framework_version= '2.0',\n", 158 | " datasets = [('training', train_dataset), ('evaluation', eval_dataset), ('testing', test_dataset)])\n", 159 | "print(model.name, model.id, model.version, sep='\\t')" 160 | ], 161 | "outputs": [ 162 | { 163 | "output_type": "stream", 164 | "name": "stdout", 165 | "text": "solar_May23\tsolar_May23:2\t2\n" 166 | } 167 | ], 168 | "execution_count": 19, 169 | "metadata": { 170 | "collapsed": true, 171 | "jupyter": { 172 | "source_hidden": false, 173 | "outputs_hidden": false 174 | }, 175 | "nteract": { 176 | "transient": { 177 | "deleting": false 178 | } 179 | }, 180 | "gather": { 181 | "logged": 1684345790955 182 | } 183 | } 184 | }, 185 | { 186 | "cell_type": "code", 187 | "source": [ 188 | "# use the azure folder as our script folder\n", 189 | "source = 'Satellite_ComputerVision'\n", 190 | "util_folder = 'utils'\n", 191 | "script_folder = f'{source}/azure'\n", 192 | "script_file = 'train_solar.py'" 193 | ], 194 | "outputs": [], 195 | "execution_count": 17, 196 | "metadata": { 197 | "collapsed": true, 198 | "jupyter": { 199 | "source_hidden": false, 200 | "outputs_hidden": false 201 | }, 202 | "nteract": { 203 | "transient": { 204 | "deleting": false 205 | } 206 | }, 207 | "gather": { 208 | "logged": 1638378482125 209 | } 210 | } 211 | }, 212 | { 213 | "cell_type": "code", 214 | "source": [ 215 | "# define the compute target\n", 216 | "ws.compute_targets\n", 217 | "mevansGPU = ws.compute_targets['mevansGPU']" 218 | ], 219 | "outputs": [], 220 | "execution_count": 20, 221 | "metadata": { 222 | "collapsed": true, 223 | "jupyter": { 224 | "source_hidden": false, 225 | "outputs_hidden": false 226 | }, 227 | "nteract": { 228 | "transient": { 229 | "deleting": false 230 | } 231 | }, 232 | "gather": { 233 | "logged": 1684345802021 234 | } 235 | } 236 | }, 237 | { 238 | "cell_type": "code", 239 | "source": [ 240 | "experiment_name = 'solar-nc-cpk'\n", 241 | "exp = Experiment(workspace = ws, name = experiment_name)" 242 | ], 243 | "outputs": [], 244 | "execution_count": 22, 245 | "metadata": { 246 | "collapsed": true, 247 | "jupyter": { 248 | "source_hidden": false, 249 | "outputs_hidden": false 250 | }, 251 | "nteract": { 252 | "transient": { 253 | "deleting": false 254 | } 255 | }, 256 | "gather": { 257 | "logged": 1684345807382 258 | } 259 | } 260 | }, 261 | { 262 | "cell_type": "code", 263 | "source": [ 264 | "custom_env = Environment.from_docker_image(\r\n", 265 | " name = 'tf_training',\r\n", 266 | " image = 'mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.2-cudnn8-ubuntu20.04:20221010.v1',\r\n", 267 | " container_registry=None,\r\n", 268 | " conda_specification='/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet/conda_env-copy.yml',\r\n", 269 | " pip_requirements=None)" 270 | ], 271 | "outputs": [], 272 | "execution_count": 21, 273 | "metadata": { 274 | "jupyter": { 275 | "source_hidden": false, 276 | "outputs_hidden": false 277 | }, 278 | "nteract": { 279 | "transient": { 280 | "deleting": false 281 | } 282 | }, 283 | "gather": { 284 | "logged": 1684345805116 285 | } 286 | } 287 | }, 288 | { 289 | "cell_type": "code", 290 | "source": [ 291 | "RESPONSE = 'landcover'\n", 292 | "args = [\n", 293 | " '--train_data', train_dataset.as_mount(),\n", 294 | " '--eval_data', eval_dataset.as_mount(),\n", 295 | " '--test_data', test_dataset.as_mount(),\n", 296 | " 
'--model_id', model_name,\n", 297 | " '--weights', '[1.0, 1.0]',\n", 298 | " '--bias', 0,\n", 299 | " '-lr', 0.0005,\n", 300 | " '--epochs', 200,\n", 301 | " '--epoch_start', 150,\n", 302 | " '--batch', 16,\n", 303 | " '--size', 11020,\n", 304 | " '--kernel_size', 256,\n", 305 | " '--response', f'{RESPONSE}',\n", 306 | " '--bands', '[\"B2\", \"B3\", \"B4\", \"B8\", \"B11\", \"B12\"]',\n", 307 | " '--splits', '[0]']\n", 308 | "\n", 309 | "src = ScriptRunConfig(source_directory='azure',\n", 310 | " script='train_solar.py', \n", 311 | " arguments=args,\n", 312 | " compute_target=mevansGPU,\n", 313 | " environment=custom_env)" 314 | ], 315 | "outputs": [], 316 | "execution_count": 23, 317 | "metadata": { 318 | "collapsed": true, 319 | "jupyter": { 320 | "source_hidden": false, 321 | "outputs_hidden": false 322 | }, 323 | "nteract": { 324 | "transient": { 325 | "deleting": false 326 | } 327 | }, 328 | "gather": { 329 | "logged": 1684345941382 330 | } 331 | } 332 | }, 333 | { 334 | "cell_type": "code", 335 | "source": [ 336 | "# run the training job\n", 337 | "run = exp.submit(config=src, tags = dict({'splits':'None', 'model':'Unet', 'dataset':'NC CPK S2', 'normalization':'S2 moments', 'epochs':'150-200'}))\n", 338 | "run" 339 | ], 340 | "outputs": [ 341 | { 342 | "output_type": "execute_result", 343 | "execution_count": 24, 344 | "data": { 345 | "text/plain": "Run(Experiment: solar-nc-cpk,\nId: solar-nc-cpk_1684345980_10f4f4d2,\nType: azureml.scriptrun,\nStatus: Starting)", 346 | "text/html": "
Experiment: solar-nc-cpk | Id: solar-nc-cpk_1684345980_10f4f4d2 | Type: azureml.scriptrun | Status: Starting | Details Page: Link to Azure Machine Learning studio | Docs Page: Link to Documentation
" 347 | }, 348 | "metadata": {} 349 | }, 350 | { 351 | "output_type": "stream", 352 | "name": "stderr", 353 | "text": "Bad pipe message: %s [b'8\\xde(\\xabdJN#r\\xf9\\x05\\xd7\\xe1h\\xee\\x83\\xac\\xda \\xe7\\x80\\x7fCu\\x10\\xe4\\xd9\\x94\\xc0\\xbdj\\xc4\\xd9\\xcb\\x18/\\xde\\x06M\\xfe\\xff\\xe6\\xd0\\x9a\\xf4m\\x08\\xcb\\x8f\\x93p\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x00\\x1e\\x00\\x1c\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\\x0b\\x08\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06\\x01\\x00+\\x00\\x03\\x02\\x03\\x04']\nBad pipe message: %s [b\"\\x86o~1o\\x1bu.\\xf9\\xdd\\xc4\\xde0\\x92\\xfd\\x18$\\x8d\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\\x0b\\x08\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06\\x01\\x03\\x03\\x02\"]\nBad pipe message: %s [b'\\x01\\x02', b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe message: %s [b'My\\xef\\x0c\\xa9\\x9eQ\\xc4)d6t\\xc2pc[y\\xe3\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0', b'\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0']\nBad pipe message: %s [b'\\x16\\x00\\x13\\x00\\x10\\x00\\r']\nBad pipe message: %s [b'\\xe4\\x12,{\\x15\\x94\\xb4\\x11\\xc9\\x13\\xb1\\xc4\\xb9\\xfa4\\x1a\\xebf\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x00']\nBad pipe message: %s [b'\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0']\nBad pipe message: %s [b'\\x16\\x00\\x18\\xc0\\x0c\\xc0']\nBad pipe message: %s [b'\\x05']\nBad pipe message: %s 
[b\"\\xdbvN\\x0c\\xe8D{\\x910\\x1c\\xd0V$B2\\x8d\\xd0\\xc0\\x00\\x00\\x86\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\\x00?\\x00>\\x003\\x002\\x001\\x000\\xc01\\xc0-\\xc0)\\xc0%\\xc0\\x0e\\xc0\\x04\\x00\\x9c\\x00<\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00g\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00\\t\\x00\\n\\x00#\\x00\\x00\\x00\\r\\x00 \\x00\\x1e\", b'\\x06\\x02\\x06\\x03\\x05', b'', b'\\x03', b'\\x04\\x02\\x04', b'\\x01\\x03', b'\\x03', b'\\x02', b'\\x03']\nBad pipe message: %s [b'\\x02\\xc2~\\xda1\\xee\\xd4\\x9c\\xf3\\x08\\xf6']\nBad pipe message: %s [b\"O\\x11L,\\x99\\x00\\x00\\xf4\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00\\xa7\\x00m\\x00:\\x00\\x89\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\x00\\x84\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\"]\nBad pipe message: %s [b\"Es\\xa6z1/\\x87s'\\xd5\\xe5I\\x176b#G\\x10 \\xa4\\xad\\xc7\\x1e\\xcfrs\\x19H\\xaf\\x01\\x0cJTm\\xbe\\xd3X\\xca\\x94\\xebc'\\x8f\\xf6\\x8f\\xa6>\\x99x\\x0eS\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x00\\x1e\\x00\\x1c\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\", b'\\x08\\x0b\\x08\\x04\\x08\\x05\\x08']\nBad pipe message: %s [b'\\x01\\x05\\x01\\x06\\x01']\nBad pipe message: %s [b'\\x0b\\x13\\xc5a>C\\x18I\\xcaF\\x19\\xad\\x9e\\xf8\\xf0\\xf2\\x98\\x1e\\x00\\x00\\xa6\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa', b\"\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\", b'\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07']\nBad pipe message: %s [b'\\x08\\t\\x08\\n\\x08\\x0b\\x08']\nBad pipe message: %s [b'\\x05\\x08\\x06']\nBad pipe message: %s [b'\\x05\\x01\\x06', b'', b'\\x03\\x03']\nBad pipe message: %s [b'']\nBad pipe message: %s [b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe 
message: %s [b'h\\x1f\\x8cg9\\x8bqQ^\\xdc\\xae\\x9b\\xdbz0\\xe6\\xc9\\x06\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\xc0\\x0f\\xc0\\x05\\x005\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00C\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00']\nBad pipe message: %s [b'\\n\\x00#\\x00\\x00\\x00\\x0f\\x00']\nBad pipe message: %s [b'\\xc6\\xe7\\xcf\\xd7\\xd58\\x8b\\x04\\xeee\\xce\\r\\x84\\xbc\\xdd\\xcf\\x99\\xe1\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0']\nBad pipe message: %s [b'\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00']\nBad pipe message: %s [b'\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00\\x06\\x00\\x17\\x00\\x03\\xc0\\x10\\xc0\\x06\\xc0\\x15\\xc0\\x0b\\xc0\\x01']\nBad pipe message: %s [b'\\x04o\\x98r\\x8d\\x9dmQ\\xac/\\xa1\\xd2\\x1f\\xa5\\xbe\\xed\\xb4\\xff\\x00\\x00\\xf4\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\x00\\x88']\nBad pipe message: %s [b\"\\xa0\\xf7\\x16S\\xd2\\x85\\xfa\\x11B+A\\x9b\\xe4>\\xd1\\n>\\xa7\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\", b'\\x08\\x08\\x08\\t\\x08\\n\\x08', b'\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06']\nBad pipe message: %s [b'', b'\\x03\\x03']\nBad pipe message: %s [b'']\nBad pipe message: %s [b'', b'\\x02']\nBad pipe message: %s [b'\\x05\\x02\\x06']\nBad pipe message: %s [b'\\xe1\\xc9v\\x04\\x0eH\\xba\\xff\\xc3\\xb7\\x93\\x0c\\xf7v\\x18\\x0b~\\xae\\x00\\x00\\xa6\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]']\nBad pipe message: %s 
[b\"\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\", b'\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08']" 354 | } 355 | ], 356 | "execution_count": 24, 357 | "metadata": { 358 | "collapsed": true, 359 | "jupyter": { 360 | "source_hidden": false, 361 | "outputs_hidden": false 362 | }, 363 | "nteract": { 364 | "transient": { 365 | "deleting": false 366 | } 367 | }, 368 | "gather": { 369 | "logged": 1684345969907 370 | } 371 | } 372 | } 373 | ], 374 | "metadata": { 375 | "kernelspec": { 376 | "name": "python38-azureml", 377 | "language": "python", 378 | "display_name": "Python 3.8 - AzureML" 379 | }, 380 | "language_info": { 381 | "name": "python", 382 | "version": "3.8.5", 383 | "mimetype": "text/x-python", 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 3 387 | }, 388 | "pygments_lexer": "ipython3", 389 | "nbconvert_exporter": "python", 390 | "file_extension": ".py" 391 | }, 392 | "kernel_info": { 393 | "name": "python38-azureml" 394 | }, 395 | "microsoft": { 396 | "host": { 397 | "AzureML": { 398 | "notebookHasBeenCompleted": true 399 | } 400 | }, 401 | "ms_spell_check": { 402 | "ms_spell_check_language": "en" 403 | } 404 | }, 405 | "nteract": { 406 | "version": "nteract-front-end@1.0.0" 407 | } 408 | }, 409 | "nbformat": 4, 410 | "nbformat_minor": 2 411 | } -------------------------------------------------------------------------------- /Setup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "source": [ 6 | "import azureml.core\r\n", 7 | "from azureml.core import Experiment, Environment, Workspace, Dataset, Datastore, ScriptRunConfig\r\n", 8 | "from azureml.core.conda_dependencies import CondaDependencies\r\n", 9 | "import os\r\n", 10 | "import shutil\r\n", 11 | "\r\n", 12 | "# check core SDK version number\r\n", 13 | "\r\n", 14 | "print(\"Azure ML SDK Version: \", azureml.core.VERSION)" 15 | ], 16 | "outputs": [ 17 | { 18 | "output_type": "stream", 19 | "name": "stdout", 20 | "text": "Azure ML SDK Version: 1.44.0\n" 21 | } 22 | ], 23 | "execution_count": 4, 24 | "metadata": { 25 | "gather": { 26 | "logged": 1668617207337 27 | } 28 | } 29 | }, 30 | { 31 | "cell_type": "code", 32 | "source": [ 33 | "# load workspace configuration from the config.json file in the current folder.\r\n", 34 | "ws = Workspace.from_config()\r\n", 35 | "# get metadata about the workspace\r\n", 36 | "print(ws.name, ws.location, ws.resource_group, sep='\\t')\r\n", 37 | "# list the registered datastores\r\n", 38 | "ws.datastores" 39 | ], 40 | "outputs": [ 41 | { 42 | "output_type": "stream", 43 | "name": "stdout", 44 | "text": "landcover-ai\teastus\tcic_ai\n" 45 | }, 46 | { 47 | "output_type": "execute_result", 48 | "execution_count": 6, 49 | 
"data": { 50 | "text/plain": "{'solardatablob': {\n \"name\": \"solardatablob\",\n \"container_name\": \"solar\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'animalopsblobstore2': {\n \"name\": \"animalopsblobstore2\",\n \"container_name\": \"animal-ops\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'animalopsblobstore': {\n \"name\": \"animalopsblobstore\",\n \"container_name\": \"animal-ops\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspacefilestore': {\n \"name\": \"workspacefilestore\",\n \"container_name\": \"azureml-filestore-0b767baf-fb3d-4e08-a2d6-663739db0e23\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceworkingdirectory': {\n \"name\": \"workspaceworkingdirectory\",\n \"container_name\": \"code-391ff5ac-6576-460f-ba4d-7e03433c68b6\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceartifactstore': {\n \"name\": \"workspaceartifactstore\",\n \"container_name\": \"azureml\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n },\n 'workspaceblobstore': {\n \"name\": \"workspaceblobstore\",\n \"container_name\": \"azureml-blobstore-0b767baf-fb3d-4e08-a2d6-663739db0e23\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n }}" 51 | }, 52 | "metadata": {} 53 | } 54 | ], 55 | "execution_count": 6, 56 | "metadata": { 57 | "collapsed": true, 58 | "jupyter": { 59 | "source_hidden": false, 60 | "outputs_hidden": false 61 | }, 62 | "nteract": { 63 | "transient": { 64 | "deleting": false 65 | } 66 | }, 67 | "gather": { 68 | "logged": 1668617220821 69 | } 70 | } 71 | }, 72 | { 73 | "cell_type": "code", 74 | "source": [ 75 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\r\n", 76 | "datastore.unregister()" 77 | ], 78 | "outputs": [], 79 | "execution_count": 5, 80 | "metadata": { 81 | "jupyter": { 82 | "source_hidden": false, 83 | "outputs_hidden": false 84 | }, 85 | "nteract": { 86 | "transient": { 87 | "deleting": false 88 | } 89 | }, 90 | "gather": { 91 | "logged": 1668463545304 92 | } 93 | } 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "# register our data share containing image data in this workspace\r\n", 99 | "Datastore.register_azure_blob_container(\r\n", 100 | " account_key = 'sZ/bw2Viouyp/C0Duhboamqx5VDXNtAm2fyYzrucLsUNk5nQXkvURAMnBeehMiL1xE+LEMTRBeaq+AStNkzzkQ==',\r\n", 101 | " workspace = ws,\r\n", 102 | " datastore_name = 'solarDataBlob',\r\n", 103 | " container_name = 'solar',\r\n", 104 | " account_name = 'aiprojects')" 105 | ], 106 | "outputs": [ 107 | { 108 | "output_type": "stream", 109 | "name": "stderr", 110 | "text": "Datastore name solarDataBlob contains capital letters. 
They will be converted to lowercase letters.\n" 111 | }, 112 | { 113 | "output_type": "execute_result", 114 | "execution_count": 8, 115 | "data": { 116 | "text/plain": "{\n \"name\": \"solardatablob\",\n \"container_name\": \"solar\",\n \"account_name\": \"aiprojects\",\n \"protocol\": \"https\",\n \"endpoint\": \"core.windows.net\"\n}" 117 | }, 118 | "metadata": {} 119 | } 120 | ], 121 | "execution_count": 8, 122 | "metadata": { 123 | "collapsed": true, 124 | "jupyter": { 125 | "source_hidden": false, 126 | "outputs_hidden": false 127 | }, 128 | "nteract": { 129 | "transient": { 130 | "deleting": false 131 | } 132 | }, 133 | "gather": { 134 | "logged": 1668463968153 135 | } 136 | } 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# create a file dataset that can be used in training\r\n", 142 | "datastore = Datastore.get(workspace = ws, datastore_name = 'solardatablob')\r\n", 143 | "datastore_paths = [(datastore, 'CPK_solar'), (datastore, 'NC_solar')]\r\n", 144 | "cpk_dataset = Dataset.File.from_files(path = datastore_paths[0])\r\n", 145 | "nc_dataset = Dataset.File.from_files(path = datastore_paths[1])\r\n", 146 | "\r\n", 147 | "cpk_dataset = cpk_dataset.register(\r\n", 148 | " workspace=ws,\r\n", 149 | " name='gee-cpk-solar-data',\r\n", 150 | " description='training and eval TFRecords for solar arrays exported from GEE',\r\n", 151 | " create_new_version=True)\r\n", 152 | "\r\n", 153 | "nc_dataset = nc_dataset.register(\r\n", 154 | " workspace=ws,\r\n", 155 | " name='gee-nc-solar-data',\r\n", 156 | " description='training and eval TFRecords for solar arrays exported from GEE',\r\n", 157 | " create_new_version=True)" 158 | ], 159 | "outputs": [], 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": true, 163 | "jupyter": { 164 | "source_hidden": false, 165 | "outputs_hidden": false 166 | }, 167 | "nteract": { 168 | "transient": { 169 | "deleting": false 170 | } 171 | }, 172 | "gather": { 173 | "logged": 1642800766789 174 | } 175 | } 176 | }, 177 | { 178 | "cell_type": "code", 179 | "source": [], 180 | "outputs": [], 181 | "execution_count": null, 182 | "metadata": { 183 | "jupyter": { 184 | "source_hidden": false, 185 | "outputs_hidden": false 186 | }, 187 | "nteract": { 188 | "transient": { 189 | "deleting": false 190 | } 191 | } 192 | } 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "source": [ 197 | "## Environments" 198 | ], 199 | "metadata": { 200 | "nteract": { 201 | "transient": { 202 | "deleting": false 203 | } 204 | } 205 | } 206 | }, 207 | { 208 | "cell_type": "code", 209 | "source": [ 210 | "envs = Environment.list(workspace=ws)\r\n", 211 | "\r\n", 212 | "for env in envs:\r\n", 213 | " if env.startswith(\"AzureML\"):\r\n", 214 | " print(\"Name\",env)" 215 | ], 216 | "outputs": [ 217 | { 218 | "output_type": "stream", 219 | "name": "stdout", 220 | "text": "Name AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu\nName AzureML-responsibleai-0.21-ubuntu20.04-py38-cpu\nName AzureML-PTA-pytorch-1.11-py38-cuda11.3-gpu\nName AzureML-PTA-pytorch-1.11-py38-cuda11.5-gpu\nName AzureML-sklearn-1.0-ubuntu20.04-py38-cpu\nName AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu\nName AzureML-tensorflow-2.5-ubuntu20.04-py38-cuda11-gpu\nName AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu\nName AzureML-ACPT-pytorch-1.11-py38-cuda11.3-gpu\nName AzureML-ACPT-pytorch-1.11-py38-cuda11.5-gpu\nName AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu\nName AzureML-ACPT-pytorch-1.12-py39-cuda11.6-gpu\nName AzureML-ACPT-pytorch-1.12-py38-cuda11.6-gpu\nName 
AzureML-minimal-ubuntu18.04-py37-cuda11.0.3-gpu-inference\nName AzureML-lightgbm-3.2-ubuntu18.04-py37-cpu\nName AzureML-sklearn-0.24-ubuntu18.04-py37-cpu\nName AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.7-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.8-ubuntu18.04-py37-cuda11-gpu\nName AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu\nName AzureML-minimal-ubuntu18.04-py37-cpu-inference\nName AzureML-VowpalWabbit-8.8.0\nName AzureML-PyTorch-1.3-CPU\nName AzureML-Triton\n" 221 | } 222 | ], 223 | "execution_count": 7, 224 | "metadata": { 225 | "jupyter": { 226 | "source_hidden": false, 227 | "outputs_hidden": false 228 | }, 229 | "nteract": { 230 | "transient": { 231 | "deleting": false 232 | } 233 | }, 234 | "gather": { 235 | "logged": 1668617225810 236 | } 237 | } 238 | }, 239 | { 240 | "cell_type": "code", 241 | "source": [ 242 | "base_env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 243 | "base_env" 244 | ], 245 | "outputs": [], 246 | "execution_count": null, 247 | "metadata": { 248 | "jupyter": { 249 | "source_hidden": false, 250 | "outputs_hidden": false 251 | }, 252 | "nteract": { 253 | "transient": { 254 | "deleting": false 255 | } 256 | }, 257 | "gather": { 258 | "logged": 1668617330232 259 | } 260 | } 261 | }, 262 | { 263 | "cell_type": "code", 264 | "source": [ 265 | "!pwd" 266 | ], 267 | "outputs": [ 268 | { 269 | "output_type": "stream", 270 | "name": "stdout", 271 | "text": "/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet\r\n" 272 | } 273 | ], 274 | "execution_count": 27, 275 | "metadata": { 276 | "jupyter": { 277 | "source_hidden": false, 278 | "outputs_hidden": false 279 | }, 280 | "nteract": { 281 | "transient": { 282 | "deleting": false 283 | } 284 | } 285 | } 286 | }, 287 | { 288 | "cell_type": "code", 289 | "source": [ 290 | "env_docker_conda = Environment(\r\n", 291 | " image=\"mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20221010.v1\",\r\n", 292 | " conda_file=\"/mnt/batch/tasks/shared/LS_root/mounts/clusters/mevans1/code/Users/mevans/Solar_UNet/envs/conda_env.yml\",\r\n", 293 | " name=\"solar-training\",\r\n", 294 | " description=\"Environment created from a Docker image plus Conda environment.\",\r\n", 295 | ")\r\n", 296 | "\r\n" 297 | ], 298 | "outputs": [], 299 | "execution_count": 34, 300 | "metadata": { 301 | "jupyter": { 302 | "source_hidden": false, 303 | "outputs_hidden": false 304 | }, 305 | "nteract": { 306 | "transient": { 307 | "deleting": false 308 | } 309 | }, 310 | "gather": { 311 | "logged": 1668620052885 312 | } 313 | } 314 | }, 315 | { 316 | "cell_type": "code", 317 | "source": [ 318 | "env_docker_conda" 319 | ], 320 | "outputs": [ 321 | { 322 | "output_type": "execute_result", 323 | "execution_count": 35, 324 | "data": { 325 | "text/plain": "{\n \"assetId\": null,\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": null,\n \"baseImage\": \"mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220708.v1\",\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"buildContext\": null,\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": \"2g\"\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n 
\"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.8.13\",\n {\n \"pip\": [\n \"azureml-defaults\"\n ]\n }\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": false\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": null\n}" 326 | }, 327 | "metadata": {} 328 | } 329 | ], 330 | "execution_count": 35, 331 | "metadata": { 332 | "jupyter": { 333 | "source_hidden": false, 334 | "outputs_hidden": false 335 | }, 336 | "nteract": { 337 | "transient": { 338 | "deleting": false 339 | } 340 | }, 341 | "gather": { 342 | "logged": 1668620058108 343 | } 344 | } 345 | }, 346 | { 347 | "cell_type": "code", 348 | "source": [ 349 | "base_env.name = 'solar-training'\r\n", 350 | "base_env.register(ws)" 351 | ], 352 | "outputs": [ 353 | { 354 | "output_type": "stream", 355 | "name": "stderr", 356 | "text": "Environment version is set. Attempting to register desired version. To auto-version, reset version to None.\n" 357 | }, 358 | { 359 | "output_type": "execute_result", 360 | "execution_count": 21, 361 | "data": { 362 | "text/plain": "{\n \"assetId\": \"azureml://locations/eastus/workspaces/0b767baf-fb3d-4e08-a2d6-663739db0e23/environments/solar-training/versions/50\",\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20221010.v1\\n\\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n # upper bound azure-core to address typing-extensions conflict\\n 'azure-core<1.23.0' \\\\\\n 'azureml-core~=1.43.0' \\\\\\n 'azureml-defaults~=1.43.0' \\\\\\n 'azureml-mlflow~=1.43.0' \\\\\\n 'azureml-telemetry~=1.43.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'protobuf~=3.20' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3' \\\\\\n 'debugpy~=1.6.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\\n\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"buildContext\": null,\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n 
\"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"50\"\n}" 363 | }, 364 | "metadata": {} 365 | } 366 | ], 367 | "execution_count": 21, 368 | "metadata": { 369 | "jupyter": { 370 | "source_hidden": false, 371 | "outputs_hidden": false 372 | }, 373 | "nteract": { 374 | "transient": { 375 | "deleting": false 376 | } 377 | }, 378 | "gather": { 379 | "logged": 1668617840822 380 | } 381 | } 382 | }, 383 | { 384 | "cell_type": "code", 385 | "source": [ 386 | "# create an environment for the first time\r\n", 387 | "\r\n", 388 | "envs = Environment.list(workspace = ws)\r\n", 389 | "# well start with a pre-built tensorflow environment\r\n", 390 | "env = envs.get('AzureML-TensorFlow-2.3-GPU')\r\n", 391 | "env\r\n", 392 | "\r\n", 393 | "# define packages to be installed using CondaDependencies\r\n", 394 | "# get the packages that are already part of the pre-built environment\r\n", 395 | "conda_dep = env.python.conda_dependencies\r\n", 396 | "# list packages to install\r\n", 397 | "pip_packages = ['matplotlib', 'rasterio', 'tensorboard']\r\n", 398 | "\r\n", 399 | "# add each package to the existing conda dependencies\r\n", 400 | "for package in pip_packages:\r\n", 401 | " conda_dep.add_pip_package(package)\r\n", 402 | "\r\n", 403 | "# double check all the packages are there\r\n", 404 | "conda_dep.serialize_to_string()\r\n", 405 | "# conda_dep = CondaDependencies.create(\r\n", 406 | "# pip_packages=pip_packages)\r\n", 407 | "\r\n", 408 | "# Now update the conda dependencies of the python environment\r\n", 409 | "env.python.conda_dependencies=conda_dep\r\n", 410 | "\r\n", 411 | "# # Register environment to re-use later\r\n", 412 | "env.name = 'solar-training'\r\n", 413 | "env.register(workspace = ws)" 414 | ], 415 | "outputs": [], 416 | "execution_count": null, 417 | "metadata": { 418 | "collapsed": true, 419 | "jupyter": { 420 | "source_hidden": false, 421 | "outputs_hidden": false 422 | }, 423 | "nteract": { 424 | "transient": { 425 | "deleting": false 426 | } 427 | }, 428 | "gather": { 429 | "logged": 1622223971377 430 | } 431 | } 432 | }, 433 | { 434 | "cell_type": "code", 435 | "source": [ 436 | "envs = Environment.list(workspace = ws)\r\n", 437 | "# well start with a pre-built tensorflow environment\r\n", 438 | "env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 439 | "# env\r\n", 440 | "solar_env = env.clone('test')\r\n", 441 | "conda_dep = CondaDependencies()\r\n", 442 | "# list packages to install\r\n", 443 | "pip_packages = ['matplotlib', 'tensorboard']\r\n", 444 | "\r\n", 445 | "# add each package to the existing conda dependencies\r\n", 446 | "for package in pip_packages:\r\n", 447 | " conda_dep.add_pip_package(package)\r\n", 448 | "\r\n", 449 | "conda_dep.add_conda_package('rasterio')\r\n", 450 | "\r\n", 451 | "solar_env.python.conda_dependencies=conda_dep\r\n", 452 | "\r\n", 453 | "# # Register environment to re-use later\r\n", 454 | "solar_env.name = 'test'\r\n", 455 | "solar_env.register(workspace = ws)" 456 | ], 457 | "outputs": [ 458 | { 459 | "output_type": "execute_result", 460 | "execution_count": 25, 461 | "data": { 462 | "text/plain": "{\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20220113.v1\\n\\nENV 
AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n 'azureml-core==1.37.0.post1' \\\\\\n 'azureml-defaults==1.37.0' \\\\\\n 'azureml-mlflow==1.37.0' \\\\\\n 'azureml-telemetry==1.37.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"test\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.6.2\",\n {\n \"pip\": [\n \"azureml-defaults\",\n \"matplotlib\",\n \"tensorboard\"\n ]\n },\n \"rasterio\"\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"1\"\n}" 463 | }, 464 | "metadata": {} 465 | } 466 | ], 467 | "execution_count": 25, 468 | "metadata": { 469 | "jupyter": { 470 | "source_hidden": false, 471 | "outputs_hidden": false 472 | }, 473 | "nteract": { 474 | "transient": { 475 | "deleting": false 476 | } 477 | }, 478 | "gather": { 479 | "logged": 1643053056600 480 | } 481 | } 482 | }, 483 | { 484 | "cell_type": "code", 485 | "source": [ 486 | "# create an environment for the first time\r\n", 487 | "\r\n", 488 | "envs = Environment.list(workspace = ws)\r\n", 489 | "# well start with a pre-built tensorflow environment\r\n", 490 | "env = envs.get('AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu')\r\n", 491 | "# env\r\n", 492 | "solar_env = env.clone('test')\r\n", 493 | "# define packages to be installed using CondaDependencies\r\n", 494 | "# get the packages that are already part of the pre-built environment\r\n", 495 | "conda_dep = CondaDependencies()\r\n", 496 | "# list packages to install\r\n", 497 | "pip_packages = ['matplotlib', 'tensorboard']\r\n", 498 | "\r\n", 499 | "# add each package to the existing conda dependencies\r\n", 500 | "for package in pip_packages:\r\n", 501 | " conda_dep.add_pip_package(package)\r\n", 502 | "\r\n", 503 | "conda_dep.add_conda_package('rasterio')\r\n", 504 | "\r\n", 505 | "# double check all the packages are there\r\n", 506 | "conda_dep.serialize_to_string()\r\n", 507 | "# conda_dep = CondaDependencies.create(\r\n", 508 | "# pip_packages=pip_packages)\r\n", 509 | "\r\n", 510 | "# Now update the conda dependencies of the python environment\r\n", 511 | 
"solar_env.python.conda_dependencies=conda_dep\r\n", 512 | "\r\n", 513 | "# # Register environment to re-use later\r\n", 514 | "solar_env.name = 'solar-training'\r\n", 515 | "solar_env.register(workspace = ws)" 516 | ], 517 | "outputs": [ 518 | { 519 | "output_type": "execute_result", 520 | "execution_count": 18, 521 | "data": { 522 | "text/plain": "{\n \"databricks\": {\n \"eggLibraries\": [],\n \"jarLibraries\": [],\n \"mavenLibraries\": [],\n \"pypiLibraries\": [],\n \"rcranLibraries\": []\n },\n \"docker\": {\n \"arguments\": [],\n \"baseDockerfile\": \"FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.0.3-cudnn8-ubuntu18.04:20220113.v1\\n\\nENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/tensorflow-2.4\\n\\n# Create conda environment\\nRUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \\\\\\n python=3.7 pip=20.2.4\\n\\n# Prepend path to AzureML conda environment\\nENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH\\n\\n# Install pip dependencies\\nRUN HOROVOD_WITH_TENSORFLOW=1 \\\\\\n pip install 'matplotlib>=3.3,<3.4' \\\\\\n 'psutil>=5.8,<5.9' \\\\\\n 'tqdm>=4.59,<4.60' \\\\\\n 'pandas>=1.1,<1.2' \\\\\\n 'scipy>=1.5,<1.6' \\\\\\n 'numpy>=1.10,<1.20' \\\\\\n 'ipykernel~=6.0' \\\\\\n 'azureml-core==1.37.0.post1' \\\\\\n 'azureml-defaults==1.37.0' \\\\\\n 'azureml-mlflow==1.37.0' \\\\\\n 'azureml-telemetry==1.37.0' \\\\\\n 'tensorboard==2.4.0' \\\\\\n 'tensorflow-gpu==2.4.1' \\\\\\n 'tensorflow-datasets==4.3.0' \\\\\\n 'onnxruntime-gpu>=1.7,<1.8' \\\\\\n 'horovod[tensorflow-gpu]==0.21.3'\\n\\n# This is needed for mpi to locate libpython\\nENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH\",\n \"baseImage\": null,\n \"baseImageRegistry\": {\n \"address\": null,\n \"password\": null,\n \"registryIdentity\": null,\n \"username\": null\n },\n \"enabled\": false,\n \"platform\": {\n \"architecture\": \"amd64\",\n \"os\": \"Linux\"\n },\n \"sharedVolumes\": true,\n \"shmSize\": null\n },\n \"environmentVariables\": {\n \"EXAMPLE_ENV_VAR\": \"EXAMPLE_VALUE\"\n },\n \"inferencingStackVersion\": null,\n \"name\": \"solar-training\",\n \"python\": {\n \"baseCondaEnvironment\": null,\n \"condaDependencies\": {\n \"channels\": [\n \"anaconda\",\n \"conda-forge\"\n ],\n \"dependencies\": [\n \"python=3.6.2\",\n {\n \"pip\": [\n \"azureml-defaults\",\n \"matplotlib\",\n \"tensorboard\"\n ]\n },\n \"rasterio\"\n ],\n \"name\": \"project_environment\"\n },\n \"condaDependenciesFile\": null,\n \"interpreterPath\": \"python\",\n \"userManagedDependencies\": true\n },\n \"r\": null,\n \"spark\": {\n \"packages\": [],\n \"precachePackages\": true,\n \"repositories\": []\n },\n \"version\": \"2\"\n}" 523 | }, 524 | "metadata": {} 525 | } 526 | ], 527 | "execution_count": 18, 528 | "metadata": { 529 | "jupyter": { 530 | "source_hidden": false, 531 | "outputs_hidden": false 532 | }, 533 | "nteract": { 534 | "transient": { 535 | "deleting": false 536 | } 537 | }, 538 | "gather": { 539 | "logged": 1643050589298 540 | } 541 | } 542 | }, 543 | { 544 | "cell_type": "code", 545 | "source": [ 546 | "iterator = iter(env.python.conda_dependencies.conda_packages)" 547 | ], 548 | "outputs": [], 549 | "execution_count": 13, 550 | "metadata": { 551 | "jupyter": { 552 | "source_hidden": false, 553 | "outputs_hidden": false 554 | }, 555 | "nteract": { 556 | "transient": { 557 | "deleting": false 558 | } 559 | }, 560 | "gather": { 561 | "logged": 1643049587175 562 | } 563 | } 564 | }, 565 | { 566 | "cell_type": "code", 567 | "source": [ 568 | "solar_env.name" 569 | ], 570 | "outputs": [ 571 | { 
572 | "output_type": "execute_result", 573 | "execution_count": 20, 574 | "data": { 575 | "text/plain": "'solar-training'" 576 | }, 577 | "metadata": {} 578 | } 579 | ], 580 | "execution_count": 20, 581 | "metadata": { 582 | "jupyter": { 583 | "source_hidden": false, 584 | "outputs_hidden": false 585 | }, 586 | "nteract": { 587 | "transient": { 588 | "deleting": false 589 | } 590 | }, 591 | "gather": { 592 | "logged": 1643050736169 593 | } 594 | } 595 | } 596 | ], 597 | "metadata": { 598 | "kernelspec": { 599 | "name": "python38-azureml", 600 | "language": "python", 601 | "display_name": "Python 3.8 - AzureML" 602 | }, 603 | "language_info": { 604 | "name": "python", 605 | "version": "3.8.5", 606 | "mimetype": "text/x-python", 607 | "codemirror_mode": { 608 | "name": "ipython", 609 | "version": 3 610 | }, 611 | "pygments_lexer": "ipython3", 612 | "nbconvert_exporter": "python", 613 | "file_extension": ".py" 614 | }, 615 | "kernel_info": { 616 | "name": "python38-azureml" 617 | }, 618 | "nteract": { 619 | "version": "nteract-front-end@1.0.0" 620 | }, 621 | "microsoft": { 622 | "host": { 623 | "AzureML": { 624 | "notebookHasBeenCompleted": true 625 | } 626 | } 627 | } 628 | }, 629 | "nbformat": 4, 630 | "nbformat_minor": 2 631 | } -------------------------------------------------------------------------------- /demos/SampleUNETdata_GEE.ipynb: -------------------------------------------------------------------------------- 1 | {"cells":[{"cell_type":"markdown","metadata":{"id":"view-in-github"},"source":["\"Open"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"esIMGVxhDI0f"},"outputs":[],"source":["# @title Author: Michael Evans { display-mode: \"form\" }\n","# Licensed under the Apache License, Version 2.0 (the \"License\");\n","# you may not use this file except in compliance with the License.\n","# You may obtain a copy of the License at\n","#\n","# https://www.apache.org/licenses/LICENSE-2.0\n","#\n","# Unless required by applicable law or agreed to in writing, software\n","# distributed under the License is distributed on an \"AS IS\" BASIS,\n","# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n","# See the License for the specific language governing permissions and\n","# limitations under the License."]},{"cell_type":"markdown","metadata":{"id":"_SHAc5qbiR8l"},"source":["# Introduction\n","\n","This notebook demonstrates methods used to extract data to train a U-Net model capable of delineating ground-mounted solar arrays using free satellite imagery. This workflow generates and exports satellite imagery data from Google Earth Engine for analysis in Tensorflow. This analysis predicts the probability of the presence of a solar array as a function of the visible, infrared, and near infrared bands in Sentinel-2 imagery. The model is a [fully convolutional neural network (FCNN)](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf), specifically [U-net](https://arxiv.org/abs/1505.04597). This relatively simple model is a mostly unmodified version of [this example](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb) from the TensorFlow docs. This notebook shows:\n","\n","1. Exporting training/testing patches from Earth Engine, suitable for training an FCNN model.\n","2. Preprocessing.\n","3. Training and validating an FCNN model.\n","4. 
Making predictions with the trained model and importing them to Earth Engine."]},{"cell_type":"markdown","metadata":{"id":"_MJ4kW1pEhwP"},"source":["# Setup software libraries\n","\n","Install needed libraries to the notebook VM. Authenticate as necessary."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"neIa46CpciXq"},"outputs":[],"source":["# Cloud authentication.\n","from google.colab import auth\n","auth.authenticate_user()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"jat01FEoUMqg"},"outputs":[],"source":["# Import, authenticate and initialize the Earth Engine library.\n","import ee\n","ee.Authenticate()\n","ee.Initialize()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"n1hFdpBQfyhN"},"outputs":[],"source":["# We use folium to visualize GEE imagery. TODO: Update to use new gee packages\n","import folium\n","print(folium.__version__)\n","\n","# Define a method for displaying Earth Engine image tiles to a folium map.\n","def add_ee_layer(self, ee_image_object, vis_params, name):\n"," map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)\n"," folium.raster_layers.TileLayer(\n"," tiles = map_id_dict['tile_fetcher'].url_format,\n"," attr = \"Map Data © Google Earth Engine\",\n"," name = name,\n"," overlay = True,\n"," control = True\n"," ).add_to(self)\n","\n","# Add EE drawing method to folium.\n","folium.Map.add_ee_layer = add_ee_layer\n","\n","# Define the URL format used for Earth Engine generated map tiles.\n","#EE_TILES = 'https://earthengine.googleapis.com/map/{mapid}/{{z}}/{{x}}/{{y}}?token={token}'"]},{"cell_type":"markdown","metadata":{"id":"WjUgYcsAs9Ed"},"source":["## Mount Google Drive"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JKDKpX4FtQA1"},"outputs":[],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"yMNfRopTcnYu"},"outputs":[],"source":["# clone repository with modules for computer vision analyses\n","!git clone https://github.com/mjevans26/Satellite_ComputerVision.git"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"c12hxNU2S89-"},"outputs":[],"source":["# add the cloned repo directory to the path so we can use our modules\n","import sys\n","sys.path.append('/content/Satellite_ComputerVision/utils')\n","from clouds import basicQA"]},{"cell_type":"markdown","metadata":{"id":"iT8ycmzClYwf"},"source":["# Variables\n","\n","Declare the variables that will be in use throughout the notebook."]},{"cell_type":"markdown","metadata":{"id":"qKs6HuxOzjMl"},"source":["Specify a cloud storage bucket to which you have read/write access."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"obDDH1eDzsch"},"outputs":[],"source":["from os.path import join\n","BUCKET = 'cvod-203614-mlengine'\n","BUCKET_PATH = join('gs://', BUCKET)"]},{"cell_type":"markdown","metadata":{"id":"wmfKLl9XcnGJ"},"source":["## Set other global variables"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"psz7wJKalaoj"},"outputs":[],"source":["# Specify names and locations for outputs in Cloud Storage. 
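The export tasks below combine these as gs://{BUCKET}/{FOLDER}/{TRAIN_BASE or EVAL_BASE}/{task description}.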
\n","FOLDER = 'CPK_solar'\n","PRED_BASE = 'data/predict'\n","TRAIN_BASE = 'data/training'\n","EVAL_BASE = 'data/eval'\n","MODEL_BASE = 'models/UNET256'\n","log_dir = 'drive/My Drive/Tensorflow/models/UNET256'\n","\n","# Specify inputs (Sentinel bands) to the model and the response variable.\n","opticalBands = ['B2', 'B3', 'B4']\n","thermalBands = ['B8', 'B11', 'B12']\n","\n","# # We may want to run some experiments where we use pca components\n","# pcaBands = ['pc1', 'pc2', 'pc3']\n","\n","BANDS = opticalBands + thermalBands# + pcaBands\n","RESPONSE = 'landcover'\n","FEATURES = BANDS + [RESPONSE]\n","SCENEID = 'SENSING_ORBIT_NUMBER'\n","\n","# Specify the size and shape of patches expected by the model.\n","KERNEL_SIZE = 256\n","KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]\n"]},{"cell_type":"markdown","metadata":{"id":"hgoDc7Hilfc4"},"source":["# Imagery\n","\n","Process the imagery to use for predictor variables. This is a three-month, cloud-free, Sentinel-2 composite corresponding to the latest date from which we have confirmed training data. Display it in the notebook for a sanity check."]},{"cell_type":"markdown","metadata":{"id":"MjNmEImcGuMb"},"source":["## Create sample image"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"-IlgXu-vcUEY"},"outputs":[],"source":["# Use Sentinel-2 surface reflectance data.\n","S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n","# Grab a feature corresponding to our study area - North Carolina\n","states = ee.FeatureCollection(\"TIGER/2016/States\")\n","nc = states.filter(ee.Filter.eq('NAME', 'Delaware')).geometry().buffer(2500)\n","begin = '2019-01-01'\n","end = '2020-03-01'\n","\n","# The image input collection is cloud-masked.\n","filtered = S2.filterDate(begin, end)\\\n",".filterBounds(nc)\\\n",".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\n","\n","\n","# Create a simple median composite per season to visualize\n","winter = filtered.filterDate('2019-12-01', '2020-02-28').map(basicQA).median().select(BANDS).clip(nc)\n","spring = filtered.filterDate('2019-03-01', '2019-05-31').map(basicQA).median().select(BANDS).clip(nc)\n","summer = filtered.filterDate('2019-06-01', '2019-08-31').map(basicQA).median().select(BANDS).clip(nc)\n","fall = filtered.filterDate('2019-09-01', '2019-11-30').map(basicQA).median().select(BANDS).clip(nc)\n","\n","# Use folium to visualize the imagery.\n","#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","rgbParams = {'bands': ['B4', 'B3', 'B2'],\n"," 'min': 250,\n"," 'max': 3000}\n","\n","nirParams = {'bands': ['B8', 'B11', 'B12'],\n"," 'min': 250,\n"," 'max': 3000}\n","\n","map = folium.Map(location=[38.9725, -75.5185])\n","map.add_ee_layer(spring, rgbParams, 'Color')\n","map.add_ee_layer(spring, nirParams, 'Thermal')\n","\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"gHznnctkJsZJ"},"source":["Prepare the response variable. This is the footprints of ground mounted solar arrays as of 2016, coded into a background class [0] and a target class [1]. 
Display on the map to verify."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5Wxz9BPYHBwh"},"outputs":[],"source":["def set_landcover(ft):\n"," \"\"\"\n"," Add a 'landcover' property to a feature and set it to 1\n"," Parameters\n"," ---\n"," ft:ee.Feature\n"," feature to have property added\n"," Returns\n"," ---\n"," ee.Feature: input feature with new 'landcover' property set to 1\n"," \"\"\"\n"," return ft.set('landcover', 1)\n","\n","# Get solar footprints data from our GEE Asset\n","DE_solar_footprints = ee.FeatureCollection(\"projects/mevans-cic-solar/assets/de_footprints\")\n","# Label each polygon with property 'landcover' equal to 1\n","DE_solar_footprints = DE_solar_footprints.map(set_landcover)\n","# Create an image with all pixels equal to 0\n","blankimg = ee.Image.constant(0)\n","# Convert solar footprints to an image (band value will be 1 based on 'landcover')\n","solar_footprint = DE_solar_footprints.reduceToImage(['landcover'], ee.Reducer.first())\n","# Convert pixels of blank image to 1 where the values of the footprint image are 1\n","# and rename to 'landcover'\n","labelimg = blankimg.where(solar_footprint, solar_footprint).rename('landcover')\n","\n","solarParams = {'bands': 'landcover', 'min':0, 'max': 1}\n","\n","map = folium.Map(location = [38.9725, -75.5185])\n","map.add_ee_layer(labelimg, solarParams, 'Solar footprint')\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"F4djSxBRG2el"},"source":["Use some pre-made geometries to sample the stack in strategic locations. We constrain sampling to occur within 10km of mapped solar arrays. Because our target features are small and sparse, relative to the landscape, we also guide sampling based on their centroids to ensure that we get training data for solar arrays."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ure_WaD0itQY"},"outputs":[],"source":["def buff(ft):\n"," return ft.buffer(10000)\n","\n","def centroid(ft):\n"," return ft.centroid()\n","\n","centroids = DE_solar_footprints.map(centroid)\n","studyArea = DE_solar_footprints.map(buff).union()\n","studyImage = ee.Image(0).byte().paint(studyArea, 1)\n","studyImage = studyImage.updateMask(studyImage)\n","centroids = centroids.randomColumn('random')\n","\n","aoiParams = {'min':0, 'max': 1, 'palette': ['red']}\n","map = folium.Map(location=[38.9725, -75.5185], zoom_start=8)\n","map.add_ee_layer(studyImage, aoiParams, 'Sampling area')\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"markdown","metadata":{"id":"ZV890gPHeZqz"},"source":["# Sampling\n","\n","The mapped data look reasonable so take a sample from each polygon and merge the results into a single export. The key step is sampling the array image at points, to get all the pixels in a 256x256 neighborhood at each point. It's worth noting that to build the training and testing data for the FCNN, you export a single TFRecord file that contains patches of pixel values in each record. You do NOT need to export each training/testing patch to a different image. Since each record potentially contains a lot of data (especially with big patches or many input bands), some manual sharding of the computation is necessary to avoid the `computed value too large` error. 
Specifically, the following code takes multiple (smaller) samples within each geometry, merging the results to get a single export."]},{"cell_type":"markdown","metadata":{"id":"CTS7_ZzPDhhg"},"source":["Stack the Sentinel composite and the binary solar indicator image to create a single image from which samples can be taken. Convert the image into an array image in which each pixel stores a 256x256 patch of pixels for each band. This is a key step that bears emphasis: to export training patches, convert a multi-band image to [an array image](https://developers.google.com/earth-engine/arrays_array_images#array-images) using [`neighborhoodToArray()`](https://developers.google.com/earth-engine/api_docs#eeimageneighborhoodtoarray), then sample the image at points."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"eGHYsdAOipa4"},"outputs":[],"source":["featureStack = ee.Image.cat([\n"," fall.select(BANDS),\n"," labelimg.select(RESPONSE)\n","])\n","\n","ls = ee.List.repeat(1, KERNEL_SIZE)\n","lists = ee.List.repeat(ls, KERNEL_SIZE)\n","kernel = ee.Kernel.fixed(KERNEL_SIZE, KERNEL_SIZE, lists)\n","\n","arrays = featureStack.neighborhoodToArray(kernel)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1T1cc6haU_oS"},"outputs":[],"source":["# Preview the destination path for calibrated training data\n","join(BUCKET_PATH, FOLDER, TRAIN_BASE, 'calibrated/')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"2CqL0Y6iLQPP"},"outputs":[],"source":["!gsutil mv {join(BUCKET_PATH, FOLDER, TRAIN_BASE, '*')} {join(BUCKET_PATH, FOLDER, TRAIN_BASE, 'calibrated/')}"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VXes-Ot17RGI"},"outputs":[],"source":["!gsutil ls gs://cvod-203614-mlengine/NC_solar/data/predict"]},{"cell_type":"markdown","metadata":{"id":"aJ4nGSvdYop6"},"source":["First we'll collect image patches from the centroids of known solar array locations."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"F1W2sVmmsv15"},"outputs":[],"source":["# Convert the centroids to a list so we can slice it into batches\n","S = centroids.size().getInfo()\n","centroidList = centroids.toList(S)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"form","id":"FyRpvwENxE-A"},"outputs":[],"source":["#@title Centroids slicing\n","# Get samples from delineated features using slice() on a feature collection\n","# NOTE: this takes days to run - probably not the optimal approach\n","\n","x = 250\n","\n","while x < 700:\n"," # sampleRegions expects a FeatureCollection, not a Geometry\n"," region = ee.FeatureCollection(centroidList.slice(x, x+50))\n"," sample = arrays.sampleRegions(\n"," collection = region,\n"," scale = 10,\n"," tileScale = 12\n"," )\n"," x += 50\n"," \n"," # assign a random number to samples and create a 70/30 train/test split\n"," sample = sample.randomColumn('random')\n"," training = sample.filter(ee.Filter.gte('random', 0.3))\n"," testing = sample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_trainCentfall' + str(x)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_evalCentfall' + str(x)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()
task.start()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"YoJMncFKYwq2"},"outputs":[],"source":["#@title Centroids random sampling\n","\n","# Define sample sizes for shards and chunks. \n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each chunk.\n","N = 200 # Total sample size in each chunk.\n","C = 5 # Number of chunks\n","\n","iterator = iter(range(N*C))\n","\n","# for each 'chunk' - which defines 2 export tasks per chunk: 1 train, 1 eval\n","for c in range(C):\n"," geomSample = ee.FeatureCollection([])\n","\n"," # for each 'shard' - which defines a batch of samples of size N/n\n"," for i in range(n):\n"," # generate a different seed for this iteration\n"," seed = next(iterator)\n"," sample = arrays.sample(\n"," region = NC_solar_footprints,\n"," scale = 10,\n"," numPixels = N/n,\n"," seed = seed,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," #divide samples into training and evaluation data\n"," geomSample = geomSample.randomColumn('random')\n"," training = geomSample.filter(ee.Filter.gte('random', 0.3))\n"," testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_footprintTrain'+str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, TRAINING_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_footprintEval' + str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"QuRyLGmOYmrR"},"outputs":[],"source":["#@title Random sampling\n","\n","# Define sample sizes for shards and chunks. 
\n","# These numbers determined experimentally.\n","n = 100 # Number of shards in each chunk.\n","N = 1000 # Total sample size in each chunk.\n","C = 2# Number of chunks\n","\n","iterator = iter(range(N*C))\n","\n","for c in range(C):\n"," geomSample = ee.FeatureCollection([])\n","\n"," for i in range(n):\n"," seed = next(iterator)\n"," sample = arrays.sample(\n"," region = studyArea,\n"," scale = 10,\n"," numPixels = N/n,\n"," seed = seed,\n"," tileScale = 8\n"," )\n"," geomSample = geomSample.merge(sample)\n","\n"," #divide samples into training and evaluation data\n"," geomSample = geomSample.randomColumn('random')\n"," training = geomSample.filter(ee.Filter.gte('random', 0.3))\n"," testing = geomSample.filter(ee.Filter.lt('random', 0.3))\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_trainfall'+str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = training,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, TRAIN_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start()\n","\n"," desc = 'UNET_' + str(KERNEL_SIZE) + '_evalfall' + str(c)\n"," task = ee.batch.Export.table.toCloudStorage(\n"," collection = testing,\n"," description = desc, \n"," bucket = BUCKET, \n"," fileNamePrefix = join(FOLDER, EVAL_BASE, desc),\n"," fileFormat = 'TFRecord',\n"," selectors = BANDS + [RESPONSE]\n"," )\n"," task.start() "]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Nj1sFkUyYgnj"},"outputs":[],"source":[]},{"cell_type":"markdown","metadata":{"id":"dk51-l7MH2Sa"},"source":["# Model data"]},{"cell_type":"code","source":["# Tensorflow setup.\n","import tensorflow as tf\n","device_name = tf.test.gpu_device_name()\n","tf.executing_eagerly()\n","print(tf.__version__)\n","print(device_name)\n","%load_ext tensorboard"],"metadata":{"id":"DsCx4Q04f5bA"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["COLUMNS = [\n"," tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES\n","]\n","FEATURES_DICT = dict(zip(FEATURES, COLUMNS))\n","\n","# Sizes of the training and evaluation datasets.\n","TRAIN_SIZE = 7700\n","EVAL_SIZE = 3300\n","\n","# Specify model training parameters.\n","BATCH_SIZE = 16\n","EPOCHS = 20\n","BUFFER_SIZE = 11000\n","OPTIMIZER = tf.keras.optimizers.Adam(learning_rate=0.0009, beta_1=0.9, beta_2=0.999)\n","LOSS = 'binary_crossentropy'\n","METRICS = [tf.keras.metrics.categorical_accuracy, tf.keras.metrics.MeanIoU(num_classes=2)]"],"metadata":{"id":"-4pEYPG4gBuf"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"rWXrvBE4607G"},"source":["# Training data\n","\n","Load the data exported from Earth Engine into a `tf.data.Dataset`. The following are helper functions for that."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"ajyp48-vINuy"},"outputs":[],"source":["from utils import get_training_dataset, get_eval_dataset"]},{"cell_type":"markdown","metadata":{"id":"Xg1fa18336D2"},"source":["Use the helpers to read in the training dataset. 
Print the first record to check."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"bk9rFou0J_dZ"},"outputs":[],"source":["# make sure we have training records\n","ncPattern = join(BUCKET_PATH, 'NC_solar/data/training/UNET_256_*.tfrecord.gz')\n","ncFiles = tf.io.gfile.glob(ncPattern)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"JzpG3kUwZ9J5"},"outputs":[],"source":["training = get_training_dataset(ncFiles)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"cIueW4_Fs0ID"},"outputs":[],"source":["# check to make sure our records look like we expect\n","print(next(iter(training.take(1))))"]},{"cell_type":"markdown","metadata":{"id":"j-cQO5RL6vob"},"source":["# Evaluation data\n","\n","Now do the same thing to get an evaluation dataset. Note that unlike the training dataset, the evaluation dataset has a batch size of 1, is not repeated and is not shuffled."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"fkU1JcYlK1s3"},"outputs":[],"source":["# make sure we have eval records\n","ncPattern = join(BUCKET_PATH, 'NC_solar/data/eval/UNET_256_neg*.tfrecord.gz')\n","print(ncPattern)\n","ncFiles = tf.io.gfile.glob(ncPattern)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"48aFseSgY-Mp"},"outputs":[],"source":["ncFiles"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NpcsljQeKzq7"},"outputs":[],"source":["evaluation = get_eval_dataset(ncFiles)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"TDXcbm8e_WyC"},"outputs":[],"source":["print(next(iter(evaluation.take(1))))"]},{"cell_type":"markdown","metadata":{"id":"9JIE7Yl87lgU"},"source":["# Model\n","\n","Here we use the Keras implementation of the U-Net model as found [in the TensorFlow examples](https://github.com/tensorflow/models/blob/master/samples/outreach/blogs/segmentation_blogpost/image_segmentation.ipynb). The U-Net model takes 256x256 pixel patches as input and outputs per-pixel class probabilities. We can implement the model essentially unmodified, but we will use a weighted binary cross-entropy loss on the sigmoidal output, since we are treating this as a binary segmentation problem in which target pixels are sparse. The sigmoid activation is suitable here because the output is a per-pixel probability constrained to [0, 1]."]},{"cell_type":"markdown","metadata":{"id":"Xh2EZyyPu84H"},"source":["## Metrics"]},{"cell_type":"markdown","metadata":{"id":"HK6BKW_xMNqL"},"source":["We define a weighted binary cross entropy loss function because positive (solar) pixels are sparse in the training data. This also gives us greater control over the rates of omission and commission prediction errors.
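Concretely, `tf.nn.weighted_cross_entropy_with_logits` computes $$L = -\\frac{1}{N}\\sum_i \\left[w\\, y_i \\log\\sigma(x_i) + (1 - y_i)\\log(1 - \\sigma(x_i))\\right],$$ so a positive-class weight $w > 1$ penalizes omission (missed solar pixels) more heavily than commission.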
Because this is an image segmentation exercise, we may also be interested in the intersection over union as a loss measure."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"wsnnnz56yS3l"},"outputs":[],"source":["from tensorflow.python.keras import layers\n","from tensorflow.python.keras import losses\n","from tensorflow.python.keras import models\n","from tensorflow.python.keras import metrics\n","from tensorflow.python.keras import optimizers\n","# backend is needed by dice_coef below\n","from tensorflow.python.keras import backend as K\n","\n","def weighted_bce(y_true, y_pred):\n"," \"\"\"\n"," Compute the weighted binary cross entropy between predictions and observations\n"," Parameters:\n"," y_true (tf.Tensor): 2D tensor of labels\n"," y_pred (tf.Tensor): 2D tensor of predicted probabilities\n"," \n"," Returns:\n"," tf.Tensor: scalar mean weighted binary cross entropy\n"," \"\"\"\n"," # pos_weight > 1 up-weights the positive (solar) class to reduce omission;\n"," # pos_weight = 1 reduces to standard binary cross entropy\n"," bce = tf.nn.weighted_cross_entropy_with_logits(labels = y_true, logits = y_pred, pos_weight = 1)\n"," return tf.reduce_mean(bce)\n","\n","def dice_coef(y_true, y_pred, smooth=1, weight=0.5):\n"," \"\"\"\n"," https://github.com/daifeng2016/End-to-end-CD-for-VHR-satellite-image\n"," \"\"\"\n"," # y_true = y_true[:, :, :, -1] # if dim(3) = 1, equivalent to [8,256,256,1] ==> [8,256,256]\n"," # y_pred = y_pred[:, :, :, -1]\n"," intersection = K.sum(y_true * y_pred)\n"," union = K.sum(y_true) + weight * K.sum(y_pred)\n"," # K.mean((2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth))\n"," return ((2. * intersection + smooth) / (union + smooth)) # taking the mean did not work better\n","\n","def dice_coef_loss(y_true, y_pred):\n"," \"\"\"\n"," https://github.com/daifeng2016/End-to-end-CD-for-VHR-satellite-image\n"," \"\"\"\n"," return 1 - dice_coef(y_true, y_pred)\n","\n","def iou_loss(true, pred):\n"," \"\"\"\n"," Calculate the intersection over union loss (1 - IoU)\n"," \"\"\"\n"," intersection = true * pred\n","\n"," notTrue = 1 - true\n"," union = true + (notTrue * pred)\n","\n"," return tf.subtract(1.0, tf.reduce_sum(intersection)/tf.reduce_sum(union))\n","\n","def conv_block(input_tensor, num_filters):\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(input_tensor)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\tencoder = layers.Conv2D(num_filters, (3, 3), padding='same')(encoder)\n","\tencoder = layers.BatchNormalization()(encoder)\n","\tencoder = layers.Activation('relu')(encoder)\n","\treturn encoder\n","\n","def encoder_block(input_tensor, num_filters):\n","\tencoder = conv_block(input_tensor, num_filters)\n","\tencoder_pool = layers.MaxPooling2D((2, 2), strides=(2, 2))(encoder)\n","\treturn encoder_pool, encoder\n","\n","def decoder_block(input_tensor, concat_tensor, num_filters):\n","\tdecoder = layers.Conv2DTranspose(num_filters, (2, 2), strides=(2, 2), padding='same')(input_tensor)\n","\tdecoder = layers.concatenate([concat_tensor, decoder], axis=-1)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\tdecoder = layers.Conv2D(num_filters, (3, 3), padding='same')(decoder)\n","\tdecoder = layers.BatchNormalization()(decoder)\n","\tdecoder = layers.Activation('relu')(decoder)\n","\treturn decoder\n","\n","def get_model():\n","\tinputs = layers.Input(shape=[None, None, len(BANDS)])\n","\tencoder0_pool, encoder0 = encoder_block(inputs, 32)\n","\tencoder1_pool, encoder1 = encoder_block(encoder0_pool, 64)\n",
64)\n","\tencoder2_pool, encoder2 = encoder_block(encoder1_pool, 128)\n","\tencoder3_pool, encoder3 = encoder_block(encoder2_pool, 256)\n","\tencoder4_pool, encoder4 = encoder_block(encoder3_pool, 512)\n","\tcenter = conv_block(encoder4_pool, 1024)# center\n","\tdecoder4 = decoder_block(center, encoder4, 512)\n","\tdecoder3 = decoder_block(decoder4, encoder3, 256)\n","\tdecoder2 = decoder_block(decoder3, encoder2, 128)\n","\tdecoder1 = decoder_block(decoder2, encoder1, 64)\n","\tdecoder0 = decoder_block(decoder1, encoder0, 32)\n","\toutputs = layers.Conv2D(1, (1, 1), activation='sigmoid')(decoder0)\n","\n","\tmodel = models.Model(inputs=[inputs], outputs=[outputs])\n","\n","\tmodel.compile(\n","\t\toptimizer=OPTIMIZER, \n"," loss = weighted_bce,\n","\t\t#loss=losses.get(LOSS),\n","\t\tmetrics=[metrics.get(metric) for metric in METRICS])\n","\n","\treturn model\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"PA2gJENE8-J1"},"outputs":[],"source":["# set up tensorboard and checkpoint callbacks\n","log_dir = 'drive/MyDrive/Tensorflow/NC_solar/models/UNET256/Uncalibrated/Seasonal'\n","\n","tensorboard = tf.keras.callbacks.TensorBoard(log_dir= log_dir)\n","\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," join(log_dir, 'best_weights.hdf5'),\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )"]},{"cell_type":"markdown","metadata":{"id":"uu_E7OTDBCoS"},"source":["# Training the model\n","\n","You train a Keras model by calling `.fit()` on it. Here we're going to train for 10 epochs, which is suitable for demonstration purposes. For production use, you probably want to optimize this parameter, for example through [hyperparamter tuning](https://cloud.google.com/ml-engine/docs/tensorflow/using-hyperparameter-tuning)."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"5yQPxgtISibx"},"outputs":[],"source":["m = get_model()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NzzaWxOhSxBy"},"outputs":[],"source":["m.fit(\n"," x=training, \n"," epochs=EPOCHS, \n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE), \n"," validation_data=evaluation,\n"," validation_steps=int(EVAL_SIZE/BATCH_SIZE),\n"," callbacks = [checkpoint, tensorboard]\n"," )\n","\n","#We save the model definition and weights to google drive (free) \n","m.save(join(log_dir, 'UNET256.h5'))"]},{"cell_type":"markdown","metadata":{"id":"zvIqqpNXqJSE"},"source":["##Train from checkpoints\n","If we want to resume or continue training from a previous checkpoint we load the model and best weights from GDrive, check the current accuracy on the evaluation data, and resume training."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"q0xgBhsaqInV"},"outputs":[],"source":["#bring in the architecture and best weights from Drive\n","m = models.load_model(join(log_dir, 'UNET256.h5'), custom_objects={'weighted_bce': weighted_bce})\n","# m.load_weights(join(log_dir, 'best_weights.hdf5'))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"umZy0rBzs1Th"},"outputs":[],"source":["#lets see where were at\n","evalMetrics = m.evaluate(x=evaluation, verbose = 1)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xlsFciElxOUA"},"outputs":[],"source":["#set the monitored value (val_mean_io_u) to current evaluation output\n","checkpoint = tf.keras.callbacks.ModelCheckpoint(\n"," join(log_dir, 'best_weights.hdf5'),\n"," monitor='val_mean_io_u',\n"," verbose=1,\n"," save_best_only=True,\n"," mode='max'\n"," )\n","\n","checkpoint.best = 
"print(checkpoint.__dict__)\n","print(checkpoint.best)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Ty8wCxDtqWBM"},"outputs":[],"source":["# Now keep training!\n","m.fit(\n"," x=training,\n"," epochs=10,\n"," steps_per_epoch=int(TRAIN_SIZE / BATCH_SIZE),\n"," validation_data=evaluation,\n"," validation_steps=int(EVAL_SIZE / BATCH_SIZE),\n"," callbacks = [checkpoint, tensorboard]\n"," )"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"tyhWcGHJ82e8"},"outputs":[],"source":["m.save(join(log_dir, 'UNET256.h5'))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"i9OM5BiS1xYQ"},"outputs":[],"source":["%tensorboard --logdir 'drive/My Drive/Tensorflow/models/UNET256'"]},{"cell_type":"markdown","metadata":{"id":"J1ySNup0xCqN"},"source":["# Prediction\n","\n","The prediction pipeline is:\n","\n","1. Export imagery on which to do predictions from Earth Engine in TFRecord format to a Cloud Storage bucket.\n","2. Use the trained model to make the predictions.\n","3. Write the predictions to a TFRecord file in Cloud Storage.\n","4. Upload the prediction TFRecord file to Earth Engine.\n","\n","The following functions handle this process. It's useful to separate the export from the predictions so that you can experiment with different models without running the export every time."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lv6nb0ShH4_T"},"outputs":[],"source":["# Inspect the prediction outputs\n","predictions = m.predict(evaluation, steps=1, verbose=1)\n","for prediction in predictions:\n"," print(prediction)"]},{"cell_type":"markdown","metadata":{"id":"_FAgadEJcZoz"},"source":["### Functions"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"M3WDAa-RUpXP"},"outputs":[],"source":["def doExport(image, path, out_image_base, kernel_buffer, region):\n"," \"\"\"\n"," Run an image export task on which to run predictions. Block until complete.\n"," Parameters:\n"," image (ee.Image): image to be exported for prediction\n"," path (str): google cloud directory path for export\n"," out_image_base (str): base filename of exported image\n"," kernel_buffer (array): pixels to buffer the prediction patch;
half is added to each side\n"," region (ee.Geometry): region to export\n"," \"\"\"\n"," task = ee.batch.Export.image.toCloudStorage(\n"," image = image.select(BANDS),\n"," description = out_image_base,\n"," bucket = BUCKET,\n"," fileNamePrefix = join(path, out_image_base),\n"," region = region,#.getInfo()['coordinates'],\n"," scale = 10,\n"," fileFormat = 'TFRecord',\n"," maxPixels = 1e13,\n"," formatOptions = {\n"," 'patchDimensions': KERNEL_SHAPE,\n"," 'kernelSize': kernel_buffer,\n"," 'compressed': True,\n"," 'maxFileSize': 104857600\n"," }\n"," )\n"," task.start()\n","\n"," # Block until the task completes.\n"," print('Running image export to Cloud Storage...')\n"," import time\n"," while task.active():\n"," time.sleep(30)\n","\n"," # Error condition\n"," if task.status()['state'] != 'COMPLETED':\n"," print('Error with image export.')\n"," else:\n"," print('Image export completed.')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zb_9_FflygVw"},"outputs":[],"source":["def doPrediction(pred_path, pred_image_base, user_folder, out_image_base, kernel_buffer, region):\n"," \"\"\"\n"," Perform inference on exported imagery, upload to Earth Engine.\n"," Parameters:\n"," pred_path (str): Google cloud (or Drive) path storing prediction image files\n"," pred_image_base (str): base filename of the exported prediction images\n"," user_folder (str): GEE directory to store asset\n"," out_image_base (str): base filename for GEE asset\n"," kernel_buffer (array): length-2 array of buffer pixels\n"," region (ee.Geometry): region that was exported\n"," \"\"\"\n","\n"," print('Looking for TFRecord files...')\n"," \n"," # Get a list of all the files in the output bucket.\n"," filesList = !gsutil ls {join(BUCKET_PATH, pred_path)}\n"," # Get only the files generated by the image export.\n"," exportFilesList = [s for s in filesList if pred_image_base in s]\n","\n"," # Get the list of image files and the JSON mixer file.\n"," imageFilesList = []\n"," jsonFile = None\n"," for f in exportFilesList:\n"," if f.endswith('.tfrecord.gz'):\n"," imageFilesList.append(f)\n"," elif f.endswith('.json'):\n"," jsonFile = f\n","\n"," # Make sure the files are in the right order.\n"," imageFilesList.sort()\n","\n"," from pprint import pprint\n"," pprint(imageFilesList)\n"," print(jsonFile)\n"," \n"," import json\n"," # Load the contents of the mixer file to a JSON object.\n"," jsonText = !gsutil cat {jsonFile}\n"," # Get a single string w/ newlines from the IPython.utils.text.SList\n"," mixer = json.loads(jsonText.nlstr)\n"," pprint(mixer)\n"," totalPatches = mixer['totalPatches']\n"," \n"," # Get set up for prediction.\n"," x_buffer = int(kernel_buffer[0] / 2)\n"," y_buffer = int(kernel_buffer[1] / 2)\n","\n"," buffered_shape = [\n"," KERNEL_SHAPE[0] + kernel_buffer[0],\n"," KERNEL_SHAPE[1] + kernel_buffer[1]]\n","\n"," imageColumns = [\n"," tf.io.FixedLenFeature(shape=buffered_shape, dtype=tf.float32)\n"," for k in BANDS\n"," ]\n","\n"," imageFeaturesDict = dict(zip(BANDS, imageColumns))\n","\n"," def parse_image(example_proto):\n"," return tf.io.parse_single_example(example_proto, imageFeaturesDict)\n","\n"," def toTupleImage(dic):\n"," inputsList = [dic.get(key) for key in BANDS]\n"," stacked = tf.stack(inputsList, axis=0)\n"," stacked = tf.transpose(stacked, [1, 2, 0])\n"," # normalize() is assumed to come from the project's utils module,\n"," # alongside the dataset helpers imported above\n"," stacked = normalize(stacked, [0, 1])\n"," return stacked\n"," \n"," # Create a dataset(s) from the TFRecord file(s) in Cloud Storage.\n"," i = 0\n"," patches = 0\n"," written_files = []\n"," while i < len(imageFilesList):\n","\n"," imageDataset = tf.data.TFRecordDataset(imageFilesList[i:i+100], compression_type='GZIP')\n",
" imageDataset = imageDataset.map(parse_image, num_parallel_calls=5)\n"," imageDataset = imageDataset.map(toTupleImage).batch(1)\n"," \n"," # Perform inference.\n"," print('Running predictions...')\n"," predictions = m.predict(imageDataset, steps=None, verbose=1)\n"," # print(predictions[0])\n","\n"," out_image_file = join(BUCKET_PATH,\n"," pred_path,\n"," 'outputs',\n"," '{}{}.TFRecord'.format(out_image_base, i))\n"," \n"," print('Writing predictions to ' + out_image_file + '...')\n"," writer = tf.io.TFRecordWriter(out_image_file)\n"," for predictionPatch in predictions:\n"," print('Writing patch ' + str(patches) + '...')\n"," # clip the buffered prediction back to the central KERNEL_SIZE patch\n"," predictionPatch = predictionPatch[\n"," x_buffer:x_buffer+KERNEL_SIZE, y_buffer:y_buffer+KERNEL_SIZE]\n","\n"," # Create an example.\n"," example = tf.train.Example(\n"," features=tf.train.Features(\n"," feature={\n"," 'probability': tf.train.Feature(\n"," float_list=tf.train.FloatList(\n"," value=predictionPatch.flatten()))\n"," }\n"," )\n"," )\n"," # Write the example.\n"," writer.write(example.SerializeToString())\n"," patches += 1\n","\n"," writer.close()\n"," i += 100\n"," written_files.append(out_image_file)\n"," \n"," out_image_files = ' '.join(written_files)\n"," # Start the upload.\n"," out_image_asset = join(user_folder, out_image_base)\n"," !earthengine upload image --asset_id={out_image_asset} {out_image_files} {jsonFile}"]},{"cell_type":"markdown","metadata":{"id":"LZqlymOehnQO"},"source":["Now we have all the code needed to run the prediction pipeline; all that remains is to specify the region in which to do the prediction, the names of the output files, where to put them, and the shape of the outputs. In terms of shape, the model is trained on 256x256 patches, but can work (in theory) on any patch that's big enough with even dimensions ([reference](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Long_Fully_Convolutional_Networks_2015_CVPR_paper.pdf)). Because of tile boundary artifacts, give the model slightly larger patches for prediction, then clip out the middle 256x256 patch. This is controlled with a kernel buffer, half of which is added to each side of the patch. For example, specifying a 128x128 kernel buffer appends 64 pixels on each side of the patch, ensuring that the output pixels are taken from inputs completely covered by the kernel.
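"]},{"cell_type":"markdown","metadata":{},"source":["A quick worked example of the buffer arithmetic (this mirrors the slicing inside `doPrediction` above, assuming KERNEL_SIZE = 256 as defined at the top of the notebook):"]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["# Worked example of the kernel buffer arithmetic used by doPrediction\n","kernel_buffer = [128, 128]\n","x_buffer = kernel_buffer[0] // 2 # 64 pixels trimmed from each side\n","y_buffer = kernel_buffer[1] // 2\n","buffered_shape = [KERNEL_SIZE + kernel_buffer[0],\n"," KERNEL_SIZE + kernel_buffer[1]] # [384, 384] pixels fed to the model\n","# each buffered prediction is clipped back to the central 256x256 patch:\n","# predictionPatch[x_buffer:x_buffer + KERNEL_SIZE, y_buffer:y_buffer + KERNEL_SIZE]"]},{"cell_type":"markdown","metadata":{},"source":["Note that the buffer dimensions should be even so the trim can be split equally between the two sides of each patch.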
"]},{"cell_type":"markdown","metadata":{"id":"G9UaJxPS3uZw"},"source":["### Test images"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"BqDRwb6j27w-"},"outputs":[],"source":["# create several small aois to test predictions\n","test_aoi_1 = ee.Geometry.Polygon(\n"," [[[-78.19610376358034, 35.086989862385884],\n"," [-78.19610376358034, 34.735631502732396],\n"," [-77.67974634170534, 34.735631502732396],\n"," [-77.67974634170534, 35.086989862385884]]], None, False)\n","test_aoi_2 = ee.Geometry.Polygon(\n"," [[[-81.59087915420534, 35.84308746418702],\n"," [-81.59087915420534, 35.47711130797561],\n"," [-81.03057641983034, 35.47711130797561],\n"," [-81.03057641983034, 35.84308746418702]]], None, False)\n","test_aoi_3 = ee.Geometry.Polygon(\n"," [[[-78.74447677513596, 36.4941960586897],\n"," [-78.74447677513596, 36.17115435938789],\n"," [-78.21713302513596, 36.17115435938789],\n"," [-78.21713302513596, 36.4941960586897]]], None, False)\n","test_aoi_4 = ee.Geometry.Polygon(\n"," [[[-76.62411544701096, 36.33505523381603],\n"," [-76.62411544701096, 36.03800955668766],\n"," [-76.16818282982346, 36.03800955668766],\n"," [-76.16818282982346, 36.33505523381603]]], None, False)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"CQyLfPdt3TcA"},"outputs":[],"source":["# Create a prediciton image for the whole state\n","S2 = ee.ImageCollection(\"COPERNICUS/S2\")\n","# Grab a feature corresponding to our study area - North Carolina\n","states = ee.FeatureCollection(\"TIGER/2016/States\")\n","nc = states.filter(ee.Filter.eq('NAME', 'North Carolina'))\n","begin = '2018-05-01'\n","end = '2018-08-30'\n","\n","# The image input collection is cloud-masked.\n","filtered = S2.filterDate(begin, end)\\\n",".filterBounds(nc)\\\n",".filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20))\\\n",".map(basicQA)\n","\n","# Create a simple median composite to visualize\n","test = filtered.median().select(BANDS).clip(test_aoi_4)\n","\n","# Use folium to visualize the imagery.\n","#mapid = image.getMapId({'bands': ['B4', 'B3', 'B2'], 'min': 0, 'max': 0.3})\n","rgbParams = {'bands': ['B4', 'B3', 'B2'],\n"," 'min': 0,\n"," 'max': 0.3}\n","\n","nirParams = {'bands': ['B8', 'B11', 'B12'],\n"," 'min': 0,\n"," 'max': 0.3}\n","\n","map = folium.Map(location=[35.402, -78.376])\n","map.add_ee_layer(test, rgbParams, 'Color')\n","map.add_ee_layer(test, nirParams, 'Thermal')\n","\n","map.add_child(folium.LayerControl())\n","map"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"1YMkpeS7cjec"},"outputs":[],"source":["# break up large images into smaller pieces\n","NC_coords = ee.Array(nc.bounds().coordinates())\n","mins = NC_coords.reduce(\n"," reducer= ee.Reducer.min(),\n"," axes= [1]\n",").project([2])\n","\n","maxs = NC_coords.reduce(\n"," reducer= ee.Reducer.max(),\n"," axes= [1]\n",").project([2])\n","\n","xs = ee.List.sequence(\n"," start= mins.get([0]),\n"," end= maxs.get([0]),\n"," count= 6)\n"," \n","ys = ee.List.sequence(\n"," start= mins.get([1]),\n"," end= maxs.get([1]),\n"," count= 4)\n","\n","ls = ee.List([])\n","xsize = xs.size().getInfo() - 1\n","ysize = ys.size().getInfo() - 1\n","\n","for x in range(xsize):\n"," xmin = xs.get(x)\n"," xmax = xs.get(x+1)\n"," for y in range(ysize):\n"," ymin = ys.get(y)\n"," ymax = ys.get(y+1)\n"," box = ee.Algorithms.GeometryConstructors.Rectangle([xmin, ymin, xmax, ymax])\n"," ft = ee.Feature(box, {'id': '{}.{}'.format(x,y)})\n"," ls = ls.add(ft)\n"," y += 1\n"," x += 1\n","\n","\n","boxes = 
"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"FPANwc7B1-TS"},"outputs":[],"source":["# Choose the GEE folder in which to ingest the prediction image:\n","user_folder = 'users/defendersofwildlifeGIS/NC'\n","# prediction path\n","nc_path = join(FOLDER, PRED_BASE)\n","# Base file name to use for TFRecord files and assets. The name structure includes:\n","# the image processing used ['raw', 'calibrated', 'normalized'] and the model\n","nc_image_base = 'raw_unet256_summerpred'\n","# Half of this will extend beyond each side of the patch.\n","nc_kernel_buffer = [128, 128]\n","# NC\n","nc_region = nc # boxes.filterMetadata('id', 'equals', '1.1')"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"both","id":"lLNEOLkXWvSi"},"outputs":[],"source":["# Run the export.\n","doExport(summer, nc_path, nc_image_base, nc_kernel_buffer, nc_region)"]},{"cell_type":"code","execution_count":null,"metadata":{"cellView":"both","id":"KxACnxKFrQ_J"},"outputs":[],"source":["# Run the prediction.\n","doPrediction(pred_path = nc_path,\n"," pred_image_base = nc_image_base,\n"," user_folder = user_folder,\n"," out_image_base = 'raw_unet256_30_summer',\n"," kernel_buffer = nc_kernel_buffer,\n"," region = nc_region)"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"9FDKc2ZwzODu"},"outputs":[],"source":["# Start the upload.\n","filesList = !gsutil ls {join(BUCKET_PATH, nc_path)}\n","\n","jsonFile = [s for s in filesList if nc_image_base+'mixer.json' in s][0]\n","print(jsonFile)\n","out_image_files = [join(BUCKET_PATH, nc_path, 'outputs','raw_unet256_30_summer{:02}.TFRecord'.format(i)) for i in range(0,17)]\n","files = ' '.join(out_image_files)\n","print(files)\n","asset_id = join(user_folder, 'raw_unet256_30_summer')\n","\n","!earthengine --no-use_cloud_api upload image --asset_id={asset_id} {files} {jsonFile}"]},{"cell_type":"markdown","metadata":{"id":"uj_G9OZ1xH6K"},"source":["# Display the output\n","\n","Once the data has been exported, the model has made predictions, the predictions have been written to a file, and the image has been imported to Earth Engine, it's possible to display the resultant Earth Engine asset. Here, display the solar array predictions over test areas in North Carolina."]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Jgco6HJ4R5p2"},"outputs":[],"source":["out_image = ee.Image(user_folder + '/' + nc_image_base)\n","map = folium.Map(location=[35.402, -78.376])\n","map.add_ee_layer(out_image, {'min': 0, 'max': 1}, 'solar predictions')\n","map.add_child(folium.LayerControl())\n","map"]}],"metadata":{"accelerator":"GPU","colab":{"collapsed_sections":["hgoDc7Hilfc4","9JIE7Yl87lgU","uu_E7OTDBCoS"],"machine_shape":"hm","private_outputs":true,"provenance":[]},"kernel_info":{"name":"python3"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.9"},"nteract":{"version":"nteract-front-end@1.0.0"}},"nbformat":4,"nbformat_minor":0} 2 | --------------------------------------------------------------------------------