├── .gitignore ├── Code ├── 01_Data_Acquisition_and_Understanding │ ├── 01_HDInsight_Spark_Provisioning │ │ └── template.json │ └── 02_Batch_AI_Training_Provisioning │ │ ├── prep_nfs.sh │ │ └── retrain_model_distributed.py ├── 02_Modeling │ ├── run_batch_ai.py │ └── run_mmlspark.py ├── 03_Deployment │ └── batch_score_spark.py ├── 04_Result_Analysis │ ├── Model prediction analysis.ipynb │ └── analysis_config_loader.py └── settings.cfg ├── LICENSE.TXT ├── README.md ├── aml_config ├── conda_dependencies.yml ├── docker.compute ├── docker.runconfig ├── jupyter_notebook_config.py ├── local.compute ├── local.runconfig └── spark_dependencies.yml └── docs └── Images ├── example_labels.PNG ├── middlesex_ma.png ├── sample_tile_developed.png └── scenario_schematic.PNG /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/01_HDInsight_Spark_Provisioning/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "0.9.0.0", 4 | "parameters": { 5 | "storageAccountKey": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "The access key for the storage account." 9 | } 10 | },"storageAccountName": { 11 | "type": "string", 12 | "metadata": { 13 | "description": "The access key for the storage account." 14 | } 15 | },"clusterName": { 16 | "type": "string", 17 | "metadata": { 18 | "description": "The name of the HDInsight cluster to create." 
19 | } 20 | }, 21 | "clusterLoginUserName": { 22 | "type": "string", 23 | "defaultValue": "admin", 24 | "metadata": { 25 | "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards." 26 | } 27 | }, 28 | "clusterLoginPassword": { 29 | "type": "securestring", 30 | "metadata": { 31 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 32 | } 33 | }, 34 | "location": { 35 | "type": "string", 36 | "defaultValue": "eastus", 37 | "metadata": { 38 | "description": "The location where all azure resources will be deployed." 39 | } 40 | }, 41 | "clusterVersion": { 42 | "type": "string", 43 | "defaultValue": "3.6", 44 | "metadata": { 45 | "description": "HDInsight cluster version." 46 | } 47 | }, 48 | "clusterWorkerNodeCount": { 49 | "type": "int", 50 | "defaultValue": 40, 51 | "metadata": { 52 | "description": "The number of nodes in the HDInsight cluster." 53 | } 54 | }, 55 | "clusterKind": { 56 | "type": "string", 57 | "defaultValue": "SPARK", 58 | "metadata": { 59 | "description": "The type of the HDInsight cluster to create." 60 | } 61 | }, 62 | "sshUserName": { 63 | "type": "string", 64 | "defaultValue": "sshuser", 65 | "metadata": { 66 | "description": "These credentials can be used to remotely access the cluster." 67 | } 68 | } 69 | }, 70 | "resources": [ 71 | { 72 | "apiVersion": "2015-03-01-preview", 73 | "name": "[parameters('clusterName')]", 74 | "type": "Microsoft.HDInsight/clusters", 75 | "location": "[parameters('location')]", 76 | "dependsOn": [], 77 | "properties": { 78 | "clusterVersion": "[parameters('clusterVersion')]", 79 | "osType": "Linux", 80 | "tier": "standard", 81 | "clusterDefinition": { 82 | "kind": "[parameters('clusterKind')]", 83 | "configurations": { 84 | "gateway": { 85 | "restAuthCredential.isEnabled": true, 86 | "restAuthCredential.username": "[parameters('clusterLoginUserName')]", 87 | "restAuthCredential.password": "[parameters('clusterLoginPassword')]" 88 | } 89 | } 90 | }, 91 | "storageProfile": { 92 | "storageaccounts": [ 93 | { 94 | "name": "[parameters('storageAccountName')]", 95 | "isDefault": true, 96 | "container": "cluster", 97 | "key": "[parameters('storageAccountKey')]" 98 | } 99 | ] 100 | }, 101 | "computeProfile": { 102 | "roles": [ 103 | { 104 | "name": "headnode", 105 | "minInstanceCount": 1, 106 | "targetInstanceCount": 2, 107 | "hardwareProfile": { 108 | "vmSize": "Standard_D12_V2" 109 | }, 110 | "osProfile": { 111 | "linuxOperatingSystemProfile": { 112 | "username": "[parameters('sshUserName')]", 113 | "password": "[parameters('clusterLoginPassword')]" 114 | } 115 | }, 116 | "virtualNetworkProfile": null, 117 | "scriptActions": [ 118 | { 119 | "name": "mmlspark", 120 | "uri": "https://mmlspark.azureedge.net/buildartifacts/0.11/install-mmlspark.sh", 121 | "parameters": "", 122 | "isHeadNode": true, 123 | "isWorkerNode": true, 124 | "isPersisted": true, 125 | "isZookeeperNode": false, 126 | "isEdgeNode": false, 127 | "applicationName": null 128 | } 129 | ] 130 | }, 131 | { 132 | "name": "workernode", 133 | "minInstanceCount": 1, 134 | "targetInstanceCount": "[parameters('clusterWorkerNodeCount')]", 135 | "hardwareProfile": { 136 | "vmSize": "Standard_D4_V2" 137 | }, 138 | "osProfile": { 139 | "linuxOperatingSystemProfile": { 140 | "username": "[parameters('sshUserName')]", 141 | "password": "[parameters('clusterLoginPassword')]" 142 | } 143 | }, 144 | 
"virtualNetworkProfile": null, 145 | "scriptActions": [ 146 | { 147 | "name": "mmlspark", 148 | "uri": "https://mmlspark.azureedge.net/buildartifacts/0.11/install-mmlspark.sh", 149 | "parameters": "", 150 | "isHeadNode": true, 151 | "isWorkerNode": true, 152 | "isPersisted": true, 153 | "isZookeeperNode": false, 154 | "isEdgeNode": false, 155 | "applicationName": null 156 | } 157 | ] 158 | } 159 | ] 160 | } 161 | } 162 | } 163 | ] 164 | } 165 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/02_Batch_AI_Training_Provisioning/prep_nfs.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install unzip 3 | mkdir -p /data/training_images 4 | mkdir -p /data/validation_images 5 | wget https://mawahstorage.blob.core.windows.net/aerialimageclassification/imagesets/balanced_training_set.zip 6 | wget https://mawahstorage.blob.core.windows.net/aerialimageclassification/imagesets/balanced_validation_set.zip 7 | unzip balanced_validation_set.zip -d /data/validation_images 8 | unzip balanced_training_set.zip -d /data/training_images 9 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/02_Batch_AI_Training_Provisioning/retrain_model_distributed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | retrain_model_distributed.py 3 | by Mary Wahl, 2017 4 | Copyright Microsoft, all rights reserved 5 | 6 | Retrain AlexNet and ResNet 18 models to classify aerial images by land use. 7 | Makes use of distributed learners. 8 | Expects the following parameters: 9 | - input_dir: The parent directory containing training images. This 10 | directory should contain only subdirectories (whose names 11 | will be used as the class labels). Each subdirectory should 12 | contain only image files and should not be empty. 13 | - validation_dir: The parent directory containing validation images, similar 14 | in contents to input_dir. 15 | - output_model_name: The filepath where the retrained model will be stored. 16 | Supporting files will be stored to the same directory. 17 | - model_path: The location of the pretrained AlexNet or ResNet 18 model 18 | - retraining_type: Must be "last_only", "fully_connected", or "all". Cannot 19 | use retraining type "fully_connected" with model type 20 | "resnet18" 21 | - model_type: Must be "alexnet" or "resnet18" 22 | 23 | Side effects: 24 | This script will create a temporary directory, in which it will write MAP 25 | files The directory will be removed on completion. 26 | ''' 27 | 28 | import numpy as np 29 | import pandas as pd 30 | import os, argparse, glob, tempfile, cntk 31 | from cntk.io import transforms as xforms 32 | import cntk.train.distributed as distributed 33 | from cntk.train.training_session import CheckpointConfig, training_session 34 | from PIL import Image 35 | 36 | 37 | def write_map_file(map_filename, input_dir, output_dir): 38 | ''' 39 | Writes the map file required by ImageDeserializer. Returns the number of 40 | distinct classes found in the training set. 
41 | ''' 42 | df = pd.DataFrame([]) 43 | df['filename'] = list(glob.iglob(os.path.join(input_dir, '*', '*'))) 44 | df['label'] = df['filename'].apply(lambda x: 45 | os.path.basename(os.path.dirname(x))) 46 | labels = list(np.sort(df['label'].unique().tolist())) 47 | with open(os.path.join(output_dir, 'labels_to_inds.tsv'), 'w') as f: 48 | for i, label in enumerate(labels): 49 | f.write('{}\t{}\n'.format(label, i)) 50 | df['idx'] = df['label'].apply(lambda x: labels.index(x)) 51 | df = df[['filename', 'idx']].sample(frac=1) 52 | df.to_csv(map_filename, index=False, sep='\t', header=False) 53 | return(len(labels), len(df.index)) 54 | 55 | 56 | def create_minibatch_source(map_filename, num_classes): 57 | transforms = [xforms.crop(crop_type='randomside', 58 | side_ratio=0.85, 59 | jitter_type='uniratio'), 60 | xforms.scale(width=224, 61 | height=224, 62 | channels=3, 63 | interpolations='linear'), 64 | xforms.color(brightness_radius=0.2, 65 | contrast_radius=0.2, 66 | saturation_radius=0.2)] 67 | return(cntk.io.MinibatchSource(cntk.io.ImageDeserializer( 68 | map_filename, 69 | cntk.io.StreamDefs( 70 | features=cntk.io.StreamDef( 71 | field='image', transforms=transforms, is_sparse=False), 72 | labels=cntk.io.StreamDef( 73 | field='label', shape=num_classes, is_sparse=False))))) 74 | 75 | 76 | def load_alexnet_model(image_input, num_classes, model_filename, 77 | retraining_type): 78 | ''' Load pretrained AlexNet for desired level of retraining ''' 79 | loaded_model = cntk.load_model(model_filename) 80 | 81 | # Load the convolutional layers, freezing if desired 82 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features') 83 | last_conv_node = cntk.logging.graph.find_by_name(loaded_model, 'conv5.y') 84 | conv_layers = cntk.ops.combine([last_conv_node.owner]).clone( 85 | cntk.ops.functions.CloneMethod.clone if retraining_type == 'all' \ 86 | else cntk.ops.functions.CloneMethod.freeze, 87 | {feature_node: cntk.ops.placeholder()}) 88 | 89 | # Load the fully connected layers, freezing if desired 90 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'h2_d') 91 | fully_connected_layers = cntk.ops.combine([last_node.owner]).clone( 92 | cntk.ops.functions.CloneMethod.freeze if retraining_type == \ 93 | 'last_only' else cntk.ops.functions.CloneMethod.clone, 94 | {last_conv_node: cntk.ops.placeholder()}) 95 | 96 | # Define the network using the loaded layers 97 | feat_norm = image_input - cntk.layers.Constant(114) 98 | conv_out = conv_layers(feat_norm) 99 | fc_out = fully_connected_layers(conv_out) 100 | new_model = cntk.layers.Dense(shape=num_classes, name='lastlayer')(fc_out) 101 | return(new_model) 102 | 103 | 104 | def load_resnet18_model(image_input, num_classes, model_filename, 105 | retraining_type): 106 | ''' Load pretrained ResNet18 for desired level of retraining ''' 107 | 108 | # Load existing layers, freezing as desired 109 | loaded_model = cntk.load_model(model_filename) 110 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features') 111 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'z.x') 112 | cloned_layers = cntk.ops.combine([last_node.owner]).clone( 113 | cntk.ops.functions.CloneMethod.freeze if retraining_type == \ 114 | 'last_only' else cntk.ops.functions.CloneMethod.clone, 115 | {feature_node: cntk.ops.placeholder()}) 116 | 117 | # Define the network using the loaded layers 118 | feat_norm = image_input - cntk.layers.Constant(114) 119 | cloned_out = cloned_layers(feat_norm) 120 | W = cntk.ops.parameter(shape=(512, 1, 1, 
num_classes), 121 | init=cntk.initializer.glorot_uniform()) 122 | b = cntk.ops.parameter(shape=num_classes, init=0) 123 | new_model = cntk.ops.plus(cntk.ops.times(cloned_out, W, name='lasttimes'), 124 | b, name='lastplus') 125 | return(new_model) 126 | 127 | 128 | def retrain_model(map_filename, output_dir, num_classes, epoch_size, 129 | model_filename, num_epochs, model_type, retraining_type): 130 | ''' Coordinates retraining after MAP file creation ''' 131 | 132 | # load minibatch and model 133 | minibatch_source = create_minibatch_source(map_filename, num_classes) 134 | 135 | image_input = cntk.ops.input_variable((3, 224, 224)) 136 | label_input = cntk.ops.input_variable((num_classes)) 137 | input_map = {image_input: minibatch_source.streams.features, 138 | label_input: minibatch_source.streams.labels} 139 | 140 | if model_type == 'alexnet': 141 | model = load_alexnet_model(image_input, num_classes, model_filename, 142 | retraining_type) 143 | elif model_type == 'resnet18': 144 | model = load_resnet18_model(image_input, num_classes, model_filename, 145 | retraining_type) 146 | 147 | # Set learning parameters 148 | ce = cntk.losses.cross_entropy_with_softmax(model, label_input) 149 | pe = cntk.metrics.classification_error(model, label_input) 150 | l2_reg_weight = 0.0005 151 | lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001] 152 | momentum_time_constant = 10 153 | mb_size = 16 154 | lr_schedule = cntk.learners.learning_rate_schedule(lr_per_sample, 155 | unit=cntk.UnitType.sample) 156 | mm_schedule = cntk.learners.momentum_as_time_constant_schedule( 157 | momentum_time_constant) 158 | 159 | # Instantiate the appropriate trainer object 160 | my_rank = distributed.Communicator.rank() 161 | num_workers = distributed.Communicator.num_workers() 162 | num_minibatches = int(np.ceil(epoch_size / mb_size)) 163 | 164 | progress_writers = [cntk.logging.progress_print.ProgressPrinter( 165 | tag='Training', 166 | num_epochs=num_epochs, 167 | freq=num_minibatches, 168 | rank=my_rank)] 169 | learner = cntk.learners.fsadagrad(parameters=model.parameters, 170 | lr=lr_schedule, 171 | momentum=mm_schedule, 172 | l2_regularization_weight=l2_reg_weight) 173 | if num_workers > 1: 174 | parameter_learner = distributed.data_parallel_distributed_learner( 175 | learner, num_quantization_bits=32) 176 | trainer = cntk.Trainer(model, (ce, pe), parameter_learner, 177 | progress_writers) 178 | else: 179 | trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers) 180 | 181 | # Print summary lines to stdout and perform training 182 | if my_rank == 0: 183 | print('Retraining model for {} epochs.'.format(num_epochs)) 184 | print('Found {} workers'.format(num_workers)) 185 | print('Printing progress every {} minibatches'.format(num_minibatches)) 186 | cntk.logging.progress_print.log_number_of_parameters(model) 187 | 188 | training_session( 189 | trainer=trainer, 190 | max_samples=num_epochs * epoch_size, 191 | mb_source=minibatch_source, 192 | mb_size=mb_size, 193 | model_inputs_to_streams=input_map, 194 | checkpoint_config=CheckpointConfig( 195 | frequency=epoch_size, 196 | filename=os.path.join(output_dir, 'retrained_checkpoint.model')), 197 | progress_frequency=epoch_size 198 | ).train() 199 | 200 | distributed.Communicator.finalize() 201 | if my_rank == 0: 202 | trainer.model.save(os.path.join(output_dir, 'retrained.model')) 203 | 204 | return(my_rank) 205 | 206 | 207 | def evaluate_model(map_filename, output_dir, num_classes): 208 | ''' Evaluate the model on the test set, storing predictions to a 
file '''
209 |     inds_to_labels = {}
210 |     with open(os.path.join(output_dir, 'labels_to_inds.tsv'), 'r') as f:
211 |         for line in f:
212 |             label, ind = line.strip().split('\t')
213 |             inds_to_labels[int(ind)] = label
214 | 
215 |     loaded_model = cntk.load_model(os.path.join(output_dir, 'retrained.model'))
216 |     with open(map_filename, 'r') as f:
217 |         with open(os.path.join(output_dir, 'predictions.csv'), 'w') as g:
218 |             g.write('filename,label,pred_label\n')
219 |             for line in f:
220 |                 filename, true_ind = line.strip().split('\t')
221 |                 image_data = np.array(Image.open(filename), dtype=np.float32)
222 |                 image_data = np.ascontiguousarray(np.transpose(
223 |                     image_data[:, :, ::-1], (2,0,1)))
224 |                 dnn_output = loaded_model.eval(
225 |                     {loaded_model.arguments[0]: [image_data]})
226 |                 true_label = inds_to_labels[int(true_ind)]
227 |                 pred_label = inds_to_labels[np.argmax(np.squeeze(dnn_output))]
228 |                 g.write('{},{},{}\n'.format(filename, true_label, pred_label))
229 | 
230 |     df = pd.read_csv(os.path.join(output_dir, 'predictions.csv'))
231 |     num_correct = len(df.loc[df['label'] == df['pred_label']].index)
232 |     print('Overall accuracy on test set: {:0.3f}'.format(
233 |         num_correct /
234 |         len(df.index)))
235 | 
236 |     return
237 | 
238 | 
239 | def main(input_dir, validation_dir, output_dir, model_filename, num_epochs,
240 |          model_type, retraining_type):
241 |     ''' Coordinates all activities for the script '''
242 | 
243 |     # Create a temporary directory to house the MAP file
244 |     with tempfile.TemporaryDirectory() as temp_dir:
245 |         training_map_filename = os.path.join(temp_dir, 'map_train.tsv')
246 |         validation_map_filename = os.path.join(temp_dir, 'map_test.tsv')
247 | 
248 |         _, _ = write_map_file(validation_map_filename, validation_dir, output_dir)
249 |         num_classes, epoch_size = write_map_file(training_map_filename,
250 |                                                  input_dir, output_dir)
251 | 
252 |         my_rank = retrain_model(training_map_filename, output_dir,
253 |                                 num_classes, epoch_size, model_filename,
254 |                                 num_epochs, model_type, retraining_type)
255 |         if my_rank == 0:
256 |             evaluate_model(validation_map_filename, output_dir, num_classes)
257 | 
258 |     return
259 | 
260 | 
261 | if __name__ == '__main__':
262 |     parser = argparse.ArgumentParser(description='''
263 | Retrains a pretrained DNN model using supplied images. Creates MAP files
264 | (in a temporary directory) used by ImageDeserializer during training and
265 | validation. Outputs the retrained model, a tsv file mapping the class names to
266 | indices, and the validation set predictions to the specified directory.
267 | ''')
268 |     parser.add_argument('-i', '--input_dir', type=str, required=True,
269 |                         help='Directory containing all training image files' +
270 |                         ' in subfolders named by class.')
271 |     parser.add_argument('-v', '--validation_dir', type=str, required=True,
272 |                         help='Directory containing all test image files' +
273 |                         ' in subfolders named by class.')
274 |     parser.add_argument('-o', '--output_dir',
275 |                         type=str, required=True,
276 |                         help='Output directory for the model. 
Supporting ' + 277 | 'files will be placed in the same folder.') 278 | parser.add_argument('-m', '--model_filename', 279 | type=str, required=True, 280 | help='Filepath of the pretrained model.') 281 | parser.add_argument('-n', '--num_epochs', 282 | type=int, required=True, 283 | help='Number of epochs to retrain the model.') 284 | parser.add_argument('-t', '--model_type', type=str, required=True, 285 | help='The model type to retrain, which should be ' + 286 | 'either "resnet18" or "alexnet".') 287 | parser.add_argument('-r', '--retraining_type', 288 | type=str, required=True, 289 | help='Specifies which layers to retrain in the model.' + 290 | ' Should be one of "last_only", "fully_connected", ' + 291 | 'or "all". Cannot use "fully_connected" retraining ' + 292 | 'type with "resnet18" model type.') 293 | args = parser.parse_args() 294 | 295 | # Ensure argument values are acceptable before proceeding 296 | assert os.path.exists(args.input_dir), \ 297 | 'Input directory {} does not exist'.format(args.input_dir) 298 | assert os.path.exists(args.validation_dir), \ 299 | 'Validation directory {} does not exist'.format(args.validation_dir) 300 | assert os.path.exists(args.model_filename), \ 301 | 'Model file {} does not exist'.format(args.model_filename) 302 | assert args.num_epochs > 0, 'Number of epochs must be greater than zero' 303 | assert args.model_type in ['resnet18', 'alexnet'], \ 304 | 'Model type must be "resnet18" or "alexnet" (without the quotes).' 305 | assert args.retraining_type in ['last_only', 'fully_connected', 'all'], \ 306 | 'Retraining type must be "last_only", "fully_connected", or "all" ' + \ 307 | '(without the quotes).' 308 | if (args.retraining_type == 'fully_connected') and \ 309 | (args.model_type == 'resnet18'): 310 | raise Exception('Can only use "all" or "last_only" retraining types ' + 311 | 'with ResNet 18.') 312 | os.makedirs(args.output_dir, exist_ok=True) 313 | 314 | main(args.input_dir, args.validation_dir, args.output_dir, 315 | args.model_filename, args.num_epochs, args.model_type, 316 | args.retraining_type) 317 | -------------------------------------------------------------------------------- /Code/02_Modeling/run_batch_ai.py: -------------------------------------------------------------------------------- 1 | ''' 2 | run_batch_ai.py 3 | (c) Microsoft Corporation, 2017 4 | 5 | This script is designed to call Batch AI training from Vienna and log the 6 | results to Vienna's run history feature. This script assumes that the 7 | associated config file and Azure file share have been set up in advance. It 8 | waits for the cluster to reach steady state (if necessary), submits the job, 9 | downloads its output after completion, and finally parses the output 10 | files to return metrics to Vienna's run history. 
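Example invocation (all argument values below are illustrative placeholders;
see the argparse definitions at the bottom of this script for the full set of
options):
    python run_batch_ai.py -p resnet18 -r last_only -c settings.cfg \
        -o my_resnet18_model -n 10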
11 | '''
12 | import argparse, os, time, datetime, requests, re
13 | import azure.mgmt.batchai as training
14 | import azure.mgmt.batchai.models as tm
15 | from azure.common.credentials import ServicePrincipalCredentials
16 | from azureml.logging import get_azureml_logger
17 | import pandas as pd
18 | from configparser import ConfigParser
19 | from azure.storage.file import FileService
20 | from azure.storage.blob import BlockBlobService
21 | from tempfile import TemporaryFile
22 | 
23 | def ensure_str(str_data):
24 |     ''' Helper function to correct type of imported strings '''
25 |     if isinstance(str_data, str):
26 |         return(str_data)
27 |     return(str_data.encode('utf-8'))
28 | 
29 | class ConfigFile(object):
30 |     ''' Copies ConfigParser results into attributes, correcting type '''
31 |     def __init__(self, config_filename):
32 |         ''' Load static info for cluster/job creation from a config file '''
33 |         config = ConfigParser(allow_no_value=True)
34 |         config.read(config_filename)
35 |         my_config = config['Settings']
36 | 
37 |         # General info needed for creating clients/clusters/jobs
38 |         self.bait_subscription_id = ensure_str(my_config['bait_subscription_id'])
39 |         self.bait_aad_client_id = ensure_str(my_config['bait_aad_client_id'])
40 |         self.bait_aad_secret = ensure_str(my_config['bait_aad_secret'])
41 |         self.bait_aad_token_uri = 'https://login.microsoftonline.com/' + \
42 |             '{0}/oauth2/token'.format(ensure_str(my_config['bait_aad_tenant']))
43 |         self.bait_region = ensure_str(my_config['bait_region'])
44 |         self.bait_resource_group_name = ensure_str(
45 |             my_config['bait_resource_group_name'])
46 |         self.bait_vms_in_cluster = int(my_config['bait_vms_in_cluster'])
47 |         self.bait_vms_per_job = int(my_config['bait_vms_per_job'])
48 |         self.bait_cluster_name = ensure_str(my_config['bait_cluster_name'])
49 | 
50 |         assert self.bait_vms_per_job <= self.bait_vms_in_cluster, \
51 |             'Number of VMs for job ({}) exceeds number of VMs in cluster ({})'.format(
52 |                 self.bait_vms_per_job, self.bait_vms_in_cluster)
53 |         assert self.bait_vms_per_job > 0, \
54 |             'Number of VMs used for the job must be greater than zero.'
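        # Illustrative sketch of the [Settings] section this class expects in
        # the config file (key names are taken from the lookups above and
        # below; every value shown is a placeholder, not a real credential):
        #
        #   [Settings]
        #   bait_subscription_id = <azure-subscription-guid>
        #   bait_aad_client_id = <service-principal-app-id>
        #   bait_aad_secret = <service-principal-secret>
        #   bait_aad_tenant = <aad-tenant-id>
        #   bait_region = eastus
        #   bait_resource_group_name = <resource-group-name>
        #   bait_vms_in_cluster = 2
        #   bait_vms_per_job = 2
        #   bait_cluster_name = <batch-ai-cluster-name>
        #   storage_account_name = <storage-account-name>
        #   storage_account_key = <storage-account-key>
        #   container_trained_models = <blob-container-for-trained-models>
        #   container_prediction_results = <blob-container-for-predictions>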
55 | 56 | # Storage account where results will be written 57 | self.storage_account_name = ensure_str( 58 | my_config['storage_account_name']) 59 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 60 | self.storage_account_fileshare_url = 'https://' + \ 61 | '{}.file.core.windows.net/baitshare'.format( 62 | self.storage_account_name) 63 | self.container_trained_models = ensure_str( 64 | my_config['container_trained_models']) 65 | self.predictions_container = ensure_str( 66 | my_config['container_prediction_results']) 67 | 68 | return 69 | 70 | 71 | def write_model_summary_to_blob(config, output_model_name, 72 | pretrained_model_type, retraining_type): 73 | ''' Writes a summary file describing the model to be used during o16n ''' 74 | output_str = '''output_model_name,{} 75 | model_source,batchaitraining 76 | pretrained_model_type,{} 77 | retraining_type,{} 78 | mmlspark_model_type,none 79 | '''.format(output_model_name, pretrained_model_type, retraining_type) 80 | file_name = '{}/model.info'.format(output_model_name) 81 | blob_service = BlockBlobService(config.storage_account_name, 82 | config.storage_account_key) 83 | blob_service.create_container(config.container_trained_models) 84 | blob_service.create_blob_from_text( 85 | config.container_trained_models, file_name, output_str) 86 | return 87 | 88 | 89 | def get_client(config): 90 | ''' Connect to Batch AI ''' 91 | client = training.BatchAIManagementClient( 92 | credentials=ServicePrincipalCredentials( 93 | client_id=config.bait_aad_client_id, 94 | secret=config.bait_aad_secret, 95 | token_uri=config.bait_aad_token_uri), 96 | subscription_id=config.bait_subscription_id, 97 | base_url=None) 98 | return(client) 99 | 100 | 101 | def get_cluster(config): 102 | ''' 103 | Checks whether a cluster with the specified name already exists. If so, it 104 | uses that cluster; otherwise, it creates a new one. 105 | ''' 106 | client = get_client(config) 107 | 108 | # Start cluster creation if necessary 109 | try: 110 | cluster = client.clusters.get(config.bait_resource_group_name, 111 | config.bait_cluster_name) 112 | except: 113 | print('Error: could not find cluster named {}'.format( 114 | config.bait_cluster_name)) 115 | 116 | return(cluster) 117 | 118 | 119 | def check_for_steady_cluster_status(config, max_sec_to_wait=1200): 120 | ''' 121 | Waits until the cluster reaches a "steady" status. Checks every ten 122 | seconds. 123 | ''' 124 | client = get_client(config) 125 | start = time.time() 126 | while (time.time() - start < max_sec_to_wait): 127 | cluster = client.clusters.get(config.bait_resource_group_name, 128 | config.bait_cluster_name) 129 | if cluster.allocation_state == tm.AllocationState.steady: 130 | print('Cluster has reached "steady" allocation state. Ready for ' + 131 | 'job submission.') 132 | if cluster.errors is not None: 133 | raise Exception('Errors were thrown during cluster creation:' + 134 | '\n{}'.format('\n'.join(cluster.errors))) 135 | return 136 | time.sleep(10) 137 | raise Exception('Max wait time exceeded for cluster to reach "steady" ' + 138 | 'state ({} seconds).'.format(max_sec_to_wait)) 139 | 140 | 141 | def submit_job(config, pretrained_model_type, retraining_type, 142 | output_model_name, num_epochs): 143 | ''' Defines and submits a job. Does not check for completion. 
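    Note: each InputDirectory defined below is surfaced to the job as an
    environment variable named AZ_BATCHAI_INPUT_<id>; for example, the
    directory with id='TRAININGDATA' is referenced on the command line as
    $AZ_BATCHAI_INPUT_TRAININGDATA, and the OutputDirectory with id='MODEL'
    as $AZ_BATCHAI_OUTPUT_MODEL.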
''' 144 | client = get_client(config) 145 | job_name = 'job{}'.format( 146 | datetime.datetime.utcnow().strftime('%m_%d_%H_%M_%S')) 147 | cluster = client.clusters.get(config.bait_resource_group_name, 148 | config.bait_cluster_name) 149 | 150 | # Define the command line arguments to the retraining script 151 | command_line_args = '--input_dir $AZ_BATCHAI_INPUT_TRAININGDATA ' + \ 152 | '--validation_dir $AZ_BATCHAI_INPUT_VALIDATIONDATA ' + \ 153 | '--output_dir $AZ_BATCHAI_OUTPUT_MODEL ' + \ 154 | '--num_epochs {} '.format(num_epochs) + \ 155 | '--retraining_type {} '.format(retraining_type) + \ 156 | '--model_type {} '.format(pretrained_model_type) + \ 157 | '--model_filename $AZ_BATCHAI_INPUT_PRETRAINEDMODELS/' 158 | if pretrained_model_type == 'alexnet': 159 | command_line_args += 'AlexNet.model' 160 | elif pretrained_model_type == 'resnet18': 161 | command_line_args += 'ResNet_18.model' 162 | 163 | # Define the job 164 | cntk_settings = tm.CNTKsettings( 165 | language_type='python', 166 | python_script_file_path='$AZ_BATCHAI_INPUT_SCRIPT/' + 167 | 'retrain_model_distributed.py', 168 | command_line_args=command_line_args, 169 | process_count=config.bait_vms_per_job) # NC6s -- one GPU per VM 170 | 171 | job_create_params = tm.job_create_parameters.JobCreateParameters( 172 | location=config.bait_region, 173 | cluster=tm.ResourceId(cluster.id), 174 | node_count=config.bait_vms_per_job, 175 | std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/afs', 176 | output_directories=[ 177 | tm.OutputDirectory( 178 | id='MODEL', 179 | path_prefix='$AZ_BATCHAI_MOUNT_ROOT/afs')], 180 | input_directories=[ 181 | tm.InputDirectory( 182 | id='SCRIPT', 183 | path='$AZ_BATCHAI_MOUNT_ROOT/afs/scripts'), 184 | tm.InputDirectory( 185 | id='PRETRAINEDMODELS', 186 | path='$AZ_BATCHAI_MOUNT_ROOT/afs/pretrainedmodels'), 187 | tm.InputDirectory( 188 | id='TRAININGDATA', 189 | path='$AZ_BATCHAI_MOUNT_ROOT/nfs/training_images'), 190 | tm.InputDirectory( 191 | id='VALIDATIONDATA', 192 | path='$AZ_BATCHAI_MOUNT_ROOT/nfs/validation_images')], 193 | cntk_settings=cntk_settings) 194 | 195 | # Submit the job 196 | job = client.jobs.create( 197 | resource_group_name=config.bait_resource_group_name, 198 | job_name=job_name, 199 | parameters=job_create_params) 200 | 201 | return(job_name) 202 | 203 | 204 | def check_for_job_completion(config, job_name, max_sec_to_wait=7200): 205 | ''' Check for the job status to change indicating completion ''' 206 | client = get_client(config) 207 | time.sleep(10) 208 | start = time.time() 209 | while (time.time() - start < max_sec_to_wait): 210 | job = client.jobs.get(config.bait_resource_group_name, job_name) 211 | if (job.execution_state == tm.ExecutionState.succeeded) or \ 212 | (job.execution_state == tm.ExecutionState.failed): 213 | return 214 | time.sleep(10) 215 | raise Exception('Max wait time exceeded for job completion ' + 216 | '({} seconds).'.format(max_sec_to_wait)) 217 | 218 | 219 | def download_from_file_share(azure_filename, local_filename): 220 | ''' Save an output file from Azure File Share ''' 221 | r = requests.get(azure_filename, stream=True) 222 | with open(local_filename, 'wb') as f: 223 | for chunk in r.iter_content(chunk_size=512 * 1024): 224 | if chunk: 225 | f.write(chunk) 226 | 227 | 228 | def transfer_fileshare_to_blob(config, fileshare_uri, output_model_name): 229 | ''' NB -- transfer proceeds via local temporary file! 
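    For example (account, share, and path names below are illustrative), a
    download URI of the form
        https://myaccount.file.core.windows.net/baitshare/jobdir/retrained.model?<sas-token>
    is decomposed into the share ('baitshare'), the subdirectory ('jobdir'),
    and the file name ('retrained.model') before the file is re-uploaded to
    blob storage.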
''' 230 | file_service = FileService(config.storage_account_name, 231 | config.storage_account_key) 232 | blob_service = BlockBlobService(config.storage_account_name, 233 | config.storage_account_key) 234 | blob_service.create_container(config.container_trained_models) 235 | blob_service.create_container(config.predictions_container) 236 | 237 | uri_core = fileshare_uri.split('.file.core.windows.net/')[1].split('?')[0] 238 | fields = uri_core.split('/') 239 | fileshare = fields.pop(0) 240 | subdirectory = '/'.join(fields[:-1]) 241 | file_name = '{}/{}'.format(output_model_name, fields[-1]) 242 | 243 | with TemporaryFile() as f: 244 | file_service.get_file_to_stream(share_name=fileshare, 245 | directory_name=subdirectory, 246 | file_name=fields[-1], 247 | stream=f) 248 | f.seek(0) 249 | if 'predictions' in fields[-1]: 250 | blob_service.create_blob_from_stream( 251 | config.predictions_container, 252 | '{}_predictions_test_set.csv'.format(output_model_name), 253 | f) 254 | else: 255 | blob_service.create_blob_from_stream( 256 | config.container_trained_models, file_name, f) 257 | 258 | return 259 | 260 | 261 | def retrieve_outputs(config, job_name, output_model_name): 262 | ''' Get stdout, stderr, retrained model, and label-to-index dict ''' 263 | client = get_client(config) 264 | status_files = client.jobs.list_output_files( 265 | resource_group_name=config.bait_resource_group_name, 266 | job_name=job_name, 267 | jobs_list_output_files_options=tm.JobsListOutputFilesOptions('stdOuterr')) 268 | for file in list(status_files): 269 | download_from_file_share(file.download_url, 270 | os.path.join('outputs', file.name)) 271 | 272 | output_files = client.jobs.list_output_files( 273 | resource_group_name=config.bait_resource_group_name, 274 | job_name=job_name, 275 | jobs_list_output_files_options=tm.JobsListOutputFilesOptions('MODEL')) 276 | for file in list(output_files): 277 | transfer_fileshare_to_blob(config, file.download_url, output_model_name) 278 | 279 | client.jobs.delete(resource_group_name=config.bait_resource_group_name, 280 | job_name=job_name) 281 | return 282 | 283 | 284 | def parse_stdout(run_logger): 285 | ''' Parse the training logs and record using Vienna SDK ''' 286 | with open(os.path.join('outputs', 'stdout.txt'), 'r') as f: 287 | lines = f.readlines() 288 | 289 | progress_re = 'Finished Epoch\[(\d+) of \d+\]: \[Training\] loss = ' + \ 290 | '([0-9.]+) \* [0-9]+, metric = ([0-9.]+)% \* [0-9]+ ' + \ 291 | '([0-9.]+)s \( ([0-9.]+) samples/s\);' 292 | progress_re2 = 'Finished Epoch\[(\d+) of \d+\]: \[Training\] loss = ' + \ 293 | '([0-9.]+) \* [0-9]+, metric = ([0-9.]+)% \* [0-9]+ ' + \ 294 | '([0-9.]+)s \(([0-9.]+) samples/s\);' 295 | p = re.compile(progress_re) 296 | p2 = re.compile(progress_re2) 297 | 298 | progress_lines = [] 299 | for line in lines: 300 | m = p.match(line) 301 | if m is not None: 302 | progress_lines.append(list(m.groups())) 303 | else: # try a minor variation 304 | m = p2.match(line) 305 | if m is not None: 306 | progress_lines.append(list(m.groups())) 307 | 308 | df = pd.DataFrame(progress_lines, 309 | columns=['epoch', 'loss', 'accuracy', 'duration', 'rate'], 310 | dtype=float).groupby('epoch').mean().reset_index() 311 | run_logger.log('training_loss', df['loss'].values.tolist()) 312 | run_logger.log('training_error_pct', df['accuracy'].values.tolist()) 313 | run_logger.log('epoch_duration', df['duration'].values.tolist()) 314 | run_logger.log('samples_per_sec', df['rate'].values.tolist()) 315 | 316 | accuracy_re = 'Overall accuracy on test set: 
([0-9.]+)' 317 | p = re.compile(accuracy_re) 318 | for line in lines: 319 | m = p.match(line) 320 | if m is not None: 321 | print('Test set accuracy: {}'.format(m.groups(1)[0])) 322 | run_logger.log('test_set_accuracy', m.groups(1)[0]) 323 | return 324 | 325 | def main(pretrained_model_type, retraining_type, config_filename, 326 | output_model_name, num_epochs): 327 | ''' Coordinate all activities for Batch AI training ''' 328 | 329 | # Log the parameters used for this run 330 | run_logger = get_azureml_logger() 331 | run_logger.log('amlrealworld.aerial_image_classification.run_batch_ai','true') 332 | run_logger.log('pretrained_model_type', pretrained_model_type) 333 | run_logger.log('config_filename', config_filename) 334 | run_logger.log('retraining_type', retraining_type) 335 | run_logger.log('output_model_name', output_model_name) 336 | 337 | # Load the configuration file and save relevant info 338 | config = ConfigFile(config_filename) 339 | write_model_summary_to_blob(config, output_model_name, 340 | pretrained_model_type, retraining_type) 341 | 342 | # Create a cluster (if necessary) and wait till it's ready 343 | get_cluster(config) 344 | check_for_steady_cluster_status(config) 345 | 346 | # Submit the job and wait until it completes 347 | job_name = submit_job(config, pretrained_model_type, retraining_type, 348 | output_model_name, num_epochs) 349 | print('Job submitted: checking for job completion') 350 | check_for_job_completion(config, job_name) 351 | print('Job complete: retrieving output files') 352 | 353 | # Download the output files and store metrics to Vienna 354 | retrieve_outputs(config, job_name, output_model_name) 355 | print('Parsing output logs') 356 | parse_stdout(run_logger) 357 | 358 | return 359 | 360 | 361 | if __name__ == '__main__': 362 | parser = argparse.ArgumentParser(description=''' 363 | Orchestrates pretrained image classifier retraining through Batch AI training. 364 | Can retrain multiple model types and to different depths. The training data for 365 | this example is fixed and provided in the docker image specified in the config 366 | file. 367 | ''') 368 | parser.add_argument('-p', '--pretrained_model_type', type=str, 369 | required=True, 370 | help='The model type to retrain, which should be ' + 371 | 'either "resnet18" or "alexnet".') 372 | parser.add_argument('-r', '--retraining_type', 373 | type=str, required=True, 374 | help='Specifies which layers to retrain in the model.' + 375 | ' Should be one of "last_only", "fully_connected", ' + 376 | 'or "all".') 377 | parser.add_argument('-c', '--config_filename', 378 | type=str, required=True, 379 | help='Filepath of the configuration file specifying ' + 380 | 'credentials for a storage account, container ' + 381 | 'registry, and Batch AI training itself.') 382 | parser.add_argument('-o', '--output_model_name', 383 | type=str, required=True, 384 | help='Retrained model files will be saved under this ' + 385 | '"subdirectory" (prefix) in the trained model blob ' + 386 | 'container specified by the config file.') 387 | parser.add_argument('-f', '--sample_frac', 388 | type=float, required=False, default=1.0, 389 | help='Subsamples data. 
Default sampling fraction is ' + 390 | '1.0 (all samples used).') 391 | parser.add_argument('-n', '--num_epochs', 392 | type=int, required=False, default=10, 393 | help='Number of epochs to retrain the model for.') 394 | args = parser.parse_args() 395 | 396 | # Ensure specified files/directories exist 397 | assert args.pretrained_model_type in ['resnet18', 'alexnet'], \ 398 | 'Pretrained model type must be "resnet18" or "alexnet".' 399 | assert args.retraining_type in ['last_only', 'fully_connected', 'all'], \ 400 | 'Retraining type must be "last_only", "fully_connected", or "all" ' + \ 401 | '(without the quotes).' 402 | assert os.path.exists(args.config_filename), \ 403 | 'Could not find config file {}'.format(args.config_filename) 404 | assert args.num_epochs > 0, 'Number of epochs must be greater than zero' 405 | os.makedirs('outputs', exist_ok=True) 406 | 407 | main(args.pretrained_model_type, args.retraining_type, args.config_filename, 408 | args.output_model_name, args.num_epochs) 409 | -------------------------------------------------------------------------------- /Code/02_Modeling/run_mmlspark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | run_mmlspark.py 3 | (c) Microsoft Corporation, 2017 4 | 5 | Trains an MMLSpark model to classify images featurized by a specified CNTK 6 | pretrained model. Saves the model and test set predictions to blob storage. 7 | Logs some evaluation metrics directly to run history. 8 | ''' 9 | 10 | import os, time, mmlspark, pyspark, argparse 11 | import numpy as np 12 | from io import BytesIO 13 | from pyspark.sql.functions import udf 14 | from pyspark.sql.types import * 15 | from pyspark.ml.classification import RandomForestClassifier, \ 16 | LogisticRegression 17 | from azureml.logging import get_azureml_logger 18 | import pandas as pd 19 | from configparser import ConfigParser 20 | from azure.storage.blob import BlockBlobService 21 | 22 | 23 | def ensure_str(str_data): 24 | ''' Helper function to correct type of imported strings ''' 25 | if isinstance(str_data, str): 26 | return(str_data) 27 | return(str_data.encode('utf-8')) 28 | 29 | class ConfigFile(object): 30 | ''' Copies ConfigParser results into attributes, correcting type ''' 31 | def __init__(self, config_filename, pretrained_model_type, 32 | mmlspark_model_type, output_model_name): 33 | ''' Load static info for cluster/job creation from a config file ''' 34 | config = ConfigParser(allow_no_value=True) 35 | config.read(config_filename) 36 | my_config = config['Settings'] 37 | self.spark = pyspark.sql.SparkSession.builder.appName('vienna') \ 38 | .getOrCreate() 39 | 40 | self.pretrained_model_type = pretrained_model_type 41 | self.mmlspark_model_type = mmlspark_model_type 42 | self.output_model_name = output_model_name 43 | 44 | # Storage account where results will be written 45 | self.storage_account_name = ensure_str( 46 | my_config['storage_account_name']) 47 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 48 | self.container_pretrained_models = ensure_str( 49 | my_config['container_pretrained_models']) 50 | self.container_trained_models = ensure_str( 51 | my_config['container_trained_models']) 52 | self.container_data_training = ensure_str( 53 | my_config['container_data_training']) 54 | self.container_data_testing = ensure_str( 55 | my_config['container_data_testing']) 56 | self.container_prediction_results = ensure_str( 57 | my_config['container_prediction_results']) 58 | 59 | # URIs where data will be loaded or 
saved 60 | self.train_uri = 'wasb://{}@{}.blob.core.windows.net/*/*.png'.format( 61 | self.container_data_training, self.storage_account_name) 62 | self.test_uri = 'wasb://{}@{}.blob.core.windows.net/*/*.png'.format( 63 | self.container_data_testing, self.storage_account_name) 64 | self.model_uri = 'wasb://{}@{}.blob.core.windows.net/{}'.format( 65 | self.container_pretrained_models, self.storage_account_name, 66 | 'ResNet_18.model' if pretrained_model_type == 'resnet18' \ 67 | else 'AlexNet.model') 68 | self.output_uri = 'wasb://{}@{}.blob.core.windows.net/{}/model'.format( 69 | self.container_trained_models, self.storage_account_name, 70 | output_model_name) 71 | self.predictions_filename = '{}_predictions_test_set.csv'.format( 72 | output_model_name) 73 | 74 | # Load the pretrained model 75 | self.last_layer_name = 'z.x' if (pretrained_model_type == 'resnet18') \ 76 | else 'h2_d' 77 | self.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 78 | .setOutputCol('features') \ 79 | .setModelLocation(self.spark, self.model_uri) \ 80 | .setOutputNodeName(self.last_layer_name) 81 | 82 | # Initialize other Spark pipeline components 83 | self.extract_label_udf = udf(lambda row: os.path.basename( 84 | os.path.dirname(row.path)), 85 | StringType()) 86 | self.extract_path_udf = udf(lambda row: row.path, StringType()) 87 | if mmlspark_model_type == 'randomforest': 88 | self.mmlspark_model_type = RandomForestClassifier(numTrees=20, 89 | maxDepth=5) 90 | elif mmlspark_model_type == 'logisticregression': 91 | self.mmlspark_model_type = LogisticRegression(regParam=0.01, 92 | maxIter=10) 93 | self.unroller = mmlspark.UnrollImage().setInputCol('image') \ 94 | .setOutputCol('unrolled') 95 | 96 | return 97 | 98 | 99 | def write_model_summary_to_blob(config, mmlspark_model_type): 100 | ''' Writes a summary file describing the model to be used during o16n ''' 101 | output_str = '''output_model_name,{} 102 | model_source,mmlspark 103 | pretrained_model_type,{} 104 | retraining_type,last_only 105 | mmlspark_model_type,{} 106 | '''.format(config.output_model_name, config.pretrained_model_type, 107 | mmlspark_model_type) 108 | file_name = '{}/model.info'.format(config.output_model_name) 109 | blob_service = BlockBlobService(config.storage_account_name, 110 | config.storage_account_key) 111 | blob_service.create_container(config.container_trained_models) 112 | blob_service.create_blob_from_text( 113 | config.container_trained_models, file_name, output_str) 114 | return 115 | 116 | 117 | def load_data(data_uri, config, sample_frac): 118 | df = config.spark.readImages(data_uri, recursive=True, 119 | sampleRatio=sample_frac).toDF('image') 120 | df = df.withColumn('label', config.extract_label_udf(df['image'])) 121 | df = df.withColumn('filepath', config.extract_path_udf(df['image'])) 122 | df = config.unroller.transform(df).select('filepath', 'unrolled', 'label') 123 | df = config.cntk_model.transform(df).select( 124 | ['filepath', 'features', 'label']) 125 | return(df) 126 | 127 | 128 | def main(pretrained_model_type, mmlspark_model_type, config_filename, 129 | output_model_name, sample_frac): 130 | # Load the configuration file 131 | config = ConfigFile(config_filename, pretrained_model_type, 132 | mmlspark_model_type, output_model_name) 133 | write_model_summary_to_blob(config, mmlspark_model_type) 134 | 135 | # Log the parameters of the run 136 | run_logger = get_azureml_logger() 137 | run_logger.log('amlrealworld.aerial_image_classification.run_mmlspark','true') 138 | 
run_logger.log('pretrained_model_type', pretrained_model_type) 139 | run_logger.log('mmlspark_model_type', mmlspark_model_type) 140 | run_logger.log('config_filename', config_filename) 141 | run_logger.log('output_model_name', output_model_name) 142 | run_logger.log('sample_frac', sample_frac) 143 | 144 | # Train and save the MMLSpark model 145 | train_df = load_data(config.train_uri, config, sample_frac) 146 | mmlspark_model = mmlspark.TrainClassifier( 147 | model=config.mmlspark_model_type, labelCol='label').fit(train_df) 148 | mmlspark_model.write().overwrite().save(config.output_uri) 149 | 150 | # Apply the MMLSpark model to the test set and save the accuracy metric 151 | test_df = load_data(config.test_uri, config, sample_frac) 152 | predictions = mmlspark_model.transform(test_df) 153 | metrics = mmlspark.ComputeModelStatistics(evaluationMetric='accuracy') \ 154 | .transform(predictions) 155 | metrics.show() 156 | run_logger.log('accuracy_on_test_set', metrics.first()['accuracy']) 157 | 158 | # Save the predictions 159 | tf = mmlspark.IndexToValue().setInputCol('scored_labels') \ 160 | .setOutputCol('pred_label') 161 | predictions = tf.transform(predictions).select( 162 | 'filepath', 'label', 'pred_label') 163 | output_str = predictions.toPandas().to_csv(index=False) 164 | blob_service = BlockBlobService(config.storage_account_name, 165 | config.storage_account_key) 166 | blob_service.create_container(config.container_prediction_results) 167 | blob_service.create_blob_from_text( 168 | config.container_prediction_results, 169 | config.predictions_filename, 170 | output_str) 171 | 172 | return 173 | 174 | 175 | if __name__ == '__main__': 176 | parser = argparse.ArgumentParser(description=''' 177 | Trains an MMLSpark model to classify images featurized by a specified CNTK 178 | pretrained model. Saves the model and test set predictions to blob storage. 179 | Logs some evaluation metrics directly to run history.''') 180 | parser.add_argument('-p', '--pretrained_model_type', type=str, 181 | required=True, 182 | help='The model type to retrain, which should be ' + 183 | 'either "resnet18" or "alexnet".') 184 | parser.add_argument('-m', '--mmlspark_model_type', 185 | type=str, required=True, 186 | help='Specifies which type of model should be ' + 187 | 'trained on featurized images. Should be either ' + 188 | '"randomforest" or "logisticregresssion".') 189 | parser.add_argument('-c', '--config_filename', 190 | type=str, required=True, 191 | help='Filepath of the configuration file specifying ' + 192 | 'credentials for a storage account, container ' + 193 | 'registry, and Batch AI training itself.') 194 | parser.add_argument('-o', '--output_model_name', 195 | type=str, required=True, 196 | help='Retrained model files will be saved under this ' + 197 | '"subdirectory" (prefix) in the trained model blob ' + 198 | 'container specified by the config file.') 199 | parser.add_argument('-f', '--sample_frac', 200 | type=float, required=False, default=1.0, 201 | help='Subsamples training and test data for faster ' + 202 | 'results. Default sampling fraction is 1.0 (all ' + 203 | 'samples used).') 204 | args = parser.parse_args() 205 | 206 | assert args.pretrained_model_type in ['resnet18', 'alexnet'], \ 207 | 'Pretrained model type must be "resnet18" or "alexnet".' 208 | assert args.mmlspark_model_type in ['randomforest', 'logisticregression'], \ 209 | 'MMLSpark model type must be "randomforest" or "logisticregression".' 
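    # Example invocation (argument values are illustrative placeholders; launch
    # with whichever Spark-enabled environment you use, e.g. spark-submit):
    #   spark-submit run_mmlspark.py -p resnet18 -m randomforest \
    #       -c settings.cfg -o my_mmlspark_model -f 0.1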
210 | assert os.path.exists(args.config_filename), \ 211 | 'Could not find config file {}'.format(args.config_filename) 212 | assert (args.sample_frac <= 1.0) and (args.sample_frac > 0.0), \ 213 | 'Sampling fraction must be between 0.0 and 1.0.' 214 | 215 | print('Arguments ok...preparing to run') 216 | main(args.pretrained_model_type, args.mmlspark_model_type, 217 | args.config_filename, args.output_model_name, args.sample_frac) 218 | -------------------------------------------------------------------------------- /Code/03_Deployment/batch_score_spark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | batch_score_spark.py 3 | by Mary Wahl 4 | (c) Microsoft Corporation, 2017 5 | 6 | Applies a trained Batch AI Training or MMLSpark model to a large static dataset 7 | in an HDInsight cluster's associated blob storage account. This script requires 8 | the following arguments: 9 | - config_filename: Includes storage account credentials and container names 10 | - output_model_name: The model name specified at the time of training; used for 11 | lookup of output files in blob storage. 12 | ''' 13 | import os, io, argparse, mmlspark, pyspark 14 | from azureml.logging import get_azureml_logger 15 | import numpy as np 16 | import pandas as pd 17 | from configparser import ConfigParser 18 | from azure.storage.blob import BlockBlobService 19 | from pyspark.sql.functions import udf 20 | from pyspark.sql.types import * 21 | from pyspark.ml.feature import IndexToString 22 | from mmlspark import TrainedClassifierModel 23 | 24 | run_logger = get_azureml_logger() 25 | run_logger.log('amlrealworld.aerial_image_classification.batch_score_spark','true') 26 | 27 | def ensure_str(str_data): 28 | ''' Helper function to correct type of imported strings ''' 29 | if isinstance(str_data, str): 30 | return(str_data) 31 | return(str_data.encode('utf-8')) 32 | 33 | class ConfigFile(object): 34 | ''' Copies ConfigParser results into attributes, correcting type ''' 35 | def __init__(self, config_filename, output_model_name): 36 | ''' Load/validate model information from a config file ''' 37 | config = ConfigParser(allow_no_value=True) 38 | config.read(config_filename) 39 | my_config = config['Settings'] 40 | self.spark = pyspark.sql.SparkSession.builder.appName('vienna') \ 41 | .getOrCreate() 42 | 43 | # Load storage account info 44 | self.storage_account_name = ensure_str( 45 | my_config['storage_account_name']) 46 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 47 | self.container_pretrained_models = ensure_str( 48 | my_config['container_pretrained_models']) 49 | self.container_trained_models = ensure_str( 50 | my_config['container_trained_models']) 51 | self.container_data_o16n = ensure_str( 52 | my_config['container_data_o16n']) 53 | self.container_prediction_results = ensure_str( 54 | my_config['container_prediction_results']) 55 | self.predictions_filename = '{}_predictions_o16n.csv'.format( 56 | output_model_name) 57 | 58 | # Load blob service and ensure containers are available 59 | blob_service = BlockBlobService(self.storage_account_name, 60 | self.storage_account_key) 61 | container_list = [i.name for i in blob_service.list_containers()] 62 | for container in [self.container_pretrained_models, 63 | self.container_trained_models, 64 | self.container_data_o16n, 65 | self.container_prediction_results]: 66 | assert container in container_list, \ 67 | 'Could not find container {} in storage '.format(container) + \ 68 | 'account 
{}'.format(self.storage_account_name) 69 | 70 | # Load information on the named model 71 | self.output_model_name = output_model_name 72 | description = blob_service.get_blob_to_text( 73 | container_name=self.container_trained_models, 74 | blob_name='{}/model.info'.format(self.output_model_name)) 75 | description_dict = {} 76 | for line in description.content.split('\n'): 77 | if len(line) == 0: 78 | continue 79 | key, val = line.strip().split(',') 80 | description_dict[key] = val 81 | self.model_source = description_dict['model_source'] 82 | self.pretrained_model_type = description_dict['pretrained_model_type'] 83 | 84 | # Create pipeline components common to both model types 85 | self.extract_path_udf = udf(lambda row: os.path.basename(row.path), 86 | StringType()) 87 | self.unroller = mmlspark.UnrollImage().setInputCol('image') \ 88 | .setOutputCol('unrolled') 89 | return 90 | 91 | def load_batchaitraining_model_components(config): 92 | ''' Loads all components needed to apply a trained BAIT model ''' 93 | # Get the CNTK model itself 94 | model_uri = 'wasb://{}@'.format(config.container_trained_models) + \ 95 | '{}.blob.core'.format(config.storage_account_name) + \ 96 | '.windows.net/{}'.format(config.output_model_name) + \ 97 | '/retrained.model' 98 | config.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 99 | .setOutputCol('features').setModelLocation(config.spark, model_uri) \ 100 | .setOutputNodeIndex(0) 101 | 102 | # Load the correspondence between indices and labels 103 | blob_service = BlockBlobService(config.storage_account_name, 104 | config.storage_account_key) 105 | labels_to_inds_str = blob_service.get_blob_to_text( 106 | container_name=config.container_trained_models, 107 | blob_name='{}/labels_to_inds.tsv'.format(config.output_model_name)) 108 | config.inds_to_labels = {} 109 | for line in labels_to_inds_str.content.split('\n'): 110 | if len(line) == 0: 111 | continue 112 | key, val = line.strip().split('\t') 113 | config.inds_to_labels[int(val)] = key 114 | 115 | return(config) 116 | 117 | 118 | def load_mmlspark_model_components(config): 119 | ''' Loads all components needed to apply a trained MMLSpark model ''' 120 | # Load the pretrained featurization model 121 | if config.pretrained_model_type == 'resnet18': 122 | model_filename = 'ResNet_18.model' 123 | last_layer_name = 'z.x' 124 | elif config.pretrained_model_type == 'alexnet': 125 | model_filename = 'AlexNet.model' 126 | last_layer_name = 'h2_d' 127 | model_uri = 'wasb://{}@'.format(config.container_pretrained_models) + \ 128 | '{}.blob.core.windows'.format(config.storage_account_name) + \ 129 | '.net/{}'.format(model_filename) 130 | config.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 131 | .setOutputCol('features').setModelLocation(config.spark, model_uri) \ 132 | .setOutputNodeName(last_layer_name) 133 | 134 | # Load the MMLSpark-trained model 135 | mmlspark_uri = 'wasb://{}@'.format(config.container_trained_models) + \ 136 | '{}.blob.core.'.format(config.storage_account_name) + \ 137 | 'windows.net/{}/model'.format(config.output_model_name) 138 | config.mmlspark_model = TrainedClassifierModel.load(mmlspark_uri) 139 | 140 | # Load the transform that will convert model output from indices to strings 141 | config.tf = mmlspark.IndexToValue().setInputCol('scored_labels') \ 142 | .setOutputCol('pred_label') 143 | 144 | return(config) 145 | 146 | 147 | def load_data(config, sample_frac=1.0): 148 | data_uri = 'wasb://{}@{}.blob.core.windows.net/*.png'.format( 149 | 
config.container_data_o16n, config.storage_account_name) 150 | df = config.spark.readImages(data_uri, recursive=True, 151 | sampleRatio=sample_frac).toDF('image') 152 | df = df.withColumn('filepath', config.extract_path_udf(df['image'])) 153 | df = config.unroller.transform(df).select('filepath', 'unrolled') 154 | df = config.cntk_model.transform(df).select( 155 | ['filepath', 'features']) 156 | return(df) 157 | 158 | 159 | def main(config_filename, output_model_name, sample_frac): 160 | ''' Coordinate application of trained models to large static image set ''' 161 | config = ConfigFile(config_filename, output_model_name) 162 | 163 | if config.model_source == 'batchaitraining': 164 | config = load_batchaitraining_model_components(config) 165 | elif config.model_source == 'mmlspark': 166 | config = load_mmlspark_model_components(config) 167 | else: 168 | raise Exception('Model source not recognized') 169 | 170 | df = load_data(config, sample_frac) 171 | 172 | if config.model_source == 'batchaitraining': 173 | # Create a UDF to find argmax on model output and convert to a string label 174 | inds_to_labels = config.inds_to_labels 175 | label_udf = udf(lambda x: str(inds_to_labels[np.argmax(x.toArray())]), 176 | StringType()) 177 | predictions = df.withColumn('pred_label', label_udf(df['features'])) \ 178 | .select('filepath', 'pred_label') 179 | elif config.model_source == 'mmlspark': 180 | predictions = config.mmlspark_model.transform(df) 181 | predictions = config.tf.transform(predictions).select( 182 | 'filepath', 'pred_label') 183 | 184 | output_str = predictions.toPandas().to_csv(index=False) 185 | blob_service = BlockBlobService(config.storage_account_name, 186 | config.storage_account_key) 187 | blob_service.create_blob_from_text( 188 | config.container_prediction_results, 189 | config.predictions_filename, 190 | output_str) 191 | 192 | return 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = argparse.ArgumentParser(description=''' 197 | Applies a trained Batch AI Training or MMLSpark model to a large static dataset 198 | in an HDInsight cluster's associated blob storage account. 199 | ''') 200 | parser.add_argument('-c', '--config_filename', 201 | type=str, required=True, 202 | help='Includes storage account credentials and ' + 203 | 'container names.') 204 | parser.add_argument('-o', '--output_model_name', 205 | type=str, required=True, 206 | help='The model name specified at the time of ' + \ 207 | 'training; used for lookup of output files in ' + \ 208 | 'blob storage.') 209 | parser.add_argument('-f', '--sample_frac', 210 | type=float, required=False, default=1.0, 211 | help='Subsamples data. Default sampling fraction is ' + 212 | '1.0 (all samples used).') 213 | args = parser.parse_args() 214 | 215 | assert os.path.exists(args.config_filename), \ 216 | 'Could not find config file {}'.format(args.config_filename) 217 | main(args.config_filename, args.output_model_name, args.sample_frac) 218 | -------------------------------------------------------------------------------- /Code/04_Result_Analysis/Model prediction analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Model prediction analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Before running this notebook, you *must* edit the code cell below to specify `output_model_name` for your model of interest. 
Batch scoring (and training) must already have been performed using this model, since this notebook will use the prediction results thus created." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from analysis_config_loader import ConfigFile\n", 24 | "import os\n", 25 | "%matplotlib inline\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "from PIL import Image\n", 28 | "from collections import defaultdict\n", 29 | "from sklearn.metrics import confusion_matrix\n", 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "output_model_name = '' # <-- fill in this value with your desired model's name!\n", 34 | "config_filename = '../settings.cfg'\n", 35 | "\n", 36 | "assert output_model_name != '', \\\n", 37 | " 'You must fill in the output_model_name field with the name of a model you trained ' + \\\n", 38 | " 'and applied to the operationalization dataset using batch_score_spark.py'\n", 39 | "assert os.path.exists(config_filename), \\\n", 40 | " 'Could not find configuration file'.format(config_filename)\n", 41 | " \n", 42 | "config = ConfigFile(config_filename, output_model_name)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Test set analysis" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Overall and class-specific model performance metrics" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Overall accuracy: 0.773\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "n_correct_label = len(config.test_df.loc[config.test_df['label'] == config.test_df['pred_label']].index)\n", 74 | "n_total = len(config.test_df.index)\n", 75 | "\n", 76 | "print('Overall accuracy: {:.3f}'.format(n_correct_label / n_total))" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "We can also calculate precision, recall, and accuracy for each label:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Metrics for Barren vs. not-Barren classification\n", 96 | "- Recall: 0.127\n", 97 | "- Precision: 0.935\n", 98 | "- Accuracy: 0.922\n", 99 | "Metrics for Cultivated vs. not-Cultivated classification\n", 100 | "- Recall: 0.161\n", 101 | "- Precision: 0.761\n", 102 | "- Accuracy: 0.945\n", 103 | "Metrics for Developed vs. not-Developed classification\n", 104 | "- Recall: 0.202\n", 105 | "- Precision: 0.955\n", 106 | "- Accuracy: 0.977\n", 107 | "Metrics for Forest vs. not-Forest classification\n", 108 | "- Recall: 0.192\n", 109 | "- Precision: 0.776\n", 110 | "- Accuracy: 0.943\n", 111 | "Metrics for Herbaceous vs. not-Herbaceous classification\n", 112 | "- Recall: 0.091\n", 113 | "- Precision: 0.721\n", 114 | "- Accuracy: 0.901\n", 115 | "Metrics for Shrub vs. not-Shrub classification\n", 116 | "- Recall: 0.164\n", 117 | "- Precision: 0.576\n", 118 | "- Accuracy: 0.859\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "labels = np.sort(config.test_df['label'].unique()).tolist()\n", 124 | "\n", 125 | "for label in labels:\n", 126 | " print('Metrics for {0} vs. 
not-{0} classification'.format(label))\n", 127 | " \n", 128 | " n_true_pos = len(config.test_df.loc[(config.test_df['label'] == label) & \n", 129 | " (config.test_df['pred_label'] == label)].index)\n", 130 | " n_true_neg = len(config.test_df.loc[(config.test_df['label'] != label) & \n", 131 | " (config.test_df['pred_label'] != label)].index)\n", 132 | " n_false_pos = len(config.test_df.loc[(config.test_df['label'] != label) & \n", 133 | " (config.test_df['pred_label'] == label)].index)\n", 134 | " n_false_neg = len(config.test_df.loc[(config.test_df['label'] == label) & \n", 135 | " (config.test_df['pred_label'] != label)].index)\n", 136 | " n_predicted_pos = n_true_pos + n_false_pos\n", 137 | " n_labeled_pos = n_true_neg + n_false_neg\n", 138 | " n_correct = n_true_pos + n_true_neg\n", 139 | " \n", 140 | " if n_labeled_pos == 0:\n", 141 | " print('- Recall: Undefined (no images have this true label)')\n", 142 | " else:\n", 143 | " print('- Recall: {:0.3f}'.format(n_true_pos / n_labeled_pos))\n", 144 | " if n_predicted_pos == 0:\n", 145 | " print('- Precision: Undefined (No images predicted to have this label)')\n", 146 | " else:\n", 147 | " print('- Precision: {:0.3f}'.format(n_true_pos / n_predicted_pos))\n", 148 | " print('- Accuracy: {:0.3f}'.format(n_correct / n_total))" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Confusion matrix construction\n", 156 | "\n", 157 | "We now construct a confusion matrix to check which types of classification errors are most common:" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "" 169 | ] 170 | }, 171 | "execution_count": 5, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | }, 175 | { 176 | "data": { 177 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmcAAAJRCAYAAAAAkRChAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XmYZGV59/Hvb4ZVQBBBRAUhcYtRQUAiiggqLtFETIhE\nTQQ1wbgvMXFJIoomahJNiAsJagB33Ig7iwgqJCowjiAq4MYrArKoyDoww/3+cU5DTdPT0zN09Tmn\n+vvxqqvrLPXU3eXQfff9bKkqJEmS1A9Lug5AkiRJtzE5kyRJ6hGTM0mSpB4xOZMkSeoRkzNJkqQe\nMTmTJEnqEZMzSZKkHjE5kyRJ6hGTM0mSpB7ZoOsAJEmS1tfSO9+7auUNC/JedcMVJ1bVE8f9PiZn\nkiRpsGrlDWx8/6cvyHvduPzd2yzE+9itKUmS1CNWziRJ0oAFMlm1psn6biRJkgbOypkkSRquAEnX\nUcwrK2eSJEk9YuVMkiQNm2POJEmSNC5WziRJ0rA55kySJEnjYnImSZLUI3ZrSpKkAXMRWkmSJI2R\nlTNJkjRsTgiQJEnSuFg5kyRJwxUccyZJkqTxsXImSZIGLI45kyRJ0vhYOZMkScPmmDNJkiSNi5Uz\nSZI0bI45kyRJ0rhYOZMkSQPm3pqSJEkaIytnkiRpuIJjziRJkjQ+JmeSJEk9YremJEkaNicESJIk\naVxMziRJ0oC1S2ksxGMu0SRbJflkkh8k+X6SvZJsneTkJBe2X+8yWxsmZ5IkSfPnCOCEqnoAsAvw\nfeA1wClVdV/glPZ4jUzOJC2YJJsm+VySq5N84g6086wkJ81nbF1I8qUkB3cdhzR4S7Iwj7VIsiWw\nD/B+gKq6qap+DTwVOLa97VjggFm/nTv0YUiaSEmemeSsJNcmubRNIvaeh6YPBLYD7lpVf7K+jVTV\nh6vq8fMQz2qS7Jukkhw/7fwu7fnT5tjOG5J8aG33VdWTqurYtd0nqTe2aX82Tj0OnXZ9Z+AK4Ogk\n307yviSbAdtV1aXtPZfR/BxcI2drSlpNklfSlNz/CjgRuAl4Is1ffqffwebvDVxQVSvvYDvjdAWw\nV5K7VtVV7bmDgQvm6w2SBEhV3TJfbUqLVljI2ZpXVtUes1zfANgNeElVfTPJEUzrwqyqSlKzvYmV\nM0m3akvyhwMvqqpPV9V1VXVzVX2uqv6mvWfjJP+e5JL28e9JNm6v7Zvk4iR/neTytur2nPbaG4HX\nAwe1FbnnTa8wJdmprVBt0B4fkuTHSa5J8pMkzxo5f/rI6x6R5My2u/TMJI8YuXZakjclOaNt56Qk\n28zyMdwE/A/wp+3rlwIHAR+e9lkdkeRnSX6T5Owkj2rPPxF43cj3+Z2ROP4xyRnA9cBvtef+or1+\nZJJPjbT/tiSntImcpGG4GLi4qr7ZHn+SJln7RZLtAdqvl8/WiMmZpFF7AZsAx89yz98BDwd2pRns\nuifw9yPX7w5sCdwTeB7w7iR3qarDgH8Cjquqzavq/bMF0nYF/AfwpKraAngEsHyG+7YGvtDee1fg\nHcAXktx15LZnAs8B7gZsBLxqtvcGPgA8u33+BOC7wCXT7jmT5jPYGvgI8Ikkm1TVCdO+z11GXvPn\nwKHAFsBF09r7a+DBbeL5KJrP7uCqmvUvbEk02zctxGMtquoy4GdJ7t+eeizwPeCzNBV42q+fma0d\nkzNJo+5KU7afrdvxWcDhVXV5VV0BvJEm6Zhyc3v95qr6InAtcP8Z2pmLW4AHJdm0qi6tqvNmuOfJ\nwIVV9cGqWllVHwV+APzByD1HV9UFVXUD8HGapGqNqup/ga3bH7DPpknWpt/zoaq6qn3PtwMbs/bv\n85iqOq99zc3T2rue5nN8B/Ahmm6Ri9fSnqT+eQnw4STn0Pys+SfgrcD+SS4EHtcer5FjziSNuopm\nwOsGsyRo92D1qs9F7blb25j22uuBzdc1kKq6LslBNFWu97fdgX9dVT9YSzxTMd1z5Piy9Yjng8CL\ngf2A59JU326V5FU01a17AAXcGZituxTgZ7NdbMeo/JimwvfxOcQoaWqds56oquXATOPSHjvXNvrz\n3Ujqg/8DVjD7NO9LaAb2T9mR23f5zdV1wJ1Gju8+erGqTqyq/YHtaaph751DPFMx/Xw9Y5ryQeCF\nwBfbqtat2m7HvwWeDtylqrYCrqYZmgxNsjaTWbsok7yIpgJ3Sdu+pEXI5EzSrarqappB++9OckCS\nOyXZMMmTkvxze9tHgb9Psm07sP71NN1w62M5sE+SHdvJCK+dupBkuyRPbceeraDpHp1pduMXgfu1\ny39s0FbbHgh8fj1jAqCqfgI8mmaM3XRbACtpZnZukOT1NJWzKb8Adkrm/ud8kvsBbwb+jKZ782+T\nzNr9KqnVkzFn88XkTNJq2vFTr6QZ5H8FTVfci2lmMEKTQJwFnAOcCyxrz63Pe50MHNe2dTarJ1RL\n2jguAX5Jkyi9YIY2rgKeQjOg/iqaitNTqurK9YlpWtunV9VMVcETgRNolte4CLiR1bsspxbYvSrJ\nsrW9Tzs79UPA26rqO1V1Ic2Mzw9OzYSVtHjEiUCSJGmoltz5XrXxw1+2IO9148l/e/Za1jmbF1bO\nJEmSesTkTJIkqUdcSkOSJA3XAg/WXwhWziRJknrEypkkSRq2Hi1COx9MzsZsg822rI23uvvab1zk\nHnD3LboOQVq0bnHS/pz88oabug5hEK669GKu/fUvJ6ufcYGZnI3ZxlvdnQe+8L+6DqP3vv7qfbsO\nQVq0Vty8qusQBuGjy2fdfUuttzz3Dxf+TR1zJkmSpHGxciZJkgasXxufz4fJ+m4kSZIGzsqZJEka\nNsecSZIkaVysnEmSpOEKjjmTJEnS+Fg5kyRJA+ZsTUmSJI2RlTNJkjRsztaUJEnSuJicSZIk9Yjd\nmpIkadicECBJkqRxsXImSZKGzQkBkiRJGhcrZ5IkabjiIrSSJEkaIytnkiRp2BxzJkmSpHGxciZJ\nkgYtVs4kSZI0LlbOJEnSYAUrZ5IkSRojK2eSJGm40j4miJUzSZKkHrFyJkmSBiyOOZMkSdL4WDmT\nJEmDZuVMkiRJY2NyJkmS1CN2a0qSpEGzW7NnkqxKsjzJd5IsS/KIrmOSJElaX5NQObuhqnYFSPIE\n4C3Ao+fywjSpdqrqlpFzS6tq1VgilSRJ887KWb/dGfgVQJLNk5zSVtPOTfLU9vxOSb6f5D3AMmCH\nJNcmOTzJN4G9kuye5KtJzk5yYpLt29eeluRtSb6V5IIkj+rqG5UkSZNpEipnmyZZDmwCbA88pj1/\nI/C0qvpNkm2AbyT5bHvt/sBzquqFAEk2A75bVa9PsiHwVeCpVXVFkoOAfwSe2752g6raM8nvA4cB\nj5seUJJDgUMBNtpyuzF8y5IkCZjI7ZsmITkb7dbcC/hAkgfR/F/1T0n2AW4B7glMZUoXVdU3RtpY\nBXyqfX5/4EHAyW2ZdClw6ci9n26/ng3sNFNAVXUUcBTA
Zve8f92Rb06SJC0uk5Cc3aqq/q+tkm0L\n/H77dfequjnJT2mqawDXTXvpjSPjzAKcV1V7reFtVrRfVzFhn58kSUMTt2/qtyQPoKl0XQVsCVze\nJmb7AfeeYzPnA9u2VTiSbJjkd8cSsCRJ0jSTUPmZGnMGTdXr4KpaleTDwOeSnAUsB34wl8aq6qYk\nBwL/kWRLms/o34HzxhC7JEm6gyatcjb45Kyqlq7h/JXAmromHzTt3s2nHS8H9pmhzX2ntb/TukUr\nSZI0u8EnZ5IkaXGbtMrZRI05kyRJGjorZ5IkadCsnEmSJGlsrJxJkqThmsAdAqycSZIk9YjJmSRJ\nUo/YrSlJkgbNCQGSJEkaGytnkiRpsNz4XJIkSWNl5UySJA2alTNJkiSNjZUzSZI0bJNVOLNyJkmS\n1CdWziRJ0nDFMWeSJEkaIytnkiRp0KycSZIkaWysnEmSpEGzciZJkqSxsXImSZIGy701JUmSNFYm\nZ5IkST1it6YkSRq2HvVqJvkpcA2wClhZVXsk2Ro4DtgJ+Cnw9Kr61ZrasHImSZI0v/arql2rao/2\n+DXAKVV1X+CU9niNrJxJkqThGsb2TU8F9m2fHwucBrx6TTdbOZMkSZqbbZKcNfI4dIZ7Cjgpydkj\n17erqkvb55cB2832JlbOJEnSoC1g5ezKka7KNdm7qn6e5G7AyUl+MHqxqipJzdaAlTNJkqR5UlU/\nb79eDhwP7An8Isn2AO3Xy2drw8rZmD3g7lvw9Vfv23UYvbfTCz7ZdQiD8dMjD+w6BE2YG25a1XUI\ng3DIw3bqOoRBeO+dNlrw9+zLmLMkmwFLquqa9vnjgcOBzwIHA29tv35mtnZMziRJkubHdsDxbbK4\nAfCRqjohyZnAx5M8D7gIePpsjZicSZKkYetH4Yyq+jGwywznrwIeO9d2HHMmSZLUI1bOJEnSoPVl\nzNl8sXImSZLUI1bOJEnSYCWxciZJkqTxsXImSZIGzcqZJEmSxsbkTJIkqUfs1pQkSYNmt6YkSZLG\nxsqZJEkatskqnFk5kyRJ6hMrZ5IkadAccyZJkqSxsXImSZKGK1bOJEmSNEZWziRJ0mAFmLDCmZUz\nSZKkPrFyJkmSBiyOOZMkSdL4WDmTJEmDNmGFMytnkiRJfWLlTJIkDZpjziRJkjQ2JmeSJEk9Yrem\nJEkarjghQJIkSWNk5UySJA1WgCVLJqt0ZuVMkiSpR6ycSZKkQXPMmSRJksbGypkkSRo0F6GdR0nu\nnuRjSX6U5HtJvpjkfrPcf1qSPdrnr5t27X/XM4adkjxzPV53TJID1+c9JUmS1qSz5CxNmns8cFpV\n/XZVPRB4HbDdHJtYLTmrqkesZyg7AeucnEmSpB5o1zlbiMdC6bJyth9wc1X959SJqloOLE3y+alz\nSd6V5JDRFyZ5K7BpkuVJPtyeu7b9+rEkTx6595gkB7YVsq8nWdY+ppK5twKPatt6RZKlSf4lyZlJ\nzkny/LadtLF8L8kXgLuN5VORJEmLWpdjzh4EnL0+L6yq1yR5cVXtOsPl44CnA19IshHwWOAFNEuh\n7F9VNya5L/BRYA/gNcCrquopAEkOBa6uqocl2Rg4I8lJwEOB+wMPpqnufQ/47/WJX5IkzY8weWPO\nJnFCwJeAI9rE6onA16rqhiRbAu9KsiuwCljT2LbHAw8ZGU+2JXBfYB/go1W1CrgkyVfWFECb4B0K\nsMOOO87H9yRJkhaJLpOz84CZBtSvZPXu1k3WpdG2MnYa8ATgIOBj7aVXAL8Admnbv3ENTQR4SVWd\nuNrJ5PfXIYajgKMAdt99j1qX+CVJ0rrIxFXOuhxz9hVg47bKBECShwFLgQcm2TjJVjTdkjO5OcmG\na7h2HPAc4FHACe25LYFLq+oW4M/b9wG4Bthi5LUnAi+YajvJ/ZJsBnwNOKgdk7Y9zZg5SZKkedVZ\nclZVBTwNeFy7lMZ5wBuAS4CPA+cAHwS+vYYmjgLOmZoQMM1JwKOBL1fVTe259wAHJ/kGTZfmde35\nc4BVSb6T5BXA+2jGky1L8l3gv2gqjMcDFwLnAkcCX13f712SJM2fSZut2emYs6q6hGbw/nR/2z6m\n37/vyPNXA68eOd585PnNwNbTXnsh8JCRU68dufcx097qdUxbqqP14pm/E0mSpPnh9k2SJEk9Momz\nNSVJ0iLihABJkiSNjZUzSZI0XAs8WH8hWDmTJEnqEStnkiRpsCZx+yYrZ5IkST1i5UySJA3ahBXO\nrJxJkiT1iZUzSZI0aI45kyRJ0thYOZMkSYM2YYUzK2eSJEl9YuVMkiQNVxxzJkmSpDGyciZJkgar\n2SGg6yjml5UzSZKkHjE5kyRJ6hG7NSVJ0oDFCQGSJEkaHytnkiRp0CascGblTJIkqU+snEmSpEFz\nzJkkSZLGxsqZJEkarjjmTJIkSWNk5UySJA1Ws33TZJXOrJxJkiT1iJUzSZI0aFbOJEmSNDZWziRJ\n0qBNWOHM5GzcCli56pauw+i9H77rj7oOYTDu8rAXdx3CIFz2v0d0HcJgbL6Jvwrmwp/lc1NdBzAB\n/C9SkiQNmmPOJEmSNDYmZ5IkST1iciZJkoar3b5pIR5zCidZmuTbST7fHu+c5JtJfpjkuCQbra0N\nkzNJkqT58zLg+yPHbwP+raruA/wKeN7aGjA5kyRJgxVCsjCPtcaS3At4MvC+9jjAY4BPtrccCxyw\ntnZMziRJkuZmmyRnjTwOnXb934G/BabWXbkr8OuqWtkeXwzcc21v4lIakiRp0BZwJY0rq2qPmWPI\nU4DLq+rsJPvekTcxOZMkSbrjHgn8YZLfBzYB7gwcAWyVZIO2enYv4Odra8huTUmSNGhLkgV5zKaq\nXltV96qqnYA/Bb5SVc8CTgUObG87GPjMWr+fO/ZxSJIkaRavBl6Z5Ic0Y9Dev7YX2K0pSZIGrW+7\nN1XVacBp7fMfA3uuy+utnEmSJPWIlTNJkjRYzer9PSud3UFWziRJknrEypkkSRq0JZNVOLNyJkmS\n1CdWziRJ0qA55kySJEljY3ImSZLUI3ZrSpKkQZuwXk0rZ5IkSX1i5UySJA1WgDBZpTMrZ5IkST1i\n5UySJA2ai9BKkiRpbKycSZKk4UpchFaSJEnjY+VMkiQN2oQVzqycSZIk9YmVM0mSNFgBlkxY6czK\nmSRJUo9YOZMkSYM2YYWzha+cJVmVZHmS85J8J8lfJ5nXOJLslOS789nmDO9xSJJ3jfM9JEnS4tNF\n5eyGqtoVIMndgI8AdwYO6yAWSZI0cK5zNo+q6nLgUODFaSxN8i9JzkxyTpLnAyT5WJInT70uyTFJ\nDlzT/aOSbJLk6CTnJvl2kv3a84ck+UySE5Kcn+Swkdf8WZJvtRW+/0qytD3/nCQXJPkq8MgxfzyS\nJGkR6nzMWVX9uE1+7gY8Fbi6qh6WZGPgjCQnAccBTwe+kGQj4LHAC4DnreH+GnmLFzVvUw9O8gDg\npCT3a6/tCTw
IuB44M8kXgOuAg4BHVtXNSd4DPCvJycAbgd2Bq4FTgW+P7YORJEmLUufJ2TSPBx6S\n5MD2eEvgvsCXgCPaBOyJwNeq6oYka7r/gpE29wbeCVBVP0hyETCVnJ1cVVcBJPl0e+9KmgTszLZM\nuilwOfB7wGlVdUV7/3Ej7awmyaE0FUF22GHH9f80JEnSrJLJmxDQeXKW5LeAVTQJUICXVNWJM9x3\nGvAEmqrWx6ZOz3R/kp3m+PY1w3GAY6vqtdPaPGCObVJVRwFHAey2+x7T30OSJGmNOh1zlmRb4D+B\nd1VVAScCL0iyYXv9fkk2a28/DngO8CjghPbcbPdP+TrwrKnrwI7A+e21/ZNsnWRT4ADgDOAU4MB2\nsgLt9XsD3wQeneSu7fv9yXx+FpIkaf0sSRbksVC6qJxtmmQ5sCFNF+IHgXe0194H7AQsS9OneAVN\n0gRwUnvvZ6rqpjncP+U9wJFJzm3f75CqWtF2WZ7etnkf4CNVdRZAkr+nGZu2BLgZeFFVfSPJG4D/\nAy4FlgFL5+MDkSRJmrLgyVlVrTGhqapbgNe1j+nXbga2nuP9V9MM9KeqbqSpuM3k8qp68QzvdRxN\npW76+aOBo9cUvyRJWngTNuTM7ZskSZL6pPMJAV2pqmOAYzoOQ5Ik3UEuQitJkqSxWbSVM0mSNHwB\nlkxW4czKmSRJUp9YOZMkScOVOOZMkiRJ42PlTJIkDdqEFc7WnJwlOZ7b7z15q6r6o7FEJEmStIjN\nVjl714JFIUmStJ4mbczZGpOzqjpl6nmSjYAdq+qHCxKVJEnSIrXWCQFJngycC5zcHu/adnlKkiR1\namqds4V4LJS5zNY8HPg94NcAVbUcuM84g5IkSVqs5pKc3VxVv552bo0TBSRJkrT+5rKUxveTPB1Y\nkmRn4KXAN8YbliRJ0txM2oSAuVTOXgzsDtwCHA/cBLx8nEFJkiQtVmutnFXVdcCrk7yxOawbxh+W\nJEnS3ExW3WxuszV3S/Jt4ALgwiRnJ9lt/KFJkiQtPnMZc3Y08PKqOhUgyb7tuV3GGJckSdJaJbBk\nEY45u24qMQOoqtOAa8cWkSRJ0iI2296aD2mffjPJu4GP0iyhcRBw6ppeJ0mStJAmrHA2a7fmu6cd\nP2TkueucSZIkjcFse2s+aiEDkSRJWh+Tts7ZXCYEkOQJwO8Cm0ydq6p/GldQkiRJi9Vak7Mk7wG2\nAvahmaX5x7hDgCRJ6okJK5zNabbm3lX1TOCqqvoHmk3Q7zXesCRJkhanuXRrTu0IcGOSuwNXATuN\nLSJJkqQ5Cpm4dc7mkpx9KclWwL8Cy4FVwLFjjUqSJGmRmsvemm9on34iyeeBTYGdxxmUJEnSnGTy\nxpzNabbmlHbT8xuSLAd2HE9IkiRJi9dcJgTMZMJyVEmSpH5Yp8rZCHcImKMAGyxd3xx48bh+xcqu\nQxiMX535rq5DGIS7PPFtXYcwGL864dVdhzAIK25e1XUIg1AdZAiLZhHaJMczcxIW4K5ji0iSJGkR\nm61yNtuf5/7pLkmSemHS+qdm21vzlIUMRJIkSes/5kySJKlzYfLGnE1aJVCSJGnQ5lw5S7JxVa0Y\nZzCSJEnraslkFc7WXjlLsmeSc4EL2+Ndkrxz7JFJkiQtQnOpnP0H8BTgfwCq6jtJ9htrVJIkSXO0\n6CpnwJKqumjaOVfikyRJGoO5VM5+lmRPoJIsBV4CXDDesCRJktYuWZyzNV8AvJJmo/NfAA9vz0mS\nJGmerbVyVlWXA3+6ALFIkiSts0kbc7bW5CzJe5lhj82qOnQsEUmSJC1icxlz9uWR55sATwN+Np5w\nJEmS1k1fhpwl2QT4GrAxTY71yao6LMnOwMeAuwJnA39eVTetqZ25dGseN+2NPwicfAdilyRJmkQr\ngMdU1bVJNgROT/IlmrH7/1ZVH0vyn8DzgCPX1Mj6bN+0M3Dv9YlYkiRpUlXj2vZww/ZRwGOAT7bn\njwUOmK2duYw5+xW3jTlbAvwSeM16xCxJkjSvAizpS78m0C47djZwH+DdwI+AX1fVyvaWi4F7ztbG\nrMlZmoVDdgF+3p66papuNzlAkiRpEdgmyVkjx0dV1VGjN1TVKmDXJFsBxwMPWNc3mTU5q6pKcnxV\n7b6uDUuSJC2E9RmjtZ6urKo95nJjVf06yanAXsBWSTZoq2f34rai14zm8v18K8lD5xKIJEnSYpVk\n27ZiRpJNgf2B7wOnAge2tx0MfGa2dtZYORvJ8PYG/jLJj4DraLp3q6p2u8PfhSRJ0h3UoyFn2wPH\ntuPOlgAfr6rPJ/ke8LEkbwa+Dbx/tkZm69b8FrAba5lRIEmSJKiqc4Db9TZW1Y+BPefazmzJWdoG\nf7TO0UmSJC2AJL2arTkfZkvOtk3yyjVdrKp3jCEeSZKkRW225GwpsDltBU2SJKmPJqxwNmtydmlV\nHb5gkUiSJGntY84kSZL6bMmEZSyzrXP22AWLQpIkScAslbOq+uVCBiJJkrSu+ra35nxYwB0PJEmS\ntDaz7q0pSZLUdxNWOLNyJkmS1CcmZ5IkST1it6YkSRquLK6lNAYlyaoky0ceO43xvV6e5E7jal+S\nJC1ek1Q5u6Gqdl3XFyXZoKpWruPLXg58CLh+Xd9PkiTNr0zYuvmTlJzdTpJNgCOBPYCVwCur6tQk\nhwBPBjYBNgMek+RvgKcDGwPHV9VhSTYDPg7ci2av0TcB2wH3AE5NcmVV7bfA35YkSZpgk5ScbZpk\nefv8J1X1NOBFQFXVg5M8ADgpyf3ae/YCHlJVv0zyeOC+wJ4069l9Nsk+wLbAJVX1ZIAkW1bV1Ule\nCexXVVfOFEiSQ4FDAXbYccfxfLeSJKldhLbrKObXxIw5o+3WbB9Pa8/tTdP9SFX9ALgImErOTh7Z\nBeHx7ePbwDLgATTJ2rnA/kneluRRVXX1XAKpqqOqao+q2mPbbbadl29OkiQtDpNUOVtX1408D/CW\nqvqv6Tcl2Q34feAtSU6qqsMXKkBJkrR2Vs6G5evAswDa7swdgfNnuO9E4LlJNm/vvWeSuyW5B3B9\nVX0I+Fdgt/b+a4Atxh28JElafCa9cvYe4Mgk59JMCDikqlZk2j4PVXVSkt8B/q+9di3wZ8B9gH9J\ncgtwM/CC9iVHASckucQJAZIkdWv67/Whm5jkrKo2n+HcjcBzZjh/DHDMtHNHAEdMu/VHNFW16a9/\nJ/DO9Y9WkiRpZhOTnEmSpMXH2ZqSJEkaKytnkiRpuAITNuTMypkkSVKfWDmTJEmDtmTCSmdWziRJ\nknrE5EySJKlH7NaUJEmD5VIakiRJGisrZ5IkadAmbD6AlTNJkqQ+sXImSZIGLCxhskpnVs4kSZJ6\nxMqZJEkarOCYM0mSJI2RlTNJkjRccZ0zSZIkjZGVM0mSNGhufC5JkqSxsXImSZIGy9makiRJGisr\nZ5IkadAccyZJkqSxMTmTJEnqEbs1JUnSoE1Yr6aVM0mSpD6xciZJkgYr
TF6ladK+H0mSpEGzciZJ\nkoYrkAkbdGblTJIkqUesnEmSpEGbrLqZydnY3VKw4uZVXYfRexf/8oauQxiMe29zp65DGITLPveq\nrkMYjNd+8QddhzAIL9nr3l2HMAgrb7ml6xAGz+RMkiQNVnD7JkmSJI2RlTNJkjRok1U3s3ImSZLU\nK1bOJEnSoE3YkDMrZ5IkSX1i5UySJA1Y3CFAkiRJ42NyJkmS1CN2a0qSpMEKk1dpmrTvR5IkadCs\nnEmSpEFzQoAkSZLGxsqZJEkatMmqm1k5kyRJ6hUrZ5IkabjimDNJkiSNkZUzSZI0WK5zJkmSpLGy\nciZJkgbNMWeSJEkaGytnkiRp0CarbmblTJIkqVdMziRJ0qAlC/NYexzZIcmpSb6X5LwkL2vPb53k\n5CQXtl/vMls7JmeSJEnzYyXw11X1QODhwIuSPBB4DXBKVd0XOKU9XiOTM0mSpHlQVZdW1bL2+TXA\n94F7Ak8Fjm1vOxY4YLZ2nBAgSZIGq1mEtn9TApLsBDwU+CawXVVd2l66DNhutteanEmSJM3NNknO\nGjk+qqqOmn5Tks2BTwEvr6rfjK7DVlWVpGZ7E5MzSZI0aAu4Bu2VVbXHbDck2ZAmMftwVX26Pf2L\nJNtX1aWYejF8AAAcUklEQVRJtgcun60Nx5xJkiTNgzQlsvcD36+qd4xc+ixwcPv8YOAzs7Vj5UyS\nJA1YSH/GnD0S+HPg3CTL23OvA94KfDzJ84CLgKfP1ojJmSRJ0jyoqtNZ84YFj51rOyZnkiRp0CZs\n3/PxjTlLcu2040OSvGsd21jn10iSJA1ZbytnSXobmyRJ6oe+rnN2R3QyWzPJtkk+leTM9vHI9vwb\nkhyV5CTgA+3tOyQ5Icn5SQ4baeN/kpzd7l116Mj5JyZZluQ7SU5pz22W5L+TfCvJt5M8tT2/SZKj\nk5zbnt+vPb9axS7J55Psm2RpkmOSfLd9zSvG/2lJkqTFZJzVqU1HZioAbE0zlRTgCODfqur0JDsC\nJwK/017bHdi7qm5IcgiwJ/Ag4HrgzCRfqKqzgOdW1S+TbNqe/xRNsvleYJ+q+kmSrds2/w74SlU9\nN8lWwLeSfBn4K5r14B6c5AHASUnuN8v3tCtwz6p6EEDbliRJ6socNyUfknEmZzdU1a5TB22iNbVw\n2+OAB46smHvndjVdgM9W1Q0j7ZxcVVe1bXwa2Bs4C3hpkqe19+wA3BfYFvhaVf0EoKp+2V5/PPCH\nSV7VHm8C7Ni29c723h8kuQiYLTn7MfBbSd4JfAE4aaab2kreoQA77LDjLM1JkiStrqtxXUuAh1fV\njaMn22Ttumn3Tt/ioJLsS5Pg7VVV1yc5jSbhWpMAf1xV58/wfjNZyepdvpsAVNWvkuwCPAF4Ec06\nJc+d/uJ2K4ejAB662x6zbtEgSZLumEmrnHW1Q8BJwEumDpLsOsu9+yfZuu2+PAA4A9gS+FWbmD0A\neHh77zeAfZLs3LY71a15IvCSduVekjy0Pf914FntufvRVNPOB34K7JpkSZIdaLpWSbINsKSqPgX8\nA7Db+n8EkiRJt9dV5eylwLuTnNPG8DWa8V8zOR34IHAf4CNVdVaSc4G/al9/Pk1SRlVd0XYpfjrJ\nEpq9q/YH3gT8O3BOe/4nwFOA9wBHtu2tBA6pqhVJzmjvORf4LrCsjeWewNFtGwCvnZ+PQ5Ikra8e\n7RAwL8aWnFXV5tOOjwGOaZ9fCRw0w2vesKbXTDu/AnjSGt73S8CXpp27AXj+DPfeCDxnhvNFW1Gb\ngdUySZI0Nm58LkmS1CMu9CpJkgYrwJLJ6tW0ciZJktQnVs4kSdKgTdqEACtnkiRJPWLlTJIkDZqL\n0EqSJGlsrJxJkqRBc8yZJEmSxsbKmSRJGizXOZMkSdJYWTmTJEkDFsecSZIkaXysnEmSpOGK65xJ\nkiRpjKycSZKkQZuwwpmVM0mSpD6xciZJkgarWedssmpnVs4kSZJ6xORMkiSpR+zWlCRJgzZZnZpW\nziRJknrFypkkSRq2CSudWTmTJEnqEStnkiRp0Nz4XJIkSWNj5UySJA3ahK1Ba+VMkiSpT6ycSZKk\nQZuwwpmVM0mSpD6xciZJkoZtwkpnJmdjtiSw8YZLuw6j9+63/RZdhzAYK1fd0nUIg3DhZdd2HcJg\nvG6/3+46hEHY/XVf6jqEQbjskt90HcLgmZxJkqTBCq5zJkmSpDGyciZJkoYrrnMmSZKkMTI5kyRJ\n6hG7NSVJ0qBNWK+mlTNJkqQ+sXImSZKGbcJKZ1bOJEmSesTKmSRJGrC4CK0kSZLGx8qZJEkaNBeh\nlSRJ0thYOZMkSYMVJm6yppUzSZKkPrFyJkmShm3CSmdWziRJknrEypkkSRo01zmTJEnS2Fg5kyRJ\ng+Y6Z5IkSRobkzNJkqQesVtTkiQN2oT1alo5kyRJ6hMrZ5IkabgmcP8mK2eSJEk9YuVMkiQNmovQ\nSpIkaWysnEmSpMEKLkIrSZKkMbJyJkmSBm3CCmdWziRJkvrEypkkSRq2CSudWTmTJEnqkYlMzpL8\nXZLzkpyTZHmS30vy0yTb3MF2r52vGCVJ0vzIAv1vrXEk/53k8iTfHTm3dZKTk1zYfr3L2tqZuOQs\nyV7AU4DdquohwOOAn83xtXbzSpKk9XUM8MRp514DnFJV9wVOaY9nNXHJGbA9cGVVrQCoqiur6pL2\n2kuSLEtybpIHACR5Q5KjkpwEfCDJIUneNdVYks8n2Xfk+O1tG6ck2Xbhvi1JkjSTZGEea1NVXwN+\nOe30U4Fj2+fHAgesrZ1JTM5OAnZIckGS9yR59Mi1K6tqN+BI4FUj53cHnlpVz1xL25sBy9o2vgoc\nNtNNSQ5NclaSs6648or1/04kSdLQbVdVl7bPLwO2W9sLJi45q6praZKtQ4ErgOOSHNJe/nT79Wxg\np5GXfbaqbphD87cAx7XPPwTsvYYYjqqqPapqj223sbgmSdKE2Gaq+NI+Dl2XF1dVAbW2+yZyjFVV\nrQJOA05Lci5wcHtpRft1Fat/79eNPF/J6knrJrO91R2LVJIk3VELuJLGlVW1xzq+5hdJtq+qS5Ns\nD1y+thdMXOUsyf2T3Hfk1K7ARevQxE+BXZMsSbIDsOfItSXAge3zZwKn35FYJUnSxPsstxWJDgY+\ns7YXTGLlbHPgnUm2oqmC/ZCmi/Mpc3z9GcBPgHOB7wLLRq5dB/xukrOBq4GD5itoSZK0nnqyCG2S\njwL70nR/XkwzNv2twMeTPI+mWPT0tbUzcclZVZ0NPGKGSzuN3HMWzYdHVb1h2usLeNYa2t68ffoP\ndzxSSZI0SarqGWu49Nh1aWfikjNJkrR4BOa0QOyQTNyYM0mSpCGzciZJkoZrjgvEDomVM0mSpB6x\nciZJkgZtwgpnVs4kSZL6xMqZJEkatgkrnVk5kyRJ6hErZ5IkacDiOmeSJEkaHytnkiRp0FznTJIk\nSWNjciZJktQjdmtKkqTBChO
3koaVM0mSpD6xciZJkoZtwkpnVs4kSZJ6xMqZJEkaNBehlSRJ0thY\nOZMkSYPmIrSSJEkaGytnkiRp0CascGblTJIkqU+snEmSpOGKY84kSZI0RlbOJEnSwE1W6czKmSRJ\nUo9YOZMkSYMVHHMmSZKkMTI5kyRJ6hG7NSVJ0qBNWK+mydm4LVt29pWbbpiLuo5jmm2AK7sOYgD8\nnObOz2pu/Jzmxs9p7vr4Wd276wCGzuRszKpq265jmC7JWVW1R9dx9J2f09z5Wc2Nn9Pc+DnNnZ9V\nwwkBkiRJGhsrZ5IkadAyYaPOrJwtTkd1HcBA+DnNnZ/V3Pg5zY2f09z5WU2gVFXXMUiSJK2XXR66\ne5341W8syHttv+VGZy/EGD8rZ5IkST3imDNJkjRokzXizMqZJElSr1g5kyRJg5VM3jpnJmeLRJJH\nAm+gWbl5A5oqcFXVb3UZV18k+aPZrlfVpxcqlr5L8k5gjTOJquqlCxhO7yU5paoeu7Zzi12SlwFH\nA9cA7wMeCrymqk7qNLCeSXJXmp/lj6T57/B04PCquqrLuDS/TM4Wj/cDrwDOBlZ1HEsf/UH79W7A\nI4CvtMf7AacBJme3Oav9+kjggcBx7fGf0Pz7EpBkE+BOwDZJ7sJtw2LuDNyzs8D667lVdUSSJwDb\nAs+hSdZMzlb3MeBrwB+3x8+i+W/wcZ1F1AOTts6ZydnicXVVfanrIPqqqp4DkOTzwAOr6tL2eHvg\n3V3G1jdVdSxAkkOA/arq5vb4P/EX6ajnAy8H7kGTtE799vgN8K6uguqxqc/n94Gjq+o7yaR1Vs2L\nravqTSPHb05yQGfRaCxMzhaPU5P8C00FaMXUyapa1l1IvbTTVGLW+gVwv66C6bl7AFsAv2yPN2/P\nCaiqI4Ajkrykqt7ZdTwDcHaSk4Cdgdcm2QK4peOY+ujUJH8KfLw9PhD4Qofx9MOEpfEmZ4vH77Vf\nRxfPK+AxHcTSZ6clORH4aHt8EHBqh/H02VuBbyc5leZH4z40Y2G0usuSbFFV1yT5e2A34M3+YXQ7\nzwN2BX5cVde3Y6ue03FMvZHkGpqf2QFeCXyovbQEuBY4rKPQNAYmZ4tEVe3XdQxDUFUvTvI0mkQD\n4KiqOr7LmPqqqo5O8iVuS/xfXVWXdRlTT/1DVX0iyd7AE4B/BY7kts9Njb3brw+xN/P2qmqLrmPQ\nwjE5WySSbAf8E3CPqnpSkgcCe1XV+zsOrY+WAddU1ZeT3Gmq6tF1UH3Tjgd6HPBbVXV4kh2T7FlV\n3+o6tp6ZmoDzZODIqvpMkjd0GE9f/c3I802APWnG6lndH5Fkn5nOV9XXFjqWPpm0dN7kbPE4hmbm\n09+1xxfQzPAxORuR5C+BQ4Gtgd+mmVX3n4DLHtzee2jGBD0GOJxmCYRPAQ/rMqge+nmS/wL2B96W\nZGNcAPx2quoPRo+T7AD8c0fh9JlJ7CLgD4jFY5uq+jjtANuqWolLaszkRTRLRPwGoKoupFleQ7f3\ne1X1IuBGgKr6FbBRtyH10tOBE4EnVNWvaRL/v5n9JQIuBh7UdRB9U1V/MPLYn+Yz+kXXcXVtaiHa\ncT8WipWzxeO6doBtASR5OHB1tyH10oqqumlqzEuSDZhlwdVF7uYkS7nt39S2OLvudtrB7ZfTjKm6\nEFjZftWIaYsbL6GZHPCd7iIaDJPYCWRytni8Evgs8NtJzqBZ5PHAbkPqpa8meR2waZL9gRcCn+s4\npr76D+B4YLsk/0jz7+nvuw2pf5IcRjNL+v40Qws2pJlp98gu4+qhs0aerwQ+WlVndBVMX5nEziQu\nQqvhSbKEZmzCo2l+QQQ4f2rxUK3mNTRT+s+lWUT0i1X13m5D6qeq+nCSs7ltPN4BVfX9LmPqqafR\nbEW0DKCqLmnX8NKIqjo2yUbctq7g+V3G02MmsYuAydkiUFW3JHl7Ve0FnNd1PD33knbx0FsTsiQv\na8/p9u4ETHVtbtpxLH11U1VVkqnu3826DqiPkuwLHAv8lOYPyB2SHLzYZyGOaocR7F9Vf9Z1LH0S\nJm/jcycELB4nJfljt0NZq4NnOHfIQgcxBEleT/PLdGtgG+DodpFVre7j7WzNrdrZwF9mJPnXrd4O\nPL6qHl1V+9CsCfdvHcfUK1W1Cti2rTBqglk5WzxeCWwGrExyI80fG1VVd+42rH5I8gzgmcDOST47\ncml0eyKt7hnAQ6vqRoAkb6Xpuntzp1H1TFX9azt+8Tc0wwpeX1UndxxWH21YVbd2ZVbVBUk27DKg\nnvopcEb7c+q6qZNV9Y7OItK8MzlbBNpq2e9W1f/rOpYe+1/gUpoK0NtHzl8DnNNJRP33U5qxjDe2\nxxsDP+osmh5qu6FOrKrHASZkszsryfu4bVuiZ7H6+Co1LmkfS2j+eNQEMjlbBNrxLscDu3cdS19V\n1UXARcBeXccyICuA85KcTDPmbH/g9CT/AVBVL+0yuD6oqlVJrk+yZVW5dM3sXkCzzuDUv5uv0yx0\nrBFV9cauY+ijSRuwY3K2eHwjycOq6syuA+mzdv23dwK/Q7Og6lLgOrt/Z3R8+5hyWkdx9N2NwLlt\nEjvaDbXok9dRVbUiyZHAF0a7N7W6JPcDXgXsxMjv8Kpyh4AJYnK2eOwHPD/JRTS/IKbGnD2k27B6\n513AnwKfoFmb6tnAfTqNqKdmWvrA5Vlm9IX2oVkk+UPgX2j+KNo5ya7A4VX1h91G1jufoNlS7n24\ny8utXOdMQ/WkrgMYiqr6YZKl7cyoo5P8b9cx9ZFLH8yNSeycHUazT+RpAFW1PMnOnUbUTyur6siu\ng9B4mZwtEu2YKpLcjWYQt2Z2ffuLdHmSf6aZJOC6VDObWvrgfLi1u+WjOLZxNSaxc3ZzVV09bbUf\nt05rJdm6ffq5JC+kGVKwYup6VS3eWeULvO/lQjA5WyTaLoO3A/cALgfuDXwf+N0u4+qhP6eZBfVi\n4BXADsAfdxpRf7n0wdyYxM7NeUmeCSxNcl+aiQFWrW9zNk2yOpWGvGra9d9a2HA0Ti5Cu3i8CXg4\ncEFV7Uyz5Y5bftze7jRj8X5TVW+sqldW1Q+7DqqnzkryviT7to/34tIHM7ldEkuzv6ZW9xKaPxZX\nAB8BrgZe3mlE/XIQ8Miq2rn9Gf5G4LvA52nGx2qCmJwtHjdX1VXAkiRLqupUmg1ztbo/AC5I8sEk\nT0lidXnNXgB8j6bC8dL2+Qs6jaifTGLnoKqur6q/q6qHtY+/n1rgWEAzCWAFQJJ9gLfQdJdfDRzV\nYVydywI+Foq/eBaPXyfZHPga8OEkl9NsmqsRVfWctmvuSTQr4L87yclV9Rcdh9Y7VbUCeEf70Jq5\nftcctEuN/ElV/bo9vgvwsap6QreR9cbSkXFlBwFHVdWngE8lWd5hXBoDk7PF46nADTTjqJ4FbAkc\n3mlEPVVVNyf5Erdt5n0AYHLWSnIuswzUdnmWRpIdq+r/mcTO2TZTiRlAVf2qncCkxtIk
G1TVSpph\nKYeOXPN3uRMCNERVNbX45S1JvgBcVVXOhJomyZNo/irdl2ZK//uAp3cYUh89pesABuJ/gN0Aknyq\nqpxYMrtbphJagCT3xtmaoz4KfDXJlTR/aH8dIMl9aLo2NUFMziZcu+L9W2k2734T8EGa/SOXJHl2\nVZ3QZXw99GzgOOD5bcVD00wtywK3/gK9b1V9Ocmm+DNl1Ojf8s6kW7u/o9n+66s0n92jWL06tKhV\n1T8mOQXYHjhp5I/rJTSTKRY1F6HV0LwLeB1NN+ZXgCdV1TeSPIDmLzGTsxFV9YyuYxiKJH9J88tz\na+C3gXvRDFp+bJdx9Uit4blmUFUnJNmNZlY5wMur6souY+qbqvrGDOcu6CIWjZfJ2eTboKpOAkhy\n+NR/3FX1g0zaqn13QJLTq2rvJNew+i/SqW2u3Fvz9l5Es6L7NwGq6kLHCK1mlyS/ofk3tGn7HPw3\nNZtVNOswbgI8MAku1qu5mLRfZyZnk++Wkec3TLvmX/Otqtq7/bpF17EMyIqqumkqyW+XHfHfVKuq\nlnYdw5Ak+QvgZTQV2OU0FbT/A9zQW4uO65xNvl2S/KatCD2kfT51/OCug+ubJB+cyzkBzeDk19FU\nhfan2ZD5cx3HpOF6GfAw4KKq2g94KHBFtyFpKFznTIPiX+/rbLXtrNpqkNvszOw1wPOAc4HnA1+k\nmd0qrY8bq+rGJCTZuB16cf+ug5K6YHImAUleSzNxYvrYoJtY5Ktvz+IA4ANV9d6uA9FEuDjJVjRL\nkJyc5FfAJR3HpKGYsDFncakr6TZJ3lJVr+06jiFIcjTNeKCv0Sw/ckK7QKZ0hyR5NM0M8xOq6qau\n41G/7bb7HnX6N85ckPfabKMlZ1fV2PcytXImAe0UfoBPjDy/VVUtW+CQes+trjTf2v/29qaZWHKG\niZnmqk/rnCV5InAEsBR4X1W9dV3bMDmTGm+f5VrhjLEZudWV5kuS1wN/Any6PXV0kk9U1Zs7DEta\nJ0mWAu8G9gcuBs5M8tmq+t66tGNyJgHt7DCtA7e60jx7BvDQqroRIMlbgWWAyZlmFXq1ztmewA+r\n6scAST5Gs7e1yZm0vpI8e6bzVfWBhY5lANzqSvPppzSLz97YHm8M/KizaKT1c0/gZyPHFwO/t66N\nmJxJq3vYyPNNaLYiWgaYnE1TVc9o99Z8FHDr3ppVdU3HoWlAkryTplt8BXBekpPb4/2B07uMTcOw\nbNnZJ266YbZZoLfbJMlZI8dHVdW8z+g3OZNGVNVqGwi3U/uP7SicXnNvTc2TqV90ZwPHj5w/beFD\n0RBV1RO7jmHEz4EdRo7v1Z5bJy6lIc2inY14TlX9Ttex9E2S5bR7a1bVQ9tz51aVO09onbSDqI+t\nqj/rOhbpjmgXLr+A5o/UnwNnAs+sqvPWpR0rZ9KIJJ/jtv0hlwAPBD7eXUS95t6amhdVtSrJtkk2\ncvkMDVlVrUzyYuBEmqU0/ntdEzMwOZMASHIfYDvgX0dOr6T5j2udS9KLxPS9NV+Ie2tq/f0UOCPJ\nZ4Hrpk5W1Ts6i0haD1X1RZrt7Nab3ZoSkOTzwOuq6pxp5/cADquqP+gmsv5KsoRmb83H08xmP5Fm\nwUV/qGidJTlspvNV9caFjkXqmsmZBCT5blU9aA3XHEe1Bkm2BaiqK7qORZMhyZ2q6vqu45C6tKTr\nAKSe2GSWa5suWBQDkMYbklwJnA+cn+SKdoV3ab0k2SvJ94AftMe7JHlPx2FJnTA5kxpntktDrCbJ\nX9BM8ddtXgE8EnhYVW1dVVvTLLL4yCSv6DY0Ddi/A08ArgKoqu8A+3QakdQRuzUlIMl2NGss3cRt\nydgewEbA06rqsq5i65sk3wb2r6orp53flv/f3r2FajrFcRz//hzHYRySQ6TUOExDzTCOI0KauHCM\nciiJkkOGRCkuHC4ouSAp5EYiOSZTYxwGQ0PMyRCSJFxIchzcjL+LZ+28dvY0795j3qf291NvPe96\n1rPWep52u3/r/7xrwdKxZTWkYSR5r6qOSbJ6YGmWtVU1d9Rjk7Y0f60pAVX1HbAgycnA2Ltni6vq\n9REOq6+2HR+YQffeWVsXTpqMr5MsAKr9HV0HfDLiMUkjYXAmDaiqZcCyUY+j5za2DpVrVGmyrgTu\no9ub8FtgKXDNSEckjYhpTUlDSbKBgXWoBk8BM6rK2TNJmgKDM0nSyAxsfP6fqmrRFhyO1AumNSVJ\no/TBwPHtwH8uRitNJ86cSZJ6YfCXmtJ05jpnkqS+cLZAwuBMkiSpV0xrSpJGJsmv/DNjtiMwtq9m\ngKqqXUYyMGmEDM4kSZJ6xLSmJElSjxicSZIk9YjBmaQpSbIhyZokHyV5OsmOU2jrpCQvteMzk9y8\nkbq7Jbl6En3cluTGTS3fSDu/bY5+JWk8gzNJU/VHVc2rqsPo9ta8cvBkOkP/r6mqF6vq7o1U2Q0Y\nOjiTpL4zOJO0OS0HDkxyQJJPkjwIrAL2T7IwyYokq9oM284ASU5L8mmSt4FzxxpKcmmSB9rx3kme\nT7K2fRYAdwOz2qzdPa3eTUneT/JhktsH2rolyWdJXgUOGeaGkryQZGWSj5NcMe7cve1+XkuyZyub\nlWRJu2Z5ktmTeI6SpjGDM0mbRZJtgNOBda3oEOCxtuL7euBW4NSqOoJuy54bkswAHgHOAE4A9pmg\n+fuBN6tqLnAE8DFwM/BFm7W7KclC4CDgaGAeMD/JiUnmAxcAh9MFf0cNeWuXVdV84EhgUZI9WvlO\nwKp2P2/yz7ZDDwPXtmtuBB4csj9J05x7a0qaqh2SrGnHy4FHgX2Br6rq3VZ+LDAHeCcJwHbACmA2\n8GVVfQ6Q5HHgX7NTzSnAJQBVtQH4Ocnu4+osbJ/V7fvOdMHaTOD5qvq99fHikPe3KMk57Xj/1uYP\nwF/AU638ceC5Nhu4AHi63SfA9kP2J2maMziTNFV/VNW8wYIWmKwfLAJeqaoLx9X713VTFOCuqnpo\nXB/XT7rB5CTgVOC4qvo9yRvAjAmqF1024qfxz0OShmFaU9KW8C5wfJIDAZLslORg4FPggCSzWr0L\nJ7j+NeCqdu3WSXYFfqWbFRvzMnDZwLts+yXZC3gLODvJDklm0qVQN9WuwI8tMJtNNwM4ZivgvHZ8\nEfB2Vf0CfJnk/DaGJJk7RH+SZHAm6f9XVd8DlwJPJvmQltKsqj/p0piL2w8CvpqgieuAk5OsA1YC\nc6rqB7o06UdJ7qmqpcATwIpW7xlgZlWtoks/rgGepUu9TuTWJN+MfYAlwDZtzHfSBZlj1gOHJllJ\nl3a9o5VfDFyeZC3du3FnbepzkiRw+yZJkqReceZMkiSpRwzOJEmSesTgTJIkqUcMziRJknrE4EyS\nJKlHDM4kSZJ6xOBMkiSpRwzOJEmSeuRveM9hBVbairIAAAAASUVORK5CYII=\n", 178 | "text/plain": [ 179 | "" 180 | ] 181 | }, 
182 | "metadata": {}, 183 | "output_type": "display_data" 184 | } 185 | ], 186 | "source": [ 187 | "cm = confusion_matrix(config.test_df['label'], config.test_df['pred_label'], labels=labels)\n", 188 | "\n", 189 | "plt.figure(figsize=(10,10))\n", 190 | "plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n", 191 | "plt.colorbar()\n", 192 | "tick_marks = np.arange(len(labels))\n", 193 | "plt.xticks(tick_marks, labels, rotation=90)\n", 194 | "plt.yticks(tick_marks, labels)\n", 195 | "plt.xlabel('Predicted Label')\n", 196 | "plt.ylabel('True Label')\n", 197 | "plt.title('Confusion Matrix')" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Off-diagonal elements of this matrix constitute errors. If you trained with the full dataset (the default setting of `sample_frac=1.0`), you should see that most elements fall along the diagonal, and off-diagonal elements are near-zero." 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Operationalization image set analysis\n", 212 | "\n", 213 | "The true labels for the Middlesex County, MA images are unknown (and in many cases would be undefined, since an image may include land of multiple types), so we will not test their accuracy. However, we will use them to draw a predicted land use map of Middlesex County, MA:\n", 214 | "\n", 215 | "During data loading, we merged the operationalization prediction dataframe with information on each tile's latitude and longitude boundaries:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/html": [ 226 | "
\n", 227 | "\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | "
namepred_labelllcrnrlatllcrnrlonurcrnrlaturcrnrlon
0ortho_1-1_hn_s_ma017_2016_1_100240.pngForest42.679883-71.79158642.681898-71.788856
1ortho_1-1_hn_s_ma017_2016_1_100640.pngForest42.681898-71.66601742.683912-71.663287
2ortho_1-1_hn_s_ma017_2016_1_101026.pngForest42.683912-71.57866542.685927-71.575935
3ortho_1-1_hn_s_ma017_2016_1_101498.pngForest42.685927-71.25655242.687941-71.253823
4ortho_1-1_hn_s_ma017_2016_1_101864.pngForest42.687941-71.22379542.689956-71.221065
\n", 287 | "
" 288 | ], 289 | "text/plain": [ 290 | " name pred_label llcrnrlat llcrnrlon \\\n", 291 | "0 ortho_1-1_hn_s_ma017_2016_1_100240.png Forest 42.679883 -71.791586 \n", 292 | "1 ortho_1-1_hn_s_ma017_2016_1_100640.png Forest 42.681898 -71.666017 \n", 293 | "2 ortho_1-1_hn_s_ma017_2016_1_101026.png Forest 42.683912 -71.578665 \n", 294 | "3 ortho_1-1_hn_s_ma017_2016_1_101498.png Forest 42.685927 -71.256552 \n", 295 | "4 ortho_1-1_hn_s_ma017_2016_1_101864.png Forest 42.687941 -71.223795 \n", 296 | "\n", 297 | " urcrnrlat urcrnrlon \n", 298 | "0 42.681898 -71.788856 \n", 299 | "1 42.683912 -71.663287 \n", 300 | "2 42.685927 -71.575935 \n", 301 | "3 42.687941 -71.253823 \n", 302 | "4 42.689956 -71.221065 " 303 | ] 304 | }, 305 | "execution_count": 6, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "config.o16n_df.head()" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "In our county map, each image will be represented by a single pixel. The color of that pixel will be determined by the image's predicted label. We convert the latitude and longitude of each image's lower-righthand corner into an x and y index for a single pixel in the final image:" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 7, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "lat_values = np.sort(config.o16n_df['llcrnrlat'].unique()).tolist()\n", 330 | "max_lat_idx = len(lat_values) - 1\n", 331 | "lon_values = np.sort(config.o16n_df['llcrnrlon'].unique()).tolist()\n", 332 | "config.o16n_df['x_idx'] = config.o16n_df['llcrnrlon'].apply(lambda x: lon_values.index(x))\n", 333 | "config.o16n_df['y_idx'] = config.o16n_df['llcrnrlat'].apply(lambda x: lat_values.index(x))" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "Then we fill in the appropriate color for each pixel in an otherwise-black array, and display the result. 
(Note that the image's y-orientation will be inverted by the `pillow` package, so we fill in the rows from bottom to top.)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 8, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "data": { 350 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVsAAAE5CAIAAACxkRTrAAAqdElEQVR4nO1d25ajOAzM//+056F7\n0sTWpUqSgSSqs2enA7JkbN0h5PEQMaS/h0DoD2cxjE8885GYTGzgOsrlc1xhRmhmmWkYwkRt0Sgl\ndfIvZBURWGd4xKkLe0fETF30FCIrbQuf58dy7uQ9QRQOHI4T116jwi1wNYXSH+LmrmPB+bkXYy7C\nzMadGCPiO5C++JlBRm92YHVEN9GSolmM5YoMj03NxRoSYGfTLy6D44GnMFUD3x5j+f96dv1bO2JL\n0YcQmcJwznNzQLL6pELEjGTNsWMp9w2Rn/T/xYk4cWQEkG685cqjACsqqmJEyDQabLFVqktCAXIV\nW5Vofw9CTvrWPoix+7BixNOQU3CDKeyGYfz2wYysTUW1Jm4rSCk+OZ5M5WVdiPEYD7QFgJKth3RN\nW9dzq+6/M4w1rSolNKGZEIZUAeLBWHXgXj7rKY4KuhYOabiJVVzgCUYDq0cb8H4ELN8goBoQF21v\nRGzep4DpNGm4v6a0tYMrmqsYPHRdIuZX0oxQmH23Q8nYrZFgIfU5e7wKJa7KGGtfV7jNYRyvWDFh\ndu/bSdsx2/Fma5AGtf2Tfx1YLNdElK90SWYR6K1Q6yZGbCp70krozZqreo2H6Fc4hn/LkklCJUKt\nYVFb+H4Epl1Mhu5MJR8Tym5gSb/gNGSyLYxrDJqBQfLK/XWa59d7ARxujg06lMJpsL5p65TwKonl\nXMewqucDTeR82xpQi+SvBRMqG9tlvKKw9ga9O+iJcInGKPjqImqBJ7fTjQaRyetVyPm2KHyDf5zT\nhCrONp+x5VoapVA2Zu9+lRT2YUGx4WwRJP79dAdaYFxbDOH03j0fqCXrGgS1Y9Wh3+h3dlyzkYCN\n0r4tlb8owVagP1cPoFuD5PTmBDm06lvuWVKZ2ilN/m+0ehlXrETQGdsVoCuSTx/Gyz8YMQ6+2hJK\nAICtTOvmArWOIJBKHA5yk3G1iISoOO1BNiDWm4iEOX7IzzhDEYsyW6oilgsBzwWIhC9qnekEZUw9\nSekOeV4kvMr0LnwgAk0aZEjFstGVMCV0R8fhyEyr8XUvM6Rz6Lyi125HvIj0UOUl+CZyRxzyIoX8\ndHeggVKQZMTYtMxFPLXniCz2IdFQb8VYq+UU7vE2O/YQ9iRnE6O5CjNZf4cvsDXMHfuQ3UHV2jll\n85oNUmEf7CPE1qfK5dntj1CCbV1UGFplEq5HsGJdOD+kFcr5l+/wBT/QrZrjEB4+jRqH/yiG4iVM\nbHEO2txADm4m5TE5PsnruMXVbwbKQINn7XjRX2iVknj8FOvM1lDvDcpJVmhbATQdwgncU3gCzyzF\nj6mPIXYMHGllpIHAmxdKccikVIFTOMPvcAlpOwcDgnFQ41PigJ52G2KSNQ/KN/HTcDOIGNsIh8xT\nAyXuYOURSDD1M9/iDdSLZovwsDfNuKFMFTgkN3HDmDAVxptuiAIsS5yjzARmnc2sotdzB0W4E/j0\n+GVgfoibLAyl5KayDCmbqHxQZ6takb5sADS78bvez5ss5yiYMfbqBbkNcLOpWjIwh2fF4d6kdu9j\nnYhacSXEx3HG27GZsmje6pzZq6OTq42IKBL1zshU4HU7lB1yZjlTaH47xW1h5TlcQsL4SxwKvwKz\nsgqXMF/jFMo1LJAIUHm+xnDVTlFfq5pYSo6A23lBbbLTO8SCvN3AQceckM1NJ3dkr28M25ZYDtqR\nEpzYBSz7Kbp9djuW/++eTM6xzoN4JrHk1eC2ecB3onyZwFrAGH5QnPfY9X39jifP2hrhob/rxavP\nw3k7MiuQU1Da53uEQJA5E8bEqjoFVde+yYynv59uznjz0sohmT4QZi0ResWRkF+yr41oJEG8+vZv\nTIJ4+dvipOWltvStBinNBFXzqokNiyfnHp9uYueSEqOH8cnnQ/sb8STmYBtFWDfZDmsxTdUGanES\nVD3Xtk2FjqO8XP5xZOl2JpqeJKqDgZFhjCJVhKss34faBVhThpL4H5uk7TjwOSSWiMj5S2riC6vC\nbIhPkieHNR4b3COo8W4xbJBRBCxEs/QYWlfzGqXj8y0qTHznPN3Rl8SJ6bdAtNMykYU1T5SQvzsC\nl1uezR4/TlrORkiqaoDn5Q83jLM6SMocwulSktBdfAbOsi95Fec3ocMH5qB/+XDYAQuMz0PfsTH9\nywOvOLYWPgbZ+4O+iGWA+9BhZSY3nN2Zv0PB+4jP2FYMhf0b4/iqMdrAUCiOJDpwe+yXYFImUGSy\nWinE5LgzFQc1xDBCfA2L6iNLelqjGgx27KgrEU9o8VmJwdBkMx8MBKgMTlnw2bgob2uze93HTJkQ\nQTuGF5wQBgNAaoTklPCAFhUk/zTbctBnX+XpYkOUFE/mAXAGDf73nVTA2IgH+XYvgF1/VjWrhlRh\np+jLLgvsp4h9n+OxwO+mFF0z0quJiIpVfFmpH4DS3VX5s6eOBOsMqyIPQfdLzKVQhQ0FhNXa+DA4\nP/8PT5KLvQznC3DnuV2MqqWhavi8LF7EFh+xAVJpUcQX26CXt8VevRpZVMz/3ddgC7RfN5n/EM+u\nHy9a4xqxV/Ttshzg4uKkJcp1Iek56Lr3G1zMoulDPUJhLvDImTc4trx/tg4Fcw17wm+qL7urxd3M\nj/FoymjedEdujuCqitXj8WDAlYgVrx308CK8OBYnRIA9sLPMLDv4Tc3yTaddgl/PKtnweJRGfiRl\nPXkndvgIl6dh82Dj8PnHpuUyY2zWWXjjx/L/3fib1Dc7gsfj8RDfC5hclNW8cYYx12D0xsSzSL2z\nFpmTGRuvVFyzmKUsL3uMh807KAl4hpVAzPKT5X5kyQvf2X9HxFJcN0VXTMvJNUpiXaawv26vCY9s\nmA5eXyBpGoISZ4ElC7SQfanT9+CvcHi8w2pmKvaARSVEs7nAVWs/OyZmHoIr1m5IYb/bwiZA1lQa\nHKqaYSCTaz1OVK46zoi3VamwHSXBgK/kCKgl8zTIpMjTwYF4SjTs0x8OLU6KzrmywtMpbRPaUVNc\nDqSdkWROsfXcTckLUa/dB9u531BHboDVKRi6VbuE+2zjnK3ONAhrmRuXrOUIGH/xW0Yuk8KN3aQj\nsqwPbyWuqDJvvKW3dYUnZdmkhkliqkwr2ZHCSg3ozkDSxvSvcLJEp+L9iG9HVIHOc6K4
cRLuPxh2hdc52P25k8D/THsxmAV2q\nKB/bwBvvD0SLKzSd4JFxUnibcHJMscefPHFactRfbWjcBuxTdEBRALJCQrH1GIHWAqCsNwS518Cu\nzMSgnULjYthPBEjh7sUOw9kE8rBQxo+8WpdKD2YBpjzKB60237cqGncC3uLyDHt3K+DXE9mPR9im\na0fg9LyzK9AeoXELJJ8TCAx3b0OwbM3YPpYjQSk2Z3EipFfNzKjRyGHrDYUcK240VuwgMthmJYFY\nk7LRuB1iLbqNtmVyulUQdl1Su4nGmyHXOf/lEMvPY3KNhwtCXUOI/v81ov3LRuOOKHzoAGwobA6J\ne+2O4R5usDQa74loEb7PSOyWXqqJ2Gg03hgdoBuNm8N/FI9MMH4Y/j0apD8JIN96bH/RaNSDbRyw\njM+02/YRjUYxthrVUgs8X+rYPYJG4wbIGyH+AHWj0XgDAI8D7TJ7sCMgfe2ifjKNRuPxAJ6D5p8j\nhujHWkMQowtm0mg0ipHIHVD73frFjUaj4QK6R1BaR6w3F/u7Ao3GFcBuPTrfDhaPH3sEpEHT311o\nh9Fo7IVuY+f0HM/k2Wg0UFxigWNJLOz7Ie0mGo17I9cGiH0Js9FoVECJxWD570byKlBf6W40GkH8\nWX74+wuv34DyHyLCBEFUpzmkRqOxBc+S4P8fSG9SeCihfxOh0XgL+EnA+h6UQtmNRmMH6IcCDmdP\nNcz2Ao3GPiDfe6LORkmD6Gqi0diI8PeY4vLG35+NRuN0TK0+ctwfAzE6b74X0F6j0ShFzqTio7E6\nZJCNiXYQjUYFTrGkEXp1WvJZiUajUQw08Tfo7NbAz9crp5pDkSQ3DttfNBpnADHSy63x8gk0Gp+D\nIf4pHIm/8sg7lbLodgeNRiUYi3JKBtIXWA8+sE9HtV9oNIpBGltByiCehG1bHtuuodEohl087ODf\naDTuhYrHBLdYuuKe2qu8L/4BaUELmxAC0R4AAAAASUVORK5CYII=\n", 351 | "text/plain": [ 352 | "" 353 | ] 354 | }, 355 | "execution_count": 8, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "label_to_color_dict = {'Barren': np.array([0, 255, 0]),\n", 362 | " 'Cultivated': np.array([255, 255, 255]),\n", 363 | " 'Developed': np.array([255, 0, 0]),\n", 364 | " 'Forest': np.array([0, 255, 0]),\n", 365 | " 'Herbaceous': np.array([0, 255, 0]),\n", 366 | " 'Shrub': np.array([0, 255, 0])}\n", 367 | "\n", 368 | "county_image = np.zeros((len(lat_values), len(lon_values), 3))\n", 369 | "for row in config.o16n_df.itertuples():\n", 370 | " county_image[max_lat_idx - row.y_idx, row.x_idx, :] = label_to_color_dict[row.pred_label]\n", 371 | "Image.fromarray(np.uint8(county_image))" 372 | ] 373 | } 374 | ], 375 | "metadata": { 376 | "kernelspec": { 377 | "display_name": "Python 3", 378 | "language": "python", 379 | "name": "python3" 380 | }, 381 | "language_info": { 382 | "codemirror_mode": { 383 | "name": "ipython", 384 | "version": 3 385 | }, 386 | "file_extension": ".py", 387 | "mimetype": "text/x-python", 388 | "name": "python", 389 | "nbconvert_exporter": "python", 390 | "pygments_lexer": "ipython3", 391 | "version": "3.5.2" 392 | } 393 | }, 394 | "nbformat": 4, 395 | "nbformat_minor": 2 396 | } 397 | -------------------------------------------------------------------------------- /Code/04_Result_Analysis/analysis_config_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | analysis_config_loader.py 3 | by Mary Wahl 4 | (c) Microsoft Corporation, 2017 5 | 6 | Loads dataframes of prediction results and the description of a trained model. 
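Typical use, mirroring the 'Model prediction analysis' notebook: config = ConfigFile('../settings.cfg', output_model_name). The returned object exposes the test-set and operationalization prediction dataframes (test_df, o16n_df) along with the trained model's metadata attributes.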
7 | ''' 8 | import os, io 9 | import numpy as np 10 | import pandas as pd 11 | from configparser import ConfigParser 12 | from azure.storage.blob import BlockBlobService 13 | 14 | def ensure_str(str_data): 15 | ''' Helper function to correct type of imported strings ''' 16 | if isinstance(str_data, str): 17 | return(str_data) 18 | return(str_data.encode('utf-8')) 19 | 20 | class ConfigFile(object): 21 | ''' Copies ConfigParser results into attributes, correcting type ''' 22 | def __init__(self, config_filename, output_model_name): 23 | ''' Load/validate model information from a config file ''' 24 | config = ConfigParser(allow_no_value=True) 25 | config.read(config_filename) 26 | my_config = config['Settings'] 27 | self.output_model_name = output_model_name 28 | 29 | # Load storage account info 30 | self.storage_account_name = ensure_str( 31 | my_config['storage_account_name']) 32 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 33 | self.container_prediction_results = ensure_str( 34 | my_config['container_prediction_results']) 35 | self.container_trained_models = ensure_str( 36 | my_config['container_trained_models']) 37 | self.container_data_o16n = ensure_str( 38 | my_config['container_data_o16n']) 39 | self.predictions_o16n_filename = '{}_predictions_o16n.csv'.format( 40 | output_model_name) 41 | self.predictions_test_filename = '{}_predictions_test_set.csv'.format( 42 | output_model_name) 43 | 44 | # Load blob service and ensure containers are available 45 | blob_service = BlockBlobService(self.storage_account_name, 46 | self.storage_account_key) 47 | container_list = [i.name for i in blob_service.list_containers()] 48 | for container in [self.container_trained_models, 49 | self.container_prediction_results, 50 | self.container_data_o16n]: 51 | assert container in container_list, \ 52 | 'Could not find container {} in storage '.format(container) + \ 53 | 'account {}'.format(self.storage_account_name) 54 | 55 | # Load the predictions themselves 56 | try: 57 | o16n_blob = blob_service.get_blob_to_text( 58 | container_name=self.container_prediction_results, 59 | blob_name=self.predictions_o16n_filename) 60 | self.o16n_df = pd.read_csv(io.StringIO(o16n_blob.content)) 61 | except Exception as e: 62 | raise Exception('Error loading operationalization predictions;' + 63 | 'did you run batch_score_spark.py with this model?\n{}'.format( 64 | e)) 65 | self.o16n_df['name'] = self.o16n_df['filepath'].apply( 66 | lambda x: os.path.basename(x)) 67 | self.o16n_df.drop('filepath', axis=1, inplace=True) 68 | 69 | try: 70 | test_blob = blob_service.get_blob_to_text( 71 | container_name=self.container_prediction_results, 72 | blob_name=self.predictions_test_filename) 73 | self.test_df = pd.read_csv(io.StringIO(test_blob.content)) 74 | except Exception as e: 75 | raise Exception('Error downloading test set predictions:' + 76 | '\n{}'.format(e)) 77 | 78 | try: 79 | tile_blob = blob_service.get_blob_to_text( 80 | container_name=self.container_data_o16n, 81 | blob_name='tile_summaries.csv') 82 | self.tile_summaries_df = pd.read_csv(io.StringIO(tile_blob.content)) 83 | except Exception as e: 84 | raise Exception('Error downloading tile summaries for o16n data:' + 85 | '\n{}'.format(e)) 86 | self.tile_summaries_df['name'] = self.tile_summaries_df['filename'] \ 87 | .apply(lambda x: os.path.basename(x)) 88 | self.tile_summaries_df.drop('filename', axis=1, inplace=True) 89 | self.o16n_df = self.o16n_df.merge(self.tile_summaries_df, 90 | on='name', how='inner') 91 | self.o16n_df = 
self.o16n_df[['name', 'pred_label', 'llcrnrlat', 92 | 'llcrnrlon', 'urcrnrlat', 'urcrnrlon']] 93 | 94 | # Load the description of the trained model 95 | try: 96 | description = blob_service.get_blob_to_text( 97 | container_name=self.container_trained_models, 98 | blob_name='{}/model.info'.format(self.output_model_name)) 99 | except Exception as e: 100 | raise Exception('Error downloading model description:' + 101 | '\n{}'.format(e)) 102 | description_dict = {} 103 | for line in description.content.split('\n'): 104 | if len(line) == 0: 105 | continue 106 | key, val = line.strip().split(',') 107 | description_dict[key] = val 108 | self.model_source = description_dict['model_source'] 109 | self.pretrained_model_type = description_dict['pretrained_model_type'] 110 | self.mmlspark_model_type = description_dict['mmlspark_model_type'] 111 | return 112 | -------------------------------------------------------------------------------- /Code/settings.cfg: -------------------------------------------------------------------------------- 1 | [Settings] 2 | # Credentials for the Azure Storage account 3 | # All three values should be updated for the user's storage account. 4 | storage_account_name = 5 | storage_account_key = 6 | 7 | # Batch AI training credentials 8 | bait_subscription_id = 9 | bait_aad_client_id = 10 | bait_aad_secret = 11 | bait_aad_tenant = 12 | bait_region = eastus 13 | bait_resource_group_name = 14 | bait_vms_in_cluster = 2 15 | bait_vms_per_job = 2 16 | bait_cluster_name = landuseclassifier 17 | 18 | # Named of containers in Azure Storage account (no need to modify) 19 | container_data_training = train 20 | container_data_testing = test 21 | container_data_o16n = middlesexma2016 22 | container_trained_models = trainedmodels 23 | container_pretrained_models = pretrainedmodels 24 | container_prediction_results = predictions 25 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aerial Image Classification 2 | 3 | > **NOTE** This content is no longer maintained. 
Visit the [Azure Machine Learning Notebook](https://github.com/Azure/MachineLearningNotebooks) project for sample Jupyter notebooks for ML and deep learning with Azure Machine Learning. 4 | 5 | ## Link to the Microsoft DOCS site 6 | 7 | The detailed documentation for this real world scenario includes the step-by-step walkthrough: 8 | 9 | [https://docs.microsoft.com/azure/machine-learning/preview/scenario-aerial-image-classification](https://docs.microsoft.com/azure/machine-learning/preview/scenario-aerial-image-classification) 10 | 11 | ## Link to the Gallery GitHub repository 12 | 13 | The public GitHub repository for this real world scenario contains all the code samples: 14 | [https://github.com/Azure/MachineLearningSamples-AerialImageClassification](https://github.com/Azure/MachineLearningSamples-AerialImageClassification) 15 | 16 | ## Overview 17 | 18 | In this scenario, we train machine learning models to classify the type of land shown in aerial images of 224-meter x 224-meter plots. Land use classification models can be used to track urbanization, deforestation, loss of wetlands, and other major environmental trends using periodically collected aerial imagery. After training and validating the classification model, we will apply it to aerial images spanning Middlesex County, MA -- home of Microsoft's New England Research & Development (NERD) Center -- to demonstrate how these models can be used to study trends in urban development. This example includes two approaches for distributed model training with Azure Machine Learning (AML) Workbench: deep neural network training on [Azure Batch AI](https://batchaitraining.azure.com/) GPU clusters, and transfer learning using the [Microsoft Machine Learning for Apache Spark (MMLSpark)](https://github.com/Azure/mmlspark) package. The example concludes with an illustration of model operationalization for scoring large static image sets on an [Azure HDInsight Spark](https://azure.microsoft.com/en-us/services/hdinsight/apache-spark/) cluster. 19 | 20 | ## Key components needed to run this scenario 21 | - An [Azure account](https://azure.microsoft.com/en-us/free/) (free trials are available), which will be used to create an HDInsight Spark cluster with 40 worker nodes and an Azure Batch AI GPU cluster with two VMs/two GPUs. 22 | - [Azure Machine Learning Workbench](https://review.docs.microsoft.com/en-us/azure/machine-learning/preview/overview-what-is-azure-ml). 23 | - [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy), a free utility for coordinating file transfer between Azure storage accounts. 24 | - An SSH client; we recommend [PuTTy](http://www.putty.org/). 25 | 26 | ## Data/Telemetry 27 | Aerial Image Classification collects usage data and sends it to Microsoft to help improve our products and services. Read our [privacy statement](http://go.microsoft.com/fwlink/?LinkId=521839) to learn more. 28 | 29 | ## Contributing 30 | 31 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 32 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 33 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 34 | 35 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 36 | a CLA and decorate the PR appropriately (for example, label, comment). Simply follow the instructions 37 | provided by the bot. 
You will only need to do this once across all repos using our CLA. 38 | 39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 40 | For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 41 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 42 | -------------------------------------------------------------------------------- /aml_config/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | 14 | name: project_environment 15 | dependencies: 16 | # The python interpreter version. 17 | # Currently Azure ML Workbench only supports 3.5.2. 18 | - python=3.5.2 19 | 20 | # Required for Jupyter Notebooks. 21 | - ipykernel=4.6.1 22 | 23 | - pip: 24 | # The API for Azure Machine Learning Model Management Service. 25 | # Details: https://github.com/Azure/Machine-Learning-Operationalization 26 | - azure-common==1.1.8 27 | - azure-storage==0.36.0 28 | - azure-ml-api-sdk==0.1.0a11 29 | - pandas 30 | 31 | # Helper utilities for dealing with Azure ML Workbench Assets. 32 | - https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.assets-1.0.0-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 33 | -------------------------------------------------------------------------------- /aml_config/docker.compute: -------------------------------------------------------------------------------- 1 | # Defines a localdocker compute target that uses a local Docker container. 2 | type: "localdocker" 3 | 4 | # The base image for the Docker container. This is used to provision Spark and 5 | # the Conda package manager. Supported base images are microsoft/mmlspark:plus 6 | # variants. The default 0.7 version includes Spark 2.1.1. 7 | baseDockerImage: "microsoft/mmlspark:plus-0.7.91" 8 | 9 | # Azure ML Workbench uses the Docker shared volumes feature to improve run 10 | # performance and to enable the automatic mounting of the shared directory. 11 | # This Docker feature isn't completely stable yet on Windows, and so it's 12 | # disabled by default to ensure compatibility. 13 | sharedVolumes: false 14 | 15 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 16 | # at a persistent directory that is shared between all runs of the same project 17 | # on the same target. This specifies the base path for those directories. 18 | # Note that this is not available if sharedVolumes is false.
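# For illustration only (hypothetical usage, not part of the shipped file): a
# script launched on this target could read the shared path from the
# environment, for example
#     import os
#     share_dir = os.environ.get('AZUREML_NATIVE_SHARE_DIRECTORY', '.')
# and cache downloaded imagery or other intermediate results under share_dir.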
19 | nativeSharedDirectory: "~/.azureml/share/" 20 | -------------------------------------------------------------------------------- /aml_config/docker.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "docker" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "PySpark" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be performed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true 35 | 36 | -------------------------------------------------------------------------------- /aml_config/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for jupyter-notebook. 2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # JupyterApp(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Base class for Jupyter applications 23 | 24 | ## Answer yes to any prompts. 25 | #c.JupyterApp.answer_yes = False 26 | 27 | ## Full path of a config file. 28 | #c.JupyterApp.config_file = '' 29 | 30 | ## Specify a config file to load. 31 | #c.JupyterApp.config_file_name = '' 32 | 33 | ## Generate default config file.
34 | #c.JupyterApp.generate_config = False 35 | 36 | #------------------------------------------------------------------------------ 37 | # NotebookApp(JupyterApp) configuration 38 | #------------------------------------------------------------------------------ 39 | 40 | ## Set the Access-Control-Allow-Credentials: true header 41 | #c.NotebookApp.allow_credentials = False 42 | 43 | ## Set the Access-Control-Allow-Origin header 44 | # 45 | # Use '*' to allow any origin to access your server. 46 | # 47 | # Takes precedence over allow_origin_pat. 48 | #c.NotebookApp.allow_origin = '' 49 | 50 | ## Use a regular expression for the Access-Control-Allow-Origin header 51 | # 52 | # Requests from an origin matching the expression will get replies with: 53 | # 54 | # Access-Control-Allow-Origin: origin 55 | # 56 | # where `origin` is the origin of the request. 57 | # 58 | # Ignored if allow_origin is set. 59 | #c.NotebookApp.allow_origin_pat = '' 60 | 61 | ## Whether to allow the user to run the notebook as root. 62 | #c.NotebookApp.allow_root = False 63 | 64 | ## DEPRECATED use base_url 65 | #c.NotebookApp.base_project_url = '/' 66 | 67 | ## The base URL for the notebook server. 68 | # 69 | # Leading and trailing slashes can be omitted, and will automatically be added. 70 | #c.NotebookApp.base_url = '/' 71 | 72 | ## Specify what command to use to invoke a web browser when opening the notebook. 73 | # If not specified, the default browser will be determined by the `webbrowser` 74 | # standard library module, which allows setting of the BROWSER environment 75 | # variable to override it. 76 | #c.NotebookApp.browser = '' 77 | 78 | ## The full path to an SSL/TLS certificate file. 79 | #c.NotebookApp.certfile = '' 80 | 81 | ## The full path to a certificate authority certificate for SSL/TLS client 82 | # authentication. 83 | #c.NotebookApp.client_ca = '' 84 | 85 | ## The config manager class to use 86 | #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager' 87 | 88 | ## The notebook manager class to use. 89 | #c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager' 90 | 91 | ## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's 92 | # set_secure_cookie docs for details. 93 | #c.NotebookApp.cookie_options = {} 94 | 95 | ## The random bytes used to secure cookies. By default this is a new random 96 | # number every time you start the Notebook. Set it to a value in a config file 97 | # to enable logins to persist across server sessions. 98 | # 99 | # Note: Cookie secrets should be kept private, do not share config files with 100 | # cookie_secret stored in plaintext (you can read the value from a file). 101 | #c.NotebookApp.cookie_secret = b'' 102 | 103 | ## The file where the cookie secret is stored. 104 | #c.NotebookApp.cookie_secret_file = '' 105 | 106 | ## The default URL to redirect to from `/` 107 | #c.NotebookApp.default_url = '/tree' 108 | 109 | ## Disable cross-site-request-forgery protection 110 | # 111 | # Jupyter notebook 4.3.1 introduces protection from cross-site request 112 | # forgeries, requiring API requests to either: 113 | # 114 | # - originate from pages served by this server (validated with XSRF cookie and 115 | # token), or - authenticate with a token 116 | # 117 | # Some anonymous compute resources still desire the ability to run code, 118 | # completely without authentication. 
These services can disable all 119 | # authentication and security checks, with the full knowledge of what that 120 | # implies. 121 | #c.NotebookApp.disable_check_xsrf = False 122 | 123 | ## Whether to enable MathJax for typesetting math/TeX 124 | # 125 | # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is 126 | # very large, so you may want to disable it if you have a slow internet 127 | # connection, or for offline use of the notebook. 128 | # 129 | # When disabled, equations etc. will appear as their untransformed TeX source. 130 | #c.NotebookApp.enable_mathjax = True 131 | 132 | ## extra paths to look for Javascript notebook extensions 133 | #c.NotebookApp.extra_nbextensions_path = [] 134 | 135 | ## Extra paths to search for serving static files. 136 | # 137 | # This allows adding javascript/css to be available from the notebook server 138 | # machine, or overriding individual files in the IPython 139 | #c.NotebookApp.extra_static_paths = [] 140 | 141 | ## Extra paths to search for serving jinja templates. 142 | # 143 | # Can be used to override templates from notebook.templates. 144 | #c.NotebookApp.extra_template_paths = [] 145 | 146 | ## 147 | #c.NotebookApp.file_to_run = '' 148 | 149 | ## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS 150 | # recompilation 151 | #c.NotebookApp.ignore_minified_js = False 152 | 153 | ## (bytes/sec) Maximum rate at which messages can be sent on iopub before they 154 | # are limited. 155 | #c.NotebookApp.iopub_data_rate_limit = 1000000 156 | 157 | ## (msgs/sec) Maximum rate at which messages can be sent on iopub before they are 158 | # limited. 159 | #c.NotebookApp.iopub_msg_rate_limit = 1000 160 | 161 | ## The IP address the notebook server will listen on. 162 | #c.NotebookApp.ip = 'localhost' 163 | 164 | ## Supply extra arguments that will be passed to Jinja environment. 165 | #c.NotebookApp.jinja_environment_options = {} 166 | 167 | ## Extra variables to supply to jinja templates when rendering. 168 | #c.NotebookApp.jinja_template_vars = {} 169 | 170 | ## The kernel manager class to use. 171 | #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager' 172 | 173 | ## The kernel spec manager class to use. Should be a subclass of 174 | # `jupyter_client.kernelspec.KernelSpecManager`. 175 | # 176 | # The Api of KernelSpecManager is provisional and might change without warning 177 | # between this version of Jupyter and the next stable one. 178 | #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' 179 | 180 | ## The full path to a private key file for usage with SSL/TLS. 181 | #c.NotebookApp.keyfile = '' 182 | 183 | ## The login handler class to use. 184 | #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler' 185 | 186 | ## The logout handler class to use. 187 | #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler' 188 | 189 | ## The MathJax.js configuration file that is to be used. 190 | #c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe' 191 | 192 | ## A custom url for MathJax.js. Should be in the form of a case-sensitive url to 193 | # MathJax, for example: /static/components/MathJax/MathJax.js 194 | #c.NotebookApp.mathjax_url = '' 195 | 196 | ## Dict of Python modules to load as notebook server extensions.Entry values can 197 | # be used to enable and disable the loading ofthe extensions. The extensions 198 | # will be loaded in alphabetical order. 
199 | #c.NotebookApp.nbserver_extensions = {} 200 | 201 | ## The directory to use for notebooks and kernels. 202 | #c.NotebookApp.notebook_dir = '' 203 | 204 | ## Whether to open in a browser after starting. The specific browser used is 205 | # platform dependent and determined by the python standard library `webbrowser` 206 | # module, unless it is overridden using the --browser (NotebookApp.browser) 207 | # configuration option. 208 | #c.NotebookApp.open_browser = True 209 | 210 | ## Hashed password to use for web authentication. 211 | # 212 | # To generate, type in a python/IPython shell: 213 | # 214 | # from notebook.auth import passwd; passwd() 215 | # 216 | # The string should be of the form type:salt:hashed-password. 217 | #c.NotebookApp.password = '' 218 | 219 | ## Forces users to use a password for the Notebook server. This is useful in a 220 | # multi user environment, for instance when everybody in the LAN can access each 221 | # other's machine though ssh. 222 | # 223 | # In such a case, server the notebook server on localhost is not secure since 224 | # any user can connect to the notebook server via ssh. 225 | #c.NotebookApp.password_required = False 226 | 227 | ## The port the notebook server will listen on. 228 | #c.NotebookApp.port = 8888 229 | 230 | ## The number of additional ports to try if the specified port is not available. 231 | #c.NotebookApp.port_retries = 50 232 | 233 | ## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. 234 | #c.NotebookApp.pylab = 'disabled' 235 | 236 | ## (sec) Time window used to check the message and data rate limits. 237 | #c.NotebookApp.rate_limit_window = 3 238 | 239 | ## Reraise exceptions encountered loading server extensions? 240 | #c.NotebookApp.reraise_server_extension_failures = False 241 | 242 | ## DEPRECATED use the nbserver_extensions dict instead 243 | #c.NotebookApp.server_extensions = [] 244 | 245 | ## The session manager class to use. 246 | #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager' 247 | 248 | ## Supply SSL options for the tornado HTTPServer. See the tornado docs for 249 | # details. 250 | #c.NotebookApp.ssl_options = {} 251 | 252 | ## Supply overrides for terminado. Currently only supports "shell_command". 253 | #c.NotebookApp.terminado_settings = {} 254 | 255 | ## Token used for authenticating first-time connections to the server. 256 | # 257 | # When no password is enabled, the default is to generate a new, random token. 258 | # 259 | # Setting to an empty string disables authentication altogether, which is NOT 260 | # RECOMMENDED. 261 | #c.NotebookApp.token = '' 262 | 263 | ## Supply overrides for the tornado.web.Application that the Jupyter notebook 264 | # uses. 265 | #c.NotebookApp.tornado_settings = {} 266 | 267 | ## Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- 268 | # For headerssent by the upstream reverse proxy. Necessary if the proxy handles 269 | # SSL 270 | #c.NotebookApp.trust_xheaders = False 271 | 272 | ## DEPRECATED, use tornado_settings 273 | #c.NotebookApp.webapp_settings = {} 274 | 275 | ## The base URL for websockets, if it differs from the HTTP server (hint: it 276 | # almost certainly doesn't). 
277 | # 278 | # Should be in the form of an HTTP origin: ws[s]://hostname[:port] 279 | #c.NotebookApp.websocket_url = '' 280 | 281 | #------------------------------------------------------------------------------ 282 | # ConnectionFileMixin(LoggingConfigurable) configuration 283 | #------------------------------------------------------------------------------ 284 | 285 | ## Mixin for configurable classes that work with connection files 286 | 287 | ## JSON file in which to store connection info [default: kernel-.json] 288 | # 289 | # This file will contain the IP, ports, and authentication key needed to connect 290 | # clients to this kernel. By default, this file will be created in the security 291 | # dir of the current profile, but can be specified by absolute path. 292 | #c.ConnectionFileMixin.connection_file = '' 293 | 294 | ## set the control (ROUTER) port [default: random] 295 | #c.ConnectionFileMixin.control_port = 0 296 | 297 | ## set the heartbeat port [default: random] 298 | #c.ConnectionFileMixin.hb_port = 0 299 | 300 | ## set the iopub (PUB) port [default: random] 301 | #c.ConnectionFileMixin.iopub_port = 0 302 | 303 | ## Set the kernel's IP address [default localhost]. If the IP address is 304 | # something other than localhost, then Consoles on other machines will be able 305 | # to connect to the Kernel, so be careful! 306 | #c.ConnectionFileMixin.ip = '' 307 | 308 | ## set the shell (ROUTER) port [default: random] 309 | #c.ConnectionFileMixin.shell_port = 0 310 | 311 | ## set the stdin (ROUTER) port [default: random] 312 | #c.ConnectionFileMixin.stdin_port = 0 313 | 314 | ## 315 | #c.ConnectionFileMixin.transport = 'tcp' 316 | 317 | #------------------------------------------------------------------------------ 318 | # KernelManager(ConnectionFileMixin) configuration 319 | #------------------------------------------------------------------------------ 320 | 321 | ## Manages a single kernel in a subprocess on this host. 322 | # 323 | # This version starts kernels with Popen. 324 | 325 | ## Should we autorestart the kernel if it dies. 326 | #c.KernelManager.autorestart = True 327 | 328 | ## DEPRECATED: Use kernel_name instead. 329 | # 330 | # The Popen Command to launch the kernel. Override this if you have a custom 331 | # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not 332 | # pass any arguments to the kernel, because it cannot make any assumptions about 333 | # the arguments that the kernel understands. In particular, this means that the 334 | # kernel does not receive the option --debug if it given on the Jupyter command 335 | # line. 336 | #c.KernelManager.kernel_cmd = [] 337 | 338 | ## Time to wait for a kernel to terminate before killing it, in seconds. 339 | #c.KernelManager.shutdown_wait_time = 5.0 340 | 341 | #------------------------------------------------------------------------------ 342 | # Session(Configurable) configuration 343 | #------------------------------------------------------------------------------ 344 | 345 | ## Object for handling serialization and sending of messages. 346 | # 347 | # The Session object handles building messages and sending them with ZMQ sockets 348 | # or ZMQStream objects. Objects can communicate with each other over the 349 | # network via Session objects, and only need to work with the dict-based IPython 350 | # message spec. The Session will handle serialization/deserialization, security, 351 | # and metadata. 
352 | # 353 | # Sessions support configurable serialization via packer/unpacker traits, and 354 | # signing with HMAC digests via the key/keyfile traits. 355 | # 356 | # Parameters ---------- 357 | # 358 | # debug : bool 359 | # whether to trigger extra debugging statements 360 | # packer/unpacker : str : 'json', 'pickle' or import_string 361 | # importstrings for methods to serialize message parts. If just 362 | # 'json' or 'pickle', predefined JSON and pickle packers will be used. 363 | # Otherwise, the entire importstring must be used. 364 | # 365 | # The functions must accept at least valid JSON input, and output *bytes*. 366 | # 367 | # For example, to use msgpack: 368 | # packer = 'msgpack.packb', unpacker='msgpack.unpackb' 369 | # pack/unpack : callables 370 | # You can also set the pack/unpack callables for serialization directly. 371 | # session : bytes 372 | # the ID of this Session object. The default is to generate a new UUID. 373 | # username : unicode 374 | # username added to message headers. The default is to ask the OS. 375 | # key : bytes 376 | # The key used to initialize an HMAC signature. If unset, messages 377 | # will not be signed or checked. 378 | # keyfile : filepath 379 | # The file containing a key. If this is set, `key` will be initialized 380 | # to the contents of the file. 381 | 382 | ## Threshold (in bytes) beyond which an object's buffer should be extracted to 383 | # avoid pickling. 384 | #c.Session.buffer_threshold = 1024 385 | 386 | ## Whether to check PID to protect against calls after fork. 387 | # 388 | # This check can be disabled if fork-safety is handled elsewhere. 389 | #c.Session.check_pid = True 390 | 391 | ## Threshold (in bytes) beyond which a buffer should be sent without copying. 392 | #c.Session.copy_threshold = 65536 393 | 394 | ## Debug output in the Session 395 | #c.Session.debug = False 396 | 397 | ## The maximum number of digests to remember. 398 | # 399 | # The digest history will be culled when it exceeds this value. 400 | #c.Session.digest_history_size = 65536 401 | 402 | ## The maximum number of items for a container to be introspected for custom 403 | # serialization. Containers larger than this are pickled outright. 404 | #c.Session.item_threshold = 64 405 | 406 | ## execution key, for signing messages. 407 | #c.Session.key = b'' 408 | 409 | ## path to file containing execution key. 410 | #c.Session.keyfile = '' 411 | 412 | ## Metadata dictionary, which serves as the default top-level metadata dict for 413 | # each message. 414 | #c.Session.metadata = {} 415 | 416 | ## The name of the packer for serializing messages. Should be one of 'json', 417 | # 'pickle', or an import name for a custom callable serializer. 418 | #c.Session.packer = 'json' 419 | 420 | ## The UUID identifying this session. 421 | #c.Session.session = '' 422 | 423 | ## The digest scheme used to construct the message signatures. Must have the form 424 | # 'hmac-HASH'. 425 | #c.Session.signature_scheme = 'hmac-sha256' 426 | 427 | ## The name of the unpacker for unserializing messages. Only used with custom 428 | # functions for `packer`. 429 | #c.Session.unpacker = 'json' 430 | 431 | ## Username for the Session. Default is your system username. 
432 | #c.Session.username = 'username' 433 | 434 | #------------------------------------------------------------------------------ 435 | # MultiKernelManager(LoggingConfigurable) configuration 436 | #------------------------------------------------------------------------------ 437 | 438 | ## A class for managing multiple kernels. 439 | 440 | ## The name of the default kernel to start 441 | #c.MultiKernelManager.default_kernel_name = 'python3' 442 | 443 | ## The kernel manager class. This is configurable to allow subclassing of the 444 | # KernelManager for customized behavior. 445 | #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager' 446 | 447 | #------------------------------------------------------------------------------ 448 | # MappingKernelManager(MultiKernelManager) configuration 449 | #------------------------------------------------------------------------------ 450 | 451 | ## A KernelManager that handles notebook mapping and HTTP error handling 452 | 453 | ## 454 | #c.MappingKernelManager.root_dir = '' 455 | 456 | #------------------------------------------------------------------------------ 457 | # ContentsManager(LoggingConfigurable) configuration 458 | #------------------------------------------------------------------------------ 459 | 460 | ## Base class for serving files and directories. 461 | # 462 | # This serves any text or binary file, as well as directories, with special 463 | # handling for JSON notebook documents. 464 | # 465 | # Most APIs take a path argument, which is always an API-style unicode path, and 466 | # always refers to a directory. 467 | # 468 | # - unicode, not url-escaped 469 | # - '/'-separated 470 | # - leading and trailing '/' will be stripped 471 | # - if unspecified, path defaults to '', 472 | # indicating the root path. 473 | 474 | ## 475 | #c.ContentsManager.checkpoints = None 476 | 477 | ## 478 | #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints' 479 | 480 | ## 481 | #c.ContentsManager.checkpoints_kwargs = {} 482 | 483 | ## Glob patterns to hide in file and directory listings. 484 | #c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~'] 485 | 486 | ## Python callable or importstring thereof 487 | # 488 | # To be called on a contents model prior to save. 489 | # 490 | # This can be used to process the structure, such as removing notebook outputs 491 | # or other side effects that should not be saved. 492 | # 493 | # It will be called as (all arguments passed by keyword):: 494 | # 495 | # hook(path=path, model=model, contents_manager=self) 496 | # 497 | # - model: the model to be saved. Includes file contents. 498 | # Modifying this dict will affect the file that is stored. 499 | # - path: the API path of the save destination 500 | # - contents_manager: this ContentsManager instance 501 | #c.ContentsManager.pre_save_hook = None 502 | 503 | ## 504 | #c.ContentsManager.root_dir = '/' 505 | 506 | ## The base name used when creating untitled directories. 507 | #c.ContentsManager.untitled_directory = 'Untitled Folder' 508 | 509 | ## The base name used when creating untitled files. 510 | #c.ContentsManager.untitled_file = 'untitled' 511 | 512 | ## The base name used when creating untitled notebooks. 
513 | #c.ContentsManager.untitled_notebook = 'Untitled' 514 | 515 | #------------------------------------------------------------------------------ 516 | # FileManagerMixin(Configurable) configuration 517 | #------------------------------------------------------------------------------ 518 | 519 | ## Mixin for ContentsAPI classes that interact with the filesystem. 520 | # 521 | # Provides facilities for reading, writing, and copying both notebooks and 522 | # generic files. 523 | # 524 | # Shared by FileContentsManager and FileCheckpoints. 525 | # 526 | # Note ---- Classes using this mixin must provide the following attributes: 527 | # 528 | # root_dir : unicode 529 | # A directory against against which API-style paths are to be resolved. 530 | # 531 | # log : logging.Logger 532 | 533 | ## By default notebooks are saved on disk on a temporary file and then if 534 | # succefully written, it replaces the old ones. This procedure, namely 535 | # 'atomic_writing', causes some bugs on file system whitout operation order 536 | # enforcement (like some networked fs). If set to False, the new notebook is 537 | # written directly on the old one which could fail (eg: full filesystem or quota 538 | # ) 539 | #c.FileManagerMixin.use_atomic_writing = True 540 | 541 | #------------------------------------------------------------------------------ 542 | # FileContentsManager(FileManagerMixin,ContentsManager) configuration 543 | #------------------------------------------------------------------------------ 544 | 545 | ## Python callable or importstring thereof 546 | # 547 | # to be called on the path of a file just saved. 548 | # 549 | # This can be used to process the file on disk, such as converting the notebook 550 | # to a script or HTML via nbconvert. 551 | # 552 | # It will be called as (all arguments passed by keyword):: 553 | # 554 | # hook(os_path=os_path, model=model, contents_manager=instance) 555 | # 556 | # - path: the filesystem path to the file just written - model: the model 557 | # representing the file - contents_manager: this ContentsManager instance 558 | #c.FileContentsManager.post_save_hook = None 559 | 560 | ## 561 | #c.FileContentsManager.root_dir = '' 562 | 563 | ## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 564 | #c.FileContentsManager.save_script = False 565 | 566 | #------------------------------------------------------------------------------ 567 | # NotebookNotary(LoggingConfigurable) configuration 568 | #------------------------------------------------------------------------------ 569 | 570 | ## A class for computing and verifying notebook signatures. 571 | 572 | ## The hashing algorithm used to sign notebooks. 573 | #c.NotebookNotary.algorithm = 'sha256' 574 | 575 | ## The sqlite file in which to store notebook signatures. By default, this will 576 | # be in your Jupyter data directory. You can set it to ':memory:' to disable 577 | # sqlite writing to the filesystem. 578 | #c.NotebookNotary.db_file = '' 579 | 580 | ## The secret key with which notebooks are signed. 581 | #c.NotebookNotary.secret = b'' 582 | 583 | ## The file where the secret key is stored. 584 | #c.NotebookNotary.secret_file = '' 585 | 586 | ## A callable returning the storage backend for notebook signatures. The default 587 | # uses an SQLite database. 
588 | #c.NotebookNotary.store_factory = traitlets.Undefined 589 | 590 | #------------------------------------------------------------------------------ 591 | # KernelSpecManager(LoggingConfigurable) configuration 592 | #------------------------------------------------------------------------------ 593 | 594 | ## If there is no Python kernelspec registered and the IPython kernel is 595 | # available, ensure it is added to the spec list. 596 | #c.KernelSpecManager.ensure_native_kernel = True 597 | 598 | ## The kernel spec class. This is configurable to allow subclassing of the 599 | # KernelSpecManager for customized behavior. 600 | #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' 601 | 602 | ## Whitelist of allowed kernel names. 603 | # 604 | # By default, all installed kernels are allowed. 605 | #c.KernelSpecManager.whitelist = set() 606 | -------------------------------------------------------------------------------- /aml_config/local.compute: -------------------------------------------------------------------------------- 1 | # Defines a local compute target that uses an existing python environment. 2 | type: "local" 3 | 4 | # Specifies the user-managed python environment for the run. By default this 5 | # is "python" which uses the currently active python environment. The Azure ML 6 | # Workbench will use the python environment installed with it and the Azure ML 7 | # CLI will use whatever python environment it was installed into. 8 | # 9 | # You can change this to point at any python environment on your system, 10 | # including virtual environments and Conda environments. Note that backslashes 11 | # need to be escaped in this path, so it's easier to use forward slashes. 12 | pythonLocation: "python" 13 | 14 | # Specifies the path to spark-submit for local Spark runs. By default this 15 | # assumes that Spark is on the path. 16 | sparkSubmitLocation: "spark-submit" 17 | 18 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 19 | # at a persistent directory that is shared between all runs of the same project 20 | # on the same target. This specifies the base path for those directories. 21 | nativeSharedDirectory: "~/.azureml/share/" -------------------------------------------------------------------------------- /aml_config/local.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "local" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "Python" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 
25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be performed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true -------------------------------------------------------------------------------- /aml_config/spark_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Spark configuration and packages specification. The dependencies defined in 2 | # this file will be automatically provisioned for runs that use Spark. 3 | 4 | # For managing third-party python libraries, see conda_dependencies.yml. 5 | 6 | # Spark configuration properties. 7 | configuration: 8 | "spark.app.name": "Azure ML Experiment" 9 | "spark.yarn.maxAppAttempts": 1 10 | 11 | # Repositories to search for the specified Spark packages. 12 | repositories: 13 | - "https://mmlspark.azureedge.net/maven" 14 | 15 | # Spark packages to include in the run. 16 | packages: 17 | # Microsoft Machine Learning for Apache Spark provides a number of deep 18 | # learning and data science tools, including seamless integration of Spark 19 | # Machine Learning pipelines with Microsoft Cognitive Toolkit (CNTK) and 20 | # OpenCV, enabling you to quickly create powerful, highly-scalable 21 | # predictive and analytical models for large image and text datasets. 22 | # Details: https://github.com/Azure/mmlspark 23 | - group: "com.microsoft.ml.spark" 24 | artifact: "mmlspark_2.11" 25 | version: "0.7.91" 26 | 27 | # Required for SQL Server data sources. 28 | - group: "com.microsoft.sqlserver" 29 | artifact: "mssql-jdbc" 30 | version: "6.2.1.jre8" 31 | -------------------------------------------------------------------------------- /docs/Images/example_labels.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/example_labels.PNG -------------------------------------------------------------------------------- /docs/Images/middlesex_ma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/middlesex_ma.png -------------------------------------------------------------------------------- /docs/Images/sample_tile_developed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/sample_tile_developed.png -------------------------------------------------------------------------------- /docs/Images/scenario_schematic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/scenario_schematic.PNG --------------------------------------------------------------------------------
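A note on Code/settings.cfg above: its values are read by the Python scripts under Code/ (the analysis configuration loader at the top of this listing references container_trained_models, for example). The following is a minimal sketch of loading the file with Python's standard-library configparser; the variable names are illustrative and the project's own loading code may differ:

    import configparser

    # settings.cfg is a standard INI-style file with a single [Settings] section
    config = configparser.ConfigParser()
    config.read('Code/settings.cfg')
    settings = config['Settings']

    # The storage credentials ship blank and must be filled in by the user
    storage_account_name = settings.get('storage_account_name')
    storage_account_key = settings.get('storage_account_key')

    # Container names have working defaults and normally need no changes
    container_trained_models = settings.get('container_trained_models', 'trainedmodels')
    container_prediction_results = settings.get('container_prediction_results', 'predictions')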