├── .gitignore ├── Code ├── 01_Data_Acquisition_and_Understanding │ ├── 01_HDInsight_Spark_Provisioning │ │ └── template.json │ └── 02_Batch_AI_Training_Provisioning │ │ ├── prep_nfs.sh │ │ └── retrain_model_distributed.py ├── 02_Modeling │ ├── run_batch_ai.py │ └── run_mmlspark.py ├── 03_Deployment │ └── batch_score_spark.py ├── 04_Result_Analysis │ ├── Model prediction analysis.ipynb │ └── analysis_config_loader.py └── settings.cfg ├── LICENSE.TXT ├── README.md ├── aml_config ├── conda_dependencies.yml ├── docker.compute ├── docker.runconfig ├── jupyter_notebook_config.py ├── local.compute ├── local.runconfig └── spark_dependencies.yml └── docs └── Images ├── example_labels.PNG ├── middlesex_ma.png ├── sample_tile_developed.png └── scenario_schematic.PNG /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/01_HDInsight_Spark_Provisioning/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://schema.management.azure.com/schemas/2014-04-01-preview/deploymentTemplate.json#", 3 | "contentVersion": "0.9.0.0", 4 | "parameters": { 5 | "storageAccountKey": { 6 | "type": "string", 7 | "metadata": { 8 | "description": "The access key for the storage account." 9 | } 10 | },"storageAccountName": { 11 | "type": "string", 12 | "metadata": { 13 | "description": "The access key for the storage account." 14 | } 15 | },"clusterName": { 16 | "type": "string", 17 | "metadata": { 18 | "description": "The name of the HDInsight cluster to create." 
19 | } 20 | }, 21 | "clusterLoginUserName": { 22 | "type": "string", 23 | "defaultValue": "admin", 24 | "metadata": { 25 | "description": "These credentials can be used to submit jobs to the cluster and to log into cluster dashboards." 26 | } 27 | }, 28 | "clusterLoginPassword": { 29 | "type": "securestring", 30 | "metadata": { 31 | "description": "The password must be at least 10 characters in length and must contain at least one digit, one non-alphanumeric character, and one upper or lower case letter." 32 | } 33 | }, 34 | "location": { 35 | "type": "string", 36 | "defaultValue": "eastus", 37 | "metadata": { 38 | "description": "The location where all azure resources will be deployed." 39 | } 40 | }, 41 | "clusterVersion": { 42 | "type": "string", 43 | "defaultValue": "3.6", 44 | "metadata": { 45 | "description": "HDInsight cluster version." 46 | } 47 | }, 48 | "clusterWorkerNodeCount": { 49 | "type": "int", 50 | "defaultValue": 40, 51 | "metadata": { 52 | "description": "The number of nodes in the HDInsight cluster." 53 | } 54 | }, 55 | "clusterKind": { 56 | "type": "string", 57 | "defaultValue": "SPARK", 58 | "metadata": { 59 | "description": "The type of the HDInsight cluster to create." 60 | } 61 | }, 62 | "sshUserName": { 63 | "type": "string", 64 | "defaultValue": "sshuser", 65 | "metadata": { 66 | "description": "These credentials can be used to remotely access the cluster." 67 | } 68 | } 69 | }, 70 | "resources": [ 71 | { 72 | "apiVersion": "2015-03-01-preview", 73 | "name": "[parameters('clusterName')]", 74 | "type": "Microsoft.HDInsight/clusters", 75 | "location": "[parameters('location')]", 76 | "dependsOn": [], 77 | "properties": { 78 | "clusterVersion": "[parameters('clusterVersion')]", 79 | "osType": "Linux", 80 | "tier": "standard", 81 | "clusterDefinition": { 82 | "kind": "[parameters('clusterKind')]", 83 | "configurations": { 84 | "gateway": { 85 | "restAuthCredential.isEnabled": true, 86 | "restAuthCredential.username": "[parameters('clusterLoginUserName')]", 87 | "restAuthCredential.password": "[parameters('clusterLoginPassword')]" 88 | } 89 | } 90 | }, 91 | "storageProfile": { 92 | "storageaccounts": [ 93 | { 94 | "name": "[parameters('storageAccountName')]", 95 | "isDefault": true, 96 | "container": "cluster", 97 | "key": "[parameters('storageAccountKey')]" 98 | } 99 | ] 100 | }, 101 | "computeProfile": { 102 | "roles": [ 103 | { 104 | "name": "headnode", 105 | "minInstanceCount": 1, 106 | "targetInstanceCount": 2, 107 | "hardwareProfile": { 108 | "vmSize": "Standard_D12_V2" 109 | }, 110 | "osProfile": { 111 | "linuxOperatingSystemProfile": { 112 | "username": "[parameters('sshUserName')]", 113 | "password": "[parameters('clusterLoginPassword')]" 114 | } 115 | }, 116 | "virtualNetworkProfile": null, 117 | "scriptActions": [ 118 | { 119 | "name": "mmlspark", 120 | "uri": "https://mmlspark.azureedge.net/buildartifacts/0.11/install-mmlspark.sh", 121 | "parameters": "", 122 | "isHeadNode": true, 123 | "isWorkerNode": true, 124 | "isPersisted": true, 125 | "isZookeeperNode": false, 126 | "isEdgeNode": false, 127 | "applicationName": null 128 | } 129 | ] 130 | }, 131 | { 132 | "name": "workernode", 133 | "minInstanceCount": 1, 134 | "targetInstanceCount": "[parameters('clusterWorkerNodeCount')]", 135 | "hardwareProfile": { 136 | "vmSize": "Standard_D4_V2" 137 | }, 138 | "osProfile": { 139 | "linuxOperatingSystemProfile": { 140 | "username": "[parameters('sshUserName')]", 141 | "password": "[parameters('clusterLoginPassword')]" 142 | } 143 | }, 144 | 
"virtualNetworkProfile": null, 145 | "scriptActions": [ 146 | { 147 | "name": "mmlspark", 148 | "uri": "https://mmlspark.azureedge.net/buildartifacts/0.11/install-mmlspark.sh", 149 | "parameters": "", 150 | "isHeadNode": true, 151 | "isWorkerNode": true, 152 | "isPersisted": true, 153 | "isZookeeperNode": false, 154 | "isEdgeNode": false, 155 | "applicationName": null 156 | } 157 | ] 158 | } 159 | ] 160 | } 161 | } 162 | } 163 | ] 164 | } 165 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/02_Batch_AI_Training_Provisioning/prep_nfs.sh: -------------------------------------------------------------------------------- 1 | sudo apt-get update 2 | sudo apt-get install unzip 3 | mkdir -p /data/training_images 4 | mkdir -p /data/validation_images 5 | wget https://mawahstorage.blob.core.windows.net/aerialimageclassification/imagesets/balanced_training_set.zip 6 | wget https://mawahstorage.blob.core.windows.net/aerialimageclassification/imagesets/balanced_validation_set.zip 7 | unzip balanced_validation_set.zip -d /data/validation_images 8 | unzip balanced_training_set.zip -d /data/training_images 9 | -------------------------------------------------------------------------------- /Code/01_Data_Acquisition_and_Understanding/02_Batch_AI_Training_Provisioning/retrain_model_distributed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | retrain_model_distributed.py 3 | by Mary Wahl, 2017 4 | Copyright Microsoft, all rights reserved 5 | 6 | Retrain AlexNet and ResNet 18 models to classify aerial images by land use. 7 | Makes use of distributed learners. 8 | Expects the following parameters: 9 | - input_dir: The parent directory containing training images. This 10 | directory should contain only subdirectories (whose names 11 | will be used as the class labels). Each subdirectory should 12 | contain only image files and should not be empty. 13 | - validation_dir: The parent directory containing validation images, similar 14 | in contents to input_dir. 15 | - output_model_name: The filepath where the retrained model will be stored. 16 | Supporting files will be stored to the same directory. 17 | - model_path: The location of the pretrained AlexNet or ResNet 18 model 18 | - retraining_type: Must be "last_only", "fully_connected", or "all". Cannot 19 | use retraining type "fully_connected" with model type 20 | "resnet18" 21 | - model_type: Must be "alexnet" or "resnet18" 22 | 23 | Side effects: 24 | This script will create a temporary directory, in which it will write MAP 25 | files The directory will be removed on completion. 26 | ''' 27 | 28 | import numpy as np 29 | import pandas as pd 30 | import os, argparse, glob, tempfile, cntk 31 | from cntk.io import transforms as xforms 32 | import cntk.train.distributed as distributed 33 | from cntk.train.training_session import CheckpointConfig, training_session 34 | from PIL import Image 35 | 36 | 37 | def write_map_file(map_filename, input_dir, output_dir): 38 | ''' 39 | Writes the map file required by ImageDeserializer. Returns the number of 40 | distinct classes found in the training set. 
41 | ''' 42 | df = pd.DataFrame([]) 43 | df['filename'] = list(glob.iglob(os.path.join(input_dir, '*', '*'))) 44 | df['label'] = df['filename'].apply(lambda x: 45 | os.path.basename(os.path.dirname(x))) 46 | labels = list(np.sort(df['label'].unique().tolist())) 47 | with open(os.path.join(output_dir, 'labels_to_inds.tsv'), 'w') as f: 48 | for i, label in enumerate(labels): 49 | f.write('{}\t{}\n'.format(label, i)) 50 | df['idx'] = df['label'].apply(lambda x: labels.index(x)) 51 | df = df[['filename', 'idx']].sample(frac=1) 52 | df.to_csv(map_filename, index=False, sep='\t', header=False) 53 | return(len(labels), len(df.index)) 54 | 55 | 56 | def create_minibatch_source(map_filename, num_classes): 57 | transforms = [xforms.crop(crop_type='randomside', 58 | side_ratio=0.85, 59 | jitter_type='uniratio'), 60 | xforms.scale(width=224, 61 | height=224, 62 | channels=3, 63 | interpolations='linear'), 64 | xforms.color(brightness_radius=0.2, 65 | contrast_radius=0.2, 66 | saturation_radius=0.2)] 67 | return(cntk.io.MinibatchSource(cntk.io.ImageDeserializer( 68 | map_filename, 69 | cntk.io.StreamDefs( 70 | features=cntk.io.StreamDef( 71 | field='image', transforms=transforms, is_sparse=False), 72 | labels=cntk.io.StreamDef( 73 | field='label', shape=num_classes, is_sparse=False))))) 74 | 75 | 76 | def load_alexnet_model(image_input, num_classes, model_filename, 77 | retraining_type): 78 | ''' Load pretrained AlexNet for desired level of retraining ''' 79 | loaded_model = cntk.load_model(model_filename) 80 | 81 | # Load the convolutional layers, freezing if desired 82 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features') 83 | last_conv_node = cntk.logging.graph.find_by_name(loaded_model, 'conv5.y') 84 | conv_layers = cntk.ops.combine([last_conv_node.owner]).clone( 85 | cntk.ops.functions.CloneMethod.clone if retraining_type == 'all' \ 86 | else cntk.ops.functions.CloneMethod.freeze, 87 | {feature_node: cntk.ops.placeholder()}) 88 | 89 | # Load the fully connected layers, freezing if desired 90 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'h2_d') 91 | fully_connected_layers = cntk.ops.combine([last_node.owner]).clone( 92 | cntk.ops.functions.CloneMethod.freeze if retraining_type == \ 93 | 'last_only' else cntk.ops.functions.CloneMethod.clone, 94 | {last_conv_node: cntk.ops.placeholder()}) 95 | 96 | # Define the network using the loaded layers 97 | feat_norm = image_input - cntk.layers.Constant(114) 98 | conv_out = conv_layers(feat_norm) 99 | fc_out = fully_connected_layers(conv_out) 100 | new_model = cntk.layers.Dense(shape=num_classes, name='lastlayer')(fc_out) 101 | return(new_model) 102 | 103 | 104 | def load_resnet18_model(image_input, num_classes, model_filename, 105 | retraining_type): 106 | ''' Load pretrained ResNet18 for desired level of retraining ''' 107 | 108 | # Load existing layers, freezing as desired 109 | loaded_model = cntk.load_model(model_filename) 110 | feature_node = cntk.logging.graph.find_by_name(loaded_model, 'features') 111 | last_node = cntk.logging.graph.find_by_name(loaded_model, 'z.x') 112 | cloned_layers = cntk.ops.combine([last_node.owner]).clone( 113 | cntk.ops.functions.CloneMethod.freeze if retraining_type == \ 114 | 'last_only' else cntk.ops.functions.CloneMethod.clone, 115 | {feature_node: cntk.ops.placeholder()}) 116 | 117 | # Define the network using the loaded layers 118 | feat_norm = image_input - cntk.layers.Constant(114) 119 | cloned_out = cloned_layers(feat_norm) 120 | W = cntk.ops.parameter(shape=(512, 1, 1, 
num_classes), 121 | init=cntk.initializer.glorot_uniform()) 122 | b = cntk.ops.parameter(shape=num_classes, init=0) 123 | new_model = cntk.ops.plus(cntk.ops.times(cloned_out, W, name='lasttimes'), 124 | b, name='lastplus') 125 | return(new_model) 126 | 127 | 128 | def retrain_model(map_filename, output_dir, num_classes, epoch_size, 129 | model_filename, num_epochs, model_type, retraining_type): 130 | ''' Coordinates retraining after MAP file creation ''' 131 | 132 | # load minibatch and model 133 | minibatch_source = create_minibatch_source(map_filename, num_classes) 134 | 135 | image_input = cntk.ops.input_variable((3, 224, 224)) 136 | label_input = cntk.ops.input_variable((num_classes)) 137 | input_map = {image_input: minibatch_source.streams.features, 138 | label_input: minibatch_source.streams.labels} 139 | 140 | if model_type == 'alexnet': 141 | model = load_alexnet_model(image_input, num_classes, model_filename, 142 | retraining_type) 143 | elif model_type == 'resnet18': 144 | model = load_resnet18_model(image_input, num_classes, model_filename, 145 | retraining_type) 146 | 147 | # Set learning parameters 148 | ce = cntk.losses.cross_entropy_with_softmax(model, label_input) 149 | pe = cntk.metrics.classification_error(model, label_input) 150 | l2_reg_weight = 0.0005 151 | lr_per_sample = [0.00001] * 33 + [0.000001] * 33 + [0.0000001] 152 | momentum_time_constant = 10 153 | mb_size = 16 154 | lr_schedule = cntk.learners.learning_rate_schedule(lr_per_sample, 155 | unit=cntk.UnitType.sample) 156 | mm_schedule = cntk.learners.momentum_as_time_constant_schedule( 157 | momentum_time_constant) 158 | 159 | # Instantiate the appropriate trainer object 160 | my_rank = distributed.Communicator.rank() 161 | num_workers = distributed.Communicator.num_workers() 162 | num_minibatches = int(np.ceil(epoch_size / mb_size)) 163 | 164 | progress_writers = [cntk.logging.progress_print.ProgressPrinter( 165 | tag='Training', 166 | num_epochs=num_epochs, 167 | freq=num_minibatches, 168 | rank=my_rank)] 169 | learner = cntk.learners.fsadagrad(parameters=model.parameters, 170 | lr=lr_schedule, 171 | momentum=mm_schedule, 172 | l2_regularization_weight=l2_reg_weight) 173 | if num_workers > 1: 174 | parameter_learner = distributed.data_parallel_distributed_learner( 175 | learner, num_quantization_bits=32) 176 | trainer = cntk.Trainer(model, (ce, pe), parameter_learner, 177 | progress_writers) 178 | else: 179 | trainer = cntk.Trainer(model, (ce, pe), learner, progress_writers) 180 | 181 | # Print summary lines to stdout and perform training 182 | if my_rank == 0: 183 | print('Retraining model for {} epochs.'.format(num_epochs)) 184 | print('Found {} workers'.format(num_workers)) 185 | print('Printing progress every {} minibatches'.format(num_minibatches)) 186 | cntk.logging.progress_print.log_number_of_parameters(model) 187 | 188 | training_session( 189 | trainer=trainer, 190 | max_samples=num_epochs * epoch_size, 191 | mb_source=minibatch_source, 192 | mb_size=mb_size, 193 | model_inputs_to_streams=input_map, 194 | checkpoint_config=CheckpointConfig( 195 | frequency=epoch_size, 196 | filename=os.path.join(output_dir, 'retrained_checkpoint.model')), 197 | progress_frequency=epoch_size 198 | ).train() 199 | 200 | distributed.Communicator.finalize() 201 | if my_rank == 0: 202 | trainer.model.save(os.path.join(output_dir, 'retrained.model')) 203 | 204 | return(my_rank) 205 | 206 | 207 | def evaluate_model(map_filename, output_dir, num_classes): 208 | ''' Evaluate the model on the test set, storing predictions to a 
file '''
209 |     inds_to_labels = {}
210 |     with open(os.path.join(output_dir, 'labels_to_inds.tsv'), 'r') as f:
211 |         for line in f:
212 |             label, ind = line.strip().split('\t')
213 |             inds_to_labels[int(ind)] = label
214 | 
215 |     loaded_model = cntk.load_model(os.path.join(output_dir, 'retrained.model'))
216 |     with open(map_filename, 'r') as f:
217 |         with open(os.path.join(output_dir, 'predictions.csv'), 'w') as g:
218 |             g.write('filename,label,pred_label\n')
219 |             for line in f:
220 |                 filename, true_ind = line.strip().split('\t')
221 |                 image_data = np.array(Image.open(filename), dtype=np.float32)
222 |                 image_data = np.ascontiguousarray(np.transpose(
223 |                     image_data[:, :, ::-1], (2,0,1)))
224 |                 dnn_output = loaded_model.eval(
225 |                     {loaded_model.arguments[0]: [image_data]})
226 |                 true_label = inds_to_labels[int(true_ind)]
227 |                 pred_label = inds_to_labels[np.argmax(np.squeeze(dnn_output))]
228 |                 g.write('{},{},{}\n'.format(filename, true_label, pred_label))
229 | 
230 |     df = pd.read_csv(os.path.join(output_dir, 'predictions.csv'))
231 |     num_correct = len(df.loc[df['label'] == df['pred_label']].index)
232 |     print('Overall accuracy on test set: {:0.3f}'.format(
233 |         num_correct /
234 |         len(df.index)))
235 | 
236 |     return
237 | 
238 | 
239 | def main(input_dir, validation_dir, output_dir, model_filename, num_epochs,
240 |          model_type, retraining_type):
241 |     ''' Coordinates all activities for the script '''
242 | 
243 |     # Create a temporary directory to house the MAP file
244 |     with tempfile.TemporaryDirectory() as temp_dir:
245 |         training_map_filename = os.path.join(temp_dir, 'map_train.tsv')
246 |         validation_map_filename = os.path.join(temp_dir, 'map_test.tsv')
247 | 
248 |         _, _ = write_map_file(validation_map_filename, validation_dir, output_dir)
249 |         num_classes, epoch_size = write_map_file(training_map_filename,
250 |                                                  input_dir, output_dir)
251 | 
252 |         my_rank = retrain_model(training_map_filename, output_dir,
253 |                                 num_classes, epoch_size, model_filename,
254 |                                 num_epochs, model_type, retraining_type)
255 |         if my_rank == 0:
256 |             evaluate_model(validation_map_filename, output_dir, num_classes)
257 | 
258 |     return
259 | 
260 | 
261 | if __name__ == '__main__':
262 |     parser = argparse.ArgumentParser(description='''
263 | Retrains a pretrained DNN model using supplied images. Creates MAP files
264 | (in a temporary directory) used by ImageDeserializer during training and
265 | validation. Outputs the retrained model, a tsv file mapping the class names to
266 | indices, and the validation set predictions to the specified directory.
267 | ''')
268 |     parser.add_argument('-i', '--input_dir', type=str, required=True,
269 |                         help='Directory containing all training image files' +
270 |                         ' in subfolders named by class.')
271 |     parser.add_argument('-v', '--validation_dir', type=str, required=True,
272 |                         help='Directory containing all test image files' +
273 |                         ' in subfolders named by class.')
274 |     parser.add_argument('-o', '--output_dir',
275 |                         type=str, required=True,
276 |                         help='Output directory for the model. 
Supporting ' + 277 | 'files will be placed in the same folder.') 278 | parser.add_argument('-m', '--model_filename', 279 | type=str, required=True, 280 | help='Filepath of the pretrained model.') 281 | parser.add_argument('-n', '--num_epochs', 282 | type=int, required=True, 283 | help='Number of epochs to retrain the model.') 284 | parser.add_argument('-t', '--model_type', type=str, required=True, 285 | help='The model type to retrain, which should be ' + 286 | 'either "resnet18" or "alexnet".') 287 | parser.add_argument('-r', '--retraining_type', 288 | type=str, required=True, 289 | help='Specifies which layers to retrain in the model.' + 290 | ' Should be one of "last_only", "fully_connected", ' + 291 | 'or "all". Cannot use "fully_connected" retraining ' + 292 | 'type with "resnet18" model type.') 293 | args = parser.parse_args() 294 | 295 | # Ensure argument values are acceptable before proceeding 296 | assert os.path.exists(args.input_dir), \ 297 | 'Input directory {} does not exist'.format(args.input_dir) 298 | assert os.path.exists(args.validation_dir), \ 299 | 'Validation directory {} does not exist'.format(args.validation_dir) 300 | assert os.path.exists(args.model_filename), \ 301 | 'Model file {} does not exist'.format(args.model_filename) 302 | assert args.num_epochs > 0, 'Number of epochs must be greater than zero' 303 | assert args.model_type in ['resnet18', 'alexnet'], \ 304 | 'Model type must be "resnet18" or "alexnet" (without the quotes).' 305 | assert args.retraining_type in ['last_only', 'fully_connected', 'all'], \ 306 | 'Retraining type must be "last_only", "fully_connected", or "all" ' + \ 307 | '(without the quotes).' 308 | if (args.retraining_type == 'fully_connected') and \ 309 | (args.model_type == 'resnet18'): 310 | raise Exception('Can only use "all" or "last_only" retraining types ' + 311 | 'with ResNet 18.') 312 | os.makedirs(args.output_dir, exist_ok=True) 313 | 314 | main(args.input_dir, args.validation_dir, args.output_dir, 315 | args.model_filename, args.num_epochs, args.model_type, 316 | args.retraining_type) 317 | -------------------------------------------------------------------------------- /Code/02_Modeling/run_batch_ai.py: -------------------------------------------------------------------------------- 1 | ''' 2 | run_batch_ai.py 3 | (c) Microsoft Corporation, 2017 4 | 5 | This script is designed to call Batch AI training from Vienna and log the 6 | results to Vienna's run history feature. This script assumes that the 7 | associated config file and Azure file share have been set up in advance. It 8 | waits for the cluster to reach steady state (if necessary), submits the job, 9 | downloads its output after completion, and finally parses the output 10 | files to return metrics to Vienna's run history. 
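Example invocation (all argument values below are illustrative placeholders;
see the argparse definitions at the bottom of this script for the full set of
options):
    python run_batch_ai.py -p resnet18 -r last_only -c settings.cfg \
        -o my_resnet18_model -n 10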
11 | '''
12 | import argparse, os, time, datetime, requests, re
13 | import azure.mgmt.batchai as training
14 | import azure.mgmt.batchai.models as tm
15 | from azure.common.credentials import ServicePrincipalCredentials
16 | from azureml.logging import get_azureml_logger
17 | import pandas as pd
18 | from configparser import ConfigParser
19 | from azure.storage.file import FileService
20 | from azure.storage.blob import BlockBlobService
21 | from tempfile import TemporaryFile
22 | 
23 | def ensure_str(str_data):
24 |     ''' Helper function to correct type of imported strings '''
25 |     if isinstance(str_data, str):
26 |         return(str_data)
27 |     return(str_data.encode('utf-8'))
28 | 
29 | class ConfigFile(object):
30 |     ''' Copies ConfigParser results into attributes, correcting type '''
31 |     def __init__(self, config_filename):
32 |         ''' Load static info for cluster/job creation from a config file '''
33 |         config = ConfigParser(allow_no_value=True)
34 |         config.read(config_filename)
35 |         my_config = config['Settings']
36 | 
37 |         # General info needed for creating clients/clusters/jobs
38 |         self.bait_subscription_id = ensure_str(my_config['bait_subscription_id'])
39 |         self.bait_aad_client_id = ensure_str(my_config['bait_aad_client_id'])
40 |         self.bait_aad_secret = ensure_str(my_config['bait_aad_secret'])
41 |         self.bait_aad_token_uri = 'https://login.microsoftonline.com/' + \
42 |             '{0}/oauth2/token'.format(ensure_str(my_config['bait_aad_tenant']))
43 |         self.bait_region = ensure_str(my_config['bait_region'])
44 |         self.bait_resource_group_name = ensure_str(
45 |             my_config['bait_resource_group_name'])
46 |         self.bait_vms_in_cluster = int(my_config['bait_vms_in_cluster'])
47 |         self.bait_vms_per_job = int(my_config['bait_vms_per_job'])
48 |         self.bait_cluster_name = ensure_str(my_config['bait_cluster_name'])
49 | 
50 |         assert self.bait_vms_per_job <= self.bait_vms_in_cluster, \
51 |             'Number of VMs for job ({}) exceeds number of VMs in cluster ({})'.format(
52 |                 self.bait_vms_per_job, self.bait_vms_in_cluster)
53 |         assert self.bait_vms_per_job > 0, \
54 |             'Number of VMs used for the job must be greater than zero.'
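        # Illustrative sketch of the [Settings] section this class expects in
        # the config file (key names are taken from the lookups above and
        # below; every value shown is a placeholder, not a real credential):
        #
        #   [Settings]
        #   bait_subscription_id = <azure-subscription-guid>
        #   bait_aad_client_id = <service-principal-app-id>
        #   bait_aad_secret = <service-principal-secret>
        #   bait_aad_tenant = <aad-tenant-id>
        #   bait_region = eastus
        #   bait_resource_group_name = <resource-group-name>
        #   bait_vms_in_cluster = 2
        #   bait_vms_per_job = 2
        #   bait_cluster_name = <batch-ai-cluster-name>
        #   storage_account_name = <storage-account-name>
        #   storage_account_key = <storage-account-key>
        #   container_trained_models = <blob-container-for-trained-models>
        #   container_prediction_results = <blob-container-for-predictions>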
55 | 56 | # Storage account where results will be written 57 | self.storage_account_name = ensure_str( 58 | my_config['storage_account_name']) 59 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 60 | self.storage_account_fileshare_url = 'https://' + \ 61 | '{}.file.core.windows.net/baitshare'.format( 62 | self.storage_account_name) 63 | self.container_trained_models = ensure_str( 64 | my_config['container_trained_models']) 65 | self.predictions_container = ensure_str( 66 | my_config['container_prediction_results']) 67 | 68 | return 69 | 70 | 71 | def write_model_summary_to_blob(config, output_model_name, 72 | pretrained_model_type, retraining_type): 73 | ''' Writes a summary file describing the model to be used during o16n ''' 74 | output_str = '''output_model_name,{} 75 | model_source,batchaitraining 76 | pretrained_model_type,{} 77 | retraining_type,{} 78 | mmlspark_model_type,none 79 | '''.format(output_model_name, pretrained_model_type, retraining_type) 80 | file_name = '{}/model.info'.format(output_model_name) 81 | blob_service = BlockBlobService(config.storage_account_name, 82 | config.storage_account_key) 83 | blob_service.create_container(config.container_trained_models) 84 | blob_service.create_blob_from_text( 85 | config.container_trained_models, file_name, output_str) 86 | return 87 | 88 | 89 | def get_client(config): 90 | ''' Connect to Batch AI ''' 91 | client = training.BatchAIManagementClient( 92 | credentials=ServicePrincipalCredentials( 93 | client_id=config.bait_aad_client_id, 94 | secret=config.bait_aad_secret, 95 | token_uri=config.bait_aad_token_uri), 96 | subscription_id=config.bait_subscription_id, 97 | base_url=None) 98 | return(client) 99 | 100 | 101 | def get_cluster(config): 102 | ''' 103 | Checks whether a cluster with the specified name already exists. If so, it 104 | uses that cluster; otherwise, it creates a new one. 105 | ''' 106 | client = get_client(config) 107 | 108 | # Start cluster creation if necessary 109 | try: 110 | cluster = client.clusters.get(config.bait_resource_group_name, 111 | config.bait_cluster_name) 112 | except: 113 | print('Error: could not find cluster named {}'.format( 114 | config.bait_cluster_name)) 115 | 116 | return(cluster) 117 | 118 | 119 | def check_for_steady_cluster_status(config, max_sec_to_wait=1200): 120 | ''' 121 | Waits until the cluster reaches a "steady" status. Checks every ten 122 | seconds. 123 | ''' 124 | client = get_client(config) 125 | start = time.time() 126 | while (time.time() - start < max_sec_to_wait): 127 | cluster = client.clusters.get(config.bait_resource_group_name, 128 | config.bait_cluster_name) 129 | if cluster.allocation_state == tm.AllocationState.steady: 130 | print('Cluster has reached "steady" allocation state. Ready for ' + 131 | 'job submission.') 132 | if cluster.errors is not None: 133 | raise Exception('Errors were thrown during cluster creation:' + 134 | '\n{}'.format('\n'.join(cluster.errors))) 135 | return 136 | time.sleep(10) 137 | raise Exception('Max wait time exceeded for cluster to reach "steady" ' + 138 | 'state ({} seconds).'.format(max_sec_to_wait)) 139 | 140 | 141 | def submit_job(config, pretrained_model_type, retraining_type, 142 | output_model_name, num_epochs): 143 | ''' Defines and submits a job. Does not check for completion. 
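    Note: each InputDirectory defined below is surfaced to the job as an
    environment variable named AZ_BATCHAI_INPUT_<id>; for example, the
    directory with id='TRAININGDATA' is referenced on the command line as
    $AZ_BATCHAI_INPUT_TRAININGDATA, and the OutputDirectory with id='MODEL'
    as $AZ_BATCHAI_OUTPUT_MODEL.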
''' 144 | client = get_client(config) 145 | job_name = 'job{}'.format( 146 | datetime.datetime.utcnow().strftime('%m_%d_%H_%M_%S')) 147 | cluster = client.clusters.get(config.bait_resource_group_name, 148 | config.bait_cluster_name) 149 | 150 | # Define the command line arguments to the retraining script 151 | command_line_args = '--input_dir $AZ_BATCHAI_INPUT_TRAININGDATA ' + \ 152 | '--validation_dir $AZ_BATCHAI_INPUT_VALIDATIONDATA ' + \ 153 | '--output_dir $AZ_BATCHAI_OUTPUT_MODEL ' + \ 154 | '--num_epochs {} '.format(num_epochs) + \ 155 | '--retraining_type {} '.format(retraining_type) + \ 156 | '--model_type {} '.format(pretrained_model_type) + \ 157 | '--model_filename $AZ_BATCHAI_INPUT_PRETRAINEDMODELS/' 158 | if pretrained_model_type == 'alexnet': 159 | command_line_args += 'AlexNet.model' 160 | elif pretrained_model_type == 'resnet18': 161 | command_line_args += 'ResNet_18.model' 162 | 163 | # Define the job 164 | cntk_settings = tm.CNTKsettings( 165 | language_type='python', 166 | python_script_file_path='$AZ_BATCHAI_INPUT_SCRIPT/' + 167 | 'retrain_model_distributed.py', 168 | command_line_args=command_line_args, 169 | process_count=config.bait_vms_per_job) # NC6s -- one GPU per VM 170 | 171 | job_create_params = tm.job_create_parameters.JobCreateParameters( 172 | location=config.bait_region, 173 | cluster=tm.ResourceId(cluster.id), 174 | node_count=config.bait_vms_per_job, 175 | std_out_err_path_prefix='$AZ_BATCHAI_MOUNT_ROOT/afs', 176 | output_directories=[ 177 | tm.OutputDirectory( 178 | id='MODEL', 179 | path_prefix='$AZ_BATCHAI_MOUNT_ROOT/afs')], 180 | input_directories=[ 181 | tm.InputDirectory( 182 | id='SCRIPT', 183 | path='$AZ_BATCHAI_MOUNT_ROOT/afs/scripts'), 184 | tm.InputDirectory( 185 | id='PRETRAINEDMODELS', 186 | path='$AZ_BATCHAI_MOUNT_ROOT/afs/pretrainedmodels'), 187 | tm.InputDirectory( 188 | id='TRAININGDATA', 189 | path='$AZ_BATCHAI_MOUNT_ROOT/nfs/training_images'), 190 | tm.InputDirectory( 191 | id='VALIDATIONDATA', 192 | path='$AZ_BATCHAI_MOUNT_ROOT/nfs/validation_images')], 193 | cntk_settings=cntk_settings) 194 | 195 | # Submit the job 196 | job = client.jobs.create( 197 | resource_group_name=config.bait_resource_group_name, 198 | job_name=job_name, 199 | parameters=job_create_params) 200 | 201 | return(job_name) 202 | 203 | 204 | def check_for_job_completion(config, job_name, max_sec_to_wait=7200): 205 | ''' Check for the job status to change indicating completion ''' 206 | client = get_client(config) 207 | time.sleep(10) 208 | start = time.time() 209 | while (time.time() - start < max_sec_to_wait): 210 | job = client.jobs.get(config.bait_resource_group_name, job_name) 211 | if (job.execution_state == tm.ExecutionState.succeeded) or \ 212 | (job.execution_state == tm.ExecutionState.failed): 213 | return 214 | time.sleep(10) 215 | raise Exception('Max wait time exceeded for job completion ' + 216 | '({} seconds).'.format(max_sec_to_wait)) 217 | 218 | 219 | def download_from_file_share(azure_filename, local_filename): 220 | ''' Save an output file from Azure File Share ''' 221 | r = requests.get(azure_filename, stream=True) 222 | with open(local_filename, 'wb') as f: 223 | for chunk in r.iter_content(chunk_size=512 * 1024): 224 | if chunk: 225 | f.write(chunk) 226 | 227 | 228 | def transfer_fileshare_to_blob(config, fileshare_uri, output_model_name): 229 | ''' NB -- transfer proceeds via local temporary file! 
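    For example (account, share, and path names below are illustrative), a
    download URI of the form
        https://myaccount.file.core.windows.net/baitshare/jobdir/retrained.model?<sas-token>
    is decomposed into the share ('baitshare'), the subdirectory ('jobdir'),
    and the file name ('retrained.model') before the file is re-uploaded to
    blob storage.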
''' 230 | file_service = FileService(config.storage_account_name, 231 | config.storage_account_key) 232 | blob_service = BlockBlobService(config.storage_account_name, 233 | config.storage_account_key) 234 | blob_service.create_container(config.container_trained_models) 235 | blob_service.create_container(config.predictions_container) 236 | 237 | uri_core = fileshare_uri.split('.file.core.windows.net/')[1].split('?')[0] 238 | fields = uri_core.split('/') 239 | fileshare = fields.pop(0) 240 | subdirectory = '/'.join(fields[:-1]) 241 | file_name = '{}/{}'.format(output_model_name, fields[-1]) 242 | 243 | with TemporaryFile() as f: 244 | file_service.get_file_to_stream(share_name=fileshare, 245 | directory_name=subdirectory, 246 | file_name=fields[-1], 247 | stream=f) 248 | f.seek(0) 249 | if 'predictions' in fields[-1]: 250 | blob_service.create_blob_from_stream( 251 | config.predictions_container, 252 | '{}_predictions_test_set.csv'.format(output_model_name), 253 | f) 254 | else: 255 | blob_service.create_blob_from_stream( 256 | config.container_trained_models, file_name, f) 257 | 258 | return 259 | 260 | 261 | def retrieve_outputs(config, job_name, output_model_name): 262 | ''' Get stdout, stderr, retrained model, and label-to-index dict ''' 263 | client = get_client(config) 264 | status_files = client.jobs.list_output_files( 265 | resource_group_name=config.bait_resource_group_name, 266 | job_name=job_name, 267 | jobs_list_output_files_options=tm.JobsListOutputFilesOptions('stdOuterr')) 268 | for file in list(status_files): 269 | download_from_file_share(file.download_url, 270 | os.path.join('outputs', file.name)) 271 | 272 | output_files = client.jobs.list_output_files( 273 | resource_group_name=config.bait_resource_group_name, 274 | job_name=job_name, 275 | jobs_list_output_files_options=tm.JobsListOutputFilesOptions('MODEL')) 276 | for file in list(output_files): 277 | transfer_fileshare_to_blob(config, file.download_url, output_model_name) 278 | 279 | client.jobs.delete(resource_group_name=config.bait_resource_group_name, 280 | job_name=job_name) 281 | return 282 | 283 | 284 | def parse_stdout(run_logger): 285 | ''' Parse the training logs and record using Vienna SDK ''' 286 | with open(os.path.join('outputs', 'stdout.txt'), 'r') as f: 287 | lines = f.readlines() 288 | 289 | progress_re = 'Finished Epoch\[(\d+) of \d+\]: \[Training\] loss = ' + \ 290 | '([0-9.]+) \* [0-9]+, metric = ([0-9.]+)% \* [0-9]+ ' + \ 291 | '([0-9.]+)s \( ([0-9.]+) samples/s\);' 292 | progress_re2 = 'Finished Epoch\[(\d+) of \d+\]: \[Training\] loss = ' + \ 293 | '([0-9.]+) \* [0-9]+, metric = ([0-9.]+)% \* [0-9]+ ' + \ 294 | '([0-9.]+)s \(([0-9.]+) samples/s\);' 295 | p = re.compile(progress_re) 296 | p2 = re.compile(progress_re2) 297 | 298 | progress_lines = [] 299 | for line in lines: 300 | m = p.match(line) 301 | if m is not None: 302 | progress_lines.append(list(m.groups())) 303 | else: # try a minor variation 304 | m = p2.match(line) 305 | if m is not None: 306 | progress_lines.append(list(m.groups())) 307 | 308 | df = pd.DataFrame(progress_lines, 309 | columns=['epoch', 'loss', 'accuracy', 'duration', 'rate'], 310 | dtype=float).groupby('epoch').mean().reset_index() 311 | run_logger.log('training_loss', df['loss'].values.tolist()) 312 | run_logger.log('training_error_pct', df['accuracy'].values.tolist()) 313 | run_logger.log('epoch_duration', df['duration'].values.tolist()) 314 | run_logger.log('samples_per_sec', df['rate'].values.tolist()) 315 | 316 | accuracy_re = 'Overall accuracy on test set: 
([0-9.]+)' 317 | p = re.compile(accuracy_re) 318 | for line in lines: 319 | m = p.match(line) 320 | if m is not None: 321 | print('Test set accuracy: {}'.format(m.groups(1)[0])) 322 | run_logger.log('test_set_accuracy', m.groups(1)[0]) 323 | return 324 | 325 | def main(pretrained_model_type, retraining_type, config_filename, 326 | output_model_name, num_epochs): 327 | ''' Coordinate all activities for Batch AI training ''' 328 | 329 | # Log the parameters used for this run 330 | run_logger = get_azureml_logger() 331 | run_logger.log('amlrealworld.aerial_image_classification.run_batch_ai','true') 332 | run_logger.log('pretrained_model_type', pretrained_model_type) 333 | run_logger.log('config_filename', config_filename) 334 | run_logger.log('retraining_type', retraining_type) 335 | run_logger.log('output_model_name', output_model_name) 336 | 337 | # Load the configuration file and save relevant info 338 | config = ConfigFile(config_filename) 339 | write_model_summary_to_blob(config, output_model_name, 340 | pretrained_model_type, retraining_type) 341 | 342 | # Create a cluster (if necessary) and wait till it's ready 343 | get_cluster(config) 344 | check_for_steady_cluster_status(config) 345 | 346 | # Submit the job and wait until it completes 347 | job_name = submit_job(config, pretrained_model_type, retraining_type, 348 | output_model_name, num_epochs) 349 | print('Job submitted: checking for job completion') 350 | check_for_job_completion(config, job_name) 351 | print('Job complete: retrieving output files') 352 | 353 | # Download the output files and store metrics to Vienna 354 | retrieve_outputs(config, job_name, output_model_name) 355 | print('Parsing output logs') 356 | parse_stdout(run_logger) 357 | 358 | return 359 | 360 | 361 | if __name__ == '__main__': 362 | parser = argparse.ArgumentParser(description=''' 363 | Orchestrates pretrained image classifier retraining through Batch AI training. 364 | Can retrain multiple model types and to different depths. The training data for 365 | this example is fixed and provided in the docker image specified in the config 366 | file. 367 | ''') 368 | parser.add_argument('-p', '--pretrained_model_type', type=str, 369 | required=True, 370 | help='The model type to retrain, which should be ' + 371 | 'either "resnet18" or "alexnet".') 372 | parser.add_argument('-r', '--retraining_type', 373 | type=str, required=True, 374 | help='Specifies which layers to retrain in the model.' + 375 | ' Should be one of "last_only", "fully_connected", ' + 376 | 'or "all".') 377 | parser.add_argument('-c', '--config_filename', 378 | type=str, required=True, 379 | help='Filepath of the configuration file specifying ' + 380 | 'credentials for a storage account, container ' + 381 | 'registry, and Batch AI training itself.') 382 | parser.add_argument('-o', '--output_model_name', 383 | type=str, required=True, 384 | help='Retrained model files will be saved under this ' + 385 | '"subdirectory" (prefix) in the trained model blob ' + 386 | 'container specified by the config file.') 387 | parser.add_argument('-f', '--sample_frac', 388 | type=float, required=False, default=1.0, 389 | help='Subsamples data. 
Default sampling fraction is ' + 390 | '1.0 (all samples used).') 391 | parser.add_argument('-n', '--num_epochs', 392 | type=int, required=False, default=10, 393 | help='Number of epochs to retrain the model for.') 394 | args = parser.parse_args() 395 | 396 | # Ensure specified files/directories exist 397 | assert args.pretrained_model_type in ['resnet18', 'alexnet'], \ 398 | 'Pretrained model type must be "resnet18" or "alexnet".' 399 | assert args.retraining_type in ['last_only', 'fully_connected', 'all'], \ 400 | 'Retraining type must be "last_only", "fully_connected", or "all" ' + \ 401 | '(without the quotes).' 402 | assert os.path.exists(args.config_filename), \ 403 | 'Could not find config file {}'.format(args.config_filename) 404 | assert args.num_epochs > 0, 'Number of epochs must be greater than zero' 405 | os.makedirs('outputs', exist_ok=True) 406 | 407 | main(args.pretrained_model_type, args.retraining_type, args.config_filename, 408 | args.output_model_name, args.num_epochs) 409 | -------------------------------------------------------------------------------- /Code/02_Modeling/run_mmlspark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | run_mmlspark.py 3 | (c) Microsoft Corporation, 2017 4 | 5 | Trains an MMLSpark model to classify images featurized by a specified CNTK 6 | pretrained model. Saves the model and test set predictions to blob storage. 7 | Logs some evaluation metrics directly to run history. 8 | ''' 9 | 10 | import os, time, mmlspark, pyspark, argparse 11 | import numpy as np 12 | from io import BytesIO 13 | from pyspark.sql.functions import udf 14 | from pyspark.sql.types import * 15 | from pyspark.ml.classification import RandomForestClassifier, \ 16 | LogisticRegression 17 | from azureml.logging import get_azureml_logger 18 | import pandas as pd 19 | from configparser import ConfigParser 20 | from azure.storage.blob import BlockBlobService 21 | 22 | 23 | def ensure_str(str_data): 24 | ''' Helper function to correct type of imported strings ''' 25 | if isinstance(str_data, str): 26 | return(str_data) 27 | return(str_data.encode('utf-8')) 28 | 29 | class ConfigFile(object): 30 | ''' Copies ConfigParser results into attributes, correcting type ''' 31 | def __init__(self, config_filename, pretrained_model_type, 32 | mmlspark_model_type, output_model_name): 33 | ''' Load static info for cluster/job creation from a config file ''' 34 | config = ConfigParser(allow_no_value=True) 35 | config.read(config_filename) 36 | my_config = config['Settings'] 37 | self.spark = pyspark.sql.SparkSession.builder.appName('vienna') \ 38 | .getOrCreate() 39 | 40 | self.pretrained_model_type = pretrained_model_type 41 | self.mmlspark_model_type = mmlspark_model_type 42 | self.output_model_name = output_model_name 43 | 44 | # Storage account where results will be written 45 | self.storage_account_name = ensure_str( 46 | my_config['storage_account_name']) 47 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 48 | self.container_pretrained_models = ensure_str( 49 | my_config['container_pretrained_models']) 50 | self.container_trained_models = ensure_str( 51 | my_config['container_trained_models']) 52 | self.container_data_training = ensure_str( 53 | my_config['container_data_training']) 54 | self.container_data_testing = ensure_str( 55 | my_config['container_data_testing']) 56 | self.container_prediction_results = ensure_str( 57 | my_config['container_prediction_results']) 58 | 59 | # URIs where data will be loaded or 
saved 60 | self.train_uri = 'wasb://{}@{}.blob.core.windows.net/*/*.png'.format( 61 | self.container_data_training, self.storage_account_name) 62 | self.test_uri = 'wasb://{}@{}.blob.core.windows.net/*/*.png'.format( 63 | self.container_data_testing, self.storage_account_name) 64 | self.model_uri = 'wasb://{}@{}.blob.core.windows.net/{}'.format( 65 | self.container_pretrained_models, self.storage_account_name, 66 | 'ResNet_18.model' if pretrained_model_type == 'resnet18' \ 67 | else 'AlexNet.model') 68 | self.output_uri = 'wasb://{}@{}.blob.core.windows.net/{}/model'.format( 69 | self.container_trained_models, self.storage_account_name, 70 | output_model_name) 71 | self.predictions_filename = '{}_predictions_test_set.csv'.format( 72 | output_model_name) 73 | 74 | # Load the pretrained model 75 | self.last_layer_name = 'z.x' if (pretrained_model_type == 'resnet18') \ 76 | else 'h2_d' 77 | self.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 78 | .setOutputCol('features') \ 79 | .setModelLocation(self.spark, self.model_uri) \ 80 | .setOutputNodeName(self.last_layer_name) 81 | 82 | # Initialize other Spark pipeline components 83 | self.extract_label_udf = udf(lambda row: os.path.basename( 84 | os.path.dirname(row.path)), 85 | StringType()) 86 | self.extract_path_udf = udf(lambda row: row.path, StringType()) 87 | if mmlspark_model_type == 'randomforest': 88 | self.mmlspark_model_type = RandomForestClassifier(numTrees=20, 89 | maxDepth=5) 90 | elif mmlspark_model_type == 'logisticregression': 91 | self.mmlspark_model_type = LogisticRegression(regParam=0.01, 92 | maxIter=10) 93 | self.unroller = mmlspark.UnrollImage().setInputCol('image') \ 94 | .setOutputCol('unrolled') 95 | 96 | return 97 | 98 | 99 | def write_model_summary_to_blob(config, mmlspark_model_type): 100 | ''' Writes a summary file describing the model to be used during o16n ''' 101 | output_str = '''output_model_name,{} 102 | model_source,mmlspark 103 | pretrained_model_type,{} 104 | retraining_type,last_only 105 | mmlspark_model_type,{} 106 | '''.format(config.output_model_name, config.pretrained_model_type, 107 | mmlspark_model_type) 108 | file_name = '{}/model.info'.format(config.output_model_name) 109 | blob_service = BlockBlobService(config.storage_account_name, 110 | config.storage_account_key) 111 | blob_service.create_container(config.container_trained_models) 112 | blob_service.create_blob_from_text( 113 | config.container_trained_models, file_name, output_str) 114 | return 115 | 116 | 117 | def load_data(data_uri, config, sample_frac): 118 | df = config.spark.readImages(data_uri, recursive=True, 119 | sampleRatio=sample_frac).toDF('image') 120 | df = df.withColumn('label', config.extract_label_udf(df['image'])) 121 | df = df.withColumn('filepath', config.extract_path_udf(df['image'])) 122 | df = config.unroller.transform(df).select('filepath', 'unrolled', 'label') 123 | df = config.cntk_model.transform(df).select( 124 | ['filepath', 'features', 'label']) 125 | return(df) 126 | 127 | 128 | def main(pretrained_model_type, mmlspark_model_type, config_filename, 129 | output_model_name, sample_frac): 130 | # Load the configuration file 131 | config = ConfigFile(config_filename, pretrained_model_type, 132 | mmlspark_model_type, output_model_name) 133 | write_model_summary_to_blob(config, mmlspark_model_type) 134 | 135 | # Log the parameters of the run 136 | run_logger = get_azureml_logger() 137 | run_logger.log('amlrealworld.aerial_image_classification.run_mmlspark','true') 138 | 
run_logger.log('pretrained_model_type', pretrained_model_type) 139 | run_logger.log('mmlspark_model_type', mmlspark_model_type) 140 | run_logger.log('config_filename', config_filename) 141 | run_logger.log('output_model_name', output_model_name) 142 | run_logger.log('sample_frac', sample_frac) 143 | 144 | # Train and save the MMLSpark model 145 | train_df = load_data(config.train_uri, config, sample_frac) 146 | mmlspark_model = mmlspark.TrainClassifier( 147 | model=config.mmlspark_model_type, labelCol='label').fit(train_df) 148 | mmlspark_model.write().overwrite().save(config.output_uri) 149 | 150 | # Apply the MMLSpark model to the test set and save the accuracy metric 151 | test_df = load_data(config.test_uri, config, sample_frac) 152 | predictions = mmlspark_model.transform(test_df) 153 | metrics = mmlspark.ComputeModelStatistics(evaluationMetric='accuracy') \ 154 | .transform(predictions) 155 | metrics.show() 156 | run_logger.log('accuracy_on_test_set', metrics.first()['accuracy']) 157 | 158 | # Save the predictions 159 | tf = mmlspark.IndexToValue().setInputCol('scored_labels') \ 160 | .setOutputCol('pred_label') 161 | predictions = tf.transform(predictions).select( 162 | 'filepath', 'label', 'pred_label') 163 | output_str = predictions.toPandas().to_csv(index=False) 164 | blob_service = BlockBlobService(config.storage_account_name, 165 | config.storage_account_key) 166 | blob_service.create_container(config.container_prediction_results) 167 | blob_service.create_blob_from_text( 168 | config.container_prediction_results, 169 | config.predictions_filename, 170 | output_str) 171 | 172 | return 173 | 174 | 175 | if __name__ == '__main__': 176 | parser = argparse.ArgumentParser(description=''' 177 | Trains an MMLSpark model to classify images featurized by a specified CNTK 178 | pretrained model. Saves the model and test set predictions to blob storage. 179 | Logs some evaluation metrics directly to run history.''') 180 | parser.add_argument('-p', '--pretrained_model_type', type=str, 181 | required=True, 182 | help='The model type to retrain, which should be ' + 183 | 'either "resnet18" or "alexnet".') 184 | parser.add_argument('-m', '--mmlspark_model_type', 185 | type=str, required=True, 186 | help='Specifies which type of model should be ' + 187 | 'trained on featurized images. Should be either ' + 188 | '"randomforest" or "logisticregresssion".') 189 | parser.add_argument('-c', '--config_filename', 190 | type=str, required=True, 191 | help='Filepath of the configuration file specifying ' + 192 | 'credentials for a storage account, container ' + 193 | 'registry, and Batch AI training itself.') 194 | parser.add_argument('-o', '--output_model_name', 195 | type=str, required=True, 196 | help='Retrained model files will be saved under this ' + 197 | '"subdirectory" (prefix) in the trained model blob ' + 198 | 'container specified by the config file.') 199 | parser.add_argument('-f', '--sample_frac', 200 | type=float, required=False, default=1.0, 201 | help='Subsamples training and test data for faster ' + 202 | 'results. Default sampling fraction is 1.0 (all ' + 203 | 'samples used).') 204 | args = parser.parse_args() 205 | 206 | assert args.pretrained_model_type in ['resnet18', 'alexnet'], \ 207 | 'Pretrained model type must be "resnet18" or "alexnet".' 208 | assert args.mmlspark_model_type in ['randomforest', 'logisticregression'], \ 209 | 'MMLSpark model type must be "randomforest" or "logisticregression".' 
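    # Example invocation (argument values are illustrative placeholders; launch
    # with whichever Spark-enabled environment you use, e.g. spark-submit):
    #   spark-submit run_mmlspark.py -p resnet18 -m randomforest \
    #       -c settings.cfg -o my_mmlspark_model -f 0.1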
210 | assert os.path.exists(args.config_filename), \ 211 | 'Could not find config file {}'.format(args.config_filename) 212 | assert (args.sample_frac <= 1.0) and (args.sample_frac > 0.0), \ 213 | 'Sampling fraction must be between 0.0 and 1.0.' 214 | 215 | print('Arguments ok...preparing to run') 216 | main(args.pretrained_model_type, args.mmlspark_model_type, 217 | args.config_filename, args.output_model_name, args.sample_frac) 218 | -------------------------------------------------------------------------------- /Code/03_Deployment/batch_score_spark.py: -------------------------------------------------------------------------------- 1 | ''' 2 | batch_score_spark.py 3 | by Mary Wahl 4 | (c) Microsoft Corporation, 2017 5 | 6 | Applies a trained Batch AI Training or MMLSpark model to a large static dataset 7 | in an HDInsight cluster's associated blob storage account. This script requires 8 | the following arguments: 9 | - config_filename: Includes storage account credentials and container names 10 | - output_model_name: The model name specified at the time of training; used for 11 | lookup of output files in blob storage. 12 | ''' 13 | import os, io, argparse, mmlspark, pyspark 14 | from azureml.logging import get_azureml_logger 15 | import numpy as np 16 | import pandas as pd 17 | from configparser import ConfigParser 18 | from azure.storage.blob import BlockBlobService 19 | from pyspark.sql.functions import udf 20 | from pyspark.sql.types import * 21 | from pyspark.ml.feature import IndexToString 22 | from mmlspark import TrainedClassifierModel 23 | 24 | run_logger = get_azureml_logger() 25 | run_logger.log('amlrealworld.aerial_image_classification.batch_score_spark','true') 26 | 27 | def ensure_str(str_data): 28 | ''' Helper function to correct type of imported strings ''' 29 | if isinstance(str_data, str): 30 | return(str_data) 31 | return(str_data.encode('utf-8')) 32 | 33 | class ConfigFile(object): 34 | ''' Copies ConfigParser results into attributes, correcting type ''' 35 | def __init__(self, config_filename, output_model_name): 36 | ''' Load/validate model information from a config file ''' 37 | config = ConfigParser(allow_no_value=True) 38 | config.read(config_filename) 39 | my_config = config['Settings'] 40 | self.spark = pyspark.sql.SparkSession.builder.appName('vienna') \ 41 | .getOrCreate() 42 | 43 | # Load storage account info 44 | self.storage_account_name = ensure_str( 45 | my_config['storage_account_name']) 46 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 47 | self.container_pretrained_models = ensure_str( 48 | my_config['container_pretrained_models']) 49 | self.container_trained_models = ensure_str( 50 | my_config['container_trained_models']) 51 | self.container_data_o16n = ensure_str( 52 | my_config['container_data_o16n']) 53 | self.container_prediction_results = ensure_str( 54 | my_config['container_prediction_results']) 55 | self.predictions_filename = '{}_predictions_o16n.csv'.format( 56 | output_model_name) 57 | 58 | # Load blob service and ensure containers are available 59 | blob_service = BlockBlobService(self.storage_account_name, 60 | self.storage_account_key) 61 | container_list = [i.name for i in blob_service.list_containers()] 62 | for container in [self.container_pretrained_models, 63 | self.container_trained_models, 64 | self.container_data_o16n, 65 | self.container_prediction_results]: 66 | assert container in container_list, \ 67 | 'Could not find container {} in storage '.format(container) + \ 68 | 'account 
{}'.format(self.storage_account_name) 69 | 70 | # Load information on the named model 71 | self.output_model_name = output_model_name 72 | description = blob_service.get_blob_to_text( 73 | container_name=self.container_trained_models, 74 | blob_name='{}/model.info'.format(self.output_model_name)) 75 | description_dict = {} 76 | for line in description.content.split('\n'): 77 | if len(line) == 0: 78 | continue 79 | key, val = line.strip().split(',') 80 | description_dict[key] = val 81 | self.model_source = description_dict['model_source'] 82 | self.pretrained_model_type = description_dict['pretrained_model_type'] 83 | 84 | # Create pipeline components common to both model types 85 | self.extract_path_udf = udf(lambda row: os.path.basename(row.path), 86 | StringType()) 87 | self.unroller = mmlspark.UnrollImage().setInputCol('image') \ 88 | .setOutputCol('unrolled') 89 | return 90 | 91 | def load_batchaitraining_model_components(config): 92 | ''' Loads all components needed to apply a trained BAIT model ''' 93 | # Get the CNTK model itself 94 | model_uri = 'wasb://{}@'.format(config.container_trained_models) + \ 95 | '{}.blob.core'.format(config.storage_account_name) + \ 96 | '.windows.net/{}'.format(config.output_model_name) + \ 97 | '/retrained.model' 98 | config.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 99 | .setOutputCol('features').setModelLocation(config.spark, model_uri) \ 100 | .setOutputNodeIndex(0) 101 | 102 | # Load the correspondence between indices and labels 103 | blob_service = BlockBlobService(config.storage_account_name, 104 | config.storage_account_key) 105 | labels_to_inds_str = blob_service.get_blob_to_text( 106 | container_name=config.container_trained_models, 107 | blob_name='{}/labels_to_inds.tsv'.format(config.output_model_name)) 108 | config.inds_to_labels = {} 109 | for line in labels_to_inds_str.content.split('\n'): 110 | if len(line) == 0: 111 | continue 112 | key, val = line.strip().split('\t') 113 | config.inds_to_labels[int(val)] = key 114 | 115 | return(config) 116 | 117 | 118 | def load_mmlspark_model_components(config): 119 | ''' Loads all components needed to apply a trained MMLSpark model ''' 120 | # Load the pretrained featurization model 121 | if config.pretrained_model_type == 'resnet18': 122 | model_filename = 'ResNet_18.model' 123 | last_layer_name = 'z.x' 124 | elif config.pretrained_model_type == 'alexnet': 125 | model_filename = 'AlexNet.model' 126 | last_layer_name = 'h2_d' 127 | model_uri = 'wasb://{}@'.format(config.container_pretrained_models) + \ 128 | '{}.blob.core.windows'.format(config.storage_account_name) + \ 129 | '.net/{}'.format(model_filename) 130 | config.cntk_model = mmlspark.CNTKModel().setInputCol('unrolled') \ 131 | .setOutputCol('features').setModelLocation(config.spark, model_uri) \ 132 | .setOutputNodeName(last_layer_name) 133 | 134 | # Load the MMLSpark-trained model 135 | mmlspark_uri = 'wasb://{}@'.format(config.container_trained_models) + \ 136 | '{}.blob.core.'.format(config.storage_account_name) + \ 137 | 'windows.net/{}/model'.format(config.output_model_name) 138 | config.mmlspark_model = TrainedClassifierModel.load(mmlspark_uri) 139 | 140 | # Load the transform that will convert model output from indices to strings 141 | config.tf = mmlspark.IndexToValue().setInputCol('scored_labels') \ 142 | .setOutputCol('pred_label') 143 | 144 | return(config) 145 | 146 | 147 | def load_data(config, sample_frac=1.0): 148 | data_uri = 'wasb://{}@{}.blob.core.windows.net/*.png'.format( 149 | 
config.container_data_o16n, config.storage_account_name) 150 | df = config.spark.readImages(data_uri, recursive=True, 151 | sampleRatio=sample_frac).toDF('image') 152 | df = df.withColumn('filepath', config.extract_path_udf(df['image'])) 153 | df = config.unroller.transform(df).select('filepath', 'unrolled') 154 | df = config.cntk_model.transform(df).select( 155 | ['filepath', 'features']) 156 | return(df) 157 | 158 | 159 | def main(config_filename, output_model_name, sample_frac): 160 | ''' Coordinate application of trained models to large static image set ''' 161 | config = ConfigFile(config_filename, output_model_name) 162 | 163 | if config.model_source == 'batchaitraining': 164 | config = load_batchaitraining_model_components(config) 165 | elif config.model_source == 'mmlspark': 166 | config = load_mmlspark_model_components(config) 167 | else: 168 | raise Exception('Model source not recognized') 169 | 170 | df = load_data(config, sample_frac) 171 | 172 | if config.model_source == 'batchaitraining': 173 | # Create a UDF to find argmax on model output and convert to a string label 174 | inds_to_labels = config.inds_to_labels 175 | label_udf = udf(lambda x: str(inds_to_labels[np.argmax(x.toArray())]), 176 | StringType()) 177 | predictions = df.withColumn('pred_label', label_udf(df['features'])) \ 178 | .select('filepath', 'pred_label') 179 | elif config.model_source == 'mmlspark': 180 | predictions = config.mmlspark_model.transform(df) 181 | predictions = config.tf.transform(predictions).select( 182 | 'filepath', 'pred_label') 183 | 184 | output_str = predictions.toPandas().to_csv(index=False) 185 | blob_service = BlockBlobService(config.storage_account_name, 186 | config.storage_account_key) 187 | blob_service.create_blob_from_text( 188 | config.container_prediction_results, 189 | config.predictions_filename, 190 | output_str) 191 | 192 | return 193 | 194 | 195 | if __name__ == '__main__': 196 | parser = argparse.ArgumentParser(description=''' 197 | Applies a trained Batch AI Training or MMLSpark model to a large static dataset 198 | in an HDInsight cluster's associated blob storage account. 199 | ''') 200 | parser.add_argument('-c', '--config_filename', 201 | type=str, required=True, 202 | help='Includes storage account credentials and ' + 203 | 'container names.') 204 | parser.add_argument('-o', '--output_model_name', 205 | type=str, required=True, 206 | help='The model name specified at the time of ' + \ 207 | 'training; used for lookup of output files in ' + \ 208 | 'blob storage.') 209 | parser.add_argument('-f', '--sample_frac', 210 | type=float, required=False, default=1.0, 211 | help='Subsamples data. Default sampling fraction is ' + 212 | '1.0 (all samples used).') 213 | args = parser.parse_args() 214 | 215 | assert os.path.exists(args.config_filename), \ 216 | 'Could not find config file {}'.format(args.config_filename) 217 | main(args.config_filename, args.output_model_name, args.sample_frac) 218 | -------------------------------------------------------------------------------- /Code/04_Result_Analysis/Model prediction analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Model prediction analysis" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Before running this notebook, you *must* edit the code cell below to specify `output_model_name` for your model of interest. 
Batch scoring (and training) must already have been performed using this model, since this notebook will use the prediction results thus created." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "from analysis_config_loader import ConfigFile\n", 24 | "import os\n", 25 | "%matplotlib inline\n", 26 | "import matplotlib.pyplot as plt\n", 27 | "from PIL import Image\n", 28 | "from collections import defaultdict\n", 29 | "from sklearn.metrics import confusion_matrix\n", 30 | "import pandas as pd\n", 31 | "import numpy as np\n", 32 | "\n", 33 | "output_model_name = '' # <-- fill in this value with your desired model's name!\n", 34 | "config_filename = '../settings.cfg'\n", 35 | "\n", 36 | "assert output_model_name != '', \\\n", 37 | " 'You must fill in the output_model_name field with the name of a model you trained ' + \\\n", 38 | " 'and applied to the operationalization dataset using batch_score_spark.py'\n", 39 | "assert os.path.exists(config_filename), \\\n", 40 | " 'Could not find configuration file'.format(config_filename)\n", 41 | " \n", 42 | "config = ConfigFile(config_filename, output_model_name)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": {}, 48 | "source": [ 49 | "## Test set analysis" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "### Overall and class-specific model performance metrics" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 3, 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "name": "stdout", 66 | "output_type": "stream", 67 | "text": [ 68 | "Overall accuracy: 0.773\n" 69 | ] 70 | } 71 | ], 72 | "source": [ 73 | "n_correct_label = len(config.test_df.loc[config.test_df['label'] == config.test_df['pred_label']].index)\n", 74 | "n_total = len(config.test_df.index)\n", 75 | "\n", 76 | "print('Overall accuracy: {:.3f}'.format(n_correct_label / n_total))" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "We can also calculate precision, recall, and accuracy for each label:" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "Metrics for Barren vs. not-Barren classification\n", 96 | "- Recall: 0.127\n", 97 | "- Precision: 0.935\n", 98 | "- Accuracy: 0.922\n", 99 | "Metrics for Cultivated vs. not-Cultivated classification\n", 100 | "- Recall: 0.161\n", 101 | "- Precision: 0.761\n", 102 | "- Accuracy: 0.945\n", 103 | "Metrics for Developed vs. not-Developed classification\n", 104 | "- Recall: 0.202\n", 105 | "- Precision: 0.955\n", 106 | "- Accuracy: 0.977\n", 107 | "Metrics for Forest vs. not-Forest classification\n", 108 | "- Recall: 0.192\n", 109 | "- Precision: 0.776\n", 110 | "- Accuracy: 0.943\n", 111 | "Metrics for Herbaceous vs. not-Herbaceous classification\n", 112 | "- Recall: 0.091\n", 113 | "- Precision: 0.721\n", 114 | "- Accuracy: 0.901\n", 115 | "Metrics for Shrub vs. not-Shrub classification\n", 116 | "- Recall: 0.164\n", 117 | "- Precision: 0.576\n", 118 | "- Accuracy: 0.859\n" 119 | ] 120 | } 121 | ], 122 | "source": [ 123 | "labels = np.sort(config.test_df['label'].unique()).tolist()\n", 124 | "\n", 125 | "for label in labels:\n", 126 | " print('Metrics for {0} vs. 
not-{0} classification'.format(label))\n", 127 | " \n", 128 | " n_true_pos = len(config.test_df.loc[(config.test_df['label'] == label) & \n", 129 | " (config.test_df['pred_label'] == label)].index)\n", 130 | " n_true_neg = len(config.test_df.loc[(config.test_df['label'] != label) & \n", 131 | " (config.test_df['pred_label'] != label)].index)\n", 132 | " n_false_pos = len(config.test_df.loc[(config.test_df['label'] != label) & \n", 133 | " (config.test_df['pred_label'] == label)].index)\n", 134 | " n_false_neg = len(config.test_df.loc[(config.test_df['label'] == label) & \n", 135 | " (config.test_df['pred_label'] != label)].index)\n", 136 | " n_predicted_pos = n_true_pos + n_false_pos\n", 137 | " n_labeled_pos = n_true_neg + n_false_neg\n", 138 | " n_correct = n_true_pos + n_true_neg\n", 139 | " \n", 140 | " if n_labeled_pos == 0:\n", 141 | " print('- Recall: Undefined (no images have this true label)')\n", 142 | " else:\n", 143 | " print('- Recall: {:0.3f}'.format(n_true_pos / n_labeled_pos))\n", 144 | " if n_predicted_pos == 0:\n", 145 | " print('- Precision: Undefined (No images predicted to have this label)')\n", 146 | " else:\n", 147 | " print('- Precision: {:0.3f}'.format(n_true_pos / n_predicted_pos))\n", 148 | " print('- Accuracy: {:0.3f}'.format(n_correct / n_total))" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Confusion matrix construction\n", 156 | "\n", 157 | "We now construct a confusion matrix to check which types of classification errors are most common:" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 5, 163 | "metadata": {}, 164 | "outputs": [ 165 | { 166 | "data": { 167 | "text/plain": [ 168 | "" 169 | ] 170 | }, 171 | "execution_count": 5, 172 | "metadata": {}, 173 | "output_type": "execute_result" 174 | }, 175 | { 176 | "data": { 177 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAmcAAAJRCAYAAAAAkRChAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzs3XmYZGV59/Hvb4ZVQBBBRAUhcYtRQUAiiggqLtFETIhE\nTQQ1wbgvMXFJIoomahJNiAsJagB33Ig7iwgqJCowjiAq4MYrArKoyDoww/3+cU5DTdPT0zN09Tmn\n+vvxqqvrLPXU3eXQfff9bKkqJEmS1A9Lug5AkiRJtzE5kyRJ6hGTM0mSpB4xOZMkSeoRkzNJkqQe\nMTmTJEnqEZMzSZKkHjE5kyRJ6hGTM0mSpB7ZoOsAJEmS1tfSO9+7auUNC/JedcMVJ1bVE8f9PiZn\nkiRpsGrlDWx8/6cvyHvduPzd2yzE+9itKUmS1CNWziRJ0oAFMlm1psn6biRJkgbOypkkSRquAEnX\nUcwrK2eSJEk9YuVMkiQNm2POJEmSNC5WziRJ0rA55kySJEnjYnImSZLUI3ZrSpKkAXMRWkmSJI2R\nlTNJkjRsTgiQJEnSuFg5kyRJwxUccyZJkqTxsXImSZIGLI45kyRJ0vhYOZMkScPmmDNJkiSNi5Uz\nSZI0bI45kyRJ0rhYOZMkSQPm3pqSJEkaIytnkiRpuIJjziRJkjQ+JmeSJEk9YremJEkaNicESJIk\naVxMziRJ0oC1S2ksxGMu0SRbJflkkh8k+X6SvZJsneTkJBe2X+8yWxsmZ5IkSfPnCOCEqnoAsAvw\nfeA1wClVdV/glPZ4jUzOJC2YJJsm+VySq5N84g6086wkJ81nbF1I8qUkB3cdhzR4S7Iwj7VIsiWw\nD/B+gKq6qap+DTwVOLa97VjggFm/nTv0YUiaSEmemeSsJNcmubRNIvaeh6YPBLYD7lpVf7K+jVTV\nh6vq8fMQz2qS7Jukkhw/7fwu7fnT5tjOG5J8aG33VdWTqurYtd0nqTe2aX82Tj0OnXZ9Z+AK4Ogk\n307yviSbAdtV1aXtPZfR/BxcI2drSlpNklfSlNz/CjgRuAl4Is1ffqffwebvDVxQVSvvYDvjdAWw\nV5K7VtVV7bmDgQvm6w2SBEhV3TJfbUqLVljI2ZpXVtUes1zfANgNeElVfTPJEUzrwqyqSlKzvYmV\nM0m3akvyhwMvqqpPV9V1VXVzVX2uqv6mvWfjJP+e5JL28e9JNm6v7Zvk4iR/neTytur2nPbaG4HX\nAwe1FbnnTa8wJdmprVBt0B4fkuTHSa5J8pMkzxo5f/rI6x6R5My2u/TMJI8YuXZakjclOaNt56Qk\n28zyMdwE/A/wp+3rlwIHAR+e9lkdkeRnSX6T5Owkj2rPPxF43cj3+Z2ROP4xyRnA9cBvtef+or1+\nZJJPjbT/tiSntImcpGG4GLi4qr7ZHn+SJln7RZLtAdqvl8/WiMmZpFF7AZsAx89yz98BDwd2pRns\nuifw9yPX7w5sCdwTeB7w7iR3qarDgH8Cjquqzavq/bMF0nYF/AfwpKraAngEsHyG+7YGvtDee1fg\nHcAXktx15LZnAs8B7gZsBLxqtvcGPgA8u33+BOC7wCXT7jmT5jPYGvgI8Ikkm1TVCdO+z11GXvPn\nwKHAFsBF09r7a+DBbeL5KJrP7uCqmvUvbEk02zctxGMtquoy4GdJ7t+eeizwPeCzNBV42q+fma0d\nkzNJo+5KU7afrdvxWcDhVXV5VV0BvJEm6Zhyc3v95qr6InAtcP8Z2pmLW4AHJdm0qi6tqvNmuOfJ\nwIVV9cGqWllVHwV+APzByD1HV9UFVXUD8HGapGqNqup/ga3bH7DPpknWpt/zoaq6qn3PtwMbs/bv\n85iqOq99zc3T2rue5nN8B/Ahmm6Ri9fSnqT+eQnw4STn0Pys+SfgrcD+SS4EHtcer5FjziSNuopm\nwOsGsyRo92D1qs9F7blb25j22uuBzdc1kKq6LslBNFWu97fdgX9dVT9YSzxTMd1z5Piy9Yjng8CL\ngf2A59JU326V5FU01a17AAXcGZituxTgZ7NdbMeo/JimwvfxOcQoaWqds56oquXATOPSHjvXNvrz\n3Ujqg/8DVjD7NO9LaAb2T9mR23f5zdV1wJ1Gju8+erGqTqyq/YHtaaph751DPFMx/Xw9Y5ryQeCF\nwBfbqtat2m7HvwWeDtylqrYCrqYZmgxNsjaTWbsok7yIpgJ3Sdu+pEXI5EzSrarqappB++9OckCS\nOyXZMMmTkvxze9tHgb9Psm07sP71NN1w62M5sE+SHdvJCK+dupBkuyRPbceeraDpHp1pduMXgfu1\ny39s0FbbHgh8fj1jAqCqfgI8mmaM3XRbACtpZnZukOT1NJWzKb8Adkrm/ud8kvsBbwb+jKZ782+T\nzNr9KqnVkzFn88XkTNJq2vFTr6QZ5H8FTVfci2lmMEKTQJwFnAOcCyxrz63Pe50MHNe2dTarJ1RL\n2jguAX5Jkyi9YIY2rgKeQjOg/iqaitNTqurK9YlpWtunV9VMVcETgRNolte4CLiR1bsspxbYvSrJ\nsrW9Tzs79UPA26rqO1V1Ic2Mzw9OzYSVtHjEiUCSJGmoltz5XrXxw1+2IO9148l/e/Za1jmbF1bO\nJEmSesTkTJIkqUdcSkOSJA3XAg/WXwhWziRJknrEypkkSRq2Hi1COx9MzsZsg822rI23uvvab1zk\nHnD3LboOQVq0bnHS/pz88oabug5hEK669GKu/fUvJ6ufcYGZnI3ZxlvdnQe+8L+6DqP3vv7qfbsO\nQVq0Vty8qusQBuGjy2fdfUuttzz3Dxf+TR1zJkmSpHGxciZJkgasXxufz4fJ+m4kSZIGzsqZJEka\nNsecSZIkaVysnEmSpOEKjjmTJEnS+Fg5kyRJA+ZsTUmSJI2RlTNJkjRsztaUJEnSuJicSZIk9Yjd\nmpIkadicECBJkqRxsXImSZKGzQkBkiRJGhcrZ5IkabjiIrSSJEkaIytnkiRp2BxzJkmSpHGxciZJ\nkgYtVs4kSZI0LlbOJEnSYAUrZ5IkSRojK2eSJGm40j4miJUzSZKkHrFyJkmSBiyOOZMkSdL4WDmT\nJEmDZuVMkiRJY2NyJkmS1CN2a0qSpEGzW7NnkqxKsjzJd5IsS/KIrmOSJElaX5NQObuhqnYFSPIE\n4C3Ao+fywjSpdqrqlpFzS6tq1VgilSRJ887KWb/dGfgVQJLNk5zSVtPOTfLU9vxOSb6f5D3AMmCH\nJNcmOTzJN4G9kuye5KtJzk5yYpLt29eeluRtSb6V5IIkj+rqG5UkSZNpEipnmyZZDmwCbA88pj1/\nI/C0qvpNkm2AbyT5bHvt/sBzquqFAEk2A75bVa9PsiHwVeCpVXVFkoOAfwSe2752g6raM8nvA4cB\nj5seUJJDgUMBNtpyuzF8y5IkCZjI7ZsmITkb7dbcC/hAkgfR/F/1T0n2AW4B7glMZUoXVdU3RtpY\nBXyqfX5/4EHAyW2ZdClw6ci9n26/ng3sNFNAVXUUcBTA
Zve8f92Rb06SJC0uk5Cc3aqq/q+tkm0L\n/H77dfequjnJT2mqawDXTXvpjSPjzAKcV1V7reFtVrRfVzFhn58kSUMTt2/qtyQPoKl0XQVsCVze\nJmb7AfeeYzPnA9u2VTiSbJjkd8cSsCRJ0jSTUPmZGnMGTdXr4KpaleTDwOeSnAUsB34wl8aq6qYk\nBwL/kWRLms/o34HzxhC7JEm6gyatcjb45Kyqlq7h/JXAmromHzTt3s2nHS8H9pmhzX2ntb/TukUr\nSZI0u8EnZ5IkaXGbtMrZRI05kyRJGjorZ5IkadCsnEmSJGlsrJxJkqThmsAdAqycSZIk9YjJmSRJ\nUo/YrSlJkgbNCQGSJEkaGytnkiRpsNz4XJIkSWNl5UySJA2alTNJkiSNjZUzSZI0bJNVOLNyJkmS\n1CdWziRJ0nDFMWeSJEkaIytnkiRp0KycSZIkaWysnEmSpEGzciZJkqSxsXImSZIGy701JUmSNFYm\nZ5IkST1it6YkSRq2HvVqJvkpcA2wClhZVXsk2Ro4DtgJ+Cnw9Kr61ZrasHImSZI0v/arql2rao/2\n+DXAKVV1X+CU9niNrJxJkqThGsb2TU8F9m2fHwucBrx6TTdbOZMkSZqbbZKcNfI4dIZ7Cjgpydkj\n17erqkvb55cB2832JlbOJEnSoC1g5ezKka7KNdm7qn6e5G7AyUl+MHqxqipJzdaAlTNJkqR5UlU/\nb79eDhwP7An8Isn2AO3Xy2drw8rZmD3g7lvw9Vfv23UYvbfTCz7ZdQiD8dMjD+w6BE2YG25a1XUI\ng3DIw3bqOoRBeO+dNlrw9+zLmLMkmwFLquqa9vnjgcOBzwIHA29tv35mtnZMziRJkubHdsDxbbK4\nAfCRqjohyZnAx5M8D7gIePpsjZicSZKkYetH4Yyq+jGwywznrwIeO9d2HHMmSZLUI1bOJEnSoPVl\nzNl8sXImSZLUI1bOJEnSYCWxciZJkqTxsXImSZIGzcqZJEmSxsbkTJIkqUfs1pQkSYNmt6YkSZLG\nxsqZJEkatskqnFk5kyRJ6hMrZ5IkadAccyZJkqSxsXImSZKGK1bOJEmSNEZWziRJ0mAFmLDCmZUz\nSZKkPrFyJkmSBiyOOZMkSdL4WDmTJEmDNmGFMytnkiRJfWLlTJIkDZpjziRJkjQ2JmeSJEk9Yrem\nJEkarjghQJIkSWNk5UySJA1WgCVLJqt0ZuVMkiSpR6ycSZKkQXPMmSRJksbGypkkSRo0F6GdR0nu\nnuRjSX6U5HtJvpjkfrPcf1qSPdrnr5t27X/XM4adkjxzPV53TJID1+c9JUmS1qSz5CxNmns8cFpV\n/XZVPRB4HbDdHJtYLTmrqkesZyg7AeucnEmSpB5o1zlbiMdC6bJyth9wc1X959SJqloOLE3y+alz\nSd6V5JDRFyZ5K7BpkuVJPtyeu7b9+rEkTx6595gkB7YVsq8nWdY+ppK5twKPatt6RZKlSf4lyZlJ\nzkny/LadtLF8L8kXgLuN5VORJEmLWpdjzh4EnL0+L6yq1yR5cVXtOsPl44CnA19IshHwWOAFNEuh\n7F9VNya5L/BRYA/gNcCrquopAEkOBa6uqocl2Rg4I8lJwEOB+wMPpqnufQ/47/WJX5IkzY8weWPO\nJnFCwJeAI9rE6onA16rqhiRbAu9KsiuwCljT2LbHAw8ZGU+2JXBfYB/go1W1CrgkyVfWFECb4B0K\nsMOOO87H9yRJkhaJLpOz84CZBtSvZPXu1k3WpdG2MnYa8ATgIOBj7aVXAL8Admnbv3ENTQR4SVWd\nuNrJ5PfXIYajgKMAdt99j1qX+CVJ0rrIxFXOuhxz9hVg47bKBECShwFLgQcm2TjJVjTdkjO5OcmG\na7h2HPAc4FHACe25LYFLq+oW4M/b9wG4Bthi5LUnAi+YajvJ/ZJsBnwNOKgdk7Y9zZg5SZKkedVZ\nclZVBTwNeFy7lMZ5wBuAS4CPA+cAHwS+vYYmjgLOmZoQMM1JwKOBL1fVTe259wAHJ/kGTZfmde35\nc4BVSb6T5BXA+2jGky1L8l3gv2gqjMcDFwLnAkcCX13f712SJM2fSZut2emYs6q6hGbw/nR/2z6m\n37/vyPNXA68eOd585PnNwNbTXnsh8JCRU68dufcx097qdUxbqqP14pm/E0mSpPnh9k2SJEk9Momz\nNSVJ0iLihABJkiSNjZUzSZI0XAs8WH8hWDmTJEnqEStnkiRpsCZx+yYrZ5IkST1i5UySJA3ahBXO\nrJxJkiT1iZUzSZI0aI45kyRJ0thYOZMkSYM2YYUzK2eSJEl9YuVMkiQNVxxzJkmSpDGyciZJkgar\n2SGg6yjml5UzSZKkHjE5kyRJ6hG7NSVJ0oDFCQGSJEkaHytnkiRp0CascGblTJIkqU+snEmSpEFz\nzJkkSZLGxsqZJEkarjjmTJIkSWNk5UySJA1Ws33TZJXOrJxJkiT1iJUzSZI0aFbOJEmSNDZWziRJ\n0qBNWOHM5GzcCli56pauw+i9H77rj7oOYTDu8rAXdx3CIFz2v0d0HcJgbL6Jvwrmwp/lc1NdBzAB\n/C9SkiQNmmPOJEmSNDYmZ5IkST1iciZJkoar3b5pIR5zCidZmuTbST7fHu+c5JtJfpjkuCQbra0N\nkzNJkqT58zLg+yPHbwP+raruA/wKeN7aGjA5kyRJgxVCsjCPtcaS3At4MvC+9jjAY4BPtrccCxyw\ntnZMziRJkuZmmyRnjTwOnXb934G/BabWXbkr8OuqWtkeXwzcc21v4lIakiRp0BZwJY0rq2qPmWPI\nU4DLq+rsJPvekTcxOZMkSbrjHgn8YZLfBzYB7gwcAWyVZIO2enYv4Odra8huTUmSNGhLkgV5zKaq\nXltV96qqnYA/Bb5SVc8CTgUObG87GPjMWr+fO/ZxSJIkaRavBl6Z5Ic0Y9Dev7YX2K0pSZIGrW+7\nN1XVacBp7fMfA3uuy+utnEmSJPWIlTNJkjRYzer9PSud3UFWziRJknrEypkkSRq0JZNVOLNyJkmS\n1CdWziRJ0qA55kySJEljY3ImSZLUI3ZrSpKkQZuwXk0rZ5IkSX1i5UySJA1WgDBZpTMrZ5IkST1i\n5UySJA2ai9BKkiRpbKycSZKk4UpchFaSJEnjY+VMkiQN2oQVzqycSZIk9YmVM0mSNFgBlkxY6czK\nmSRJUo9YOZMkSYM2YYWzha+cJVmVZHmS85J8J8lfJ5nXOJLslOS789nmDO9xSJJ3jfM9JEnS4tNF\n5eyGqtoVIMndgI8AdwYO6yAWSZI0cK5zNo+q6nLgUODFaSxN8i9JzkxyTpLnAyT5WJInT70uyTFJ\nDlzT/aOSbJLk6CTnJvl2kv3a84ck+UySE5Kcn+Swkdf8WZJvtRW+/0qytD3/nCQXJPkq8MgxfzyS\nJGkR6nzMWVX9uE1+7gY8Fbi6qh6WZGPgjCQnAccBTwe+kGQj4LHAC4DnreH+GnmLFzVvUw9O8gDg\npCT3a6/tCTw
IuB44M8kXgOuAg4BHVtXNSd4DPCvJycAbgd2Bq4FTgW+P7YORJEmLUufJ2TSPBx6S\n5MD2eEvgvsCXgCPaBOyJwNeq6oYka7r/gpE29wbeCVBVP0hyETCVnJ1cVVcBJPl0e+9KmgTszLZM\nuilwOfB7wGlVdUV7/3Ej7awmyaE0FUF22GHH9f80JEnSrJLJmxDQeXKW5LeAVTQJUICXVNWJM9x3\nGvAEmqrWx6ZOz3R/kp3m+PY1w3GAY6vqtdPaPGCObVJVRwFHAey2+x7T30OSJGmNOh1zlmRb4D+B\nd1VVAScCL0iyYXv9fkk2a28/DngO8CjghPbcbPdP+TrwrKnrwI7A+e21/ZNsnWRT4ADgDOAU4MB2\nsgLt9XsD3wQeneSu7fv9yXx+FpIkaf0sSRbksVC6qJxtmmQ5sCFNF+IHgXe0194H7AQsS9OneAVN\n0gRwUnvvZ6rqpjncP+U9wJFJzm3f75CqWtF2WZ7etnkf4CNVdRZAkr+nGZu2BLgZeFFVfSPJG4D/\nAy4FlgFL5+MDkSRJmrLgyVlVrTGhqapbgNe1j+nXbga2nuP9V9MM9KeqbqSpuM3k8qp68QzvdRxN\npW76+aOBo9cUvyRJWngTNuTM7ZskSZL6pPMJAV2pqmOAYzoOQ5Ik3UEuQitJkqSxWbSVM0mSNHwB\nlkxW4czKmSRJUp9YOZMkScOVOOZMkiRJ42PlTJIkDdqEFc7WnJwlOZ7b7z15q6r6o7FEJEmStIjN\nVjl714JFIUmStJ4mbczZGpOzqjpl6nmSjYAdq+qHCxKVJEnSIrXWCQFJngycC5zcHu/adnlKkiR1\namqds4V4LJS5zNY8HPg94NcAVbUcuM84g5IkSVqs5pKc3VxVv552bo0TBSRJkrT+5rKUxveTPB1Y\nkmRn4KXAN8YbliRJ0txM2oSAuVTOXgzsDtwCHA/cBLx8nEFJkiQtVmutnFXVdcCrk7yxOawbxh+W\nJEnS3ExW3WxuszV3S/Jt4ALgwiRnJ9lt/KFJkiQtPnMZc3Y08PKqOhUgyb7tuV3GGJckSdJaJbBk\nEY45u24qMQOoqtOAa8cWkSRJ0iI2296aD2mffjPJu4GP0iyhcRBw6ppeJ0mStJAmrHA2a7fmu6cd\nP2TkueucSZIkjcFse2s+aiEDkSRJWh+Tts7ZXCYEkOQJwO8Cm0ydq6p/GldQkiRJi9Vak7Mk7wG2\nAvahmaX5x7hDgCRJ6okJK5zNabbm3lX1TOCqqvoHmk3Q7zXesCRJkhanuXRrTu0IcGOSuwNXATuN\nLSJJkqQ5Cpm4dc7mkpx9KclWwL8Cy4FVwLFjjUqSJGmRmsvemm9on34iyeeBTYGdxxmUJEnSnGTy\nxpzNabbmlHbT8xuSLAd2HE9IkiRJi9dcJgTMZMJyVEmSpH5Yp8rZCHcImKMAGyxd3xx48bh+xcqu\nQxiMX535rq5DGIS7PPFtXYcwGL864dVdhzAIK25e1XUIg1AdZAiLZhHaJMczcxIW4K5ji0iSJGkR\nm61yNtuf5/7pLkmSemHS+qdm21vzlIUMRJIkSes/5kySJKlzYfLGnE1aJVCSJGnQ5lw5S7JxVa0Y\nZzCSJEnraslkFc7WXjlLsmeSc4EL2+Ndkrxz7JFJkiQtQnOpnP0H8BTgfwCq6jtJ9htrVJIkSXO0\n6CpnwJKqumjaOVfikyRJGoO5VM5+lmRPoJIsBV4CXDDesCRJktYuWZyzNV8AvJJmo/NfAA9vz0mS\nJGmerbVyVlWXA3+6ALFIkiSts0kbc7bW5CzJe5lhj82qOnQsEUmSJC1icxlz9uWR55sATwN+Np5w\nJEmS1k1fhpwl2QT4GrAxTY71yao6LMnOwMeAuwJnA39eVTetqZ25dGseN+2NPwicfAdilyRJmkQr\ngMdU1bVJNgROT/IlmrH7/1ZVH0vyn8DzgCPX1Mj6bN+0M3Dv9YlYkiRpUlXj2vZww/ZRwGOAT7bn\njwUOmK2duYw5+xW3jTlbAvwSeM16xCxJkjSvAizpS78m0C47djZwH+DdwI+AX1fVyvaWi4F7ztbG\nrMlZmoVDdgF+3p66papuNzlAkiRpEdgmyVkjx0dV1VGjN1TVKmDXJFsBxwMPWNc3mTU5q6pKcnxV\n7b6uDUuSJC2E9RmjtZ6urKo95nJjVf06yanAXsBWSTZoq2f34rai14zm8v18K8lD5xKIJEnSYpVk\n27ZiRpJNgf2B7wOnAge2tx0MfGa2dtZYORvJ8PYG/jLJj4DraLp3q6p2u8PfhSRJ0h3UoyFn2wPH\ntuPOlgAfr6rPJ/ke8LEkbwa+Dbx/tkZm69b8FrAba5lRIEmSJKiqc4Db9TZW1Y+BPefazmzJWdoG\nf7TO0UmSJC2AJL2arTkfZkvOtk3yyjVdrKp3jCEeSZKkRW225GwpsDltBU2SJKmPJqxwNmtydmlV\nHb5gkUiSJGntY84kSZL6bMmEZSyzrXP22AWLQpIkScAslbOq+uVCBiJJkrSu+ra35nxYwB0PJEmS\ntDaz7q0pSZLUdxNWOLNyJkmS1CcmZ5IkST1it6YkSRquLK6lNAYlyaoky0ceO43xvV6e5E7jal+S\nJC1ek1Q5u6Gqdl3XFyXZoKpWruPLXg58CLh+Xd9PkiTNr0zYuvmTlJzdTpJNgCOBPYCVwCur6tQk\nhwBPBjYBNgMek+RvgKcDGwPHV9VhSTYDPg7ci2av0TcB2wH3AE5NcmVV7bfA35YkSZpgk5ScbZpk\nefv8J1X1NOBFQFXVg5M8ADgpyf3ae/YCHlJVv0zyeOC+wJ4069l9Nsk+wLbAJVX1ZIAkW1bV1Ule\nCexXVVfOFEiSQ4FDAXbYccfxfLeSJKldhLbrKObXxIw5o+3WbB9Pa8/tTdP9SFX9ALgImErOTh7Z\nBeHx7ePbwDLgATTJ2rnA/kneluRRVXX1XAKpqqOqao+q2mPbbbadl29OkiQtDpNUOVtX1408D/CW\nqvqv6Tcl2Q34feAtSU6qqsMXKkBJkrR2Vs6G5evAswDa7swdgfNnuO9E4LlJNm/vvWeSuyW5B3B9\nVX0I+Fdgt/b+a4Atxh28JElafCa9cvYe4Mgk59JMCDikqlZk2j4PVXVSkt8B/q+9di3wZ8B9gH9J\ncgtwM/CC9iVHASckucQJAZIkdWv67/Whm5jkrKo2n+HcjcBzZjh/DHDMtHNHAEdMu/VHNFW16a9/\nJ/DO9Y9WkiRpZhOTnEmSpMXH2ZqSJEkaKytnkiRpuAITNuTMypkkSVKfWDmTJEmDtmTCSmdWziRJ\nknrE5EySJKlH7NaUJEmD5VIakiRJGisrZ5IkadAmbD6AlTNJkqQ+sXImSZIGLCxhskpnVs4kSZJ6\nxMqZJEkarOCYM0mSJI2RlTNJkjRccZ0zSZIkjZGVM0mSNGhufC5JkqSxsXImSZIGy9makiRJGisr\nZ5IkadAccyZJkqSxMTmTJEnqEbs1JUnSoE1Yr6aVM0mSpD6xciZJkgYr
TF6ladK+H0mSpEGzciZJ\nkoYrkAkbdGblTJIkqUesnEmSpEGbrLqZydnY3VKw4uZVXYfRexf/8oauQxiMe29zp65DGITLPveq\nrkMYjNd+8QddhzAIL9nr3l2HMAgrb7ml6xAGz+RMkiQNVnD7JkmSJI2RlTNJkjRok1U3s3ImSZLU\nK1bOJEnSoE3YkDMrZ5IkSX1i5UySJA1Y3CFAkiRJ42NyJkmS1CN2a0qSpMEKk1dpmrTvR5IkadCs\nnEmSpEFzQoAkSZLGxsqZJEkatMmqm1k5kyRJ6hUrZ5IkabjimDNJkiSNkZUzSZI0WK5zJkmSpLGy\nciZJkgbNMWeSJEkaGytnkiRp0CarbmblTJIkqVdMziRJ0qAlC/NYexzZIcmpSb6X5LwkL2vPb53k\n5CQXtl/vMls7JmeSJEnzYyXw11X1QODhwIuSPBB4DXBKVd0XOKU9XiOTM0mSpHlQVZdW1bL2+TXA\n94F7Ak8Fjm1vOxY4YLZ2nBAgSZIGq1mEtn9TApLsBDwU+CawXVVd2l66DNhutteanEmSJM3NNknO\nGjk+qqqOmn5Tks2BTwEvr6rfjK7DVlWVpGZ7E5MzSZI0aAu4Bu2VVbXHbDck2ZAmMftwVX26Pf2L\nJNtX1aWYejF8AAAcUklEQVRJtgcun60Nx5xJkiTNgzQlsvcD36+qd4xc+ixwcPv8YOAzs7Vj5UyS\nJA1YSH/GnD0S+HPg3CTL23OvA94KfDzJ84CLgKfP1ojJmSRJ0jyoqtNZ84YFj51rOyZnkiRp0CZs\n3/PxjTlLcu2040OSvGsd21jn10iSJA1ZbytnSXobmyRJ6oe+rnN2R3QyWzPJtkk+leTM9vHI9vwb\nkhyV5CTgA+3tOyQ5Icn5SQ4baeN/kpzd7l116Mj5JyZZluQ7SU5pz22W5L+TfCvJt5M8tT2/SZKj\nk5zbnt+vPb9axS7J55Psm2RpkmOSfLd9zSvG/2lJkqTFZJzVqU1HZioAbE0zlRTgCODfqur0JDsC\nJwK/017bHdi7qm5IcgiwJ/Ag4HrgzCRfqKqzgOdW1S+TbNqe/xRNsvleYJ+q+kmSrds2/w74SlU9\nN8lWwLeSfBn4K5r14B6c5AHASUnuN8v3tCtwz6p6EEDbliRJ6socNyUfknEmZzdU1a5TB22iNbVw\n2+OAB46smHvndjVdgM9W1Q0j7ZxcVVe1bXwa2Bs4C3hpkqe19+wA3BfYFvhaVf0EoKp+2V5/PPCH\nSV7VHm8C7Ni29c723h8kuQiYLTn7MfBbSd4JfAE4aaab2kreoQA77LDjLM1JkiStrqtxXUuAh1fV\njaMn22Ttumn3Tt/ioJLsS5Pg7VVV1yc5jSbhWpMAf1xV58/wfjNZyepdvpsAVNWvkuwCPAF4Ec06\nJc+d/uJ2K4ejAB662x6zbtEgSZLumEmrnHW1Q8BJwEumDpLsOsu9+yfZuu2+PAA4A9gS+FWbmD0A\neHh77zeAfZLs3LY71a15IvCSduVekjy0Pf914FntufvRVNPOB34K7JpkSZIdaLpWSbINsKSqPgX8\nA7Db+n8EkiRJt9dV5eylwLuTnNPG8DWa8V8zOR34IHAf4CNVdVaSc4G/al9/Pk1SRlVd0XYpfjrJ\nEpq9q/YH3gT8O3BOe/4nwFOA9wBHtu2tBA6pqhVJzmjvORf4LrCsjeWewNFtGwCvnZ+PQ5Ikra8e\n7RAwL8aWnFXV5tOOjwGOaZ9fCRw0w2vesKbXTDu/AnjSGt73S8CXpp27AXj+DPfeCDxnhvNFW1Gb\ngdUySZI0Nm58LkmS1CMu9CpJkgYrwJLJ6tW0ciZJktQnVs4kSdKgTdqEACtnkiRJPWLlTJIkDZqL\n0EqSJGlsrJxJkqRBc8yZJEmSxsbKmSRJGizXOZMkSdJYWTmTJEkDFsecSZIkaXysnEmSpOGK65xJ\nkiRpjKycSZKkQZuwwpmVM0mSpD6xciZJkgarWedssmpnVs4kSZJ6xORMkiSpR+zWlCRJgzZZnZpW\nziRJknrFypkkSRq2CSudWTmTJEnqEStnkiRp0Nz4XJIkSWNj5UySJA3ahK1Ba+VMkiSpT6ycSZKk\nQZuwwpmVM0mSpD6xciZJkoZtwkpnJmdjtiSw8YZLuw6j9+63/RZdhzAYK1fd0nUIg3DhZdd2HcJg\nvG6/3+46hEHY/XVf6jqEQbjskt90HcLgmZxJkqTBCq5zJkmSpDGyciZJkoYrrnMmSZKkMTI5kyRJ\n6hG7NSVJ0qBNWK+mlTNJkqQ+sXImSZKGbcJKZ1bOJEmSesTKmSRJGrC4CK0kSZLGx8qZJEkaNBeh\nlSRJ0thYOZMkSYMVJm6yppUzSZKkPrFyJkmShm3CSmdWziRJknrEypkkSRo01zmTJEnS2Fg5kyRJ\ng+Y6Z5IkSRobkzNJkqQesVtTkiQN2oT1alo5kyRJ6hMrZ5IkabgmcP8mK2eSJEk9YuVMkiQNmovQ\nSpIkaWysnEmSpMEKLkIrSZKkMbJyJkmSBm3CCmdWziRJkvrEypkkSRq2CSudWTmTJEnqkYlMzpL8\nXZLzkpyTZHmS30vy0yTb3MF2r52vGCVJ0vzIAv1vrXEk/53k8iTfHTm3dZKTk1zYfr3L2tqZuOQs\nyV7AU4DdquohwOOAn83xtXbzSpKk9XUM8MRp514DnFJV9wVOaY9nNXHJGbA9cGVVrQCoqiur6pL2\n2kuSLEtybpIHACR5Q5KjkpwEfCDJIUneNdVYks8n2Xfk+O1tG6ck2Xbhvi1JkjSTZGEea1NVXwN+\nOe30U4Fj2+fHAgesrZ1JTM5OAnZIckGS9yR59Mi1K6tqN+BI4FUj53cHnlpVz1xL25sBy9o2vgoc\nNtNNSQ5NclaSs6648or1/04kSdLQbVdVl7bPLwO2W9sLJi45q6praZKtQ4ErgOOSHNJe/nT79Wxg\np5GXfbaqbphD87cAx7XPPwTsvYYYjqqqPapqj223sbgmSdKE2Gaq+NI+Dl2XF1dVAbW2+yZyjFVV\nrQJOA05Lci5wcHtpRft1Fat/79eNPF/J6knrJrO91R2LVJIk3VELuJLGlVW1xzq+5hdJtq+qS5Ns\nD1y+thdMXOUsyf2T3Hfk1K7ARevQxE+BXZMsSbIDsOfItSXAge3zZwKn35FYJUnSxPsstxWJDgY+\ns7YXTGLlbHPgnUm2oqmC/ZCmi/Mpc3z9GcBPgHOB7wLLRq5dB/xukrOBq4GD5itoSZK0nnqyCG2S\njwL70nR/XkwzNv2twMeTPI+mWPT0tbUzcclZVZ0NPGKGSzuN3HMWzYdHVb1h2usLeNYa2t68ffoP\ndzxSSZI0SarqGWu49Nh1aWfikjNJkrR4BOa0QOyQTNyYM0mSpCGzciZJkoZrjgvEDomVM0mSpB6x\nciZJkgZtwgpnVs4kSZL6xMqZJEkatgkrnVk5kyRJ6hErZ5IkacDiOmeSJEkaHytnkiRp0FznTJIk\nSWNjciZJktQjdmtKkqTBChO
3koaVM0mSpD6xciZJkoZtwkpnVs4kSZJ6xMqZJEkaNBehlSRJ0thY\nOZMkSYPmIrSSJEkaGytnkiRp0CascGblTJIkqU+snEmSpOGKY84kSZI0RlbOJEnSwE1W6czKmSRJ\nUo9YOZMkSYMVHHMmSZKkMTI5kyRJ6hG7NSVJ0qBNWK+mydm4LVt29pWbbpiLuo5jmm2AK7sOYgD8\nnObOz2pu/Jzmxs9p7vr4Wd276wCGzuRszKpq265jmC7JWVW1R9dx9J2f09z5Wc2Nn9Pc+DnNnZ9V\nwwkBkiRJGhsrZ5IkadAyYaPOrJwtTkd1HcBA+DnNnZ/V3Pg5zY2f09z5WU2gVFXXMUiSJK2XXR66\ne5341W8syHttv+VGZy/EGD8rZ5IkST3imDNJkjRokzXizMqZJElSr1g5kyRJg5VM3jpnJmeLRJJH\nAm+gWbl5A5oqcFXVb3UZV18k+aPZrlfVpxcqlr5L8k5gjTOJquqlCxhO7yU5paoeu7Zzi12SlwFH\nA9cA7wMeCrymqk7qNLCeSXJXmp/lj6T57/B04PCquqrLuDS/TM4Wj/cDrwDOBlZ1HEsf/UH79W7A\nI4CvtMf7AacBJme3Oav9+kjggcBx7fGf0Pz7EpBkE+BOwDZJ7sJtw2LuDNyzs8D667lVdUSSJwDb\nAs+hSdZMzlb3MeBrwB+3x8+i+W/wcZ1F1AOTts6ZydnicXVVfanrIPqqqp4DkOTzwAOr6tL2eHvg\n3V3G1jdVdSxAkkOA/arq5vb4P/EX6ajnAy8H7kGTtE799vgN8K6uguqxqc/n94Gjq+o7yaR1Vs2L\nravqTSPHb05yQGfRaCxMzhaPU5P8C00FaMXUyapa1l1IvbTTVGLW+gVwv66C6bl7AFsAv2yPN2/P\nCaiqI4Ajkrykqt7ZdTwDcHaSk4Cdgdcm2QK4peOY+ujUJH8KfLw9PhD4Qofx9MOEpfEmZ4vH77Vf\nRxfPK+AxHcTSZ6clORH4aHt8EHBqh/H02VuBbyc5leZH4z40Y2G0usuSbFFV1yT5e2A34M3+YXQ7\nzwN2BX5cVde3Y6ue03FMvZHkGpqf2QFeCXyovbQEuBY4rKPQNAYmZ4tEVe3XdQxDUFUvTvI0mkQD\n4KiqOr7LmPqqqo5O8iVuS/xfXVWXdRlTT/1DVX0iyd7AE4B/BY7kts9Njb3brw+xN/P2qmqLrmPQ\nwjE5WySSbAf8E3CPqnpSkgcCe1XV+zsOrY+WAddU1ZeT3Gmq6tF1UH3Tjgd6HPBbVXV4kh2T7FlV\n3+o6tp6ZmoDzZODIqvpMkjd0GE9f/c3I802APWnG6lndH5Fkn5nOV9XXFjqWPpm0dN7kbPE4hmbm\n09+1xxfQzPAxORuR5C+BQ4Gtgd+mmVX3n4DLHtzee2jGBD0GOJxmCYRPAQ/rMqge+nmS/wL2B96W\nZGNcAPx2quoPRo+T7AD8c0fh9JlJ7CLgD4jFY5uq+jjtANuqWolLaszkRTRLRPwGoKoupFleQ7f3\ne1X1IuBGgKr6FbBRtyH10tOBE4EnVNWvaRL/v5n9JQIuBh7UdRB9U1V/MPLYn+Yz+kXXcXVtaiHa\ncT8WipWzxeO6doBtASR5OHB1tyH10oqqumlqzEuSDZhlwdVF7uYkS7nt39S2OLvudtrB7ZfTjKm6\nEFjZftWIaYsbL6GZHPCd7iIaDJPYCWRytni8Evgs8NtJzqBZ5PHAbkPqpa8meR2waZL9gRcCn+s4\npr76D+B4YLsk/0jz7+nvuw2pf5IcRjNL+v40Qws2pJlp98gu4+qhs0aerwQ+WlVndBVMX5nEziQu\nQqvhSbKEZmzCo2l+QQQ4f2rxUK3mNTRT+s+lWUT0i1X13m5D6qeq+nCSs7ltPN4BVfX9LmPqqafR\nbEW0DKCqLmnX8NKIqjo2yUbctq7g+V3G02MmsYuAydkiUFW3JHl7Ve0FnNd1PD33knbx0FsTsiQv\na8/p9u4ETHVtbtpxLH11U1VVkqnu3826DqiPkuwLHAv8lOYPyB2SHLzYZyGOaocR7F9Vf9Z1LH0S\nJm/jcycELB4nJfljt0NZq4NnOHfIQgcxBEleT/PLdGtgG+DodpFVre7j7WzNrdrZwF9mJPnXrd4O\nPL6qHl1V+9CsCfdvHcfUK1W1Cti2rTBqglk5WzxeCWwGrExyI80fG1VVd+42rH5I8gzgmcDOST47\ncml0eyKt7hnAQ6vqRoAkb6Xpuntzp1H1TFX9azt+8Tc0wwpeX1UndxxWH21YVbd2ZVbVBUk27DKg\nnvopcEb7c+q6qZNV9Y7OItK8MzlbBNpq2e9W1f/rOpYe+1/gUpoK0NtHzl8DnNNJRP33U5qxjDe2\nxxsDP+osmh5qu6FOrKrHASZkszsryfu4bVuiZ7H6+Co1LmkfS2j+eNQEMjlbBNrxLscDu3cdS19V\n1UXARcBeXccyICuA85KcTDPmbH/g9CT/AVBVL+0yuD6oqlVJrk+yZVW5dM3sXkCzzuDUv5uv0yx0\nrBFV9cauY+ijSRuwY3K2eHwjycOq6syuA+mzdv23dwK/Q7Og6lLgOrt/Z3R8+5hyWkdx9N2NwLlt\nEjvaDbXok9dRVbUiyZHAF0a7N7W6JPcDXgXsxMjv8Kpyh4AJYnK2eOwHPD/JRTS/IKbGnD2k27B6\n513AnwKfoFmb6tnAfTqNqKdmWvrA5Vlm9IX2oVkk+UPgX2j+KNo5ya7A4VX1h91G1jufoNlS7n24\ny8utXOdMQ/WkrgMYiqr6YZKl7cyoo5P8b9cx9ZFLH8yNSeycHUazT+RpAFW1PMnOnUbUTyur6siu\ng9B4mZwtEu2YKpLcjWYQt2Z2ffuLdHmSf6aZJOC6VDObWvrgfLi1u+WjOLZxNSaxc3ZzVV09bbUf\nt05rJdm6ffq5JC+kGVKwYup6VS3eWeULvO/lQjA5WyTaLoO3A/cALgfuDXwf+N0u4+qhP6eZBfVi\n4BXADsAfdxpRf7n0wdyYxM7NeUmeCSxNcl+aiQFWrW9zNk2yOpWGvGra9d9a2HA0Ti5Cu3i8CXg4\ncEFV7Uyz5Y5bftze7jRj8X5TVW+sqldW1Q+7DqqnzkryviT7to/34tIHM7ldEkuzv6ZW9xKaPxZX\nAB8BrgZe3mlE/XIQ8Miq2rn9Gf5G4LvA52nGx2qCmJwtHjdX1VXAkiRLqupUmg1ztbo/AC5I8sEk\nT0lidXnNXgB8j6bC8dL2+Qs6jaifTGLnoKqur6q/q6qHtY+/n1rgWEAzCWAFQJJ9gLfQdJdfDRzV\nYVydywI+Foq/eBaPXyfZHPga8OEkl9NsmqsRVfWctmvuSTQr4L87yclV9Rcdh9Y7VbUCeEf70Jq5\nftcctEuN/ElV/bo9vgvwsap6QreR9cbSkXFlBwFHVdWngE8lWd5hXBoDk7PF46nADTTjqJ4FbAkc\n3mlEPVVVNyf5Erdt5n0AYHLWSnIuswzUdnmWRpIdq+r/mcTO2TZTiRlAVf2qncCkxtIk
G1TVSpph\nKYeOXPN3uRMCNERVNbX45S1JvgBcVVXOhJomyZNo/irdl2ZK//uAp3cYUh89pesABuJ/gN0Aknyq\nqpxYMrtbphJagCT3xtmaoz4KfDXJlTR/aH8dIMl9aLo2NUFMziZcu+L9W2k2734T8EGa/SOXJHl2\nVZ3QZXw99GzgOOD5bcVD00wtywK3/gK9b1V9Ocmm+DNl1Ojf8s6kW7u/o9n+66s0n92jWL06tKhV\n1T8mOQXYHjhp5I/rJTSTKRY1F6HV0LwLeB1NN+ZXgCdV1TeSPIDmLzGTsxFV9YyuYxiKJH9J88tz\na+C3gXvRDFp+bJdx9Uit4blmUFUnJNmNZlY5wMur6souY+qbqvrGDOcu6CIWjZfJ2eTboKpOAkhy\n+NR/3FX1g0zaqn13QJLTq2rvJNew+i/SqW2u3Fvz9l5Es6L7NwGq6kLHCK1mlyS/ofk3tGn7HPw3\nNZtVNOswbgI8MAku1qu5mLRfZyZnk++Wkec3TLvmX/Otqtq7/bpF17EMyIqqumkqyW+XHfHfVKuq\nlnYdw5Ak+QvgZTQV2OU0FbT/A9zQW4uO65xNvl2S/KatCD2kfT51/OCug+ubJB+cyzkBzeDk19FU\nhfan2ZD5cx3HpOF6GfAw4KKq2g94KHBFtyFpKFznTIPiX+/rbLXtrNpqkNvszOw1wPOAc4HnA1+k\nmd0qrY8bq+rGJCTZuB16cf+ug5K6YHImAUleSzNxYvrYoJtY5Ktvz+IA4ANV9d6uA9FEuDjJVjRL\nkJyc5FfAJR3HpKGYsDFncakr6TZJ3lJVr+06jiFIcjTNeKCv0Sw/ckK7QKZ0hyR5NM0M8xOq6qau\n41G/7bb7HnX6N85ckPfabKMlZ1fV2PcytXImAe0UfoBPjDy/VVUtW+CQes+trjTf2v/29qaZWHKG\niZnmqk/rnCV5InAEsBR4X1W9dV3bMDmTGm+f5VrhjLEZudWV5kuS1wN/Any6PXV0kk9U1Zs7DEta\nJ0mWAu8G9gcuBs5M8tmq+t66tGNyJgHt7DCtA7e60jx7BvDQqroRIMlbgWWAyZlmFXq1ztmewA+r\n6scAST5Gs7e1yZm0vpI8e6bzVfWBhY5lANzqSvPppzSLz97YHm8M/KizaKT1c0/gZyPHFwO/t66N\nmJxJq3vYyPNNaLYiWgaYnE1TVc9o99Z8FHDr3ppVdU3HoWlAkryTplt8BXBekpPb4/2B07uMTcOw\nbNnZJ266YbZZoLfbJMlZI8dHVdW8z+g3OZNGVNVqGwi3U/uP7SicXnNvTc2TqV90ZwPHj5w/beFD\n0RBV1RO7jmHEz4EdRo7v1Z5bJy6lIc2inY14TlX9Ttex9E2S5bR7a1bVQ9tz51aVO09onbSDqI+t\nqj/rOhbpjmgXLr+A5o/UnwNnAs+sqvPWpR0rZ9KIJJ/jtv0hlwAPBD7eXUS95t6amhdVtSrJtkk2\ncvkMDVlVrUzyYuBEmqU0/ntdEzMwOZMASHIfYDvgX0dOr6T5j2udS9KLxPS9NV+Ie2tq/f0UOCPJ\nZ4Hrpk5W1Ts6i0haD1X1RZrt7Nab3ZoSkOTzwOuq6pxp5/cADquqP+gmsv5KsoRmb83H08xmP5Fm\nwUV/qGidJTlspvNV9caFjkXqmsmZBCT5blU9aA3XHEe1Bkm2BaiqK7qORZMhyZ2q6vqu45C6tKTr\nAKSe2GSWa5suWBQDkMYbklwJnA+cn+SKdoV3ab0k2SvJ94AftMe7JHlPx2FJnTA5kxpntktDrCbJ\nX9BM8ddtXgE8EnhYVW1dVVvTLLL4yCSv6DY0Ddi/A08ArgKoqu8A+3QakdQRuzUlIMl2NGss3cRt\nydgewEbA06rqsq5i65sk3wb2r6orp53flv/f3r2FajrFcRz//hzHYRySQ6TUOExDzTCOI0KauHCM\nciiJkkOGRCkuHC4ouSAp5EYiOSZTYxwGQ0PMyRCSJFxIchzcjL+LZ+28dvY0795j3qf291NvPe96\n1rPWep52u3/r/7xrwdKxZTWkYSR5r6qOSbJ6YGmWtVU1d9Rjk7Y0f60pAVX1HbAgycnA2Ltni6vq\n9REOq6+2HR+YQffeWVsXTpqMr5MsAKr9HV0HfDLiMUkjYXAmDaiqZcCyUY+j5za2DpVrVGmyrgTu\no9ub8FtgKXDNSEckjYhpTUlDSbKBgXWoBk8BM6rK2TNJmgKDM0nSyAxsfP6fqmrRFhyO1AumNSVJ\no/TBwPHtwH8uRitNJ86cSZJ6YfCXmtJ05jpnkqS+cLZAwuBMkiSpV0xrSpJGJsmv/DNjtiMwtq9m\ngKqqXUYyMGmEDM4kSZJ6xLSmJElSjxicSZIk9YjBmaQpSbIhyZokHyV5OsmOU2jrpCQvteMzk9y8\nkbq7Jbl6En3cluTGTS3fSDu/bY5+JWk8gzNJU/VHVc2rqsPo9ta8cvBkOkP/r6mqF6vq7o1U2Q0Y\nOjiTpL4zOJO0OS0HDkxyQJJPkjwIrAL2T7IwyYokq9oM284ASU5L8mmSt4FzxxpKcmmSB9rx3kme\nT7K2fRYAdwOz2qzdPa3eTUneT/JhktsH2rolyWdJXgUOGeaGkryQZGWSj5NcMe7cve1+XkuyZyub\nlWRJu2Z5ktmTeI6SpjGDM0mbRZJtgNOBda3oEOCxtuL7euBW4NSqOoJuy54bkswAHgHOAE4A9pmg\n+fuBN6tqLnAE8DFwM/BFm7W7KclC4CDgaGAeMD/JiUnmAxcAh9MFf0cNeWuXVdV84EhgUZI9WvlO\nwKp2P2/yz7ZDDwPXtmtuBB4csj9J05x7a0qaqh2SrGnHy4FHgX2Br6rq3VZ+LDAHeCcJwHbACmA2\n8GVVfQ6Q5HHgX7NTzSnAJQBVtQH4Ocnu4+osbJ/V7fvOdMHaTOD5qvq99fHikPe3KMk57Xj/1uYP\nwF/AU638ceC5Nhu4AHi63SfA9kP2J2maMziTNFV/VNW8wYIWmKwfLAJeqaoLx9X713VTFOCuqnpo\nXB/XT7rB5CTgVOC4qvo9yRvAjAmqF1024qfxz0OShmFaU9KW8C5wfJIDAZLslORg4FPggCSzWr0L\nJ7j+NeCqdu3WSXYFfqWbFRvzMnDZwLts+yXZC3gLODvJDklm0qVQN9WuwI8tMJtNNwM4ZivgvHZ8\nEfB2Vf0CfJnk/DaGJJk7RH+SZHAm6f9XVd8DlwJPJvmQltKsqj/p0piL2w8CvpqgieuAk5OsA1YC\nc6rqB7o06UdJ7qmqpcATwIpW7xlgZlWtoks/rgGepUu9TuTWJN+MfYAlwDZtzHfSBZlj1gOHJllJ\nl3a9o5VfDFyeZC3du3FnbepzkiRw+yZJkqReceZMkiSpRwzOJEmSesTgTJIkqUcMziRJknrE4EyS\nJKlHDM4kSZJ6xOBMkiSpRwzOJEmSeuRveM9hBVbairIAAAAASUVORK5CYII=\n", 178 | "text/plain": [ 179 | "" 180 | ] 181 | }, 
182 | "metadata": {}, 183 | "output_type": "display_data" 184 | } 185 | ], 186 | "source": [ 187 | "cm = confusion_matrix(config.test_df['label'], config.test_df['pred_label'], labels=labels)\n", 188 | "\n", 189 | "plt.figure(figsize=(10,10))\n", 190 | "plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)\n", 191 | "plt.colorbar()\n", 192 | "tick_marks = np.arange(len(labels))\n", 193 | "plt.xticks(tick_marks, labels, rotation=90)\n", 194 | "plt.yticks(tick_marks, labels)\n", 195 | "plt.xlabel('Predicted Label')\n", 196 | "plt.ylabel('True Label')\n", 197 | "plt.title('Confusion Matrix')" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": {}, 203 | "source": [ 204 | "Off-diagonal elements of this matrix constitute errors. If you trained with the full dataset (the default setting of `sample_frac=1.0`), you should see that most elements fall along the diagonal, and off-diagonal elements are near-zero." 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "## Operationalization image set analysis\n", 212 | "\n", 213 | "The true labels for the Middlesex County, MA images are unknown (and in many cases would be undefined, since an image may include land of multiple types), so we will not test their accuracy. However, we will use them to draw a predicted land use map of Middlesex County, MA:\n", 214 | "\n", 215 | "During data loading, we merged the operationalization prediction dataframe with information on each tile's latitude and longitude boundaries:" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 6, 221 | "metadata": {}, 222 | "outputs": [ 223 | { 224 | "data": { 225 | "text/html": [ 226 | "
\n", 227 | "\n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | "
namepred_labelllcrnrlatllcrnrlonurcrnrlaturcrnrlon
0ortho_1-1_hn_s_ma017_2016_1_100240.pngForest42.679883-71.79158642.681898-71.788856
1ortho_1-1_hn_s_ma017_2016_1_100640.pngForest42.681898-71.66601742.683912-71.663287
2ortho_1-1_hn_s_ma017_2016_1_101026.pngForest42.683912-71.57866542.685927-71.575935
3ortho_1-1_hn_s_ma017_2016_1_101498.pngForest42.685927-71.25655242.687941-71.253823
4ortho_1-1_hn_s_ma017_2016_1_101864.pngForest42.687941-71.22379542.689956-71.221065
\n", 287 | "
" 288 | ], 289 | "text/plain": [ 290 | " name pred_label llcrnrlat llcrnrlon \\\n", 291 | "0 ortho_1-1_hn_s_ma017_2016_1_100240.png Forest 42.679883 -71.791586 \n", 292 | "1 ortho_1-1_hn_s_ma017_2016_1_100640.png Forest 42.681898 -71.666017 \n", 293 | "2 ortho_1-1_hn_s_ma017_2016_1_101026.png Forest 42.683912 -71.578665 \n", 294 | "3 ortho_1-1_hn_s_ma017_2016_1_101498.png Forest 42.685927 -71.256552 \n", 295 | "4 ortho_1-1_hn_s_ma017_2016_1_101864.png Forest 42.687941 -71.223795 \n", 296 | "\n", 297 | " urcrnrlat urcrnrlon \n", 298 | "0 42.681898 -71.788856 \n", 299 | "1 42.683912 -71.663287 \n", 300 | "2 42.685927 -71.575935 \n", 301 | "3 42.687941 -71.253823 \n", 302 | "4 42.689956 -71.221065 " 303 | ] 304 | }, 305 | "execution_count": 6, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "config.o16n_df.head()" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "In our county map, each image will be represented by a single pixel. The color of that pixel will be determined by the image's predicted label. We convert the latitude and longitude of each image's lower-righthand corner into an x and y index for a single pixel in the final image:" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 7, 324 | "metadata": { 325 | "collapsed": true 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "lat_values = np.sort(config.o16n_df['llcrnrlat'].unique()).tolist()\n", 330 | "max_lat_idx = len(lat_values) - 1\n", 331 | "lon_values = np.sort(config.o16n_df['llcrnrlon'].unique()).tolist()\n", 332 | "config.o16n_df['x_idx'] = config.o16n_df['llcrnrlon'].apply(lambda x: lon_values.index(x))\n", 333 | "config.o16n_df['y_idx'] = config.o16n_df['llcrnrlat'].apply(lambda x: lat_values.index(x))" 334 | ] 335 | }, 336 | { 337 | "cell_type": "markdown", 338 | "metadata": {}, 339 | "source": [ 340 | "Then we fill in the appropriate color for each pixel in an otherwise-black array, and display the result. 
(Note that the image's y-orientation will be inverted by the `pillow` package, so we fill in the rows from bottom to top.)" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 8, 346 | "metadata": {}, 347 | "outputs": [ 348 | { 349 | "data": { 350 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVsAAAE5CAIAAACxkRTrAAAqdElEQVR4nO1d25ajOAzM//+056F7\n0sTWpUqSgSSqs2enA7JkbN0h5PEQMaS/h0DoD2cxjE8885GYTGzgOsrlc1xhRmhmmWkYwkRt0Sgl\ndfIvZBURWGd4xKkLe0fETF30FCIrbQuf58dy7uQ9QRQOHI4T116jwi1wNYXSH+LmrmPB+bkXYy7C\nzMadGCPiO5C++JlBRm92YHVEN9GSolmM5YoMj03NxRoSYGfTLy6D44GnMFUD3x5j+f96dv1bO2JL\n0YcQmcJwznNzQLL6pELEjGTNsWMp9w2Rn/T/xYk4cWQEkG685cqjACsqqmJEyDQabLFVqktCAXIV\nW5Vofw9CTvrWPoix+7BixNOQU3CDKeyGYfz2wYysTUW1Jm4rSCk+OZ5M5WVdiPEYD7QFgJKth3RN\nW9dzq+6/M4w1rSolNKGZEIZUAeLBWHXgXj7rKY4KuhYOabiJVVzgCUYDq0cb8H4ELN8goBoQF21v\nRGzep4DpNGm4v6a0tYMrmqsYPHRdIuZX0oxQmH23Q8nYrZFgIfU5e7wKJa7KGGtfV7jNYRyvWDFh\ndu/bSdsx2/Fma5AGtf2Tfx1YLNdElK90SWYR6K1Q6yZGbCp70krozZqreo2H6Fc4hn/LkklCJUKt\nYVFb+H4Epl1Mhu5MJR8Tym5gSb/gNGSyLYxrDJqBQfLK/XWa59d7ARxujg06lMJpsL5p65TwKonl\nXMewqucDTeR82xpQi+SvBRMqG9tlvKKw9ga9O+iJcInGKPjqImqBJ7fTjQaRyetVyPm2KHyDf5zT\nhCrONp+x5VoapVA2Zu9+lRT2YUGx4WwRJP79dAdaYFxbDOH03j0fqCXrGgS1Y9Wh3+h3dlyzkYCN\n0r4tlb8owVagP1cPoFuD5PTmBDm06lvuWVKZ2ilN/m+0ehlXrETQGdsVoCuSTx/Gyz8YMQ6+2hJK\nAICtTOvmArWOIJBKHA5yk3G1iISoOO1BNiDWm4iEOX7IzzhDEYsyW6oilgsBzwWIhC9qnekEZUw9\nSekOeV4kvMr0LnwgAk0aZEjFstGVMCV0R8fhyEyr8XUvM6Rz6Lyi125HvIj0UOUl+CZyRxzyIoX8\ndHeggVKQZMTYtMxFPLXniCz2IdFQb8VYq+UU7vE2O/YQ9iRnE6O5CjNZf4cvsDXMHfuQ3UHV2jll\n85oNUmEf7CPE1qfK5dntj1CCbV1UGFplEq5HsGJdOD+kFcr5l+/wBT/QrZrjEB4+jRqH/yiG4iVM\nbHEO2txADm4m5TE5PsnruMXVbwbKQINn7XjRX2iVknj8FOvM1lDvDcpJVmhbATQdwgncU3gCzyzF\nj6mPIXYMHGllpIHAmxdKccikVIFTOMPvcAlpOwcDgnFQ41PigJ52G2KSNQ/KN/HTcDOIGNsIh8xT\nAyXuYOURSDD1M9/iDdSLZovwsDfNuKFMFTgkN3HDmDAVxptuiAIsS5yjzARmnc2sotdzB0W4E/j0\n+GVgfoibLAyl5KayDCmbqHxQZ6takb5sADS78bvez5ss5yiYMfbqBbkNcLOpWjIwh2fF4d6kdu9j\nnYhacSXEx3HG27GZsmje6pzZq6OTq42IKBL1zshU4HU7lB1yZjlTaH47xW1h5TlcQsL4SxwKvwKz\nsgqXMF/jFMo1LJAIUHm+xnDVTlFfq5pYSo6A23lBbbLTO8SCvN3AQceckM1NJ3dkr28M25ZYDtqR\nEpzYBSz7Kbp9djuW/++eTM6xzoN4JrHk1eC2ecB3onyZwFrAGH5QnPfY9X39jifP2hrhob/rxavP\nw3k7MiuQU1Da53uEQJA5E8bEqjoFVde+yYynv59uznjz0sohmT4QZi0ResWRkF+yr41oJEG8+vZv\nTIJ4+dvipOWltvStBinNBFXzqokNiyfnHp9uYueSEqOH8cnnQ/sb8STmYBtFWDfZDmsxTdUGanES\nVD3Xtk2FjqO8XP5xZOl2JpqeJKqDgZFhjCJVhKss34faBVhThpL4H5uk7TjwOSSWiMj5S2riC6vC\nbIhPkieHNR4b3COo8W4xbJBRBCxEs/QYWlfzGqXj8y0qTHznPN3Rl8SJ6bdAtNMykYU1T5SQvzsC\nl1uezR4/TlrORkiqaoDn5Q83jLM6SMocwulSktBdfAbOsi95Fec3ocMH5qB/+XDYAQuMz0PfsTH9\nywOvOLYWPgbZ+4O+iGWA+9BhZSY3nN2Zv0PB+4jP2FYMhf0b4/iqMdrAUCiOJDpwe+yXYFImUGSy\nWinE5LgzFQc1xDBCfA2L6iNLelqjGgx27KgrEU9o8VmJwdBkMx8MBKgMTlnw2bgob2uze93HTJkQ\nQTuGF5wQBgNAaoTklPCAFhUk/zTbctBnX+XpYkOUFE/mAXAGDf73nVTA2IgH+XYvgF1/VjWrhlRh\np+jLLgvsp4h9n+OxwO+mFF0z0quJiIpVfFmpH4DS3VX5s6eOBOsMqyIPQfdLzKVQhQ0FhNXa+DA4\nP/8PT5KLvQznC3DnuV2MqqWhavi8LF7EFh+xAVJpUcQX26CXt8VevRpZVMz/3ddgC7RfN5n/EM+u\nHy9a4xqxV/Ttshzg4uKkJcp1Iek56Lr3G1zMoulDPUJhLvDImTc4trx/tg4Fcw17wm+qL7urxd3M\nj/FoymjedEdujuCqitXj8WDAlYgVrx308CK8OBYnRIA9sLPMLDv4Tc3yTaddgl/PKtnweJRGfiRl\nPXkndvgIl6dh82Dj8PnHpuUyY2zWWXjjx/L/3fib1Dc7gsfj8RDfC5hclNW8cYYx12D0xsSzSL2z\nFpmTGRuvVFyzmKUsL3uMh807KAl4hpVAzPKT5X5kyQvf2X9HxFJcN0VXTMvJNUpiXaawv26vCY9s\nmA5eXyBpGoISZ4ElC7SQfanT9+CvcHi8w2pmKvaARSVEs7nAVWs/OyZmHoIr1m5IYb/bwiZA1lQa\nHKqaYSCTaz1OVK46zoi3VamwHSXBgK/kCKgl8zTIpMjTwYF4SjTs0x8OLU6KzrmywtMpbRPaUVNc\nDqSdkWROsfXcTckLUa/dB9u531BHboDVKRi6VbuE+2zjnK3ONAhrmRuXrOUIGH/xW0Yuk8KN3aQj\nsqwPbyWuqDJvvKW3dYUnZdmkhkliqkwr2ZHCSg3ozkDSxvSvcLJEp+L9iG9HVIHOc6K4
cRLuPxh2hdc52P25k8D/THsxmAV2q\nKB/bwBvvD0SLKzSd4JFxUnibcHJMscefPHFactRfbWjcBuxTdEBRALJCQrH1GIHWAqCsNwS518Cu\nzMSgnULjYthPBEjh7sUOw9kE8rBQxo+8WpdKD2YBpjzKB60237cqGncC3uLyDHt3K+DXE9mPR9im\na0fg9LyzK9AeoXELJJ8TCAx3b0OwbM3YPpYjQSk2Z3EipFfNzKjRyGHrDYUcK240VuwgMthmJYFY\nk7LRuB1iLbqNtmVyulUQdl1Su4nGmyHXOf/lEMvPY3KNhwtCXUOI/v81ov3LRuOOKHzoAGwobA6J\ne+2O4R5usDQa74loEb7PSOyWXqqJ2Gg03hgdoBuNm8N/FI9MMH4Y/j0apD8JIN96bH/RaNSDbRyw\njM+02/YRjUYxthrVUgs8X+rYPYJG4wbIGyH+AHWj0XgDAI8D7TJ7sCMgfe2ifjKNRuPxAJ6D5p8j\nhujHWkMQowtm0mg0ipHIHVD73frFjUaj4QK6R1BaR6w3F/u7Ao3GFcBuPTrfDhaPH3sEpEHT311o\nh9Fo7IVuY+f0HM/k2Wg0UFxigWNJLOz7Ie0mGo17I9cGiH0Js9FoVECJxWD570byKlBf6W40GkH8\nWX74+wuv34DyHyLCBEFUpzmkRqOxBc+S4P8fSG9SeCihfxOh0XgL+EnA+h6UQtmNRmMH6IcCDmdP\nNcz2Ao3GPiDfe6LORkmD6Gqi0diI8PeY4vLG35+NRuN0TK0+ctwfAzE6b74X0F6j0ShFzqTio7E6\nZJCNiXYQjUYFTrGkEXp1WvJZiUajUQw08Tfo7NbAz9crp5pDkSQ3DttfNBpnADHSy63x8gk0Gp+D\nIf4pHIm/8sg7lbLodgeNRiUYi3JKBtIXWA8+sE9HtV9oNIpBGltByiCehG1bHtuuodEohl087ODf\naDTuhYrHBLdYuuKe2qu8L/4BaUELmxAC0R4AAAAASUVORK5CYII=\n", 351 | "text/plain": [ 352 | "" 353 | ] 354 | }, 355 | "execution_count": 8, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "label_to_color_dict = {'Barren': np.array([0, 255, 0]),\n", 362 | " 'Cultivated': np.array([255, 255, 255]),\n", 363 | " 'Developed': np.array([255, 0, 0]),\n", 364 | " 'Forest': np.array([0, 255, 0]),\n", 365 | " 'Herbaceous': np.array([0, 255, 0]),\n", 366 | " 'Shrub': np.array([0, 255, 0])}\n", 367 | "\n", 368 | "county_image = np.zeros((len(lat_values), len(lon_values), 3))\n", 369 | "for row in config.o16n_df.itertuples():\n", 370 | " county_image[max_lat_idx - row.y_idx, row.x_idx, :] = label_to_color_dict[row.pred_label]\n", 371 | "Image.fromarray(np.uint8(county_image))" 372 | ] 373 | } 374 | ], 375 | "metadata": { 376 | "kernelspec": { 377 | "display_name": "Python 3", 378 | "language": "python", 379 | "name": "python3" 380 | }, 381 | "language_info": { 382 | "codemirror_mode": { 383 | "name": "ipython", 384 | "version": 3 385 | }, 386 | "file_extension": ".py", 387 | "mimetype": "text/x-python", 388 | "name": "python", 389 | "nbconvert_exporter": "python", 390 | "pygments_lexer": "ipython3", 391 | "version": "3.5.2" 392 | } 393 | }, 394 | "nbformat": 4, 395 | "nbformat_minor": 2 396 | } 397 | -------------------------------------------------------------------------------- /Code/04_Result_Analysis/analysis_config_loader.py: -------------------------------------------------------------------------------- 1 | ''' 2 | analysis_config_loader.py 3 | by Mary Wahl 4 | (c) Microsoft Corporation, 2017 5 | 6 | Loads dataframes of prediction results and the description of a trained model. 
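Typical use, mirroring the 'Model prediction analysis' notebook: config = ConfigFile('../settings.cfg', output_model_name). The returned object exposes the test-set and operationalization prediction dataframes (test_df, o16n_df) along with the trained model's metadata attributes.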
7 | ''' 8 | import os, io 9 | import numpy as np 10 | import pandas as pd 11 | from configparser import ConfigParser 12 | from azure.storage.blob import BlockBlobService 13 | 14 | def ensure_str(str_data): 15 | ''' Helper function to correct type of imported strings ''' 16 | if isinstance(str_data, str): 17 | return(str_data) 18 | return(str_data.encode('utf-8')) 19 | 20 | class ConfigFile(object): 21 | ''' Copies ConfigParser results into attributes, correcting type ''' 22 | def __init__(self, config_filename, output_model_name): 23 | ''' Load/validate model information from a config file ''' 24 | config = ConfigParser(allow_no_value=True) 25 | config.read(config_filename) 26 | my_config = config['Settings'] 27 | self.output_model_name = output_model_name 28 | 29 | # Load storage account info 30 | self.storage_account_name = ensure_str( 31 | my_config['storage_account_name']) 32 | self.storage_account_key = ensure_str(my_config['storage_account_key']) 33 | self.container_prediction_results = ensure_str( 34 | my_config['container_prediction_results']) 35 | self.container_trained_models = ensure_str( 36 | my_config['container_trained_models']) 37 | self.container_data_o16n = ensure_str( 38 | my_config['container_data_o16n']) 39 | self.predictions_o16n_filename = '{}_predictions_o16n.csv'.format( 40 | output_model_name) 41 | self.predictions_test_filename = '{}_predictions_test_set.csv'.format( 42 | output_model_name) 43 | 44 | # Load blob service and ensure containers are available 45 | blob_service = BlockBlobService(self.storage_account_name, 46 | self.storage_account_key) 47 | container_list = [i.name for i in blob_service.list_containers()] 48 | for container in [self.container_trained_models, 49 | self.container_prediction_results, 50 | self.container_data_o16n]: 51 | assert container in container_list, \ 52 | 'Could not find container {} in storage '.format(container) + \ 53 | 'account {}'.format(self.storage_account_name) 54 | 55 | # Load the predictions themselves 56 | try: 57 | o16n_blob = blob_service.get_blob_to_text( 58 | container_name=self.container_prediction_results, 59 | blob_name=self.predictions_o16n_filename) 60 | self.o16n_df = pd.read_csv(io.StringIO(o16n_blob.content)) 61 | except Exception as e: 62 | raise Exception('Error loading operationalization predictions;' + 63 | 'did you run batch_score_spark.py with this model?\n{}'.format( 64 | e)) 65 | self.o16n_df['name'] = self.o16n_df['filepath'].apply( 66 | lambda x: os.path.basename(x)) 67 | self.o16n_df.drop('filepath', axis=1, inplace=True) 68 | 69 | try: 70 | test_blob = blob_service.get_blob_to_text( 71 | container_name=self.container_prediction_results, 72 | blob_name=self.predictions_test_filename) 73 | self.test_df = pd.read_csv(io.StringIO(test_blob.content)) 74 | except Exception as e: 75 | raise Exception('Error downloading test set predictions:' + 76 | '\n{}'.format(e)) 77 | 78 | try: 79 | tile_blob = blob_service.get_blob_to_text( 80 | container_name=self.container_data_o16n, 81 | blob_name='tile_summaries.csv') 82 | self.tile_summaries_df = pd.read_csv(io.StringIO(tile_blob.content)) 83 | except Exception as e: 84 | raise Exception('Error downloading tile summaries for o16n data:' + 85 | '\n{}'.format(e)) 86 | self.tile_summaries_df['name'] = self.tile_summaries_df['filename'] \ 87 | .apply(lambda x: os.path.basename(x)) 88 | self.tile_summaries_df.drop('filename', axis=1, inplace=True) 89 | self.o16n_df = self.o16n_df.merge(self.tile_summaries_df, 90 | on='name', how='inner') 91 | self.o16n_df = 
self.o16n_df[['name', 'pred_label', 'llcrnrlat', 92 | 'llcrnrlon', 'urcrnrlat', 'urcrnrlon']] 93 | 94 | # Load the description of the trained model 95 | try: 96 | description = blob_service.get_blob_to_text( 97 | container_name=self.container_trained_models, 98 | blob_name='{}/model.info'.format(self.output_model_name)) 99 | except Exception as e: 100 | raise Exception('Error downloading model description:' + 101 | '\n{}'.format(e)) 102 | description_dict = {} 103 | for line in description.content.split('\n'): 104 | if len(line) == 0: 105 | continue 106 | key, val = line.strip().split(',') 107 | description_dict[key] = val 108 | self.model_source = description_dict['model_source'] 109 | self.pretrained_model_type = description_dict['pretrained_model_type'] 110 | self.mmlspark_model_type = description_dict['mmlspark_model_type'] 111 | return 112 | -------------------------------------------------------------------------------- /Code/settings.cfg: -------------------------------------------------------------------------------- 1 | [Settings] 2 | # Credentials for the Azure Storage account 3 | # All three values should be updated for the user's storage account. 4 | storage_account_name = 5 | storage_account_key = 6 | 7 | # Batch AI training credentials 8 | bait_subscription_id = 9 | bait_aad_client_id = 10 | bait_aad_secret = 11 | bait_aad_tenant = 12 | bait_region = eastus 13 | bait_resource_group_name = 14 | bait_vms_in_cluster = 2 15 | bait_vms_per_job = 2 16 | bait_cluster_name = landuseclassifier 17 | 18 | # Named of containers in Azure Storage account (no need to modify) 19 | container_data_training = train 20 | container_data_testing = test 21 | container_data_o16n = middlesexma2016 22 | container_trained_models = trainedmodels 23 | container_pretrained_models = pretrainedmodels 24 | container_prediction_results = predictions 25 | -------------------------------------------------------------------------------- /LICENSE.TXT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. All rights reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aerial Image Classification 2 | 3 | > **NOTE** This content is no longer maintained. 
Visit the [Azure Machine Learning Notebook](https://github.com/Azure/MachineLearningNotebooks) project for sample Jupyter notebooks for ML and deep learning with Azure Machine Learning. 4 | 5 | ## Link to the Microsoft DOCS site 6 | 7 | The detailed documentation for this real world scenario includes the step-by-step walkthrough: 8 | 9 | [https://docs.microsoft.com/azure/machine-learning/preview/scenario-aerial-image-classification](https://docs.microsoft.com/azure/machine-learning/preview/scenario-aerial-image-classification) 10 | 11 | ## Link to the Gallery GitHub repository 12 | 13 | The public GitHub repository for this real world scenario contains all the code samples: 14 | [https://github.com/Azure/MachineLearningSamples-AerialImageClassification](https://github.com/Azure/MachineLearningSamples-AerialImageClassification) 15 | 16 | ## Overview 17 | 18 | In this scenario, we train machine learning models to classify the type of land shown in aerial images of 224-meter x 224-meter plots. Land use classification models can be used to track urbanization, deforestation, loss of wetlands, and other major environmental trends using periodically collected aerial imagery. After training and validating the classification model, we will apply it to aerial images spanning Middlesex County, MA -- home of Microsoft's New England Research & Development (NERD) Center -- to demonstrate how these models can be used to study trends in urban development. This example includes two approaches for distributed model training with Azure Machine Learning (AML) Workbench: deep neural network training on [Azure Batch AI](https://batchaitraining.azure.com/) GPU clusters, and transfer learning using the [Microsoft Machine Learning for Apache Spark (MMLSpark)](https://github.com/Azure/mmlspark) package. The example concludes with an illustration of model operationalization for scoring large static image sets on an [Azure HDInsight Spark](https://azure.microsoft.com/en-us/services/hdinsight/apache-spark/) cluster. 19 | 20 | ## Key components needed to run this scenario 21 | - An [Azure account](https://azure.microsoft.com/en-us/free/) (free trials are available), which will be used to create an HDInsight Spark cluster with 40 worker nodes and an Azure Batch AI GPU cluster with two VMs/two GPUs. 22 | - [Azure Machine Learning Workbench](https://review.docs.microsoft.com/en-us/azure/machine-learning/preview/overview-what-is-azure-ml). 23 | - [AzCopy](https://docs.microsoft.com/en-us/azure/storage/common/storage-use-azcopy), a free utility for coordinating file transfer between Azure storage accounts. 24 | - An SSH client; we recommend [PuTTy](http://www.putty.org/). 25 | 26 | ## Data/Telemetry 27 | Aerial Image Classification collects usage data and sends it to Microsoft to help improve our products and services. Read our [privacy statement](http://go.microsoft.com/fwlink/?LinkId=521839) to learn more. 28 | 29 | ## Contributing 30 | 31 | This project welcomes contributions and suggestions. Most contributions require you to agree to a 32 | Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us 33 | the rights to use your contribution. For details, visit https://cla.microsoft.com. 34 | 35 | When you submit a pull request, a CLA-bot will automatically determine whether you need to provide 36 | a CLA and decorate the PR appropriately (for example, label, comment). Simply follow the instructions 37 | provided by the bot. 
You will only need to do this once across all repos using our CLA. 38 | 39 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 40 | For more information, see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or 41 | contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. 42 | -------------------------------------------------------------------------------- /aml_config/conda_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Conda environment specification. The dependencies defined in this file will 2 | # be automatically provisioned for managed runs. These include runs against 3 | # the localdocker, remotedocker, and cluster compute targets. 4 | 5 | # Note that this file is NOT used to automatically manage dependencies for the 6 | # local compute target. To provision these dependencies locally, run: 7 | # conda env update --file conda_dependencies.yml 8 | 9 | # Details about the Conda environment file format: 10 | # https://conda.io/docs/using/envs.html#create-environment-file-by-hand 11 | 12 | # For managing Spark packages and configuration, see spark_dependencies.yml. 13 | 14 | name: project_environment 15 | dependencies: 16 | # The python interpreter version. 17 | # Currently Azure ML Workbench only supports 3.5.2. 18 | - python=3.5.2 19 | 20 | # Required for Jupyter Notebooks. 21 | - ipykernel=4.6.1 22 | 23 | - pip: 24 | # The API for Azure Machine Learning Model Management Service. 25 | # Details: https://github.com/Azure/Machine-Learning-Operationalization 26 | - azure-common==1.1.8 27 | - azure-storage==0.36.0 28 | - azure-ml-api-sdk==0.1.0a11 29 | - pandas 30 | 31 | # Helper utilities for dealing with Azure ML Workbench Assets. 32 | - https://azuremldownloads.blob.core.windows.net/wheels/latest/azureml.assets-1.0.0-py3-none-any.whl?sv=2016-05-31&si=ro-2017&sr=c&sig=xnUdTm0B%2F%2FfknhTaRInBXyu2QTTt8wA3OsXwGVgU%2BJk%3D 33 | -------------------------------------------------------------------------------- /aml_config/docker.compute: -------------------------------------------------------------------------------- 1 | # Defines a localdocker compute target that uses a local Docker container. 2 | type: "localdocker" 3 | 4 | # The base image for the Docker container. This is used to provision Spark and 5 | # the Conda package manager. Supported base images are microsoft/mmlspark:plus 6 | # variants. The default 0.7 version includes Spark 2.1.1. 7 | baseDockerImage: "microsoft/mmlspark:plus-0.7.91" 8 | 9 | # Azure ML Workbench uses the Docker shared volumes feature to improve run 10 | # performance and to enable the automatic mounting of the shared directory. 11 | # This Docker feature isn't completely stable yet on Windows, and so it's 12 | # disabled by default to ensure compatibility. 13 | sharedVolumes: false 14 | 15 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 16 | # at a persistent directory that is shared between all runs of the same project 17 | # on the same target. This specifies the base path for those directories. 18 | # Note that this is not available if sharedVolumes is false.
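# For illustration only (hypothetical usage, not part of the shipped file): a
# script launched on this target could read the shared path from the
# environment, for example
#     import os
#     share_dir = os.environ.get('AZUREML_NATIVE_SHARE_DIRECTORY', '.')
# and cache downloaded imagery or other intermediate results under share_dir.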
19 | nativeSharedDirectory: "~/.azureml/share/" 20 | -------------------------------------------------------------------------------- /aml_config/docker.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "docker" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "PySpark" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be performed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true 35 | 36 | -------------------------------------------------------------------------------- /aml_config/jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | # Configuration file for jupyter-notebook. 2 | 3 | #------------------------------------------------------------------------------ 4 | # Application(SingletonConfigurable) configuration 5 | #------------------------------------------------------------------------------ 6 | 7 | ## This is an application. 8 | 9 | ## The date format used by logging formatters for %(asctime)s 10 | #c.Application.log_datefmt = '%Y-%m-%d %H:%M:%S' 11 | 12 | ## The Logging format template 13 | #c.Application.log_format = '[%(name)s]%(highlevel)s %(message)s' 14 | 15 | ## Set the log level by value or name. 16 | #c.Application.log_level = 30 17 | 18 | #------------------------------------------------------------------------------ 19 | # JupyterApp(Application) configuration 20 | #------------------------------------------------------------------------------ 21 | 22 | ## Base class for Jupyter applications 23 | 24 | ## Answer yes to any prompts. 25 | #c.JupyterApp.answer_yes = False 26 | 27 | ## Full path of a config file. 28 | #c.JupyterApp.config_file = '' 29 | 30 | ## Specify a config file to load. 31 | #c.JupyterApp.config_file_name = '' 32 | 33 | ## Generate default config file.
34 | #c.JupyterApp.generate_config = False 35 | 36 | #------------------------------------------------------------------------------ 37 | # NotebookApp(JupyterApp) configuration 38 | #------------------------------------------------------------------------------ 39 | 40 | ## Set the Access-Control-Allow-Credentials: true header 41 | #c.NotebookApp.allow_credentials = False 42 | 43 | ## Set the Access-Control-Allow-Origin header 44 | # 45 | # Use '*' to allow any origin to access your server. 46 | # 47 | # Takes precedence over allow_origin_pat. 48 | #c.NotebookApp.allow_origin = '' 49 | 50 | ## Use a regular expression for the Access-Control-Allow-Origin header 51 | # 52 | # Requests from an origin matching the expression will get replies with: 53 | # 54 | # Access-Control-Allow-Origin: origin 55 | # 56 | # where `origin` is the origin of the request. 57 | # 58 | # Ignored if allow_origin is set. 59 | #c.NotebookApp.allow_origin_pat = '' 60 | 61 | ## Whether to allow the user to run the notebook as root. 62 | #c.NotebookApp.allow_root = False 63 | 64 | ## DEPRECATED use base_url 65 | #c.NotebookApp.base_project_url = '/' 66 | 67 | ## The base URL for the notebook server. 68 | # 69 | # Leading and trailing slashes can be omitted, and will automatically be added. 70 | #c.NotebookApp.base_url = '/' 71 | 72 | ## Specify what command to use to invoke a web browser when opening the notebook. 73 | # If not specified, the default browser will be determined by the `webbrowser` 74 | # standard library module, which allows setting of the BROWSER environment 75 | # variable to override it. 76 | #c.NotebookApp.browser = '' 77 | 78 | ## The full path to an SSL/TLS certificate file. 79 | #c.NotebookApp.certfile = '' 80 | 81 | ## The full path to a certificate authority certificate for SSL/TLS client 82 | # authentication. 83 | #c.NotebookApp.client_ca = '' 84 | 85 | ## The config manager class to use 86 | #c.NotebookApp.config_manager_class = 'notebook.services.config.manager.ConfigManager' 87 | 88 | ## The notebook manager class to use. 89 | #c.NotebookApp.contents_manager_class = 'notebook.services.contents.largefilemanager.LargeFileManager' 90 | 91 | ## Extra keyword arguments to pass to `set_secure_cookie`. See tornado's 92 | # set_secure_cookie docs for details. 93 | #c.NotebookApp.cookie_options = {} 94 | 95 | ## The random bytes used to secure cookies. By default this is a new random 96 | # number every time you start the Notebook. Set it to a value in a config file 97 | # to enable logins to persist across server sessions. 98 | # 99 | # Note: Cookie secrets should be kept private, do not share config files with 100 | # cookie_secret stored in plaintext (you can read the value from a file). 101 | #c.NotebookApp.cookie_secret = b'' 102 | 103 | ## The file where the cookie secret is stored. 104 | #c.NotebookApp.cookie_secret_file = '' 105 | 106 | ## The default URL to redirect to from `/` 107 | #c.NotebookApp.default_url = '/tree' 108 | 109 | ## Disable cross-site-request-forgery protection 110 | # 111 | # Jupyter notebook 4.3.1 introduces protection from cross-site request 112 | # forgeries, requiring API requests to either: 113 | # 114 | # - originate from pages served by this server (validated with XSRF cookie and 115 | # token), or - authenticate with a token 116 | # 117 | # Some anonymous compute resources still desire the ability to run code, 118 | # completely without authentication. 
These services can disable all 119 | # authentication and security checks, with the full knowledge of what that 120 | # implies. 121 | #c.NotebookApp.disable_check_xsrf = False 122 | 123 | ## Whether to enable MathJax for typesetting math/TeX 124 | # 125 | # MathJax is the javascript library Jupyter uses to render math/LaTeX. It is 126 | # very large, so you may want to disable it if you have a slow internet 127 | # connection, or for offline use of the notebook. 128 | # 129 | # When disabled, equations etc. will appear as their untransformed TeX source. 130 | #c.NotebookApp.enable_mathjax = True 131 | 132 | ## extra paths to look for Javascript notebook extensions 133 | #c.NotebookApp.extra_nbextensions_path = [] 134 | 135 | ## Extra paths to search for serving static files. 136 | # 137 | # This allows adding javascript/css to be available from the notebook server 138 | # machine, or overriding individual files in the IPython 139 | #c.NotebookApp.extra_static_paths = [] 140 | 141 | ## Extra paths to search for serving jinja templates. 142 | # 143 | # Can be used to override templates from notebook.templates. 144 | #c.NotebookApp.extra_template_paths = [] 145 | 146 | ## 147 | #c.NotebookApp.file_to_run = '' 148 | 149 | ## Deprecated: Use minified JS file or not, mainly use during dev to avoid JS 150 | # recompilation 151 | #c.NotebookApp.ignore_minified_js = False 152 | 153 | ## (bytes/sec) Maximum rate at which messages can be sent on iopub before they 154 | # are limited. 155 | #c.NotebookApp.iopub_data_rate_limit = 1000000 156 | 157 | ## (msgs/sec) Maximum rate at which messages can be sent on iopub before they are 158 | # limited. 159 | #c.NotebookApp.iopub_msg_rate_limit = 1000 160 | 161 | ## The IP address the notebook server will listen on. 162 | #c.NotebookApp.ip = 'localhost' 163 | 164 | ## Supply extra arguments that will be passed to Jinja environment. 165 | #c.NotebookApp.jinja_environment_options = {} 166 | 167 | ## Extra variables to supply to jinja templates when rendering. 168 | #c.NotebookApp.jinja_template_vars = {} 169 | 170 | ## The kernel manager class to use. 171 | #c.NotebookApp.kernel_manager_class = 'notebook.services.kernels.kernelmanager.MappingKernelManager' 172 | 173 | ## The kernel spec manager class to use. Should be a subclass of 174 | # `jupyter_client.kernelspec.KernelSpecManager`. 175 | # 176 | # The Api of KernelSpecManager is provisional and might change without warning 177 | # between this version of Jupyter and the next stable one. 178 | #c.NotebookApp.kernel_spec_manager_class = 'jupyter_client.kernelspec.KernelSpecManager' 179 | 180 | ## The full path to a private key file for usage with SSL/TLS. 181 | #c.NotebookApp.keyfile = '' 182 | 183 | ## The login handler class to use. 184 | #c.NotebookApp.login_handler_class = 'notebook.auth.login.LoginHandler' 185 | 186 | ## The logout handler class to use. 187 | #c.NotebookApp.logout_handler_class = 'notebook.auth.logout.LogoutHandler' 188 | 189 | ## The MathJax.js configuration file that is to be used. 190 | #c.NotebookApp.mathjax_config = 'TeX-AMS-MML_HTMLorMML-full,Safe' 191 | 192 | ## A custom url for MathJax.js. Should be in the form of a case-sensitive url to 193 | # MathJax, for example: /static/components/MathJax/MathJax.js 194 | #c.NotebookApp.mathjax_url = '' 195 | 196 | ## Dict of Python modules to load as notebook server extensions.Entry values can 197 | # be used to enable and disable the loading ofthe extensions. The extensions 198 | # will be loaded in alphabetical order. 
199 | #c.NotebookApp.nbserver_extensions = {} 200 | 201 | ## The directory to use for notebooks and kernels. 202 | #c.NotebookApp.notebook_dir = '' 203 | 204 | ## Whether to open in a browser after starting. The specific browser used is 205 | # platform dependent and determined by the python standard library `webbrowser` 206 | # module, unless it is overridden using the --browser (NotebookApp.browser) 207 | # configuration option. 208 | #c.NotebookApp.open_browser = True 209 | 210 | ## Hashed password to use for web authentication. 211 | # 212 | # To generate, type in a python/IPython shell: 213 | # 214 | # from notebook.auth import passwd; passwd() 215 | # 216 | # The string should be of the form type:salt:hashed-password. 217 | #c.NotebookApp.password = '' 218 | 219 | ## Forces users to use a password for the Notebook server. This is useful in a 220 | # multi user environment, for instance when everybody in the LAN can access each 221 | # other's machine though ssh. 222 | # 223 | # In such a case, server the notebook server on localhost is not secure since 224 | # any user can connect to the notebook server via ssh. 225 | #c.NotebookApp.password_required = False 226 | 227 | ## The port the notebook server will listen on. 228 | #c.NotebookApp.port = 8888 229 | 230 | ## The number of additional ports to try if the specified port is not available. 231 | #c.NotebookApp.port_retries = 50 232 | 233 | ## DISABLED: use %pylab or %matplotlib in the notebook to enable matplotlib. 234 | #c.NotebookApp.pylab = 'disabled' 235 | 236 | ## (sec) Time window used to check the message and data rate limits. 237 | #c.NotebookApp.rate_limit_window = 3 238 | 239 | ## Reraise exceptions encountered loading server extensions? 240 | #c.NotebookApp.reraise_server_extension_failures = False 241 | 242 | ## DEPRECATED use the nbserver_extensions dict instead 243 | #c.NotebookApp.server_extensions = [] 244 | 245 | ## The session manager class to use. 246 | #c.NotebookApp.session_manager_class = 'notebook.services.sessions.sessionmanager.SessionManager' 247 | 248 | ## Supply SSL options for the tornado HTTPServer. See the tornado docs for 249 | # details. 250 | #c.NotebookApp.ssl_options = {} 251 | 252 | ## Supply overrides for terminado. Currently only supports "shell_command". 253 | #c.NotebookApp.terminado_settings = {} 254 | 255 | ## Token used for authenticating first-time connections to the server. 256 | # 257 | # When no password is enabled, the default is to generate a new, random token. 258 | # 259 | # Setting to an empty string disables authentication altogether, which is NOT 260 | # RECOMMENDED. 261 | #c.NotebookApp.token = '' 262 | 263 | ## Supply overrides for the tornado.web.Application that the Jupyter notebook 264 | # uses. 265 | #c.NotebookApp.tornado_settings = {} 266 | 267 | ## Whether to trust or not X-Scheme/X-Forwarded-Proto and X-Real-Ip/X-Forwarded- 268 | # For headerssent by the upstream reverse proxy. Necessary if the proxy handles 269 | # SSL 270 | #c.NotebookApp.trust_xheaders = False 271 | 272 | ## DEPRECATED, use tornado_settings 273 | #c.NotebookApp.webapp_settings = {} 274 | 275 | ## The base URL for websockets, if it differs from the HTTP server (hint: it 276 | # almost certainly doesn't). 
277 | # 278 | # Should be in the form of an HTTP origin: ws[s]://hostname[:port] 279 | #c.NotebookApp.websocket_url = '' 280 | 281 | #------------------------------------------------------------------------------ 282 | # ConnectionFileMixin(LoggingConfigurable) configuration 283 | #------------------------------------------------------------------------------ 284 | 285 | ## Mixin for configurable classes that work with connection files 286 | 287 | ## JSON file in which to store connection info [default: kernel-.json] 288 | # 289 | # This file will contain the IP, ports, and authentication key needed to connect 290 | # clients to this kernel. By default, this file will be created in the security 291 | # dir of the current profile, but can be specified by absolute path. 292 | #c.ConnectionFileMixin.connection_file = '' 293 | 294 | ## set the control (ROUTER) port [default: random] 295 | #c.ConnectionFileMixin.control_port = 0 296 | 297 | ## set the heartbeat port [default: random] 298 | #c.ConnectionFileMixin.hb_port = 0 299 | 300 | ## set the iopub (PUB) port [default: random] 301 | #c.ConnectionFileMixin.iopub_port = 0 302 | 303 | ## Set the kernel's IP address [default localhost]. If the IP address is 304 | # something other than localhost, then Consoles on other machines will be able 305 | # to connect to the Kernel, so be careful! 306 | #c.ConnectionFileMixin.ip = '' 307 | 308 | ## set the shell (ROUTER) port [default: random] 309 | #c.ConnectionFileMixin.shell_port = 0 310 | 311 | ## set the stdin (ROUTER) port [default: random] 312 | #c.ConnectionFileMixin.stdin_port = 0 313 | 314 | ## 315 | #c.ConnectionFileMixin.transport = 'tcp' 316 | 317 | #------------------------------------------------------------------------------ 318 | # KernelManager(ConnectionFileMixin) configuration 319 | #------------------------------------------------------------------------------ 320 | 321 | ## Manages a single kernel in a subprocess on this host. 322 | # 323 | # This version starts kernels with Popen. 324 | 325 | ## Should we autorestart the kernel if it dies. 326 | #c.KernelManager.autorestart = True 327 | 328 | ## DEPRECATED: Use kernel_name instead. 329 | # 330 | # The Popen Command to launch the kernel. Override this if you have a custom 331 | # kernel. If kernel_cmd is specified in a configuration file, Jupyter does not 332 | # pass any arguments to the kernel, because it cannot make any assumptions about 333 | # the arguments that the kernel understands. In particular, this means that the 334 | # kernel does not receive the option --debug if it given on the Jupyter command 335 | # line. 336 | #c.KernelManager.kernel_cmd = [] 337 | 338 | ## Time to wait for a kernel to terminate before killing it, in seconds. 339 | #c.KernelManager.shutdown_wait_time = 5.0 340 | 341 | #------------------------------------------------------------------------------ 342 | # Session(Configurable) configuration 343 | #------------------------------------------------------------------------------ 344 | 345 | ## Object for handling serialization and sending of messages. 346 | # 347 | # The Session object handles building messages and sending them with ZMQ sockets 348 | # or ZMQStream objects. Objects can communicate with each other over the 349 | # network via Session objects, and only need to work with the dict-based IPython 350 | # message spec. The Session will handle serialization/deserialization, security, 351 | # and metadata. 
352 | # 353 | # Sessions support configurable serialization via packer/unpacker traits, and 354 | # signing with HMAC digests via the key/keyfile traits. 355 | # 356 | # Parameters ---------- 357 | # 358 | # debug : bool 359 | # whether to trigger extra debugging statements 360 | # packer/unpacker : str : 'json', 'pickle' or import_string 361 | # importstrings for methods to serialize message parts. If just 362 | # 'json' or 'pickle', predefined JSON and pickle packers will be used. 363 | # Otherwise, the entire importstring must be used. 364 | # 365 | # The functions must accept at least valid JSON input, and output *bytes*. 366 | # 367 | # For example, to use msgpack: 368 | # packer = 'msgpack.packb', unpacker='msgpack.unpackb' 369 | # pack/unpack : callables 370 | # You can also set the pack/unpack callables for serialization directly. 371 | # session : bytes 372 | # the ID of this Session object. The default is to generate a new UUID. 373 | # username : unicode 374 | # username added to message headers. The default is to ask the OS. 375 | # key : bytes 376 | # The key used to initialize an HMAC signature. If unset, messages 377 | # will not be signed or checked. 378 | # keyfile : filepath 379 | # The file containing a key. If this is set, `key` will be initialized 380 | # to the contents of the file. 381 | 382 | ## Threshold (in bytes) beyond which an object's buffer should be extracted to 383 | # avoid pickling. 384 | #c.Session.buffer_threshold = 1024 385 | 386 | ## Whether to check PID to protect against calls after fork. 387 | # 388 | # This check can be disabled if fork-safety is handled elsewhere. 389 | #c.Session.check_pid = True 390 | 391 | ## Threshold (in bytes) beyond which a buffer should be sent without copying. 392 | #c.Session.copy_threshold = 65536 393 | 394 | ## Debug output in the Session 395 | #c.Session.debug = False 396 | 397 | ## The maximum number of digests to remember. 398 | # 399 | # The digest history will be culled when it exceeds this value. 400 | #c.Session.digest_history_size = 65536 401 | 402 | ## The maximum number of items for a container to be introspected for custom 403 | # serialization. Containers larger than this are pickled outright. 404 | #c.Session.item_threshold = 64 405 | 406 | ## execution key, for signing messages. 407 | #c.Session.key = b'' 408 | 409 | ## path to file containing execution key. 410 | #c.Session.keyfile = '' 411 | 412 | ## Metadata dictionary, which serves as the default top-level metadata dict for 413 | # each message. 414 | #c.Session.metadata = {} 415 | 416 | ## The name of the packer for serializing messages. Should be one of 'json', 417 | # 'pickle', or an import name for a custom callable serializer. 418 | #c.Session.packer = 'json' 419 | 420 | ## The UUID identifying this session. 421 | #c.Session.session = '' 422 | 423 | ## The digest scheme used to construct the message signatures. Must have the form 424 | # 'hmac-HASH'. 425 | #c.Session.signature_scheme = 'hmac-sha256' 426 | 427 | ## The name of the unpacker for unserializing messages. Only used with custom 428 | # functions for `packer`. 429 | #c.Session.unpacker = 'json' 430 | 431 | ## Username for the Session. Default is your system username. 
432 | #c.Session.username = 'username' 433 | 434 | #------------------------------------------------------------------------------ 435 | # MultiKernelManager(LoggingConfigurable) configuration 436 | #------------------------------------------------------------------------------ 437 | 438 | ## A class for managing multiple kernels. 439 | 440 | ## The name of the default kernel to start 441 | #c.MultiKernelManager.default_kernel_name = 'python3' 442 | 443 | ## The kernel manager class. This is configurable to allow subclassing of the 444 | # KernelManager for customized behavior. 445 | #c.MultiKernelManager.kernel_manager_class = 'jupyter_client.ioloop.IOLoopKernelManager' 446 | 447 | #------------------------------------------------------------------------------ 448 | # MappingKernelManager(MultiKernelManager) configuration 449 | #------------------------------------------------------------------------------ 450 | 451 | ## A KernelManager that handles notebook mapping and HTTP error handling 452 | 453 | ## 454 | #c.MappingKernelManager.root_dir = '' 455 | 456 | #------------------------------------------------------------------------------ 457 | # ContentsManager(LoggingConfigurable) configuration 458 | #------------------------------------------------------------------------------ 459 | 460 | ## Base class for serving files and directories. 461 | # 462 | # This serves any text or binary file, as well as directories, with special 463 | # handling for JSON notebook documents. 464 | # 465 | # Most APIs take a path argument, which is always an API-style unicode path, and 466 | # always refers to a directory. 467 | # 468 | # - unicode, not url-escaped 469 | # - '/'-separated 470 | # - leading and trailing '/' will be stripped 471 | # - if unspecified, path defaults to '', 472 | # indicating the root path. 473 | 474 | ## 475 | #c.ContentsManager.checkpoints = None 476 | 477 | ## 478 | #c.ContentsManager.checkpoints_class = 'notebook.services.contents.checkpoints.Checkpoints' 479 | 480 | ## 481 | #c.ContentsManager.checkpoints_kwargs = {} 482 | 483 | ## Glob patterns to hide in file and directory listings. 484 | #c.ContentsManager.hide_globs = ['__pycache__', '*.pyc', '*.pyo', '.DS_Store', '*.so', '*.dylib', '*~'] 485 | 486 | ## Python callable or importstring thereof 487 | # 488 | # To be called on a contents model prior to save. 489 | # 490 | # This can be used to process the structure, such as removing notebook outputs 491 | # or other side effects that should not be saved. 492 | # 493 | # It will be called as (all arguments passed by keyword):: 494 | # 495 | # hook(path=path, model=model, contents_manager=self) 496 | # 497 | # - model: the model to be saved. Includes file contents. 498 | # Modifying this dict will affect the file that is stored. 499 | # - path: the API path of the save destination 500 | # - contents_manager: this ContentsManager instance 501 | #c.ContentsManager.pre_save_hook = None 502 | 503 | ## 504 | #c.ContentsManager.root_dir = '/' 505 | 506 | ## The base name used when creating untitled directories. 507 | #c.ContentsManager.untitled_directory = 'Untitled Folder' 508 | 509 | ## The base name used when creating untitled files. 510 | #c.ContentsManager.untitled_file = 'untitled' 511 | 512 | ## The base name used when creating untitled notebooks. 
513 | #c.ContentsManager.untitled_notebook = 'Untitled' 514 | 515 | #------------------------------------------------------------------------------ 516 | # FileManagerMixin(Configurable) configuration 517 | #------------------------------------------------------------------------------ 518 | 519 | ## Mixin for ContentsAPI classes that interact with the filesystem. 520 | # 521 | # Provides facilities for reading, writing, and copying both notebooks and 522 | # generic files. 523 | # 524 | # Shared by FileContentsManager and FileCheckpoints. 525 | # 526 | # Note ---- Classes using this mixin must provide the following attributes: 527 | # 528 | # root_dir : unicode 529 | # A directory against against which API-style paths are to be resolved. 530 | # 531 | # log : logging.Logger 532 | 533 | ## By default notebooks are saved on disk on a temporary file and then if 534 | # succefully written, it replaces the old ones. This procedure, namely 535 | # 'atomic_writing', causes some bugs on file system whitout operation order 536 | # enforcement (like some networked fs). If set to False, the new notebook is 537 | # written directly on the old one which could fail (eg: full filesystem or quota 538 | # ) 539 | #c.FileManagerMixin.use_atomic_writing = True 540 | 541 | #------------------------------------------------------------------------------ 542 | # FileContentsManager(FileManagerMixin,ContentsManager) configuration 543 | #------------------------------------------------------------------------------ 544 | 545 | ## Python callable or importstring thereof 546 | # 547 | # to be called on the path of a file just saved. 548 | # 549 | # This can be used to process the file on disk, such as converting the notebook 550 | # to a script or HTML via nbconvert. 551 | # 552 | # It will be called as (all arguments passed by keyword):: 553 | # 554 | # hook(os_path=os_path, model=model, contents_manager=instance) 555 | # 556 | # - path: the filesystem path to the file just written - model: the model 557 | # representing the file - contents_manager: this ContentsManager instance 558 | #c.FileContentsManager.post_save_hook = None 559 | 560 | ## 561 | #c.FileContentsManager.root_dir = '' 562 | 563 | ## DEPRECATED, use post_save_hook. Will be removed in Notebook 5.0 564 | #c.FileContentsManager.save_script = False 565 | 566 | #------------------------------------------------------------------------------ 567 | # NotebookNotary(LoggingConfigurable) configuration 568 | #------------------------------------------------------------------------------ 569 | 570 | ## A class for computing and verifying notebook signatures. 571 | 572 | ## The hashing algorithm used to sign notebooks. 573 | #c.NotebookNotary.algorithm = 'sha256' 574 | 575 | ## The sqlite file in which to store notebook signatures. By default, this will 576 | # be in your Jupyter data directory. You can set it to ':memory:' to disable 577 | # sqlite writing to the filesystem. 578 | #c.NotebookNotary.db_file = '' 579 | 580 | ## The secret key with which notebooks are signed. 581 | #c.NotebookNotary.secret = b'' 582 | 583 | ## The file where the secret key is stored. 584 | #c.NotebookNotary.secret_file = '' 585 | 586 | ## A callable returning the storage backend for notebook signatures. The default 587 | # uses an SQLite database. 
588 | #c.NotebookNotary.store_factory = traitlets.Undefined 589 | 590 | #------------------------------------------------------------------------------ 591 | # KernelSpecManager(LoggingConfigurable) configuration 592 | #------------------------------------------------------------------------------ 593 | 594 | ## If there is no Python kernelspec registered and the IPython kernel is 595 | # available, ensure it is added to the spec list. 596 | #c.KernelSpecManager.ensure_native_kernel = True 597 | 598 | ## The kernel spec class. This is configurable to allow subclassing of the 599 | # KernelSpecManager for customized behavior. 600 | #c.KernelSpecManager.kernel_spec_class = 'jupyter_client.kernelspec.KernelSpec' 601 | 602 | ## Whitelist of allowed kernel names. 603 | # 604 | # By default, all installed kernels are allowed. 605 | #c.KernelSpecManager.whitelist = set() 606 | -------------------------------------------------------------------------------- /aml_config/local.compute: -------------------------------------------------------------------------------- 1 | # Defines a local compute target that uses an existing python environment. 2 | type: "local" 3 | 4 | # Specifies the user-managed python environment for the run. By default this 5 | # is "python" which uses the currently active python environment. The Azure ML 6 | # Workbench will use the python environment installed with it and the Azure ML 7 | # CLI will use whatever python environment it was installed into. 8 | # 9 | # You can change this to point at any python environment on your system, 10 | # including virtual environments and Conda environments. Note that backslashes 11 | # need to be escaped in this path, so it's easier to use forward slashes. 12 | pythonLocation: "python" 13 | 14 | # Specifies the path to spark-submit for local Spark runs. By default this 15 | # assumes that Spark is on the path. 16 | sparkSubmitLocation: "spark-submit" 17 | 18 | # The $AZUREML_NATIVE_SHARE_DIRECTORY environment variable inside runs points 19 | # at a persistent directory that is shared between all runs of the same project 20 | # on the same target. This specifies the base path for those directories. 21 | nativeSharedDirectory: "~/.azureml/share/" -------------------------------------------------------------------------------- /aml_config/local.runconfig: -------------------------------------------------------------------------------- 1 | # The program name and arguments to run when they aren't specified through 2 | # other means. The $file token is replaced with the currently selected file 3 | # by the Workbench application. 4 | ArgumentVector: 5 | - "$file" 6 | 7 | # The name of the compute target to use for this run. 8 | Target: "local" 9 | 10 | # Environment variables set for the run. 11 | EnvironmentVariables: 12 | "EXAMPLE_ENV_VAR": "Example Value" 13 | 14 | # Framework to execute inside. Allowed values are "Python" and "PySpark". 15 | Framework: "Python" 16 | 17 | # Path to the Conda dependencies file to use for this run. If a project 18 | # contains multiple programs with different sets of dependencies, it may be 19 | # convenient to manage those environments with separate files. 20 | CondaDependenciesFile: "aml_config/conda_dependencies.yml" 21 | 22 | # Path to the Spark dependencies file to use for this run. If a project 23 | # contains multiple programs with different sets of dependencies, it may be 24 | # convenient to manage those environments with separate files. 
25 | SparkDependenciesFile: "aml_config/spark_dependencies.yml" 26 | 27 | # Automatically prepare the run environment as part of the run itself. 28 | # Manual preparation of a compute target can be performed with: 29 | # az ml experiment prepare --run-configuration 30 | PrepareEnvironment: false 31 | 32 | # Enable history tracking -- this allows status, logs, metrics, and outputs 33 | # to be collected by Azure ML Workbench and uploaded to the cloud project. 34 | TrackedRun: true -------------------------------------------------------------------------------- /aml_config/spark_dependencies.yml: -------------------------------------------------------------------------------- 1 | # Spark configuration and packages specification. The dependencies defined in 2 | # this file will be automatically provisioned for runs that use Spark. 3 | 4 | # For managing third-party python libraries, see conda_dependencies.yml. 5 | 6 | # Spark configuration properties. 7 | configuration: 8 | "spark.app.name": "Azure ML Experiment" 9 | "spark.yarn.maxAppAttempts": 1 10 | 11 | # Repositories to search for the specified Spark packages. 12 | repositories: 13 | - "https://mmlspark.azureedge.net/maven" 14 | 15 | # Spark packages to include in the run. 16 | packages: 17 | # Microsoft Machine Learning for Apache Spark provides a number of deep 18 | # learning and data science tools, including seamless integration of Spark 19 | # Machine Learning pipelines with Microsoft Cognitive Toolkit (CNTK) and 20 | # OpenCV, enabling you to quickly create powerful, highly-scalable 21 | # predictive and analytical models for large image and text datasets. 22 | # Details: https://github.com/Azure/mmlspark 23 | - group: "com.microsoft.ml.spark" 24 | artifact: "mmlspark_2.11" 25 | version: "0.7.91" 26 | 27 | # Required for SQL Server data sources. 28 | - group: "com.microsoft.sqlserver" 29 | artifact: "mssql-jdbc" 30 | version: "6.2.1.jre8" 31 | -------------------------------------------------------------------------------- /docs/Images/example_labels.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/example_labels.PNG -------------------------------------------------------------------------------- /docs/Images/middlesex_ma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/middlesex_ma.png -------------------------------------------------------------------------------- /docs/Images/sample_tile_developed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/sample_tile_developed.png -------------------------------------------------------------------------------- /docs/Images/scenario_schematic.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Azure-Samples/MachineLearningSamples-AerialImageClassification/ffce6171015bd9669c433c6c86e9b82c71dafbf0/docs/Images/scenario_schematic.PNG --------------------------------------------------------------------------------
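A note on Code/settings.cfg above: its values are read by the Python scripts under Code/ (the analysis configuration loader at the top of this listing references container_trained_models, for example). The following is a minimal sketch of loading the file with Python's standard-library configparser; the variable names are illustrative and the project's own loading code may differ:

    import configparser

    # settings.cfg is a standard INI-style file with a single [Settings] section
    config = configparser.ConfigParser()
    config.read('Code/settings.cfg')
    settings = config['Settings']

    # The storage credentials ship blank and must be filled in by the user
    storage_account_name = settings.get('storage_account_name')
    storage_account_key = settings.get('storage_account_key')

    # Container names have working defaults and normally need no changes
    container_trained_models = settings.get('container_trained_models', 'trainedmodels')
    container_prediction_results = settings.get('container_prediction_results', 'predictions')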