├── .DS_Store ├── 0_ground_truth ├── .gitkeep └── ground_truth.ipynb ├── 1_prepare_data ├── .gitkeep ├── docker │ ├── .gitkeep │ ├── Dockerfile │ ├── build_and_push.sh │ ├── code │ │ ├── .gitkeep │ │ ├── prepare_data.py │ │ └── utils │ │ │ ├── .gitkeep │ │ │ └── tf_record_util.py │ └── requirements.txt └── prepare_data.ipynb ├── 2_train_model ├── .gitkeep ├── docker │ ├── .gitkeep │ ├── Dockerfile │ └── build_and_push.sh ├── source_dir │ ├── .gitkeep │ ├── pipeline.config │ └── run_training.sh └── train_model.ipynb ├── 3_predict ├── .gitkeep ├── deploy_endpoint.ipynb └── visualization_utils.py ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── media ├── .gitkeep ├── test-01.jpg ├── test-02.png ├── test-1.jpg └── test-2.jpg /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/.DS_Store -------------------------------------------------------------------------------- /0_ground_truth/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/0_ground_truth/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.2.0-gpu 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install apt dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | git \ 8 | gpg-agent \ 9 | python3-cairocffi \ 10 | protobuf-compiler \ 11 | python3-pil \ 12 | python3-lxml \ 13 | python3-tk \ 14 | wget 15 | 16 | COPY requirements.txt /tmp/ 17 | RUN pip3 install -r /tmp/requirements.txt --no-cache --upgrade 18 | 19 | COPY code /opt/program 20 | 21 | ENTRYPOINT ["python3", "/opt/program/prepare_data.py"] -------------------------------------------------------------------------------- /1_prepare_data/docker/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | image=$1 4 | 5 | ACCOUNT_ID=$(aws sts get-caller-identity --query Account | tr -d '"') 6 | AWS_REGION=$(aws configure get region) 7 | TAG=$(date +%Y%m%d%H%M%S) 8 | 9 | fullname="${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${image}:${TAG}" 10 | 11 | # If the repository doesn't exist in ECR, create it. 12 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 13 | if [[ $? 
-ne 0 ]] 14 | then 15 | aws ecr create-repository --repository-name "${image}" > /dev/null 16 | fi 17 | 18 | # Get the login command from ECR and execute it directly 19 | $(aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com) 20 | 21 | # Build the docker image locally and then push it to ECR with the full name. 22 | cd docker 23 | 24 | echo "Building image with name ${image}" 25 | docker build --no-cache -t ${image} -f Dockerfile . 26 | docker tag ${image} ${fullname} 27 | 28 | echo "Pushing image to ECR ${fullname}" 29 | docker push ${fullname} 30 | 31 | # Writing the image name to let the calling process extract it without manual intervention: 32 | echo "${fullname}" > ecr_image_fullname.txt -------------------------------------------------------------------------------- /1_prepare_data/docker/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/code/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/code/prepare_data.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import json 4 | import argparse 5 | from utils.tf_record_util import TfRecordGenerator 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--input", type=str, default="") 10 | parser.add_argument("--label_map", type=str, default="") 11 | parser.add_argument("--ground_truth_manifest", type=str, default="") 12 | parser.add_argument("--output", type=str, default="") 13 | args, _ = parser.parse_known_args() 14 | 15 | input_folder = args.input 16 | ground_truth_manifest = args.ground_truth_manifest 17 | label_map = json.loads(args.label_map) 18 | output_folder = args.output 19 | 20 | # Feed in necessary path variables from above operations 21 | tf_record_generator = TfRecordGenerator(image_dir=input_folder, 22 | manifest=ground_truth_manifest, 23 | label_map=label_map, 24 | output_dir=output_folder) 25 | 26 | print('GENERATING TF RECORD FILES') 27 | tf_record_generator.generate_tf_records() 28 | 29 | print('GENERATING LABEL MAP FILE') 30 | with open(f'{output_folder}/label_map.pbtxt', 'w') as label_map_file: 31 | for item in label_map: 32 | label_map_file.write('item {\n') 33 | label_map_file.write(' id: ' + str(int(item) + 1) + '\n') 34 | label_map_file.write(" name: '" + label_map[item] + "'\n") 35 | label_map_file.write('}\n\n') 36 | 37 | print('FINISHED') 38 | -------------------------------------------------------------------------------- /1_prepare_data/docker/code/utils/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/code/utils/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/code/utils/tf_record_util.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | import os 4 | import io 5 | import json 6 | import jsonlines 7 | import random 8 | import logging 9 | from utils import dataset_util 10 | from PIL import Image 11 | import tensorflow as tf 12 | 13 | 14 | class TfRecordGenerator: 15 | def __init__(self, image_dir, manifest, label_map, output_dir): 16 | self.image_dir = image_dir 17 | self.manifest = manifest 18 | self.label_map = label_map 19 | self.output_dir = output_dir 20 | 21 | def generate_tf_records(self): 22 | with jsonlines.open(self.manifest, 'r') as reader: 23 | ground_truth_annotations = list(reader) 24 | dataset = split_dataset(ground_truth_annotations) 25 | for subset in dataset: 26 | logging.info(f'GENERATING TF RECORD FOR {subset}') 27 | writer = tf.io.TFRecordWriter(os.path.join(self.output_dir, f'{subset}.records')) 28 | for image_annotations in dataset[subset]: 29 | annotation_dict = json.loads(json.dumps(image_annotations)) 30 | tf_example = self._create_tf_example(annotation_dict['source-ref'], 31 | annotation_dict['car-70']['annotations']) # 'car-70' is the Ground Truth labeling job name; replace it with the name of your own labeling job. 32 | writer.write(tf_example.SerializeToString()) 33 | writer.close() 34 | 35 | def _create_tf_example(self, s3_image_path, annotations): 36 | image_name = os.path.basename(s3_image_path) 37 | image_path = f'{self.image_dir}/{image_name}' 38 | im = Image.open(image_path) 39 | 40 | # READ IMAGE FILE 41 | with tf.io.gfile.GFile(image_path, 'rb') as fid: 42 | encoded_jpg = fid.read() 43 | 44 | encoded_jpg_io = io.BytesIO(encoded_jpg) 45 | encoded_jpg_io.seek(0) 46 | image = Image.open(encoded_jpg_io) 47 | image_width, image_height = image.size 48 | if image.format != 'JPEG': 49 | image = image.convert('RGB') 50 | 51 | xmins = [] 52 | ymins = [] 53 | xmaxs = [] 54 | ymaxs = [] 55 | classes = [] 56 | classes_text = [] 57 | for a in annotations: 58 | x = a['left'] 59 | y = a['top'] 60 | width = a['width'] 61 | height = a['height'] 62 | class_id = a['class_id'] 63 | xmins.append(float(x) / image_width) 64 | xmaxs.append(float(x + width) / image_width) 65 | ymins.append(float(y) / image_height) 66 | ymaxs.append(float(y + height) / image_height) 67 | class_name = self.label_map[str(class_id)] 68 | classes_text.append(class_name.encode('utf8')) 69 | classes.append(class_id) 70 | 71 | feature_dict = { 72 | 'image/height': dataset_util.int64_feature(image_height), 73 | 'image/width': dataset_util.int64_feature(image_width), 74 | 'image/filename': dataset_util.bytes_feature(bytes(image_name, 'utf-8')), 75 | 'image/source_id': dataset_util.bytes_feature(bytes(image_name.replace('.jpg', ''), 'utf-8')), 76 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 77 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 78 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 79 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 80 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 81 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 82 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 83 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 84 | } 85 | example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) 86 | return example 87 | 88 | 89 | def split_dataset(list_images): 90 | dataset = {} 91 | random.seed(42) 92 | random.shuffle(list_images) 93 | num_train = int(0.9 * len(list_images)) 94 | dataset['train'] = list_images[:num_train] 95 | dataset['validation'] = list_images[num_train:] 96 | logging.info('TRAINING EXAMPLES: %d - VALIDATION EXAMPLES: %d', len(dataset['train']), len(dataset['validation'])) 97 | return dataset 98 | 99 | -------------------------------------------------------------------------------- /1_prepare_data/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | jsonlines 2 | pillow -------------------------------------------------------------------------------- /2_train_model/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/.gitkeep -------------------------------------------------------------------------------- /2_train_model/docker/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/docker/.gitkeep -------------------------------------------------------------------------------- /2_train_model/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.5.0-gpu 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install apt dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | git \ 8 | gpg-agent \ 9 | python3-cairocffi \ 10 | protobuf-compiler \ 11 | python3-pil \ 12 | python3-lxml \ 13 | python3-tk \ 14 | libgl1-mesa-dev \ 15 | wget 16 | 17 | # Copy this version of the model garden into the image 18 | COPY models/research/object_detection /home/tensorflow/models/research/object_detection 19 | 20 | # Compile protobuf configs 21 | RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.) 22 | WORKDIR /home/tensorflow/models/research/ 23 | 24 | RUN cp object_detection/packages/tf2/setup.py ./ 25 | ENV PATH="/home/tensorflow/.local/bin:${PATH}" 26 | RUN python -m pip install -U pip 27 | RUN python -m pip install . 28 | 29 | ENV TF_CPP_MIN_LOG_LEVEL 3 30 | 31 | # Install SageMaker training-toolkit 32 | RUN pip3 install sagemaker-training -------------------------------------------------------------------------------- /2_train_model/docker/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | image=$1 4 | 5 | ACCOUNT_ID=$(aws sts get-caller-identity --query Account | tr -d '"') 6 | AWS_REGION=$(aws configure get region) 7 | TAG=$(date +%Y%m%d%H%M%S) 8 | 9 | fullname="${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${image}:${TAG}" 10 | 11 | # If the repository doesn't exist in ECR, create it. 12 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 13 | if [[ $? -ne 0 ]] 14 | then 15 | aws ecr create-repository --repository-name "${image}" > /dev/null 16 | fi 17 | 18 | # Get the login command from ECR and execute it directly 19 | $(aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com) 20 | 21 | # Build the docker image locally and then push it to ECR with the full name. 22 | cd docker 23 | 24 | echo "Building image with name ${image}" 25 | docker build --no-cache -t ${image} -f Dockerfile .
26 | docker tag ${image} ${fullname} 27 | 28 | echo "Pushing image to ECR ${fullname}" 29 | docker push ${fullname} 30 | 31 | # Writing the image name to let the calling process extract it without manual intervention: 32 | echo "${fullname}" > ecr_image_fullname.txt -------------------------------------------------------------------------------- /2_train_model/source_dir/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/source_dir/.gitkeep -------------------------------------------------------------------------------- /2_train_model/source_dir/pipeline.config: -------------------------------------------------------------------------------- 1 | model { 2 | ssd { 3 | num_classes: 2 4 | image_resizer { 5 | keep_aspect_ratio_resizer { 6 | min_dimension: 640 7 | max_dimension: 640 8 | pad_to_max_dimension: true 9 | } 10 | } 11 | feature_extractor { 12 | type: "ssd_efficientnet-b1_bifpn_keras" 13 | conv_hyperparams { 14 | regularizer { 15 | l2_regularizer { 16 | weight: 3.9999998989515007e-05 17 | } 18 | } 19 | initializer { 20 | truncated_normal_initializer { 21 | mean: 0.0 22 | stddev: 0.029999999329447746 23 | } 24 | } 25 | activation: SWISH 26 | batch_norm { 27 | decay: 0.9900000095367432 28 | scale: true 29 | epsilon: 0.0010000000474974513 30 | } 31 | force_use_bias: true 32 | } 33 | bifpn { 34 | min_level: 3 35 | max_level: 7 36 | num_iterations: 4 37 | num_filters: 88 38 | } 39 | } 40 | box_coder { 41 | faster_rcnn_box_coder { 42 | y_scale: 1.0 43 | x_scale: 1.0 44 | height_scale: 1.0 45 | width_scale: 1.0 46 | } 47 | } 48 | matcher { 49 | argmax_matcher { 50 | matched_threshold: 0.5 51 | unmatched_threshold: 0.5 52 | ignore_thresholds: false 53 | negatives_lower_than_unmatched: true 54 | force_match_for_each_row: true 55 | use_matmul_gather: true 56 | } 57 | } 58 | similarity_calculator { 59 | iou_similarity { 60 | } 61 | } 62 | box_predictor { 63 | weight_shared_convolutional_box_predictor { 64 | conv_hyperparams { 65 | regularizer { 66 | l2_regularizer { 67 | weight: 3.9999998989515007e-05 68 | } 69 | } 70 | initializer { 71 | random_normal_initializer { 72 | mean: 0.0 73 | stddev: 0.009999999776482582 74 | } 75 | } 76 | activation: SWISH 77 | batch_norm { 78 | decay: 0.9900000095367432 79 | scale: true 80 | epsilon: 0.0010000000474974513 81 | } 82 | force_use_bias: true 83 | } 84 | depth: 88 85 | num_layers_before_predictor: 3 86 | kernel_size: 3 87 | class_prediction_bias_init: -4.599999904632568 88 | use_depthwise: true 89 | } 90 | } 91 | anchor_generator { 92 | multiscale_anchor_generator { 93 | min_level: 3 94 | max_level: 7 95 | anchor_scale: 4.0 96 | aspect_ratios: 1.0 97 | aspect_ratios: 2.0 98 | aspect_ratios: 0.5 99 | scales_per_octave: 3 100 | } 101 | } 102 | post_processing { 103 | batch_non_max_suppression { 104 | score_threshold: 9.99999993922529e-09 105 | iou_threshold: 0.5 106 | max_detections_per_class: 100 107 | max_total_detections: 100 108 | } 109 | score_converter: SOFTMAX 110 | } 111 | normalize_loss_by_num_matches: true 112 | loss { 113 | localization_loss { 114 | weighted_smooth_l1 { 115 | } 116 | } 117 | classification_loss { 118 | weighted_sigmoid_focal { 119 | gamma: 1.5 120 | alpha: 0.25 121 | } 122 | } 123 | classification_weight: 1.0 124 | localization_weight: 1.0 125 | } 126 | encode_background_as_zeros: true 127 | normalize_loc_loss_by_codesize: true 128 | 
inplace_batchnorm_update: true 129 | freeze_batchnorm: false 130 | add_background_class: false 131 | } 132 | } 133 | train_config { 134 | batch_size: 8 135 | data_augmentation_options { 136 | random_horizontal_flip { 137 | } 138 | } 139 | data_augmentation_options { 140 | random_scale_crop_and_pad_to_square { 141 | output_size: 640 142 | scale_min: 0.10000000149011612 143 | scale_max: 2.0 144 | } 145 | } 146 | sync_replicas: true 147 | optimizer { 148 | momentum_optimizer { 149 | learning_rate { 150 | cosine_decay_learning_rate { 151 | learning_rate_base: 0.07999999821186066 152 | total_steps: 300000 153 | warmup_learning_rate: 0.0010000000474974513 154 | warmup_steps: 2500 155 | } 156 | } 157 | momentum_optimizer_value: 0.8999999761581421 158 | } 159 | use_moving_average: false 160 | } 161 | fine_tune_checkpoint: "checkpoint/ckpt-0" 162 | num_steps: 300000 163 | startup_delay_steps: 0.0 164 | replicas_to_aggregate: 8 165 | max_number_of_boxes: 100 166 | unpad_groundtruth_tensors: false 167 | fine_tune_checkpoint_type: "detection" 168 | use_bfloat16: true 169 | fine_tune_checkpoint_version: V2 170 | } 171 | train_input_reader: { 172 | label_map_path: "/opt/ml/input/data/train/label_map.pbtxt" 173 | tf_record_input_reader { 174 | input_path: "/opt/ml/input/data/train/train.records" 175 | } 176 | } 177 | 178 | eval_config: { 179 | metrics_set: "coco_detection_metrics" 180 | use_moving_averages: false 181 | batch_size: 1; 182 | } 183 | 184 | eval_input_reader: { 185 | label_map_path: "/opt/ml/input/data/train/label_map.pbtxt" 186 | shuffle: false 187 | num_epochs: 1 188 | tf_record_input_reader { 189 | input_path: "/opt/ml/input/data/train/validation.records" 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /2_train_model/source_dir/run_training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MODEL_DIR=${SM_HP_MODEL_DIR} 4 | PIPELINE_CONFIG_PATH=${SM_HP_PIPELINE_CONFIG_PATH} 5 | NUM_TRAIN_STEPS=${SM_HP_NUM_TRAIN_STEPS} 6 | SAMPLE_1_OF_N_EVAL_EXAMPLES=${SM_HP_SAMPLE_1_OF_N_EVAL_EXAMPLES} 7 | 8 | if [ "${SM_NUM_GPUS}" -gt 0 ] 9 | then 10 | NUM_WORKERS=${SM_NUM_GPUS} 11 | else 12 | NUM_WORKERS=1 13 | fi 14 | 15 | echo "==TRAINING THE MODEL==" 16 | python model_main_tf2.py \ 17 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 18 | --model_dir ${MODEL_DIR} \ 19 | --num_train_steps ${NUM_TRAIN_STEPS} \ 20 | --num_workers ${NUM_WORKERS} \ 21 | --sample_1_of_n_eval_examples ${SAMPLE_1_OF_N_EVAL_EXAMPLES} \ 22 | --alsologtostderr 23 | 24 | echo "==EVALUATING THE MODEL==" 25 | python model_main_tf2.py \ 26 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 27 | --model_dir ${MODEL_DIR} \ 28 | --checkpoint_dir ${MODEL_DIR} \ 29 | --eval_timeout 10 30 | 31 | echo "==EXPORTING THE MODEL==" 32 | python exporter_main_v2.py \ 33 | --trained_checkpoint_dir ${MODEL_DIR} \ 34 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 35 | --output_directory /tmp/exported 36 | 37 | mv /tmp/exported/saved_model /opt/ml/model/1 -------------------------------------------------------------------------------- /2_train_model/train_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training Stage: Train an object detection model using TensorFlow on SageMaker" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Prerequisites\n", 15
| "\n", 16 | "Please get the s3 URI from Sagemaker processing job (refer previous notebook).\n", 17 | "\n", 18 | "#### S3 URI example: \n", 19 | "s3://<<\"sagemaker_default_bucket_name\">>/data/car-gt-100/tfrecords\n", 20 | "### Note:-- Above S3 URI used as an input to sagemaker training job" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Setup environment" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import os\n", 37 | "import sagemaker\n", 38 | "from sagemaker.estimator import Framework, Estimator\n", 39 | "\n", 40 | "role = sagemaker.get_execution_role()\n", 41 | "inputs = {'train': 's3://<<\"sagemaker_default_bucket_name\">>/data/car-gt-100/tfrecords/'} # define s3 training data inputs, refer previous notebook." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Build and push container" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "%%bash\n", 58 | "git clone https://github.com/tensorflow/models.git docker/models\n", 59 | "# get model_main and exporter_main files from TF2 Object Detection GitHub repository\n", 60 | "cp docker/models/research/object_detection/exporter_main_v2.py source_dir \n", 61 | "cp docker/models/research/object_detection/model_main_tf2.py source_dir" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "image_name = 'car-tf2-object-detection-1'" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "scrolled": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "!sh ./docker/build_and_push.sh $image_name" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "with open (os.path.join('docker', 'ecr_image_fullname.txt'), 'r') as f:\n", 91 | " container = f.readlines()[0][:-1]\n", 92 | "\n", 93 | "print(container)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Get pre-trained model from model zoo" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Download the base model and extract locally" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "scrolled": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "%%bash\n", 119 | "mkdir /tmp/checkpoint\n", 120 | "mkdir source_dir/checkpoint\n", 121 | "wget -O /tmp/efficientdet.tar.gz http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz\n", 122 | "tar -zxvf /tmp/efficientdet.tar.gz --strip-components 2 --directory source_dir/checkpoint efficientdet_d1_coco17_tpu-32/checkpoint" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Create SageMaker Custom Framework and Launch Training job" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "Here we define a custom framework estimator using the Amazon SageMaker Python SDK and run training with that class, which will take care of managing these tasks." 
137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "class CustomFramework(Framework):\n", 146 | " def __init__(\n", 147 | " self,\n", 148 | " entry_point,\n", 149 | " source_dir=None,\n", 150 | " hyperparameters=None,\n", 151 | " distributions=None,\n", 152 | " **kwargs\n", 153 | " ):\n", 154 | " super(CustomFramework, self).__init__(entry_point, source_dir, hyperparameters, **kwargs)\n", 155 | " \n", 156 | " def _configure_distribution(self, distributions):\n", 157 | " return\n", 158 | " \n", 159 | " def create_model(\n", 160 | " self,\n", 161 | " model_server_workers=None,\n", 162 | " role=None,\n", 163 | " vpc_config_override=None,\n", 164 | " entry_point=None,\n", 165 | " source_dir=None,\n", 166 | " dependencies=None,\n", 167 | " image_uri=None,\n", 168 | " **kwargs\n", 169 | " ):\n", 170 | " return None" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "from sagemaker.debugger import TensorBoardOutputConfig\n", 180 | "\n", 181 | "hyperparameters = {\n", 182 | " \"model_dir\":\"/opt/training\", \n", 183 | " \"pipeline_config_path\": \"pipeline.config\",\n", 184 | " \"num_train_steps\": 1000, \n", 185 | " \"sample_1_of_n_eval_examples\": 1\n", 186 | "}\n", 187 | "\n", 188 | "estimator = CustomFramework(\n", 189 | " image_uri=container,\n", 190 | " role=role,\n", 191 | " entry_point='run_training.sh',\n", 192 | " source_dir='source_dir/',\n", 193 | " instance_count=1,\n", 194 | " instance_type='ml.p3.2xlarge',\n", 195 | " hyperparameters=hyperparameters,\n", 196 | " disable_profiler=True,\n", 197 | " base_job_name='car-new-tf2-object-detection'\n", 198 | ")" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "estimator.fit(inputs)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "\n" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "conda_tensorflow_p36", 230 | "language": "python", 231 | "name": "conda_tensorflow_p36" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.6.13" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 4 248 | } 249 | -------------------------------------------------------------------------------- /3_predict/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/3_predict/.gitkeep -------------------------------------------------------------------------------- /3_predict/visualization_utils.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 
3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | # ============================================================================== 13 | 14 | """A set of functions that are used for visualization. 15 | 16 | These functions often receive an image, perform some visualization on the image. 17 | The functions do not return a value, instead they modify the image itself. 18 | 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import abc 25 | import collections 26 | # Set headless-friendly backend. 27 | import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements 28 | import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top 29 | import numpy as np 30 | import PIL.Image as Image 31 | import PIL.ImageColor as ImageColor 32 | import PIL.ImageDraw as ImageDraw 33 | import PIL.ImageFont as ImageFont 34 | import six 35 | from six.moves import range 36 | from six.moves import zip 37 | import tensorflow as tf 38 | 39 | _TITLE_LEFT_MARGIN = 10 40 | _TITLE_TOP_MARGIN = 10 41 | STANDARD_COLORS = [ 42 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 43 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 44 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 45 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 46 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 47 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 48 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 49 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 50 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 51 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 52 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 53 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 54 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 55 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 56 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 57 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 58 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 59 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 60 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 61 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 62 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 63 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 64 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 65 | ] 66 | 67 | 68 | def _get_multiplier_for_color_randomness(): 69 | """Returns a multiplier to get semi-random colors from successive indices. 
70 | 71 | This function computes a prime number, p, in the range [2, 17] that: 72 | - is closest to len(STANDARD_COLORS) / 10 73 | - does not divide len(STANDARD_COLORS) 74 | 75 | If no prime numbers in that range satisfy the constraints, p is returned as 1. 76 | 77 | Once p is established, it can be used as a multiplier to select 78 | non-consecutive colors from STANDARD_COLORS: 79 | colors = [(p * i) % len(STANDARD_COLORS) for i in range(20)] 80 | """ 81 | num_colors = len(STANDARD_COLORS) 82 | prime_candidates = [5, 7, 11, 13, 17] 83 | 84 | # Remove all prime candidates that divide the number of colors. 85 | prime_candidates = [p for p in prime_candidates if num_colors % p] 86 | if not prime_candidates: 87 | return 1 88 | 89 | # Return the closest prime number to num_colors / 10. 90 | abs_distance = [np.abs(num_colors / 10. - p) for p in prime_candidates] 91 | num_candidates = len(abs_distance) 92 | inds = [i for _, i in sorted(zip(abs_distance, range(num_candidates)))] 93 | return prime_candidates[inds[0]] 94 | 95 | 96 | def save_image_array_as_png(image, output_path): 97 | """Saves an image (represented as a numpy array) to PNG. 98 | 99 | Args: 100 | image: a numpy array with shape [height, width, 3]. 101 | output_path: path to which image should be written. 102 | """ 103 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 104 | with tf.gfile.Open(output_path, 'w') as fid: 105 | image_pil.save(fid, 'PNG') 106 | 107 | 108 | def encode_image_array_as_png_str(image): 109 | """Encodes a numpy array into a PNG string. 110 | 111 | Args: 112 | image: a numpy array with shape [height, width, 3]. 113 | 114 | Returns: 115 | PNG encoded image string. 116 | """ 117 | image_pil = Image.fromarray(np.uint8(image)) 118 | output = six.BytesIO() 119 | image_pil.save(output, format='PNG') 120 | png_string = output.getvalue() 121 | output.close() 122 | return png_string 123 | 124 | 125 | def draw_bounding_box_on_image_array(image, 126 | ymin, 127 | xmin, 128 | ymax, 129 | xmax, 130 | color='red', 131 | thickness=4, 132 | display_str_list=(), 133 | use_normalized_coordinates=True): 134 | """Adds a bounding box to an image (numpy array). 135 | 136 | Bounding box coordinates can be specified in either absolute (pixel) or 137 | normalized coordinates by setting the use_normalized_coordinates argument. 138 | 139 | Args: 140 | image: a numpy array with shape [height, width, 3]. 141 | ymin: ymin of bounding box. 142 | xmin: xmin of bounding box. 143 | ymax: ymax of bounding box. 144 | xmax: xmax of bounding box. 145 | color: color to draw bounding box. Default is red. 146 | thickness: line thickness. Default value is 4. 147 | display_str_list: list of strings to display in box 148 | (each to be shown on its own line). 149 | use_normalized_coordinates: If True (default), treat coordinates 150 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 151 | coordinates as absolute. 152 | """ 153 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 154 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 155 | thickness, display_str_list, 156 | use_normalized_coordinates) 157 | np.copyto(image, np.array(image_pil)) 158 | 159 | 160 | def draw_bounding_box_on_image(image, 161 | ymin, 162 | xmin, 163 | ymax, 164 | xmax, 165 | color='red', 166 | thickness=4, 167 | display_str_list=(), 168 | use_normalized_coordinates=True): 169 | """Adds a bounding box to an image. 
170 | 171 | Bounding box coordinates can be specified in either absolute (pixel) or 172 | normalized coordinates by setting the use_normalized_coordinates argument. 173 | 174 | Each string in display_str_list is displayed on a separate line above the 175 | bounding box in black text on a rectangle filled with the input 'color'. 176 | If the top of the bounding box extends to the edge of the image, the strings 177 | are displayed below the bounding box. 178 | 179 | Args: 180 | image: a PIL.Image object. 181 | ymin: ymin of bounding box. 182 | xmin: xmin of bounding box. 183 | ymax: ymax of bounding box. 184 | xmax: xmax of bounding box. 185 | color: color to draw bounding box. Default is red. 186 | thickness: line thickness. Default value is 4. 187 | display_str_list: list of strings to display in box 188 | (each to be shown on its own line). 189 | use_normalized_coordinates: If True (default), treat coordinates 190 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 191 | coordinates as absolute. 192 | """ 193 | draw = ImageDraw.Draw(image) 194 | im_width, im_height = image.size 195 | if use_normalized_coordinates: 196 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 197 | ymin * im_height, ymax * im_height) 198 | else: 199 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 200 | if thickness > 0: 201 | draw.line([(left, top), (left, bottom), (right, bottom), (right, top), 202 | (left, top)], 203 | width=thickness, 204 | fill=color) 205 | try: 206 | font = ImageFont.truetype('arial.ttf', 24) 207 | except IOError: 208 | font = ImageFont.load_default() 209 | 210 | # If the total height of the display strings added to the top of the bounding 211 | # box exceeds the top of the image, stack the strings below the bounding box 212 | # instead of above. 213 | display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] 214 | # Each display_str has a top and bottom margin of 0.05x. 215 | total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) 216 | 217 | if top > total_display_str_height: 218 | text_bottom = top 219 | else: 220 | text_bottom = bottom + total_display_str_height 221 | # Reverse list and print from bottom to top. 222 | for display_str in display_str_list[::-1]: 223 | text_width, text_height = font.getsize(display_str) 224 | margin = np.ceil(0.05 * text_height) 225 | draw.rectangle( 226 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 227 | text_bottom)], 228 | fill=color) 229 | draw.text( 230 | (left + margin, text_bottom - text_height - margin), 231 | display_str, 232 | fill='black', 233 | font=font) 234 | text_bottom -= text_height - 2 * margin 235 | 236 | 237 | def draw_bounding_boxes_on_image_array(image, 238 | boxes, 239 | color='red', 240 | thickness=4, 241 | display_str_list_list=()): 242 | """Draws bounding boxes on image (numpy array). 243 | 244 | Args: 245 | image: a numpy array object. 246 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 247 | The coordinates are in normalized format between [0, 1]. 248 | color: color to draw bounding box. Default is red. 249 | thickness: line thickness. Default value is 4. 250 | display_str_list_list: list of list of strings. 251 | a list of strings for each bounding box. 252 | The reason to pass a list of strings for a 253 | bounding box is that it might contain 254 | multiple labels. 
255 | 256 | Raises: 257 | ValueError: if boxes is not a [N, 4] array 258 | """ 259 | image_pil = Image.fromarray(image) 260 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 261 | display_str_list_list) 262 | np.copyto(image, np.array(image_pil)) 263 | 264 | 265 | def draw_bounding_boxes_on_image(image, 266 | boxes, 267 | color='red', 268 | thickness=4, 269 | display_str_list_list=()): 270 | """Draws bounding boxes on image. 271 | 272 | Args: 273 | image: a PIL.Image object. 274 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 275 | The coordinates are in normalized format between [0, 1]. 276 | color: color to draw bounding box. Default is red. 277 | thickness: line thickness. Default value is 4. 278 | display_str_list_list: list of list of strings. 279 | a list of strings for each bounding box. 280 | The reason to pass a list of strings for a 281 | bounding box is that it might contain 282 | multiple labels. 283 | 284 | Raises: 285 | ValueError: if boxes is not a [N, 4] array 286 | """ 287 | boxes_shape = boxes.shape 288 | if not boxes_shape: 289 | return 290 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 291 | raise ValueError('Input must be of size [N, 4]') 292 | for i in range(boxes_shape[0]): 293 | display_str_list = () 294 | if display_str_list_list: 295 | display_str_list = display_str_list_list[i] 296 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 297 | boxes[i, 3], color, thickness, display_str_list) 298 | 299 | 300 | def create_visualization_fn(category_index, 301 | include_masks=False, 302 | include_keypoints=False, 303 | include_keypoint_scores=False, 304 | include_track_ids=False, 305 | **kwargs): 306 | """Constructs a visualization function that can be wrapped in a py_func. 307 | 308 | py_funcs only accept positional arguments. This function returns a suitable 309 | function with the correct positional argument mapping. The positional 310 | arguments in order are: 311 | 0: image 312 | 1: boxes 313 | 2: classes 314 | 3: scores 315 | [4]: masks (optional) 316 | [4-5]: keypoints (optional) 317 | [4-6]: keypoint_scores (optional) 318 | [4-7]: track_ids (optional) 319 | 320 | -- Example 1 -- 321 | vis_only_masks_fn = create_visualization_fn(category_index, 322 | include_masks=True, include_keypoints=False, include_track_ids=False, 323 | **kwargs) 324 | image = tf.py_func(vis_only_masks_fn, 325 | inp=[image, boxes, classes, scores, masks], 326 | Tout=tf.uint8) 327 | 328 | -- Example 2 -- 329 | vis_masks_and_track_ids_fn = create_visualization_fn(category_index, 330 | include_masks=True, include_keypoints=False, include_track_ids=True, 331 | **kwargs) 332 | image = tf.py_func(vis_masks_and_track_ids_fn, 333 | inp=[image, boxes, classes, scores, masks, track_ids], 334 | Tout=tf.uint8) 335 | 336 | Args: 337 | category_index: a dict that maps integer ids to category dicts. e.g. 338 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 339 | include_masks: Whether masks should be expected as a positional argument in 340 | the returned function. 341 | include_keypoints: Whether keypoints should be expected as a positional 342 | argument in the returned function. 343 | include_keypoint_scores: Whether keypoint scores should be expected as a 344 | positional argument in the returned function. 345 | include_track_ids: Whether track ids should be expected as a positional 346 | argument in the returned function. 347 | **kwargs: Additional kwargs that will be passed to 348 | visualize_boxes_and_labels_on_image_array. 
349 | 350 | Returns: 351 | Returns a function that only takes tensors as positional arguments. 352 | """ 353 | 354 | def visualization_py_func_fn(*args): 355 | """Visualization function that can be wrapped in a tf.py_func. 356 | 357 | Args: 358 | *args: First 4 positional arguments must be: 359 | image - uint8 numpy array with shape (img_height, img_width, 3). 360 | boxes - a numpy array of shape [N, 4]. 361 | classes - a numpy array of shape [N]. 362 | scores - a numpy array of shape [N] or None. 363 | -- Optional positional arguments -- 364 | instance_masks - a numpy array of shape [N, image_height, image_width]. 365 | keypoints - a numpy array of shape [N, num_keypoints, 2]. 366 | keypoint_scores - a numpy array of shape [N, num_keypoints]. 367 | track_ids - a numpy array of shape [N] with unique track ids. 368 | 369 | Returns: 370 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid 371 | boxes. 372 | """ 373 | image = args[0] 374 | boxes = args[1] 375 | classes = args[2] 376 | scores = args[3] 377 | masks = keypoints = keypoint_scores = track_ids = None 378 | pos_arg_ptr = 4 # Positional argument for first optional tensor (masks). 379 | if include_masks: 380 | masks = args[pos_arg_ptr] 381 | pos_arg_ptr += 1 382 | if include_keypoints: 383 | keypoints = args[pos_arg_ptr] 384 | pos_arg_ptr += 1 385 | if include_keypoint_scores: 386 | keypoint_scores = args[pos_arg_ptr] 387 | pos_arg_ptr += 1 388 | if include_track_ids: 389 | track_ids = args[pos_arg_ptr] 390 | 391 | return visualize_boxes_and_labels_on_image_array( 392 | image, 393 | boxes, 394 | classes, 395 | scores, 396 | category_index=category_index, 397 | instance_masks=masks, 398 | keypoints=keypoints, 399 | keypoint_scores=keypoint_scores, 400 | track_ids=track_ids, 401 | **kwargs) 402 | return visualization_py_func_fn 403 | 404 | 405 | def draw_heatmaps_on_image(image, heatmaps): 406 | """Draws heatmaps on an image. 407 | 408 | The heatmaps are handled channel by channel and different colors are used to 409 | paint different heatmap channels. 410 | 411 | Args: 412 | image: a PIL.Image object. 413 | heatmaps: a numpy array with shape [image_height, image_width, channel]. 414 | Note that the image_height and image_width should match the size of input 415 | image. 416 | """ 417 | draw = ImageDraw.Draw(image) 418 | channel = heatmaps.shape[2] 419 | for c in range(channel): 420 | heatmap = heatmaps[:, :, c] * 255 421 | heatmap = heatmap.astype('uint8') 422 | bitmap = Image.fromarray(heatmap, 'L') 423 | bitmap.convert('1') 424 | draw.bitmap( 425 | xy=[(0, 0)], 426 | bitmap=bitmap, 427 | fill=STANDARD_COLORS[c]) 428 | 429 | 430 | def draw_heatmaps_on_image_array(image, heatmaps): 431 | """Overlays heatmaps to an image (numpy array). 432 | 433 | The function overlays the heatmaps on top of image. The heatmap values will be 434 | painted with different colors depending on the channels. Similar to 435 | "draw_heatmaps_on_image_array" function except the inputs are numpy arrays. 436 | 437 | Args: 438 | image: a numpy array with shape [height, width, 3]. 439 | heatmaps: a numpy array with shape [height, width, channel]. 440 | 441 | Returns: 442 | An uint8 numpy array representing the input image painted with heatmap 443 | colors. 
444 | """ 445 | if not isinstance(image, np.ndarray): 446 | image = image.numpy() 447 | if not isinstance(heatmaps, np.ndarray): 448 | heatmaps = heatmaps.numpy() 449 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 450 | draw_heatmaps_on_image(image_pil, heatmaps) 451 | return np.array(image_pil) 452 | 453 | 454 | def draw_heatmaps_on_image_tensors(images, 455 | heatmaps, 456 | apply_sigmoid=False): 457 | """Draws heatmaps on batch of image tensors. 458 | 459 | Args: 460 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 461 | channels will be ignored. If C = 1, then we convert the images to RGB 462 | images. 463 | heatmaps: [N, h, w, channel] float32 tensor of heatmaps. Note that the 464 | heatmaps will be resized to match the input image size before overlaying 465 | the heatmaps with input images. Theoretically the heatmap height width 466 | should have the same aspect ratio as the input image to avoid potential 467 | misalignment introduced by the image resize. 468 | apply_sigmoid: Whether to apply a sigmoid layer on top of the heatmaps. If 469 | the heatmaps come directly from the prediction logits, then we should 470 | apply the sigmoid layer to make sure the values are in between [0.0, 1.0]. 471 | 472 | Returns: 473 | 4D image tensor of type uint8, with heatmaps overlaid on top. 474 | """ 475 | # Additional channels are being ignored. 476 | if images.shape[3] > 3: 477 | images = images[:, :, :, 0:3] 478 | elif images.shape[3] == 1: 479 | images = tf.image.grayscale_to_rgb(images) 480 | 481 | _, height, width, _ = shape_utils.combined_static_and_dynamic_shape(images) 482 | if apply_sigmoid: 483 | heatmaps = tf.math.sigmoid(heatmaps) 484 | resized_heatmaps = tf.image.resize(heatmaps, size=[height, width]) 485 | 486 | elems = [images, resized_heatmaps] 487 | 488 | def draw_heatmaps(image_and_heatmaps): 489 | """Draws heatmaps on image.""" 490 | image_with_heatmaps = tf.py_function( 491 | draw_heatmaps_on_image_array, 492 | image_and_heatmaps, 493 | tf.uint8) 494 | return image_with_heatmaps 495 | images = tf.map_fn(draw_heatmaps, elems, dtype=tf.uint8, back_prop=False) 496 | return images 497 | 498 | 499 | def _resize_original_image(image, image_shape): 500 | image = tf.expand_dims(image, 0) 501 | image = tf.image.resize_images( 502 | image, 503 | image_shape, 504 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 505 | align_corners=True) 506 | return tf.cast(tf.squeeze(image, 0), tf.uint8) 507 | 508 | 509 | def draw_bounding_boxes_on_image_tensors(images, 510 | boxes, 511 | classes, 512 | scores, 513 | category_index, 514 | original_image_spatial_shape=None, 515 | true_image_shape=None, 516 | instance_masks=None, 517 | keypoints=None, 518 | keypoint_scores=None, 519 | keypoint_edges=None, 520 | track_ids=None, 521 | max_boxes_to_draw=20, 522 | min_score_thresh=0.2, 523 | use_normalized_coordinates=True): 524 | """Draws bounding boxes, masks, and keypoints on batch of image tensors. 525 | 526 | Args: 527 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 528 | channels will be ignored. If C = 1, then we convert the images to RGB 529 | images. 530 | boxes: [N, max_detections, 4] float32 tensor of detection boxes. 531 | classes: [N, max_detections] int tensor of detection classes. Note that 532 | classes are 1-indexed. 533 | scores: [N, max_detections] float32 tensor of detection scores. 534 | category_index: a dict that maps integer ids to category dicts. e.g. 
535 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 536 | original_image_spatial_shape: [N, 2] tensor containing the spatial size of 537 | the original image. 538 | true_image_shape: [N, 3] tensor containing the spatial size of unpadded 539 | original_image. 540 | instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with 541 | instance masks. 542 | keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2] 543 | with keypoints. 544 | keypoint_scores: A 3D float32 tensor of shape [N, max_detection, 545 | num_keypoints] with keypoint scores. 546 | keypoint_edges: A list of tuples with keypoint indices that specify which 547 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 548 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 549 | track_ids: [N, max_detections] int32 tensor of unique tracks ids (i.e. 550 | instance ids for each object). If provided, the color-coding of boxes is 551 | dictated by these ids, and not classes. 552 | max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. 553 | min_score_thresh: Minimum score threshold for visualization. Default 0.2. 554 | use_normalized_coordinates: Whether to assume boxes and kepoints are in 555 | normalized coordinates (as opposed to absolute coordiantes). 556 | Default is True. 557 | 558 | Returns: 559 | 4D image tensor of type uint8, with boxes drawn on top. 560 | """ 561 | # Additional channels are being ignored. 562 | if images.shape[3] > 3: 563 | images = images[:, :, :, 0:3] 564 | elif images.shape[3] == 1: 565 | images = tf.image.grayscale_to_rgb(images) 566 | visualization_keyword_args = { 567 | 'use_normalized_coordinates': use_normalized_coordinates, 568 | 'max_boxes_to_draw': max_boxes_to_draw, 569 | 'min_score_thresh': min_score_thresh, 570 | 'agnostic_mode': False, 571 | 'line_thickness': 4, 572 | 'keypoint_edges': keypoint_edges 573 | } 574 | if true_image_shape is None: 575 | true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3]) 576 | else: 577 | true_shapes = true_image_shape 578 | if original_image_spatial_shape is None: 579 | original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2]) 580 | else: 581 | original_shapes = original_image_spatial_shape 582 | 583 | visualize_boxes_fn = create_visualization_fn( 584 | category_index, 585 | include_masks=instance_masks is not None, 586 | include_keypoints=keypoints is not None, 587 | include_keypoint_scores=keypoint_scores is not None, 588 | include_track_ids=track_ids is not None, 589 | **visualization_keyword_args) 590 | 591 | elems = [true_shapes, original_shapes, images, boxes, classes, scores] 592 | if instance_masks is not None: 593 | elems.append(instance_masks) 594 | if keypoints is not None: 595 | elems.append(keypoints) 596 | if keypoint_scores is not None: 597 | elems.append(keypoint_scores) 598 | if track_ids is not None: 599 | elems.append(track_ids) 600 | 601 | def draw_boxes(image_and_detections): 602 | """Draws boxes on image.""" 603 | true_shape = image_and_detections[0] 604 | original_shape = image_and_detections[1] 605 | if true_image_shape is not None: 606 | image = shape_utils.pad_or_clip_nd(image_and_detections[2], 607 | [true_shape[0], true_shape[1], 3]) 608 | if original_image_spatial_shape is not None: 609 | image_and_detections[2] = _resize_original_image(image, original_shape) 610 | 611 | image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:], 612 | tf.uint8) 613 | return image_with_boxes 614 | 615 | images = tf.map_fn(draw_boxes, elems, 
dtype=tf.uint8, back_prop=False) 616 | return images 617 | 618 | 619 | def draw_side_by_side_evaluation_image(eval_dict, 620 | category_index, 621 | max_boxes_to_draw=20, 622 | min_score_thresh=0.2, 623 | use_normalized_coordinates=True, 624 | keypoint_edges=None): 625 | """Creates a side-by-side image with detections and groundtruth. 626 | 627 | Bounding boxes (and instance masks, if available) are visualized on both 628 | subimages. 629 | 630 | Args: 631 | eval_dict: The evaluation dictionary returned by 632 | eval_util.result_dict_for_batched_example() or 633 | eval_util.result_dict_for_single_example(). 634 | category_index: A category index (dictionary) produced from a labelmap. 635 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 636 | min_score_thresh: The minimum score threshold for showing detections. 637 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 638 | normalized coordinates (as opposed to absolute coordinates). 639 | Default is True. 640 | keypoint_edges: A list of tuples with keypoint indices that specify which 641 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 642 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 643 | 644 | Returns: 645 | A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left 646 | corresponds to detections, while the subimage on the right corresponds to 647 | groundtruth. 648 | """ 649 | detection_fields = fields.DetectionResultFields() 650 | input_data_fields = fields.InputDataFields() 651 | 652 | images_with_detections_list = [] 653 | 654 | # Add the batch dimension if the eval_dict is for single example. 655 | if len(eval_dict[detection_fields.detection_classes].shape) == 1: 656 | for key in eval_dict: 657 | if (key != input_data_fields.original_image and 658 | key != input_data_fields.image_additional_channels): 659 | eval_dict[key] = tf.expand_dims(eval_dict[key], 0) 660 | 661 | for indx in range(eval_dict[input_data_fields.original_image].shape[0]): 662 | instance_masks = None 663 | if detection_fields.detection_masks in eval_dict: 664 | instance_masks = tf.cast( 665 | tf.expand_dims( 666 | eval_dict[detection_fields.detection_masks][indx], axis=0), 667 | tf.uint8) 668 | keypoints = None 669 | keypoint_scores = None 670 | if detection_fields.detection_keypoints in eval_dict: 671 | keypoints = tf.expand_dims( 672 | eval_dict[detection_fields.detection_keypoints][indx], axis=0) 673 | if detection_fields.detection_keypoint_scores in eval_dict: 674 | keypoint_scores = tf.expand_dims( 675 | eval_dict[detection_fields.detection_keypoint_scores][indx], axis=0) 676 | else: 677 | keypoint_scores = tf.cast(keypoint_ops.set_keypoint_visibilities( 678 | keypoints), dtype=tf.float32) 679 | 680 | groundtruth_instance_masks = None 681 | if input_data_fields.groundtruth_instance_masks in eval_dict: 682 | groundtruth_instance_masks = tf.cast( 683 | tf.expand_dims( 684 | eval_dict[input_data_fields.groundtruth_instance_masks][indx], 685 | axis=0), tf.uint8) 686 | groundtruth_keypoints = None 687 | groundtruth_keypoint_scores = None 688 | gt_kpt_vis_fld = input_data_fields.groundtruth_keypoint_visibilities 689 | if input_data_fields.groundtruth_keypoints in eval_dict: 690 | groundtruth_keypoints = tf.expand_dims( 691 | eval_dict[input_data_fields.groundtruth_keypoints][indx], axis=0) 692 | if gt_kpt_vis_fld in eval_dict: 693 | groundtruth_keypoint_scores = tf.expand_dims( 694 | tf.cast(eval_dict[gt_kpt_vis_fld][indx], dtype=tf.float32), axis=0) 695 | else: 696 | 
groundtruth_keypoint_scores = tf.cast( 697 | keypoint_ops.set_keypoint_visibilities( 698 | groundtruth_keypoints), dtype=tf.float32) 699 | 700 | images_with_detections = draw_bounding_boxes_on_image_tensors( 701 | tf.expand_dims( 702 | eval_dict[input_data_fields.original_image][indx], axis=0), 703 | tf.expand_dims( 704 | eval_dict[detection_fields.detection_boxes][indx], axis=0), 705 | tf.expand_dims( 706 | eval_dict[detection_fields.detection_classes][indx], axis=0), 707 | tf.expand_dims( 708 | eval_dict[detection_fields.detection_scores][indx], axis=0), 709 | category_index, 710 | original_image_spatial_shape=tf.expand_dims( 711 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 712 | axis=0), 713 | true_image_shape=tf.expand_dims( 714 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 715 | instance_masks=instance_masks, 716 | keypoints=keypoints, 717 | keypoint_scores=keypoint_scores, 718 | keypoint_edges=keypoint_edges, 719 | max_boxes_to_draw=max_boxes_to_draw, 720 | min_score_thresh=min_score_thresh, 721 | use_normalized_coordinates=use_normalized_coordinates) 722 | images_with_groundtruth = draw_bounding_boxes_on_image_tensors( 723 | tf.expand_dims( 724 | eval_dict[input_data_fields.original_image][indx], axis=0), 725 | tf.expand_dims( 726 | eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), 727 | tf.expand_dims( 728 | eval_dict[input_data_fields.groundtruth_classes][indx], axis=0), 729 | tf.expand_dims( 730 | tf.ones_like( 731 | eval_dict[input_data_fields.groundtruth_classes][indx], 732 | dtype=tf.float32), 733 | axis=0), 734 | category_index, 735 | original_image_spatial_shape=tf.expand_dims( 736 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 737 | axis=0), 738 | true_image_shape=tf.expand_dims( 739 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 740 | instance_masks=groundtruth_instance_masks, 741 | keypoints=groundtruth_keypoints, 742 | keypoint_scores=groundtruth_keypoint_scores, 743 | keypoint_edges=keypoint_edges, 744 | max_boxes_to_draw=None, 745 | min_score_thresh=0.0, 746 | use_normalized_coordinates=use_normalized_coordinates) 747 | images_to_visualize = tf.concat([images_with_detections, 748 | images_with_groundtruth], axis=2) 749 | 750 | if input_data_fields.image_additional_channels in eval_dict: 751 | images_with_additional_channels_groundtruth = ( 752 | draw_bounding_boxes_on_image_tensors( 753 | tf.expand_dims( 754 | eval_dict[input_data_fields.image_additional_channels][indx], 755 | axis=0), 756 | tf.expand_dims( 757 | eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), 758 | tf.expand_dims( 759 | eval_dict[input_data_fields.groundtruth_classes][indx], 760 | axis=0), 761 | tf.expand_dims( 762 | tf.ones_like( 763 | eval_dict[input_data_fields.groundtruth_classes][indx], 764 | dtype=tf.float32), 765 | axis=0), 766 | category_index, 767 | original_image_spatial_shape=tf.expand_dims( 768 | eval_dict[input_data_fields.original_image_spatial_shape] 769 | [indx], 770 | axis=0), 771 | true_image_shape=tf.expand_dims( 772 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 773 | instance_masks=groundtruth_instance_masks, 774 | keypoints=None, 775 | keypoint_edges=None, 776 | max_boxes_to_draw=None, 777 | min_score_thresh=0.0, 778 | use_normalized_coordinates=use_normalized_coordinates)) 779 | images_to_visualize = tf.concat( 780 | [images_to_visualize, images_with_additional_channels_groundtruth], 781 | axis=2) 782 | images_with_detections_list.append(images_to_visualize) 
783 | 784 | return images_with_detections_list 785 | 786 | 787 | def draw_densepose_visualizations(eval_dict, 788 | max_boxes_to_draw=20, 789 | min_score_thresh=0.2, 790 | num_parts=24, 791 | dp_coord_to_visualize=0): 792 | """Draws DensePose visualizations. 793 | 794 | Args: 795 | eval_dict: The evaluation dictionary returned by 796 | eval_util.result_dict_for_batched_example(). 797 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 798 | min_score_thresh: The minimum score threshold for showing detections. 799 | num_parts: The number of different densepose parts. 800 | dp_coord_to_visualize: Whether to visualize v-coordinates (0) or 801 | u-coordinates (1) overlaid on the person masks. 802 | 803 | Returns: 804 | A list of [1, H, W, C] uint8 tensors, each element corresponding to an image 805 | in the batch. 806 | 807 | Raises: 808 | ValueError: If `dp_coord_to_visualize` is not 0 or 1. 809 | """ 810 | if dp_coord_to_visualize not in (0, 1): 811 | raise ValueError('`dp_coord_to_visualize` must be either 0 for v ' 812 | 'coordinates, or 1 for u coordinates, but instead got ' 813 | '{}'.format(dp_coord_to_visualize)) 814 | detection_fields = fields.DetectionResultFields() 815 | input_data_fields = fields.InputDataFields() 816 | 817 | if detection_fields.detection_masks not in eval_dict: 818 | raise ValueError('Expected `detection_masks` in `eval_dict`.') 819 | if detection_fields.detection_surface_coords not in eval_dict: 820 | raise ValueError('Expected `detection_surface_coords` in `eval_dict`.') 821 | 822 | images_with_detections_list = [] 823 | for indx in range(eval_dict[input_data_fields.original_image].shape[0]): 824 | # Note that detection masks have already been resized to the original image 825 | # shapes, but `original_image` has not. 826 | # TODO(ronnyvotel): Consider resizing `original_image` in 827 | # eval_util.result_dict_for_batched_example().
828 | true_shape = eval_dict[input_data_fields.true_image_shape][indx] 829 | original_shape = eval_dict[ 830 | input_data_fields.original_image_spatial_shape][indx] 831 | image = eval_dict[input_data_fields.original_image][indx] 832 | image = shape_utils.pad_or_clip_nd(image, [true_shape[0], true_shape[1], 3]) 833 | image = _resize_original_image(image, original_shape) 834 | 835 | scores = eval_dict[detection_fields.detection_scores][indx] 836 | detection_masks = eval_dict[detection_fields.detection_masks][indx] 837 | surface_coords = eval_dict[detection_fields.detection_surface_coords][indx] 838 | 839 | def draw_densepose_py_func(image, detection_masks, surface_coords, scores): 840 | """Overlays part masks and surface coords on original images.""" 841 | surface_coord_image = np.copy(image) 842 | for i, (score, surface_coord, mask) in enumerate( 843 | zip(scores, surface_coords, detection_masks)): 844 | if i == max_boxes_to_draw: 845 | break 846 | if score > min_score_thresh: 847 | draw_part_mask_on_image_array(image, mask, num_parts=num_parts) 848 | draw_float_channel_on_image_array( 849 | surface_coord_image, surface_coord[:, :, dp_coord_to_visualize], 850 | mask) 851 | return np.concatenate([image, surface_coord_image], axis=1) 852 | 853 | image_with_densepose = tf.py_func( 854 | draw_densepose_py_func, 855 | [image, detection_masks, surface_coords, scores], 856 | tf.uint8) 857 | images_with_detections_list.append( 858 | image_with_densepose[tf.newaxis, :, :, :]) 859 | return images_with_detections_list 860 | 861 | 862 | def draw_keypoints_on_image_array(image, 863 | keypoints, 864 | keypoint_scores=None, 865 | min_score_thresh=0.5, 866 | color='red', 867 | radius=2, 868 | use_normalized_coordinates=True, 869 | keypoint_edges=None, 870 | keypoint_edge_color='green', 871 | keypoint_edge_width=2): 872 | """Draws keypoints on an image (numpy array). 873 | 874 | Args: 875 | image: a numpy array with shape [height, width, 3]. 876 | keypoints: a numpy array with shape [num_keypoints, 2]. 877 | keypoint_scores: a numpy array with shape [num_keypoints]. If provided, only 878 | those keypoints with a score above score_threshold will be visualized. 879 | min_score_thresh: A scalar indicating the minimum keypoint score required 880 | for a keypoint to be visualized. Note that keypoint_scores must be 881 | provided for this threshold to take effect. 882 | color: color to draw the keypoints with. Default is red. 883 | radius: keypoint radius. Default value is 2. 884 | use_normalized_coordinates: if True (default), treat keypoint values as 885 | relative to the image. Otherwise treat them as absolute. 886 | keypoint_edges: A list of tuples with keypoint indices that specify which 887 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 888 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 889 | keypoint_edge_color: color to draw the keypoint edges with. Default is red. 890 | keypoint_edge_width: width of the edges drawn between keypoints. Default 891 | value is 2. 
892 | """ 893 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 894 | draw_keypoints_on_image(image_pil, 895 | keypoints, 896 | keypoint_scores=keypoint_scores, 897 | min_score_thresh=min_score_thresh, 898 | color=color, 899 | radius=radius, 900 | use_normalized_coordinates=use_normalized_coordinates, 901 | keypoint_edges=keypoint_edges, 902 | keypoint_edge_color=keypoint_edge_color, 903 | keypoint_edge_width=keypoint_edge_width) 904 | np.copyto(image, np.array(image_pil)) 905 | 906 | 907 | def draw_keypoints_on_image(image, 908 | keypoints, 909 | keypoint_scores=None, 910 | min_score_thresh=0.5, 911 | color='red', 912 | radius=2, 913 | use_normalized_coordinates=True, 914 | keypoint_edges=None, 915 | keypoint_edge_color='green', 916 | keypoint_edge_width=2): 917 | """Draws keypoints on an image. 918 | 919 | Args: 920 | image: a PIL.Image object. 921 | keypoints: a numpy array with shape [num_keypoints, 2]. 922 | keypoint_scores: a numpy array with shape [num_keypoints]. 923 | min_score_thresh: a score threshold for visualizing keypoints. Only used if 924 | keypoint_scores is provided. 925 | color: color to draw the keypoints with. Default is red. 926 | radius: keypoint radius. Default value is 2. 927 | use_normalized_coordinates: if True (default), treat keypoint values as 928 | relative to the image. Otherwise treat them as absolute. 929 | keypoint_edges: A list of tuples with keypoint indices that specify which 930 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 931 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 932 | keypoint_edge_color: color to draw the keypoint edges with. Default is red. 933 | keypoint_edge_width: width of the edges drawn between keypoints. Default 934 | value is 2. 935 | """ 936 | draw = ImageDraw.Draw(image) 937 | im_width, im_height = image.size 938 | keypoints = np.array(keypoints) 939 | keypoints_x = [k[1] for k in keypoints] 940 | keypoints_y = [k[0] for k in keypoints] 941 | if use_normalized_coordinates: 942 | keypoints_x = tuple([im_width * x for x in keypoints_x]) 943 | keypoints_y = tuple([im_height * y for y in keypoints_y]) 944 | if keypoint_scores is not None: 945 | keypoint_scores = np.array(keypoint_scores) 946 | valid_kpt = np.greater(keypoint_scores, min_score_thresh) 947 | else: 948 | valid_kpt = np.where(np.any(np.isnan(keypoints), axis=1), 949 | np.zeros_like(keypoints[:, 0]), 950 | np.ones_like(keypoints[:, 0])) 951 | valid_kpt = [v for v in valid_kpt] 952 | 953 | for keypoint_x, keypoint_y, valid in zip(keypoints_x, keypoints_y, valid_kpt): 954 | if valid: 955 | draw.ellipse([(keypoint_x - radius, keypoint_y - radius), 956 | (keypoint_x + radius, keypoint_y + radius)], 957 | outline=color, fill=color) 958 | if keypoint_edges is not None: 959 | for keypoint_start, keypoint_end in keypoint_edges: 960 | if (keypoint_start < 0 or keypoint_start >= len(keypoints) or 961 | keypoint_end < 0 or keypoint_end >= len(keypoints)): 962 | continue 963 | if not (valid_kpt[keypoint_start] and valid_kpt[keypoint_end]): 964 | continue 965 | edge_coordinates = [ 966 | keypoints_x[keypoint_start], keypoints_y[keypoint_start], 967 | keypoints_x[keypoint_end], keypoints_y[keypoint_end] 968 | ] 969 | draw.line( 970 | edge_coordinates, fill=keypoint_edge_color, width=keypoint_edge_width) 971 | 972 | 973 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): 974 | """Draws mask on an image. 
975 | 976 | Args: 977 | image: uint8 numpy array with shape (img_height, img_height, 3) 978 | mask: a uint8 numpy array of shape (img_height, img_height) with 979 | values between either 0 or 1. 980 | color: color to draw the keypoints with. Default is red. 981 | alpha: transparency value between 0 and 1. (default: 0.4) 982 | 983 | Raises: 984 | ValueError: On incorrect data type for image or masks. 985 | """ 986 | if image.dtype != np.uint8: 987 | raise ValueError('`image` not of type np.uint8') 988 | if mask.dtype != np.uint8: 989 | raise ValueError('`mask` not of type np.uint8') 990 | if image.shape[:2] != mask.shape: 991 | raise ValueError('The image has spatial dimensions %s but the mask has ' 992 | 'dimensions %s' % (image.shape[:2], mask.shape)) 993 | rgb = ImageColor.getrgb(color) 994 | pil_image = Image.fromarray(image) 995 | 996 | solid_color = np.expand_dims( 997 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) 998 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') 999 | pil_mask = Image.fromarray(np.uint8(255.0*alpha*(mask > 0))).convert('L') 1000 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) 1001 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1002 | 1003 | 1004 | def draw_part_mask_on_image_array(image, mask, alpha=0.4, num_parts=24): 1005 | """Draws part mask on an image. 1006 | 1007 | Args: 1008 | image: uint8 numpy array with shape (img_height, img_height, 3) 1009 | mask: a uint8 numpy array of shape (img_height, img_height) with 1010 | 1-indexed parts (0 for background). 1011 | alpha: transparency value between 0 and 1 (default: 0.4) 1012 | num_parts: the maximum number of parts that may exist in the image (default 1013 | 24 for DensePose). 1014 | 1015 | Raises: 1016 | ValueError: On incorrect data type for image or masks. 1017 | """ 1018 | if image.dtype != np.uint8: 1019 | raise ValueError('`image` not of type np.uint8') 1020 | if mask.dtype != np.uint8: 1021 | raise ValueError('`mask` not of type np.uint8') 1022 | if image.shape[:2] != mask.shape: 1023 | raise ValueError('The image has spatial dimensions %s but the mask has ' 1024 | 'dimensions %s' % (image.shape[:2], mask.shape)) 1025 | 1026 | pil_image = Image.fromarray(image) 1027 | part_colors = np.zeros_like(image) 1028 | mask_1_channel = mask[:, :, np.newaxis] 1029 | for i, color in enumerate(STANDARD_COLORS[:num_parts]): 1030 | rgb = np.array(ImageColor.getrgb(color), dtype=np.uint8) 1031 | part_colors += (mask_1_channel == i + 1) * rgb[np.newaxis, np.newaxis, :] 1032 | pil_part_colors = Image.fromarray(np.uint8(part_colors)).convert('RGBA') 1033 | pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L') 1034 | pil_image = Image.composite(pil_part_colors, pil_image, pil_mask) 1035 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1036 | 1037 | 1038 | def draw_float_channel_on_image_array(image, channel, mask, alpha=0.9, 1039 | cmap='YlGn'): 1040 | """Draws a floating point channel on an image array. 1041 | 1042 | Args: 1043 | image: uint8 numpy array with shape (img_height, img_height, 3) 1044 | channel: float32 numpy array with shape (img_height, img_height). The values 1045 | should be in the range [0, 1], and will be mapped to colors using the 1046 | provided colormap `cmap` argument. 1047 | mask: a uint8 numpy array of shape (img_height, img_height) with 1048 | 1-indexed parts (0 for background). 1049 | alpha: transparency value between 0 and 1 (default: 0.9) 1050 | cmap: string with the colormap to use. 
1051 | 1052 | Raises: 1053 | ValueError: On incorrect data type for image or masks. 1054 | """ 1055 | if image.dtype != np.uint8: 1056 | raise ValueError('`image` not of type np.uint8') 1057 | if channel.dtype != np.float32: 1058 | raise ValueError('`channel` not of type np.float32') 1059 | if mask.dtype != np.uint8: 1060 | raise ValueError('`mask` not of type np.uint8') 1061 | if image.shape[:2] != channel.shape: 1062 | raise ValueError('The image has spatial dimensions %s but the channel has ' 1063 | 'dimensions %s' % (image.shape[:2], channel.shape)) 1064 | if image.shape[:2] != mask.shape: 1065 | raise ValueError('The image has spatial dimensions %s but the mask has ' 1066 | 'dimensions %s' % (image.shape[:2], mask.shape)) 1067 | 1068 | cm = plt.get_cmap(cmap) 1069 | pil_image = Image.fromarray(image) 1070 | colored_channel = cm(channel)[:, :, :3] 1071 | pil_colored_channel = Image.fromarray( 1072 | np.uint8(colored_channel * 255)).convert('RGBA') 1073 | pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L') 1074 | pil_image = Image.composite(pil_colored_channel, pil_image, pil_mask) 1075 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1076 | 1077 | 1078 | def visualize_boxes_and_labels_on_image_array( 1079 | image, 1080 | boxes, 1081 | classes, 1082 | scores, 1083 | category_index, 1084 | instance_masks=None, 1085 | instance_boundaries=None, 1086 | keypoints=None, 1087 | keypoint_scores=None, 1088 | keypoint_edges=None, 1089 | track_ids=None, 1090 | use_normalized_coordinates=False, 1091 | max_boxes_to_draw=20, 1092 | min_score_thresh=.5, 1093 | agnostic_mode=False, 1094 | line_thickness=4, 1095 | groundtruth_box_visualization_color='black', 1096 | skip_boxes=False, 1097 | skip_scores=False, 1098 | skip_labels=False, 1099 | skip_track_ids=False): 1100 | """Overlay labeled boxes on an image with formatted scores and label names. 1101 | 1102 | This function groups boxes that correspond to the same location 1103 | and creates a display string for each detection and overlays these 1104 | on the image. Note that this function modifies the image in place, and returns 1105 | that same image. 1106 | 1107 | Args: 1108 | image: uint8 numpy array with shape (img_height, img_width, 3) 1109 | boxes: a numpy array of shape [N, 4] 1110 | classes: a numpy array of shape [N]. Note that class indices are 1-based, 1111 | and match the keys in the label map. 1112 | scores: a numpy array of shape [N] or None. If scores=None, then 1113 | this function assumes that the boxes to be plotted are groundtruth 1114 | boxes and plot all boxes as black with no classes or scores. 1115 | category_index: a dict containing category dictionaries (each holding 1116 | category index `id` and category name `name`) keyed by category indices. 1117 | instance_masks: a uint8 numpy array of shape [N, image_height, image_width], 1118 | can be None. 1119 | instance_boundaries: a numpy array of shape [N, image_height, image_width] 1120 | with values ranging between 0 and 1, can be None. 1121 | keypoints: a numpy array of shape [N, num_keypoints, 2], can 1122 | be None. 1123 | keypoint_scores: a numpy array of shape [N, num_keypoints], can be None. 1124 | keypoint_edges: A list of tuples with keypoint indices that specify which 1125 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 1126 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 1127 | track_ids: a numpy array of shape [N] with unique track ids. 
If provided, 1128 | color-coding of boxes will be determined by these ids, and not the class 1129 | indices. 1130 | use_normalized_coordinates: whether boxes is to be interpreted as 1131 | normalized coordinates or not. 1132 | max_boxes_to_draw: maximum number of boxes to visualize. If None, draw 1133 | all boxes. 1134 | min_score_thresh: minimum score threshold for a box or keypoint to be 1135 | visualized. 1136 | agnostic_mode: boolean (default: False) controlling whether to evaluate in 1137 | class-agnostic mode or not. This mode will display scores but ignore 1138 | classes. 1139 | line_thickness: integer (default: 4) controlling line width of the boxes. 1140 | groundtruth_box_visualization_color: box color for visualizing groundtruth 1141 | boxes 1142 | skip_boxes: whether to skip the drawing of bounding boxes. 1143 | skip_scores: whether to skip score when drawing a single detection 1144 | skip_labels: whether to skip label when drawing a single detection 1145 | skip_track_ids: whether to skip track id when drawing a single detection 1146 | 1147 | Returns: 1148 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes. 1149 | """ 1150 | # Create a display string (and color) for every box location, group any boxes 1151 | # that correspond to the same location. 1152 | box_to_display_str_map = collections.defaultdict(list) 1153 | box_to_color_map = collections.defaultdict(str) 1154 | box_to_instance_masks_map = {} 1155 | box_to_instance_boundaries_map = {} 1156 | box_to_keypoints_map = collections.defaultdict(list) 1157 | box_to_keypoint_scores_map = collections.defaultdict(list) 1158 | box_to_track_ids_map = {} 1159 | if not max_boxes_to_draw: 1160 | max_boxes_to_draw = boxes.shape[0] 1161 | for i in range(boxes.shape[0]): 1162 | if max_boxes_to_draw == len(box_to_color_map): 1163 | break 1164 | if scores is None or scores[i] > min_score_thresh: 1165 | box = tuple(boxes[i].tolist()) 1166 | if instance_masks is not None: 1167 | box_to_instance_masks_map[box] = instance_masks[i] 1168 | if instance_boundaries is not None: 1169 | box_to_instance_boundaries_map[box] = instance_boundaries[i] 1170 | if keypoints is not None: 1171 | box_to_keypoints_map[box].extend(keypoints[i]) 1172 | if keypoint_scores is not None: 1173 | box_to_keypoint_scores_map[box].extend(keypoint_scores[i]) 1174 | if track_ids is not None: 1175 | box_to_track_ids_map[box] = track_ids[i] 1176 | if scores is None: 1177 | box_to_color_map[box] = groundtruth_box_visualization_color 1178 | else: 1179 | display_str = '' 1180 | if not skip_labels: 1181 | if not agnostic_mode: 1182 | if classes[i] in six.viewkeys(category_index): 1183 | class_name = category_index[classes[i]]['name'] 1184 | else: 1185 | class_name = 'N/A' 1186 | display_str = str(class_name) 1187 | if not skip_scores: 1188 | if not display_str: 1189 | display_str = '{}%'.format(round(100*scores[i])) 1190 | else: 1191 | display_str = '{}: {}%'.format(display_str, round(100*scores[i])) 1192 | if not skip_track_ids and track_ids is not None: 1193 | if not display_str: 1194 | display_str = 'ID {}'.format(track_ids[i]) 1195 | else: 1196 | display_str = '{}: ID {}'.format(display_str, track_ids[i]) 1197 | box_to_display_str_map[box].append(display_str) 1198 | if agnostic_mode: 1199 | box_to_color_map[box] = 'DarkOrange' 1200 | elif track_ids is not None: 1201 | prime_multipler = _get_multiplier_for_color_randomness() 1202 | box_to_color_map[box] = STANDARD_COLORS[ 1203 | (prime_multipler * track_ids[i]) % len(STANDARD_COLORS)] 1204 | else: 1205 | 
box_to_color_map[box] = STANDARD_COLORS[ 1206 | classes[i] % len(STANDARD_COLORS)] 1207 | 1208 | # Draw all boxes onto image. 1209 | for box, color in box_to_color_map.items(): 1210 | ymin, xmin, ymax, xmax = box 1211 | if instance_masks is not None: 1212 | draw_mask_on_image_array( 1213 | image, 1214 | box_to_instance_masks_map[box], 1215 | color=color 1216 | ) 1217 | if instance_boundaries is not None: 1218 | draw_mask_on_image_array( 1219 | image, 1220 | box_to_instance_boundaries_map[box], 1221 | color='red', 1222 | alpha=1.0 1223 | ) 1224 | draw_bounding_box_on_image_array( 1225 | image, 1226 | ymin, 1227 | xmin, 1228 | ymax, 1229 | xmax, 1230 | color=color, 1231 | thickness=0 if skip_boxes else line_thickness, 1232 | display_str_list=box_to_display_str_map[box], 1233 | use_normalized_coordinates=use_normalized_coordinates) 1234 | if keypoints is not None: 1235 | keypoint_scores_for_box = None 1236 | if box_to_keypoint_scores_map: 1237 | keypoint_scores_for_box = box_to_keypoint_scores_map[box] 1238 | draw_keypoints_on_image_array( 1239 | image, 1240 | box_to_keypoints_map[box], 1241 | keypoint_scores_for_box, 1242 | min_score_thresh=min_score_thresh, 1243 | color=color, 1244 | radius=line_thickness / 2, 1245 | use_normalized_coordinates=use_normalized_coordinates, 1246 | keypoint_edges=keypoint_edges, 1247 | keypoint_edge_color=color, 1248 | keypoint_edge_width=line_thickness // 2) 1249 | 1250 | return image 1251 | 1252 | 1253 | def add_cdf_image_summary(values, name): 1254 | """Adds a tf.summary.image for a CDF plot of the values. 1255 | 1256 | Normalizes `values` such that they sum to 1, plots the cumulative distribution 1257 | function and creates a tf image summary. 1258 | 1259 | Args: 1260 | values: a 1-D float32 tensor containing the values. 1261 | name: name for the image summary. 1262 | """ 1263 | def cdf_plot(values): 1264 | """Numpy function to plot CDF.""" 1265 | normalized_values = values / np.sum(values) 1266 | sorted_values = np.sort(normalized_values) 1267 | cumulative_values = np.cumsum(sorted_values) 1268 | fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32) 1269 | / cumulative_values.size) 1270 | fig = plt.figure(frameon=False) 1271 | ax = fig.add_subplot('111') 1272 | ax.plot(fraction_of_examples, cumulative_values) 1273 | ax.set_ylabel('cumulative normalized values') 1274 | ax.set_xlabel('fraction of examples') 1275 | fig.canvas.draw() 1276 | width, height = fig.get_size_inches() * fig.get_dpi() 1277 | image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape( 1278 | 1, int(height), int(width), 3) 1279 | return image 1280 | cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8) 1281 | tf.summary.image(name, cdf_plot) 1282 | 1283 | 1284 | def add_hist_image_summary(values, bins, name): 1285 | """Adds a tf.summary.image for a histogram plot of the values. 1286 | 1287 | Plots the histogram of values and creates a tf image summary. 1288 | 1289 | Args: 1290 | values: a 1-D float32 tensor containing the values. 1291 | bins: bin edges which will be directly passed to np.histogram. 1292 | name: name for the image summary. 
1293 | """ 1294 | 1295 | def hist_plot(values, bins): 1296 | """Numpy function to plot hist.""" 1297 | fig = plt.figure(frameon=False) 1298 | ax = fig.add_subplot('111') 1299 | y, x = np.histogram(values, bins=bins) 1300 | ax.plot(x[:-1], y) 1301 | ax.set_ylabel('count') 1302 | ax.set_xlabel('value') 1303 | fig.canvas.draw() 1304 | width, height = fig.get_size_inches() * fig.get_dpi() 1305 | image = np.fromstring( 1306 | fig.canvas.tostring_rgb(), dtype='uint8').reshape( 1307 | 1, int(height), int(width), 3) 1308 | return image 1309 | hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8) 1310 | tf.summary.image(name, hist_plot) 1311 | 1312 | 1313 | class EvalMetricOpsVisualization(six.with_metaclass(abc.ABCMeta, object)): 1314 | """Abstract base class responsible for visualizations during evaluation. 1315 | 1316 | Currently, summary images are not run during evaluation. One way to produce 1317 | evaluation images in Tensorboard is to provide tf.summary.image strings as 1318 | `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is 1319 | responsible for accruing images (with overlaid detections and groundtruth) 1320 | and returning a dictionary that can be passed to `eval_metric_ops`. 1321 | """ 1322 | 1323 | def __init__(self, 1324 | category_index, 1325 | max_examples_to_draw=5, 1326 | max_boxes_to_draw=20, 1327 | min_score_thresh=0.2, 1328 | use_normalized_coordinates=True, 1329 | summary_name_prefix='evaluation_image', 1330 | keypoint_edges=None): 1331 | """Creates an EvalMetricOpsVisualization. 1332 | 1333 | Args: 1334 | category_index: A category index (dictionary) produced from a labelmap. 1335 | max_examples_to_draw: The maximum number of example summaries to produce. 1336 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 1337 | min_score_thresh: The minimum score threshold for showing detections. 1338 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 1339 | normalized coordinates (as opposed to absolute coordinates). 1340 | Default is True. 1341 | summary_name_prefix: A string prefix for each image summary. 1342 | keypoint_edges: A list of tuples with keypoint indices that specify which 1343 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 1344 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 1345 | """ 1346 | 1347 | self._category_index = category_index 1348 | self._max_examples_to_draw = max_examples_to_draw 1349 | self._max_boxes_to_draw = max_boxes_to_draw 1350 | self._min_score_thresh = min_score_thresh 1351 | self._use_normalized_coordinates = use_normalized_coordinates 1352 | self._summary_name_prefix = summary_name_prefix 1353 | self._keypoint_edges = keypoint_edges 1354 | self._images = [] 1355 | 1356 | def clear(self): 1357 | self._images = [] 1358 | 1359 | def add_images(self, images): 1360 | """Store a list of images, each with shape [1, H, W, C].""" 1361 | if len(self._images) >= self._max_examples_to_draw: 1362 | return 1363 | 1364 | # Store images and clip list if necessary. 1365 | self._images.extend(images) 1366 | if len(self._images) > self._max_examples_to_draw: 1367 | self._images[self._max_examples_to_draw:] = [] 1368 | 1369 | def get_estimator_eval_metric_ops(self, eval_dict): 1370 | """Returns metric ops for use in tf.estimator.EstimatorSpec. 1371 | 1372 | Args: 1373 | eval_dict: A dictionary that holds an image, groundtruth, and detections 1374 | for a batched example. Note that, we use only the first example for 1375 | visualization. 
See eval_util.result_dict_for_batched_example() for a 1376 | convenient method for constructing such a dictionary. The dictionary 1377 | contains 1378 | fields.InputDataFields.original_image: [batch_size, H, W, 3] image. 1379 | fields.InputDataFields.original_image_spatial_shape: [batch_size, 2] 1380 | tensor containing the size of the original image. 1381 | fields.InputDataFields.true_image_shape: [batch_size, 3] 1382 | tensor containing the spatial size of the upadded original image. 1383 | fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4] 1384 | float32 tensor with groundtruth boxes in range [0.0, 1.0]. 1385 | fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes] 1386 | int64 tensor with 1-indexed groundtruth classes. 1387 | fields.InputDataFields.groundtruth_instance_masks - (optional) 1388 | [batch_size, num_boxes, H, W] int64 tensor with instance masks. 1389 | fields.InputDataFields.groundtruth_keypoints - (optional) 1390 | [batch_size, num_boxes, num_keypoints, 2] float32 tensor with 1391 | keypoint coordinates in format [y, x]. 1392 | fields.InputDataFields.groundtruth_keypoint_visibilities - (optional) 1393 | [batch_size, num_boxes, num_keypoints] bool tensor with 1394 | keypoint visibilities. 1395 | fields.DetectionResultFields.detection_boxes - [batch_size, 1396 | max_num_boxes, 4] float32 tensor with detection boxes in range [0.0, 1397 | 1.0]. 1398 | fields.DetectionResultFields.detection_classes - [batch_size, 1399 | max_num_boxes] int64 tensor with 1-indexed detection classes. 1400 | fields.DetectionResultFields.detection_scores - [batch_size, 1401 | max_num_boxes] float32 tensor with detection scores. 1402 | fields.DetectionResultFields.detection_masks - (optional) [batch_size, 1403 | max_num_boxes, H, W] float32 tensor of binarized masks. 1404 | fields.DetectionResultFields.detection_keypoints - (optional) 1405 | [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with 1406 | keypoints. 1407 | fields.DetectionResultFields.detection_keypoint_scores - (optional) 1408 | [batch_size, max_num_boxes, num_keypoints] float32 tensor with 1409 | keypoints scores. 1410 | 1411 | Returns: 1412 | A dictionary of image summary names to tuple of (value_op, update_op). The 1413 | `update_op` is the same for all items in the dictionary, and is 1414 | responsible for saving a single side-by-side image with detections and 1415 | groundtruth. Each `value_op` holds the tf.summary.image string for a given 1416 | image. 
1417 | """ 1418 | if self._max_examples_to_draw == 0: 1419 | return {} 1420 | images = self.images_from_evaluation_dict(eval_dict) 1421 | 1422 | def get_images(): 1423 | """Returns a list of images, padded to self._max_images_to_draw.""" 1424 | images = self._images 1425 | while len(images) < self._max_examples_to_draw: 1426 | images.append(np.array(0, dtype=np.uint8)) 1427 | self.clear() 1428 | return images 1429 | 1430 | def image_summary_or_default_string(summary_name, image): 1431 | """Returns image summaries for non-padded elements.""" 1432 | return tf.cond( 1433 | tf.equal(tf.size(tf.shape(image)), 4), 1434 | lambda: tf.summary.image(summary_name, image), 1435 | lambda: tf.constant('')) 1436 | 1437 | if tf.executing_eagerly(): 1438 | update_op = self.add_images([[images[0]]]) 1439 | image_tensors = get_images() 1440 | else: 1441 | update_op = tf.py_func(self.add_images, [[images[0]]], []) 1442 | image_tensors = tf.py_func( 1443 | get_images, [], [tf.uint8] * self._max_examples_to_draw) 1444 | eval_metric_ops = {} 1445 | for i, image in enumerate(image_tensors): 1446 | summary_name = self._summary_name_prefix + '/' + str(i) 1447 | value_op = image_summary_or_default_string(summary_name, image) 1448 | eval_metric_ops[summary_name] = (value_op, update_op) 1449 | return eval_metric_ops 1450 | 1451 | @abc.abstractmethod 1452 | def images_from_evaluation_dict(self, eval_dict): 1453 | """Converts evaluation dictionary into a list of image tensors. 1454 | 1455 | To be overridden by implementations. 1456 | 1457 | Args: 1458 | eval_dict: A dictionary with all the necessary information for producing 1459 | visualizations. 1460 | 1461 | Returns: 1462 | A list of [1, H, W, C] uint8 tensors. 1463 | """ 1464 | raise NotImplementedError 1465 | 1466 | 1467 | class VisualizeSingleFrameDetections(EvalMetricOpsVisualization): 1468 | """Class responsible for single-frame object detection visualizations.""" 1469 | 1470 | def __init__(self, 1471 | category_index, 1472 | max_examples_to_draw=5, 1473 | max_boxes_to_draw=20, 1474 | min_score_thresh=0.2, 1475 | use_normalized_coordinates=True, 1476 | summary_name_prefix='Detections_Left_Groundtruth_Right', 1477 | keypoint_edges=None): 1478 | super(VisualizeSingleFrameDetections, self).__init__( 1479 | category_index=category_index, 1480 | max_examples_to_draw=max_examples_to_draw, 1481 | max_boxes_to_draw=max_boxes_to_draw, 1482 | min_score_thresh=min_score_thresh, 1483 | use_normalized_coordinates=use_normalized_coordinates, 1484 | summary_name_prefix=summary_name_prefix, 1485 | keypoint_edges=keypoint_edges) 1486 | 1487 | def images_from_evaluation_dict(self, eval_dict): 1488 | return draw_side_by_side_evaluation_image(eval_dict, self._category_index, 1489 | self._max_boxes_to_draw, 1490 | self._min_score_thresh, 1491 | self._use_normalized_coordinates, 1492 | self._keypoint_edges) 1493 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. 
We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Car Damage Detection using SageMaker and TensorFlow 2 | 3 | ### Use case: 4 | The global vehicle insurance and vehicle rental industries still rely on manual processes to detect vehicle damage and assess its severity. Visual inspection is commonly used to detect damage during the claims process. The industry is steeped in manual, paper-driven operations, high premiums, poor customer service, and long turnaround times. 5 | Here we will use a machine learning object detection model, EfficientDet, with Amazon SageMaker and TensorFlow. The object detection model will be used to identify and mark the dent and scratch areas in car images. 6 | 7 | Let's refresh the basic terms used in building this ML model. 8 | 9 | ### What is Machine Learning (ML)? 10 | Machine learning is a method of data analysis that automates analytical model building. It is a branch of artificial intelligence based on the idea that systems can learn from data, identify patterns and make decisions with minimal human intervention. 11 | 12 | ### What is Object Detection? 13 | Object detection is a computer technology related to computer vision and image processing that deals with detecting instances of semantic objects of a certain class (such as humans, buildings, or cars) in digital images and videos. 14 | 15 | ### What is the EfficientDet model? 16 | EfficientDet is an object detection machine learning model which utilizes several optimization and backbone tweaks, such as the use of a BiFPN, and a compound scaling method that uniformly scales the resolution, depth and width of all backbones, feature networks and box/class prediction networks at the same time. 17 | 18 | ### What is a loss function or classification loss in this training? 19 | The loss function is a crucial factor affecting detection precision in an object detection task. The classification loss applies to any task that requires classification: given k categories, the model's job is to classify each of x examples into the correct one of those k categories.
Take this project as an example: we are given 100 images and two categories, and the task is to classify each image as "dent" and/or "scratch". 20 | 21 | ### Overview 22 | In this repository, we will build a custom model using SageMaker and TensorFlow to provide bounding boxes around "dent" and/or "scratch" areas in car images. 23 | First, use Amazon SageMaker Ground Truth to label the car images with bounding boxes using the private workforce option. After the labelling job finishes, Ground Truth creates and saves a manifest file in S3. 24 | 25 | Next, use Amazon SageMaker to build, train, and deploy an EfficientDet model using the TensorFlow Object Detection API. The API is built on top of TensorFlow 2 and makes it easy to construct, train and deploy object detection models. It also provides the TensorFlow 2 Detection Model Zoo, a collection of pre-trained detection models we can use to accelerate model building. 26 | 27 | ### High Level Steps: 28 | • Label the car images with bounding boxes as "dent" and/or "scratch" using SageMaker Ground Truth 29 | • Generate the dataset TFRecords and label map using a SageMaker Processing job (a minimal launch sketch is included at the end of this README) 30 | • Fine-tune an EfficientDet model with TF2 on Amazon SageMaker 31 | • Monitor your model training with TensorBoard and SageMaker Debugger 32 | • Deploy your model on a SageMaker endpoint and visualize the predictions by detecting "dent" and/or "scratch" in car images (see the images below and the invocation sketch at the end of this README) 33 | 34 | ### Get started - Instructions 35 | Follow the step-by-step guide by executing the notebooks in the following folders: 36 | #### 0_ground_truth/ground_truth.ipynb 37 | #### 1_prepare_data/prepare_data.ipynb 38 | #### 2_train_model/train_model.ipynb 39 | #### 3_predict/deploy_endpoint.ipynb 40 | 41 | ||| 42 | | -------------- | ---------------------------- | 43 | |![](media/test-1.jpg)|![](media/test-01.jpg)| 44 | |![](media/test-2.jpg)|![](media/test-02.png)| 45 | 46 | ## License 47 | This library is licensed under the MIT-0 License. See the LICENSE file.
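To make the classification-loss idea above concrete, here is a small, self-contained sketch (not part of this repository's training code) that computes a categorical cross-entropy loss for the two classes used in this project. The probability values are made up purely for illustration.

```python
import numpy as np

# Hypothetical model outputs (class probabilities) for three images,
# with classes ordered as ["dent", "scratch"], and one-hot ground-truth labels.
probs = np.array([[0.9, 0.1],   # confident "dent"
                  [0.2, 0.8],   # confident "scratch"
                  [0.6, 0.4]])  # uncertain prediction
labels = np.array([[1, 0],      # true class: dent
                   [0, 1],      # true class: scratch
                   [1, 0]])     # true class: dent

# Categorical cross-entropy: mean of -log(probability assigned to the true class).
# The less confident the model is about the correct class, the larger the loss.
loss = -np.mean(np.sum(labels * np.log(probs), axis=1))
print(f"cross-entropy loss: {loss:.3f}")
```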
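The TFRecord-generation step described in the overview is driven by the notebook in 1_prepare_data/. The sketch below only illustrates the general shape of launching a SageMaker Processing job with a custom ECR image; the image URI, S3 paths and instance settings shown here are placeholders, not the values used by this repository.

```python
import sagemaker
from sagemaker.processing import Processor, ProcessingInput, ProcessingOutput

role = sagemaker.get_execution_role()

# Placeholder ECR image URI; the real one is produced by build_and_push.sh.
image_uri = "<account-id>.dkr.ecr.<region>.amazonaws.com/<image>:<tag>"

processor = Processor(
    role=role,
    image_uri=image_uri,
    instance_count=1,
    instance_type="ml.m5.xlarge")

# Container-specific CLI arguments (see prepare_data.ipynb) would be passed
# via the `arguments=[...]` parameter of run().
processor.run(
    inputs=[ProcessingInput(
        source="s3://<bucket>/raw-images/",          # placeholder input path
        destination="/opt/ml/processing/input")],
    outputs=[ProcessingOutput(
        source="/opt/ml/processing/output",
        destination="s3://<bucket>/tfrecords/")])    # placeholder output path
```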
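Finally, a minimal sketch of the last high-level step: sending a test image to a deployed endpoint and overlaying the predicted boxes with the visualize_boxes_and_labels_on_image_array helper from 3_predict/visualization_utils.py. The endpoint name is hypothetical, the label map is assumed, and the response keys assume the default TensorFlow 2 Object Detection API serving signature; see deploy_endpoint.ipynb for the repository's actual flow.

```python
import json
import boto3
import numpy as np
from PIL import Image

# Assumes this runs next to 3_predict/visualization_utils.py and that its
# TensorFlow Object Detection API dependencies are installed.
import visualization_utils as viz

ENDPOINT_NAME = "car-damage-efficientdet"           # hypothetical endpoint name
CATEGORY_INDEX = {1: {"id": 1, "name": "dent"},     # assumed 1-indexed label map
                  2: {"id": 2, "name": "scratch"}}

runtime = boto3.client("sagemaker-runtime")

# Load a test image and send it to the endpoint as a JSON-encoded batch of one.
image = np.array(Image.open("media/test-1.jpg").convert("RGB"))
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=json.dumps({"instances": [image.tolist()]}))
predictions = json.loads(response["Body"].read())["predictions"][0]

# Overlay predicted boxes, class names and scores in place on the numpy image.
viz.visualize_boxes_and_labels_on_image_array(
    image,
    np.array(predictions["detection_boxes"]),
    np.array(predictions["detection_classes"], dtype=np.int32),
    np.array(predictions["detection_scores"]),
    CATEGORY_INDEX,
    use_normalized_coordinates=True,
    min_score_thresh=0.5)
Image.fromarray(image).save("prediction.jpg")
```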
48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /media/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/.gitkeep -------------------------------------------------------------------------------- /media/test-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-01.jpg -------------------------------------------------------------------------------- /media/test-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-02.png -------------------------------------------------------------------------------- /media/test-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-1.jpg -------------------------------------------------------------------------------- /media/test-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-2.jpg --------------------------------------------------------------------------------