├── .DS_Store ├── 0_ground_truth ├── .gitkeep └── ground_truth.ipynb ├── 1_prepare_data ├── .gitkeep ├── docker │ ├── .gitkeep │ ├── Dockerfile │ ├── build_and_push.sh │ ├── code │ │ ├── .gitkeep │ │ ├── prepare_data.py │ │ └── utils │ │ │ ├── .gitkeep │ │ │ └── tf_record_util.py │ └── requirements.txt └── prepare_data.ipynb ├── 2_train_model ├── .gitkeep ├── docker │ ├── .gitkeep │ ├── Dockerfile │ └── build_and_push.sh ├── source_dir │ ├── .gitkeep │ ├── pipeline.config │ └── run_training.sh └── train_model.ipynb ├── 3_predict ├── .gitkeep ├── deploy_endpoint.ipynb └── visualization_utils.py ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md └── media ├── .gitkeep ├── test-01.jpg ├── test-02.png ├── test-1.jpg └── test-2.jpg /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/.DS_Store -------------------------------------------------------------------------------- /0_ground_truth/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/0_ground_truth/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.2.0-gpu 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install apt dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | git \ 8 | gpg-agent \ 9 | python3-cairocffi \ 10 | protobuf-compiler \ 11 | python3-pil \ 12 | python3-lxml \ 13 | python3-tk \ 14 | wget 15 | 16 | COPY requirements.txt /tmp/ 17 | RUN pip3 install -r /tmp/requirements.txt --no-cache --upgrade 18 | 19 | COPY code /opt/program 20 | 21 | ENTRYPOINT ["python3", "/opt/program/prepare_data.py"] -------------------------------------------------------------------------------- /1_prepare_data/docker/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | image=$1 4 | 5 | ACCOUNT_ID=$(aws sts get-caller-identity --query Account | tr -d '"') 6 | AWS_REGION=$(aws configure get region) 7 | TAG=$(date +%Y%m%d%H%M%S) 8 | 9 | fullname="${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${image}:${TAG}" 10 | 11 | # If the repository doesn't exist in ECR, create it. 12 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 13 | if [[ $? 
-ne 0 ]] 14 | then 15 | aws ecr create-repository --repository-name "${image}" > /dev/null 16 | fi 17 | 18 | # Get the login command from ECR and execute it directly 19 | $(aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com) 20 | 21 | # Build the docker image locally and then push it to ECR with the full name. 22 | cd docker 23 | 24 | echo "Building image with name ${image}" 25 | docker build --no-cache -t ${image} -f Dockerfile . 26 | docker tag ${image} ${fullname} 27 | 28 | echo "Pushing image to ECR ${fullname}" 29 | docker push ${fullname} 30 | 31 | # Writing the image name to let the calling process extract it without manual intervention: 32 | echo "${fullname}" > ecr_image_fullname.txt -------------------------------------------------------------------------------- /1_prepare_data/docker/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/code/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/code/prepare_data.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | # SPDX-License-Identifier: MIT-0 3 | import json 4 | import argparse 5 | from utils.tf_record_util import TfRecordGenerator 6 | 7 | if __name__ == "__main__": 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--input", type=str, default="") 10 | parser.add_argument("--label_map", type=str, default="") 11 | parser.add_argument("--ground_truth_manifest", type=str, default="") 12 | parser.add_argument("--output", type=str, default="") 13 | args, _ = parser.parse_known_args() 14 | 15 | input_folder = args.input 16 | ground_truth_manifest = args.ground_truth_manifest 17 | label_map = json.loads(args.label_map) 18 | output_folder = args.output 19 | 20 | # Feed in necessary path variables from above operations 21 | tf_record_generator = TfRecordGenerator(image_dir=input_folder, 22 | manifest=ground_truth_manifest, 23 | label_map=label_map, 24 | output_dir=output_folder) 25 | 26 | print('GENERATING TF RECORD FILES') 27 | tf_record_generator.generate_tf_records() 28 | 29 | print('GENERATING LABEL MAP FILE') 30 | with open(f'{output_folder}/label_map.pbtxt', 'w') as label_map_file: 31 | for item in label_map: 32 | label_map_file.write('item {\n') 33 | label_map_file.write(' id: ' + str(int(item) + 1) + '\n') 34 | label_map_file.write(" name: '" + label_map[item] + "'\n") 35 | label_map_file.write('}\n\n') 36 | 37 | print('FINISHED') 38 | -------------------------------------------------------------------------------- /1_prepare_data/docker/code/utils/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/1_prepare_data/docker/code/utils/.gitkeep -------------------------------------------------------------------------------- /1_prepare_data/docker/code/utils/tf_record_util.py: -------------------------------------------------------------------------------- 1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | # SPDX-License-Identifier: MIT-0 3 | import os 4 | import io 5 | import json 6 | import jsonlines 7 | import random 8 | import logging 9 | from utils import dataset_util 10 | from PIL import Image 11 | import tensorflow as tf 12 | 13 | 14 | class TfRecordGenerator: 15 | def __init__(self, image_dir, manifest, label_map, output_dir): 16 | self.image_dir = image_dir 17 | self.manifest = manifest 18 | self.label_map = label_map 19 | self.output_dir = output_dir 20 | 21 | def generate_tf_records(self): 22 | with jsonlines.open(self.manifest, 'r') as reader: 23 | ground_truth_annotations = list(reader) 24 | dataset = split_dataset(ground_truth_annotations) 25 | for subset in dataset: 26 | logging.info(f'GENERATING TF RECORD FOR {subset}') 27 | writer = tf.io.TFRecordWriter(os.path.join(self.output_dir, f'{subset}.records')) 28 | for image_annotations in dataset[subset]: 29 | annotation_dict = json.loads(json.dumps(image_annotations)) 30 | tf_example = self._create_tf_example(annotation_dict['source-ref'], 31 | annotation_dict['car-70']['annotations']) # 'car-70' is the Ground Truth labeling job name; replace it with the name of your own labeling job. 32 | writer.write(tf_example.SerializeToString()) 33 | writer.close() 34 | 35 | def _create_tf_example(self, s3_image_path, annotations): 36 | image_name = os.path.basename(s3_image_path) 37 | image_path = f'{self.image_dir}/{image_name}' 38 | im = Image.open(image_path) 39 | 40 | # READ IMAGE FILE 41 | with tf.io.gfile.GFile(image_path, 'rb') as fid: 42 | encoded_jpg = fid.read() 43 | 44 | encoded_jpg_io = io.BytesIO(encoded_jpg) 45 | encoded_jpg_io.seek(0) 46 | image = Image.open(encoded_jpg_io) 47 | image_width, image_height = image.size 48 | if image.format != 'JPEG': 49 | image = image.convert('RGB') 50 | 51 | xmins = [] 52 | ymins = [] 53 | xmaxs = [] 54 | ymaxs = [] 55 | classes = [] 56 | classes_text = [] 57 | for a in annotations: 58 | x = a['left'] 59 | y = a['top'] 60 | width = a['width'] 61 | height = a['height'] 62 | class_id = a['class_id'] 63 | xmins.append(float(x) / image_width) 64 | xmaxs.append(float(x + width) / image_width) 65 | ymins.append(float(y) / image_height) 66 | ymaxs.append(float(y + height) / image_height) 67 | class_name = self.label_map[str(class_id)] 68 | classes_text.append(class_name.encode('utf8')) 69 | classes.append(class_id) 70 | 71 | feature_dict = { 72 | 'image/height': dataset_util.int64_feature(image_height), 73 | 'image/width': dataset_util.int64_feature(image_width), 74 | 'image/filename': dataset_util.bytes_feature(bytes(image_name, 'utf-8')), 75 | 'image/source_id': dataset_util.bytes_feature(bytes(image_name.replace('.jpg', ''), 'utf-8')), 76 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 77 | 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')), 78 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 79 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 80 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 81 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 82 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 83 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 84 | } 85 | example = tf.train.Example(features=tf.train.Features(feature=feature_dict)) 86 | return example 87 | 88 | 89 | def split_dataset(list_images): 90 | dataset = {} 91 | random.seed(42) 92 | random.shuffle(list_images) 93 | num_train = int(0.9 * len(list_images)) 94 | dataset['train'] = list_images[:num_train] 95 | dataset['validation'] = list_images[num_train:] 96 | logging.info('TRAINING EXAMPLES: %d - VALIDATION EXAMPLES: %d', len(dataset['train']), len(dataset['validation'])) 97 | return dataset 98 | 99 | -------------------------------------------------------------------------------- /1_prepare_data/docker/requirements.txt: -------------------------------------------------------------------------------- 1 | jsonlines 2 | pillow -------------------------------------------------------------------------------- /2_train_model/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/.gitkeep -------------------------------------------------------------------------------- /2_train_model/docker/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/docker/.gitkeep -------------------------------------------------------------------------------- /2_train_model/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM tensorflow/tensorflow:2.5.0-gpu 2 | 3 | ARG DEBIAN_FRONTEND=noninteractive 4 | 5 | # Install apt dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | git \ 8 | gpg-agent \ 9 | python3-cairocffi \ 10 | protobuf-compiler \ 11 | python3-pil \ 12 | python3-lxml \ 13 | python3-tk \ 14 | libgl1-mesa-dev \ 15 | wget 16 | 17 | # Copy this version of the model garden into the image 18 | COPY models/research/object_detection /home/tensorflow/models/research/object_detection 19 | 20 | # Compile protobuf configs 21 | RUN (cd /home/tensorflow/models/research/ && protoc object_detection/protos/*.proto --python_out=.) 22 | WORKDIR /home/tensorflow/models/research/ 23 | 24 | RUN cp object_detection/packages/tf2/setup.py ./ 25 | ENV PATH="/home/tensorflow/.local/bin:${PATH}" 26 | RUN python -m pip install -U pip 27 | RUN python -m pip install . 28 | 29 | ENV TF_CPP_MIN_LOG_LEVEL 3 30 | 31 | # Install SageMaker training-toolkit 32 | RUN pip3 install sagemaker-training -------------------------------------------------------------------------------- /2_train_model/docker/build_and_push.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | image=$1 4 | 5 | ACCOUNT_ID=$(aws sts get-caller-identity --query Account | tr -d '"') 6 | AWS_REGION=$(aws configure get region) 7 | TAG=$(date +%Y%m%d%H%M%S) 8 | 9 | fullname="${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${image}:${TAG}" 10 | 11 | # If the repository doesn't exist in ECR, create it. 12 | aws ecr describe-repositories --repository-names "${image}" > /dev/null 2>&1 13 | if [[ $? -ne 0 ]] 14 | then 15 | aws ecr create-repository --repository-name "${image}" > /dev/null 16 | fi 17 | 18 | # Get the login command from ECR and execute it directly 19 | $(aws ecr get-login-password --region ${AWS_REGION} | docker login --username AWS --password-stdin ${ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com) 20 | 21 | # Build the docker image locally and then push it to ECR with the full name. 22 | cd docker 23 | 24 | echo "Building image with name ${image}" 25 | docker build --no-cache -t ${image} -f Dockerfile .
26 | docker tag ${image} ${fullname} 27 | 28 | echo "Pushing image to ECR ${fullname}" 29 | docker push ${fullname} 30 | 31 | # Writing the image name to let the calling process extract it without manual intervention: 32 | echo "${fullname}" > ecr_image_fullname.txt -------------------------------------------------------------------------------- /2_train_model/source_dir/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/2_train_model/source_dir/.gitkeep -------------------------------------------------------------------------------- /2_train_model/source_dir/pipeline.config: -------------------------------------------------------------------------------- 1 | model { 2 | ssd { 3 | num_classes: 2 4 | image_resizer { 5 | keep_aspect_ratio_resizer { 6 | min_dimension: 640 7 | max_dimension: 640 8 | pad_to_max_dimension: true 9 | } 10 | } 11 | feature_extractor { 12 | type: "ssd_efficientnet-b1_bifpn_keras" 13 | conv_hyperparams { 14 | regularizer { 15 | l2_regularizer { 16 | weight: 3.9999998989515007e-05 17 | } 18 | } 19 | initializer { 20 | truncated_normal_initializer { 21 | mean: 0.0 22 | stddev: 0.029999999329447746 23 | } 24 | } 25 | activation: SWISH 26 | batch_norm { 27 | decay: 0.9900000095367432 28 | scale: true 29 | epsilon: 0.0010000000474974513 30 | } 31 | force_use_bias: true 32 | } 33 | bifpn { 34 | min_level: 3 35 | max_level: 7 36 | num_iterations: 4 37 | num_filters: 88 38 | } 39 | } 40 | box_coder { 41 | faster_rcnn_box_coder { 42 | y_scale: 1.0 43 | x_scale: 1.0 44 | height_scale: 1.0 45 | width_scale: 1.0 46 | } 47 | } 48 | matcher { 49 | argmax_matcher { 50 | matched_threshold: 0.5 51 | unmatched_threshold: 0.5 52 | ignore_thresholds: false 53 | negatives_lower_than_unmatched: true 54 | force_match_for_each_row: true 55 | use_matmul_gather: true 56 | } 57 | } 58 | similarity_calculator { 59 | iou_similarity { 60 | } 61 | } 62 | box_predictor { 63 | weight_shared_convolutional_box_predictor { 64 | conv_hyperparams { 65 | regularizer { 66 | l2_regularizer { 67 | weight: 3.9999998989515007e-05 68 | } 69 | } 70 | initializer { 71 | random_normal_initializer { 72 | mean: 0.0 73 | stddev: 0.009999999776482582 74 | } 75 | } 76 | activation: SWISH 77 | batch_norm { 78 | decay: 0.9900000095367432 79 | scale: true 80 | epsilon: 0.0010000000474974513 81 | } 82 | force_use_bias: true 83 | } 84 | depth: 88 85 | num_layers_before_predictor: 3 86 | kernel_size: 3 87 | class_prediction_bias_init: -4.599999904632568 88 | use_depthwise: true 89 | } 90 | } 91 | anchor_generator { 92 | multiscale_anchor_generator { 93 | min_level: 3 94 | max_level: 7 95 | anchor_scale: 4.0 96 | aspect_ratios: 1.0 97 | aspect_ratios: 2.0 98 | aspect_ratios: 0.5 99 | scales_per_octave: 3 100 | } 101 | } 102 | post_processing { 103 | batch_non_max_suppression { 104 | score_threshold: 9.99999993922529e-09 105 | iou_threshold: 0.5 106 | max_detections_per_class: 100 107 | max_total_detections: 100 108 | } 109 | score_converter: SOFTMAX 110 | } 111 | normalize_loss_by_num_matches: true 112 | loss { 113 | localization_loss { 114 | weighted_smooth_l1 { 115 | } 116 | } 117 | classification_loss { 118 | weighted_sigmoid_focal { 119 | gamma: 1.5 120 | alpha: 0.25 121 | } 122 | } 123 | classification_weight: 1.0 124 | localization_weight: 1.0 125 | } 126 | encode_background_as_zeros: true 127 | normalize_loc_loss_by_codesize: true 128 | 
inplace_batchnorm_update: true 129 | freeze_batchnorm: false 130 | add_background_class: false 131 | } 132 | } 133 | train_config { 134 | batch_size: 8 135 | data_augmentation_options { 136 | random_horizontal_flip { 137 | } 138 | } 139 | data_augmentation_options { 140 | random_scale_crop_and_pad_to_square { 141 | output_size: 640 142 | scale_min: 0.10000000149011612 143 | scale_max: 2.0 144 | } 145 | } 146 | sync_replicas: true 147 | optimizer { 148 | momentum_optimizer { 149 | learning_rate { 150 | cosine_decay_learning_rate { 151 | learning_rate_base: 0.07999999821186066 152 | total_steps: 300000 153 | warmup_learning_rate: 0.0010000000474974513 154 | warmup_steps: 2500 155 | } 156 | } 157 | momentum_optimizer_value: 0.8999999761581421 158 | } 159 | use_moving_average: false 160 | } 161 | fine_tune_checkpoint: "checkpoint/ckpt-0" 162 | num_steps: 300000 163 | startup_delay_steps: 0.0 164 | replicas_to_aggregate: 8 165 | max_number_of_boxes: 100 166 | unpad_groundtruth_tensors: false 167 | fine_tune_checkpoint_type: "detection" 168 | use_bfloat16: true 169 | fine_tune_checkpoint_version: V2 170 | } 171 | train_input_reader: { 172 | label_map_path: "/opt/ml/input/data/train/label_map.pbtxt" 173 | tf_record_input_reader { 174 | input_path: "/opt/ml/input/data/train/train.records" 175 | } 176 | } 177 | 178 | eval_config: { 179 | metrics_set: "coco_detection_metrics" 180 | use_moving_averages: false 181 | batch_size: 1; 182 | } 183 | 184 | eval_input_reader: { 185 | label_map_path: "/opt/ml/input/data/train/label_map.pbtxt" 186 | shuffle: false 187 | num_epochs: 1 188 | tf_record_input_reader { 189 | input_path: "/opt/ml/input/data/train/validation.records" 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /2_train_model/source_dir/run_training.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | MODEL_DIR=${SM_HP_MODEL_DIR} 4 | PIPELINE_CONFIG_PATH=${SM_HP_PIPELINE_CONFIG_PATH} 5 | NUM_TRAIN_STEPS=${SM_HP_NUM_TRAIN_STEPS} 6 | SAMPLE_1_OF_N_EVAL_EXAMPLES=${SM_HP_SAMPLE_1_OF_N_EVAL_EXAMPLES} 7 | 8 | if [ "${SM_NUM_GPUS}" -gt 0 ] 9 | then 10 | NUM_WORKERS=${SM_NUM_GPUS} 11 | else 12 | NUM_WORKERS=1 13 | fi 14 | 15 | echo "==TRAINING THE MODEL==" 16 | python model_main_tf2.py \ 17 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 18 | --model_dir ${MODEL_DIR} \ 19 | --num_train_steps ${NUM_TRAIN_STEPS} \ 20 | --num_workers ${NUM_WORKERS} \ 21 | --sample_1_of_n_eval_examples ${SAMPLE_1_OF_N_EVAL_EXAMPLES} \ 22 | --alsologtostderr 23 | 24 | echo "==EVALUATING THE MODEL==" 25 | python model_main_tf2.py \ 26 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 27 | --model_dir ${MODEL_DIR} \ 28 | --checkpoint_dir ${MODEL_DIR} \ 29 | --eval_timeout 10 30 | 31 | echo "==EXPORTING THE MODEL==" 32 | python exporter_main_v2.py \ 33 | --trained_checkpoint_dir ${MODEL_DIR} \ 34 | --pipeline_config_path ${PIPELINE_CONFIG_PATH} \ 35 | --output_directory /tmp/exported 36 | 37 | mv /tmp/exported/saved_model /opt/ml/model/1 -------------------------------------------------------------------------------- /2_train_model/train_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Training Stage: Train an object detection model using TensorFlow on SageMaker" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "## Prerequisites\n", 15
| "\n", 16 | "Please get the s3 URI from Sagemaker processing job (refer previous notebook).\n", 17 | "\n", 18 | "#### S3 URI example: \n", 19 | "s3://<<\"sagemaker_default_bucket_name\">>/data/car-gt-100/tfrecords\n", 20 | "### Note:-- Above S3 URI used as an input to sagemaker training job" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "## Setup environment" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "import os\n", 37 | "import sagemaker\n", 38 | "from sagemaker.estimator import Framework, Estimator\n", 39 | "\n", 40 | "role = sagemaker.get_execution_role()\n", 41 | "inputs = {'train': 's3://<<\"sagemaker_default_bucket_name\">>/data/car-gt-100/tfrecords/'} # define s3 training data inputs, refer previous notebook." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "## Build and push container" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "%%bash\n", 58 | "git clone https://github.com/tensorflow/models.git docker/models\n", 59 | "# get model_main and exporter_main files from TF2 Object Detection GitHub repository\n", 60 | "cp docker/models/research/object_detection/exporter_main_v2.py source_dir \n", 61 | "cp docker/models/research/object_detection/model_main_tf2.py source_dir" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "image_name = 'car-tf2-object-detection-1'" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "scrolled": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "!sh ./docker/build_and_push.sh $image_name" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "with open (os.path.join('docker', 'ecr_image_fullname.txt'), 'r') as f:\n", 91 | " container = f.readlines()[0][:-1]\n", 92 | "\n", 93 | "print(container)" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "metadata": {}, 99 | "source": [ 100 | "## Get pre-trained model from model zoo" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "Download the base model and extract locally" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "scrolled": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "%%bash\n", 119 | "mkdir /tmp/checkpoint\n", 120 | "mkdir source_dir/checkpoint\n", 121 | "wget -O /tmp/efficientdet.tar.gz http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d1_coco17_tpu-32.tar.gz\n", 122 | "tar -zxvf /tmp/efficientdet.tar.gz --strip-components 2 --directory source_dir/checkpoint efficientdet_d1_coco17_tpu-32/checkpoint" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Create SageMaker Custom Framework and Launch Training job" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "Here we define a custom framework estimator using the Amazon SageMaker Python SDK and run training with that class, which will take care of managing these tasks." 
137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": {}, 143 | "outputs": [], 144 | "source": [ 145 | "class CustomFramework(Framework):\n", 146 | " def __init__(\n", 147 | " self,\n", 148 | " entry_point,\n", 149 | " source_dir=None,\n", 150 | " hyperparameters=None,\n", 151 | " distributions=None,\n", 152 | " **kwargs\n", 153 | " ):\n", 154 | " super(CustomFramework, self).__init__(entry_point, source_dir, hyperparameters, **kwargs)\n", 155 | " \n", 156 | " def _configure_distribution(self, distributions):\n", 157 | " return\n", 158 | " \n", 159 | " def create_model(\n", 160 | " self,\n", 161 | " model_server_workers=None,\n", 162 | " role=None,\n", 163 | " vpc_config_override=None,\n", 164 | " entry_point=None,\n", 165 | " source_dir=None,\n", 166 | " dependencies=None,\n", 167 | " image_uri=None,\n", 168 | " **kwargs\n", 169 | " ):\n", 170 | " return None" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "from sagemaker.debugger import TensorBoardOutputConfig\n", 180 | "\n", 181 | "hyperparameters = {\n", 182 | " \"model_dir\":\"/opt/training\", \n", 183 | " \"pipeline_config_path\": \"pipeline.config\",\n", 184 | " \"num_train_steps\": 1000, \n", 185 | " \"sample_1_of_n_eval_examples\": 1\n", 186 | "}\n", 187 | "\n", 188 | "estimator = CustomFramework(\n", 189 | " image_uri=container,\n", 190 | " role=role,\n", 191 | " entry_point='run_training.sh',\n", 192 | " source_dir='source_dir/',\n", 193 | " instance_count=1,\n", 194 | " instance_type='ml.p3.2xlarge',\n", 195 | " hyperparameters=hyperparameters,\n", 196 | " disable_profiler=True,\n", 197 | " base_job_name='car-new-tf2-object-detection'\n", 198 | ")" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [ 207 | "estimator.fit(inputs)" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "\n" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [] 225 | } 226 | ], 227 | "metadata": { 228 | "kernelspec": { 229 | "display_name": "conda_tensorflow_p36", 230 | "language": "python", 231 | "name": "conda_tensorflow_p36" 232 | }, 233 | "language_info": { 234 | "codemirror_mode": { 235 | "name": "ipython", 236 | "version": 3 237 | }, 238 | "file_extension": ".py", 239 | "mimetype": "text/x-python", 240 | "name": "python", 241 | "nbconvert_exporter": "python", 242 | "pygments_lexer": "ipython3", 243 | "version": "3.6.13" 244 | } 245 | }, 246 | "nbformat": 4, 247 | "nbformat_minor": 4 248 | } 249 | -------------------------------------------------------------------------------- /3_predict/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/3_predict/.gitkeep -------------------------------------------------------------------------------- /3_predict/visualization_utils.py: -------------------------------------------------------------------------------- 1 | # Licensed under the Apache License, Version 2.0 (the "License"); 2 | # you may not use this file except in compliance with the License. 
3 | # You may obtain a copy of the License at 4 | # 5 | # http://www.apache.org/licenses/LICENSE-2.0 6 | # 7 | # Unless required by applicable law or agreed to in writing, software 8 | # distributed under the License is distributed on an "AS IS" BASIS, 9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 | # See the License for the specific language governing permissions and 11 | # limitations under the License. 12 | # ============================================================================== 13 | 14 | """A set of functions that are used for visualization. 15 | 16 | These functions often receive an image, perform some visualization on the image. 17 | The functions do not return a value, instead they modify the image itself. 18 | 19 | """ 20 | from __future__ import absolute_import 21 | from __future__ import division 22 | from __future__ import print_function 23 | 24 | import abc 25 | import collections 26 | # Set headless-friendly backend. 27 | import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements 28 | import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top 29 | import numpy as np 30 | import PIL.Image as Image 31 | import PIL.ImageColor as ImageColor 32 | import PIL.ImageDraw as ImageDraw 33 | import PIL.ImageFont as ImageFont 34 | import six 35 | from six.moves import range 36 | from six.moves import zip 37 | import tensorflow as tf 38 | 39 | _TITLE_LEFT_MARGIN = 10 40 | _TITLE_TOP_MARGIN = 10 41 | STANDARD_COLORS = [ 42 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 43 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 44 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 45 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 46 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 47 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 48 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 49 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 50 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 51 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 52 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 53 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 54 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 55 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 56 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 57 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 58 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 59 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 60 | 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 61 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 62 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 63 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 64 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 65 | ] 66 | 67 | 68 | def _get_multiplier_for_color_randomness(): 69 | """Returns a multiplier to get semi-random colors from successive indices. 
70 | 71 | This function computes a prime number, p, in the range [2, 17] that: 72 | - is closest to len(STANDARD_COLORS) / 10 73 | - does not divide len(STANDARD_COLORS) 74 | 75 | If no prime numbers in that range satisfy the constraints, p is returned as 1. 76 | 77 | Once p is established, it can be used as a multiplier to select 78 | non-consecutive colors from STANDARD_COLORS: 79 | colors = [(p * i) % len(STANDARD_COLORS) for i in range(20)] 80 | """ 81 | num_colors = len(STANDARD_COLORS) 82 | prime_candidates = [5, 7, 11, 13, 17] 83 | 84 | # Remove all prime candidates that divide the number of colors. 85 | prime_candidates = [p for p in prime_candidates if num_colors % p] 86 | if not prime_candidates: 87 | return 1 88 | 89 | # Return the closest prime number to num_colors / 10. 90 | abs_distance = [np.abs(num_colors / 10. - p) for p in prime_candidates] 91 | num_candidates = len(abs_distance) 92 | inds = [i for _, i in sorted(zip(abs_distance, range(num_candidates)))] 93 | return prime_candidates[inds[0]] 94 | 95 | 96 | def save_image_array_as_png(image, output_path): 97 | """Saves an image (represented as a numpy array) to PNG. 98 | 99 | Args: 100 | image: a numpy array with shape [height, width, 3]. 101 | output_path: path to which image should be written. 102 | """ 103 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 104 | with tf.gfile.Open(output_path, 'w') as fid: 105 | image_pil.save(fid, 'PNG') 106 | 107 | 108 | def encode_image_array_as_png_str(image): 109 | """Encodes a numpy array into a PNG string. 110 | 111 | Args: 112 | image: a numpy array with shape [height, width, 3]. 113 | 114 | Returns: 115 | PNG encoded image string. 116 | """ 117 | image_pil = Image.fromarray(np.uint8(image)) 118 | output = six.BytesIO() 119 | image_pil.save(output, format='PNG') 120 | png_string = output.getvalue() 121 | output.close() 122 | return png_string 123 | 124 | 125 | def draw_bounding_box_on_image_array(image, 126 | ymin, 127 | xmin, 128 | ymax, 129 | xmax, 130 | color='red', 131 | thickness=4, 132 | display_str_list=(), 133 | use_normalized_coordinates=True): 134 | """Adds a bounding box to an image (numpy array). 135 | 136 | Bounding box coordinates can be specified in either absolute (pixel) or 137 | normalized coordinates by setting the use_normalized_coordinates argument. 138 | 139 | Args: 140 | image: a numpy array with shape [height, width, 3]. 141 | ymin: ymin of bounding box. 142 | xmin: xmin of bounding box. 143 | ymax: ymax of bounding box. 144 | xmax: xmax of bounding box. 145 | color: color to draw bounding box. Default is red. 146 | thickness: line thickness. Default value is 4. 147 | display_str_list: list of strings to display in box 148 | (each to be shown on its own line). 149 | use_normalized_coordinates: If True (default), treat coordinates 150 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 151 | coordinates as absolute. 152 | """ 153 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 154 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 155 | thickness, display_str_list, 156 | use_normalized_coordinates) 157 | np.copyto(image, np.array(image_pil)) 158 | 159 | 160 | def draw_bounding_box_on_image(image, 161 | ymin, 162 | xmin, 163 | ymax, 164 | xmax, 165 | color='red', 166 | thickness=4, 167 | display_str_list=(), 168 | use_normalized_coordinates=True): 169 | """Adds a bounding box to an image. 
170 | 171 | Bounding box coordinates can be specified in either absolute (pixel) or 172 | normalized coordinates by setting the use_normalized_coordinates argument. 173 | 174 | Each string in display_str_list is displayed on a separate line above the 175 | bounding box in black text on a rectangle filled with the input 'color'. 176 | If the top of the bounding box extends to the edge of the image, the strings 177 | are displayed below the bounding box. 178 | 179 | Args: 180 | image: a PIL.Image object. 181 | ymin: ymin of bounding box. 182 | xmin: xmin of bounding box. 183 | ymax: ymax of bounding box. 184 | xmax: xmax of bounding box. 185 | color: color to draw bounding box. Default is red. 186 | thickness: line thickness. Default value is 4. 187 | display_str_list: list of strings to display in box 188 | (each to be shown on its own line). 189 | use_normalized_coordinates: If True (default), treat coordinates 190 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 191 | coordinates as absolute. 192 | """ 193 | draw = ImageDraw.Draw(image) 194 | im_width, im_height = image.size 195 | if use_normalized_coordinates: 196 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 197 | ymin * im_height, ymax * im_height) 198 | else: 199 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 200 | if thickness > 0: 201 | draw.line([(left, top), (left, bottom), (right, bottom), (right, top), 202 | (left, top)], 203 | width=thickness, 204 | fill=color) 205 | try: 206 | font = ImageFont.truetype('arial.ttf', 24) 207 | except IOError: 208 | font = ImageFont.load_default() 209 | 210 | # If the total height of the display strings added to the top of the bounding 211 | # box exceeds the top of the image, stack the strings below the bounding box 212 | # instead of above. 213 | display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] 214 | # Each display_str has a top and bottom margin of 0.05x. 215 | total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) 216 | 217 | if top > total_display_str_height: 218 | text_bottom = top 219 | else: 220 | text_bottom = bottom + total_display_str_height 221 | # Reverse list and print from bottom to top. 222 | for display_str in display_str_list[::-1]: 223 | text_width, text_height = font.getsize(display_str) 224 | margin = np.ceil(0.05 * text_height) 225 | draw.rectangle( 226 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 227 | text_bottom)], 228 | fill=color) 229 | draw.text( 230 | (left + margin, text_bottom - text_height - margin), 231 | display_str, 232 | fill='black', 233 | font=font) 234 | text_bottom -= text_height - 2 * margin 235 | 236 | 237 | def draw_bounding_boxes_on_image_array(image, 238 | boxes, 239 | color='red', 240 | thickness=4, 241 | display_str_list_list=()): 242 | """Draws bounding boxes on image (numpy array). 243 | 244 | Args: 245 | image: a numpy array object. 246 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 247 | The coordinates are in normalized format between [0, 1]. 248 | color: color to draw bounding box. Default is red. 249 | thickness: line thickness. Default value is 4. 250 | display_str_list_list: list of list of strings. 251 | a list of strings for each bounding box. 252 | The reason to pass a list of strings for a 253 | bounding box is that it might contain 254 | multiple labels. 
255 | 256 | Raises: 257 | ValueError: if boxes is not a [N, 4] array 258 | """ 259 | image_pil = Image.fromarray(image) 260 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 261 | display_str_list_list) 262 | np.copyto(image, np.array(image_pil)) 263 | 264 | 265 | def draw_bounding_boxes_on_image(image, 266 | boxes, 267 | color='red', 268 | thickness=4, 269 | display_str_list_list=()): 270 | """Draws bounding boxes on image. 271 | 272 | Args: 273 | image: a PIL.Image object. 274 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 275 | The coordinates are in normalized format between [0, 1]. 276 | color: color to draw bounding box. Default is red. 277 | thickness: line thickness. Default value is 4. 278 | display_str_list_list: list of list of strings. 279 | a list of strings for each bounding box. 280 | The reason to pass a list of strings for a 281 | bounding box is that it might contain 282 | multiple labels. 283 | 284 | Raises: 285 | ValueError: if boxes is not a [N, 4] array 286 | """ 287 | boxes_shape = boxes.shape 288 | if not boxes_shape: 289 | return 290 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 291 | raise ValueError('Input must be of size [N, 4]') 292 | for i in range(boxes_shape[0]): 293 | display_str_list = () 294 | if display_str_list_list: 295 | display_str_list = display_str_list_list[i] 296 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 297 | boxes[i, 3], color, thickness, display_str_list) 298 | 299 | 300 | def create_visualization_fn(category_index, 301 | include_masks=False, 302 | include_keypoints=False, 303 | include_keypoint_scores=False, 304 | include_track_ids=False, 305 | **kwargs): 306 | """Constructs a visualization function that can be wrapped in a py_func. 307 | 308 | py_funcs only accept positional arguments. This function returns a suitable 309 | function with the correct positional argument mapping. The positional 310 | arguments in order are: 311 | 0: image 312 | 1: boxes 313 | 2: classes 314 | 3: scores 315 | [4]: masks (optional) 316 | [4-5]: keypoints (optional) 317 | [4-6]: keypoint_scores (optional) 318 | [4-7]: track_ids (optional) 319 | 320 | -- Example 1 -- 321 | vis_only_masks_fn = create_visualization_fn(category_index, 322 | include_masks=True, include_keypoints=False, include_track_ids=False, 323 | **kwargs) 324 | image = tf.py_func(vis_only_masks_fn, 325 | inp=[image, boxes, classes, scores, masks], 326 | Tout=tf.uint8) 327 | 328 | -- Example 2 -- 329 | vis_masks_and_track_ids_fn = create_visualization_fn(category_index, 330 | include_masks=True, include_keypoints=False, include_track_ids=True, 331 | **kwargs) 332 | image = tf.py_func(vis_masks_and_track_ids_fn, 333 | inp=[image, boxes, classes, scores, masks, track_ids], 334 | Tout=tf.uint8) 335 | 336 | Args: 337 | category_index: a dict that maps integer ids to category dicts. e.g. 338 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 339 | include_masks: Whether masks should be expected as a positional argument in 340 | the returned function. 341 | include_keypoints: Whether keypoints should be expected as a positional 342 | argument in the returned function. 343 | include_keypoint_scores: Whether keypoint scores should be expected as a 344 | positional argument in the returned function. 345 | include_track_ids: Whether track ids should be expected as a positional 346 | argument in the returned function. 347 | **kwargs: Additional kwargs that will be passed to 348 | visualize_boxes_and_labels_on_image_array. 
349 | 350 | Returns: 351 | Returns a function that only takes tensors as positional arguments. 352 | """ 353 | 354 | def visualization_py_func_fn(*args): 355 | """Visualization function that can be wrapped in a tf.py_func. 356 | 357 | Args: 358 | *args: First 4 positional arguments must be: 359 | image - uint8 numpy array with shape (img_height, img_width, 3). 360 | boxes - a numpy array of shape [N, 4]. 361 | classes - a numpy array of shape [N]. 362 | scores - a numpy array of shape [N] or None. 363 | -- Optional positional arguments -- 364 | instance_masks - a numpy array of shape [N, image_height, image_width]. 365 | keypoints - a numpy array of shape [N, num_keypoints, 2]. 366 | keypoint_scores - a numpy array of shape [N, num_keypoints]. 367 | track_ids - a numpy array of shape [N] with unique track ids. 368 | 369 | Returns: 370 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid 371 | boxes. 372 | """ 373 | image = args[0] 374 | boxes = args[1] 375 | classes = args[2] 376 | scores = args[3] 377 | masks = keypoints = keypoint_scores = track_ids = None 378 | pos_arg_ptr = 4 # Positional argument for first optional tensor (masks). 379 | if include_masks: 380 | masks = args[pos_arg_ptr] 381 | pos_arg_ptr += 1 382 | if include_keypoints: 383 | keypoints = args[pos_arg_ptr] 384 | pos_arg_ptr += 1 385 | if include_keypoint_scores: 386 | keypoint_scores = args[pos_arg_ptr] 387 | pos_arg_ptr += 1 388 | if include_track_ids: 389 | track_ids = args[pos_arg_ptr] 390 | 391 | return visualize_boxes_and_labels_on_image_array( 392 | image, 393 | boxes, 394 | classes, 395 | scores, 396 | category_index=category_index, 397 | instance_masks=masks, 398 | keypoints=keypoints, 399 | keypoint_scores=keypoint_scores, 400 | track_ids=track_ids, 401 | **kwargs) 402 | return visualization_py_func_fn 403 | 404 | 405 | def draw_heatmaps_on_image(image, heatmaps): 406 | """Draws heatmaps on an image. 407 | 408 | The heatmaps are handled channel by channel and different colors are used to 409 | paint different heatmap channels. 410 | 411 | Args: 412 | image: a PIL.Image object. 413 | heatmaps: a numpy array with shape [image_height, image_width, channel]. 414 | Note that the image_height and image_width should match the size of input 415 | image. 416 | """ 417 | draw = ImageDraw.Draw(image) 418 | channel = heatmaps.shape[2] 419 | for c in range(channel): 420 | heatmap = heatmaps[:, :, c] * 255 421 | heatmap = heatmap.astype('uint8') 422 | bitmap = Image.fromarray(heatmap, 'L') 423 | bitmap.convert('1') 424 | draw.bitmap( 425 | xy=[(0, 0)], 426 | bitmap=bitmap, 427 | fill=STANDARD_COLORS[c]) 428 | 429 | 430 | def draw_heatmaps_on_image_array(image, heatmaps): 431 | """Overlays heatmaps to an image (numpy array). 432 | 433 | The function overlays the heatmaps on top of image. The heatmap values will be 434 | painted with different colors depending on the channels. Similar to 435 | "draw_heatmaps_on_image_array" function except the inputs are numpy arrays. 436 | 437 | Args: 438 | image: a numpy array with shape [height, width, 3]. 439 | heatmaps: a numpy array with shape [height, width, channel]. 440 | 441 | Returns: 442 | An uint8 numpy array representing the input image painted with heatmap 443 | colors. 
444 | """ 445 | if not isinstance(image, np.ndarray): 446 | image = image.numpy() 447 | if not isinstance(heatmaps, np.ndarray): 448 | heatmaps = heatmaps.numpy() 449 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 450 | draw_heatmaps_on_image(image_pil, heatmaps) 451 | return np.array(image_pil) 452 | 453 | 454 | def draw_heatmaps_on_image_tensors(images, 455 | heatmaps, 456 | apply_sigmoid=False): 457 | """Draws heatmaps on batch of image tensors. 458 | 459 | Args: 460 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 461 | channels will be ignored. If C = 1, then we convert the images to RGB 462 | images. 463 | heatmaps: [N, h, w, channel] float32 tensor of heatmaps. Note that the 464 | heatmaps will be resized to match the input image size before overlaying 465 | the heatmaps with input images. Theoretically the heatmap height width 466 | should have the same aspect ratio as the input image to avoid potential 467 | misalignment introduced by the image resize. 468 | apply_sigmoid: Whether to apply a sigmoid layer on top of the heatmaps. If 469 | the heatmaps come directly from the prediction logits, then we should 470 | apply the sigmoid layer to make sure the values are in between [0.0, 1.0]. 471 | 472 | Returns: 473 | 4D image tensor of type uint8, with heatmaps overlaid on top. 474 | """ 475 | # Additional channels are being ignored. 476 | if images.shape[3] > 3: 477 | images = images[:, :, :, 0:3] 478 | elif images.shape[3] == 1: 479 | images = tf.image.grayscale_to_rgb(images) 480 | 481 | _, height, width, _ = shape_utils.combined_static_and_dynamic_shape(images) 482 | if apply_sigmoid: 483 | heatmaps = tf.math.sigmoid(heatmaps) 484 | resized_heatmaps = tf.image.resize(heatmaps, size=[height, width]) 485 | 486 | elems = [images, resized_heatmaps] 487 | 488 | def draw_heatmaps(image_and_heatmaps): 489 | """Draws heatmaps on image.""" 490 | image_with_heatmaps = tf.py_function( 491 | draw_heatmaps_on_image_array, 492 | image_and_heatmaps, 493 | tf.uint8) 494 | return image_with_heatmaps 495 | images = tf.map_fn(draw_heatmaps, elems, dtype=tf.uint8, back_prop=False) 496 | return images 497 | 498 | 499 | def _resize_original_image(image, image_shape): 500 | image = tf.expand_dims(image, 0) 501 | image = tf.image.resize_images( 502 | image, 503 | image_shape, 504 | method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, 505 | align_corners=True) 506 | return tf.cast(tf.squeeze(image, 0), tf.uint8) 507 | 508 | 509 | def draw_bounding_boxes_on_image_tensors(images, 510 | boxes, 511 | classes, 512 | scores, 513 | category_index, 514 | original_image_spatial_shape=None, 515 | true_image_shape=None, 516 | instance_masks=None, 517 | keypoints=None, 518 | keypoint_scores=None, 519 | keypoint_edges=None, 520 | track_ids=None, 521 | max_boxes_to_draw=20, 522 | min_score_thresh=0.2, 523 | use_normalized_coordinates=True): 524 | """Draws bounding boxes, masks, and keypoints on batch of image tensors. 525 | 526 | Args: 527 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 528 | channels will be ignored. If C = 1, then we convert the images to RGB 529 | images. 530 | boxes: [N, max_detections, 4] float32 tensor of detection boxes. 531 | classes: [N, max_detections] int tensor of detection classes. Note that 532 | classes are 1-indexed. 533 | scores: [N, max_detections] float32 tensor of detection scores. 534 | category_index: a dict that maps integer ids to category dicts. e.g. 
535 | {1: {1: 'dog'}, 2: {2: 'cat'}, ...} 536 | original_image_spatial_shape: [N, 2] tensor containing the spatial size of 537 | the original image. 538 | true_image_shape: [N, 3] tensor containing the spatial size of unpadded 539 | original_image. 540 | instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with 541 | instance masks. 542 | keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2] 543 | with keypoints. 544 | keypoint_scores: A 3D float32 tensor of shape [N, max_detection, 545 | num_keypoints] with keypoint scores. 546 | keypoint_edges: A list of tuples with keypoint indices that specify which 547 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 548 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 549 | track_ids: [N, max_detections] int32 tensor of unique tracks ids (i.e. 550 | instance ids for each object). If provided, the color-coding of boxes is 551 | dictated by these ids, and not classes. 552 | max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. 553 | min_score_thresh: Minimum score threshold for visualization. Default 0.2. 554 | use_normalized_coordinates: Whether to assume boxes and kepoints are in 555 | normalized coordinates (as opposed to absolute coordiantes). 556 | Default is True. 557 | 558 | Returns: 559 | 4D image tensor of type uint8, with boxes drawn on top. 560 | """ 561 | # Additional channels are being ignored. 562 | if images.shape[3] > 3: 563 | images = images[:, :, :, 0:3] 564 | elif images.shape[3] == 1: 565 | images = tf.image.grayscale_to_rgb(images) 566 | visualization_keyword_args = { 567 | 'use_normalized_coordinates': use_normalized_coordinates, 568 | 'max_boxes_to_draw': max_boxes_to_draw, 569 | 'min_score_thresh': min_score_thresh, 570 | 'agnostic_mode': False, 571 | 'line_thickness': 4, 572 | 'keypoint_edges': keypoint_edges 573 | } 574 | if true_image_shape is None: 575 | true_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 3]) 576 | else: 577 | true_shapes = true_image_shape 578 | if original_image_spatial_shape is None: 579 | original_shapes = tf.constant(-1, shape=[images.shape.as_list()[0], 2]) 580 | else: 581 | original_shapes = original_image_spatial_shape 582 | 583 | visualize_boxes_fn = create_visualization_fn( 584 | category_index, 585 | include_masks=instance_masks is not None, 586 | include_keypoints=keypoints is not None, 587 | include_keypoint_scores=keypoint_scores is not None, 588 | include_track_ids=track_ids is not None, 589 | **visualization_keyword_args) 590 | 591 | elems = [true_shapes, original_shapes, images, boxes, classes, scores] 592 | if instance_masks is not None: 593 | elems.append(instance_masks) 594 | if keypoints is not None: 595 | elems.append(keypoints) 596 | if keypoint_scores is not None: 597 | elems.append(keypoint_scores) 598 | if track_ids is not None: 599 | elems.append(track_ids) 600 | 601 | def draw_boxes(image_and_detections): 602 | """Draws boxes on image.""" 603 | true_shape = image_and_detections[0] 604 | original_shape = image_and_detections[1] 605 | if true_image_shape is not None: 606 | image = shape_utils.pad_or_clip_nd(image_and_detections[2], 607 | [true_shape[0], true_shape[1], 3]) 608 | if original_image_spatial_shape is not None: 609 | image_and_detections[2] = _resize_original_image(image, original_shape) 610 | 611 | image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections[2:], 612 | tf.uint8) 613 | return image_with_boxes 614 | 615 | images = tf.map_fn(draw_boxes, elems, 
dtype=tf.uint8, back_prop=False) 616 | return images 617 | 618 | 619 | def draw_side_by_side_evaluation_image(eval_dict, 620 | category_index, 621 | max_boxes_to_draw=20, 622 | min_score_thresh=0.2, 623 | use_normalized_coordinates=True, 624 | keypoint_edges=None): 625 | """Creates a side-by-side image with detections and groundtruth. 626 | 627 | Bounding boxes (and instance masks, if available) are visualized on both 628 | subimages. 629 | 630 | Args: 631 | eval_dict: The evaluation dictionary returned by 632 | eval_util.result_dict_for_batched_example() or 633 | eval_util.result_dict_for_single_example(). 634 | category_index: A category index (dictionary) produced from a labelmap. 635 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 636 | min_score_thresh: The minimum score threshold for showing detections. 637 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 638 | normalized coordinates (as opposed to absolute coordinates). 639 | Default is True. 640 | keypoint_edges: A list of tuples with keypoint indices that specify which 641 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 642 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 643 | 644 | Returns: 645 | A list of [1, H, 2 * W, C] uint8 tensor. The subimage on the left 646 | corresponds to detections, while the subimage on the right corresponds to 647 | groundtruth. 648 | """ 649 | detection_fields = fields.DetectionResultFields() 650 | input_data_fields = fields.InputDataFields() 651 | 652 | images_with_detections_list = [] 653 | 654 | # Add the batch dimension if the eval_dict is for single example. 655 | if len(eval_dict[detection_fields.detection_classes].shape) == 1: 656 | for key in eval_dict: 657 | if (key != input_data_fields.original_image and 658 | key != input_data_fields.image_additional_channels): 659 | eval_dict[key] = tf.expand_dims(eval_dict[key], 0) 660 | 661 | for indx in range(eval_dict[input_data_fields.original_image].shape[0]): 662 | instance_masks = None 663 | if detection_fields.detection_masks in eval_dict: 664 | instance_masks = tf.cast( 665 | tf.expand_dims( 666 | eval_dict[detection_fields.detection_masks][indx], axis=0), 667 | tf.uint8) 668 | keypoints = None 669 | keypoint_scores = None 670 | if detection_fields.detection_keypoints in eval_dict: 671 | keypoints = tf.expand_dims( 672 | eval_dict[detection_fields.detection_keypoints][indx], axis=0) 673 | if detection_fields.detection_keypoint_scores in eval_dict: 674 | keypoint_scores = tf.expand_dims( 675 | eval_dict[detection_fields.detection_keypoint_scores][indx], axis=0) 676 | else: 677 | keypoint_scores = tf.cast(keypoint_ops.set_keypoint_visibilities( 678 | keypoints), dtype=tf.float32) 679 | 680 | groundtruth_instance_masks = None 681 | if input_data_fields.groundtruth_instance_masks in eval_dict: 682 | groundtruth_instance_masks = tf.cast( 683 | tf.expand_dims( 684 | eval_dict[input_data_fields.groundtruth_instance_masks][indx], 685 | axis=0), tf.uint8) 686 | groundtruth_keypoints = None 687 | groundtruth_keypoint_scores = None 688 | gt_kpt_vis_fld = input_data_fields.groundtruth_keypoint_visibilities 689 | if input_data_fields.groundtruth_keypoints in eval_dict: 690 | groundtruth_keypoints = tf.expand_dims( 691 | eval_dict[input_data_fields.groundtruth_keypoints][indx], axis=0) 692 | if gt_kpt_vis_fld in eval_dict: 693 | groundtruth_keypoint_scores = tf.expand_dims( 694 | tf.cast(eval_dict[gt_kpt_vis_fld][indx], dtype=tf.float32), axis=0) 695 | else: 696 | 
groundtruth_keypoint_scores = tf.cast( 697 | keypoint_ops.set_keypoint_visibilities( 698 | groundtruth_keypoints), dtype=tf.float32) 699 | 700 | images_with_detections = draw_bounding_boxes_on_image_tensors( 701 | tf.expand_dims( 702 | eval_dict[input_data_fields.original_image][indx], axis=0), 703 | tf.expand_dims( 704 | eval_dict[detection_fields.detection_boxes][indx], axis=0), 705 | tf.expand_dims( 706 | eval_dict[detection_fields.detection_classes][indx], axis=0), 707 | tf.expand_dims( 708 | eval_dict[detection_fields.detection_scores][indx], axis=0), 709 | category_index, 710 | original_image_spatial_shape=tf.expand_dims( 711 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 712 | axis=0), 713 | true_image_shape=tf.expand_dims( 714 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 715 | instance_masks=instance_masks, 716 | keypoints=keypoints, 717 | keypoint_scores=keypoint_scores, 718 | keypoint_edges=keypoint_edges, 719 | max_boxes_to_draw=max_boxes_to_draw, 720 | min_score_thresh=min_score_thresh, 721 | use_normalized_coordinates=use_normalized_coordinates) 722 | images_with_groundtruth = draw_bounding_boxes_on_image_tensors( 723 | tf.expand_dims( 724 | eval_dict[input_data_fields.original_image][indx], axis=0), 725 | tf.expand_dims( 726 | eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), 727 | tf.expand_dims( 728 | eval_dict[input_data_fields.groundtruth_classes][indx], axis=0), 729 | tf.expand_dims( 730 | tf.ones_like( 731 | eval_dict[input_data_fields.groundtruth_classes][indx], 732 | dtype=tf.float32), 733 | axis=0), 734 | category_index, 735 | original_image_spatial_shape=tf.expand_dims( 736 | eval_dict[input_data_fields.original_image_spatial_shape][indx], 737 | axis=0), 738 | true_image_shape=tf.expand_dims( 739 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 740 | instance_masks=groundtruth_instance_masks, 741 | keypoints=groundtruth_keypoints, 742 | keypoint_scores=groundtruth_keypoint_scores, 743 | keypoint_edges=keypoint_edges, 744 | max_boxes_to_draw=None, 745 | min_score_thresh=0.0, 746 | use_normalized_coordinates=use_normalized_coordinates) 747 | images_to_visualize = tf.concat([images_with_detections, 748 | images_with_groundtruth], axis=2) 749 | 750 | if input_data_fields.image_additional_channels in eval_dict: 751 | images_with_additional_channels_groundtruth = ( 752 | draw_bounding_boxes_on_image_tensors( 753 | tf.expand_dims( 754 | eval_dict[input_data_fields.image_additional_channels][indx], 755 | axis=0), 756 | tf.expand_dims( 757 | eval_dict[input_data_fields.groundtruth_boxes][indx], axis=0), 758 | tf.expand_dims( 759 | eval_dict[input_data_fields.groundtruth_classes][indx], 760 | axis=0), 761 | tf.expand_dims( 762 | tf.ones_like( 763 | eval_dict[input_data_fields.groundtruth_classes][indx], 764 | dtype=tf.float32), 765 | axis=0), 766 | category_index, 767 | original_image_spatial_shape=tf.expand_dims( 768 | eval_dict[input_data_fields.original_image_spatial_shape] 769 | [indx], 770 | axis=0), 771 | true_image_shape=tf.expand_dims( 772 | eval_dict[input_data_fields.true_image_shape][indx], axis=0), 773 | instance_masks=groundtruth_instance_masks, 774 | keypoints=None, 775 | keypoint_edges=None, 776 | max_boxes_to_draw=None, 777 | min_score_thresh=0.0, 778 | use_normalized_coordinates=use_normalized_coordinates)) 779 | images_to_visualize = tf.concat( 780 | [images_to_visualize, images_with_additional_channels_groundtruth], 781 | axis=2) 782 | images_with_detections_list.append(images_to_visualize) 
783 | 784 | return images_with_detections_list 785 | 786 | 787 | def draw_densepose_visualizations(eval_dict, 788 | max_boxes_to_draw=20, 789 | min_score_thresh=0.2, 790 | num_parts=24, 791 | dp_coord_to_visualize=0): 792 | """Draws DensePose visualizations. 793 | 794 | Args: 795 | eval_dict: The evaluation dictionary returned by 796 | eval_util.result_dict_for_batched_example(). 797 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 798 | min_score_thresh: The minimum score threshold for showing detections. 799 | num_parts: The number of different densepose parts. 800 | dp_coord_to_visualize: Whether to visualize v-coordinates (0) or 801 | u-coordinates (1) overlaid on the person masks. 802 | 803 | Returns: 804 | A list of [1, H, W, C] uint8 tensors, each element corresponding to an image 805 | in the batch. 806 | 807 | Raises: 808 | ValueError: If `dp_coord_to_visualize` is not 0 or 1. 809 | """ 810 | if dp_coord_to_visualize not in (0, 1): 811 | raise ValueError('`dp_coord_to_visualize` must be either 0 for v ' 812 | 'coordinates, or 1 for u coordinates, but instead got ' 813 | '{}'.format(dp_coord_to_visualize)) 814 | detection_fields = fields.DetectionResultFields() 815 | input_data_fields = fields.InputDataFields() 816 | 817 | if detection_fields.detection_masks not in eval_dict: 818 | raise ValueError('Expected `detection_masks` in `eval_dict`.') 819 | if detection_fields.detection_surface_coords not in eval_dict: 820 | raise ValueError('Expected `detection_surface_coords` in `eval_dict`.') 821 | 822 | images_with_detections_list = [] 823 | for indx in range(eval_dict[input_data_fields.original_image].shape[0]): 824 | # Note that detection masks have already been resized to the original image 825 | # shapes, but `original_image` has not. 826 | # TODO(ronnyvotel): Consider resizing `original_image` in 827 | # eval_util.result_dict_for_batched_example().
828 | true_shape = eval_dict[input_data_fields.true_image_shape][indx] 829 | original_shape = eval_dict[ 830 | input_data_fields.original_image_spatial_shape][indx] 831 | image = eval_dict[input_data_fields.original_image][indx] 832 | image = shape_utils.pad_or_clip_nd(image, [true_shape[0], true_shape[1], 3]) 833 | image = _resize_original_image(image, original_shape) 834 | 835 | scores = eval_dict[detection_fields.detection_scores][indx] 836 | detection_masks = eval_dict[detection_fields.detection_masks][indx] 837 | surface_coords = eval_dict[detection_fields.detection_surface_coords][indx] 838 | 839 | def draw_densepose_py_func(image, detection_masks, surface_coords, scores): 840 | """Overlays part masks and surface coords on original images.""" 841 | surface_coord_image = np.copy(image) 842 | for i, (score, surface_coord, mask) in enumerate( 843 | zip(scores, surface_coords, detection_masks)): 844 | if i == max_boxes_to_draw: 845 | break 846 | if score > min_score_thresh: 847 | draw_part_mask_on_image_array(image, mask, num_parts=num_parts) 848 | draw_float_channel_on_image_array( 849 | surface_coord_image, surface_coord[:, :, dp_coord_to_visualize], 850 | mask) 851 | return np.concatenate([image, surface_coord_image], axis=1) 852 | 853 | image_with_densepose = tf.py_func( 854 | draw_densepose_py_func, 855 | [image, detection_masks, surface_coords, scores], 856 | tf.uint8) 857 | images_with_detections_list.append( 858 | image_with_densepose[tf.newaxis, :, :, :]) 859 | return images_with_detections_list 860 | 861 | 862 | def draw_keypoints_on_image_array(image, 863 | keypoints, 864 | keypoint_scores=None, 865 | min_score_thresh=0.5, 866 | color='red', 867 | radius=2, 868 | use_normalized_coordinates=True, 869 | keypoint_edges=None, 870 | keypoint_edge_color='green', 871 | keypoint_edge_width=2): 872 | """Draws keypoints on an image (numpy array). 873 | 874 | Args: 875 | image: a numpy array with shape [height, width, 3]. 876 | keypoints: a numpy array with shape [num_keypoints, 2]. 877 | keypoint_scores: a numpy array with shape [num_keypoints]. If provided, only 878 | those keypoints with a score above score_threshold will be visualized. 879 | min_score_thresh: A scalar indicating the minimum keypoint score required 880 | for a keypoint to be visualized. Note that keypoint_scores must be 881 | provided for this threshold to take effect. 882 | color: color to draw the keypoints with. Default is red. 883 | radius: keypoint radius. Default value is 2. 884 | use_normalized_coordinates: if True (default), treat keypoint values as 885 | relative to the image. Otherwise treat them as absolute. 886 | keypoint_edges: A list of tuples with keypoint indices that specify which 887 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 888 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 889 | keypoint_edge_color: color to draw the keypoint edges with. Default is red. 890 | keypoint_edge_width: width of the edges drawn between keypoints. Default 891 | value is 2. 
892 | """ 893 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 894 | draw_keypoints_on_image(image_pil, 895 | keypoints, 896 | keypoint_scores=keypoint_scores, 897 | min_score_thresh=min_score_thresh, 898 | color=color, 899 | radius=radius, 900 | use_normalized_coordinates=use_normalized_coordinates, 901 | keypoint_edges=keypoint_edges, 902 | keypoint_edge_color=keypoint_edge_color, 903 | keypoint_edge_width=keypoint_edge_width) 904 | np.copyto(image, np.array(image_pil)) 905 | 906 | 907 | def draw_keypoints_on_image(image, 908 | keypoints, 909 | keypoint_scores=None, 910 | min_score_thresh=0.5, 911 | color='red', 912 | radius=2, 913 | use_normalized_coordinates=True, 914 | keypoint_edges=None, 915 | keypoint_edge_color='green', 916 | keypoint_edge_width=2): 917 | """Draws keypoints on an image. 918 | 919 | Args: 920 | image: a PIL.Image object. 921 | keypoints: a numpy array with shape [num_keypoints, 2]. 922 | keypoint_scores: a numpy array with shape [num_keypoints]. 923 | min_score_thresh: a score threshold for visualizing keypoints. Only used if 924 | keypoint_scores is provided. 925 | color: color to draw the keypoints with. Default is red. 926 | radius: keypoint radius. Default value is 2. 927 | use_normalized_coordinates: if True (default), treat keypoint values as 928 | relative to the image. Otherwise treat them as absolute. 929 | keypoint_edges: A list of tuples with keypoint indices that specify which 930 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 931 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 932 | keypoint_edge_color: color to draw the keypoint edges with. Default is red. 933 | keypoint_edge_width: width of the edges drawn between keypoints. Default 934 | value is 2. 935 | """ 936 | draw = ImageDraw.Draw(image) 937 | im_width, im_height = image.size 938 | keypoints = np.array(keypoints) 939 | keypoints_x = [k[1] for k in keypoints] 940 | keypoints_y = [k[0] for k in keypoints] 941 | if use_normalized_coordinates: 942 | keypoints_x = tuple([im_width * x for x in keypoints_x]) 943 | keypoints_y = tuple([im_height * y for y in keypoints_y]) 944 | if keypoint_scores is not None: 945 | keypoint_scores = np.array(keypoint_scores) 946 | valid_kpt = np.greater(keypoint_scores, min_score_thresh) 947 | else: 948 | valid_kpt = np.where(np.any(np.isnan(keypoints), axis=1), 949 | np.zeros_like(keypoints[:, 0]), 950 | np.ones_like(keypoints[:, 0])) 951 | valid_kpt = [v for v in valid_kpt] 952 | 953 | for keypoint_x, keypoint_y, valid in zip(keypoints_x, keypoints_y, valid_kpt): 954 | if valid: 955 | draw.ellipse([(keypoint_x - radius, keypoint_y - radius), 956 | (keypoint_x + radius, keypoint_y + radius)], 957 | outline=color, fill=color) 958 | if keypoint_edges is not None: 959 | for keypoint_start, keypoint_end in keypoint_edges: 960 | if (keypoint_start < 0 or keypoint_start >= len(keypoints) or 961 | keypoint_end < 0 or keypoint_end >= len(keypoints)): 962 | continue 963 | if not (valid_kpt[keypoint_start] and valid_kpt[keypoint_end]): 964 | continue 965 | edge_coordinates = [ 966 | keypoints_x[keypoint_start], keypoints_y[keypoint_start], 967 | keypoints_x[keypoint_end], keypoints_y[keypoint_end] 968 | ] 969 | draw.line( 970 | edge_coordinates, fill=keypoint_edge_color, width=keypoint_edge_width) 971 | 972 | 973 | def draw_mask_on_image_array(image, mask, color='red', alpha=0.4): 974 | """Draws mask on an image. 
975 | 976 | Args: 977 | image: uint8 numpy array with shape (img_height, img_height, 3) 978 | mask: a uint8 numpy array of shape (img_height, img_height) with 979 | values between either 0 or 1. 980 | color: color to draw the keypoints with. Default is red. 981 | alpha: transparency value between 0 and 1. (default: 0.4) 982 | 983 | Raises: 984 | ValueError: On incorrect data type for image or masks. 985 | """ 986 | if image.dtype != np.uint8: 987 | raise ValueError('`image` not of type np.uint8') 988 | if mask.dtype != np.uint8: 989 | raise ValueError('`mask` not of type np.uint8') 990 | if image.shape[:2] != mask.shape: 991 | raise ValueError('The image has spatial dimensions %s but the mask has ' 992 | 'dimensions %s' % (image.shape[:2], mask.shape)) 993 | rgb = ImageColor.getrgb(color) 994 | pil_image = Image.fromarray(image) 995 | 996 | solid_color = np.expand_dims( 997 | np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3]) 998 | pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA') 999 | pil_mask = Image.fromarray(np.uint8(255.0*alpha*(mask > 0))).convert('L') 1000 | pil_image = Image.composite(pil_solid_color, pil_image, pil_mask) 1001 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1002 | 1003 | 1004 | def draw_part_mask_on_image_array(image, mask, alpha=0.4, num_parts=24): 1005 | """Draws part mask on an image. 1006 | 1007 | Args: 1008 | image: uint8 numpy array with shape (img_height, img_height, 3) 1009 | mask: a uint8 numpy array of shape (img_height, img_height) with 1010 | 1-indexed parts (0 for background). 1011 | alpha: transparency value between 0 and 1 (default: 0.4) 1012 | num_parts: the maximum number of parts that may exist in the image (default 1013 | 24 for DensePose). 1014 | 1015 | Raises: 1016 | ValueError: On incorrect data type for image or masks. 1017 | """ 1018 | if image.dtype != np.uint8: 1019 | raise ValueError('`image` not of type np.uint8') 1020 | if mask.dtype != np.uint8: 1021 | raise ValueError('`mask` not of type np.uint8') 1022 | if image.shape[:2] != mask.shape: 1023 | raise ValueError('The image has spatial dimensions %s but the mask has ' 1024 | 'dimensions %s' % (image.shape[:2], mask.shape)) 1025 | 1026 | pil_image = Image.fromarray(image) 1027 | part_colors = np.zeros_like(image) 1028 | mask_1_channel = mask[:, :, np.newaxis] 1029 | for i, color in enumerate(STANDARD_COLORS[:num_parts]): 1030 | rgb = np.array(ImageColor.getrgb(color), dtype=np.uint8) 1031 | part_colors += (mask_1_channel == i + 1) * rgb[np.newaxis, np.newaxis, :] 1032 | pil_part_colors = Image.fromarray(np.uint8(part_colors)).convert('RGBA') 1033 | pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L') 1034 | pil_image = Image.composite(pil_part_colors, pil_image, pil_mask) 1035 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1036 | 1037 | 1038 | def draw_float_channel_on_image_array(image, channel, mask, alpha=0.9, 1039 | cmap='YlGn'): 1040 | """Draws a floating point channel on an image array. 1041 | 1042 | Args: 1043 | image: uint8 numpy array with shape (img_height, img_height, 3) 1044 | channel: float32 numpy array with shape (img_height, img_height). The values 1045 | should be in the range [0, 1], and will be mapped to colors using the 1046 | provided colormap `cmap` argument. 1047 | mask: a uint8 numpy array of shape (img_height, img_height) with 1048 | 1-indexed parts (0 for background). 1049 | alpha: transparency value between 0 and 1 (default: 0.9) 1050 | cmap: string with the colormap to use. 
1051 | 1052 | Raises: 1053 | ValueError: On incorrect data type for image or masks. 1054 | """ 1055 | if image.dtype != np.uint8: 1056 | raise ValueError('`image` not of type np.uint8') 1057 | if channel.dtype != np.float32: 1058 | raise ValueError('`channel` not of type np.float32') 1059 | if mask.dtype != np.uint8: 1060 | raise ValueError('`mask` not of type np.uint8') 1061 | if image.shape[:2] != channel.shape: 1062 | raise ValueError('The image has spatial dimensions %s but the channel has ' 1063 | 'dimensions %s' % (image.shape[:2], channel.shape)) 1064 | if image.shape[:2] != mask.shape: 1065 | raise ValueError('The image has spatial dimensions %s but the mask has ' 1066 | 'dimensions %s' % (image.shape[:2], mask.shape)) 1067 | 1068 | cm = plt.get_cmap(cmap) 1069 | pil_image = Image.fromarray(image) 1070 | colored_channel = cm(channel)[:, :, :3] 1071 | pil_colored_channel = Image.fromarray( 1072 | np.uint8(colored_channel * 255)).convert('RGBA') 1073 | pil_mask = Image.fromarray(np.uint8(255.0 * alpha * (mask > 0))).convert('L') 1074 | pil_image = Image.composite(pil_colored_channel, pil_image, pil_mask) 1075 | np.copyto(image, np.array(pil_image.convert('RGB'))) 1076 | 1077 | 1078 | def visualize_boxes_and_labels_on_image_array( 1079 | image, 1080 | boxes, 1081 | classes, 1082 | scores, 1083 | category_index, 1084 | instance_masks=None, 1085 | instance_boundaries=None, 1086 | keypoints=None, 1087 | keypoint_scores=None, 1088 | keypoint_edges=None, 1089 | track_ids=None, 1090 | use_normalized_coordinates=False, 1091 | max_boxes_to_draw=20, 1092 | min_score_thresh=.5, 1093 | agnostic_mode=False, 1094 | line_thickness=4, 1095 | groundtruth_box_visualization_color='black', 1096 | skip_boxes=False, 1097 | skip_scores=False, 1098 | skip_labels=False, 1099 | skip_track_ids=False): 1100 | """Overlay labeled boxes on an image with formatted scores and label names. 1101 | 1102 | This function groups boxes that correspond to the same location 1103 | and creates a display string for each detection and overlays these 1104 | on the image. Note that this function modifies the image in place, and returns 1105 | that same image. 1106 | 1107 | Args: 1108 | image: uint8 numpy array with shape (img_height, img_width, 3) 1109 | boxes: a numpy array of shape [N, 4] 1110 | classes: a numpy array of shape [N]. Note that class indices are 1-based, 1111 | and match the keys in the label map. 1112 | scores: a numpy array of shape [N] or None. If scores=None, then 1113 | this function assumes that the boxes to be plotted are groundtruth 1114 | boxes and plot all boxes as black with no classes or scores. 1115 | category_index: a dict containing category dictionaries (each holding 1116 | category index `id` and category name `name`) keyed by category indices. 1117 | instance_masks: a uint8 numpy array of shape [N, image_height, image_width], 1118 | can be None. 1119 | instance_boundaries: a numpy array of shape [N, image_height, image_width] 1120 | with values ranging between 0 and 1, can be None. 1121 | keypoints: a numpy array of shape [N, num_keypoints, 2], can 1122 | be None. 1123 | keypoint_scores: a numpy array of shape [N, num_keypoints], can be None. 1124 | keypoint_edges: A list of tuples with keypoint indices that specify which 1125 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 1126 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 1127 | track_ids: a numpy array of shape [N] with unique track ids. 
If provided, 1128 | color-coding of boxes will be determined by these ids, and not the class 1129 | indices. 1130 | use_normalized_coordinates: whether boxes is to be interpreted as 1131 | normalized coordinates or not. 1132 | max_boxes_to_draw: maximum number of boxes to visualize. If None, draw 1133 | all boxes. 1134 | min_score_thresh: minimum score threshold for a box or keypoint to be 1135 | visualized. 1136 | agnostic_mode: boolean (default: False) controlling whether to evaluate in 1137 | class-agnostic mode or not. This mode will display scores but ignore 1138 | classes. 1139 | line_thickness: integer (default: 4) controlling line width of the boxes. 1140 | groundtruth_box_visualization_color: box color for visualizing groundtruth 1141 | boxes 1142 | skip_boxes: whether to skip the drawing of bounding boxes. 1143 | skip_scores: whether to skip score when drawing a single detection 1144 | skip_labels: whether to skip label when drawing a single detection 1145 | skip_track_ids: whether to skip track id when drawing a single detection 1146 | 1147 | Returns: 1148 | uint8 numpy array with shape (img_height, img_width, 3) with overlaid boxes. 1149 | """ 1150 | # Create a display string (and color) for every box location, group any boxes 1151 | # that correspond to the same location. 1152 | box_to_display_str_map = collections.defaultdict(list) 1153 | box_to_color_map = collections.defaultdict(str) 1154 | box_to_instance_masks_map = {} 1155 | box_to_instance_boundaries_map = {} 1156 | box_to_keypoints_map = collections.defaultdict(list) 1157 | box_to_keypoint_scores_map = collections.defaultdict(list) 1158 | box_to_track_ids_map = {} 1159 | if not max_boxes_to_draw: 1160 | max_boxes_to_draw = boxes.shape[0] 1161 | for i in range(boxes.shape[0]): 1162 | if max_boxes_to_draw == len(box_to_color_map): 1163 | break 1164 | if scores is None or scores[i] > min_score_thresh: 1165 | box = tuple(boxes[i].tolist()) 1166 | if instance_masks is not None: 1167 | box_to_instance_masks_map[box] = instance_masks[i] 1168 | if instance_boundaries is not None: 1169 | box_to_instance_boundaries_map[box] = instance_boundaries[i] 1170 | if keypoints is not None: 1171 | box_to_keypoints_map[box].extend(keypoints[i]) 1172 | if keypoint_scores is not None: 1173 | box_to_keypoint_scores_map[box].extend(keypoint_scores[i]) 1174 | if track_ids is not None: 1175 | box_to_track_ids_map[box] = track_ids[i] 1176 | if scores is None: 1177 | box_to_color_map[box] = groundtruth_box_visualization_color 1178 | else: 1179 | display_str = '' 1180 | if not skip_labels: 1181 | if not agnostic_mode: 1182 | if classes[i] in six.viewkeys(category_index): 1183 | class_name = category_index[classes[i]]['name'] 1184 | else: 1185 | class_name = 'N/A' 1186 | display_str = str(class_name) 1187 | if not skip_scores: 1188 | if not display_str: 1189 | display_str = '{}%'.format(round(100*scores[i])) 1190 | else: 1191 | display_str = '{}: {}%'.format(display_str, round(100*scores[i])) 1192 | if not skip_track_ids and track_ids is not None: 1193 | if not display_str: 1194 | display_str = 'ID {}'.format(track_ids[i]) 1195 | else: 1196 | display_str = '{}: ID {}'.format(display_str, track_ids[i]) 1197 | box_to_display_str_map[box].append(display_str) 1198 | if agnostic_mode: 1199 | box_to_color_map[box] = 'DarkOrange' 1200 | elif track_ids is not None: 1201 | prime_multipler = _get_multiplier_for_color_randomness() 1202 | box_to_color_map[box] = STANDARD_COLORS[ 1203 | (prime_multipler * track_ids[i]) % len(STANDARD_COLORS)] 1204 | else: 1205 | 
box_to_color_map[box] = STANDARD_COLORS[ 1206 | classes[i] % len(STANDARD_COLORS)] 1207 | 1208 | # Draw all boxes onto image. 1209 | for box, color in box_to_color_map.items(): 1210 | ymin, xmin, ymax, xmax = box 1211 | if instance_masks is not None: 1212 | draw_mask_on_image_array( 1213 | image, 1214 | box_to_instance_masks_map[box], 1215 | color=color 1216 | ) 1217 | if instance_boundaries is not None: 1218 | draw_mask_on_image_array( 1219 | image, 1220 | box_to_instance_boundaries_map[box], 1221 | color='red', 1222 | alpha=1.0 1223 | ) 1224 | draw_bounding_box_on_image_array( 1225 | image, 1226 | ymin, 1227 | xmin, 1228 | ymax, 1229 | xmax, 1230 | color=color, 1231 | thickness=0 if skip_boxes else line_thickness, 1232 | display_str_list=box_to_display_str_map[box], 1233 | use_normalized_coordinates=use_normalized_coordinates) 1234 | if keypoints is not None: 1235 | keypoint_scores_for_box = None 1236 | if box_to_keypoint_scores_map: 1237 | keypoint_scores_for_box = box_to_keypoint_scores_map[box] 1238 | draw_keypoints_on_image_array( 1239 | image, 1240 | box_to_keypoints_map[box], 1241 | keypoint_scores_for_box, 1242 | min_score_thresh=min_score_thresh, 1243 | color=color, 1244 | radius=line_thickness / 2, 1245 | use_normalized_coordinates=use_normalized_coordinates, 1246 | keypoint_edges=keypoint_edges, 1247 | keypoint_edge_color=color, 1248 | keypoint_edge_width=line_thickness // 2) 1249 | 1250 | return image 1251 | 1252 | 1253 | def add_cdf_image_summary(values, name): 1254 | """Adds a tf.summary.image for a CDF plot of the values. 1255 | 1256 | Normalizes `values` such that they sum to 1, plots the cumulative distribution 1257 | function and creates a tf image summary. 1258 | 1259 | Args: 1260 | values: a 1-D float32 tensor containing the values. 1261 | name: name for the image summary. 1262 | """ 1263 | def cdf_plot(values): 1264 | """Numpy function to plot CDF.""" 1265 | normalized_values = values / np.sum(values) 1266 | sorted_values = np.sort(normalized_values) 1267 | cumulative_values = np.cumsum(sorted_values) 1268 | fraction_of_examples = (np.arange(cumulative_values.size, dtype=np.float32) 1269 | / cumulative_values.size) 1270 | fig = plt.figure(frameon=False) 1271 | ax = fig.add_subplot('111') 1272 | ax.plot(fraction_of_examples, cumulative_values) 1273 | ax.set_ylabel('cumulative normalized values') 1274 | ax.set_xlabel('fraction of examples') 1275 | fig.canvas.draw() 1276 | width, height = fig.get_size_inches() * fig.get_dpi() 1277 | image = np.fromstring(fig.canvas.tostring_rgb(), dtype='uint8').reshape( 1278 | 1, int(height), int(width), 3) 1279 | return image 1280 | cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8) 1281 | tf.summary.image(name, cdf_plot) 1282 | 1283 | 1284 | def add_hist_image_summary(values, bins, name): 1285 | """Adds a tf.summary.image for a histogram plot of the values. 1286 | 1287 | Plots the histogram of values and creates a tf image summary. 1288 | 1289 | Args: 1290 | values: a 1-D float32 tensor containing the values. 1291 | bins: bin edges which will be directly passed to np.histogram. 1292 | name: name for the image summary. 
1293 | """ 1294 | 1295 | def hist_plot(values, bins): 1296 | """Numpy function to plot hist.""" 1297 | fig = plt.figure(frameon=False) 1298 | ax = fig.add_subplot('111') 1299 | y, x = np.histogram(values, bins=bins) 1300 | ax.plot(x[:-1], y) 1301 | ax.set_ylabel('count') 1302 | ax.set_xlabel('value') 1303 | fig.canvas.draw() 1304 | width, height = fig.get_size_inches() * fig.get_dpi() 1305 | image = np.fromstring( 1306 | fig.canvas.tostring_rgb(), dtype='uint8').reshape( 1307 | 1, int(height), int(width), 3) 1308 | return image 1309 | hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8) 1310 | tf.summary.image(name, hist_plot) 1311 | 1312 | 1313 | class EvalMetricOpsVisualization(six.with_metaclass(abc.ABCMeta, object)): 1314 | """Abstract base class responsible for visualizations during evaluation. 1315 | 1316 | Currently, summary images are not run during evaluation. One way to produce 1317 | evaluation images in Tensorboard is to provide tf.summary.image strings as 1318 | `value_ops` in tf.estimator.EstimatorSpec's `eval_metric_ops`. This class is 1319 | responsible for accruing images (with overlaid detections and groundtruth) 1320 | and returning a dictionary that can be passed to `eval_metric_ops`. 1321 | """ 1322 | 1323 | def __init__(self, 1324 | category_index, 1325 | max_examples_to_draw=5, 1326 | max_boxes_to_draw=20, 1327 | min_score_thresh=0.2, 1328 | use_normalized_coordinates=True, 1329 | summary_name_prefix='evaluation_image', 1330 | keypoint_edges=None): 1331 | """Creates an EvalMetricOpsVisualization. 1332 | 1333 | Args: 1334 | category_index: A category index (dictionary) produced from a labelmap. 1335 | max_examples_to_draw: The maximum number of example summaries to produce. 1336 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 1337 | min_score_thresh: The minimum score threshold for showing detections. 1338 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 1339 | normalized coordinates (as opposed to absolute coordinates). 1340 | Default is True. 1341 | summary_name_prefix: A string prefix for each image summary. 1342 | keypoint_edges: A list of tuples with keypoint indices that specify which 1343 | keypoints should be connected by an edge, e.g. [(0, 1), (2, 4)] draws 1344 | edges from keypoint 0 to 1 and from keypoint 2 to 4. 1345 | """ 1346 | 1347 | self._category_index = category_index 1348 | self._max_examples_to_draw = max_examples_to_draw 1349 | self._max_boxes_to_draw = max_boxes_to_draw 1350 | self._min_score_thresh = min_score_thresh 1351 | self._use_normalized_coordinates = use_normalized_coordinates 1352 | self._summary_name_prefix = summary_name_prefix 1353 | self._keypoint_edges = keypoint_edges 1354 | self._images = [] 1355 | 1356 | def clear(self): 1357 | self._images = [] 1358 | 1359 | def add_images(self, images): 1360 | """Store a list of images, each with shape [1, H, W, C].""" 1361 | if len(self._images) >= self._max_examples_to_draw: 1362 | return 1363 | 1364 | # Store images and clip list if necessary. 1365 | self._images.extend(images) 1366 | if len(self._images) > self._max_examples_to_draw: 1367 | self._images[self._max_examples_to_draw:] = [] 1368 | 1369 | def get_estimator_eval_metric_ops(self, eval_dict): 1370 | """Returns metric ops for use in tf.estimator.EstimatorSpec. 1371 | 1372 | Args: 1373 | eval_dict: A dictionary that holds an image, groundtruth, and detections 1374 | for a batched example. Note that, we use only the first example for 1375 | visualization. 
See eval_util.result_dict_for_batched_example() for a 1376 | convenient method for constructing such a dictionary. The dictionary 1377 | contains 1378 | fields.InputDataFields.original_image: [batch_size, H, W, 3] image. 1379 | fields.InputDataFields.original_image_spatial_shape: [batch_size, 2] 1380 | tensor containing the size of the original image. 1381 | fields.InputDataFields.true_image_shape: [batch_size, 3] 1382 | tensor containing the spatial size of the upadded original image. 1383 | fields.InputDataFields.groundtruth_boxes - [batch_size, num_boxes, 4] 1384 | float32 tensor with groundtruth boxes in range [0.0, 1.0]. 1385 | fields.InputDataFields.groundtruth_classes - [batch_size, num_boxes] 1386 | int64 tensor with 1-indexed groundtruth classes. 1387 | fields.InputDataFields.groundtruth_instance_masks - (optional) 1388 | [batch_size, num_boxes, H, W] int64 tensor with instance masks. 1389 | fields.InputDataFields.groundtruth_keypoints - (optional) 1390 | [batch_size, num_boxes, num_keypoints, 2] float32 tensor with 1391 | keypoint coordinates in format [y, x]. 1392 | fields.InputDataFields.groundtruth_keypoint_visibilities - (optional) 1393 | [batch_size, num_boxes, num_keypoints] bool tensor with 1394 | keypoint visibilities. 1395 | fields.DetectionResultFields.detection_boxes - [batch_size, 1396 | max_num_boxes, 4] float32 tensor with detection boxes in range [0.0, 1397 | 1.0]. 1398 | fields.DetectionResultFields.detection_classes - [batch_size, 1399 | max_num_boxes] int64 tensor with 1-indexed detection classes. 1400 | fields.DetectionResultFields.detection_scores - [batch_size, 1401 | max_num_boxes] float32 tensor with detection scores. 1402 | fields.DetectionResultFields.detection_masks - (optional) [batch_size, 1403 | max_num_boxes, H, W] float32 tensor of binarized masks. 1404 | fields.DetectionResultFields.detection_keypoints - (optional) 1405 | [batch_size, max_num_boxes, num_keypoints, 2] float32 tensor with 1406 | keypoints. 1407 | fields.DetectionResultFields.detection_keypoint_scores - (optional) 1408 | [batch_size, max_num_boxes, num_keypoints] float32 tensor with 1409 | keypoints scores. 1410 | 1411 | Returns: 1412 | A dictionary of image summary names to tuple of (value_op, update_op). The 1413 | `update_op` is the same for all items in the dictionary, and is 1414 | responsible for saving a single side-by-side image with detections and 1415 | groundtruth. Each `value_op` holds the tf.summary.image string for a given 1416 | image. 
1417 | """ 1418 | if self._max_examples_to_draw == 0: 1419 | return {} 1420 | images = self.images_from_evaluation_dict(eval_dict) 1421 | 1422 | def get_images(): 1423 | """Returns a list of images, padded to self._max_images_to_draw.""" 1424 | images = self._images 1425 | while len(images) < self._max_examples_to_draw: 1426 | images.append(np.array(0, dtype=np.uint8)) 1427 | self.clear() 1428 | return images 1429 | 1430 | def image_summary_or_default_string(summary_name, image): 1431 | """Returns image summaries for non-padded elements.""" 1432 | return tf.cond( 1433 | tf.equal(tf.size(tf.shape(image)), 4), 1434 | lambda: tf.summary.image(summary_name, image), 1435 | lambda: tf.constant('')) 1436 | 1437 | if tf.executing_eagerly(): 1438 | update_op = self.add_images([[images[0]]]) 1439 | image_tensors = get_images() 1440 | else: 1441 | update_op = tf.py_func(self.add_images, [[images[0]]], []) 1442 | image_tensors = tf.py_func( 1443 | get_images, [], [tf.uint8] * self._max_examples_to_draw) 1444 | eval_metric_ops = {} 1445 | for i, image in enumerate(image_tensors): 1446 | summary_name = self._summary_name_prefix + '/' + str(i) 1447 | value_op = image_summary_or_default_string(summary_name, image) 1448 | eval_metric_ops[summary_name] = (value_op, update_op) 1449 | return eval_metric_ops 1450 | 1451 | @abc.abstractmethod 1452 | def images_from_evaluation_dict(self, eval_dict): 1453 | """Converts evaluation dictionary into a list of image tensors. 1454 | 1455 | To be overridden by implementations. 1456 | 1457 | Args: 1458 | eval_dict: A dictionary with all the necessary information for producing 1459 | visualizations. 1460 | 1461 | Returns: 1462 | A list of [1, H, W, C] uint8 tensors. 1463 | """ 1464 | raise NotImplementedError 1465 | 1466 | 1467 | class VisualizeSingleFrameDetections(EvalMetricOpsVisualization): 1468 | """Class responsible for single-frame object detection visualizations.""" 1469 | 1470 | def __init__(self, 1471 | category_index, 1472 | max_examples_to_draw=5, 1473 | max_boxes_to_draw=20, 1474 | min_score_thresh=0.2, 1475 | use_normalized_coordinates=True, 1476 | summary_name_prefix='Detections_Left_Groundtruth_Right', 1477 | keypoint_edges=None): 1478 | super(VisualizeSingleFrameDetections, self).__init__( 1479 | category_index=category_index, 1480 | max_examples_to_draw=max_examples_to_draw, 1481 | max_boxes_to_draw=max_boxes_to_draw, 1482 | min_score_thresh=min_score_thresh, 1483 | use_normalized_coordinates=use_normalized_coordinates, 1484 | summary_name_prefix=summary_name_prefix, 1485 | keypoint_edges=keypoint_edges) 1486 | 1487 | def images_from_evaluation_dict(self, eval_dict): 1488 | return draw_side_by_side_evaluation_image(eval_dict, self._category_index, 1489 | self._max_boxes_to_draw, 1490 | self._min_score_thresh, 1491 | self._use_normalized_coordinates, 1492 | self._keypoint_edges) 1493 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 
5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. 
We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 10 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 11 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 12 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 13 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 14 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Car Damage Detection using SageMaker and TensorFlow 2 | 3 | ### Use case: 4 | The global vehicle insurance and vehicle rental industries still rely on manual processes to detect vehicle damage and assess its severity. Visual inspection is commonly used to detect damage during the claims process. The industry is steeped in manual, paper-driven operations, high premiums, poor customer service, and long turnaround times. 5 | Here we will use a machine learning object detection model, EfficientDet, with Amazon SageMaker and TensorFlow. The object detection model will be used to identify and mark the dent and scratch areas in car images. 6 | 7 | Let's refresh the basic terms used in building this ML model. 8 | 9 | ### What is Machine Learning (ML)? 10 | Machine learning is a method of data analysis that automates analytical model building. It is a branch of artificial intelligence based on the idea that systems can learn from data, identify patterns and make decisions with minimal human intervention. 11 | 12 | ### What is Object Detection? 13 | Object detection is a computer technology related to computer vision and image processing that deals with detecting instances of semantic objects of a certain class (such as humans, buildings, or cars) in digital images and videos. 14 | 15 | ### What is the EfficientDet model? 16 | EfficientDet is an object detection machine learning model which utilizes several optimization and backbone tweaks, such as the use of a BiFPN, and a compound scaling method that uniformly scales the resolution, depth and width of all backbones, feature networks and box/class prediction networks at the same time. 17 | 18 | ### What is a loss function or classification loss in this training? 19 | The loss function is a crucial factor affecting detection precision in an object detection task. The classification loss applies to any task that requires classification: given k categories, the model's job is to classify each of x examples into the correct one of those k categories.
Take this project as an example: we are given 100 images and two categories, and the task is to classify each image as "dent" and/or "scratch". 20 | 21 | ### Overview 22 | In this repository, we will build a custom model using SageMaker and TensorFlow to provide bounding boxes around "dent" and/or "scratch" areas in car images. 23 | First, use Amazon SageMaker Ground Truth to label the car images with bounding boxes using the private workforce option. After the labelling job finishes, Ground Truth creates and saves a manifest file in S3. 24 | 25 | Next, use Amazon SageMaker to build, train, and deploy an EfficientDet model using the TensorFlow Object Detection API. The API is built on top of TensorFlow 2 and makes it easy to construct, train and deploy object detection models. It also provides the TensorFlow 2 Detection Model Zoo, a collection of pre-trained detection models we can use to accelerate model building. 26 | 27 | ### High Level Steps: 28 | • Label the car images with bounding boxes as "dent" and/or "scratch" using SageMaker Ground Truth 29 | • Generate the dataset TFRecords and label map using a SageMaker Processing job (a minimal launch sketch is included at the end of this README) 30 | • Fine-tune an EfficientDet model with TF2 on Amazon SageMaker 31 | • Monitor your model training with TensorBoard and SageMaker Debugger 32 | • Deploy your model on a SageMaker endpoint and visualize the predictions by detecting "dent" and/or "scratch" in car images (see the images below and the invocation sketch at the end of this README) 33 | 34 | ### Get started - Instructions 35 | Follow the step-by-step guide by executing the notebooks in the following folders: 36 | #### 0_ground_truth/ground_truth.ipynb 37 | #### 1_prepare_data/prepare_data.ipynb 38 | #### 2_train_model/train_model.ipynb 39 | #### 3_predict/deploy_endpoint.ipynb 40 | 41 | ||| 42 | | -------------- | ---------------------------- | 43 | |![](media/test-1.jpg)|![](media/test-01.jpg)| 44 | |![](media/test-2.jpg)|![](media/test-02.png)| 45 | 46 | ## License 47 | This library is licensed under the MIT-0 License. See the LICENSE file.
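To make the classification-loss idea above concrete, here is a small, self-contained sketch (not part of this repository's training code) that computes a categorical cross-entropy loss for the two classes used in this project. The probability values are made up purely for illustration.

```python
import numpy as np

# Hypothetical model outputs (class probabilities) for three images,
# with classes ordered as ["dent", "scratch"], and one-hot ground-truth labels.
probs = np.array([[0.9, 0.1],   # confident "dent"
                  [0.2, 0.8],   # confident "scratch"
                  [0.6, 0.4]])  # uncertain prediction
labels = np.array([[1, 0],      # true class: dent
                   [0, 1],      # true class: scratch
                   [1, 0]])     # true class: dent

# Categorical cross-entropy: mean of -log(probability assigned to the true class).
# The less confident the model is about the correct class, the larger the loss.
loss = -np.mean(np.sum(labels * np.log(probs), axis=1))
print(f"cross-entropy loss: {loss:.3f}")
```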
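The TFRecord-generation step described in the overview is driven by the notebook in 1_prepare_data/. The sketch below only illustrates the general shape of launching a SageMaker Processing job with a custom ECR image; the image URI, S3 paths and instance settings shown here are placeholders, not the values used by this repository.

```python
import sagemaker
from sagemaker.processing import Processor, ProcessingInput, ProcessingOutput

role = sagemaker.get_execution_role()

# Placeholder ECR image URI; the real one is produced by build_and_push.sh.
image_uri = "<account-id>.dkr.ecr.<region>.amazonaws.com/<image>:<tag>"

processor = Processor(
    role=role,
    image_uri=image_uri,
    instance_count=1,
    instance_type="ml.m5.xlarge")

# Container-specific CLI arguments (see prepare_data.ipynb) would be passed
# via the `arguments=[...]` parameter of run().
processor.run(
    inputs=[ProcessingInput(
        source="s3://<bucket>/raw-images/",          # placeholder input path
        destination="/opt/ml/processing/input")],
    outputs=[ProcessingOutput(
        source="/opt/ml/processing/output",
        destination="s3://<bucket>/tfrecords/")])    # placeholder output path
```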
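Finally, a minimal sketch of the last high-level step: sending a test image to a deployed endpoint and overlaying the predicted boxes with the visualize_boxes_and_labels_on_image_array helper from 3_predict/visualization_utils.py. The endpoint name is hypothetical, the label map is assumed, and the response keys assume the default TensorFlow 2 Object Detection API serving signature; see deploy_endpoint.ipynb for the repository's actual flow.

```python
import json
import boto3
import numpy as np
from PIL import Image

# Assumes this runs next to 3_predict/visualization_utils.py and that its
# TensorFlow Object Detection API dependencies are installed.
import visualization_utils as viz

ENDPOINT_NAME = "car-damage-efficientdet"           # hypothetical endpoint name
CATEGORY_INDEX = {1: {"id": 1, "name": "dent"},     # assumed 1-indexed label map
                  2: {"id": 2, "name": "scratch"}}

runtime = boto3.client("sagemaker-runtime")

# Load a test image and send it to the endpoint as a JSON-encoded batch of one.
image = np.array(Image.open("media/test-1.jpg").convert("RGB"))
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT_NAME,
    ContentType="application/json",
    Body=json.dumps({"instances": [image.tolist()]}))
predictions = json.loads(response["Body"].read())["predictions"][0]

# Overlay predicted boxes, class names and scores in place on the numpy image.
viz.visualize_boxes_and_labels_on_image_array(
    image,
    np.array(predictions["detection_boxes"]),
    np.array(predictions["detection_classes"], dtype=np.int32),
    np.array(predictions["detection_scores"]),
    CATEGORY_INDEX,
    use_normalized_coordinates=True,
    min_score_thresh=0.5)
Image.fromarray(image).save("prediction.jpg")
```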
48 | 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /media/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/.gitkeep -------------------------------------------------------------------------------- /media/test-01.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-01.jpg -------------------------------------------------------------------------------- /media/test-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-02.png -------------------------------------------------------------------------------- /media/test-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-1.jpg -------------------------------------------------------------------------------- /media/test-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/car-damage-detection-using-sagemaker-and-tensorflow/8fc458099e3779d8dae9daadb230953e7c8dcd60/media/test-2.jpg --------------------------------------------------------------------------------