├── Tensorflow ├── scripts │ └── generate_tfrecord.py └── workspace │ ├── annotations │ └── .gitkeep │ ├── images │ ├── test │ │ └── .gitkeep │ └── train │ │ └── .gitkeep │ ├── models │ └── .gitkeep │ └── pre-trained-models │ ├── ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz │ └── ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8 │ ├── checkpoint │ ├── checkpoint │ ├── ckpt-0.data-00000-of-00001 │ └── ckpt-0.index │ ├── pipeline.config │ └── saved_model │ ├── saved_model.pb │ └── variables │ ├── variables.data-00000-of-00001 │ └── variables.index └── Tutorial.ipynb /Tensorflow/scripts/generate_tfrecord.py: -------------------------------------------------------------------------------- 1 | """ Sample TensorFlow XML-to-TFRecord converter 2 | 3 | usage: generate_tfrecord.py [-h] [-x XML_DIR] [-l LABELS_PATH] [-o OUTPUT_PATH] [-i IMAGE_DIR] [-c CSV_PATH] 4 | 5 | optional arguments: 6 | -h, --help show this help message and exit 7 | -x XML_DIR, --xml_dir XML_DIR 8 | Path to the folder where the input .xml files are stored. 9 | -l LABELS_PATH, --labels_path LABELS_PATH 10 | Path to the labels (.pbtxt) file. 11 | -o OUTPUT_PATH, --output_path OUTPUT_PATH 12 | Path of output TFRecord (.record) file. 13 | -i IMAGE_DIR, --image_dir IMAGE_DIR 14 | Path to the folder where the input image files are stored. Defaults to the same directory as XML_DIR. 15 | -c CSV_PATH, --csv_path CSV_PATH 16 | Path of output .csv file. If none provided, then no file will be written. 
17 | """ 18 | 19 | import os 20 | import glob 21 | import pandas as pd 22 | import io 23 | import xml.etree.ElementTree as ET 24 | import argparse 25 | 26 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TensorFlow logging (1) 27 | import tensorflow.compat.v1 as tf 28 | from PIL import Image 29 | from object_detection.utils import dataset_util, label_map_util 30 | from collections import namedtuple 31 | 32 | # Initiate argument parser 33 | parser = argparse.ArgumentParser( 34 | description="Sample TensorFlow XML-to-TFRecord converter") 35 | parser.add_argument("-x", 36 | "--xml_dir", 37 | help="Path to the folder where the input .xml files are stored.", 38 | type=str) 39 | parser.add_argument("-l", 40 | "--labels_path", 41 | help="Path to the labels (.pbtxt) file.", type=str) 42 | parser.add_argument("-o", 43 | "--output_path", 44 | help="Path of output TFRecord (.record) file.", type=str) 45 | parser.add_argument("-i", 46 | "--image_dir", 47 | help="Path to the folder where the input image files are stored. " 48 | "Defaults to the same directory as XML_DIR.", 49 | type=str, default=None) 50 | parser.add_argument("-c", 51 | "--csv_path", 52 | help="Path of output .csv file. If none provided, then no file will be " 53 | "written.", 54 | type=str, default=None) 55 | 56 | args = parser.parse_args() 57 | 58 | if args.image_dir is None: 59 | args.image_dir = args.xml_dir 60 | 61 | label_map = label_map_util.load_labelmap(args.labels_path) 62 | label_map_dict = label_map_util.get_label_map_dict(label_map) 63 | 64 | 65 | def xml_to_csv(path): 66 | """Iterates through all .xml files (generated by labelImg) in a given directory and combines 67 | them in a single Pandas dataframe. 
68 | 69 | Parameters: 70 | ---------- 71 | path : str 72 | The path containing the .xml files 73 | Returns 74 | ------- 75 | Pandas DataFrame 76 | The produced dataframe 77 | """ 78 | 79 | xml_list = [] 80 | for xml_file in glob.glob(path + '/*.xml'): 81 | tree = ET.parse(xml_file) 82 | root = tree.getroot() 83 | for member in root.findall('object'): 84 | value = (root.find('filename').text, 85 | int(root.find('size')[0].text), 86 | int(root.find('size')[1].text), 87 | member[0].text, 88 | int(member[4][0].text), 89 | int(member[4][1].text), 90 | int(member[4][2].text), 91 | int(member[4][3].text) 92 | ) 93 | xml_list.append(value) 94 | column_name = ['filename', 'width', 'height', 95 | 'class', 'xmin', 'ymin', 'xmax', 'ymax'] 96 | xml_df = pd.DataFrame(xml_list, columns=column_name) 97 | return xml_df 98 | 99 | 100 | def class_text_to_int(row_label): 101 | return label_map_dict[row_label] 102 | 103 | 104 | def split(df, group): 105 | data = namedtuple('data', ['filename', 'object']) 106 | gb = df.groupby(group) 107 | return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)] 108 | 109 | 110 | def create_tf_example(group, path): 111 | with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid: 112 | encoded_jpg = fid.read() 113 | encoded_jpg_io = io.BytesIO(encoded_jpg) 114 | image = Image.open(encoded_jpg_io) 115 | width, height = image.size 116 | 117 | filename = group.filename.encode('utf8') 118 | image_format = b'jpg' 119 | xmins = [] 120 | xmaxs = [] 121 | ymins = [] 122 | ymaxs = [] 123 | classes_text = [] 124 | classes = [] 125 | 126 | for index, row in group.object.iterrows(): 127 | xmins.append(row['xmin'] / width) 128 | xmaxs.append(row['xmax'] / width) 129 | ymins.append(row['ymin'] / height) 130 | ymaxs.append(row['ymax'] / height) 131 | classes_text.append(row['class'].encode('utf8')) 132 | classes.append(class_text_to_int(row['class'])) 133 | 134 | tf_example = 
tf.train.Example(features=tf.train.Features(feature={ 135 | 'image/height': dataset_util.int64_feature(height), 136 | 'image/width': dataset_util.int64_feature(width), 137 | 'image/filename': dataset_util.bytes_feature(filename), 138 | 'image/source_id': dataset_util.bytes_feature(filename), 139 | 'image/encoded': dataset_util.bytes_feature(encoded_jpg), 140 | 'image/format': dataset_util.bytes_feature(image_format), 141 | 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins), 142 | 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs), 143 | 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins), 144 | 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs), 145 | 'image/object/class/text': dataset_util.bytes_list_feature(classes_text), 146 | 'image/object/class/label': dataset_util.int64_list_feature(classes), 147 | })) 148 | return tf_example 149 | 150 | 151 | def main(_): 152 | 153 | writer = tf.python_io.TFRecordWriter(args.output_path) 154 | path = os.path.join(args.image_dir) 155 | examples = xml_to_csv(args.xml_dir) 156 | grouped = split(examples, 'filename') 157 | for group in grouped: 158 | tf_example = create_tf_example(group, path) 159 | writer.write(tf_example.SerializeToString()) 160 | writer.close() 161 | print('Successfully created the TFRecord file: {}'.format(args.output_path)) 162 | if args.csv_path is not None: 163 | examples.to_csv(args.csv_path, index=None) 164 | print('Successfully created the CSV file: {}'.format(args.csv_path)) 165 | 166 | 167 | if __name__ == '__main__': 168 | tf.app.run() 169 | -------------------------------------------------------------------------------- /Tensorflow/workspace/annotations/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/annotations/.gitkeep 
-------------------------------------------------------------------------------- /Tensorflow/workspace/images/test/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/images/test/.gitkeep -------------------------------------------------------------------------------- /Tensorflow/workspace/images/train/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/images/train/.gitkeep -------------------------------------------------------------------------------- /Tensorflow/workspace/models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/models/.gitkeep -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "ckpt-0" 2 | all_model_checkpoint_paths: "ckpt-0" 3 | all_model_checkpoint_timestamps: 1594332511.5251744 4 | last_preserved_timestamp: 1594332507.0004687 5 | 
-------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0.data-00000-of-00001 -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0.index -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config: -------------------------------------------------------------------------------- 1 | model { 2 | ssd { 3 | num_classes: 90 4 | image_resizer { 5 | fixed_shape_resizer { 6 | height: 320 7 | width: 320 8 | } 9 | } 10 | feature_extractor { 11 | type: "ssd_mobilenet_v2_fpn_keras" 12 | depth_multiplier: 1.0 13 | min_depth: 16 14 | conv_hyperparams { 15 | regularizer { 16 | l2_regularizer { 17 | weight: 3.9999998989515007e-05 18 | } 19 | } 20 | initializer { 21 | random_normal_initializer { 22 | mean: 0.0 23 | stddev: 0.009999999776482582 24 | } 25 | } 26 | activation: RELU_6 27 | batch_norm { 28 | decay: 0.996999979019165 29 | scale: true 30 | epsilon: 0.0010000000474974513 31 | } 32 | } 33 | use_depthwise: true 34 | override_base_feature_extractor_hyperparams: true 35 | fpn { 36 | 
min_level: 3 37 | max_level: 7 38 | additional_layer_depth: 128 39 | } 40 | } 41 | box_coder { 42 | faster_rcnn_box_coder { 43 | y_scale: 10.0 44 | x_scale: 10.0 45 | height_scale: 5.0 46 | width_scale: 5.0 47 | } 48 | } 49 | matcher { 50 | argmax_matcher { 51 | matched_threshold: 0.5 52 | unmatched_threshold: 0.5 53 | ignore_thresholds: false 54 | negatives_lower_than_unmatched: true 55 | force_match_for_each_row: true 56 | use_matmul_gather: true 57 | } 58 | } 59 | similarity_calculator { 60 | iou_similarity { 61 | } 62 | } 63 | box_predictor { 64 | weight_shared_convolutional_box_predictor { 65 | conv_hyperparams { 66 | regularizer { 67 | l2_regularizer { 68 | weight: 3.9999998989515007e-05 69 | } 70 | } 71 | initializer { 72 | random_normal_initializer { 73 | mean: 0.0 74 | stddev: 0.009999999776482582 75 | } 76 | } 77 | activation: RELU_6 78 | batch_norm { 79 | decay: 0.996999979019165 80 | scale: true 81 | epsilon: 0.0010000000474974513 82 | } 83 | } 84 | depth: 128 85 | num_layers_before_predictor: 4 86 | kernel_size: 3 87 | class_prediction_bias_init: -4.599999904632568 88 | share_prediction_tower: true 89 | use_depthwise: true 90 | } 91 | } 92 | anchor_generator { 93 | multiscale_anchor_generator { 94 | min_level: 3 95 | max_level: 7 96 | anchor_scale: 4.0 97 | aspect_ratios: 1.0 98 | aspect_ratios: 2.0 99 | aspect_ratios: 0.5 100 | scales_per_octave: 2 101 | } 102 | } 103 | post_processing { 104 | batch_non_max_suppression { 105 | score_threshold: 9.99999993922529e-09 106 | iou_threshold: 0.6000000238418579 107 | max_detections_per_class: 100 108 | max_total_detections: 100 109 | use_static_shapes: false 110 | } 111 | score_converter: SIGMOID 112 | } 113 | normalize_loss_by_num_matches: true 114 | loss { 115 | localization_loss { 116 | weighted_smooth_l1 { 117 | } 118 | } 119 | classification_loss { 120 | weighted_sigmoid_focal { 121 | gamma: 2.0 122 | alpha: 0.25 123 | } 124 | } 125 | classification_weight: 1.0 126 | localization_weight: 1.0 127 | } 128 
| encode_background_as_zeros: true 129 | normalize_loc_loss_by_codesize: true 130 | inplace_batchnorm_update: true 131 | freeze_batchnorm: false 132 | } 133 | } 134 | train_config { 135 | batch_size: 128 136 | data_augmentation_options { 137 | random_horizontal_flip { 138 | } 139 | } 140 | data_augmentation_options { 141 | random_crop_image { 142 | min_object_covered: 0.0 143 | min_aspect_ratio: 0.75 144 | max_aspect_ratio: 3.0 145 | min_area: 0.75 146 | max_area: 1.0 147 | overlap_thresh: 0.0 148 | } 149 | } 150 | sync_replicas: true 151 | optimizer { 152 | momentum_optimizer { 153 | learning_rate { 154 | cosine_decay_learning_rate { 155 | learning_rate_base: 0.07999999821186066 156 | total_steps: 50000 157 | warmup_learning_rate: 0.026666000485420227 158 | warmup_steps: 1000 159 | } 160 | } 161 | momentum_optimizer_value: 0.8999999761581421 162 | } 163 | use_moving_average: false 164 | } 165 | fine_tune_checkpoint: "PATH_TO_BE_CONFIGURED" 166 | num_steps: 50000 167 | startup_delay_steps: 0.0 168 | replicas_to_aggregate: 8 169 | max_number_of_boxes: 100 170 | unpad_groundtruth_tensors: false 171 | fine_tune_checkpoint_type: "classification" 172 | fine_tune_checkpoint_version: V2 173 | } 174 | train_input_reader { 175 | label_map_path: "PATH_TO_BE_CONFIGURED" 176 | tf_record_input_reader { 177 | input_path: "PATH_TO_BE_CONFIGURED" 178 | } 179 | } 180 | eval_config { 181 | metrics_set: "coco_detection_metrics" 182 | use_moving_averages: false 183 | } 184 | eval_input_reader { 185 | label_map_path: "PATH_TO_BE_CONFIGURED" 186 | shuffle: false 187 | num_epochs: 1 188 | tf_record_input_reader { 189 | input_path: "PATH_TO_BE_CONFIGURED" 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/saved_model.pb: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/saved_model.pb -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/variables/variables.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/variables/variables.data-00000-of-00001 -------------------------------------------------------------------------------- /Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/variables/variables.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nicknochnack/RealTimeObjectDetection/7b6228d3cb7be33e45394b3c7446ca09431d4cfa/Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/saved_model/variables/variables.index -------------------------------------------------------------------------------- /Tutorial.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Resources Used\n", 8 | "- wget.download('https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py')\n", 9 | "- Setup https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# 0. 
Setup Paths" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 1, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "WORKSPACE_PATH = 'Tensorflow/workspace'\n", 26 | "SCRIPTS_PATH = 'Tensorflow/scripts'\n", 27 | "APIMODEL_PATH = 'Tensorflow/models'\n", 28 | "ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'\n", 29 | "IMAGE_PATH = WORKSPACE_PATH+'/images'\n", 30 | "MODEL_PATH = WORKSPACE_PATH+'/models'\n", 31 | "PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'\n", 32 | "CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'\n", 33 | "CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "# 1. Create Label Map" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 2, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "labels = [{'name':'Mask', 'id':1}, {'name':'NoMask', 'id':2}]\n", 50 | "\n", 51 | "with open(ANNOTATION_PATH + '\\label_map.pbtxt', 'w') as f:\n", 52 | " for label in labels:\n", 53 | " f.write('item { \\n')\n", 54 | " f.write('\\tname:\\'{}\\'\\n'.format(label['name']))\n", 55 | " f.write('\\tid:{}\\n'.format(label['id']))\n", 56 | " f.write('}\\n')" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "# 2. 
Create TF records" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 3, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "name": "stdout", 73 | "output_type": "stream", 74 | "text": [ 75 | "Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record\n", 76 | "Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record\n" 77 | ] 78 | } 79 | ], 80 | "source": [ 81 | "!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}\n", 82 | "!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "# 3. Download TF Models and Pretrained Models from TensorFlow Model Zoo" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 4, 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stderr", 99 | "output_type": "stream", 100 | "text": [ 101 | "Cloning into 'models'...\n" 102 | ] 103 | } 104 | ], 105 | "source": [ 106 | "!cd Tensorflow && git clone https://github.com/tensorflow/models" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 6, 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "#wget.download('http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz')\n", 116 | "#!mv ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz {PRETRAINED_MODEL_PATH}\n", 117 | "#!cd {PRETRAINED_MODEL_PATH} && tar -zxvf ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8.tar.gz" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "# 4. 
Copy Model Config to Training Folder" 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": 7, 130 | "metadata": {}, 131 | "outputs": [], 132 | "source": [ 133 | "CUSTOM_MODEL_NAME = 'my_ssd_mobnet' " 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 47, 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stderr", 143 | "output_type": "stream", 144 | "text": [ 145 | "A subdirectory or file Tensorflow\\workspace\\models\\my_ssd_mobnet already exists.\n" 146 | ] 147 | } 148 | ], 149 | "source": [ 150 | "!mkdir {'Tensorflow\\workspace\\models\\\\'+CUSTOM_MODEL_NAME}\n", 151 | "!cp {PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config'} {MODEL_PATH+'/'+CUSTOM_MODEL_NAME}" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "# 5. Update Config For Transfer Learning" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 1, 164 | "metadata": {}, 165 | "outputs": [], 166 | "source": [ 167 | "import tensorflow as tf\n", 168 | "from object_detection.utils import config_util\n", 169 | "from object_detection.protos import pipeline_pb2\n", 170 | "from google.protobuf import text_format" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 8, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [ 179 | "CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 55, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 56, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [ 198 | { 199 | "data": { 200 | "text/plain": [ 201 | "{'model': ssd {\n", 202 | " num_classes: 2\n", 203 | " image_resizer {\n", 204 | " fixed_shape_resizer 
{\n", 205 | " height: 320\n", 206 | " width: 320\n", 207 | " }\n", 208 | " }\n", 209 | " feature_extractor {\n", 210 | " type: \"ssd_mobilenet_v2_fpn_keras\"\n", 211 | " depth_multiplier: 1.0\n", 212 | " min_depth: 16\n", 213 | " conv_hyperparams {\n", 214 | " regularizer {\n", 215 | " l2_regularizer {\n", 216 | " weight: 4e-05\n", 217 | " }\n", 218 | " }\n", 219 | " initializer {\n", 220 | " random_normal_initializer {\n", 221 | " mean: 0.0\n", 222 | " stddev: 0.01\n", 223 | " }\n", 224 | " }\n", 225 | " activation: RELU_6\n", 226 | " batch_norm {\n", 227 | " decay: 0.997\n", 228 | " scale: true\n", 229 | " epsilon: 0.001\n", 230 | " }\n", 231 | " }\n", 232 | " use_depthwise: true\n", 233 | " override_base_feature_extractor_hyperparams: true\n", 234 | " fpn {\n", 235 | " min_level: 3\n", 236 | " max_level: 7\n", 237 | " additional_layer_depth: 128\n", 238 | " }\n", 239 | " }\n", 240 | " box_coder {\n", 241 | " faster_rcnn_box_coder {\n", 242 | " y_scale: 10.0\n", 243 | " x_scale: 10.0\n", 244 | " height_scale: 5.0\n", 245 | " width_scale: 5.0\n", 246 | " }\n", 247 | " }\n", 248 | " matcher {\n", 249 | " argmax_matcher {\n", 250 | " matched_threshold: 0.5\n", 251 | " unmatched_threshold: 0.5\n", 252 | " ignore_thresholds: false\n", 253 | " negatives_lower_than_unmatched: true\n", 254 | " force_match_for_each_row: true\n", 255 | " use_matmul_gather: true\n", 256 | " }\n", 257 | " }\n", 258 | " similarity_calculator {\n", 259 | " iou_similarity {\n", 260 | " }\n", 261 | " }\n", 262 | " box_predictor {\n", 263 | " weight_shared_convolutional_box_predictor {\n", 264 | " conv_hyperparams {\n", 265 | " regularizer {\n", 266 | " l2_regularizer {\n", 267 | " weight: 4e-05\n", 268 | " }\n", 269 | " }\n", 270 | " initializer {\n", 271 | " random_normal_initializer {\n", 272 | " mean: 0.0\n", 273 | " stddev: 0.01\n", 274 | " }\n", 275 | " }\n", 276 | " activation: RELU_6\n", 277 | " batch_norm {\n", 278 | " decay: 0.997\n", 279 | " scale: true\n", 280 | " epsilon: 0.001\n", 
281 | " }\n", 282 | " }\n", 283 | " depth: 128\n", 284 | " num_layers_before_predictor: 4\n", 285 | " kernel_size: 3\n", 286 | " class_prediction_bias_init: -4.6\n", 287 | " share_prediction_tower: true\n", 288 | " use_depthwise: true\n", 289 | " }\n", 290 | " }\n", 291 | " anchor_generator {\n", 292 | " multiscale_anchor_generator {\n", 293 | " min_level: 3\n", 294 | " max_level: 7\n", 295 | " anchor_scale: 4.0\n", 296 | " aspect_ratios: 1.0\n", 297 | " aspect_ratios: 2.0\n", 298 | " aspect_ratios: 0.5\n", 299 | " scales_per_octave: 2\n", 300 | " }\n", 301 | " }\n", 302 | " post_processing {\n", 303 | " batch_non_max_suppression {\n", 304 | " score_threshold: 1e-08\n", 305 | " iou_threshold: 0.6\n", 306 | " max_detections_per_class: 100\n", 307 | " max_total_detections: 100\n", 308 | " use_static_shapes: false\n", 309 | " }\n", 310 | " score_converter: SIGMOID\n", 311 | " }\n", 312 | " normalize_loss_by_num_matches: true\n", 313 | " loss {\n", 314 | " localization_loss {\n", 315 | " weighted_smooth_l1 {\n", 316 | " }\n", 317 | " }\n", 318 | " classification_loss {\n", 319 | " weighted_sigmoid_focal {\n", 320 | " gamma: 2.0\n", 321 | " alpha: 0.25\n", 322 | " }\n", 323 | " }\n", 324 | " classification_weight: 1.0\n", 325 | " localization_weight: 1.0\n", 326 | " }\n", 327 | " encode_background_as_zeros: true\n", 328 | " normalize_loc_loss_by_codesize: true\n", 329 | " inplace_batchnorm_update: true\n", 330 | " freeze_batchnorm: false\n", 331 | " }, 'train_config': batch_size: 4\n", 332 | " data_augmentation_options {\n", 333 | " random_horizontal_flip {\n", 334 | " }\n", 335 | " }\n", 336 | " data_augmentation_options {\n", 337 | " random_crop_image {\n", 338 | " min_object_covered: 0.0\n", 339 | " min_aspect_ratio: 0.75\n", 340 | " max_aspect_ratio: 3.0\n", 341 | " min_area: 0.75\n", 342 | " max_area: 1.0\n", 343 | " overlap_thresh: 0.0\n", 344 | " }\n", 345 | " }\n", 346 | " sync_replicas: true\n", 347 | " optimizer {\n", 348 | " momentum_optimizer {\n", 349 | " 
learning_rate {\n", 350 | " cosine_decay_learning_rate {\n", 351 | " learning_rate_base: 0.08\n", 352 | " total_steps: 50000\n", 353 | " warmup_learning_rate: 0.026666\n", 354 | " warmup_steps: 1000\n", 355 | " }\n", 356 | " }\n", 357 | " momentum_optimizer_value: 0.9\n", 358 | " }\n", 359 | " use_moving_average: false\n", 360 | " }\n", 361 | " fine_tune_checkpoint: \"Tensorflow/workspace/pre-trained-models/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0\"\n", 362 | " num_steps: 50000\n", 363 | " startup_delay_steps: 0.0\n", 364 | " replicas_to_aggregate: 8\n", 365 | " max_number_of_boxes: 100\n", 366 | " unpad_groundtruth_tensors: false\n", 367 | " fine_tune_checkpoint_type: \"detection\"\n", 368 | " fine_tune_checkpoint_version: V2, 'train_input_config': label_map_path: \"Tensorflow/workspace/annotations/label_map.pbtxt\"\n", 369 | " tf_record_input_reader {\n", 370 | " input_path: \"Tensorflow/workspace/annotations/train.record\"\n", 371 | " }, 'eval_config': metrics_set: \"coco_detection_metrics\"\n", 372 | " use_moving_averages: false, 'eval_input_configs': [label_map_path: \"Tensorflow/workspace/annotations/label_map.pbtxt\"\n", 373 | " shuffle: false\n", 374 | " num_epochs: 1\n", 375 | " tf_record_input_reader {\n", 376 | " input_path: \"Tensorflow/workspace/annotations/test.record\"\n", 377 | " }\n", 378 | " ], 'eval_input_config': label_map_path: \"Tensorflow/workspace/annotations/label_map.pbtxt\"\n", 379 | " shuffle: false\n", 380 | " num_epochs: 1\n", 381 | " tf_record_input_reader {\n", 382 | " input_path: \"Tensorflow/workspace/annotations/test.record\"\n", 383 | " }}" 384 | ] 385 | }, 386 | "execution_count": 56, 387 | "metadata": {}, 388 | "output_type": "execute_result" 389 | } 390 | ], 391 | "source": [ 392 | "config" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 52, 398 | "metadata": {}, 399 | "outputs": [], 400 | "source": [ 401 | "pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()\n", 402 | 
"with tf.io.gfile.GFile(CONFIG_PATH, \"r\") as f: \n", 403 | " proto_str = f.read() \n", 404 | " text_format.Merge(proto_str, pipeline_config) " 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": 53, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "pipeline_config.model.ssd.num_classes = 2\n", 414 | "pipeline_config.train_config.batch_size = 4\n", 415 | "pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'\n", 416 | "pipeline_config.train_config.fine_tune_checkpoint_type = \"detection\"\n", 417 | "pipeline_config.train_input_reader.label_map_path= ANNOTATION_PATH + '/label_map.pbtxt'\n", 418 | "pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/train.record']\n", 419 | "pipeline_config.eval_input_reader[0].label_map_path = ANNOTATION_PATH + '/label_map.pbtxt'\n", 420 | "pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/test.record']" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 54, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "config_text = text_format.MessageToString(pipeline_config) \n", 430 | "with tf.io.gfile.GFile(CONFIG_PATH, \"wb\") as f: \n", 431 | " f.write(config_text) " 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "# 6. 
Train the model" 439 | ] 440 | }, 441 | { 442 | "cell_type": "code", 443 | "execution_count": 58, 444 | "metadata": {}, 445 | "outputs": [ 446 | { 447 | "name": "stdout", 448 | "output_type": "stream", 449 | "text": [ 450 | "python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=5000\n" 451 | ] 452 | } 453 | ], 454 | "source": [ 455 | "print(\"\"\"python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=5000\"\"\".format(APIMODEL_PATH, MODEL_PATH,CUSTOM_MODEL_NAME,MODEL_PATH,CUSTOM_MODEL_NAME))" 456 | ] 457 | }, 458 | { 459 | "cell_type": "markdown", 460 | "metadata": {}, 461 | "source": [ 462 | "# 7. Load Trained Model From Checkpoint" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 2, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "import os\n", 472 | "from object_detection.utils import label_map_util\n", 473 | "from object_detection.utils import visualization_utils as viz_utils\n", 474 | "from object_detection.builders import model_builder" 475 | ] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": 9, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [ 483 | "# Load pipeline config and build a detection model\n", 484 | "configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)\n", 485 | "detection_model = model_builder.build(model_config=configs['model'], is_training=False)\n", 486 | "\n", 487 | "# Restore checkpoint\n", 488 | "ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)\n", 489 | "ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-6')).expect_partial()\n", 490 | "\n", 491 | "@tf.function\n", 492 | "def detect_fn(image):\n", 493 | " image, shapes = detection_model.preprocess(image)\n", 494 | " prediction_dict = 
detection_model.predict(image, shapes)\n", 495 | " detections = detection_model.postprocess(prediction_dict, shapes)\n", 496 | " return detections" 497 | ] 498 | }, 499 | { 500 | "cell_type": "markdown", 501 | "metadata": {}, 502 | "source": [ 503 | "# 8. Detect in Real-Time" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 10, 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "import cv2 \n", 513 | "import numpy as np" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": 11, 519 | "metadata": {}, 520 | "outputs": [], 521 | "source": [ 522 | "category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": 105, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "cap.release()" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "execution_count": 12, 537 | "metadata": {}, 538 | "outputs": [], 539 | "source": [ 540 | "# Setup capture\n", 541 | "cap = cv2.VideoCapture(0)\n", 542 | "width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n", 543 | "height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": null, 549 | "metadata": {}, 550 | "outputs": [], 551 | "source": [ 552 | "while True: \n", 553 | " ret, frame = cap.read()\n", 554 | " image_np = np.array(frame)\n", 555 | " \n", 556 | " input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)\n", 557 | " detections = detect_fn(input_tensor)\n", 558 | " \n", 559 | " num_detections = int(detections.pop('num_detections'))\n", 560 | " detections = {key: value[0, :num_detections].numpy()\n", 561 | " for key, value in detections.items()}\n", 562 | " detections['num_detections'] = num_detections\n", 563 | "\n", 564 | " # detection_classes should be ints.\n", 565 | " detections['detection_classes'] = 
detections['detection_classes'].astype(np.int64)\n", 566 | "\n", 567 | " label_id_offset = 1\n", 568 | " image_np_with_detections = image_np.copy()\n", 569 | "\n", 570 | " viz_utils.visualize_boxes_and_labels_on_image_array(\n", 571 | " image_np_with_detections,\n", 572 | " detections['detection_boxes'],\n", 573 | " detections['detection_classes']+label_id_offset,\n", 574 | " detections['detection_scores'],\n", 575 | " category_index,\n", 576 | " use_normalized_coordinates=True,\n", 577 | " max_boxes_to_draw=5,\n", 578 | " min_score_thresh=.5,\n", 579 | " agnostic_mode=False)\n", 580 | "\n", 581 | " cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))\n", 582 | " \n", 583 | " if cv2.waitKey(1) & 0xFF == ord('q'):\n", 584 | " cap.release()\n", 585 | " break" 586 | ] 587 | }, 588 | { 589 | "cell_type": "code", 590 | "execution_count": 42, 591 | "metadata": {}, 592 | "outputs": [], 593 | "source": [ 594 | "detections = detect_fn(input_tensor)" 595 | ] 596 | }, 597 | { 598 | "cell_type": "code", 599 | "execution_count": 67, 600 | "metadata": {}, 601 | "outputs": [], 602 | "source": [ 603 | "from matplotlib import pyplot as plt" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": null, 609 | "metadata": {}, 610 | "outputs": [], 611 | "source": [] 612 | } 613 | ], 614 | "metadata": { 615 | "kernelspec": { 616 | "display_name": "Python 3", 617 | "language": "python", 618 | "name": "python3" 619 | }, 620 | "language_info": { 621 | "codemirror_mode": { 622 | "name": "ipython", 623 | "version": 3 624 | }, 625 | "file_extension": ".py", 626 | "mimetype": "text/x-python", 627 | "name": "python", 628 | "nbconvert_exporter": "python", 629 | "pygments_lexer": "ipython3", 630 | "version": "3.7.4" 631 | } 632 | }, 633 | "nbformat": 4, 634 | "nbformat_minor": 2 635 | } 636 | --------------------------------------------------------------------------------