├── Object_detection_video.py
├── README.md
├── Town02.mp4
├── Town02_output.mp4
├── example_output.png
├── generate_tfrecord.py
├── train.py
├── training
│   ├── faster_rcnn_inception_v2_coco.config
│   └── labelmap.pbtxt
└── xml_to_csv.py

--------------------------------------------------------------------------------
/Object_detection_video.py:
--------------------------------------------------------------------------------
######## Video Object Detection Using Tensorflow-trained Classifier #########
#
# Author: Evan Juras
# Date: 1/16/18
# Description:
# This program uses a TensorFlow-trained classifier to perform object detection.
# It loads the classifier and uses it to perform object detection on a video.
# It draws boxes and scores around the objects of interest in each frame
# of the video.

## Some of the code is copied from Google's example at
## https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb

## and some is copied from Dat Tran's example at
## https://github.com/datitran/object_detector_app/blob/master/object_detection_app.py

## but I changed it to make it more understandable to me.

# Import packages
import os
import cv2
import numpy as np
import tensorflow as tf
import sys

# This is needed since the script is stored in the object_detection folder.
sys.path.append("..")

# Import utilities
from utils import label_map_util
from utils import visualization_utils as vis_util

# Name of the directory containing the object detection module we're using
MODEL_NAME = 'inference_graph'
VIDEO_NAME = 'Town02.mp4'

# Grab path to current working directory
CWD_PATH = os.getcwd()

# Path to frozen detection graph .pb file, which contains the model that is used
# for object detection.
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')

# Path to label map file
PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, 'labelmap.pbtxt')

# Path to video
PATH_TO_VIDEO = os.path.join(CWD_PATH, VIDEO_NAME)

# Number of classes the object detector can identify
NUM_CLASSES = 5

# Load the label map.
# Label maps map indices to category names, so that when our convolution
# network predicts `1`, we know that this corresponds to `vehicle`.
# Here we use internal utility functions, but anything that returns a
# dictionary mapping integers to appropriate string labels would be fine.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
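# For this repo's labelmap.pbtxt (five classes), category_index is a dictionary
# of dictionaries, roughly:
# {1: {'id': 1, 'name': 'vehicle'}, 2: {'id': 2, 'name': 'traffic_light'}, ...}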
# Load the Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

# Define input and output tensors (i.e. data) for the object detection classifier

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents the level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

# Open video file
video = cv2.VideoCapture(PATH_TO_VIDEO)

while video.isOpened():

    # Acquire frame and expand frame dimensions to have shape: [1, None, None, 3],
    # i.e. a batch of size one whose single item is the RGB frame
    ret, frame = video.read()
    if not ret:
        # Stop when the end of the video is reached (frame is None)
        break
    frame_expanded = np.expand_dims(frame, axis=0)

    # Perform the actual detection by running the model with the image as input
    (boxes, scores, classes, num) = sess.run(
        [detection_boxes, detection_scores, detection_classes, num_detections],
        feed_dict={image_tensor: frame_expanded})

    # Draw the results of the detection (aka 'visualize the results')
    vis_util.visualize_boxes_and_labels_on_image_array(
        frame,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=2,
        min_score_thresh=0.85)

    # All the results have been drawn on the frame, so it's time to display it.
    cv2.imshow('Object detector', frame)

    # Press 'q' to quit
    if cv2.waitKey(1) == ord('q'):
        break

# Clean up
video.release()
cv2.destroyAllWindows()
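# --- Optional: saving the annotated video ---
# The repo also ships Town02_output.mp4. One way to produce such a file is
# OpenCV's VideoWriter; the following is only a sketch (codec and fallback FPS
# are assumptions, not taken from this script):
#
#   fps = video.get(cv2.CAP_PROP_FPS) or 30.0
#   width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
#   height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
#   out = cv2.VideoWriter('Town02_output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
#   # ... inside the while loop, after the boxes are drawn:  out.write(frame)
#   # ... after the loop, next to video.release():           out.release()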
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Tensorflow-Carla-Object-Detection

With this repo you can train your own object detection classifier with the TensorFlow Object Detection API. I use it to detect several objects in the Carla simulator. For this purpose I created my own dataset, which can be downloaded from my other GitHub repository. This is tested on Ubuntu 16.04 but should also work under Windows and other Linux distributions.


#### YouTube demonstration video of the trained classifier:

[![Demonstration video](https://img.youtube.com/vi/08zke4oY7JE/0.jpg)](https://youtu.be/08zke4oY7JE)


More detailed instructions on how to train this object detection classifier can be found here: [Link](https://github.com/EdjeElectronics/TensorFlow-Object-Detection-API-Tutorial-Train-Multiple-Objects-Windows-10)

#### Basic steps to follow:

1. #### Install Tensorflow GPU support

   Go to the Tensorflow website and follow the steps described [here](https://www.tensorflow.org/install). You will also need to install CUDA and cuDNN, which are also described on the website.

2. #### Download the Tensorflow object detection API

   A detailed description can be found [here](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md).

3. #### Download this repository

   Clone this repository into the ./tensorflow/models/research/object_detection/ folder.

4. #### Download a pretrained model from TensorFlow's model zoo

   To train the model you will need to start from a pretrained model, otherwise training would take too much time. In my case I used the Faster-RCNN-Inception-V2-COCO model and downloaded it from [Tensorflow's model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md). Extract the pretrained model into the ./tensorflow/models/research/object_detection/ folder.

5. #### Download dataset and copy files

   Download the dataset from my other GitHub repo and extract the images into the "images" folder of this repository, e.g. ./tensorflow/models/research/object_detection/Carla_object_detection/images/

6. #### Generate training data

   Example commands for this and the following steps are sketched after this list.

7. #### Configure training

8. #### Run training

9. #### Export inference graph

10. #### Use the trained classifier
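The exact commands depend on your setup; the following is only a sketch of steps 6-10, assuming you run everything from the folder that contains this repository's scripts and "images" directory, and that the flags match the usage notes in generate_tfrecord.py and train.py below (the checkpoint number XXXX in step 9 is a placeholder for your latest checkpoint):

```bash
# 6. Generate training data: convert the XML labels to CSV, then build TFRecords
python xml_to_csv.py
python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record
python generate_tfrecord.py --csv_input=images/test_labels.csv --image_dir=images/test --output_path=test.record

# 7. Configure training: set num_classes, fine_tune_checkpoint, input_path and
#    label_map_path in training/faster_rcnn_inception_v2_coco.config (see below)

# 8. Run training
python train.py --logtostderr --train_dir=training/ --pipeline_config_path=training/faster_rcnn_inception_v2_coco.config

# 9. Export the inference graph (export_inference_graph.py ships with the
#    Object Detection API; replace XXXX with the highest step number in training/)
python export_inference_graph.py --input_type image_tensor --pipeline_config_path training/faster_rcnn_inception_v2_coco.config --trained_checkpoint_prefix training/model.ckpt-XXXX --output_directory inference_graph

# 10. Use the trained classifier, e.g. on the included video
python Object_detection_video.py
```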
--------------------------------------------------------------------------------
/Town02.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielHfnr/Tensorflow-Carla-Object-Detection/fa7d5fbac12681fa8a211cdb553158ef9c5d1967/Town02.mp4
--------------------------------------------------------------------------------
/Town02_output.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielHfnr/Tensorflow-Carla-Object-Detection/fa7d5fbac12681fa8a211cdb553158ef9c5d1967/Town02_output.mp4
--------------------------------------------------------------------------------
/example_output.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DanielHfnr/Tensorflow-Carla-Object-Detection/fa7d5fbac12681fa8a211cdb553158ef9c5d1967/example_output.png
--------------------------------------------------------------------------------
/generate_tfrecord.py:
--------------------------------------------------------------------------------
"""
Usage:
  # From tensorflow/models/
  # Create train data:
  python generate_tfrecord.py --csv_input=images/train_labels.csv --image_dir=images/train --output_path=train.record

  # Create test data:
  python generate_tfrecord.py --csv_input=images/test_labels.csv --image_dir=images/test --output_path=test.record
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow as tf

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('image_dir', '', 'Path to the image directory')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS


# TO-DO replace this with label map
def class_text_to_int(row_label):
    if row_label == 'vehicle':
        return 1
    elif row_label == 'traffic_light':
        return 2
    elif row_label == 'traffic_sign':
        return 3
    elif row_label == 'bike':
        return 4
    elif row_label == 'motobike':
        return 5
    else:
        return None
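# One way to resolve the TO-DO above is to derive the mapping from the label
# map instead of hard-coding it. A sketch (assumes training/labelmap.pbtxt is
# reachable from the working directory; get_label_map_dict returns a
# {name: id} dictionary):
#
#   from object_detection.utils import label_map_util
#   label_map_dict = label_map_util.get_label_map_dict('training/labelmap.pbtxt')
#
#   def class_text_to_int(row_label):
#       return label_map_dict.get(row_label)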
def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]


def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

r"""Training executable for detection models.

This executable is used to train DetectionModels. There are two ways of
configuring the training job:

1) A single pipeline_pb2.TrainEvalPipelineConfig configuration file
can be specified by --pipeline_config_path.

Example usage:
    ./train \
        --logtostderr \
        --train_dir=path/to/train_dir \
        --pipeline_config_path=pipeline_config.pbtxt

2) Three configuration files can be provided: a model_pb2.DetectionModel
configuration file to define what type of DetectionModel is being trained, an
input_reader_pb2.InputReader file to specify what training data will be used and
a train_pb2.TrainConfig file to configure training parameters.

Example usage:
    ./train \
        --logtostderr \
        --train_dir=path/to/train_dir \
        --model_config_path=model_config.pbtxt \
        --train_config_path=train_config.pbtxt \
        --input_config_path=train_input_config.pbtxt
"""

import functools
import json
import os
import tensorflow as tf

from object_detection.builders import dataset_builder
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import model_builder
from object_detection.legacy import trainer
from object_detection.utils import config_util

tf.logging.set_verbosity(tf.logging.INFO)

flags = tf.app.flags
flags.DEFINE_string('master', '', 'Name of the TensorFlow master to use.')
flags.DEFINE_integer('task', 0, 'task id')
flags.DEFINE_integer('num_clones', 1, 'Number of clones to deploy per worker.')
flags.DEFINE_boolean('clone_on_cpu', False,
                     'Force clones to be deployed on CPU. Note that even if '
                     'set to False (allowing ops to run on gpu), some ops may '
                     'still be run on the CPU if they have no GPU kernel.')
flags.DEFINE_integer('worker_replicas', 1, 'Number of worker+trainer '
                     'replicas.')
flags.DEFINE_integer('ps_tasks', 0,
                     'Number of parameter server tasks. If None, does not use '
                     'a parameter server.')
flags.DEFINE_string('train_dir', '',
                    'Directory to save the checkpoints and training summaries.')

flags.DEFINE_string('pipeline_config_path', '',
                    'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
                    'file. If provided, other configs are ignored')

flags.DEFINE_string('train_config_path', '',
                    'Path to a train_pb2.TrainConfig config file.')
flags.DEFINE_string('input_config_path', '',
                    'Path to an input_reader_pb2.InputReader config file.')
flags.DEFINE_string('model_config_path', '',
                    'Path to a model_pb2.DetectionModel config file.')

FLAGS = flags.FLAGS


@tf.contrib.framework.deprecated(None, 'Use object_detection/model_main.py.')
def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    if FLAGS.task == 0:
      tf.gfile.Copy(FLAGS.pipeline_config_path,
                    os.path.join(FLAGS.train_dir, 'pipeline.config'),
                    overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        train_config_path=FLAGS.train_config_path,
        train_input_config_path=FLAGS.input_config_path)
    if FLAGS.task == 0:
      for name, config in [('model.config', FLAGS.model_config_path),
                           ('train.config', FLAGS.train_config_path),
                           ('input.config', FLAGS.input_config_path)]:
        tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name),
                      overwrite=True)

  model_config = configs['model']
  train_config = configs['train_config']
  input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=True)

  def get_next(config):
    return dataset_builder.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  cluster_data = env.get('cluster', None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
  task_data = env.get('task', None) or {'type': 'master', 'index': 0}
  task_info = type('TaskSpec', (object,), task_data)

  # Parameters for a single worker.
  ps_tasks = 0
  worker_replicas = 1
  worker_job_name = 'lonely_worker'
  task = 0
  is_chief = True
  master = ''

  if cluster_data and 'worker' in cluster_data:
    # Number of total worker replicas include "worker"s and the "master".
    worker_replicas = len(cluster_data['worker']) + 1
  if cluster_data and 'ps' in cluster_data:
    ps_tasks = len(cluster_data['ps'])

  if worker_replicas > 1 and ps_tasks < 1:
    raise ValueError('At least 1 ps task is needed for distributed training.')

  if worker_replicas >= 1 and ps_tasks > 0:
    # Set up distributed training.
    server = tf.train.Server(tf.train.ClusterSpec(cluster), protocol='grpc',
                             job_name=task_info.type,
                             task_index=task_info.index)
    if task_info.type == 'ps':
      server.join()
      return

    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
    task = task_info.index
    is_chief = (task_info.type == 'master')
    master = server.target

  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=True)

  trainer.train(
      create_input_dict_fn,
      model_fn,
      train_config,
      master,
      task,
      FLAGS.num_clones,
      worker_replicas,
      FLAGS.clone_on_cpu,
      ps_tasks,
      worker_job_name,
      is_chief,
      FLAGS.train_dir,
      graph_hook_fn=graph_rewriter_fn)


if __name__ == '__main__':
  tf.app.run()
--------------------------------------------------------------------------------
/training/faster_rcnn_inception_v2_coco.config:
--------------------------------------------------------------------------------
# Faster R-CNN with Inception v2, configuration for MSCOCO Dataset.
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.


model {
  faster_rcnn {
    num_classes: 5
    image_resizer {
      keep_aspect_ratio_resizer {
        min_dimension: 600
        max_dimension: 1024
      }
    }
    feature_extractor {
      type: 'faster_rcnn_inception_v2'
      first_stage_features_stride: 16
    }
    first_stage_anchor_generator {
      grid_anchor_generator {
        scales: [0.25, 0.5, 1.0, 2.0]
        aspect_ratios: [0.5, 1.0, 2.0]
        height_stride: 16
        width_stride: 16
      }
    }
    first_stage_box_predictor_conv_hyperparams {
      op: CONV
      regularizer {
        l2_regularizer {
          weight: 0.0
        }
      }
      initializer {
        truncated_normal_initializer {
          stddev: 0.01
        }
      }
    }
    first_stage_nms_score_threshold: 0.0
    first_stage_nms_iou_threshold: 0.7
    first_stage_max_proposals: 300
    first_stage_localization_loss_weight: 2.0
    first_stage_objectness_loss_weight: 1.0
    initial_crop_size: 14
    maxpool_kernel_size: 2
    maxpool_stride: 2
    second_stage_box_predictor {
      mask_rcnn_box_predictor {
        use_dropout: false
        dropout_keep_probability: 1.0
        fc_hyperparams {
          op: FC
          regularizer {
            l2_regularizer {
              weight: 0.0
            }
          }
          initializer {
            variance_scaling_initializer {
              factor: 1.0
              uniform: true
              mode: FAN_AVG
            }
          }
        }
      }
    }
    second_stage_post_processing {
      batch_non_max_suppression {
        score_threshold: 0.0
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 300
      }
      score_converter: SOFTMAX
    }
    second_stage_localization_loss_weight: 2.0
    second_stage_classification_loss_weight: 1.0
  }
}

train_config: {
  batch_size: 1
  optimizer {
    momentum_optimizer: {
      learning_rate: {
        manual_step_learning_rate {
          initial_learning_rate: 0.0002
          schedule {
            step: 900000
            learning_rate: .00002
          }
          schedule {
            step: 1200000
            learning_rate: .000002
          }
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }
  gradient_clipping_by_norm: 10.0
  fine_tune_checkpoint: "faster_rcnn_inception_v2_coco_2018_01_28/model.ckpt"
  from_detection_checkpoint: true
  # Note: The below line limits the training process to 200K steps, which we
  # empirically found to be sufficient to train the COCO dataset. This
  # effectively bypasses the learning rate schedule (the learning rate will
  # never decay). Remove the below line to train indefinitely.
  num_steps: 200000
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
}

train_input_reader: {
  tf_record_input_reader {
    input_path: "train.record"
  }
  label_map_path: "labelmap.pbtxt"
}

eval_config: {
  num_examples: 208
  # Note: The below line limits the evaluation process to 10 evaluations.
  # Remove the below line to evaluate indefinitely.
  max_evals: 10
}

eval_input_reader: {
  tf_record_input_reader {
    input_path: "test.record"
  }
  label_map_path: "labelmap.pbtxt"
  shuffle: true
  num_readers: 1
}
--------------------------------------------------------------------------------
/training/labelmap.pbtxt:
--------------------------------------------------------------------------------
item {
  id: 1
  name: 'vehicle'
}

item {
  id: 2
  name: 'traffic_light'
}

item {
  id: 3
  name: 'traffic_sign'
}

item {
  id: 4
  name: 'bike'
}

item {
  id: 5
  name: 'motobike'
}
--------------------------------------------------------------------------------
/xml_to_csv.py:
--------------------------------------------------------------------------------
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET


def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def main():

    for folder in ['train', 'test']:
        image_path = os.path.join(os.getcwd(), ('images/' + folder))
        xml_df = xml_to_csv(image_path)
        xml_df.to_csv(('images/' + folder + '_labels.csv'), index=None)
        print('Successfully converted xml to csv.')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------