├── tensorflow
    └── object_detection
    │   ├── scripts
    │       ├── xml_to_csv.py
    │       ├── configure_training.py
    │       └── generate_tfrecord.py
    │   ├── video_recording_opencv.ipynb
    │   ├── webcam_detection_custom.ipynb
    │   ├── webcam_detection_custom-ip_camera.ipynb
    │   ├── webcam_detection_non_customized.ipynb
    │   └── object_detection_api_test.ipynb
└── opencv
    └── yolov4_webcam.py


/tensorflow/object_detection/scripts/xml_to_csv.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | import glob
 3 | import pandas as pd
 4 | import xml.etree.ElementTree as ET
 5 | import argparse
 6 | 
 7 | 
 8 | def xml_to_csv(path):
 9 |     xml_list = []
10 |     for xml_file in glob.glob(path + '/*.xml'):
11 |         tree = ET.parse(xml_file)
12 |         root = tree.getroot()
13 |         for member in root.findall('object'):
14 |             value = (root.find('filename').text,
15 |                      int(root.find('size')[0].text),
16 |                      int(root.find('size')[1].text),
17 |                      member[0].text,
18 |                      int(member[4][0].text),
19 |                      int(member[4][1].text),
20 |                      int(member[4][2].text),
21 |                      int(member[4][3].text)
22 |                      )
23 |             xml_list.append(value)
24 |     column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
25 |     xml_df = pd.DataFrame(xml_list, columns=column_name)
26 |     return xml_df
27 | 
28 | 
29 | def main():
30 |     
31 |     parser = argparse.ArgumentParser()
32 |     parser.add_argument('-imageRoot',help='the folder containing the training images in test and train folders',type=str,required=True)
33 |     args = parser.parse_args()
34 |     
35 |     print (args.imageRoot)
36 |     
37 |     for folder in ['train','test']:
38 |         image_path = os.path.join(args.imageRoot, folder )
39 |         xml_df = xml_to_csv(image_path)
40 |         xml_df.to_csv(os.path.join(args.imageRoot, folder + '_labels.csv'), index=None)
41 |     print('Successfully converted xml to csv.')
42 | 
43 | 
44 | if __name__ == '__main__':
45 |     sys.exit(main())
46 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/scripts/configure_training.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import shlex
 3 | import argparse
 4 | import sys
 5 | 
 6 | def getLabelMap(labelMapAsStr):
 7 |     labelMap = {}
 8 |     for labelValue in [x for x in labelMapAsStr.split(',')]:
 9 |         labelMap[labelValue.split(':')[0]] = int(labelValue.split(':')[1])
10 |     return labelMap
11 | 
12 | def main():
13 |     
14 |     parser = argparse.ArgumentParser()
15 |     parser.add_argument('-imageRoot',help='the folder containing the training images in test and train folders',type=str,required=True)
16 |     parser.add_argument('-labelMap',help='Comma separated label map, e.g. label1:value1,label2:value2',type=str,required=True)
17 |     parser.add_argument('-labelMapOutputFile',help='The file where the .pbtxt file will be generated based on the labelMap',type=str,required=True)
18 |     args = parser.parse_args()
19 |     
20 |     subprocess.call(shlex.split(r'python xml_to_csv.py -imageRoot %s' % args.imageRoot), shell=False)
21 |     
22 |     for imageSet in ['train','test']:
23 |         subprocess.call(shlex.split(r'python generate_tfrecord.py --csv_input={imageRoot}/{imageSet}_labels.csv --image_dir={imageRoot}/{imageSet} --output_path={imageRoot}/{imageSet}.record --label_map={labelMap}'.format(imageRoot=args.imageRoot, imageSet=imageSet, labelMap=args.labelMap)), shell=False)
24 |     
25 |     labelDict = getLabelMap(args.labelMap)
26 |     
27 |     with open(args.labelMapOutputFile,'wt') as outFile:
28 |         for label, id in labelDict.items():
29 |             outFile.write('item {\n')
30 |             outFile.write("\tid: %s\n" % id)
31 |             outFile.write("\tname: '%s'\n" % label)
32 |             outFile.write('}\n')
33 |             
34 |     
35 | 
36 | if __name__ == '__main__':
37 |     sys.exit(main())
38 | 
39 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/video_recording_opencv.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 14,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "import cv2\n",
10 |     "import numpy as np\n",
11 |     "import time\n",
12 |     "\n",
13 |     "# remember to use your own path and credentials\n",
14 |     "cap = cv2.VideoCapture('http://192.168.0.137:81/videostream.cgi?loginuse=&loginpas=&resolution=32')\n",
15 |     "cap.set(cv2.CAP_PROP_BUFFERSIZE,1)\n",
16 |     "#print(cv2.getBuildInformation())\n",
17 |     "\n",
18 |     "if (cap.isOpened() == False): \n",
19 |     "    print (\"Unable to read camera feed\")\n",
20 |     "\n",
21 |     "frame_width = int(cap.get(3))\n",
22 |     "frame_height = int(cap.get(4))\n",
23 |     "\n",
24 |     "out = cv2.VideoWriter('recording.mp4',cv2.VideoWriter_fourcc(*'MP4V'), 20.0, (frame_width,frame_height))\n",
25 |     "\n",
26 |     "MAX_SECS = 60 * 60\n",
27 |     "\n",
28 |     "startTime = time.time()\n",
29 |     "\n",
30 |     "while True:\n",
31 |     "    currentTime = time.time()\n",
32 |     "\n",
33 |     "    if currentTime - startTime > MAX_SECS:\n",
34 |     "        break\n",
35 |     "\n",
36 |     "    ret, frame = cap.read()\n",
37 |     "\n",
38 |     "    if ret == True: \n",
39 |     "        out.write(frame)\n",
40 |     "        cv2.imshow('frame',frame)\n",
41 |     "\n",
42 |     "    # Press q on keyboard to stop recording\n",
43 |     "    if cv2.waitKey(1) & 0xFF == ord('q'):\n",
44 |     "        break\n",
45 |     "\n",
46 |     "    else:\n",
47 |     "        break  \n",
48 |     "\n",
49 |     "cap.release()\n",
50 |     "out.release()\n",
51 |     "\n",
52 |     "cv2.destroyAllWindows() "
53 |    ]
54 |   }
55 |  ],
56 |  "metadata": {
57 |   "kernelspec": {
58 |    "display_name": "Python 3",
59 |    "language": "python",
60 |    "name": "python3"
61 |   },
62 |   "language_info": {
63 |    "codemirror_mode": {
64 |     "name": "ipython",
65 |     "version": 3
66 |    },
67 |    "file_extension": ".py",
68 |    "mimetype": "text/x-python",
69 |    "name": "python",
70 |    "nbconvert_exporter": "python",
71 |    "pygments_lexer": "ipython3",
72 |    "version": "3.6.5"
73 |   }
74 |  },
75 |  "nbformat": 4,
76 |  "nbformat_minor": 4
77 | }
78 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/scripts/generate_tfrecord.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Usage:
  3 |   # From tensorflow/models/
  4 |   # Create train data:
  5 |   python generate_tfrecord.py --csv_input=data/train_labels.csv  --output_path=train.record
  6 | 
  7 |   # Create test data:
  8 |   python generate_tfrecord.py --csv_input=data/test_labels.csv  --output_path=test.record
  9 | """
 10 | from __future__ import division
 11 | from __future__ import print_function
 12 | from __future__ import absolute_import
 13 | 
 14 | import os
 15 | import io
 16 | import pandas as pd
 17 | import tensorflow.compat.v1 as tf
 18 | 
 19 | from PIL import Image
 20 | from object_detection.utils import dataset_util
 21 | from collections import namedtuple, OrderedDict
 22 | 
 23 | flags = tf.app.flags
 24 | flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
 25 | flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
 26 | flags.DEFINE_string('image_dir', '', 'Path to images')
 27 | flags.DEFINE_string('label_map', '', 'Comma separated label map, e.g. label1:value1,label2:value2')
 28 | FLAGS = flags.FLAGS
 29 | 
 30 | def split(df, group):
 31 |     data = namedtuple('data', ['filename', 'object'])
 32 |     gb = df.groupby(group)
 33 |     return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
 34 | 
 35 | 
 36 | def create_tf_example(group, path, labelMap):
 37 |     with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
 38 |         encoded_jpg = fid.read()
 39 |     encoded_jpg_io = io.BytesIO(encoded_jpg)
 40 |     image = Image.open(encoded_jpg_io)
 41 |     width, height = image.size
 42 | 
 43 |     filename = group.filename.encode('utf8')
 44 |     image_format = b'jpg'
 45 |     xmins = []
 46 |     xmaxs = []
 47 |     ymins = []
 48 |     ymaxs = []
 49 |     classes_text = []
 50 |     classes = []
 51 | 
 52 |     for index, row in group.object.iterrows():
 53 |         xmins.append(row['xmin'] / width)
 54 |         xmaxs.append(row['xmax'] / width)
 55 |         ymins.append(row['ymin'] / height)
 56 |         ymaxs.append(row['ymax'] / height)
 57 |         classes_text.append(row['class'].encode('utf8'))
 58 |         classes.append(labelMap[row['class']])
 59 | 
 60 |     tf_example = tf.train.Example(features=tf.train.Features(feature={
 61 |         'image/height': dataset_util.int64_feature(height),
 62 |         'image/width': dataset_util.int64_feature(width),
 63 |         'image/filename': dataset_util.bytes_feature(filename),
 64 |         'image/source_id': dataset_util.bytes_feature(filename),
 65 |         'image/encoded': dataset_util.bytes_feature(encoded_jpg),
 66 |         'image/format': dataset_util.bytes_feature(image_format),
 67 |         'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
 68 |         'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
 69 |         'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
 70 |         'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
 71 |         'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
 72 |         'image/object/class/label': dataset_util.int64_list_feature(classes),
 73 |     }))
 74 |     return tf_example
 75 |     
 76 | def getLabelMap(labelMapAsStr):
 77 |     labelMap = {}
 78 |     for labelValue in [x for x in labelMapAsStr.split(',')]:
 79 |         labelMap[labelValue.split(':')[0]] = int(labelValue.split(':')[1])
 80 |     return labelMap
 81 | 
 82 | 
 83 | def main(_):
 84 |     writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
 85 |     path = os.path.join(FLAGS.image_dir)
 86 |     examples = pd.read_csv(FLAGS.csv_input)
 87 |     label_map = getLabelMap(FLAGS.label_map)
 88 |     print(str(label_map))
 89 |     grouped = split(examples, 'filename')
 90 |     for group in grouped:
 91 |         tf_example = create_tf_example(group, path, label_map)
 92 |         writer.write(tf_example.SerializeToString())
 93 | 
 94 |     writer.close()
 95 |     output_path = os.path.join(os.getcwd(), FLAGS.output_path)
 96 |     print('Successfully created the TFRecords: {}'.format(output_path))
 97 | 
 98 | 
 99 | if __name__ == '__main__':
100 |     tf.app.run()
101 | 


--------------------------------------------------------------------------------
/opencv/yolov4_webcam.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import imutils
  3 | import numpy as np
  4 | import random
  5 | import colorsys
  6 | 
  7 | # inspiration and some code pieces were copied from https://github.com/haroonshakeel/yolo_get_preds/blob/master/my_utils.py
  8 | 
  9 | def get_random_bright_colors(size):
 10 |     for i in range(0,size-1):
 11 |         h,s,l = random.random(), 0.5 + random.random()/2.0, 0.4 + random.random()/5.0
 12 |         r,g,b = [int(256*i) for i in colorsys.hls_to_rgb(h,l,s)]
 13 |         yield (r,g,b)
 14 | 
 15 | 
 16 | def get_yolo_preds(net, video_url, confidence_threshold, overlapping_threshold, labels = None, frame_resize_width=None):
 17 | 
 18 |     # List of colors to represent each class label with distinct bright color
 19 |     colors = list(get_random_bright_colors(len(labels)))
 20 | 
 21 |     ln = net.getLayerNames()
 22 |     ln = [ln[i[0] - 1] for i in net.getUnconnectedOutLayers()]
 23 |     cap = cv2.VideoCapture(video_url)
 24 | 
 25 |     try:
 26 |         if not cap.isOpened():
 27 |             print("Error opening video stream or file")
 28 |             return
 29 | 
 30 |         yolo_width_height = (416, 416)
 31 | 
 32 |         counter = 0
 33 |         max_count = 0
 34 | 
 35 |         while True:
 36 |             (_, frame) = cap.read()
 37 | 
 38 |             counter += 1 
 39 |             
 40 |             if frame_resize_width:
 41 |                 frame = imutils.resize(frame, width=frame_resize_width)
 42 |             (H, W) = frame.shape[:2]
 43 | 
 44 |             # Construct blob of frames by standardization, resizing, and swapping Red and Blue channels (RBG to RGB)
 45 |             blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, yolo_width_height, swapRB=True, crop=False)
 46 |             net.setInput(blob)
 47 |             layerOutputs = net.forward(ln)
 48 |             boxes = []
 49 |             confidences = []
 50 |             classIDs = []
 51 |             for output in layerOutputs:
 52 |                 for detection in output:
 53 |                     scores = detection[5:]
 54 |                     classID = np.argmax(scores)
 55 |                     confidence = scores[classID]
 56 |                     if confidence > confidence_threshold:
 57 |                         # Scale the bboxes back to the original image size
 58 |                         box = detection[0:4] * np.array([W, H, W, H])
 59 |                         (centerX, centerY, width, height) = box.astype("int")
 60 |                         x = int(centerX - (width / 2))
 61 |                         y = int(centerY - (height / 2))
 62 |                         boxes.append([x, y, int(width), int(height)])
 63 |                         confidences.append(float(confidence))
 64 |                         classIDs.append(classID)
 65 | 
 66 |             # Remove overlapping bounding boxes
 67 |             bboxes = cv2.dnn.NMSBoxes(
 68 |                 boxes, confidences, confidence_threshold, overlapping_threshold)
 69 |             if len(bboxes) > 0:
 70 |                 for i in bboxes.flatten():
 71 |                     (x, y) = (boxes[i][0], boxes[i][1])
 72 |                     (w, h) = (boxes[i][2], boxes[i][3])
 73 |                     color = [int(c) for c in colors[classIDs[i]]]
 74 |                     cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
 75 |                     text = "{}: {:.4f}".format(labels[classIDs[i]], confidences[i])
 76 |                     
 77 |                     # draw bounding box title background
 78 |                     text_offset_x = x
 79 |                     text_offset_y = y
 80 |                     text_color = (255, 255, 255)
 81 |                     (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.0, thickness=1)[0]
 82 |                     box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width - 80, text_offset_y - text_height + 4))
 83 |                     cv2.rectangle(frame, box_coords[0], box_coords[1], color, cv2.FILLED)
 84 |                     
 85 |                     # draw bounding box title
 86 |                     cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)
 87 | 
 88 |             #if len(bboxes) > max_count:
 89 |             #    max_count = len(bboxes)
 90 |             #    cv2.imwrite('captured_' + str(counter) + '.jpg', frame) 
 91 |             cv2.imshow("YOLOv4 Object Detection", frame)
 92 |             key = cv2.waitKey(1) & 0xFF
 93 |             # if the `q` key was pressed, break the loop
 94 |             if key == ord("q"):
 95 |                 break
 96 |     finally:
 97 |         cap.release()
 98 |         cv2.destroyAllWindows()
 99 | 
100 | 
101 | with open("model/coco.names","r", encoding="utf-8") as f:
102 |     labels = f.read().strip().split("\n")
103 | 
104 | yolo_config_path = "model/yolov4.cfg"
105 | yolo_weights_path = "model/yolov4.weights"
106 | 
107 | useCuda = True
108 | 
109 | net = cv2.dnn.readNetFromDarknet(yolo_config_path, yolo_weights_path)
110 | 
111 | if useCuda:
112 |     net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
113 |     net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
114 | 
115 | video_url = "https://cdn-004.whatsupcams.com/hls/hr_pula01.m3u8"
116 | frame_width = 1200
117 | 
118 | if __name__ == '__main__':
119 |     get_yolo_preds(net, video_url, 0.6, 0.1, labels,frame_width)


--------------------------------------------------------------------------------
/tensorflow/object_detection/webcam_detection_custom.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import pathlib\n",
 11 |     "\n",
 12 |     "\n",
 13 |     "if \"models\" in pathlib.Path.cwd().parts:\n",
 14 |     "    while \"models\" in pathlib.Path.cwd().parts:\n",
 15 |     "        os.chdir('..')\n",
 16 |     "elif not pathlib.Path('models').exists():\n",
 17 |     "    !git clone --depth 1 https://github.com/tensorflow/models"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": 2,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import numpy as np\n",
 27 |     "import os\n",
 28 |     "import six.moves.urllib as urllib\n",
 29 |     "import sys\n",
 30 |     "import tarfile\n",
 31 |     "import tensorflow as tf\n",
 32 |     "import zipfile\n",
 33 |     "\n",
 34 |     "from collections import defaultdict\n",
 35 |     "from io import StringIO\n",
 36 |     "from matplotlib import pyplot as plt\n",
 37 |     "from PIL import Image\n",
 38 |     "from IPython.display import display"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {
 45 |     "scrolled": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "from object_detection.utils import ops as utils_ops\n",
 50 |     "from object_detection.utils import label_map_util\n",
 51 |     "from object_detection.utils import visualization_utils as vis_util"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 4,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "name": "stdout",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "WARNING:tensorflow:From e:\\python\\tensorflow\\models\\research\\object_detection\\utils\\label_map_util.py:138: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.\n",
 64 |       "\n"
 65 |      ]
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "MODEL_NAME = 'models/research/object_detection/inference_graph'\n",
 70 |     "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
 71 |     "PATH_TO_LABELS = 'models/research/object_detection/training/labelmap.pbtxt'\n",
 72 |     "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 5,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": [
 81 |     "detection_graph = tf.Graph()\n",
 82 |     "with detection_graph.as_default():\n",
 83 |     "  od_graph_def = tf.GraphDef()\n",
 84 |     "  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n",
 85 |     "    serialized_graph = fid.read()\n",
 86 |     "    od_graph_def.ParseFromString(serialized_graph)\n",
 87 |     "    tf.import_graph_def(od_graph_def, name='')"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 6,
 93 |    "metadata": {},
 94 |    "outputs": [],
 95 |    "source": [
 96 |     "def run_inference_for_single_image(image, graph):\n",
 97 |     "    if 'detection_masks' in tensor_dict:\n",
 98 |     "        # The following processing is only for single image\n",
 99 |     "        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
100 |     "        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
101 |     "        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
102 |     "        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
103 |     "        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
104 |     "        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
105 |     "        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
106 |     "            detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
107 |     "        detection_masks_reframed = tf.cast(\n",
108 |     "            tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
109 |     "        # Follow the convention by adding back the batch dimension\n",
110 |     "        tensor_dict['detection_masks'] = tf.expand_dims(\n",
111 |     "            detection_masks_reframed, 0)\n",
112 |     "    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
113 |     "\n",
114 |     "    # Run inference\n",
115 |     "    output_dict = sess.run(tensor_dict,\n",
116 |     "                            feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
117 |     "\n",
118 |     "    # all outputs are float32 numpy arrays, so convert types as appropriate\n",
119 |     "    output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
120 |     "    output_dict['detection_classes'] = output_dict[\n",
121 |     "        'detection_classes'][0].astype(np.uint8)\n",
122 |     "    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
123 |     "    output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
124 |     "    if 'detection_masks' in output_dict:\n",
125 |     "        output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
126 |     "    return output_dict"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": 7,
132 |    "metadata": {},
133 |    "outputs": [],
134 |    "source": [
135 |     "import cv2\n",
136 |     "cap = cv2.VideoCapture(0)\n",
137 |     "try:\n",
138 |     "    with detection_graph.as_default():\n",
139 |     "        with tf.Session() as sess:\n",
140 |     "            # Get handles to input and output tensors\n",
141 |     "            ops = tf.get_default_graph().get_operations()\n",
142 |     "            all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
143 |     "            tensor_dict = {}\n",
144 |     "            for key in [\n",
145 |     "              'num_detections', 'detection_boxes', 'detection_scores',\n",
146 |     "              'detection_classes', 'detection_masks'\n",
147 |     "            ]:\n",
148 |     "                tensor_name = key + ':0'\n",
149 |     "                if tensor_name in all_tensor_names:\n",
150 |     "                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
151 |     "                  tensor_name)\n",
152 |     "\n",
153 |     "            while True:\n",
154 |     "                ret, image_np = cap.read()\n",
155 |     "                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
156 |     "                image_np_expanded = np.expand_dims(image_np, axis=0)\n",
157 |     "                # Actual detection.\n",
158 |     "                output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
159 |     "                # Visualization of the results of a detection.\n",
160 |     "                vis_util.visualize_boxes_and_labels_on_image_array(\n",
161 |     "                    image_np,\n",
162 |     "                    output_dict['detection_boxes'],\n",
163 |     "                    output_dict['detection_classes'],\n",
164 |     "                    output_dict['detection_scores'],\n",
165 |     "                    category_index,\n",
166 |     "                    instance_masks=output_dict.get('detection_masks'),\n",
167 |     "                    use_normalized_coordinates=True,\n",
168 |     "                    line_thickness=4)\n",
169 |     "                cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))\n",
170 |     "                if cv2.waitKey(25) & 0xFF == ord('q'):\n",
171 |     "                    cap.release()\n",
172 |     "                    cv2.destroyAllWindows()\n",
173 |     "                    break\n",
174 |     "except Exception as e:\n",
175 |     "    print(e)\n",
176 |     "    cap.release()"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": null,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": []
185 |   }
186 |  ],
187 |  "metadata": {
188 |   "kernelspec": {
189 |    "display_name": "Python 3",
190 |    "language": "python",
191 |    "name": "python3"
192 |   },
193 |   "language_info": {
194 |    "codemirror_mode": {
195 |     "name": "ipython",
196 |     "version": 3
197 |    },
198 |    "file_extension": ".py",
199 |    "mimetype": "text/x-python",
200 |    "name": "python",
201 |    "nbconvert_exporter": "python",
202 |    "pygments_lexer": "ipython3",
203 |    "version": "3.6.5"
204 |   }
205 |  },
206 |  "nbformat": 4,
207 |  "nbformat_minor": 2
208 | }
209 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/webcam_detection_custom-ip_camera.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import os\n",
 10 |     "import pathlib\n",
 11 |     "\n",
 12 |     "\n",
 13 |     "if \"models\" in pathlib.Path.cwd().parts:\n",
 14 |     "    while \"models\" in pathlib.Path.cwd().parts:\n",
 15 |     "        os.chdir('..')\n",
 16 |     "elif not pathlib.Path('models').exists():\n",
 17 |     "    !git clone --depth 1 https://github.com/tensorflow/models"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {},
 24 |    "outputs": [],
 25 |    "source": [
 26 |     "import numpy as np\n",
 27 |     "import six.moves.urllib as urllib\n",
 28 |     "import sys\n",
 29 |     "import tarfile\n",
 30 |     "import tensorflow as tf\n",
 31 |     "import zipfile\n",
 32 |     "import cv2\n",
 33 |     "\n",
 34 |     "from collections import defaultdict\n",
 35 |     "from io import StringIO\n",
 36 |     "from matplotlib import pyplot as plt\n",
 37 |     "from PIL import Image\n",
 38 |     "from IPython.display import display"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {
 45 |     "scrolled": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "from object_detection.utils import ops as utils_ops\n",
 50 |     "from object_detection.utils import label_map_util\n",
 51 |     "from object_detection.utils import visualization_utils as vis_util"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": null,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "MODEL_NAME = 'models/research/object_detection/inference_graph'\n",
 61 |     "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
 62 |     "PATH_TO_LABELS = 'models/research/object_detection/training/labelmap.pbtxt'\n",
 63 |     "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "detection_graph = tf.Graph()\n",
 73 |     "with detection_graph.as_default():\n",
 74 |     "  od_graph_def = tf.GraphDef()\n",
 75 |     "  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n",
 76 |     "    serialized_graph = fid.read()\n",
 77 |     "    od_graph_def.ParseFromString(serialized_graph)\n",
 78 |     "    tf.import_graph_def(od_graph_def, name='')"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "def run_inference_for_single_image(image, graph):\n",
 88 |     "    if 'detection_masks' in tensor_dict:\n",
 89 |     "        # The following processing is only for single image\n",
 90 |     "        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
 91 |     "        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
 92 |     "        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
 93 |     "        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
 94 |     "        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
 95 |     "        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
 96 |     "        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
 97 |     "            detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
 98 |     "        detection_masks_reframed = tf.cast(\n",
 99 |     "            tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
100 |     "        # Follow the convention by adding back the batch dimension\n",
101 |     "        tensor_dict['detection_masks'] = tf.expand_dims(\n",
102 |     "            detection_masks_reframed, 0)\n",
103 |     "    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
104 |     "\n",
105 |     "    # Run inference\n",
106 |     "    output_dict = sess.run(tensor_dict,\n",
107 |     "                            feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
108 |     "\n",
109 |     "    # all outputs are float32 numpy arrays, so convert types as appropriate\n",
110 |     "    output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
111 |     "    output_dict['detection_classes'] = output_dict[\n",
112 |     "        'detection_classes'][0].astype(np.uint8)\n",
113 |     "    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
114 |     "    output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
115 |     "    if 'detection_masks' in output_dict:\n",
116 |     "        output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
117 |     "    return output_dict"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "code",
122 |    "execution_count": null,
123 |    "metadata": {},
124 |    "outputs": [],
125 |    "source": [
126 |     "#cap = cv2.VideoCapture('rtsp://username:password@192.168.0.137:10554/tcp/av0_1')\n",
127 |     "cap = cv2.VideoCapture('http://192.168.0.137:81/videostream.cgi?loginuse=username&loginpas=password&resolution=32')\n",
128 |     "cap.set(cv2.CAP_PROP_BUFFERSIZE,1)\n",
129 |     "#print(cv2.getBuildInformation())\n",
130 |     "\n",
131 |     "IP_CAMERA_RESOLUTION = (640, 360)\n",
132 |     "\n",
133 |     "try:\n",
134 |     "    with detection_graph.as_default():\n",
135 |     "        with tf.Session() as sess:\n",
136 |     "            # Get handles to input and output tensors\n",
137 |     "            ops = tf.get_default_graph().get_operations()\n",
138 |     "            all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
139 |     "            tensor_dict = {}\n",
140 |     "            for key in [\n",
141 |     "              'num_detections', 'detection_boxes', 'detection_scores',\n",
142 |     "              'detection_classes', 'detection_masks'\n",
143 |     "            ]:\n",
144 |     "                tensor_name = key + ':0'\n",
145 |     "                if tensor_name in all_tensor_names:\n",
146 |     "                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
147 |     "                  tensor_name)\n",
148 |     "\n",
149 |     "            while True:\n",
150 |     "                ret, image_np = cap.read()\n",
151 |     "                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
152 |     "                image_np_expanded = np.expand_dims(image_np, axis=0)\n",
153 |     "                # Actual detection.\n",
154 |     "                output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
155 |     "                # Visualization of the results of a detection.\n",
156 |     "                vis_util.visualize_boxes_and_labels_on_image_array(\n",
157 |     "                    image_np,\n",
158 |     "                    output_dict['detection_boxes'],\n",
159 |     "                    output_dict['detection_classes'],\n",
160 |     "                    output_dict['detection_scores'],\n",
161 |     "                    category_index,\n",
162 |     "                    instance_masks=output_dict.get('detection_masks'),\n",
163 |     "                    use_normalized_coordinates=True,\n",
164 |     "                    line_thickness=4)\n",
165 |     "                cv2.imshow('object_detection', cv2.resize(image_np, IP_CAMERA_RESOLUTION))\n",
166 |     "                if cv2.waitKey(25) & 0xFF == ord('q'):\n",
167 |     "                    cap.release()\n",
168 |     "                    cv2.destroyAllWindows()\n",
169 |     "                    break\n",
170 |     "except Exception as e:\n",
171 |     "    print(e)\n",
172 |     "    cap.release()"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": null,
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": []
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.6.5"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 2
204 | }
205 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/webcam_detection_non_customized.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 13,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import numpy as np\n",
 10 |     "import os\n",
 11 |     "import six.moves.urllib as urllib\n",
 12 |     "import sys\n",
 13 |     "import tarfile\n",
 14 |     "import tensorflow as tf\n",
 15 |     "import zipfile\n",
 16 |     "import cv2\n",
 17 |     "\n",
 18 |     "from collections import defaultdict\n",
 19 |     "from io import StringIO\n",
 20 |     "from matplotlib import pyplot as plt\n",
 21 |     "from PIL import Image\n",
 22 |     "from IPython.display import display\n",
 23 |     "\n",
 24 |     "from object_detection.utils import ops as utils_ops\n",
 25 |     "from object_detection.utils import label_map_util\n",
 26 |     "from object_detection.utils import visualization_utils as vis_util"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 14,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "# define your root directory here:\n",
 36 |     "\n",
 37 |     "root='e:/python/tensorflow/models/research/object_detection'\n",
 38 |     "\n",
 39 |     "# What model to download.\n",
 40 |     "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n",
 41 |     "MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
 42 |     "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
 43 |     "\n",
 44 |     "# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
 45 |     "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
 46 |     "\n",
 47 |     "# List of the strings that is used to add correct label for each box.\n",
 48 |     "PATH_TO_LABELS = root + '/data/mscoco_label_map.pbtxt'\n",
 49 |     "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "code",
 54 |    "execution_count": 15,
 55 |    "metadata": {},
 56 |    "outputs": [],
 57 |    "source": [
 58 |     "# download and extract the model file\n",
 59 |     "\n",
 60 |     "opener = urllib.request.URLopener()\n",
 61 |     "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
 62 |     "tar_file = tarfile.open(MODEL_FILE)\n",
 63 |     "for file in tar_file.getmembers():\n",
 64 |     "  file_name = os.path.basename(file.name)\n",
 65 |     "  if 'frozen_inference_graph.pb' in file_name:\n",
 66 |     "    tar_file.extract(file, os.getcwd())"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 16,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "# open the inference graph and load in into Tensorflow\n",
 76 |     "\n",
 77 |     "detection_graph = tf.Graph()\n",
 78 |     "with detection_graph.as_default():\n",
 79 |     "  od_graph_def = tf.GraphDef()\n",
 80 |     "  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n",
 81 |     "    serialized_graph = fid.read()\n",
 82 |     "    od_graph_def.ParseFromString(serialized_graph)\n",
 83 |     "    tf.import_graph_def(od_graph_def, name='')"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 17,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "def run_inference_for_single_image(image, graph):\n",
 93 |     "    if 'detection_masks' in tensor_dict:\n",
 94 |     "        # The following processing is only for single image\n",
 95 |     "        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
 96 |     "        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
 97 |     "        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
 98 |     "        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
 99 |     "        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
100 |     "        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
101 |     "        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
102 |     "            detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
103 |     "        detection_masks_reframed = tf.cast(\n",
104 |     "            tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
105 |     "        # Follow the convention by adding back the batch dimension\n",
106 |     "        tensor_dict['detection_masks'] = tf.expand_dims(\n",
107 |     "            detection_masks_reframed, 0)\n",
108 |     "        \n",
109 |     "    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
110 |     "\n",
111 |     "    # Run inference\n",
112 |     "    output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
113 |     "\n",
114 |     "    # all outputs are float32 numpy arrays, so convert types as appropriate\n",
115 |     "    output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
116 |     "    output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)\n",
117 |     "    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
118 |     "    output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
119 |     "    \n",
120 |     "    if 'detection_masks' in output_dict:\n",
121 |     "        output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
122 |     "        \n",
123 |     "    return output_dict"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "code",
128 |    "execution_count": 18,
129 |    "metadata": {},
130 |    "outputs": [],
131 |    "source": [
132 |     "cap = cv2.VideoCapture(0)\n",
133 |     "try:\n",
134 |     "    with detection_graph.as_default():\n",
135 |     "        with tf.Session() as sess:\n",
136 |     "            # Get handles to input and output tensors\n",
137 |     "            ops = tf.get_default_graph().get_operations()\n",
138 |     "            all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
139 |     "            tensor_dict = {}\n",
140 |     "            for key in [\n",
141 |     "              'num_detections', 'detection_boxes', 'detection_scores',\n",
142 |     "              'detection_classes', 'detection_masks'\n",
143 |     "            ]:\n",
144 |     "                tensor_name = key + ':0'\n",
145 |     "                if tensor_name in all_tensor_names:\n",
146 |     "                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
147 |     "                  tensor_name)\n",
148 |     "\n",
149 |     "            while True:\n",
150 |     "                ret, image_np = cap.read()\n",
151 |     "                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
152 |     "                image_np_expanded = np.expand_dims(image_np, axis=0)\n",
153 |     "                # Actual detection.\n",
154 |     "                output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
155 |     "                # Visualization of the results of a detection.\n",
156 |     "                vis_util.visualize_boxes_and_labels_on_image_array(\n",
157 |     "                    image_np,\n",
158 |     "                    output_dict['detection_boxes'],\n",
159 |     "                    output_dict['detection_classes'],\n",
160 |     "                    output_dict['detection_scores'],\n",
161 |     "                    category_index,\n",
162 |     "                    instance_masks=output_dict.get('detection_masks'),\n",
163 |     "                    use_normalized_coordinates=True,\n",
164 |     "                    line_thickness=4)\n",
165 |     "                cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))\n",
166 |     "                if cv2.waitKey(25) & 0xFF == ord('q'):\n",
167 |     "                    cap.release()\n",
168 |     "                    cv2.destroyAllWindows()\n",
169 |     "                    break\n",
170 |     "except Exception as e:\n",
171 |     "    print(e)\n",
172 |     "    cap.release()"
173 |    ]
174 |   },
175 |   {
176 |    "cell_type": "code",
177 |    "execution_count": null,
178 |    "metadata": {},
179 |    "outputs": [],
180 |    "source": []
181 |   }
182 |  ],
183 |  "metadata": {
184 |   "kernelspec": {
185 |    "display_name": "Python 3",
186 |    "language": "python",
187 |    "name": "python3"
188 |   },
189 |   "language_info": {
190 |    "codemirror_mode": {
191 |     "name": "ipython",
192 |     "version": 3
193 |    },
194 |    "file_extension": ".py",
195 |    "mimetype": "text/x-python",
196 |    "name": "python",
197 |    "nbconvert_exporter": "python",
198 |    "pygments_lexer": "ipython3",
199 |    "version": "3.6.5"
200 |   }
201 |  },
202 |  "nbformat": 4,
203 |  "nbformat_minor": 2
204 | }
205 | 


--------------------------------------------------------------------------------
/tensorflow/object_detection/object_detection_api_test.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "colab": {
  8 |      "autoexec": {
  9 |       "startup": false,
 10 |       "wait_interval": 0
 11 |      }
 12 |     },
 13 |     "colab_type": "code",
 14 |     "id": "hV4P5gyTWKMI"
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "import numpy as np\n",
 19 |     "import os\n",
 20 |     "import six.moves.urllib as urllib\n",
 21 |     "import sys\n",
 22 |     "import tarfile\n",
 23 |     "import tensorflow as tf\n",
 24 |     "import zipfile\n",
 25 |     "\n",
 26 |     "from distutils.version import StrictVersion\n",
 27 |     "from collections import defaultdict\n",
 28 |     "from io import StringIO\n",
 29 |     "from matplotlib import pyplot as plt\n",
 30 |     "from PIL import Image\n",
 31 |     "\n",
 32 |     "# This is needed since the notebook is stored in the object_detection folder.\n",
 33 |     "sys.path.append(\"..\")\n",
 34 |     "from object_detection.utils import ops as utils_ops\n",
 35 |     "\n",
 36 |     "if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):\n",
 37 |     "  raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n",
 38 |     "\n",
 39 |     "# define your root directory here:\n",
 40 |     "\n",
 41 |     "root='e:/python/tensorflow/models/research/object_detection'\n"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 2,
 47 |    "metadata": {
 48 |     "colab": {
 49 |      "autoexec": {
 50 |       "startup": false,
 51 |       "wait_interval": 0
 52 |      }
 53 |     },
 54 |     "colab_type": "code",
 55 |     "id": "v7m_NY_aWKMK"
 56 |    },
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "# This is needed to display the images.\n",
 60 |     "%matplotlib inline"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {
 66 |     "colab_type": "text",
 67 |     "id": "r5FNuiRPWKMN"
 68 |    },
 69 |    "source": [
 70 |     "## Object detection imports\n",
 71 |     "Here are the imports from the object detection module."
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "code",
 76 |    "execution_count": 3,
 77 |    "metadata": {
 78 |     "colab": {
 79 |      "autoexec": {
 80 |       "startup": false,
 81 |       "wait_interval": 0
 82 |      }
 83 |     },
 84 |     "colab_type": "code",
 85 |     "id": "bm0_uNRnWKMN"
 86 |    },
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "from utils import label_map_util\n",
 90 |     "from utils import visualization_utils as vis_util"
 91 |    ]
 92 |   },
 93 |   {
 94 |    "cell_type": "markdown",
 95 |    "metadata": {
 96 |     "colab_type": "text",
 97 |     "id": "cfn_tRFOWKMO"
 98 |    },
 99 |    "source": [
100 |     "# Model preparation "
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": 4,
106 |    "metadata": {
107 |     "colab": {
108 |      "autoexec": {
109 |       "startup": false,
110 |       "wait_interval": 0
111 |      }
112 |     },
113 |     "colab_type": "code",
114 |     "id": "VyPz_t8WWKMQ"
115 |    },
116 |    "outputs": [],
117 |    "source": [
118 |     "# What model to download.\n",
119 |     "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n",
120 |     "MODEL_FILE = MODEL_NAME + '.tar.gz'\n",
121 |     "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n",
122 |     "\n",
123 |     "# Path to frozen detection graph. This is the actual model that is used for the object detection.\n",
124 |     "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n",
125 |     "\n",
126 |     "# List of the strings that is used to add correct label for each box.\n",
127 |     "PATH_TO_LABELS = root + '/data/mscoco_label_map.pbtxt'"
128 |    ]
129 |   },
130 |   {
131 |    "cell_type": "markdown",
132 |    "metadata": {
133 |     "colab_type": "text",
134 |     "id": "7ai8pLZZWKMS"
135 |    },
136 |    "source": [
137 |     "## Download Model"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "code",
142 |    "execution_count": 5,
143 |    "metadata": {
144 |     "colab": {
145 |      "autoexec": {
146 |       "startup": false,
147 |       "wait_interval": 0
148 |      }
149 |     },
150 |     "colab_type": "code",
151 |     "id": "KILYnwR5WKMS"
152 |    },
153 |    "outputs": [],
154 |    "source": [
155 |     "opener = urllib.request.URLopener()\n",
156 |     "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n",
157 |     "tar_file = tarfile.open(MODEL_FILE)\n",
158 |     "for file in tar_file.getmembers():\n",
159 |     "  file_name = os.path.basename(file.name)\n",
160 |     "  if 'frozen_inference_graph.pb' in file_name:\n",
161 |     "    tar_file.extract(file, os.getcwd())"
162 |    ]
163 |   },
164 |   {
165 |    "cell_type": "markdown",
166 |    "metadata": {
167 |     "colab_type": "text",
168 |     "id": "YBcB9QHLWKMU"
169 |    },
170 |    "source": [
171 |     "## Load a (frozen) Tensorflow model into memory."
172 |    ]
173 |   },
174 |   {
175 |    "cell_type": "code",
176 |    "execution_count": 6,
177 |    "metadata": {
178 |     "colab": {
179 |      "autoexec": {
180 |       "startup": false,
181 |       "wait_interval": 0
182 |      }
183 |     },
184 |     "colab_type": "code",
185 |     "id": "KezjCRVvWKMV"
186 |    },
187 |    "outputs": [],
188 |    "source": [
189 |     "detection_graph = tf.Graph()\n",
190 |     "with detection_graph.as_default():\n",
191 |     "  od_graph_def = tf.GraphDef()\n",
192 |     "  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n",
193 |     "    serialized_graph = fid.read()\n",
194 |     "    od_graph_def.ParseFromString(serialized_graph)\n",
195 |     "    tf.import_graph_def(od_graph_def, name='')"
196 |    ]
197 |   },
198 |   {
199 |    "cell_type": "markdown",
200 |    "metadata": {
201 |     "colab_type": "text",
202 |     "id": "_1MVVTcLWKMW"
203 |    },
204 |    "source": [
205 |     "## Loading label map\n",
206 |     "Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "code",
211 |    "execution_count": 7,
212 |    "metadata": {
213 |     "colab": {
214 |      "autoexec": {
215 |       "startup": false,
216 |       "wait_interval": 0
217 |      }
218 |     },
219 |     "colab_type": "code",
220 |     "id": "hDbpHkiWWKMX"
221 |    },
222 |    "outputs": [],
223 |    "source": [
224 |     "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {
230 |     "colab_type": "text",
231 |     "id": "EFsoUHvbWKMZ"
232 |    },
233 |    "source": [
234 |     "## Helper code"
235 |    ]
236 |   },
237 |   {
238 |    "cell_type": "code",
239 |    "execution_count": 8,
240 |    "metadata": {
241 |     "colab": {
242 |      "autoexec": {
243 |       "startup": false,
244 |       "wait_interval": 0
245 |      }
246 |     },
247 |     "colab_type": "code",
248 |     "id": "aSlYc3JkWKMa"
249 |    },
250 |    "outputs": [],
251 |    "source": [
252 |     "def load_image_into_numpy_array(image):\n",
253 |     "  (im_width, im_height) = image.size\n",
254 |     "  return np.array(image.getdata()).reshape(\n",
255 |     "      (im_height, im_width, 3)).astype(np.uint8)"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {
261 |     "colab_type": "text",
262 |     "id": "H0_1AGhrWKMc"
263 |    },
264 |    "source": [
265 |     "# Detection"
266 |    ]
267 |   },
268 |   {
269 |    "cell_type": "code",
270 |    "execution_count": 9,
271 |    "metadata": {
272 |     "colab": {
273 |      "autoexec": {
274 |       "startup": false,
275 |       "wait_interval": 0
276 |      }
277 |     },
278 |     "colab_type": "code",
279 |     "id": "jG-zn5ykWKMd"
280 |    },
281 |    "outputs": [],
282 |    "source": [
283 |     "# For the sake of simplicity we will use only 2 images:\n",
284 |     "# image1.jpg\n",
285 |     "# image2.jpg\n",
286 |     "# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n",
287 |     "PATH_TO_TEST_IMAGES_DIR = root + '/test_images'\n",
288 |     "TEST_IMAGE_PATHS = [ PATH_TO_TEST_IMAGES_DIR + '/image{}.jpg'.format(i) for i in range(1, 3) ]\n",
289 |     "\n",
290 |     "# Size, in inches, of the output images.\n",
291 |     "IMAGE_SIZE = (14, 10)"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": 10,
297 |    "metadata": {
298 |     "colab": {
299 |      "autoexec": {
300 |       "startup": false,
301 |       "wait_interval": 0
302 |      }
303 |     },
304 |     "colab_type": "code",
305 |     "id": "92BHxzcNWKMf"
306 |    },
307 |    "outputs": [],
308 |    "source": [
309 |     "def run_inference_for_single_image(image, graph):\n",
310 |     "    if 'detection_masks' in tensor_dict:\n",
311 |     "        # The following processing is only for single image\n",
312 |     "        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n",
313 |     "        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n",
314 |     "        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n",
315 |     "        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n",
316 |     "        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n",
317 |     "        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n",
318 |     "        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n",
319 |     "            detection_masks, detection_boxes, image.shape[0], image.shape[1])\n",
320 |     "        detection_masks_reframed = tf.cast(\n",
321 |     "            tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n",
322 |     "        # Follow the convention by adding back the batch dimension\n",
323 |     "        tensor_dict['detection_masks'] = tf.expand_dims(\n",
324 |     "            detection_masks_reframed, 0)\n",
325 |     "    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n",
326 |     "\n",
327 |     "    # Run inference\n",
328 |     "    output_dict = sess.run(tensor_dict,\n",
329 |     "                            feed_dict={image_tensor: np.expand_dims(image, 0)})\n",
330 |     "\n",
331 |     "    # all outputs are float32 numpy arrays, so convert types as appropriate\n",
332 |     "    output_dict['num_detections'] = int(output_dict['num_detections'][0])\n",
333 |     "    output_dict['detection_classes'] = output_dict[\n",
334 |     "        'detection_classes'][0].astype(np.uint8)\n",
335 |     "    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n",
336 |     "    output_dict['detection_scores'] = output_dict['detection_scores'][0]\n",
337 |     "    if 'detection_masks' in output_dict:\n",
338 |     "        output_dict['detection_masks'] = output_dict['detection_masks'][0]\n",
339 |     "    return output_dict"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "code",
344 |    "execution_count": 11,
345 |    "metadata": {},
346 |    "outputs": [
347 |     {
348 |      "name": "stdout",
349 |      "output_type": "stream",
350 |      "text": [
351 |       "Processing image: e:/python/tensorflow/models/research/object_detection/test_images/image1.jpg\n",
352 |       "Processing image: e:/python/tensorflow/models/research/object_detection/test_images/image2.jpg\n"
353 |      ]
354 |     }
355 |    ],
356 |    "source": [
357 |     "with detection_graph.as_default():\n",
358 |     "    with tf.Session() as sess:\n",
359 |     "        # Get handles to input and output tensors\n",
360 |     "        ops = tf.get_default_graph().get_operations()\n",
361 |     "        all_tensor_names = {output.name for op in ops for output in op.outputs}\n",
362 |     "        tensor_dict = {}\n",
363 |     "        for key in [\n",
364 |     "          'num_detections', 'detection_boxes', 'detection_scores',\n",
365 |     "          'detection_classes', 'detection_masks'\n",
366 |     "        ]:\n",
367 |     "            tensor_name = key + ':0'\n",
368 |     "            if tensor_name in all_tensor_names:\n",
369 |     "                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n",
370 |     "              tensor_name)\n",
371 |     "\n",
372 |     "        for image_path in TEST_IMAGE_PATHS:\n",
373 |     "            print ('Processing image: ' + image_path)\n",
374 |     "            image = Image.open(image_path)   \n",
375 |     "            image_np = load_image_into_numpy_array(image)\n",
376 |     "            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n",
377 |     "            image_np_expanded = np.expand_dims(image_np, axis=0)\n",
378 |     "            # Actual detection.\n",
379 |     "            output_dict = run_inference_for_single_image(image_np, detection_graph)\n",
380 |     "            # Visualization of the results of a detection.\n",
381 |     "            vis_util.visualize_boxes_and_labels_on_image_array(\n",
382 |     "                image_np,\n",
383 |     "                output_dict['detection_boxes'],\n",
384 |     "                output_dict['detection_classes'],\n",
385 |     "                output_dict['detection_scores'],\n",
386 |     "                category_index,\n",
387 |     "                instance_masks=output_dict.get('detection_masks'),\n",
388 |     "                use_normalized_coordinates=True,\n",
389 |     "                line_thickness=4)\n",
390 |     "            plt.figure(figsize=IMAGE_SIZE)\n",
391 |     "            plt.imshow(image_np)\n"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "code",
396 |    "execution_count": null,
397 |    "metadata": {
398 |     "colab": {
399 |      "autoexec": {
400 |       "startup": false,
401 |       "wait_interval": 0
402 |      }
403 |     },
404 |     "colab_type": "code",
405 |     "id": "LQSEnEsPWKMj"
406 |    },
407 |    "outputs": [],
408 |    "source": []
409 |   }
410 |  ],
411 |  "metadata": {
412 |   "colab": {
413 |    "default_view": {},
414 |    "name": "object_detection_tutorial.ipynb?workspaceId=ronnyvotel:python_inference::citc",
415 |    "provenance": [],
416 |    "version": "0.3.2",
417 |    "views": {}
418 |   },
419 |   "kernelspec": {
420 |    "display_name": "Python 3",
421 |    "language": "python",
422 |    "name": "python3"
423 |   },
424 |   "language_info": {
425 |    "codemirror_mode": {
426 |     "name": "ipython",
427 |     "version": 3
428 |    },
429 |    "file_extension": ".py",
430 |    "mimetype": "text/x-python",
431 |    "name": "python",
432 |    "nbconvert_exporter": "python",
433 |    "pygments_lexer": "ipython3",
434 |    "version": "3.6.5"
435 |   }
436 |  },
437 |  "nbformat": 4,
438 |  "nbformat_minor": 1
439 | }
440 | 


--------------------------------------------------------------------------------