├── tensorflow
│   └── object_detection
│       ├── scripts
│       │   ├── xml_to_csv.py
│       │   ├── configure_training.py
│       │   └── generate_tfrecord.py
│       ├── video_recording_opencv.ipynb
│       ├── webcam_detection_custom.ipynb
│       ├── webcam_detection_custom-ip_camera.ipynb
│       ├── webcam_detection_non_customized.ipynb
│       └── object_detection_api_test.ipynb
└── opencv
    └── yolov4_webcam.py

/tensorflow/object_detection/scripts/xml_to_csv.py:
--------------------------------------------------------------------------------
import os
import sys
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import argparse


def xml_to_csv(path):
    """Collect all Pascal VOC .xml annotations in `path` into a single DataFrame."""
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            # member[0] is <name>, member[4] is <bndbox> with xmin/ymin/xmax/ymax children
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-imageRoot', help='the folder containing the training images in test and train folders', type=str, required=True)
    args = parser.parse_args()

    print(args.imageRoot)

    for folder in ['train', 'test']:
        image_path = os.path.join(args.imageRoot, folder)
        xml_df = xml_to_csv(image_path)
        xml_df.to_csv(os.path.join(args.imageRoot, folder + '_labels.csv'), index=False)
        print('Successfully converted xml to csv.')


if __name__ == '__main__':
    sys.exit(main())
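For reference, each annotated object becomes one row in the resulting CSV; with hypothetical filenames and labels, train_labels.csv looks like:

filename,width,height,class,xmin,ymin,xmax,ymax
img_0001.jpg,640,480,cat,112,86,301,240
img_0001.jpg,640,480,dog,330,120,610,415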
--------------------------------------------------------------------------------
/tensorflow/object_detection/scripts/configure_training.py:
--------------------------------------------------------------------------------
import subprocess
import shlex
import argparse
import sys


def getLabelMap(labelMapAsStr):
    """Parse a 'label1:1,label2:2' string into a {label: id} dict."""
    labelMap = {}
    for labelValue in labelMapAsStr.split(','):
        label, value = labelValue.split(':')
        labelMap[label] = int(value)
    return labelMap


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-imageRoot', help='the folder containing the training images in test and train folders', type=str, required=True)
    parser.add_argument('-labelMap', help='Comma separated label map, e.g. label1:value1,label2:value2', type=str, required=True)
    parser.add_argument('-labelMapOutputFile', help='The file where the .pbtxt file will be generated based on the labelMap', type=str, required=True)
    args = parser.parse_args()

    # Step 1: convert the Pascal VOC .xml annotations into train/test CSV files.
    subprocess.call(shlex.split('python xml_to_csv.py -imageRoot %s' % args.imageRoot), shell=False)

    # Step 2: build one TFRecord file per image set from those CSVs.
    for imageSet in ['train', 'test']:
        subprocess.call(shlex.split('python generate_tfrecord.py --csv_input={imageRoot}/{imageSet}_labels.csv --image_dir={imageRoot}/{imageSet} --output_path={imageRoot}/{imageSet}.record --label_map={labelMap}'.format(imageRoot=args.imageRoot, imageSet=imageSet, labelMap=args.labelMap)), shell=False)

    # Step 3: write the label map in the .pbtxt format the TensorFlow Object Detection API expects.
    labelDict = getLabelMap(args.labelMap)

    with open(args.labelMapOutputFile, 'wt') as outFile:
        for label, labelId in labelDict.items():
            outFile.write('item {\n')
            outFile.write("\tid: %s\n" % labelId)
            outFile.write("\tname: '%s'\n" % label)
            outFile.write('}\n')


if __name__ == '__main__':
    sys.exit(main())
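Called with, say, -labelMap cat:1,dog:2 (hypothetical labels), the loop above writes a labelmap.pbtxt of the form:

item {
	id: 1
	name: 'cat'
}
item {
	id: 2
	name: 'dog'
}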
--------------------------------------------------------------------------------
/tensorflow/object_detection/video_recording_opencv.ipynb:
--------------------------------------------------------------------------------
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import cv2\n",
    "import numpy as np\n",
    "import time\n",
    "\n",
    "# remember to use your own path and credentials\n",
    "cap = cv2.VideoCapture('http://192.168.0.137:81/videostream.cgi?loginuse=&loginpas=&resolution=32')\n",
    "cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)\n",
    "#print(cv2.getBuildInformation())\n",
    "\n",
    "if not cap.isOpened():\n",
    "    print(\"Unable to read camera feed\")\n",
    "\n",
    "frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "\n",
    "out = cv2.VideoWriter('recording.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 20.0, (frame_width, frame_height))\n",
    "\n",
    "# stop recording after one hour at the latest\n",
    "MAX_SECS = 60 * 60\n",
    "\n",
    "startTime = time.time()\n",
    "\n",
    "while True:\n",
    "    currentTime = time.time()\n",
    "\n",
    "    if currentTime - startTime > MAX_SECS:\n",
    "        break\n",
    "\n",
    "    ret, frame = cap.read()\n",
    "\n",
    "    if ret:\n",
    "        out.write(frame)\n",
    "        cv2.imshow('frame', frame)\n",
    "\n",
    "        # Press q on keyboard to stop recording\n",
    "        if cv2.waitKey(1) & 0xFF == ord('q'):\n",
    "            break\n",
    "\n",
    "    else:\n",
    "        break\n",
    "\n",
    "cap.release()\n",
    "out.release()\n",
    "\n",
    "cv2.destroyAllWindows()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
--------------------------------------------------------------------------------
/tensorflow/object_detection/scripts/generate_tfrecord.py:
--------------------------------------------------------------------------------
"""
Usage:
  # From tensorflow/models/
  # Create train data:
  python generate_tfrecord.py --csv_input=data/train_labels.csv --image_dir=data/train --output_path=train.record --label_map=label1:1,label2:2

  # Create test data:
  python generate_tfrecord.py --csv_input=data/test_labels.csv --image_dir=data/test --output_path=test.record --label_map=label1:1,label2:2
"""
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import os
import io
import pandas as pd
import tensorflow.compat.v1 as tf

from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple

flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('image_dir', '', 'Path to images')
flags.DEFINE_string('label_map', '', 'Comma separated label map, e.g. label1:value1,label2:value2')
FLAGS = flags.FLAGS


def split(df, group):
    """Group the annotation rows by image, one entry per distinct filename."""
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, rows) for filename, rows in gb]


def create_tf_example(group, path, labelMap):
    """Build one tf.train.Example from all annotations belonging to a single image."""
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        # Box coordinates are stored normalized to [0, 1]
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(labelMap[row['class']])

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example


def getLabelMap(labelMapAsStr):
    """Parse a 'label1:1,label2:2' string into a {label: id} dict."""
    labelMap = {}
    for labelValue in labelMapAsStr.split(','):
        label, value = labelValue.split(':')
        labelMap[label] = int(value)
    return labelMap


def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(FLAGS.image_dir)
    examples = pd.read_csv(FLAGS.csv_input)
    label_map = getLabelMap(FLAGS.label_map)
    print('Using label map: %s' % label_map)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path, label_map)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
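A quick way to sanity-check a generated file is to decode the first record back into a tf.train.Example (a minimal sketch using the same tf.compat.v1 import as above; 'train.record' is a placeholder path):

import tensorflow.compat.v1 as tf

# print the filename and class labels stored in the first example
for record in tf.python_io.tf_record_iterator('train.record'):
    example = tf.train.Example.FromString(record)
    print(example.features.feature['image/filename'].bytes_list.value)
    print(example.features.feature['image/object/class/text'].bytes_list.value)
    break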
--------------------------------------------------------------------------------
/opencv/yolov4_webcam.py:
--------------------------------------------------------------------------------
import cv2
import imutils
import numpy as np
import random
import colorsys

# inspiration and some code pieces were copied from https://github.com/haroonshakeel/yolo_get_preds/blob/master/my_utils.py


def get_random_bright_colors(size):
    # yield exactly one random bright color per class
    # (the earlier range(0, size - 1) produced one color too few,
    # which raised an IndexError for the last class id)
    for _ in range(size):
        h, s, l = random.random(), 0.5 + random.random() / 2.0, 0.4 + random.random() / 5.0
        r, g, b = [min(255, int(256 * c)) for c in colorsys.hls_to_rgb(h, l, s)]
        yield (r, g, b)


def get_yolo_preds(net, video_url, confidence_threshold, overlapping_threshold, labels=None, frame_resize_width=None):

    # List of colors to represent each class label with a distinct bright color
    colors = list(get_random_bright_colors(len(labels)))

    # Resolve the output layer names. getUnconnectedOutLayers() returns Nx1
    # arrays on older OpenCV 4.x builds and a flat array on newer ones, so
    # flatten before indexing (newer builds also offer
    # net.getUnconnectedOutLayersNames() as a shortcut).
    ln = net.getLayerNames()
    ln = [ln[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]
    cap = cv2.VideoCapture(video_url)

    try:
        if not cap.isOpened():
            print("Error opening video stream or file")
            return

        yolo_width_height = (416, 416)

        counter = 0
        max_count = 0

        while True:
            (grabbed, frame) = cap.read()

            # stop when the stream ends or a frame cannot be decoded
            if not grabbed:
                break

            counter += 1

            if frame_resize_width:
                frame = imutils.resize(frame, width=frame_resize_width)
            (H, W) = frame.shape[:2]

            # Construct a blob from the frame by scaling to [0, 1], resizing,
            # and swapping the Blue and Red channels (BGR to RGB)
            blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, yolo_width_height, swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(ln)
            boxes = []
            confidences = []
            classIDs = []
            for output in layerOutputs:
                for detection in output:
                    scores = detection[5:]
                    classID = np.argmax(scores)
                    confidence = scores[classID]
                    if confidence > confidence_threshold:
                        # Scale the bboxes back to the original image size
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            # Remove overlapping bounding boxes with non-maximum suppression
            bboxes = cv2.dnn.NMSBoxes(
                boxes, confidences, confidence_threshold, overlapping_threshold)
            if len(bboxes) > 0:
                for i in bboxes.flatten():
                    (x, y) = (boxes[i][0], boxes[i][1])
                    (w, h) = (boxes[i][2], boxes[i][3])
                    color = [int(c) for c in colors[classIDs[i]]]
                    cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
                    text = "{}: {:.4f}".format(labels[classIDs[i]], confidences[i])

                    # draw the bounding box title background
                    text_offset_x = x
                    text_offset_y = y
                    text_color = (255, 255, 255)
                    (text_width, text_height) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fontScale=1.0, thickness=1)[0]
                    box_coords = ((text_offset_x, text_offset_y), (text_offset_x + text_width - 80, text_offset_y - text_height + 4))
                    cv2.rectangle(frame, box_coords[0], box_coords[1], color, cv2.FILLED)

                    # draw the bounding box title
                    cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color, 1)

            # optionally save the frame with the most detections seen so far:
            #if len(bboxes) 
> max_count: 89 | # max_count = len(bboxes) 90 | # cv2.imwrite('captured_' + str(counter) + '.jpg', frame) 91 | cv2.imshow("YOLOv4 Object Detection", frame) 92 | key = cv2.waitKey(1) & 0xFF 93 | # if the `q` key was pressed, break the loop 94 | if key == ord("q"): 95 | break 96 | finally: 97 | cap.release() 98 | cv2.destroyAllWindows() 99 | 100 | 101 | with open("model/coco.names","r", encoding="utf-8") as f: 102 | labels = f.read().strip().split("\n") 103 | 104 | yolo_config_path = "model/yolov4.cfg" 105 | yolo_weights_path = "model/yolov4.weights" 106 | 107 | useCuda = True 108 | 109 | net = cv2.dnn.readNetFromDarknet(yolo_config_path, yolo_weights_path) 110 | 111 | if useCuda: 112 | net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA) 113 | net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA) 114 | 115 | video_url = "https://cdn-004.whatsupcams.com/hls/hr_pula01.m3u8" 116 | frame_width = 1200 117 | 118 | if __name__ == '__main__': 119 | get_yolo_preds(net, video_url, 0.6, 0.1, labels,frame_width) -------------------------------------------------------------------------------- /tensorflow/object_detection/webcam_detection_custom.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import pathlib\n", 11 | "\n", 12 | "\n", 13 | "if \"models\" in pathlib.Path.cwd().parts:\n", 14 | " while \"models\" in pathlib.Path.cwd().parts:\n", 15 | " os.chdir('..')\n", 16 | "elif not pathlib.Path('models').exists():\n", 17 | " !git clone --depth 1 https://github.com/tensorflow/models" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "import os\n", 28 | "import six.moves.urllib as urllib\n", 29 | "import sys\n", 30 | "import tarfile\n", 31 | "import tensorflow as tf\n", 32 | "import zipfile\n", 33 | "\n", 34 | "from collections import defaultdict\n", 35 | "from io import StringIO\n", 36 | "from matplotlib import pyplot as plt\n", 37 | "from PIL import Image\n", 38 | "from IPython.display import display" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "scrolled": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "from object_detection.utils import ops as utils_ops\n", 50 | "from object_detection.utils import label_map_util\n", 51 | "from object_detection.utils import visualization_utils as vis_util" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "WARNING:tensorflow:From e:\\python\\tensorflow\\models\\research\\object_detection\\utils\\label_map_util.py:138: The name tf.gfile.GFile is deprecated. 
Please use tf.io.gfile.GFile instead.\n", 64 | "\n" 65 | ] 66 | } 67 | ], 68 | "source": [ 69 | "MODEL_NAME = 'models/research/object_detection/inference_graph'\n", 70 | "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n", 71 | "PATH_TO_LABELS = 'models/research/object_detection/training/labelmap.pbtxt'\n", 72 | "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "detection_graph = tf.Graph()\n", 82 | "with detection_graph.as_default():\n", 83 | " od_graph_def = tf.GraphDef()\n", 84 | " with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n", 85 | " serialized_graph = fid.read()\n", 86 | " od_graph_def.ParseFromString(serialized_graph)\n", 87 | " tf.import_graph_def(od_graph_def, name='')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 6, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "def run_inference_for_single_image(image, graph):\n", 97 | " if 'detection_masks' in tensor_dict:\n", 98 | " # The following processing is only for single image\n", 99 | " detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n", 100 | " detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n", 101 | " # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n", 102 | " real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n", 103 | " detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n", 104 | " detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n", 105 | " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", 106 | " detection_masks, detection_boxes, image.shape[0], image.shape[1])\n", 107 | " detection_masks_reframed = tf.cast(\n", 108 | " tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n", 109 | " # Follow the convention by adding back the batch dimension\n", 110 | " tensor_dict['detection_masks'] = tf.expand_dims(\n", 111 | " detection_masks_reframed, 0)\n", 112 | " image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n", 113 | "\n", 114 | " # Run inference\n", 115 | " output_dict = sess.run(tensor_dict,\n", 116 | " feed_dict={image_tensor: np.expand_dims(image, 0)})\n", 117 | "\n", 118 | " # all outputs are float32 numpy arrays, so convert types as appropriate\n", 119 | " output_dict['num_detections'] = int(output_dict['num_detections'][0])\n", 120 | " output_dict['detection_classes'] = output_dict[\n", 121 | " 'detection_classes'][0].astype(np.uint8)\n", 122 | " output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n", 123 | " output_dict['detection_scores'] = output_dict['detection_scores'][0]\n", 124 | " if 'detection_masks' in output_dict:\n", 125 | " output_dict['detection_masks'] = output_dict['detection_masks'][0]\n", 126 | " return output_dict" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 7, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "import cv2\n", 136 | "cap = cv2.VideoCapture(0)\n", 137 | "try:\n", 138 | " with detection_graph.as_default():\n", 139 | " with tf.Session() as sess:\n", 140 | " # Get handles to input and output tensors\n", 141 | " ops = tf.get_default_graph().get_operations()\n", 142 | " all_tensor_names = {output.name for op in ops for output in op.outputs}\n", 
143 | " tensor_dict = {}\n", 144 | " for key in [\n", 145 | " 'num_detections', 'detection_boxes', 'detection_scores',\n", 146 | " 'detection_classes', 'detection_masks'\n", 147 | " ]:\n", 148 | " tensor_name = key + ':0'\n", 149 | " if tensor_name in all_tensor_names:\n", 150 | " tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n", 151 | " tensor_name)\n", 152 | "\n", 153 | " while True:\n", 154 | " ret, image_np = cap.read()\n", 155 | " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", 156 | " image_np_expanded = np.expand_dims(image_np, axis=0)\n", 157 | " # Actual detection.\n", 158 | " output_dict = run_inference_for_single_image(image_np, detection_graph)\n", 159 | " # Visualization of the results of a detection.\n", 160 | " vis_util.visualize_boxes_and_labels_on_image_array(\n", 161 | " image_np,\n", 162 | " output_dict['detection_boxes'],\n", 163 | " output_dict['detection_classes'],\n", 164 | " output_dict['detection_scores'],\n", 165 | " category_index,\n", 166 | " instance_masks=output_dict.get('detection_masks'),\n", 167 | " use_normalized_coordinates=True,\n", 168 | " line_thickness=4)\n", 169 | " cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))\n", 170 | " if cv2.waitKey(25) & 0xFF == ord('q'):\n", 171 | " cap.release()\n", 172 | " cv2.destroyAllWindows()\n", 173 | " break\n", 174 | "except Exception as e:\n", 175 | " print(e)\n", 176 | " cap.release()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [] 185 | } 186 | ], 187 | "metadata": { 188 | "kernelspec": { 189 | "display_name": "Python 3", 190 | "language": "python", 191 | "name": "python3" 192 | }, 193 | "language_info": { 194 | "codemirror_mode": { 195 | "name": "ipython", 196 | "version": 3 197 | }, 198 | "file_extension": ".py", 199 | "mimetype": "text/x-python", 200 | "name": "python", 201 | "nbconvert_exporter": "python", 202 | "pygments_lexer": "ipython3", 203 | "version": "3.6.5" 204 | } 205 | }, 206 | "nbformat": 4, 207 | "nbformat_minor": 2 208 | } 209 | -------------------------------------------------------------------------------- /tensorflow/object_detection/webcam_detection_custom-ip_camera.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import os\n", 10 | "import pathlib\n", 11 | "\n", 12 | "\n", 13 | "if \"models\" in pathlib.Path.cwd().parts:\n", 14 | " while \"models\" in pathlib.Path.cwd().parts:\n", 15 | " os.chdir('..')\n", 16 | "elif not pathlib.Path('models').exists():\n", 17 | " !git clone --depth 1 https://github.com/tensorflow/models" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": null, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "import numpy as np\n", 27 | "import six.moves.urllib as urllib\n", 28 | "import sys\n", 29 | "import tarfile\n", 30 | "import tensorflow as tf\n", 31 | "import zipfile\n", 32 | "import cv2\n", 33 | "\n", 34 | "from collections import defaultdict\n", 35 | "from io import StringIO\n", 36 | "from matplotlib import pyplot as plt\n", 37 | "from PIL import Image\n", 38 | "from IPython.display import display" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "scrolled": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "from 
object_detection.utils import ops as utils_ops\n", 50 | "from object_detection.utils import label_map_util\n", 51 | "from object_detection.utils import visualization_utils as vis_util" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "MODEL_NAME = 'models/research/object_detection/inference_graph'\n", 61 | "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n", 62 | "PATH_TO_LABELS = 'models/research/object_detection/training/labelmap.pbtxt'\n", 63 | "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "detection_graph = tf.Graph()\n", 73 | "with detection_graph.as_default():\n", 74 | " od_graph_def = tf.GraphDef()\n", 75 | " with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n", 76 | " serialized_graph = fid.read()\n", 77 | " od_graph_def.ParseFromString(serialized_graph)\n", 78 | " tf.import_graph_def(od_graph_def, name='')" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def run_inference_for_single_image(image, graph):\n", 88 | " if 'detection_masks' in tensor_dict:\n", 89 | " # The following processing is only for single image\n", 90 | " detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n", 91 | " detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n", 92 | " # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n", 93 | " real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n", 94 | " detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n", 95 | " detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n", 96 | " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", 97 | " detection_masks, detection_boxes, image.shape[0], image.shape[1])\n", 98 | " detection_masks_reframed = tf.cast(\n", 99 | " tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n", 100 | " # Follow the convention by adding back the batch dimension\n", 101 | " tensor_dict['detection_masks'] = tf.expand_dims(\n", 102 | " detection_masks_reframed, 0)\n", 103 | " image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n", 104 | "\n", 105 | " # Run inference\n", 106 | " output_dict = sess.run(tensor_dict,\n", 107 | " feed_dict={image_tensor: np.expand_dims(image, 0)})\n", 108 | "\n", 109 | " # all outputs are float32 numpy arrays, so convert types as appropriate\n", 110 | " output_dict['num_detections'] = int(output_dict['num_detections'][0])\n", 111 | " output_dict['detection_classes'] = output_dict[\n", 112 | " 'detection_classes'][0].astype(np.uint8)\n", 113 | " output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n", 114 | " output_dict['detection_scores'] = output_dict['detection_scores'][0]\n", 115 | " if 'detection_masks' in output_dict:\n", 116 | " output_dict['detection_masks'] = output_dict['detection_masks'][0]\n", 117 | " return output_dict" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "#cap = cv2.VideoCapture('rtsp://username:password@192.168.0.137:10554/tcp/av0_1')\n", 127 | "cap = 
cv2.VideoCapture('http://192.168.0.137:81/videostream.cgi?loginuse=username&loginpas=password&resolution=32')\n", 128 | "cap.set(cv2.CAP_PROP_BUFFERSIZE,1)\n", 129 | "#print(cv2.getBuildInformation())\n", 130 | "\n", 131 | "IP_CAMERA_RESOLUTION = (640, 360)\n", 132 | "\n", 133 | "try:\n", 134 | " with detection_graph.as_default():\n", 135 | " with tf.Session() as sess:\n", 136 | " # Get handles to input and output tensors\n", 137 | " ops = tf.get_default_graph().get_operations()\n", 138 | " all_tensor_names = {output.name for op in ops for output in op.outputs}\n", 139 | " tensor_dict = {}\n", 140 | " for key in [\n", 141 | " 'num_detections', 'detection_boxes', 'detection_scores',\n", 142 | " 'detection_classes', 'detection_masks'\n", 143 | " ]:\n", 144 | " tensor_name = key + ':0'\n", 145 | " if tensor_name in all_tensor_names:\n", 146 | " tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n", 147 | " tensor_name)\n", 148 | "\n", 149 | " while True:\n", 150 | " ret, image_np = cap.read()\n", 151 | " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", 152 | " image_np_expanded = np.expand_dims(image_np, axis=0)\n", 153 | " # Actual detection.\n", 154 | " output_dict = run_inference_for_single_image(image_np, detection_graph)\n", 155 | " # Visualization of the results of a detection.\n", 156 | " vis_util.visualize_boxes_and_labels_on_image_array(\n", 157 | " image_np,\n", 158 | " output_dict['detection_boxes'],\n", 159 | " output_dict['detection_classes'],\n", 160 | " output_dict['detection_scores'],\n", 161 | " category_index,\n", 162 | " instance_masks=output_dict.get('detection_masks'),\n", 163 | " use_normalized_coordinates=True,\n", 164 | " line_thickness=4)\n", 165 | " cv2.imshow('object_detection', cv2.resize(image_np, IP_CAMERA_RESOLUTION))\n", 166 | " if cv2.waitKey(25) & 0xFF == ord('q'):\n", 167 | " cap.release()\n", 168 | " cv2.destroyAllWindows()\n", 169 | " break\n", 170 | "except Exception as e:\n", 171 | " print(e)\n", 172 | " cap.release()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.6.5" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- /tensorflow/object_detection/webcam_detection_non_customized.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 13, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import numpy as np\n", 10 | "import os\n", 11 | "import six.moves.urllib as urllib\n", 12 | "import sys\n", 13 | "import tarfile\n", 14 | "import tensorflow as tf\n", 15 | "import zipfile\n", 16 | "import cv2\n", 17 | "\n", 18 | "from collections import defaultdict\n", 19 | "from io import StringIO\n", 20 | "from matplotlib import pyplot as plt\n", 21 | "from PIL import Image\n", 22 | "from IPython.display import display\n", 23 | "\n", 24 | "from object_detection.utils 
import ops as utils_ops\n", 25 | "from object_detection.utils import label_map_util\n", 26 | "from object_detection.utils import visualization_utils as vis_util" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 14, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "# define your root directory here:\n", 36 | "\n", 37 | "root='e:/python/tensorflow/models/research/object_detection'\n", 38 | "\n", 39 | "# What model to download.\n", 40 | "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n", 41 | "MODEL_FILE = MODEL_NAME + '.tar.gz'\n", 42 | "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n", 43 | "\n", 44 | "# Path to frozen detection graph. This is the actual model that is used for the object detection.\n", 45 | "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n", 46 | "\n", 47 | "# List of the strings that is used to add correct label for each box.\n", 48 | "PATH_TO_LABELS = root + '/data/mscoco_label_map.pbtxt'\n", 49 | "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 15, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "# download and extract the model file\n", 59 | "\n", 60 | "opener = urllib.request.URLopener()\n", 61 | "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n", 62 | "tar_file = tarfile.open(MODEL_FILE)\n", 63 | "for file in tar_file.getmembers():\n", 64 | " file_name = os.path.basename(file.name)\n", 65 | " if 'frozen_inference_graph.pb' in file_name:\n", 66 | " tar_file.extract(file, os.getcwd())" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 16, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "# open the inference graph and load in into Tensorflow\n", 76 | "\n", 77 | "detection_graph = tf.Graph()\n", 78 | "with detection_graph.as_default():\n", 79 | " od_graph_def = tf.GraphDef()\n", 80 | " with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n", 81 | " serialized_graph = fid.read()\n", 82 | " od_graph_def.ParseFromString(serialized_graph)\n", 83 | " tf.import_graph_def(od_graph_def, name='')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 17, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "def run_inference_for_single_image(image, graph):\n", 93 | " if 'detection_masks' in tensor_dict:\n", 94 | " # The following processing is only for single image\n", 95 | " detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n", 96 | " detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n", 97 | " # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n", 98 | " real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n", 99 | " detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n", 100 | " detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n", 101 | " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", 102 | " detection_masks, detection_boxes, image.shape[0], image.shape[1])\n", 103 | " detection_masks_reframed = tf.cast(\n", 104 | " tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n", 105 | " # Follow the convention by adding back the batch dimension\n", 106 | " tensor_dict['detection_masks'] = tf.expand_dims(\n", 107 | " detection_masks_reframed, 0)\n", 108 | " \n", 109 | " image_tensor = 
tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n", 110 | "\n", 111 | " # Run inference\n", 112 | " output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})\n", 113 | "\n", 114 | " # all outputs are float32 numpy arrays, so convert types as appropriate\n", 115 | " output_dict['num_detections'] = int(output_dict['num_detections'][0])\n", 116 | " output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8)\n", 117 | " output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n", 118 | " output_dict['detection_scores'] = output_dict['detection_scores'][0]\n", 119 | " \n", 120 | " if 'detection_masks' in output_dict:\n", 121 | " output_dict['detection_masks'] = output_dict['detection_masks'][0]\n", 122 | " \n", 123 | " return output_dict" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 18, 129 | "metadata": {}, 130 | "outputs": [], 131 | "source": [ 132 | "cap = cv2.VideoCapture(0)\n", 133 | "try:\n", 134 | " with detection_graph.as_default():\n", 135 | " with tf.Session() as sess:\n", 136 | " # Get handles to input and output tensors\n", 137 | " ops = tf.get_default_graph().get_operations()\n", 138 | " all_tensor_names = {output.name for op in ops for output in op.outputs}\n", 139 | " tensor_dict = {}\n", 140 | " for key in [\n", 141 | " 'num_detections', 'detection_boxes', 'detection_scores',\n", 142 | " 'detection_classes', 'detection_masks'\n", 143 | " ]:\n", 144 | " tensor_name = key + ':0'\n", 145 | " if tensor_name in all_tensor_names:\n", 146 | " tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n", 147 | " tensor_name)\n", 148 | "\n", 149 | " while True:\n", 150 | " ret, image_np = cap.read()\n", 151 | " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", 152 | " image_np_expanded = np.expand_dims(image_np, axis=0)\n", 153 | " # Actual detection.\n", 154 | " output_dict = run_inference_for_single_image(image_np, detection_graph)\n", 155 | " # Visualization of the results of a detection.\n", 156 | " vis_util.visualize_boxes_and_labels_on_image_array(\n", 157 | " image_np,\n", 158 | " output_dict['detection_boxes'],\n", 159 | " output_dict['detection_classes'],\n", 160 | " output_dict['detection_scores'],\n", 161 | " category_index,\n", 162 | " instance_masks=output_dict.get('detection_masks'),\n", 163 | " use_normalized_coordinates=True,\n", 164 | " line_thickness=4)\n", 165 | " cv2.imshow('object_detection', cv2.resize(image_np, (800, 600)))\n", 166 | " if cv2.waitKey(25) & 0xFF == ord('q'):\n", 167 | " cap.release()\n", 168 | " cv2.destroyAllWindows()\n", 169 | " break\n", 170 | "except Exception as e:\n", 171 | " print(e)\n", 172 | " cap.release()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": {}, 179 | "outputs": [], 180 | "source": [] 181 | } 182 | ], 183 | "metadata": { 184 | "kernelspec": { 185 | "display_name": "Python 3", 186 | "language": "python", 187 | "name": "python3" 188 | }, 189 | "language_info": { 190 | "codemirror_mode": { 191 | "name": "ipython", 192 | "version": 3 193 | }, 194 | "file_extension": ".py", 195 | "mimetype": "text/x-python", 196 | "name": "python", 197 | "nbconvert_exporter": "python", 198 | "pygments_lexer": "ipython3", 199 | "version": "3.6.5" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 2 204 | } 205 | -------------------------------------------------------------------------------- 
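The model download above (and in the test notebook that follows) uses urllib.request.URLopener, which is deprecated; urllib.request.urlretrieve does the same job on Python 3. A minimal sketch, assuming the MODEL_FILE and DOWNLOAD_BASE names defined in the notebook:

import os
import tarfile
import urllib.request

# download the pretrained model archive and extract only the frozen graph
urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        if 'frozen_inference_graph.pb' in os.path.basename(member.name):
            tar_file.extract(member, os.getcwd())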
/tensorflow/object_detection/object_detection_api_test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "colab": { 8 | "autoexec": { 9 | "startup": false, 10 | "wait_interval": 0 11 | } 12 | }, 13 | "colab_type": "code", 14 | "id": "hV4P5gyTWKMI" 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import os\n", 20 | "import six.moves.urllib as urllib\n", 21 | "import sys\n", 22 | "import tarfile\n", 23 | "import tensorflow as tf\n", 24 | "import zipfile\n", 25 | "\n", 26 | "from distutils.version import StrictVersion\n", 27 | "from collections import defaultdict\n", 28 | "from io import StringIO\n", 29 | "from matplotlib import pyplot as plt\n", 30 | "from PIL import Image\n", 31 | "\n", 32 | "# This is needed since the notebook is stored in the object_detection folder.\n", 33 | "sys.path.append(\"..\")\n", 34 | "from object_detection.utils import ops as utils_ops\n", 35 | "\n", 36 | "if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):\n", 37 | " raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')\n", 38 | "\n", 39 | "# define your root directory here:\n", 40 | "\n", 41 | "root='e:/python/tensorflow/models/research/object_detection'\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 2, 47 | "metadata": { 48 | "colab": { 49 | "autoexec": { 50 | "startup": false, 51 | "wait_interval": 0 52 | } 53 | }, 54 | "colab_type": "code", 55 | "id": "v7m_NY_aWKMK" 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "# This is needed to display the images.\n", 60 | "%matplotlib inline" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": { 66 | "colab_type": "text", 67 | "id": "r5FNuiRPWKMN" 68 | }, 69 | "source": [ 70 | "## Object detection imports\n", 71 | "Here are the imports from the object detection module." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 3, 77 | "metadata": { 78 | "colab": { 79 | "autoexec": { 80 | "startup": false, 81 | "wait_interval": 0 82 | } 83 | }, 84 | "colab_type": "code", 85 | "id": "bm0_uNRnWKMN" 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "from utils import label_map_util\n", 90 | "from utils import visualization_utils as vis_util" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": { 96 | "colab_type": "text", 97 | "id": "cfn_tRFOWKMO" 98 | }, 99 | "source": [ 100 | "# Model preparation " 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 4, 106 | "metadata": { 107 | "colab": { 108 | "autoexec": { 109 | "startup": false, 110 | "wait_interval": 0 111 | } 112 | }, 113 | "colab_type": "code", 114 | "id": "VyPz_t8WWKMQ" 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "# What model to download.\n", 119 | "MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'\n", 120 | "MODEL_FILE = MODEL_NAME + '.tar.gz'\n", 121 | "DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'\n", 122 | "\n", 123 | "# Path to frozen detection graph. 
This is the actual model that is used for the object detection.\n", 124 | "PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'\n", 125 | "\n", 126 | "# List of the strings that is used to add correct label for each box.\n", 127 | "PATH_TO_LABELS = root + '/data/mscoco_label_map.pbtxt'" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": { 133 | "colab_type": "text", 134 | "id": "7ai8pLZZWKMS" 135 | }, 136 | "source": [ 137 | "## Download Model" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 5, 143 | "metadata": { 144 | "colab": { 145 | "autoexec": { 146 | "startup": false, 147 | "wait_interval": 0 148 | } 149 | }, 150 | "colab_type": "code", 151 | "id": "KILYnwR5WKMS" 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "opener = urllib.request.URLopener()\n", 156 | "opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)\n", 157 | "tar_file = tarfile.open(MODEL_FILE)\n", 158 | "for file in tar_file.getmembers():\n", 159 | " file_name = os.path.basename(file.name)\n", 160 | " if 'frozen_inference_graph.pb' in file_name:\n", 161 | " tar_file.extract(file, os.getcwd())" 162 | ] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": { 167 | "colab_type": "text", 168 | "id": "YBcB9QHLWKMU" 169 | }, 170 | "source": [ 171 | "## Load a (frozen) Tensorflow model into memory." 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 6, 177 | "metadata": { 178 | "colab": { 179 | "autoexec": { 180 | "startup": false, 181 | "wait_interval": 0 182 | } 183 | }, 184 | "colab_type": "code", 185 | "id": "KezjCRVvWKMV" 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "detection_graph = tf.Graph()\n", 190 | "with detection_graph.as_default():\n", 191 | " od_graph_def = tf.GraphDef()\n", 192 | " with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:\n", 193 | " serialized_graph = fid.read()\n", 194 | " od_graph_def.ParseFromString(serialized_graph)\n", 195 | " tf.import_graph_def(od_graph_def, name='')" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": { 201 | "colab_type": "text", 202 | "id": "_1MVVTcLWKMW" 203 | }, 204 | "source": [ 205 | "## Loading label map\n", 206 | "Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. 
Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "metadata": { 213 | "colab": { 214 | "autoexec": { 215 | "startup": false, 216 | "wait_interval": 0 217 | } 218 | }, 219 | "colab_type": "code", 220 | "id": "hDbpHkiWWKMX" 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": { 230 | "colab_type": "text", 231 | "id": "EFsoUHvbWKMZ" 232 | }, 233 | "source": [ 234 | "## Helper code" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 8, 240 | "metadata": { 241 | "colab": { 242 | "autoexec": { 243 | "startup": false, 244 | "wait_interval": 0 245 | } 246 | }, 247 | "colab_type": "code", 248 | "id": "aSlYc3JkWKMa" 249 | }, 250 | "outputs": [], 251 | "source": [ 252 | "def load_image_into_numpy_array(image):\n", 253 | " (im_width, im_height) = image.size\n", 254 | " return np.array(image.getdata()).reshape(\n", 255 | " (im_height, im_width, 3)).astype(np.uint8)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": { 261 | "colab_type": "text", 262 | "id": "H0_1AGhrWKMc" 263 | }, 264 | "source": [ 265 | "# Detection" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 9, 271 | "metadata": { 272 | "colab": { 273 | "autoexec": { 274 | "startup": false, 275 | "wait_interval": 0 276 | } 277 | }, 278 | "colab_type": "code", 279 | "id": "jG-zn5ykWKMd" 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "# For the sake of simplicity we will use only 2 images:\n", 284 | "# image1.jpg\n", 285 | "# image2.jpg\n", 286 | "# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.\n", 287 | "PATH_TO_TEST_IMAGES_DIR = root + '/test_images'\n", 288 | "TEST_IMAGE_PATHS = [ PATH_TO_TEST_IMAGES_DIR + '/image{}.jpg'.format(i) for i in range(1, 3) ]\n", 289 | "\n", 290 | "# Size, in inches, of the output images.\n", 291 | "IMAGE_SIZE = (14, 10)" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 10, 297 | "metadata": { 298 | "colab": { 299 | "autoexec": { 300 | "startup": false, 301 | "wait_interval": 0 302 | } 303 | }, 304 | "colab_type": "code", 305 | "id": "92BHxzcNWKMf" 306 | }, 307 | "outputs": [], 308 | "source": [ 309 | "def run_inference_for_single_image(image, graph):\n", 310 | " if 'detection_masks' in tensor_dict:\n", 311 | " # The following processing is only for single image\n", 312 | " detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])\n", 313 | " detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])\n", 314 | " # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.\n", 315 | " real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)\n", 316 | " detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])\n", 317 | " detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])\n", 318 | " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", 319 | " detection_masks, detection_boxes, image.shape[0], image.shape[1])\n", 320 | " detection_masks_reframed = tf.cast(\n", 321 | " tf.greater(detection_masks_reframed, 0.5), tf.uint8)\n", 322 | " # Follow the 
convention by adding back the batch dimension\n", 323 | " tensor_dict['detection_masks'] = tf.expand_dims(\n", 324 | " detection_masks_reframed, 0)\n", 325 | " image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')\n", 326 | "\n", 327 | " # Run inference\n", 328 | " output_dict = sess.run(tensor_dict,\n", 329 | " feed_dict={image_tensor: np.expand_dims(image, 0)})\n", 330 | "\n", 331 | " # all outputs are float32 numpy arrays, so convert types as appropriate\n", 332 | " output_dict['num_detections'] = int(output_dict['num_detections'][0])\n", 333 | " output_dict['detection_classes'] = output_dict[\n", 334 | " 'detection_classes'][0].astype(np.uint8)\n", 335 | " output_dict['detection_boxes'] = output_dict['detection_boxes'][0]\n", 336 | " output_dict['detection_scores'] = output_dict['detection_scores'][0]\n", 337 | " if 'detection_masks' in output_dict:\n", 338 | " output_dict['detection_masks'] = output_dict['detection_masks'][0]\n", 339 | " return output_dict" 340 | ] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": 11, 345 | "metadata": {}, 346 | "outputs": [ 347 | { 348 | "name": "stdout", 349 | "output_type": "stream", 350 | "text": [ 351 | "Processing image: e:/python/tensorflow/models/research/object_detection/test_images/image1.jpg\n", 352 | "Processing image: e:/python/tensorflow/models/research/object_detection/test_images/image2.jpg\n" 353 | ] 354 | } 355 | ], 356 | "source": [ 357 | "with detection_graph.as_default():\n", 358 | " with tf.Session() as sess:\n", 359 | " # Get handles to input and output tensors\n", 360 | " ops = tf.get_default_graph().get_operations()\n", 361 | " all_tensor_names = {output.name for op in ops for output in op.outputs}\n", 362 | " tensor_dict = {}\n", 363 | " for key in [\n", 364 | " 'num_detections', 'detection_boxes', 'detection_scores',\n", 365 | " 'detection_classes', 'detection_masks'\n", 366 | " ]:\n", 367 | " tensor_name = key + ':0'\n", 368 | " if tensor_name in all_tensor_names:\n", 369 | " tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(\n", 370 | " tensor_name)\n", 371 | "\n", 372 | " for image_path in TEST_IMAGE_PATHS:\n", 373 | " print ('Processing image: ' + image_path)\n", 374 | " image = Image.open(image_path) \n", 375 | " image_np = load_image_into_numpy_array(image)\n", 376 | " # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n", 377 | " image_np_expanded = np.expand_dims(image_np, axis=0)\n", 378 | " # Actual detection.\n", 379 | " output_dict = run_inference_for_single_image(image_np, detection_graph)\n", 380 | " # Visualization of the results of a detection.\n", 381 | " vis_util.visualize_boxes_and_labels_on_image_array(\n", 382 | " image_np,\n", 383 | " output_dict['detection_boxes'],\n", 384 | " output_dict['detection_classes'],\n", 385 | " output_dict['detection_scores'],\n", 386 | " category_index,\n", 387 | " instance_masks=output_dict.get('detection_masks'),\n", 388 | " use_normalized_coordinates=True,\n", 389 | " line_thickness=4)\n", 390 | " plt.figure(figsize=IMAGE_SIZE)\n", 391 | " plt.imshow(image_np)\n" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": null, 397 | "metadata": { 398 | "colab": { 399 | "autoexec": { 400 | "startup": false, 401 | "wait_interval": 0 402 | } 403 | }, 404 | "colab_type": "code", 405 | "id": "LQSEnEsPWKMj" 406 | }, 407 | "outputs": [], 408 | "source": [] 409 | } 410 | ], 411 | "metadata": { 412 | "colab": { 413 | "default_view": {}, 414 | "name": 
"object_detection_tutorial.ipynb?workspaceId=ronnyvotel:python_inference::citc", 415 | "provenance": [], 416 | "version": "0.3.2", 417 | "views": {} 418 | }, 419 | "kernelspec": { 420 | "display_name": "Python 3", 421 | "language": "python", 422 | "name": "python3" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 3 428 | }, 429 | "file_extension": ".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython3", 434 | "version": "3.6.5" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 1 439 | } 440 | --------------------------------------------------------------------------------