# Repository layout:
# ├── README.md
# ├── object_detection_phone.py
# ├── object_detection_tutorial.py
# └── object_detection_webcam.py
#
# /README.md:
# --------------------------------------------------------------------------------
# # Real_time_Object_detection_TF
# We worked on a Deep Learning project that detects objects in an image, in a
# video, and in real time (webcam and cellphone camera).
#
# This is an implementation of the TensorFlow Object Detection API for running
# it in real time through a webcam and a cellphone camera.
# # Contributors:
# * https://github.com/himanshu8004
# * https://github.com/Anshu15bit
# * https://github.com/ashikaks
# * https://github.com/sumananand222
#
# The video for the same is at :
#
# Download the object_recognition_detection.zip
#
# To run object detection on image files, run object_detection_tutorial.py
#
# To run object detection in real time with a web camera, run object_detection_webcam.py
#
# To run object detection in real time with a cellphone camera, run object_detection_phone.py
#
# The official TensorFlow Object Detection API link is https://github.com/tensorflow/models/tree/master/object_detection
#
# For installing ProtoBuf : https://github.com/google/protobuf/releases?after=v3.4.1
#
# --------------------------------------------------------------------------------
# /object_detection_phone.py:
# --------------------------------------------------------------------------------

# Real-time object detection on JPEG snapshots fetched over HTTP from a phone
# camera (an "IP webcam" endpoint serving shot.jpg).

# In[ ]:


import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Compare versions numerically: as plain strings, '1.10.0' < '1.4.0' is True
# and would wrongly reject newer TensorFlow releases.
if tuple(int(part) for part in tf.__version__.split('.')[:2]) < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


# get_ipython() only exists inside IPython/Jupyter; when this file is run as a
# plain script the magic is simply skipped instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# In[ ]:


from utils import label_map_util

from utils import visualization_utils as vis_util


# In[ ]:


# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to the frozen detection graph (the model actually used for detection).
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# Label map used to turn class ids into display names.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90


# In[ ]:


# Download and unpack the frozen graph only when it is not already on disk,
# so restarting the script does not re-fetch the archive every time.
if not os.path.exists(PATH_TO_CKPT):
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager guarantees the archive handle is closed.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                tar_file.extract(member, os.getcwd())


# In[ ]:


# Load the (frozen) TensorFlow model into its own graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

# In[ ]:


label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# ## Helper code

# Initializing the IP camera source.
# NOTE: this import rebinds the name `urllib` from six.moves.urllib to the
# standard-library package; everything below uses the stdlib module.
import urllib.request
import cv2
import numpy as np
import time

# Replace the URL with your own IPwebcam shot.jpg IP:port
url = 'http://10.10.8.116:8080/shot.jpg'

# Running the tensorflow session
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Tensor handles do not change between frames, so look them up once
        # instead of on every loop iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

        while True:
            # Use urllib to get one JPEG snapshot from the IP camera; the
            # response is closed as soon as its bytes have been read.
            with urllib.request.urlopen(url) as imgResp:
                imgNp = np.frombuffer(imgResp.read(), dtype=np.uint8)

            # Decode the JPEG bytes into a 3-channel OpenCV (BGR) image.
            # The original code unpacked the raw byte array into two names,
            # which fails before a single frame is processed.
            image_np = cv2.imdecode(imgNp, cv2.IMREAD_COLOR) if imgNp.size else None
            if image_np is None:
                # Empty or undecodable snapshot: stop instead of feeding
                # garbage to the model.
                break

            # The detection model expects RGB input while OpenCV decodes to
            # BGR, so convert only the copy that is fed to the network.
            image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_rgb, axis=0)

            # Actual detection.
            (boxes, scores, classes, num_detections) = sess.run(
                fetches,
                feed_dict={image_tensor: image_np_expanded})

            # Visualization of the results of a detection (drawn in place on
            # the BGR frame that is displayed below).
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)

            cv2.imshow('image', cv2.resize(image_np, (1280, 960)))
            # Quit when 'q' is pressed.
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break

        cv2.destroyAllWindows()

# --------------------------------------------------------------------------------
# /object_detection_tutorial.py:
# --------------------------------------------------------------------------------

# coding: utf-8

# # Object Detection Demo
# Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# # Imports

# In[ ]:


import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Compare versions numerically: as plain strings, '1.10.0' < '1.4.0' is True
# and would wrongly reject newer TensorFlow releases.
if tuple(int(part) for part in tf.__version__.split('.')[:2]) < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


# ## Env setup

# In[ ]:


# This is needed to display the images.
# get_ipython() only exists inside IPython/Jupyter; when this file is run as a
# plain script the magic is skipped instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# ## Object detection imports
# Here are the imports from the object detection module.

# In[ ]:


from utils import label_map_util

from utils import visualization_utils as vis_util


# # Model preparation

# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

# In[ ]:


# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90


# ## Download Model

# In[ ]:


# Download and unpack the frozen graph only when it is not already on disk,
# so re-running the script does not re-fetch the archive every time.
if not os.path.exists(PATH_TO_CKPT):
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager guarantees the archive handle is closed.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                tar_file.extract(member, os.getcwd())


# ## Load a (frozen) Tensorflow model into memory.

# In[ ]:


detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

# In[ ]:


label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# ## Helper code

# In[ ]:


def load_image_into_numpy_array(image):
    """Return the pixels of a PIL image as an (height, width, 3) uint8 array.

    Args:
        image: a PIL ``Image`` in any mode.

    Returns:
        A new, writable ``numpy.ndarray`` of dtype ``uint8`` with three
        channels in RGB order.
    """
    # Converting to RGB first makes RGBA, greyscale and palette images work;
    # the previous getdata()/reshape(..., 3) version failed on anything that
    # did not already have exactly three channels. For RGB inputs the result
    # is identical.
    return np.array(image.convert('RGB'), dtype=np.uint8)


# # Detection

# In[ ]:


# For the sake of simplicity we will use only 7 images:
# image1.jpg
# ...
# image7.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 8) ]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)


# In[ ]:


def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its outputs.

    Args:
        image: numpy array holding a single image; a batch dimension is added
            before it is fed to the ``image_tensor:0`` placeholder.
        graph: ``tf.Graph`` containing the imported frozen detection model.

    Returns:
        dict with the batch dimension stripped from every entry:
        ``num_detections`` (int), ``detection_classes`` (int64 array),
        ``detection_boxes``, ``detection_scores`` and, only when the model
        exposes it, ``detection_masks``.
    """
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                # Not every model exports every output (e.g. masks).
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            # int64 rather than uint8: class ids above 255 would silently wrap
            # around with an 8-bit type when a larger label map is used.
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict


# In[ ]:


for image_path in TEST_IMAGE_PATHS:
    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(image_path) as image:
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
    # Actual detection (the batch dimension is added inside the helper).
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)


# --------------------------------------------------------------------------------
# /object_detection_webcam.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

# coding: utf-8

# # Object Detection Demo
# Welcome to the object detection inference walkthrough! This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# # Imports

# In[ ]:


import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Compare versions numerically: as plain strings, '1.10.0' < '1.4.0' is True
# and would wrongly reject newer TensorFlow releases.
if tuple(int(part) for part in tf.__version__.split('.')[:2]) < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


# ## Env setup

# In[ ]:


# This is needed to display the images.
# get_ipython() only exists inside IPython/Jupyter; when this file is run as a
# plain script the magic is skipped instead of raising NameError.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# ## Object detection imports
# Here are the imports from the object detection module.

# In[ ]:


from utils import label_map_util

from utils import visualization_utils as vis_util


# # Model preparation

# ## Variables
#
# Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
#
# By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

# In[ ]:


# What model to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90


# ## Download Model

# In[ ]:


# Download and unpack the frozen graph only when it is not already on disk,
# so restarting the script does not re-fetch the archive every time.
if not os.path.exists(PATH_TO_CKPT):
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager guarantees the archive handle is closed.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                tar_file.extract(member, os.getcwd())


# ## Load a (frozen) Tensorflow model into memory.

# In[ ]:


detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')


# ## Loading label map
# Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

# In[ ]:


label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# ## Helper code
# NOTE: this script reads frames from the local webcam. The variant that pulls
# JPEG snapshots over HTTP from a phone ("IP webcam" shot.jpg URL) lives in
# object_detection_phone.py.
import cv2
cap = cv2.VideoCapture(0)

# Running the tensorflow session
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Tensor handles do not change between frames, so look them up once
        # instead of on every loop iteration.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # Score is shown on the result image, together with the class label.
        scores_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        classes_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections_tensor = detection_graph.get_tensor_by_name('num_detections:0')
        fetches = [boxes_tensor, scores_tensor, classes_tensor, num_detections_tensor]

        try:
            while True:
                ret, image_np = cap.read()
                if not ret:
                    # No frame (camera missing, unplugged or stream ended):
                    # image_np is None here, so stop before using it.
                    break

                # The detection model expects RGB input while OpenCV captures
                # BGR, so convert only the copy that is fed to the network.
                image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_rgb, axis=0)

                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    fetches,
                    feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection (drawn in place
                # on the BGR frame that is displayed below).
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                cv2.imshow('image', cv2.resize(image_np, (1280, 960)))
                # Quit when 'q' is pressed.
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            # Release the camera and close the window on every exit path,
            # including errors raised inside the loop.
            cap.release()
            cv2.destroyAllWindows()
# --------------------------------------------------------------------------------