├── .idea ├── .name ├── .gitignore ├── inspectionProfiles │ ├── profiles_settings.xml │ └── Project_Default.xml ├── modules.xml ├── misc.xml └── tensor flow 1.iml ├── Feed.py ├── README.md ├── detect_from_image.py └── camera_image.detection.py /.idea/.name: -------------------------------------------------------------------------------- 1 | tensor flow 1 -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/tensor flow 1.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 13 | -------------------------------------------------------------------------------- /Feed.py: -------------------------------------------------------------------------------- 1 | # Import necessary libraries 2 | import tensorflow as tf 3 | from tensorflow.keras import layers, models 4 | from tensorflow.keras.datasets import mnist 5 | import matplotlib.pyplot as plt 6 | 7 | # Load and preprocess the MNIST dataset 8 | (train_images, train_labels), (test_images, test_labels) = mnist.load_data() 9 | train_images, test_images = train_images / 255.0, test_images / 255.0 10 | 11 | # Build a simple neural network 12 | model = models.Sequential([ 13 | layers.Flatten(input_shape=(28, 28)), 14 | layers.Dense(128, activation='relu'), 15 | layers.Dropout(0.2), 16 | layers.Dense(10) 17 | ]) 18 | 19 | # Compile the model 20 | model.compile(optimizer='adam', 21 | loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 22 | metrics=['accuracy']) 23 | 24 | # Train the model 25 | model.fit(train_images, train_labels, epochs=5) 26 | 27 | # Evaluate the model on the test set 28 | test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2) 29 | print(f"\nTest accuracy: {test_acc}") 30 | 31 | # Make predictions 32 | probability_model = tf.keras.Sequential([model, layers.Softmax()]) 33 | predictions = probability_model.predict(test_images) 34 | 35 | # Display some predictions 36 | for i in range(5): 37 | plt.imshow(test_images[i], cmap=plt.cm.binary) 38 | plt.xlabel(f"Actual: {test_labels[i]}, Predicted: {tf.argmax(predictions[i])}") 39 | plt.show() 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tensorflow Object Detection with Tensorflow 2 2 | 3 | ![1_W3elu1yPiJ3bpj8MZrmvwA](https://github.com/patilabhi20/gesture-controlled-robotic-hand-by-using-CV/assets/157373320/69461b80-aa9a-4d40-90a0-42955450d4b4) 4 | 5 | 6 | 7 | ## Installation 8 | 9 | You can install the TensorFlow Object Detection API either with Python Package Installer (pip) or [Docker](https://www.docker.com/), an open-source platform for deploying and managing containerized applications. 10 | 11 | 12 | 13 | ### Docker Installation 14 | 15 | ``` 16 | # From the root of the git repository (inside the models directory) 17 | docker build -f research/object_detection/dockerfiles/tf2/Dockerfile -t od . 18 | docker run -it od 19 | ``` 20 | 21 | ### Python Package Installation 22 | 23 | ``` 24 | cd models/research 25 | # Compile protos. 26 | protoc object_detection/protos/*.proto --python_out=. 27 | # Install TensorFlow Object Detection API. 28 | cp object_detection/packages/tf2/setup.py . 29 | python -m pip install . 30 | ``` 31 | 32 | 33 | ```python 34 | import os 35 | import sys 36 | args = sys.argv 37 | directory = args[1] 38 | protoc_path = args[2] 39 | for file in os.listdir(directory): 40 | if file.endswith(".proto"): 41 | os.system(protoc_path+" "+directory+"/"+file+" --python_out=.") 42 | ``` 43 | 44 | ``` 45 | python use_protobuf.py 46 | ``` 47 | 48 | To test the installation run: 49 | 50 | ``` 51 | # Test the installation. 52 | python object_detection/builders/model_builder_tf2_test.py 53 | ``` 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /detect_from_image.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import os 4 | import tensorflow as tf 5 | from PIL import Image 6 | from io import BytesIO 7 | import glob 8 | import matplotlib.pyplot as plt 9 | 10 | from object_detection.utils import ops as utils_ops 11 | from object_detection.utils import label_map_util 12 | from object_detection.utils import visualization_utils as vis_util 13 | 14 | # patch tf1 into `utils.ops` 15 | utils_ops.tf = tf.compat.v1 16 | 17 | # Patch the location of gfile 18 | tf.gfile = tf.io.gfile 19 | 20 | 21 | def load_model(model_path): 22 | model = tf.saved_model.load(model_path) 23 | return model 24 | 25 | 26 | def load_image_into_numpy_array(path): 27 | """Load an image from file into a numpy array. 28 | 29 | Puts image into numpy array to feed into tensorflow graph. 30 | Note that by convention we put it into a numpy array with shape 31 | (height, width, channels), where channels=3 for RGB. 32 | 33 | Args: 34 | path: a file path (this can be local or on colossus) 35 | 36 | Returns: 37 | uint8 numpy array with shape (img_height, img_width, 3) 38 | """ 39 | img_data = tf.io.gfile.GFile(path, 'rb').read() 40 | image = Image.open(BytesIO(img_data)) 41 | (im_width, im_height) = image.size 42 | return np.array(image.getdata()).reshape( 43 | (im_height, im_width, 3)).astype(np.uint8) 44 | 45 | 46 | def run_inference_for_single_image(model, image): 47 | # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. 48 | input_tensor = tf.convert_to_tensor(image) 49 | # The model expects a batch of images, so add an axis with `tf.newaxis`. 50 | input_tensor = input_tensor[tf.newaxis,...] 51 | 52 | # Run inference 53 | output_dict = model(input_tensor) 54 | 55 | # All outputs are batches tensors. 56 | # Convert to numpy arrays, and take index [0] to remove the batch dimension. 57 | # We're only interested in the first num_detections. 58 | num_detections = int(output_dict.pop('num_detections')) 59 | output_dict = {key: value[0, :num_detections].numpy() 60 | for key, value in output_dict.items()} 61 | output_dict['num_detections'] = num_detections 62 | 63 | # detection_classes should be ints. 64 | output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64) 65 | 66 | # Handle models with masks: 67 | if 'detection_masks' in output_dict: 68 | # Reframe the the bbox mask to the image size. 69 | detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( 70 | output_dict['detection_masks'], output_dict['detection_boxes'], 71 | image.shape[0], image.shape[1]) 72 | detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) 73 | output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy() 74 | 75 | return output_dict 76 | 77 | 78 | def run_inference(model, category_index, image_path): 79 | if os.path.isdir(image_path): 80 | image_paths = [] 81 | for file_extension in ('*.png', '*jpg'): 82 | image_paths.extend(glob.glob(os.path.join(image_path, file_extension))) 83 | 84 | for i_path in image_paths: 85 | image_np = load_image_into_numpy_array(i_path) 86 | # Actual detection. 87 | output_dict = run_inference_for_single_image(model, image_np) 88 | # Visualization of the results of a detection. 89 | vis_util.visualize_boxes_and_labels_on_image_array( 90 | image_np, 91 | output_dict['detection_boxes'], 92 | output_dict['detection_classes'], 93 | output_dict['detection_scores'], 94 | category_index, 95 | instance_masks=output_dict.get('detection_masks_reframed', None), 96 | use_normalized_coordinates=True, 97 | line_thickness=8) 98 | plt.imshow(image_np) 99 | plt.show() 100 | else: 101 | image_np = load_image_into_numpy_array(image_path) 102 | # Actual detection. 103 | output_dict = run_inference_for_single_image(model, image_np) 104 | # Visualization of the results of a detection. 105 | vis_util.visualize_boxes_and_labels_on_image_array( 106 | image_np, 107 | output_dict['detection_boxes'], 108 | output_dict['detection_classes'], 109 | output_dict['detection_scores'], 110 | category_index, 111 | instance_masks=output_dict.get('detection_masks_reframed', None), 112 | use_normalized_coordinates=True, 113 | line_thickness=8) 114 | plt.imshow(image_np) 115 | plt.show() 116 | 117 | 118 | if __name__ == '__main__': 119 | parser = argparse.ArgumentParser(description='Detect objects inside webcam videostream') 120 | parser.add_argument('-m', '--model', type=str, required=True, help='Model Path') 121 | parser.add_argument('-l', '--labelmap', type=str, required=True, help='Path to Labelmap') 122 | parser.add_argument('-i', '--image_path', type=str, required=True, help='Path to image (or folder)') 123 | args = parser.parse_args() 124 | 125 | detection_model = load_model(args.model) 126 | category_index = label_map_util.create_category_index_from_labelmap(args.labelmap, use_display_name=True) 127 | 128 | run_inference(detection_model, category_index, args.image_path) 129 | -------------------------------------------------------------------------------- /camera_image.detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 3 | import datetime 4 | import pandas as pd 5 | from PIL import Image 6 | 7 | from object_detection.utils import ops as utils_ops 8 | from object_detection.utils import label_map_util 9 | from object_detection.utils import visualization_utils as vis_util 10 | 11 | # patch tf1 into `utils.ops` 12 | utils_ops.tf = tf.compat.v1 13 | 14 | # Patch the location of gfile 15 | tf.gfile = tf.io.gfile 16 | 17 | 18 | def load_model(model_path): 19 | model = tf.saved_model.load(model_path) 20 | return model 21 | 22 | 23 | def run_inference_for_single_image(model, image): 24 | image = np.asarray(image) 25 | # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. 26 | input_tensor = tf.convert_to_tensor(image) 27 | # The model expects a batch of images, so add an axis with `tf.newaxis`. 28 | input_tensor = input_tensor[tf.newaxis,...] 29 | 30 | # Run inference 31 | output_dict = model(input_tensor) 32 | 33 | # All outputs are batches tensors. 34 | # Convert to numpy arrays, and take index [0] to remove the batch dimension. 35 | # We're only interested in the first num_detections. 36 | num_detections = int(output_dict.pop('num_detections')) 37 | output_dict = {key: value[0, :num_detections].numpy() 38 | for key, value in output_dict.items()} 39 | output_dict['num_detections'] = num_detections 40 | 41 | # detection_classes should be ints. 42 | output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64) 43 | 44 | # Handle models with masks: 45 | if 'detection_masks' in output_dict: 46 | # Reframe the the bbox mask to the image size. 47 | detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( 48 | output_dict['detection_masks'], output_dict['detection_boxes'], 49 | image.shape[0], image.shape[1]) 50 | detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) 51 | output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy() 52 | 53 | return output_dict 54 | 55 | 56 | def run_inference(model, category_index, cap, threshold, show_video_steam, label_to_look_for, output_directory): 57 | # Create output directory if not already created 58 | os.makedirs(output_directory, exist_ok=True) 59 | os.makedirs(output_directory+'/images', exist_ok=True) 60 | 61 | if os.path.exists(output_directory+'/results.csv'): 62 | df = pd.read_csv(output_directory+'/results.csv') 63 | else: 64 | df = pd.DataFrame(columns=['timestamp', 'img_path']) 65 | 66 | while True: 67 | ret, image_np = cap.read() 68 | 69 | # Copy image for later 70 | image_show = np.copy(image_np) 71 | 72 | image_height, image_width, _ = image_np.shape 73 | 74 | # Actual detection. 75 | output_dict = run_inference_for_single_image(model, image_np) 76 | 77 | if show_video_steam: 78 | # Visualization of the results of a detection. 79 | vis_util.visualize_boxes_and_labels_on_image_array( 80 | image_np, 81 | output_dict['detection_boxes'], 82 | output_dict['detection_classes'], 83 | output_dict['detection_scores'], 84 | category_index, 85 | instance_masks=output_dict.get('detection_masks_reframed', None), 86 | use_normalized_coordinates=True, 87 | line_thickness=8) 88 | cv2.imshow('object_detection', cv2.resize(image_np, (800, 600))) 89 | if cv2.waitKey(25) & 0xFF == ord('q'): 90 | cap.release() 91 | cv2.destroyAllWindows() 92 | break 93 | 94 | # Get data(label, xmin, ymin, xmax, ymax) 95 | output = [] 96 | for index, score in enumerate(output_dict['detection_scores']): 97 | if score < threshold: 98 | continue 99 | label = category_index[output_dict['detection_classes'][index]]['name'] 100 | ymin, xmin, ymax, xmax = output_dict['detection_boxes'][index] 101 | output.append((label, int(xmin * image_width), int(ymin * image_height), int(xmax * image_width), int(ymax * image_height))) 102 | 103 | # Save incident (could be extended to send a email or something) 104 | for l, x_min, y_min, x_max, y_max in output: 105 | if l == label_to_look_for: 106 | array = cv2.cvtColor(np.array(image_show), cv2.COLOR_RGB2BGR) 107 | image = Image.fromarray(array) 108 | cropped_img = image.crop((x_min, y_min, x_max, y_max)) 109 | file_path = output_directory+'/images/'+str(len(df))+'.jpg' 110 | cropped_img.save(file_path, "JPEG", icc_profile=cropped_img.info.get('icc_profile')) 111 | df.loc[len(df)] = [datetime.datetime.now(), file_path] 112 | df.to_csv(output_directory+'/results.csv', index=None) 113 | 114 | 115 | if __name__ == '__main__': 116 | parser = argparse.ArgumentParser(description='Detect objects inside webcam videostream') 117 | parser.add_argument('-m', '--model', type=str, required=True, help='Model Path') 118 | parser.add_argument('-l', '--labelmap', type=str, required=True, help='Path to Labelmap') 119 | parser.add_argument('-t', '--threshold', type=float, default=0.5, help='Threshold for bounding boxes') 120 | parser.add_argument('-s', '--show', default=True, action='store_true', help='Show window') 121 | parser.add_argument('-la', '--label', default='person', type=str, help='Label name to detect') 122 | parser.add_argument('-o', '--output_directory', default='results', type=str, help='Directory for the outputs') 123 | args = parser.parse_args() 124 | 125 | detection_model = load_model(args.model) 126 | category_index = label_map_util.create_category_index_from_labelmap(args.labelmap, use_display_name=True) 127 | 128 | cap = cv2.VideoCapture(0) 129 | run_inference(detection_model, category_index, cap, args.threshold, args.show, args.label, args.output_directory) 130 | --------------------------------------------------------------------------------