├── .gitignore ├── img1.jpg ├── .github └── workflows │ └── greetings.yml ├── README.md ├── requirements.txt ├── Object_Detection_Youtube.py └── Drone_Human_Detection_Model.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | -------------------------------------------------------------------------------- /img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/akash-agni/Real-Time-Object-Detection/HEAD/img1.jpg -------------------------------------------------------------------------------- /.github/workflows/greetings.yml: -------------------------------------------------------------------------------- 1 | name: Greetings 2 | 3 | on: [pull_request, issues] 4 | 5 | jobs: 6 | greeting: 7 | runs-on: ubuntu-latest 8 | permissions: 9 | issues: write 10 | pull-requests: write 11 | steps: 12 | - uses: actions/first-interaction@v1 13 | with: 14 | repo-token: ${{ secrets.GITHUB_TOKEN }} 15 | issue-message: 'Hi, Thank you for submitting the issue, I will look into it and get back to at the earliest' 16 | pr-message: 'Hi, Thank you for submitting the PR, please make sure to provide details of the changes being made.' 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real Time Object Detection 2 | TL;DR: Python application for read time object detection on video feed. 3 | 4 | 5 | 6 | ## Usage 7 | You can install all the used packages using. 8 | 9 | ```pip install -r requirements.txt``` 10 | 11 | To parse an URL. 12 | 13 | ```python Object_Detection_Youtube.py ``` 14 | 15 | To parse a drone video for humans only. 16 | 17 | ```python Drone_Human_Detection_Model.py ``` 18 | 19 | ## Upcoming Features. 20 |
    21 |
  • Real Time Object Detection using Webcam.
  • 22 |
  • Flask based REST API to stream parsed video live on web browser
  • 23 |
24 | 25 | :blue_heart: 26 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.12.0 2 | cachetools==4.2.1 3 | certifi==2020.12.5 4 | chardet==4.0.0 5 | cycler==0.10.0 6 | google-auth==1.30.0 7 | google-auth-oauthlib==0.4.4 8 | grpcio==1.37.0 9 | idna==2.10 10 | kiwisolver==1.3.1 11 | Markdown==3.3.4 12 | matplotlib==3.4.1 13 | numpy==1.20.2 14 | oauthlib==3.1.0 15 | opencv-python==4.5.1.48 16 | pandas==1.2.4 17 | Pillow==8.2.0 18 | protobuf==3.15.8 19 | pyasn1==0.4.8 20 | pyasn1-modules==0.2.8 21 | pyparsing==2.4.7 22 | python-dateutil==2.8.1 23 | pytz==2021.1 24 | PyYAML==5.4.1 25 | requests==2.25.1 26 | requests-oauthlib==1.3.0 27 | rsa==4.7.2 28 | scipy==1.6.3 29 | seaborn==0.11.1 30 | six==1.15.0 31 | tensorboard==2.5.0 32 | tensorboard-data-server==0.6.0 33 | tensorboard-plugin-wit==1.8.0 34 | torch==1.8.1 35 | torchvision==0.9.1 36 | tqdm==4.60.0 37 | typing-extensions==3.7.4.3 38 | urllib3==1.26.4 39 | Werkzeug==1.0.1 40 | -------------------------------------------------------------------------------- /Object_Detection_Youtube.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | import pafy 5 | from time import time 6 | 7 | 8 | class ObjectDetection: 9 | """ 10 | Class implements Yolo5 model to make inferences on a youtube video using Opencv2. 11 | """ 12 | 13 | def __init__(self, url, out_file="Labeled_Video.avi"): 14 | """ 15 | Initializes the class with youtube url and output file. 16 | :param url: Has to be as youtube URL,on which prediction is made. 17 | :param out_file: A valid output file name. 18 | """ 19 | self._URL = url 20 | self.model = self.load_model() 21 | self.classes = self.model.names 22 | self.out_file = out_file 23 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 24 | 25 | def get_video_from_url(self): 26 | """ 27 | Creates a new video streaming object to extract video frame by frame to make prediction on. 28 | :return: opencv2 video capture object, with lowest quality frame available for video. 29 | """ 30 | play = pafy.new(self._URL).streams[-1] 31 | assert play is not None 32 | return cv2.VideoCapture(play.url) 33 | 34 | def load_model(self): 35 | """ 36 | Loads Yolo5 model from pytorch hub. 37 | :return: Trained Pytorch model. 38 | """ 39 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) 40 | return model 41 | 42 | def score_frame(self, frame): 43 | """ 44 | Takes a single frame as input, and scores the frame using yolo5 model. 45 | :param frame: input frame in numpy/list/tuple format. 46 | :return: Labels and Coordinates of objects detected by model in the frame. 47 | """ 48 | self.model.to(self.device) 49 | frame = [frame] 50 | results = self.model(frame) 51 | labels, cord = results.xyxyn[0][:, -1].numpy(), results.xyxyn[0][:, :-1].numpy() 52 | return labels, cord 53 | 54 | def class_to_label(self, x): 55 | """ 56 | For a given label value, return corresponding string label. 57 | :param x: numeric label 58 | :return: corresponding string label 59 | """ 60 | return self.classes[int(x)] 61 | 62 | def plot_boxes(self, results, frame): 63 | """ 64 | Takes a frame and its results as input, and plots the bounding boxes and label on to the frame. 65 | :param results: contains labels and coordinates predicted by model on the given frame. 66 | :param frame: Frame which has been scored. 67 | :return: Frame with bounding boxes and labels ploted on it. 68 | """ 69 | labels, cord = results 70 | n = len(labels) 71 | x_shape, y_shape = frame.shape[1], frame.shape[0] 72 | for i in range(n): 73 | row = cord[i] 74 | if row[4] >= 0.2: 75 | x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape) 76 | bgr = (0, 255, 0) 77 | cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2) 78 | cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2) 79 | 80 | return frame 81 | 82 | def __call__(self): 83 | """ 84 | This function is called when class is executed, it runs the loop to read the video frame by frame, 85 | and write the output into a new file. 86 | :return: void 87 | """ 88 | player = self.get_video_from_url() 89 | assert player.isOpened() 90 | x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH)) 91 | y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT)) 92 | four_cc = cv2.VideoWriter_fourcc(*"MJPG") 93 | out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape)) 94 | while True: 95 | start_time = time() 96 | ret, frame = player.read() 97 | assert ret 98 | results = self.score_frame(frame) 99 | frame = self.plot_boxes(results, frame) 100 | end_time = time() 101 | fps = 1/np.round(end_time - start_time, 3) 102 | print(f"Frames Per Second : {fps}") 103 | out.write(frame) 104 | 105 | # Create a new object and execute. 106 | a = ObjectDetection("https://www.youtube.com/watch?v=dwD1n7N7EAg") 107 | a() 108 | -------------------------------------------------------------------------------- /Drone_Human_Detection_Model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | from time import time 5 | import sys 6 | 7 | 8 | class ObjectDetection: 9 | """ 10 | The class performs generic object detection on a video file. 11 | It uses yolo5 pretrained model to make inferences and opencv2 to manage frames. 12 | Included Features: 13 | 1. Reading and writing of video file using Opencv2 14 | 2. Using pretrained model to make inferences on frames. 15 | 3. Use the inferences to plot boxes on objects along with labels. 16 | Upcoming Features: 17 | """ 18 | def __init__(self, input_file, out_file="Labeled_Video.avi"): 19 | """ 20 | :param input_file: provide youtube url which will act as input for the model. 21 | :param out_file: name of a existing file, or a new file in which to write the output. 22 | :return: void 23 | """ 24 | self.input_file = input_file 25 | self.model = self.load_model() 26 | self.model.conf = 0.4 # set inference threshold at 0.3 27 | self.model.iou = 0.3 # set inference IOU threshold at 0.3 28 | self.model.classes = [0] # set model to only detect "Person" class 29 | self.out_file = out_file 30 | self.device = 'cuda' if torch.cuda.is_available() else 'cpu' 31 | 32 | def get_video_from_file(self): 33 | """ 34 | Function creates a streaming object to read the video from the file frame by frame. 35 | :param self: class object 36 | :return: OpenCV object to stream video frame by frame. 37 | """ 38 | cap = cv2.VideoCapture(self.input_file) 39 | assert cap is not None 40 | return cap 41 | 42 | def load_model(self): 43 | """ 44 | Function loads the yolo5 model from PyTorch Hub. 45 | """ 46 | model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) 47 | return model 48 | 49 | def score_frame(self, frame): 50 | """ 51 | function scores each frame of the video and returns results. 52 | :param frame: frame to be infered. 53 | :return: labels and coordinates of objects found. 54 | """ 55 | self.model.to(self.device) 56 | results = self.model([frame]) 57 | labels, cord = results.xyxyn[0][:, -1].to('cpu').numpy(), results.xyxyn[0][:, :-1].to('cpu').numpy() 58 | return labels, cord 59 | 60 | def plot_boxes(self, results, frame): 61 | """ 62 | plots boxes and labels on frame. 63 | :param results: inferences made by model 64 | :param frame: frame on which to make the plots 65 | :return: new frame with boxes and labels plotted. 66 | """ 67 | labels, cord = results 68 | n = len(labels) 69 | x_shape, y_shape = frame.shape[1], frame.shape[0] 70 | for i in range(n): 71 | row = cord[i] 72 | x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape) 73 | bgr = (0, 0, 255) 74 | cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 1) 75 | label = f"{int(row[4]*100)}" 76 | cv2.putText(frame, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1) 77 | cv2.putText(frame, f"Total Targets: {n}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) 78 | 79 | return frame 80 | 81 | def __call__(self): 82 | player = self.get_video_from_file() # create streaming service for application 83 | assert player.isOpened() 84 | x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH)) 85 | y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT)) 86 | four_cc = cv2.VideoWriter_fourcc(*"MJPG") 87 | out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape)) 88 | fc = 0 89 | fps = 0 90 | tfc = int(player.get(cv2.CAP_PROP_FRAME_COUNT)) 91 | tfcc = 0 92 | while True: 93 | fc += 1 94 | start_time = time() 95 | ret, frame = player.read() 96 | if not ret: 97 | break 98 | results = self.score_frame(frame) 99 | frame = self.plot_boxes(results, frame) 100 | end_time = time() 101 | fps += 1/np.round(end_time - start_time, 3) 102 | if fc == 10: 103 | fps = int(fps / 10) 104 | tfcc += fc 105 | fc = 0 106 | per_com = int(tfcc / tfc * 100) 107 | print(f"Frames Per Second : {fps} || Percentage Parsed : {per_com}") 108 | out.write(frame) 109 | player.release() 110 | 111 | 112 | link = sys.argv[1] 113 | output_file = sys.argv[2] 114 | a = ObjectDetection(link, output_file) 115 | a() --------------------------------------------------------------------------------