├── .gitignore
├── img1.jpg
├── .github
    └── workflows
    │   └── greetings.yml
├── README.md
├── requirements.txt
├── Object_Detection_Youtube.py
└── Drone_Human_Detection_Model.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | 


--------------------------------------------------------------------------------
/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akash-agni/Real-Time-Object-Detection/HEAD/img1.jpg


--------------------------------------------------------------------------------
/.github/workflows/greetings.yml:
--------------------------------------------------------------------------------
 1 | name: Greetings
 2 | 
 3 | on: [pull_request, issues]
 4 | 
 5 | jobs:
 6 |   greeting:
 7 |     runs-on: ubuntu-latest
 8 |     permissions:
 9 |       issues: write
10 |       pull-requests: write
11 |     steps:
12 |     - uses: actions/first-interaction@v1
13 |       with:
14 |         repo-token: ${{ secrets.GITHUB_TOKEN }}
15 |         issue-message: 'Hi, Thank you for submitting the issue, I will look into it and get back to you at the earliest.'
16 |         pr-message: 'Hi, Thank you for submitting the PR, please make sure to provide details of the changes being made.'
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Real Time Object Detection
 2 | TL;DR: Python application for real-time object detection on a video feed.
 3 | 
 4 | <img src="https://raw.githubusercontent.com/akash-agni/Real-Time-Object-Detection/main/img1.jpg" width="400">
 5 | 
 6 | ## Usage
 7 | You can install all the required packages using:
 8 | 
 9 | ```pip install -r requirements.txt```
10 | 
11 | To parse a video from a URL:
12 | 
13 | ```python Object_Detection_Youtube.py <URL> <Output_Filename.avi>```
14 | 
15 | To parse a drone video for humans only.
16 | 
17 | ```python Drone_Human_Detection_Model.py <input_file_name> <output_file_name.avi>```
18 | 
19 | ## Upcoming Features.
20 | <ul>
21 |     <li>Real Time Object Detection using Webcam.</li>
22 |     <li>Flask based REST API to stream parsed video live on web browser</li>
23 | </ul>
24 | 
25 | :blue_heart:
26 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | absl-py==0.12.0
 2 | cachetools==4.2.1
 3 | certifi==2020.12.5
 4 | chardet==4.0.0
 5 | cycler==0.10.0
 6 | google-auth==1.30.0
 7 | google-auth-oauthlib==0.4.4
 8 | grpcio==1.37.0
 9 | idna==2.10
10 | kiwisolver==1.3.1
11 | Markdown==3.3.4
12 | matplotlib==3.4.1
13 | numpy==1.20.2
14 | oauthlib==3.1.0
15 | opencv-python==4.5.1.48
16 | pandas==1.2.4
17 | Pillow==8.2.0
18 | protobuf==3.15.8
19 | pyasn1==0.4.8
20 | pyasn1-modules==0.2.8
21 | pyparsing==2.4.7
22 | python-dateutil==2.8.1
23 | pytz==2021.1
24 | PyYAML==5.4.1
25 | requests==2.25.1
26 | requests-oauthlib==1.3.0
27 | rsa==4.7.2
28 | scipy==1.6.3
29 | seaborn==0.11.1
30 | six==1.15.0
31 | tensorboard==2.5.0
32 | tensorboard-data-server==0.6.0
33 | tensorboard-plugin-wit==1.8.0
34 | torch==1.8.1
35 | torchvision==0.9.1
36 | tqdm==4.60.0
37 | typing-extensions==3.7.4.3
38 | urllib3==1.26.4
39 | Werkzeug==1.0.1
40 | 


--------------------------------------------------------------------------------
/Object_Detection_Youtube.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import cv2
  4 | import pafy
  5 | from time import time
  6 | 
  7 | 
  8 | class ObjectDetection:
  9 |     """
 10 |     Class implements Yolo5 model to make inferences on a youtube video using Opencv2.
 11 |     """
 12 | 
 13 |     def __init__(self, url, out_file="Labeled_Video.avi"):
 14 |         """
 15 |         Initializes the class with youtube url and output file.
 16 |         :param url: Has to be as youtube URL,on which prediction is made.
 17 |         :param out_file: A valid output file name.
 18 |         """
 19 |         self._URL = url
 20 |         self.model = self.load_model()
 21 |         self.classes = self.model.names
 22 |         self.out_file = out_file
 23 |         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
 24 | 
 25 |     def get_video_from_url(self):
 26 |         """
 27 |         Creates a new video streaming object to extract video frame by frame to make prediction on.
 28 |         :return: opencv2 video capture object, with lowest quality frame available for video.
 29 |         """
 30 |         play = pafy.new(self._URL).streams[-1]
 31 |         assert play is not None
 32 |         return cv2.VideoCapture(play.url)
 33 | 
 34 |     def load_model(self):
 35 |         """
 36 |         Loads Yolo5 model from pytorch hub.
 37 |         :return: Trained Pytorch model.
 38 |         """
 39 |         model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
 40 |         return model
 41 | 
 42 |     def score_frame(self, frame):
 43 |         """
 44 |         Takes a single frame as input, and scores the frame using yolo5 model.
 45 |         :param frame: input frame in numpy/list/tuple format.
 46 |         :return: Labels and Coordinates of objects detected by model in the frame.
 47 |         """
 48 |         self.model.to(self.device)
 49 |         frame = [frame]
 50 |         results = self.model(frame)
 51 |         labels, cord = results.xyxyn[0][:, -1].numpy(), results.xyxyn[0][:, :-1].numpy()
 52 |         return labels, cord
 53 | 
 54 |     def class_to_label(self, x):
 55 |         """
 56 |         For a given label value, return corresponding string label.
 57 |         :param x: numeric label
 58 |         :return: corresponding string label
 59 |         """
 60 |         return self.classes[int(x)]
 61 | 
 62 |     def plot_boxes(self, results, frame):
 63 |         """
 64 |         Takes a frame and its results as input, and plots the bounding boxes and label on to the frame.
 65 |         :param results: contains labels and coordinates predicted by model on the given frame.
 66 |         :param frame: Frame which has been scored.
 67 |         :return: Frame with bounding boxes and labels ploted on it.
 68 |         """
 69 |         labels, cord = results
 70 |         n = len(labels)
 71 |         x_shape, y_shape = frame.shape[1], frame.shape[0]
 72 |         for i in range(n):
 73 |             row = cord[i]
 74 |             if row[4] >= 0.2:
 75 |                 x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
 76 |                 bgr = (0, 255, 0)
 77 |                 cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 2)
 78 |                 cv2.putText(frame, self.class_to_label(labels[i]), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.9, bgr, 2)
 79 | 
 80 |         return frame
 81 | 
 82 |     def __call__(self):
 83 |         """
 84 |         This function is called when class is executed, it runs the loop to read the video frame by frame,
 85 |         and write the output into a new file.
 86 |         :return: void
 87 |         """
 88 |         player = self.get_video_from_url()
 89 |         assert player.isOpened()
 90 |         x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH))
 91 |         y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT))
 92 |         four_cc = cv2.VideoWriter_fourcc(*"MJPG")
 93 |         out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape))
 94 |         while True:
 95 |             start_time = time()
 96 |             ret, frame = player.read()
 97 |             assert ret
 98 |             results = self.score_frame(frame)
 99 |             frame = self.plot_boxes(results, frame)
100 |             end_time = time()
101 |             fps = 1/np.round(end_time - start_time, 3)
102 |             print(f"Frames Per Second : {fps}")
103 |             out.write(frame)
104 | 
# Create a new object and execute.
# Usage: python Object_Detection_Youtube.py [URL] [Output_Filename.avi]
# Without arguments the sample video below is used and the class default output name applies.
if __name__ == "__main__":
    import sys  # local import: this script's import block does not include sys

    url = sys.argv[1] if len(sys.argv) > 1 else "https://www.youtube.com/watch?v=dwD1n7N7EAg"
    if len(sys.argv) > 2:
        detector = ObjectDetection(url, sys.argv[2])
    else:
        detector = ObjectDetection(url)
    detector()
108 | 


--------------------------------------------------------------------------------
/Drone_Human_Detection_Model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import cv2
  4 | from time import time
  5 | import sys
  6 | 
  7 | 
  8 | class ObjectDetection:
  9 |     """
 10 |     The class performs generic object detection on a video file.
 11 |     It uses yolo5 pretrained model to make inferences and opencv2 to manage frames.
 12 |     Included Features:
 13 |     1. Reading and writing of video file using  Opencv2
 14 |     2. Using pretrained model to make inferences on frames.
 15 |     3. Use the inferences to plot boxes on objects along with labels.
 16 |     Upcoming Features:
 17 |     """
 18 |     def __init__(self, input_file, out_file="Labeled_Video.avi"):
 19 |         """
 20 |         :param input_file: provide youtube url which will act as input for the model.
 21 |         :param out_file: name of a existing file, or a new file in which to write the output.
 22 |         :return: void
 23 |         """
 24 |         self.input_file = input_file
 25 |         self.model = self.load_model()
 26 |         self.model.conf = 0.4 # set inference threshold at 0.3
 27 |         self.model.iou = 0.3 # set inference IOU threshold at 0.3
 28 |         self.model.classes = [0] # set model to only detect "Person" class
 29 |         self.out_file = out_file
 30 |         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
 31 | 
 32 |     def get_video_from_file(self):
 33 |         """
 34 |         Function creates a streaming object to read the video from the file frame by frame.
 35 |         :param self:  class object
 36 |         :return:  OpenCV object to stream video frame by frame.
 37 |         """
 38 |         cap = cv2.VideoCapture(self.input_file)
 39 |         assert cap is not None
 40 |         return cap
 41 | 
 42 |     def load_model(self):
 43 |         """
 44 |         Function loads the yolo5 model from PyTorch Hub.
 45 |         """
 46 |         model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
 47 |         return model
 48 | 
 49 |     def score_frame(self, frame):
 50 |         """
 51 |         function scores each frame of the video and returns results.
 52 |         :param frame: frame to be infered.
 53 |         :return: labels and coordinates of objects found.
 54 |         """
 55 |         self.model.to(self.device)
 56 |         results = self.model([frame])
 57 |         labels, cord = results.xyxyn[0][:, -1].to('cpu').numpy(), results.xyxyn[0][:, :-1].to('cpu').numpy()
 58 |         return labels, cord
 59 | 
 60 |     def plot_boxes(self, results, frame):
 61 |         """
 62 |         plots boxes and labels on frame.
 63 |         :param results: inferences made by model
 64 |         :param frame: frame on which to  make the plots
 65 |         :return: new frame with boxes and labels plotted.
 66 |         """
 67 |         labels, cord = results
 68 |         n = len(labels)
 69 |         x_shape, y_shape = frame.shape[1], frame.shape[0]
 70 |         for i in range(n):
 71 |             row = cord[i]
 72 |             x1, y1, x2, y2 = int(row[0]*x_shape), int(row[1]*y_shape), int(row[2]*x_shape), int(row[3]*y_shape)
 73 |             bgr = (0, 0, 255)
 74 |             cv2.rectangle(frame, (x1, y1), (x2, y2), bgr, 1)
 75 |             label = f"{int(row[4]*100)}"
 76 |             cv2.putText(frame, label, (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
 77 |             cv2.putText(frame, f"Total Targets: {n}", (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
 78 | 
 79 |         return frame
 80 | 
 81 |     def __call__(self):
 82 |         player = self.get_video_from_file() # create streaming service for application
 83 |         assert player.isOpened()
 84 |         x_shape = int(player.get(cv2.CAP_PROP_FRAME_WIDTH))
 85 |         y_shape = int(player.get(cv2.CAP_PROP_FRAME_HEIGHT))
 86 |         four_cc = cv2.VideoWriter_fourcc(*"MJPG")
 87 |         out = cv2.VideoWriter(self.out_file, four_cc, 20, (x_shape, y_shape))
 88 |         fc = 0
 89 |         fps = 0
 90 |         tfc = int(player.get(cv2.CAP_PROP_FRAME_COUNT))
 91 |         tfcc = 0
 92 |         while True:
 93 |             fc += 1
 94 |             start_time = time()
 95 |             ret, frame = player.read()
 96 |             if not ret:
 97 |                 break
 98 |             results = self.score_frame(frame)
 99 |             frame = self.plot_boxes(results, frame)
100 |             end_time = time()
101 |             fps += 1/np.round(end_time - start_time, 3)
102 |             if fc == 10:
103 |                 fps = int(fps / 10)
104 |                 tfcc += fc
105 |                 fc = 0
106 |                 per_com = int(tfcc / tfc * 100)
107 |                 print(f"Frames Per Second : {fps} || Percentage Parsed : {per_com}")
108 |             out.write(frame)
109 |         player.release()
110 | 
111 | 
# Usage: python Drone_Human_Detection_Model.py <input_file_name> [output_file_name.avi]
# The output name is optional; the class default is used when it is omitted.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError on a missing argument.
        sys.exit(f"Usage: python {sys.argv[0]} <input_file_name> [output_file_name.avi]")
    link = sys.argv[1]
    if len(sys.argv) > 2:
        a = ObjectDetection(link, sys.argv[2])
    else:
        a = ObjectDetection(link)
    a()


--------------------------------------------------------------------------------