├── .gitignore
├── README.md
├── classes.txt
├── output.avi
├── yolov4-tiny.cfg
├── yolov4-tiny.weights
├── yolov4.py
└── yolov4_Recording.py
/.gitignore:
--------------------------------------------------------------------------------
*.mp4
*.avi
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Yolov4-opencv-python

YOLOv4 object detection using opencv-python: the simplest way to run inference with YOLOv4.

## YOLOv4 Object Detector Demo Video

https://user-images.githubusercontent.com/66181793/122593546-41e6c980-d07f-11eb-8e18-bcc63d550183.mp4

---

Implementation details are available on [_**Darknet**_](https://github.com/pjreddie/darknet)

All the frozen-inference graphs (pre-trained networks) are available on Darknet.

---

## TODO

- [x] [**YOLOv4 object detector with opencv-python implementation**](https://youtu.be/1aL6tewfxFY)

- [x] Distance estimation using YOLO object detection 😊
      Project repository: [Yolov4 Detection and Distance Estimation](https://github.com/Asadullah-Dal17/Yolov4-Detector-and-Distance-Estimator)

## Installation

You need [**OpenCV Contrib**](https://pypi.org/project/opencv-contrib-python/)

--> **Windows**
```
pip install opencv-contrib-python==4.5.3.56
```
--> **Linux or Mac**

```
pip3 install opencv-contrib-python==4.5.3.56
```
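
You can quickly verify the install (a minimal check; it should print the pinned version):

```
python -c "import cv2; print(cv2.__version__)"   # expect 4.5.3
```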
### Clone this repo

`git clone https://github.com/Asadullah-Dal17/yolov4-opencv-python`
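
Then run the detector on the sample video shipped with the repo (`yolov4.py` reads `output.avi` from the repo root):

```
cd yolov4-opencv-python
python yolov4.py
```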

That's it, you are done. That wasn't hard, was it? 😉

I have used the tiny weights here; check out the [_Darknet_](https://github.com/pjreddie/darknet) GitHub for more pre-trained models.

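Want the full model instead of tiny? Here is a minimal sketch of the swap (assuming you have downloaded `yolov4.cfg` and `yolov4.weights` from the Darknet repository; 608x608 is that config's default input size):

```
import cv2 as cv

# assumption: yolov4.cfg and yolov4.weights were downloaded separately
net = cv.dnn.readNet('yolov4.weights', 'yolov4.cfg')
model = cv.dnn_DetectionModel(net)
model.setInputParams(size=(608, 608), scale=1/255, swapRB=True)
```
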
Check out my complete video tutorial on YOLOv4 object detection: [**Video Tutorial**](https://youtu.be/1aL6tewfxFY)

If you found this helpful, please star :star: the repo.

You can watch my video tutorials on computer vision topics on my YouTube channel.

If you have any questions or need help with a CV project, feel free to DM me on Instagram.

## 💚 Join me on Social Media :green_heart: 🖤
--------------------------------------------------------------------------------
/classes.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
--------------------------------------------------------------------------------
/output.avi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Asadullah-Dal17/yolov4-opencv-python/4989375ed0d7165cc8b0c4776faf0c3bf7b64ce9/output.avi
--------------------------------------------------------------------------------
/yolov4-tiny.cfg:
--------------------------------------------------------------------------------
1 | [net]
2 | # Testing
3 | #batch=1
4 | #subdivisions=1
5 | # Training
6 | batch=64
7 | subdivisions=1
8 | width=416
9 | height=416
10 | channels=3
11 | momentum=0.9
12 | decay=0.0005
13 | angle=0
14 | saturation = 1.5
15 | exposure = 1.5
16 | hue=.1
17 |
18 | learning_rate=0.00261
19 | burn_in=1000
20 |
21 | max_batches = 2000200
22 | policy=steps
23 | steps=1600000,1800000
24 | scales=.1,.1
25 |
26 |
27 | #weights_reject_freq=1001
28 | #ema_alpha=0.9998
29 | #equidistant_point=1000
30 | #num_sigmas_reject_badlabels=3
31 | #badlabels_rejection_percentage=0.2
32 |
33 |
34 | [convolutional]
35 | batch_normalize=1
36 | filters=32
37 | size=3
38 | stride=2
39 | pad=1
40 | activation=leaky
41 |
42 | [convolutional]
43 | batch_normalize=1
44 | filters=64
45 | size=3
46 | stride=2
47 | pad=1
48 | activation=leaky
49 |
50 | [convolutional]
51 | batch_normalize=1
52 | filters=64
53 | size=3
54 | stride=1
55 | pad=1
56 | activation=leaky
57 |
58 | [route]
59 | layers=-1
60 | groups=2
61 | group_id=1
62 |
63 | [convolutional]
64 | batch_normalize=1
65 | filters=32
66 | size=3
67 | stride=1
68 | pad=1
69 | activation=leaky
70 |
71 | [convolutional]
72 | batch_normalize=1
73 | filters=32
74 | size=3
75 | stride=1
76 | pad=1
77 | activation=leaky
78 |
79 | [route]
80 | layers = -1,-2
81 |
82 | [convolutional]
83 | batch_normalize=1
84 | filters=64
85 | size=1
86 | stride=1
87 | pad=1
88 | activation=leaky
89 |
90 | [route]
91 | layers = -6,-1
92 |
93 | [maxpool]
94 | size=2
95 | stride=2
96 |
97 | [convolutional]
98 | batch_normalize=1
99 | filters=128
100 | size=3
101 | stride=1
102 | pad=1
103 | activation=leaky
104 |
105 | [route]
106 | layers=-1
107 | groups=2
108 | group_id=1
109 |
110 | [convolutional]
111 | batch_normalize=1
112 | filters=64
113 | size=3
114 | stride=1
115 | pad=1
116 | activation=leaky
117 |
118 | [convolutional]
119 | batch_normalize=1
120 | filters=64
121 | size=3
122 | stride=1
123 | pad=1
124 | activation=leaky
125 |
126 | [route]
127 | layers = -1,-2
128 |
129 | [convolutional]
130 | batch_normalize=1
131 | filters=128
132 | size=1
133 | stride=1
134 | pad=1
135 | activation=leaky
136 |
137 | [route]
138 | layers = -6,-1
139 |
140 | [maxpool]
141 | size=2
142 | stride=2
143 |
144 | [convolutional]
145 | batch_normalize=1
146 | filters=256
147 | size=3
148 | stride=1
149 | pad=1
150 | activation=leaky
151 |
152 | [route]
153 | layers=-1
154 | groups=2
155 | group_id=1
156 |
157 | [convolutional]
158 | batch_normalize=1
159 | filters=128
160 | size=3
161 | stride=1
162 | pad=1
163 | activation=leaky
164 |
165 | [convolutional]
166 | batch_normalize=1
167 | filters=128
168 | size=3
169 | stride=1
170 | pad=1
171 | activation=leaky
172 |
173 | [route]
174 | layers = -1,-2
175 |
176 | [convolutional]
177 | batch_normalize=1
178 | filters=256
179 | size=1
180 | stride=1
181 | pad=1
182 | activation=leaky
183 |
184 | [route]
185 | layers = -6,-1
186 |
187 | [maxpool]
188 | size=2
189 | stride=2
190 |
191 | [convolutional]
192 | batch_normalize=1
193 | filters=512
194 | size=3
195 | stride=1
196 | pad=1
197 | activation=leaky
198 |
199 | ##################################
200 |
201 | [convolutional]
202 | batch_normalize=1
203 | filters=256
204 | size=1
205 | stride=1
206 | pad=1
207 | activation=leaky
208 |
209 | [convolutional]
210 | batch_normalize=1
211 | filters=512
212 | size=3
213 | stride=1
214 | pad=1
215 | activation=leaky
216 |
217 | [convolutional]
218 | size=1
219 | stride=1
220 | pad=1
221 | filters=255
222 | activation=linear
223 |
224 |
225 |
226 | [yolo]
227 | mask = 3,4,5
228 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
229 | classes=80
230 | num=6
231 | jitter=.3
232 | scale_x_y = 1.05
233 | cls_normalizer=1.0
234 | iou_normalizer=0.07
235 | iou_loss=ciou
236 | ignore_thresh = .7
237 | truth_thresh = 1
238 | random=0
239 | resize=1.5
240 | nms_kind=greedynms
241 | beta_nms=0.6
242 | #new_coords=1
243 | #scale_x_y = 2.0
244 |
245 | [route]
246 | layers = -4
247 |
248 | [convolutional]
249 | batch_normalize=1
250 | filters=128
251 | size=1
252 | stride=1
253 | pad=1
254 | activation=leaky
255 |
256 | [upsample]
257 | stride=2
258 |
259 | [route]
260 | layers = -1, 23
261 |
262 | [convolutional]
263 | batch_normalize=1
264 | filters=256
265 | size=3
266 | stride=1
267 | pad=1
268 | activation=leaky
269 |
270 | [convolutional]
271 | size=1
272 | stride=1
273 | pad=1
274 | filters=255
275 | activation=linear
276 |
277 | [yolo]
278 | mask = 1,2,3
279 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
280 | classes=80
281 | num=6
282 | jitter=.3
283 | scale_x_y = 1.05
284 | cls_normalizer=1.0
285 | iou_normalizer=0.07
286 | iou_loss=ciou
287 | ignore_thresh = .7
288 | truth_thresh = 1
289 | random=0
290 | resize=1.5
291 | nms_kind=greedynms
292 | beta_nms=0.6
293 | #new_coords=1
294 | #scale_x_y = 2.0
--------------------------------------------------------------------------------
/yolov4-tiny.weights:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Asadullah-Dal17/yolov4-opencv-python/4989375ed0d7165cc8b0c4776faf0c3bf7b64ce9/yolov4-tiny.weights
--------------------------------------------------------------------------------
/yolov4.py:
--------------------------------------------------------------------------------
import cv2 as cv
import time

# detection thresholds
Conf_threshold = 0.4  # minimum confidence for a box to be kept
NMS_threshold = 0.4   # IoU threshold for non-maximum suppression
COLORS = [(0, 255, 0), (0, 0, 255), (255, 0, 0),
          (255, 255, 0), (255, 0, 255), (0, 255, 255)]

# load the 80 COCO class labels, one per line
with open('classes.txt', 'r') as f:
    class_name = [cname.strip() for cname in f.readlines()]

net = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
# the CUDA backend needs an OpenCV build with CUDA support; these are
# preferences, so OpenCV falls back to the default (CPU) backend otherwise
net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)

model = cv.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

cap = cv.VideoCapture('output.avi')
starting_time = time.time()
frame_counter = 0
while True:
    ret, frame = cap.read()
    if not ret:  # end of video
        break
    frame_counter += 1
    classes, scores, boxes = model.detect(frame, Conf_threshold, NMS_threshold)
    for (classid, score, box) in zip(classes, scores, boxes):
        color = COLORS[int(classid) % len(COLORS)]
        # int(classid) handles both scalar and 1-element-array class ids
        label = "%s : %f" % (class_name[int(classid)], score)
        cv.rectangle(frame, box, color, 1)
        cv.putText(frame, label, (box[0], box[1] - 10),
                   cv.FONT_HERSHEY_COMPLEX, 0.3, color, 1)
    elapsed_time = time.time() - starting_time
    fps = frame_counter / elapsed_time
    cv.putText(frame, f'FPS: {fps:.2f}', (20, 50),
               cv.FONT_HERSHEY_COMPLEX, 0.7, (0, 255, 0), 2)
    cv.imshow('frame', frame)
    key = cv.waitKey(1)
    if key == ord('q'):
        break
cap.release()
cv.destroyAllWindows()
--------------------------------------------------------------------------------
/yolov4_Recording.py:
--------------------------------------------------------------------------------
import cv2 as cv
import time

# detection thresholds (higher confidence cut-off than yolov4.py)
Conf_threshold = 0.6  # minimum confidence for a box to be kept
NMS_threshold = 0.4   # IoU threshold for non-maximum suppression
COLORS = [(0, 255, 0), (0, 0, 255), (255, 0, 0),
          (255, 255, 0), (255, 0, 255), (0, 255, 255)]

# load the 80 COCO class labels, one per line
with open('classes.txt', 'r') as f:
    class_name = [cname.strip() for cname in f.readlines()]

net = cv.dnn.readNet('yolov4-tiny.weights', 'yolov4-tiny.cfg')
# the CUDA backend needs an OpenCV build with CUDA support; these are
# preferences, so OpenCV falls back to the default (CPU) backend otherwise
net.setPreferableBackend(cv.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv.dnn.DNN_TARGET_CUDA_FP16)

model = cv.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

# source video (not shipped with the repo; .mp4 files are gitignored)
cap = cv.VideoCapture('pexels-alex-pelsh-6896028.mp4')
frame_width = cap.get(cv.CAP_PROP_FRAME_WIDTH)
frame_height = cap.get(cv.CAP_PROP_FRAME_HEIGHT)

fourcc = cv.VideoWriter_fourcc('M', 'J', 'P', 'G')
# record the annotated output at a quarter of the source resolution, 30 fps
dim = (int(frame_width / 4), int(frame_height / 4))
print(dim)
out = cv.VideoWriter('OutputVideo3.avi', fourcc, 30.0, dim)
starting_time = time.time()
frame_counter = 0
while True:
    ret, frame = cap.read()
    if not ret:  # end of video
        break
    frame_counter += 1

    frame = cv.resize(frame, dim, interpolation=cv.INTER_AREA)
    classes, scores, boxes = model.detect(frame, Conf_threshold, NMS_threshold)
    for (classid, score, box) in zip(classes, scores, boxes):
        color = COLORS[int(classid) % len(COLORS)]
        # int(classid) handles both scalar and 1-element-array class ids
        label = "%s : %f" % (class_name[int(classid)], score)
        cv.rectangle(frame, box, color, 1)
        # filled background strip so the label stays readable
        cv.rectangle(frame, (box[0] - 2, box[1] - 20),
                     (box[0] + 120, box[1] - 4), (100, 130, 100), -1)
        cv.putText(frame, label, (box[0], box[1] - 10),
                   cv.FONT_HERSHEY_COMPLEX, 0.4, color, 1)
    elapsed_time = time.time() - starting_time
    fps = frame_counter / elapsed_time
    # dark strip behind the FPS read-out
    cv.line(frame, (18, 43), (140, 43), (0, 0, 0), 27)
    cv.putText(frame, f'FPS: {round(fps, 2)}', (20, 50),
               cv.FONT_HERSHEY_COMPLEX, 0.7, (0, 255, 255), 2)
    cv.imshow('frame', frame)

    out.write(frame)
    key = cv.waitKey(1)
    if key == ord('q'):
        break
out.release()
cap.release()
cv.destroyAllWindows()
print('done')
--------------------------------------------------------------------------------