├── .gitignore
├── Collect_training_data.py
├── IMAGES
│   ├── city.jpg
│   ├── city_pred.jpg
│   ├── kite.jpg
│   ├── kite_pred.jpg
│   ├── mnist_test.jpg
│   ├── street.jpg
│   ├── street_pred.jpg
│   ├── tensorboard.png
│   ├── test.mp4
│   └── tracking_results.gif
├── LICENSE
├── README.md
├── YOLOv3_colab_training.ipynb
├── checkpoints
│   └── checkpoint
├── deep_sort
│   ├── detection.py
│   ├── generate_detections.py
│   ├── iou_matching.py
│   ├── kalman_filter.py
│   ├── linear_assignment.py
│   ├── nn_matching.py
│   ├── preprocessing.py
│   ├── test_tracking.gif
│   ├── track.py
│   └── tracker.py
├── detect_mnist.py
├── detection_custom.py
├── detection_demo.py
├── evaluate_mAP.py
├── mnist
│   ├── make_data.py
│   ├── mnist.names
│   ├── mnist
│   │   ├── test.zip
│   │   └── train.zip
│   └── show_image.py
├── model_data
│   ├── coco
│   │   ├── coco.names
│   │   ├── train2017.txt
│   │   └── val2017.txt
│   └── mars-small128.pb
├── object_tracker.py
├── requirements.txt
├── tools
│   ├── Convert_to_TRT.py
│   ├── Convert_to_pb.py
│   ├── Detection_to_XML.py
│   ├── XML_to_YOLOv3.py
│   └── oid_to_pascal_voc_xml.py
├── train.py
└── yolov3
    ├── __init__.py
    ├── __pycache__
    │   ├── configs.cpython-36.pyc
    │   ├── dataset.cpython-36.pyc
    │   ├── utils.cpython-36.pyc
    │   └── yolov3.cpython-36.pyc
    ├── configs.py
    ├── dataset.py
    ├── utils.py
    ├── yolov3.py
    └── yolov4.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | model_data
3 | configs.py
--------------------------------------------------------------------------------
/Collect_training_data.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Collect_training_data.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-27
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : YOLO detection to XML example script
9 | #
10 | #================================================================
11 | import os
12 | import subprocess
13 | import time
14 | from datetime import datetime
15 | import cv2
16 | import mss
17 | import numpy as np
18 | import tensorflow as tf
19 | from yolov3.utils import *
20 | from yolov3.configs import *
21 | from yolov3.yolov4 import read_class_names
22 | from tools.Detection_to_XML import CreateXMLfile
23 | import random
24 |
25 | def draw_enemy(image, bboxes, CLASSES=YOLO_COCO_CLASSES, show_label=True, show_confidence = True, Text_colors=(255,255,0), rectangle_colors='', tracking=False):
26 | NUM_CLASS = read_class_names(CLASSES)
27 | num_classes = len(NUM_CLASS)
28 | image_h, image_w, _ = image.shape
29 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
30 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
31 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
32 |
33 | random.seed(0)
34 | random.shuffle(colors)
35 | random.seed(None)
36 |
37 | detection_list = []
38 |
39 | for i, bbox in enumerate(bboxes):
40 | coor = np.array(bbox[:4], dtype=np.int32)
41 | score = bbox[4]
42 | class_ind = int(bbox[5])
43 | bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind]
44 | bbox_thick = int(0.6 * (image_h + image_w) / 1000)
45 | if bbox_thick < 1: bbox_thick = 1
46 | fontScale = 0.75 * bbox_thick
47 | (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3])
48 |
49 | # put object rectangle
50 | cv2.rectangle(image, (x1, y1), (x2, y2), bbox_color, bbox_thick*2)
51 |
52 | x, y = int(x1+(x2-x1)/2), int(y1+(y2-y1)/2)
53 |
54 | if show_label:
55 | # get text label
56 | score_str = " {:.2f}".format(score) if show_confidence else ""
57 |
58 | if tracking: score_str = " "+str(score)
59 |
60 | label = "{}".format(NUM_CLASS[class_ind]) + score_str
61 |
62 | # get text size
63 | (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX_SMALL,
64 | fontScale, thickness=bbox_thick)
65 | # put filled text rectangle
66 | cv2.rectangle(image, (x1, y1), (x1 + text_width, y1 - text_height - baseline), bbox_color, thickness=cv2.FILLED)
67 |
68 | # put text above rectangle
69 | cv2.putText(image, label, (x1, y1-4), cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale, Text_colors, bbox_thick, lineType=cv2.LINE_AA)
70 |
71 | return image
72 |
73 | def detect_enemy(Yolo, original_image, input_size=416, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
74 | image_data = image_preprocess(original_image, [input_size, input_size])
75 | image_data = image_data[np.newaxis, ...].astype(np.float32)
76 |
77 | if YOLO_FRAMEWORK == "tf":
78 | pred_bbox = Yolo.predict(image_data)
79 |
80 | elif YOLO_FRAMEWORK == "trt":
81 | batched_input = tf.constant(image_data)
82 | result = Yolo(batched_input)
83 | pred_bbox = []
84 | for key, value in result.items():
85 | value = value.numpy()
86 | pred_bbox.append(value)
87 |
88 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
89 | pred_bbox = tf.concat(pred_bbox, axis=0)
90 |
91 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
92 | bboxes = nms(bboxes, iou_threshold, method='nms')
93 |
94 | image = draw_enemy(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
95 |
96 | return image, bboxes
97 |
98 | offset = 30
99 | times = []
100 | sct = mss.mss()
101 | yolo = Load_Yolo_model()
102 | while True:
103 | t1 = time.time()
104 | img = np.array(sct.grab({"top": 87-offset, "left": 1920, "width": 1280, "height": 720, "mon": -1}))
105 | img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
106 | image, bboxes = detect_enemy(yolo, np.copy(img), input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0))
107 | if len(bboxes) > 0:
108 | CreateXMLfile("XML_Detections", str(int(time.time())), img, bboxes, read_class_names(TRAIN_CLASSES))
109 | print("got it")
110 | time.sleep(2)
111 |
112 | t2 = time.time()
113 | times.append(t2-t1)
114 | times = times[-20:]
115 | ms = sum(times)/len(times)*1000
116 | fps = 1000 / ms
117 | print("FPS", fps)
118 |
119 | #cv2.imshow("Detection image", img)
120 | #if cv2.waitKey(25) & 0xFF == ord("q"):
121 | #cv2.destroyAllWindows()
122 | #break
123 |
--------------------------------------------------------------------------------
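`Collect_training_data.py` captures a hard-coded 1280x720 region of a second monitor (`left: 1920`). A minimal sketch for adapting the capture region, assuming you want the primary display instead; `sct.monitors` is the standard `mss` monitor list:

```
import mss
import numpy as np

with mss.mss() as sct:
    monitor = sct.monitors[1]        # index 1 is the primary display in mss
    frame = np.array(sct.grab(monitor))
    print(frame.shape)               # (height, width, 4) -> BGRA screenshot
```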
/IMAGES/city.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/city.jpg
--------------------------------------------------------------------------------
/IMAGES/city_pred.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/city_pred.jpg
--------------------------------------------------------------------------------
/IMAGES/kite.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/kite.jpg
--------------------------------------------------------------------------------
/IMAGES/kite_pred.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/kite_pred.jpg
--------------------------------------------------------------------------------
/IMAGES/mnist_test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/mnist_test.jpg
--------------------------------------------------------------------------------
/IMAGES/street.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/street.jpg
--------------------------------------------------------------------------------
/IMAGES/street_pred.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/street_pred.jpg
--------------------------------------------------------------------------------
/IMAGES/tensorboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/tensorboard.png
--------------------------------------------------------------------------------
/IMAGES/test.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/test.mp4
--------------------------------------------------------------------------------
/IMAGES/tracking_results.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/tracking_results.gif
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 pythonlessons
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TensorFlow-2.x-YOLOv3 and YOLOv4 tutorials
2 |
3 | YOLOv3 and YOLOv4 implementation in TensorFlow 2.x, with support for training, transfer learning, object tracking, mAP evaluation, and so on...
4 | Code was tested with the following specs:
5 | - i7-7700k CPU and Nvidia 1080TI GPU
6 | - OS Ubuntu 18.04
7 | - CUDA 10.1
8 | - cuDNN v7.6.5
9 | - TensorRT-6.0.1.5
10 | - Tensorflow-GPU 2.3.1
11 | - Code was tested on Ubuntu and Windows 10 (where TensorRT is not officially supported)
12 |
13 | ## Installation
14 | First, clone or download this GitHub repository.
15 | Install requirements and download pretrained weights:
16 | ```
17 | pip install -r ./requirements.txt
18 |
19 | # yolov3
20 | wget -P model_data https://pjreddie.com/media/files/yolov3.weights
21 |
22 | # yolov3-tiny
23 | wget -P model_data https://pjreddie.com/media/files/yolov3-tiny.weights
24 |
25 | # yolov4
26 | wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights
27 |
28 | # yolov4-tiny
29 | wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights
30 | ```
31 |
32 | ## Quick start
33 | Start by using the pretrained weights to test predictions on both image and video:
34 | ```
35 | python detection_demo.py
36 | ```
37 |
38 |
39 |
40 |
41 |
42 | ## Quick training for custom MNIST dataset
43 | The mnist folder contains MNIST images; create the training data:
44 | ```
45 | python mnist/make_data.py
46 | ```
47 | The `./yolov3/configs.py` file is already configured for MNIST training.
48 |
49 | Now you can train the model and then evaluate it:
50 | ```
51 | python train.py
52 | tensorboard --logdir=log
53 | ```
54 | Track training progress in TensorBoard by going to http://localhost:6006/:
55 |
56 |
57 |
58 |
59 | Test detection with the `detect_mnist.py` script:
60 | ```
61 | python detect_mnist.py
62 | ```
63 | Results:
64 |
65 |
66 |
67 |
68 | ## Custom YOLOv3 & YOLOv4 object detection training
69 | Custom training requires preparing a dataset first; how to prepare the dataset and train a custom model is covered at the following link:
70 | https://pylessons.com/YOLOv3-TF2-custrom-train/
71 | More about YOLOv4 training can be read [on this link](https://pylessons.com/YOLOv4-TF2-training/). I didn't have time to implement all the YOLOv4 Bag-of-Freebies to improve the training process; maybe later I'll find time to do that, but for now I leave it as it is. If you need maximum performance, I recommend using [Alex's Darknet](https://github.com/AlexeyAB/darknet) to train your custom model; otherwise, you can use my implementation.
72 |
73 | ## Google Colab Custom Yolo v3 training
74 | To learn more about free GPU training on Google Colab, visit my [text version tutorial](https://pylessons.com/YOLOv3-TF2-GoogleColab/)
75 |
76 | ## Yolo v3 Tiny train and detection
77 | For detailed instructions on how to use YOLOv3-Tiny, follow my text version tutorial [YOLOv3-Tiny support](https://pylessons.com/YOLOv3-TF2-Tiny/). Short instructions:
78 | - Get YOLOv3-Tiny weights: ```wget -P model_data https://pjreddie.com/media/files/yolov3-tiny.weights```
79 | - In `yolov3/configs.py`, change `TRAIN_YOLO_TINY` from `False` to `True`
80 | - Run the `detection_demo.py` script.
81 |
82 | ## Yolo v3 Object tracking
83 | To learn more about object tracking with Deep SORT, visit the [following link](https://pylessons.com/YOLOv3-TF2-DeepSort/).
84 | Quick test:
85 | - Clone this repository;
86 | - Make sure object detection works for you;
87 | - Run the `object_tracker.py` script
88 |
89 |
90 |
91 |
92 | ## YOLOv3 vs YOLOv4 comparison on 1080TI:
93 |
94 | YOLO FPS on COCO 2017 Dataset:
95 | | Detection | 320x320 | 416x416 | 512x512 |
96 | |--------------|---------|---------|---------|
97 | | YoloV3 FPS | 24.38 | 20.94 | 18.57 |
98 | | YoloV4 FPS | 22.15 | 18.69 | 16.50 |
99 |
100 | TensorRT FPS on COCO 2017 Dataset:
101 | | Detection | 320x320 | 416x416 | 512x512 | 608x608 |
102 | |-----------------|---------|---------|---------|---------|
103 | | YoloV4 FP32 FPS | 31.23 | 27.30 | 22.63 | 18.17 |
104 | | YoloV4 FP16 FPS | 30.33 | 25.44 | 21.94 | 17.99 |
105 | | YoloV4 INT8 FPS | 85.18 | 62.02 | 47.50 | 37.32 |
106 | | YoloV3 INT8 FPS | 84.65 | 52.72 | 38.22 | 28.75 |
107 |
108 | mAP on COCO 2017 Dataset:
109 | | Detection | 320x320 | 416x416 | 512x512 |
110 | |------------------|---------|---------|---------|
111 | | YoloV3 mAP50 | 49.85 | 55.31 | 57.48 |
112 | | YoloV4 mAP50 | 48.58 | 56.92 | 61.71 |
113 |
114 | TensorRT mAP on COCO 2017 Dataset:
115 | | Detection | 320x320 | 416x416 | 512x512 | 608x608 |
116 | |-------------------|---------|---------|---------|---------|
117 | | YoloV4 FP32 mAP50 | 48.58 | 56.92 | 61.71 | 63.92 |
118 | | YoloV4 FP16 mAP50 | 48.57 | 56.92 | 61.69 | 63.92 |
119 | | YoloV4 INT8 mAP50 | 40.61 | 48.36 | 52.84 | 54.53 |
120 | | YoloV3 INT8 mAP50 | 44.19 | 48.64 | 50.10 | 50.69 |
121 |
122 | ## Converting YOLO to TensorRT
123 | I will give two examples, both for the YOLOv4 model with `quantize_mode=INT8` and a model input size of 608. A detailed tutorial is at this [link](https://pylessons.com/YOLOv4-TF2-TensorRT/).
124 | ### Default weights from COCO dataset:
125 | - Download weights from links above;
126 | - In `configs.py` script choose your `YOLO_TYPE`;
127 | - In `configs.py` script set `YOLO_INPUT_SIZE = 608`;
128 | - In `configs.py` script set `YOLO_FRAMEWORK = "trt"`;
129 | - From main directory in terminal type `python tools/Convert_to_pb.py`;
130 | - From main directory in terminal type `python tools/Convert_to_TRT.py`;
131 | - In `configs.py` script set `YOLO_CUSTOM_WEIGHTS = f'checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}'`;
132 | - Now you can run `detection_demo.py`; it is best to test with the `detect_video` function.
133 |
134 | ### Custom trained YOLO weights:
135 | - Download weights from links above;
136 | - In `configs.py` script choose your `YOLO_TYPE`;
137 | - In `configs.py` script set `YOLO_INPUT_SIZE = 608`;
138 | - Train custom YOLO model with instructions above;
139 | - In `configs.py` script set `YOLO_CUSTOM_WEIGHTS = f"{YOLO_TYPE}_custom"`;
140 | - In `configs.py` script make sure that `TRAIN_CLASSES` points to your custom classes text file;
141 | - From main directory in terminal type `python tools/Convert_to_pb.py`;
142 | - From main directory in terminal type `python tools/Convert_to_TRT.py`;
143 | - In `configs.py` script set `YOLO_FRAMEWORK = "trt"`;
144 | - In `configs.py` script set `YOLO_CUSTOM_WEIGHTS = f'checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}'`;
145 | - Now you can run `detection_custom.py` to test your custom-trained and converted TensorRT model.
146 |
147 | What is done:
148 | --------------------
149 | - [x] Detection with original weights [Tutorial link](https://pylessons.com/YOLOv3-TF2-introduction/)
150 | - [x] Mnist detection training [Tutorial link](https://pylessons.com/YOLOv3-TF2-mnist/)
151 | - [x] Custom detection training [Tutorial link1](https://pylessons.com/YOLOv3-TF2-custrom-train/), [link2](https://pylessons.com/YOLOv3-TF2-custrom-images/)
152 | - [x] Google Colab training [Tutorial link](https://pylessons.com/YOLOv3-TF2-GoogleColab/)
153 | - [x] YOLOv3-Tiny support [Tutorial link](https://pylessons.com/YOLOv3-TF2-Tiny/)
154 | - [X] Object tracking [Tutorial link](https://pylessons.com/YOLOv3-TF2-DeepSort/)
155 | - [X] Mean Average Precision (mAP) [Tutorial link](https://pylessons.com/YOLOv3-TF2-mAP/)
156 | - [X] Yolo v3 on Raspberry Pi [Tutorial link](https://pylessons.com/YOLOv3-TF2-RaspberryPi/)
157 | - [X] YOLOv4 and YOLOv4-tiny detection [Tutorial link](https://pylessons.com/YOLOv4-TF2-introduction/)
158 | - [X] YOLOv4 and YOLOv4-tiny detection training (Not fully) [Tutorial link](https://pylessons.com/YOLOv4-TF2-training/)
159 | - [X] Convert to TensorRT model [Tutorial link](https://pylessons.com/YOLOv4-TF2-TensorRT/)
160 | - [X] Add multiprocessing after detection (drawing bbox) [Tutorial link](https://pylessons.com/YOLOv4-TF2-multiprocessing/)
161 | - [X] Generate YOLO Object Detection training data from its own results [Tutorial link](https://pylessons.com/YOLOv4-TF2-CreateXML/)
162 | - [X] Counter-strike Global Offensive realtime YOLOv4 Object Detection aimbot [Tutorial link](https://pylessons.com/YOLOv4-TF2-CSGO-aimbot/)
163 |
164 | To be continued... (not anytime soon)
165 | --------------------
166 | - [ ] Converting to TensorFlow Lite
167 | - [ ] YOLO on Android (leaving it for the future; will need to convert everything to Java... not ready for this)
168 | - [ ] Generating anchors
169 | - [ ] YOLACT: Real-time Instance Segmentation
170 | - [ ] Model pruning (Pruning is a technique in deep learning that aids in the development of smaller and more efficient neural networks. It's a model optimization technique that involves eliminating unnecessary values in the weight tensor.)
171 |
--------------------------------------------------------------------------------
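The TensorRT steps above edit `configs.py` by hand. A hedged sketch of just the values those steps touch (names are taken from the steps themselves; the real file contains more options, and the defaults shown here are assumptions):

```
# yolov3/configs.py -- only the values the TensorRT steps change.
YOLO_TYPE              = "yolov4"    # or "yolov3"
YOLO_INPUT_SIZE        = 608
YOLO_FRAMEWORK         = "trt"       # "tf" for plain TensorFlow inference
YOLO_TRT_QUANTIZE_MODE = "INT8"      # "FP32", "FP16" or "INT8"
YOLO_CUSTOM_WEIGHTS    = f'checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}'
```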
/checkpoints/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "yolov3_custom_2"
2 | all_model_checkpoint_paths: "yolov3_custom_2"
3 |
--------------------------------------------------------------------------------
/deep_sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | class Detection(object):
6 | """
7 | This class represents a bounding box detection in a single image.
8 |
9 | Parameters
10 | ----------
11 | tlwh : array_like
12 | Bounding box in format `(x, y, w, h)`.
13 | confidence : float
14 | Detector confidence score.
15 | feature : array_like
16 | A feature vector that describes the object contained in this image.
17 |
18 | Attributes
19 | ----------
20 | tlwh : ndarray
21 | Bounding box in format `(top left x, top left y, width, height)`.
22 |     confidence : float
23 |         Detector confidence score.
24 |     class_name : str
25 |         Detector class.
26 | feature : ndarray | NoneType
27 | A feature vector that describes the object contained in this image.
28 |
29 | """
30 |
31 | def __init__(self, tlwh, confidence, class_name, feature):
32 | self.tlwh = np.asarray(tlwh, dtype=np.float)
33 | self.confidence = float(confidence)
34 | self.class_name = class_name
35 | self.feature = np.asarray(feature, dtype=np.float32)
36 |
37 | def get_class(self):
38 | return self.class_name
39 |
40 | def to_tlbr(self):
41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
42 | `(top left, bottom right)`.
43 | """
44 | ret = self.tlwh.copy()
45 | ret[2:] += ret[:2]
46 | return ret
47 |
48 | def to_xyah(self):
49 | """Convert bounding box to format `(center x, center y, aspect ratio,
50 | height)`, where the aspect ratio is `width / height`.
51 | """
52 | ret = self.tlwh.copy()
53 | ret[:2] += ret[2:] / 2
54 | ret[2] /= ret[3]
55 | return ret
56 |
--------------------------------------------------------------------------------
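A minimal usage sketch for `Detection`; the zero vector stands in for the 128-d appearance feature that the mars-small128 encoder would produce:

```
import numpy as np
from deep_sort.detection import Detection

det = Detection(tlwh=[50, 60, 80, 160], confidence=0.9,
                class_name="person", feature=np.zeros(128))

print(det.to_tlbr())  # [ 50.  60. 130. 220.] -> (min x, min y, max x, max y)
print(det.to_xyah())  # [ 90. 140.   0.5 160.] -> (center x, center y, w/h, h)
```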
/deep_sort/generate_detections.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import os
3 | import errno
4 | import argparse
5 | import numpy as np
6 | import cv2
7 | import tensorflow.compat.v1 as tf
8 |
9 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
10 | if len(physical_devices) > 0:
11 | tf.config.experimental.set_memory_growth(physical_devices[0], True)
12 |
13 | def _run_in_batches(f, data_dict, out, batch_size):
14 | data_len = len(out)
15 | num_batches = int(data_len / batch_size)
16 |
17 | s, e = 0, 0
18 | for i in range(num_batches):
19 | s, e = i * batch_size, (i + 1) * batch_size
20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
21 | out[s:e] = f(batch_data_dict)
22 | if e < len(out):
23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
24 | out[e:] = f(batch_data_dict)
25 |
26 |
27 | def extract_image_patch(image, bbox, patch_shape):
28 | """Extract image patch from bounding box.
29 |
30 | Parameters
31 | ----------
32 | image : ndarray
33 | The full image.
34 | bbox : array_like
35 | The bounding box in format (x, y, width, height).
36 | patch_shape : Optional[array_like]
37 | This parameter can be used to enforce a desired patch shape
38 | (height, width). First, the `bbox` is adapted to the aspect ratio
39 | of the patch shape, then it is clipped at the image boundaries.
40 | If None, the shape is computed from :arg:`bbox`.
41 |
42 | Returns
43 | -------
44 | ndarray | NoneType
45 | An image patch showing the :arg:`bbox`, optionally reshaped to
46 | :arg:`patch_shape`.
47 | Returns None if the bounding box is empty or fully outside of the image
48 | boundaries.
49 |
50 | """
51 | bbox = np.array(bbox)
52 | if patch_shape is not None:
53 | # correct aspect ratio to patch shape
54 | target_aspect = float(patch_shape[1]) / patch_shape[0]
55 | new_width = target_aspect * bbox[3]
56 | bbox[0] -= (new_width - bbox[2]) / 2
57 | bbox[2] = new_width
58 |
59 | # convert to top left, bottom right
60 | bbox[2:] += bbox[:2]
61 | bbox = bbox.astype(np.int)
62 |
63 | # clip at image boundaries
64 | bbox[:2] = np.maximum(0, bbox[:2])
65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
66 | if np.any(bbox[:2] >= bbox[2:]):
67 | return None
68 | sx, sy, ex, ey = bbox
69 | image = image[sy:ey, sx:ex]
70 | image = cv2.resize(image, tuple(patch_shape[::-1]))
71 | return image
72 |
73 |
74 | class ImageEncoder(object):
75 |
76 | def __init__(self, checkpoint_filename, input_name="images", output_name="features"):
77 | self.session = tf.Session()
78 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
79 | graph_def = tf.GraphDef()
80 | graph_def.ParseFromString(file_handle.read())
81 | tf.import_graph_def(graph_def)
82 | try:
83 | self.input_var = tf.get_default_graph().get_tensor_by_name(input_name)
84 | self.output_var = tf.get_default_graph().get_tensor_by_name(output_name)
85 | except KeyError:
86 | layers = [i.name for i in tf.get_default_graph().get_operations()]
87 | self.input_var = tf.get_default_graph().get_tensor_by_name(layers[0]+':0')
88 | self.output_var = tf.get_default_graph().get_tensor_by_name(layers[-1]+':0')
89 |
90 | assert len(self.output_var.get_shape()) == 2
91 | assert len(self.input_var.get_shape()) == 4
92 | self.feature_dim = self.output_var.get_shape().as_list()[-1]
93 | self.image_shape = self.input_var.get_shape().as_list()[1:]
94 |
95 | def __call__(self, data_x, batch_size=32):
96 | out = np.zeros((len(data_x), self.feature_dim), np.float32)
97 | _run_in_batches(
98 | lambda x: self.session.run(self.output_var, feed_dict=x),
99 | {self.input_var: data_x}, out, batch_size)
100 | return out
101 |
102 |
103 | def create_box_encoder(model_filename, input_name="images:0", output_name="features:0", batch_size=32):
104 | image_encoder = ImageEncoder(model_filename, input_name, output_name)
105 | image_shape = image_encoder.image_shape
106 |
107 | def encoder(image, boxes):
108 | image_patches = []
109 | for box in boxes:
110 | patch = extract_image_patch(image, box, image_shape[:2])
111 | if patch is None:
112 | print("WARNING: Failed to extract image patch: %s." % str(box))
113 | patch = np.random.uniform(0., 255., image_shape).astype(np.uint8)
114 | image_patches.append(patch)
115 | image_patches = np.asarray(image_patches)
116 | return image_encoder(image_patches, batch_size)
117 |
118 | return encoder
119 |
120 |
121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
122 | """Generate detections with features.
123 |
124 | Parameters
125 | ----------
126 | encoder : Callable[image, ndarray] -> ndarray
127 | The encoder function takes as input a BGR color image and a matrix of
128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of
129 | corresponding feature vectors.
130 | mot_dir : str
131 | Path to the MOTChallenge directory (can be either train or test).
132 | output_dir
133 | Path to the output directory. Will be created if it does not exist.
134 | detection_dir
135 | Path to custom detections. The directory structure should be the default
136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
137 | standard MOTChallenge detections.
138 |
139 | """
140 | if detection_dir is None:
141 | detection_dir = mot_dir
142 | try:
143 | os.makedirs(output_dir)
144 | except OSError as exception:
145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
146 | pass
147 | else:
148 | raise ValueError(
149 | "Failed to created output directory '%s'" % output_dir)
150 |
151 | for sequence in os.listdir(mot_dir):
152 | print("Processing %s" % sequence)
153 | sequence_dir = os.path.join(mot_dir, sequence)
154 |
155 | image_dir = os.path.join(sequence_dir, "img1")
156 | image_filenames = {
157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
158 | for f in os.listdir(image_dir)}
159 |
160 | detection_file = os.path.join(
161 | detection_dir, sequence, "det/det.txt")
162 | detections_in = np.loadtxt(detection_file, delimiter=',')
163 | detections_out = []
164 |
165 | frame_indices = detections_in[:, 0].astype(np.int)
166 | min_frame_idx = frame_indices.astype(np.int).min()
167 | max_frame_idx = frame_indices.astype(np.int).max()
168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1):
169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
170 | mask = frame_indices == frame_idx
171 | rows = detections_in[mask]
172 |
173 | if frame_idx not in image_filenames:
174 | print("WARNING could not find image for frame %d" % frame_idx)
175 | continue
176 | bgr_image = cv2.imread(
177 | image_filenames[frame_idx], cv2.IMREAD_COLOR)
178 | features = encoder(bgr_image, rows[:, 2:6].copy())
179 | detections_out += [np.r_[(row, feature)] for row, feature
180 | in zip(rows, features)]
181 |
182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence)
183 | np.save(
184 | output_filename, np.asarray(detections_out), allow_pickle=False)
185 |
186 |
187 | def parse_args():
188 | """Parse command line arguments.
189 | """
190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor")
191 | parser.add_argument(
192 | "--model",
193 | default="resources/networks/mars-small128.pb",
194 | help="Path to freezed inference graph protobuf.")
195 | parser.add_argument(
196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)",
197 | required=True)
198 | parser.add_argument(
199 | "--detection_dir", help="Path to custom detections. Defaults to "
200 | "standard MOT detections Directory structure should be the default "
201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None)
202 | parser.add_argument(
203 | "--output_dir", help="Output directory. Will be created if it does not"
204 | " exist.", default="detections")
205 | return parser.parse_args()
206 |
207 |
208 | def main():
209 | args = parse_args()
210 | encoder = create_box_encoder(args.model, batch_size=32)
211 | generate_detections(encoder, args.mot_dir, args.output_dir,
212 | args.detection_dir)
213 |
214 |
215 | if __name__ == "__main__":
216 | main()
217 |
--------------------------------------------------------------------------------
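A minimal usage sketch for `create_box_encoder`, assuming the re-ID graph shipped in this repo at `model_data/mars-small128.pb` and one of the bundled test images:

```
import cv2
import numpy as np
from deep_sort.generate_detections import create_box_encoder

encoder = create_box_encoder("model_data/mars-small128.pb", batch_size=32)

frame = cv2.imread("IMAGES/street.jpg")          # BGR image, as the encoder expects
boxes = np.array([[50, 60, 80, 160],             # one (x, y, w, h) box per row
                  [200, 80, 60, 120]])
features = encoder(frame, boxes)
print(features.shape)                            # (2, 128) for mars-small128
```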
/deep_sort/iou_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import linear_assignment
5 |
6 |
7 | def iou(bbox, candidates):
8 | """Computer intersection over union.
9 |
10 | Parameters
11 | ----------
12 | bbox : ndarray
13 | A bounding box in format `(top left x, top left y, width, height)`.
14 | candidates : ndarray
15 | A matrix of candidate bounding boxes (one per row) in the same format
16 | as `bbox`.
17 |
18 | Returns
19 | -------
20 | ndarray
21 | The intersection over union in [0, 1] between the `bbox` and each
22 | candidate. A higher score means a larger fraction of the `bbox` is
23 | occluded by the candidate.
24 |
25 | """
26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27 | candidates_tl = candidates[:, :2]
28 | candidates_br = candidates[:, :2] + candidates[:, 2:]
29 |
30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34 | wh = np.maximum(0., br - tl)
35 |
36 | area_intersection = wh.prod(axis=1)
37 | area_bbox = bbox[2:].prod()
38 | area_candidates = candidates[:, 2:].prod(axis=1)
39 | return area_intersection / (area_bbox + area_candidates - area_intersection)
40 |
41 |
42 | def iou_cost(tracks, detections, track_indices=None,
43 | detection_indices=None):
44 | """An intersection over union distance metric.
45 |
46 | Parameters
47 | ----------
48 | tracks : List[deep_sort.track.Track]
49 | A list of tracks.
50 | detections : List[deep_sort.detection.Detection]
51 | A list of detections.
52 | track_indices : Optional[List[int]]
53 | A list of indices to tracks that should be matched. Defaults to
54 | all `tracks`.
55 | detection_indices : Optional[List[int]]
56 | A list of indices to detections that should be matched. Defaults
57 | to all `detections`.
58 |
59 | Returns
60 | -------
61 | ndarray
62 | Returns a cost matrix of shape
63 | len(track_indices), len(detection_indices) where entry (i, j) is
64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65 |
66 | """
67 | if track_indices is None:
68 | track_indices = np.arange(len(tracks))
69 | if detection_indices is None:
70 | detection_indices = np.arange(len(detections))
71 |
72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73 | for row, track_idx in enumerate(track_indices):
74 | if tracks[track_idx].time_since_update > 1:
75 | cost_matrix[row, :] = linear_assignment.INFTY_COST
76 | continue
77 |
78 | bbox = tracks[track_idx].to_tlwh()
79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
80 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
81 | return cost_matrix
82 |
--------------------------------------------------------------------------------
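A minimal sketch of `iou` on hand-checked boxes, all in `(x, y, w, h)` format:

```
import numpy as np
from deep_sort.iou_matching import iou

bbox = np.array([0., 0., 10., 10.])
candidates = np.array([[0., 0., 10., 10.],    # identical box   -> IoU 1.0
                       [5., 5., 10., 10.]])   # half-offset box -> IoU 25/175 ~= 0.14
print(iou(bbox, candidates))
```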
/deep_sort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 | 1: 3.8415,
13 | 2: 5.9915,
14 | 3: 7.8147,
15 | 4: 9.4877,
16 | 5: 11.070,
17 | 6: 12.592,
18 | 7: 14.067,
19 | 8: 15.507,
20 | 9: 16.919}
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1. / 20
53 | self._std_weight_velocity = 1. / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3]]
85 | covariance = np.diag(np.square(std))
86 | return mean, covariance
87 |
88 | def predict(self, mean, covariance):
89 | """Run Kalman filter prediction step.
90 |
91 | Parameters
92 | ----------
93 | mean : ndarray
94 | The 8 dimensional mean vector of the object state at the previous
95 | time step.
96 | covariance : ndarray
97 | The 8x8 dimensional covariance matrix of the object state at the
98 | previous time step.
99 |
100 | Returns
101 | -------
102 | (ndarray, ndarray)
103 | Returns the mean vector and covariance matrix of the predicted
104 | state. Unobserved velocities are initialized to 0 mean.
105 |
106 | """
107 | std_pos = [
108 | self._std_weight_position * mean[3],
109 | self._std_weight_position * mean[3],
110 | 1e-2,
111 | self._std_weight_position * mean[3]]
112 | std_vel = [
113 | self._std_weight_velocity * mean[3],
114 | self._std_weight_velocity * mean[3],
115 | 1e-5,
116 | self._std_weight_velocity * mean[3]]
117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 |
119 | mean = np.dot(self._motion_mat, mean)
120 | covariance = np.linalg.multi_dot((
121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122 |
123 | return mean, covariance
124 |
125 | def project(self, mean, covariance):
126 | """Project state distribution to measurement space.
127 |
128 | Parameters
129 | ----------
130 | mean : ndarray
131 | The state's mean vector (8 dimensional array).
132 | covariance : ndarray
133 | The state's covariance matrix (8x8 dimensional).
134 |
135 | Returns
136 | -------
137 | (ndarray, ndarray)
138 | Returns the projected mean and covariance matrix of the given state
139 | estimate.
140 |
141 | """
142 | std = [
143 | self._std_weight_position * mean[3],
144 | self._std_weight_position * mean[3],
145 | 1e-1,
146 | self._std_weight_position * mean[3]]
147 | innovation_cov = np.diag(np.square(std))
148 |
149 | mean = np.dot(self._update_mat, mean)
150 | covariance = np.linalg.multi_dot((
151 | self._update_mat, covariance, self._update_mat.T))
152 | return mean, covariance + innovation_cov
153 |
154 | def update(self, mean, covariance, measurement):
155 | """Run Kalman filter correction step.
156 |
157 | Parameters
158 | ----------
159 | mean : ndarray
160 | The predicted state's mean vector (8 dimensional).
161 | covariance : ndarray
162 | The state's covariance matrix (8x8 dimensional).
163 | measurement : ndarray
164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165 | is the center position, a the aspect ratio, and h the height of the
166 | bounding box.
167 |
168 | Returns
169 | -------
170 | (ndarray, ndarray)
171 | Returns the measurement-corrected state distribution.
172 |
173 | """
174 | projected_mean, projected_cov = self.project(mean, covariance)
175 |
176 | chol_factor, lower = scipy.linalg.cho_factor(
177 | projected_cov, lower=True, check_finite=False)
178 | kalman_gain = scipy.linalg.cho_solve(
179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180 | check_finite=False).T
181 | innovation = measurement - projected_mean
182 |
183 | new_mean = mean + np.dot(innovation, kalman_gain.T)
184 | new_covariance = covariance - np.linalg.multi_dot((
185 | kalman_gain, projected_cov, kalman_gain.T))
186 | return new_mean, new_covariance
187 |
188 | def gating_distance(self, mean, covariance, measurements,
189 | only_position=False):
190 | """Compute gating distance between state distribution and measurements.
191 |
192 | A suitable distance threshold can be obtained from `chi2inv95`. If
193 | `only_position` is False, the chi-square distribution has 4 degrees of
194 | freedom, otherwise 2.
195 |
196 | Parameters
197 | ----------
198 | mean : ndarray
199 | Mean vector over the state distribution (8 dimensional).
200 | covariance : ndarray
201 | Covariance of the state distribution (8x8 dimensional).
202 | measurements : ndarray
203 | An Nx4 dimensional matrix of N measurements, each in
204 | format (x, y, a, h) where (x, y) is the bounding box center
205 | position, a the aspect ratio, and h the height.
206 | only_position : Optional[bool]
207 | If True, distance computation is done with respect to the bounding
208 | box center position only.
209 |
210 | Returns
211 | -------
212 | ndarray
213 | Returns an array of length N, where the i-th element contains the
214 | squared Mahalanobis distance between (mean, covariance) and
215 | `measurements[i]`.
216 |
217 | """
218 | mean, covariance = self.project(mean, covariance)
219 | if only_position:
220 | mean, covariance = mean[:2], covariance[:2, :2]
221 | measurements = measurements[:, :2]
222 |
223 | cholesky_factor = np.linalg.cholesky(covariance)
224 | d = measurements - mean
225 | z = scipy.linalg.solve_triangular(
226 | cholesky_factor, d.T, lower=True, check_finite=False,
227 | overwrite_b=True)
228 | squared_maha = np.sum(z * z, axis=0)
229 | return squared_maha
230 |
--------------------------------------------------------------------------------
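A minimal sketch of one filter cycle: initiate a track from an `(x, y, a, h)` measurement, predict a frame ahead, then correct with a new measurement (the numbers are made up):

```
import numpy as np
from deep_sort.kalman_filter import KalmanFilter

kf = KalmanFilter()
mean, covariance = kf.initiate(np.array([320., 240., 0.5, 160.]))

mean, covariance = kf.predict(mean, covariance)           # time update
mean, covariance = kf.update(mean, covariance,            # measurement update
                             np.array([324., 238., 0.5, 161.]))

print(mean[:4])  # corrected (x, y, a, h) estimate
```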
/deep_sort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment
5 | from . import kalman_filter
6 |
7 |
8 | INFTY_COST = 1e+5
9 |
10 |
11 | def min_cost_matching(
12 | distance_metric, max_distance, tracks, detections, track_indices=None,
13 | detection_indices=None):
14 | """Solve linear assignment problem.
15 |
16 | Parameters
17 | ----------
18 |     distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
19 | The distance metric is given a list of tracks and detections as well as
20 | a list of N track indices and M detection indices. The metric should
21 | return the NxM dimensional cost matrix, where element (i, j) is the
22 | association cost between the i-th track in the given track indices and
23 | the j-th detection in the given detection_indices.
24 | max_distance : float
25 | Gating threshold. Associations with cost larger than this value are
26 | disregarded.
27 | tracks : List[track.Track]
28 | A list of predicted tracks at the current time step.
29 | detections : List[detection.Detection]
30 | A list of detections at the current time step.
31 | track_indices : List[int]
32 | List of track indices that maps rows in `cost_matrix` to tracks in
33 | `tracks` (see description above).
34 | detection_indices : List[int]
35 | List of detection indices that maps columns in `cost_matrix` to
36 | detections in `detections` (see description above).
37 |
38 | Returns
39 | -------
40 | (List[(int, int)], List[int], List[int])
41 | Returns a tuple with the following three entries:
42 | * A list of matched track and detection indices.
43 | * A list of unmatched track indices.
44 | * A list of unmatched detection indices.
45 |
46 | """
47 | if track_indices is None:
48 | track_indices = np.arange(len(tracks))
49 | if detection_indices is None:
50 | detection_indices = np.arange(len(detections))
51 |
52 | if len(detection_indices) == 0 or len(track_indices) == 0:
53 | return [], track_indices, detection_indices # Nothing to match.
54 |
55 | cost_matrix = distance_metric(
56 | tracks, detections, track_indices, detection_indices)
57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
58 | indices = linear_sum_assignment(cost_matrix)
59 | indices = np.asarray(indices)
60 | indices = np.transpose(indices)
61 | matches, unmatched_tracks, unmatched_detections = [], [], []
62 | for col, detection_idx in enumerate(detection_indices):
63 | if col not in indices[:, 1]:
64 | unmatched_detections.append(detection_idx)
65 | for row, track_idx in enumerate(track_indices):
66 | if row not in indices[:, 0]:
67 | unmatched_tracks.append(track_idx)
68 | for row, col in indices:
69 | track_idx = track_indices[row]
70 | detection_idx = detection_indices[col]
71 | if cost_matrix[row, col] > max_distance:
72 | unmatched_tracks.append(track_idx)
73 | unmatched_detections.append(detection_idx)
74 | else:
75 | matches.append((track_idx, detection_idx))
76 | return matches, unmatched_tracks, unmatched_detections
77 |
78 |
79 | def matching_cascade(
80 | distance_metric, max_distance, cascade_depth, tracks, detections,
81 | track_indices=None, detection_indices=None):
82 | """Run matching cascade.
83 |
84 | Parameters
85 | ----------
86 |     distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
87 | The distance metric is given a list of tracks and detections as well as
88 | a list of N track indices and M detection indices. The metric should
89 | return the NxM dimensional cost matrix, where element (i, j) is the
90 | association cost between the i-th track in the given track indices and
91 | the j-th detection in the given detection indices.
92 | max_distance : float
93 | Gating threshold. Associations with cost larger than this value are
94 | disregarded.
95 | cascade_depth: int
96 |         The cascade depth; should be set to the maximum track age.
97 | tracks : List[track.Track]
98 | A list of predicted tracks at the current time step.
99 | detections : List[detection.Detection]
100 | A list of detections at the current time step.
101 | track_indices : Optional[List[int]]
102 | List of track indices that maps rows in `cost_matrix` to tracks in
103 | `tracks` (see description above). Defaults to all tracks.
104 | detection_indices : Optional[List[int]]
105 | List of detection indices that maps columns in `cost_matrix` to
106 | detections in `detections` (see description above). Defaults to all
107 | detections.
108 |
109 | Returns
110 | -------
111 | (List[(int, int)], List[int], List[int])
112 | Returns a tuple with the following three entries:
113 | * A list of matched track and detection indices.
114 | * A list of unmatched track indices.
115 | * A list of unmatched detection indices.
116 |
117 | """
118 | if track_indices is None:
119 | track_indices = list(range(len(tracks)))
120 | if detection_indices is None:
121 | detection_indices = list(range(len(detections)))
122 |
123 | unmatched_detections = detection_indices
124 | matches = []
125 | for level in range(cascade_depth):
126 | if len(unmatched_detections) == 0: # No detections left
127 | break
128 |
129 | track_indices_l = [
130 | k for k in track_indices
131 | if tracks[k].time_since_update == 1 + level
132 | ]
133 | if len(track_indices_l) == 0: # Nothing to match at this level
134 | continue
135 |
136 | matches_l, _, unmatched_detections = \
137 | min_cost_matching(
138 | distance_metric, max_distance, tracks, detections,
139 | track_indices_l, unmatched_detections)
140 | matches += matches_l
141 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
142 | return matches, unmatched_tracks, unmatched_detections
143 |
144 |
145 | def gate_cost_matrix(
146 | kf, cost_matrix, tracks, detections, track_indices, detection_indices,
147 | gated_cost=INFTY_COST, only_position=False):
148 | """Invalidate infeasible entries in cost matrix based on the state
149 | distributions obtained by Kalman filtering.
150 |
151 | Parameters
152 | ----------
153 | kf : The Kalman filter.
154 | cost_matrix : ndarray
155 | The NxM dimensional cost matrix, where N is the number of track indices
156 | and M is the number of detection indices, such that entry (i, j) is the
157 | association cost between `tracks[track_indices[i]]` and
158 | `detections[detection_indices[j]]`.
159 | tracks : List[track.Track]
160 | A list of predicted tracks at the current time step.
161 | detections : List[detection.Detection]
162 | A list of detections at the current time step.
163 | track_indices : List[int]
164 | List of track indices that maps rows in `cost_matrix` to tracks in
165 | `tracks` (see description above).
166 | detection_indices : List[int]
167 | List of detection indices that maps columns in `cost_matrix` to
168 | detections in `detections` (see description above).
169 | gated_cost : Optional[float]
170 | Entries in the cost matrix corresponding to infeasible associations are
171 | set this value. Defaults to a very large value.
172 | only_position : Optional[bool]
173 | If True, only the x, y position of the state distribution is considered
174 | during gating. Defaults to False.
175 |
176 | Returns
177 | -------
178 | ndarray
179 | Returns the modified cost matrix.
180 |
181 | """
182 | gating_dim = 2 if only_position else 4
183 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
184 | measurements = np.asarray(
185 | [detections[i].to_xyah() for i in detection_indices])
186 | for row, track_idx in enumerate(track_indices):
187 | track = tracks[track_idx]
188 | gating_distance = kf.gating_distance(
189 | track.mean, track.covariance, measurements, only_position)
190 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
191 | return cost_matrix
192 |
--------------------------------------------------------------------------------
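A minimal sketch of `min_cost_matching` with a stand-in metric: the tracks and detections are placeholders, and the metric simply indexes a fixed cost table instead of computing IoU or appearance distances:

```
import numpy as np
from deep_sort import linear_assignment

costs = np.array([[0.1, 0.9, 0.8],
                  [0.7, 0.2, 0.9]])

def metric(tracks, detections, track_indices, detection_indices):
    return costs[np.ix_(track_indices, detection_indices)]

matches, unmatched_tracks, unmatched_dets = linear_assignment.min_cost_matching(
    metric, max_distance=0.5, tracks=[None, None], detections=[None, None, None])
print(matches)          # [(0, 0), (1, 1)] -> matched (track, detection) pairs
print(unmatched_dets)   # [2] -- cost above max_distance everywhere
```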
/deep_sort/nn_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | def _pdist(a, b):
6 | """Compute pair-wise squared distance between points in `a` and `b`.
7 |
8 | Parameters
9 | ----------
10 | a : array_like
11 | An NxM matrix of N samples of dimensionality M.
12 | b : array_like
13 | An LxM matrix of L samples of dimensionality M.
14 |
15 | Returns
16 | -------
17 | ndarray
18 |         Returns a matrix of size len(a), len(b) such that element (i, j)
19 | contains the squared distance between `a[i]` and `b[j]`.
20 |
21 | """
22 | a, b = np.asarray(a), np.asarray(b)
23 | if len(a) == 0 or len(b) == 0:
24 | return np.zeros((len(a), len(b)))
25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27 | r2 = np.clip(r2, 0., float(np.inf))
28 | return r2
29 |
30 |
31 | def _cosine_distance(a, b, data_is_normalized=False):
32 | """Compute pair-wise cosine distance between points in `a` and `b`.
33 |
34 | Parameters
35 | ----------
36 | a : array_like
37 | An NxM matrix of N samples of dimensionality M.
38 | b : array_like
39 | An LxM matrix of L samples of dimensionality M.
40 | data_is_normalized : Optional[bool]
41 | If True, assumes rows in a and b are unit length vectors.
42 |         Otherwise, a and b are explicitly normalized to length 1.
43 |
44 | Returns
45 | -------
46 | ndarray
47 |         Returns a matrix of size len(a), len(b) such that element (i, j)
48 |         contains the cosine distance between `a[i]` and `b[j]`.
49 |
50 | """
51 | if not data_is_normalized:
52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54 | return 1. - np.dot(a, b.T)
55 |
56 |
57 | def _nn_euclidean_distance(x, y):
58 | """ Helper function for nearest neighbor distance metric (Euclidean).
59 |
60 | Parameters
61 | ----------
62 | x : ndarray
63 | A matrix of N row-vectors (sample points).
64 | y : ndarray
65 | A matrix of M row-vectors (query points).
66 |
67 | Returns
68 | -------
69 | ndarray
70 | A vector of length M that contains for each entry in `y` the
71 | smallest Euclidean distance to a sample in `x`.
72 |
73 | """
74 | distances = _pdist(x, y)
75 | return np.maximum(0.0, distances.min(axis=0))
76 |
77 |
78 | def _nn_cosine_distance(x, y):
79 | """ Helper function for nearest neighbor distance metric (cosine).
80 |
81 | Parameters
82 | ----------
83 | x : ndarray
84 | A matrix of N row-vectors (sample points).
85 | y : ndarray
86 | A matrix of M row-vectors (query points).
87 |
88 | Returns
89 | -------
90 | ndarray
91 | A vector of length M that contains for each entry in `y` the
92 | smallest cosine distance to a sample in `x`.
93 |
94 | """
95 | distances = _cosine_distance(x, y)
96 | return distances.min(axis=0)
97 |
98 |
99 | class NearestNeighborDistanceMetric(object):
100 | """
101 | A nearest neighbor distance metric that, for each target, returns
102 | the closest distance to any sample that has been observed so far.
103 |
104 | Parameters
105 | ----------
106 | metric : str
107 | Either "euclidean" or "cosine".
108 | matching_threshold: float
109 | The matching threshold. Samples with larger distance are considered an
110 | invalid match.
111 | budget : Optional[int]
112 | If not None, fix samples per class to at most this number. Removes
113 | the oldest samples when the budget is reached.
114 |
115 | Attributes
116 | ----------
117 | samples : Dict[int -> List[ndarray]]
118 | A dictionary that maps from target identities to the list of samples
119 | that have been observed so far.
120 |
121 | """
122 |
123 | def __init__(self, metric, matching_threshold, budget=None):
124 |
125 |
126 | if metric == "euclidean":
127 | self._metric = _nn_euclidean_distance
128 | elif metric == "cosine":
129 | self._metric = _nn_cosine_distance
130 | else:
131 | raise ValueError(
132 | "Invalid metric; must be either 'euclidean' or 'cosine'")
133 | self.matching_threshold = matching_threshold
134 | self.budget = budget
135 | self.samples = {}
136 |
137 | def partial_fit(self, features, targets, active_targets):
138 | """Update the distance metric with new data.
139 |
140 | Parameters
141 | ----------
142 | features : ndarray
143 | An NxM matrix of N features of dimensionality M.
144 | targets : ndarray
145 | An integer array of associated target identities.
146 | active_targets : List[int]
147 | A list of targets that are currently present in the scene.
148 |
149 | """
150 | for feature, target in zip(features, targets):
151 | self.samples.setdefault(target, []).append(feature)
152 | if self.budget is not None:
153 | self.samples[target] = self.samples[target][-self.budget:]
154 | self.samples = {k: self.samples[k] for k in active_targets}
155 |
156 | def distance(self, features, targets):
157 | """Compute distance between features and targets.
158 |
159 | Parameters
160 | ----------
161 | features : ndarray
162 | An NxM matrix of N features of dimensionality M.
163 | targets : List[int]
164 | A list of targets to match the given `features` against.
165 |
166 | Returns
167 | -------
168 | ndarray
169 | Returns a cost matrix of shape len(targets), len(features), where
170 | element (i, j) contains the closest squared distance between
171 | `targets[i]` and `features[j]`.
172 |
173 | """
174 | cost_matrix = np.zeros((len(targets), len(features)))
175 | for i, target in enumerate(targets):
176 | cost_matrix[i, :] = self._metric(self.samples[target], features)
177 | return cost_matrix
178 |
--------------------------------------------------------------------------------
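A minimal sketch of the metric's fit/query cycle, with 4-d features standing in for the usual 128-d appearance vectors:

```
import numpy as np
from deep_sort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.2, budget=100)

# One stored sample each for targets 1 and 2, both still active.
features = np.array([[1., 0., 0., 0.],
                     [0., 1., 0., 0.]])
metric.partial_fit(features, targets=np.array([1, 2]), active_targets=[1, 2])

# Query a new feature that is close to target 1's stored sample.
query = np.array([[0.9, 0.1, 0., 0.]])
print(metric.distance(query, targets=[1, 2]))  # row 0 (target 1) has the small cost
```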
/deep_sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import cv2
4 |
5 |
6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None):
7 | """Suppress overlapping detections.
8 |
9 | Original code from [1]_ has been adapted to include confidence score.
10 |
11 | .. [1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> classes = [d.classes for d in detections]
19 | >>> scores = [d.confidence for d in detections]
20 |     >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
21 | >>> detections = [detections[i] for i in indices]
22 |
23 | Parameters
24 | ----------
25 | boxes : ndarray
26 | Array of ROIs (x, y, width, height).
27 | max_bbox_overlap : float
28 | ROIs that overlap more than this values are suppressed.
29 | scores : Optional[array_like]
30 | Detector confidence score.
31 |
32 | Returns
33 | -------
34 | List[int]
35 | Returns indices of detections that have survived non-maxima suppression.
36 |
37 | """
38 | if len(boxes) == 0:
39 | return []
40 |
41 |     boxes = boxes.astype(float)
42 | pick = []
43 |
44 | x1 = boxes[:, 0]
45 | y1 = boxes[:, 1]
46 | x2 = boxes[:, 2] + boxes[:, 0]
47 | y2 = boxes[:, 3] + boxes[:, 1]
48 |
49 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
50 | if scores is not None:
51 | idxs = np.argsort(scores)
52 | else:
53 | idxs = np.argsort(y2)
54 |
55 | while len(idxs) > 0:
56 | last = len(idxs) - 1
57 | i = idxs[last]
58 | pick.append(i)
59 |
60 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
61 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
62 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
63 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
64 |
65 | w = np.maximum(0, xx2 - xx1 + 1)
66 | h = np.maximum(0, yy2 - yy1 + 1)
67 |
68 | overlap = (w * h) / area[idxs[:last]]
69 |
70 | idxs = np.delete(
71 | idxs, np.concatenate(
72 | ([last], np.where(overlap > max_bbox_overlap)[0])))
73 |
74 | return pick
75 |
--------------------------------------------------------------------------------
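A minimal sketch of `non_max_suppression` on hand-checked boxes: the first two boxes overlap almost completely, so only the higher-scoring one survives (note that `classes` is accepted but unused by this implementation):

```
import numpy as np
from deep_sort.preprocessing import non_max_suppression

boxes = np.array([[10., 10., 50., 50.],     # (x, y, w, h)
                  [12., 12., 50., 50.],     # near-duplicate of the first box
                  [200., 200., 40., 40.]])  # far away from the others
scores = np.array([0.9, 0.8, 0.7])
classes = np.array([0, 0, 1])

keep = non_max_suppression(boxes, classes, max_bbox_overlap=0.5, scores=scores)
print(keep)  # [0, 2] -- the near-duplicate box 1 is suppressed
```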
/deep_sort/test_tracking.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/deep_sort/test_tracking.gif
--------------------------------------------------------------------------------
/deep_sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
3 |
4 | class TrackState:
5 | """
6 | Enumeration type for the single target track state. Newly created tracks are
7 | classified as `tentative` until enough evidence has been collected. Then,
8 | the track state is changed to `confirmed`. Tracks that are no longer alive
9 | are classified as `deleted` to mark them for removal from the set of active
10 | tracks.
11 |
12 | """
13 |
14 | Tentative = 1
15 | Confirmed = 2
16 | Deleted = 3
17 |
18 |
19 | class Track:
20 | """
21 | A single target track with state space `(x, y, a, h)` and associated
22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
23 | aspect ratio and `h` is the height.
24 |
25 | Parameters
26 | ----------
27 | mean : ndarray
28 | Mean vector of the initial state distribution.
29 | covariance : ndarray
30 | Covariance matrix of the initial state distribution.
31 | track_id : int
32 | A unique track identifier.
33 | n_init : int
34 | Number of consecutive detections before the track is confirmed. The
35 | track state is set to `Deleted` if a miss occurs within the first
36 | `n_init` frames.
37 | max_age : int
38 | The maximum number of consecutive misses before the track state is
39 | set to `Deleted`.
40 | feature : Optional[ndarray]
41 | Feature vector of the detection this track originates from. If not None,
42 | this feature is added to the `features` cache.
43 |
44 | Attributes
45 | ----------
46 | mean : ndarray
47 | Mean vector of the initial state distribution.
48 | covariance : ndarray
49 | Covariance matrix of the initial state distribution.
50 | track_id : int
51 | A unique track identifier.
52 | hits : int
53 | Total number of measurement updates.
54 | age : int
55 | Total number of frames since first occurrence.
56 | time_since_update : int
57 | Total number of frames since last measurement update.
58 | state : TrackState
59 | The current track state.
60 | features : List[ndarray]
61 | A cache of features. On each measurement update, the associated feature
62 | vector is added to this list.
63 |
64 | """
65 |
66 | def __init__(self, mean, covariance, track_id, n_init, max_age,
67 | feature=None, class_name=None):
68 | self.mean = mean
69 | self.covariance = covariance
70 | self.track_id = track_id
71 | self.hits = 1
72 | self.age = 1
73 | self.time_since_update = 0
74 |
75 | self.state = TrackState.Tentative
76 | self.features = []
77 | if feature is not None:
78 | self.features.append(feature)
79 |
80 | self._n_init = n_init
81 | self._max_age = max_age
82 | self.class_name = class_name
83 |
84 | def to_tlwh(self):
85 | """Get current position in bounding box format `(top left x, top left y,
86 | width, height)`.
87 |
88 | Returns
89 | -------
90 | ndarray
91 | The bounding box.
92 |
93 | """
94 | ret = self.mean[:4].copy()
95 | ret[2] *= ret[3]
96 | ret[:2] -= ret[2:] / 2
97 | return ret
98 |
99 | def to_tlbr(self):
100 | """Get current position in bounding box format `(min x, miny, max x,
101 | max y)`.
102 |
103 | Returns
104 | -------
105 | ndarray
106 | The bounding box.
107 |
108 | """
109 | ret = self.to_tlwh()
110 | ret[2:] = ret[:2] + ret[2:]
111 | return ret
112 |
113 | def get_class(self):
114 | return self.class_name
115 |
116 | def predict(self, kf):
117 | """Propagate the state distribution to the current time step using a
118 | Kalman filter prediction step.
119 |
120 | Parameters
121 | ----------
122 | kf : kalman_filter.KalmanFilter
123 | The Kalman filter.
124 |
125 | """
126 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
127 | self.age += 1
128 | self.time_since_update += 1
129 |
130 | def update(self, kf, detection):
131 | """Perform Kalman filter measurement update step and update the feature
132 | cache.
133 |
134 | Parameters
135 | ----------
136 | kf : kalman_filter.KalmanFilter
137 | The Kalman filter.
138 | detection : Detection
139 | The associated detection.
140 |
141 | """
142 | self.mean, self.covariance = kf.update(
143 | self.mean, self.covariance, detection.to_xyah())
144 | self.features.append(detection.feature)
145 |
146 | self.hits += 1
147 | self.time_since_update = 0
148 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
149 | self.state = TrackState.Confirmed
150 |
151 | def mark_missed(self):
152 | """Mark this track as missed (no association at the current time step).
153 | """
154 | if self.state == TrackState.Tentative:
155 | self.state = TrackState.Deleted
156 | elif self.time_since_update > self._max_age:
157 | self.state = TrackState.Deleted
158 |
159 | def is_tentative(self):
160 | """Returns True if this track is tentative (unconfirmed).
161 | """
162 | return self.state == TrackState.Tentative
163 |
164 | def is_confirmed(self):
165 | """Returns True if this track is confirmed."""
166 | return self.state == TrackState.Confirmed
167 |
168 | def is_deleted(self):
169 | """Returns True if this track is dead and should be deleted."""
170 | return self.state == TrackState.Deleted
171 |
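Note: to_tlwh/to_tlbr above are plain center/size arithmetic; the same math worked by hand for a state with center (100, 50), aspect ratio a = w/h = 0.5 and height 40:

    import numpy as np

    mean = np.array([100.0, 50.0, 0.5, 40.0])  # (center x, center y, a = w/h, h)
    tlwh = mean.copy()
    tlwh[2] *= tlwh[3]              # w = a * h = 20
    tlwh[:2] -= tlwh[2:] / 2        # top-left corner = center - size / 2
    print(tlwh)                     # [90. 30. 20. 40.]

    tlbr = tlwh.copy()
    tlbr[2:] = tlbr[:2] + tlbr[2:]  # bottom-right = top-left + size
    print(tlbr)                     # [ 90.  30. 110.  70.]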
--------------------------------------------------------------------------------
/deep_sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import kalman_filter
5 | from . import linear_assignment
6 | from . import iou_matching
7 | from .track import Track
8 |
9 |
10 | class Tracker:
11 | """
12 | This is the multi-target tracker.
13 |
14 | Parameters
15 | ----------
16 | metric : nn_matching.NearestNeighborDistanceMetric
17 | A distance metric for measurement-to-track association.
18 | max_age : int
19 | Maximum number of consecutive misses before a track is deleted.
20 | n_init : int
21 | Number of consecutive detections before the track is confirmed. The
22 | track state is set to `Deleted` if a miss occurs within the first
23 | `n_init` frames.
24 |
25 | Attributes
26 | ----------
27 | metric : nn_matching.NearestNeighborDistanceMetric
28 | The distance metric used for measurement to track association.
29 | max_age : int
30 | Maximum number of consecutive misses before a track is deleted.
31 | n_init : int
32 | Number of frames that a track remains in initialization phase.
33 | kf : kalman_filter.KalmanFilter
34 | A Kalman filter to filter target trajectories in image space.
35 | tracks : List[Track]
36 | The list of active tracks at the current time step.
37 |
38 | """
39 |
40 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3):
41 | self.metric = metric
42 | self.max_iou_distance = max_iou_distance
43 | self.max_age = max_age
44 | self.n_init = n_init
45 |
46 | self.kf = kalman_filter.KalmanFilter()
47 | self.tracks = []
48 | self._next_id = 1
49 |
50 | def predict(self):
51 | """Propagate track state distributions one time step forward.
52 |
53 | This function should be called once every time step, before `update`.
54 | """
55 | for track in self.tracks:
56 | track.predict(self.kf)
57 |
58 | def update(self, detections):
59 | """Perform measurement update and track management.
60 |
61 | Parameters
62 | ----------
63 | detections : List[deep_sort.detection.Detection]
64 | A list of detections at the current time step.
65 |
66 | """
67 | # Run matching cascade.
68 | matches, unmatched_tracks, unmatched_detections = \
69 | self._match(detections)
70 |
71 | # Update track set.
72 | for track_idx, detection_idx in matches:
73 | self.tracks[track_idx].update(
74 | self.kf, detections[detection_idx])
75 | for track_idx in unmatched_tracks:
76 | self.tracks[track_idx].mark_missed()
77 | for detection_idx in unmatched_detections:
78 | self._initiate_track(detections[detection_idx])
79 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
80 |
81 | # Update distance metric.
82 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83 | features, targets = [], []
84 | for track in self.tracks:
85 | if not track.is_confirmed():
86 | continue
87 | features += track.features
88 | targets += [track.track_id for _ in track.features]
89 | track.features = []
90 | self.metric.partial_fit(
91 | np.asarray(features), np.asarray(targets), active_targets)
92 |
93 | def _match(self, detections):
94 |
95 | def gated_metric(tracks, dets, track_indices, detection_indices):
96 | features = np.array([dets[i].feature for i in detection_indices])
97 | targets = np.array([tracks[i].track_id for i in track_indices])
98 | cost_matrix = self.metric.distance(features, targets)
99 | cost_matrix = linear_assignment.gate_cost_matrix(
100 | self.kf, cost_matrix, tracks, dets, track_indices,
101 | detection_indices)
102 |
103 | return cost_matrix
104 |
105 | # Split track set into confirmed and unconfirmed tracks.
106 | confirmed_tracks = [
107 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108 | unconfirmed_tracks = [
109 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110 |
111 | # Associate confirmed tracks using appearance features.
112 | matches_a, unmatched_tracks_a, unmatched_detections = \
113 | linear_assignment.matching_cascade(
114 | gated_metric, self.metric.matching_threshold, self.max_age,
115 | self.tracks, detections, confirmed_tracks)
116 |
117 | # Associate remaining tracks together with unconfirmed tracks using IOU.
118 | iou_track_candidates = unconfirmed_tracks + [
119 | k for k in unmatched_tracks_a if
120 | self.tracks[k].time_since_update == 1]
121 | unmatched_tracks_a = [
122 | k for k in unmatched_tracks_a if
123 | self.tracks[k].time_since_update != 1]
124 | matches_b, unmatched_tracks_b, unmatched_detections = \
125 | linear_assignment.min_cost_matching(
126 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127 | detections, iou_track_candidates, unmatched_detections)
128 |
129 | matches = matches_a + matches_b
130 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131 | return matches, unmatched_tracks, unmatched_detections
132 |
133 | def _initiate_track(self, detection):
134 | mean, covariance = self.kf.initiate(detection.to_xyah())
135 | class_name = detection.get_class()
136 | self.tracks.append(Track(
137 | mean, covariance, self._next_id, self.n_init, self.max_age,
138 | detection.feature, class_name))
139 | self._next_id += 1
140 |
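Note: a minimal sketch of the per-frame lifecycle this class expects (object_tracker.py below shows the full pipeline); frames_of_detections is a hypothetical iterable of List[Detection]:

    from deep_sort import nn_matching
    from deep_sort.tracker import Tracker

    metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.7, budget=None)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=30, n_init=3)

    for detections in frames_of_detections:  # hypothetical: one List[Detection] per frame
        tracker.predict()                    # Kalman prediction first, as the docstring requires
        tracker.update(detections)           # cascade matching + track management
        for track in tracker.tracks:
            if track.is_confirmed() and track.time_since_update == 0:
                print(track.track_id, track.to_tlbr())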
--------------------------------------------------------------------------------
/detect_mnist.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : detect_mnist.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-12
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : mnist object detection example
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import random
16 | import time
17 | import tensorflow as tf
18 | from yolov3.yolov4 import Create_Yolo
19 | from yolov3.utils import detect_image
20 | from yolov3.configs import *
21 |
22 | # build the model and load weights once; only the image choice belongs in the loop
23 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
24 | yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}") # use keras weights
25 |
26 | while True:
27 |     ID = random.randint(0, 199) # mnist_test.txt has only 200 lines (indices 0-199)
28 |     label_txt = "mnist/mnist_test.txt"
29 |     image_info = open(label_txt).readlines()[ID].split()
30 |     image_path = image_info[0]
31 |
32 |     detect_image(yolo, image_path, "mnist_test.jpg", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0))
33 |
--------------------------------------------------------------------------------
/detection_custom.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : detection_custom.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : object detection image and video example
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from yolov3.utils import detect_image, detect_realtime, detect_video, Load_Yolo_model, detect_video_realtime_mp
17 | from yolov3.configs import *
18 |
19 | image_path = "./IMAGES/plate_2.jpg"
20 | video_path = "./IMAGES/test.mp4"
21 |
22 | yolo = Load_Yolo_model()
23 | detect_image(yolo, image_path, "./IMAGES/plate_2_detect.jpg", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0))
24 | #detect_video(yolo, video_path, './IMAGES/detected.mp4', input_size=YOLO_INPUT_SIZE, show=False, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0))
25 | #detect_realtime(yolo, '', input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255, 0, 0))
26 |
27 | #detect_video_realtime_mp(video_path, "Output.mp4", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0), realtime=False)
28 |
--------------------------------------------------------------------------------
/detection_demo.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : detection_demo.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-27
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : object detection image and video example
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from yolov3.utils import detect_image, detect_realtime, detect_video, Load_Yolo_model, detect_video_realtime_mp
17 | from yolov3.configs import *
18 |
19 | image_path = "./IMAGES/kite.jpg"
20 | video_path = "./IMAGES/test.mp4"
21 |
22 | yolo = Load_Yolo_model()
23 | detect_image(yolo, image_path, "./IMAGES/kite_pred.jpg", input_size=YOLO_INPUT_SIZE, show=True, rectangle_colors=(255,0,0))
24 | #detect_video(yolo, video_path, "", input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255,0,0))
25 | #detect_realtime(yolo, '', input_size=YOLO_INPUT_SIZE, show=True, rectangle_colors=(255, 0, 0))
26 |
27 | #detect_video_realtime_mp(video_path, "Output.mp4", input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255,0,0), realtime=False)
28 |
--------------------------------------------------------------------------------
/evaluate_mAP.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : evaluate_mAP.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to evaluate model mAP and FPS
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from tensorflow.python.saved_model import tag_constants
17 | from yolov3.dataset import Dataset
18 | from yolov3.yolov4 import Create_Yolo
19 | from yolov3.utils import load_yolo_weights, detect_image, image_preprocess, postprocess_boxes, nms, read_class_names
20 | from yolov3.configs import *
21 | import shutil
22 | import json
23 | import time
24 |
25 | gpus = tf.config.experimental.list_physical_devices('GPU')
26 | if len(gpus) > 0:
27 | try: tf.config.experimental.set_memory_growth(gpus[0], True)
28 | except RuntimeError: print("RuntimeError in tf.config.experimental.set_memory_growth")
29 |
30 |
31 | def voc_ap(rec, prec):
32 | """
33 | --- Official matlab code VOC2012---
34 | mrec=[0 ; rec ; 1];
35 | mpre=[0 ; prec ; 0];
36 | for i=numel(mpre)-1:-1:1
37 | mpre(i)=max(mpre(i),mpre(i+1));
38 | end
39 | i=find(mrec(2:end)~=mrec(1:end-1))+1;
40 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
41 | """
42 | rec.insert(0, 0.0) # insert 0.0 at beginning of list
43 | rec.append(1.0) # append 1.0 at end of list
44 | mrec = rec[:]
45 | prec.insert(0, 0.0) # insert 0.0 at beginning of list
46 | prec.append(0.0) # append 0.0 at end of list
47 | mpre = prec[:]
48 | """
49 | This part makes the precision monotonically decreasing
50 | (goes from the end to the beginning)
51 | matlab: for i=numel(mpre)-1:-1:1
52 | mpre(i)=max(mpre(i),mpre(i+1));
53 | """
54 | # matlab indexes start in 1 but python in 0, so I have to do:
55 | # range(start=(len(mpre) - 2), end=0, step=-1)
56 | # also the python function range excludes the end, resulting in:
57 | # range(start=(len(mpre) - 2), end=-1, step=-1)
58 | for i in range(len(mpre)-2, -1, -1):
59 | mpre[i] = max(mpre[i], mpre[i+1])
60 | """
61 | This part creates a list of indexes where the recall changes
62 | matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
63 | """
64 | i_list = []
65 | for i in range(1, len(mrec)):
66 | if mrec[i] != mrec[i-1]:
67 | i_list.append(i) # if it was matlab would be i + 1
68 | """
69 | The Average Precision (AP) is the area under the curve
70 | (numerical integration)
71 | matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
72 | """
73 | ap = 0.0
74 | for i in i_list:
75 | ap += ((mrec[i]-mrec[i-1])*mpre[i])
76 | return ap, mrec, mpre
77 |
78 |
79 | def get_mAP(Yolo, dataset, score_threshold=0.25, iou_threshold=0.50, TEST_INPUT_SIZE=TEST_INPUT_SIZE):
80 | MINOVERLAP = 0.5 # default value (defined in the PASCAL VOC2012 challenge)
81 | NUM_CLASS = read_class_names(TRAIN_CLASSES)
82 |
83 | ground_truth_dir_path = 'mAP/ground-truth'
84 | if os.path.exists(ground_truth_dir_path): shutil.rmtree(ground_truth_dir_path)
85 |
86 | if not os.path.exists('mAP'): os.mkdir('mAP')
87 | os.mkdir(ground_truth_dir_path)
88 |
89 | print(f'\ncalculating mAP{int(iou_threshold*100)}...\n')
90 |
91 | gt_counter_per_class = {}
92 | for index in range(dataset.num_samples):
93 | ann_dataset = dataset.annotations[index]
94 |
95 | original_image, bbox_data_gt = dataset.parse_annotation(ann_dataset, True)
96 |
97 | if len(bbox_data_gt) == 0:
98 | bboxes_gt = []
99 | classes_gt = []
100 | else:
101 | bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
102 | ground_truth_path = os.path.join(ground_truth_dir_path, str(index) + '.txt')
103 | num_bbox_gt = len(bboxes_gt)
104 |
105 | bounding_boxes = []
106 | for i in range(num_bbox_gt):
107 | class_name = NUM_CLASS[classes_gt[i]]
108 | xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
109 | bbox = xmin + " " + ymin + " " + xmax + " " + ymax
110 | bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
111 |
112 | # count that object
113 | if class_name in gt_counter_per_class:
114 | gt_counter_per_class[class_name] += 1
115 | else:
116 | # if class didn't exist yet
117 | gt_counter_per_class[class_name] = 1
118 | bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
119 | with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json', 'w') as outfile:
120 | json.dump(bounding_boxes, outfile)
121 |
122 | gt_classes = list(gt_counter_per_class.keys())
123 | # sort the classes alphabetically
124 | gt_classes = sorted(gt_classes)
125 | n_classes = len(gt_classes)
126 |
127 | times = []
128 | json_pred = [[] for i in range(n_classes)]
129 | for index in range(dataset.num_samples):
130 | ann_dataset = dataset.annotations[index]
131 |
132 | image_name = ann_dataset[0].split('/')[-1]
133 | original_image, bbox_data_gt = dataset.parse_annotation(ann_dataset, True)
134 |
135 | image = image_preprocess(np.copy(original_image), [TEST_INPUT_SIZE, TEST_INPUT_SIZE])
136 | image_data = image[np.newaxis, ...].astype(np.float32)
137 |
138 | t1 = time.time()
139 | if YOLO_FRAMEWORK == "tf":
140 | if tf.__version__ > '2.4.0':
141 | pred_bbox = Yolo(image_data)
142 | else:
143 | pred_bbox = Yolo.predict(image_data)
144 | elif YOLO_FRAMEWORK == "trt":
145 | batched_input = tf.constant(image_data)
146 | result = Yolo(batched_input)
147 | pred_bbox = []
148 | for key, value in result.items():
149 | value = value.numpy()
150 | pred_bbox.append(value)
151 |
152 | t2 = time.time()
153 |
154 | times.append(t2-t1)
155 |
156 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
157 | pred_bbox = tf.concat(pred_bbox, axis=0)
158 |
159 | bboxes = postprocess_boxes(pred_bbox, original_image, TEST_INPUT_SIZE, score_threshold)
160 | bboxes = nms(bboxes, iou_threshold, method='nms')
161 |
162 | for bbox in bboxes:
163 | coor = np.array(bbox[:4], dtype=np.int32)
164 | score = bbox[4]
165 | class_ind = int(bbox[5])
166 | class_name = NUM_CLASS[class_ind]
167 | score = '%.4f' % score
168 | xmin, ymin, xmax, ymax = list(map(str, coor))
169 | bbox = xmin + " " + ymin + " " + xmax + " " + ymax
170 | json_pred[gt_classes.index(class_name)].append({"confidence": str(score), "file_id": str(index), "bbox": str(bbox)})
171 |
172 | ms = sum(times)/len(times)*1000
173 | fps = 1000 / ms
174 |
175 | for class_name in gt_classes:
176 | json_pred[gt_classes.index(class_name)].sort(key=lambda x:float(x['confidence']), reverse=True)
177 | with open(f'{ground_truth_dir_path}/{class_name}_predictions.json', 'w') as outfile:
178 | json.dump(json_pred[gt_classes.index(class_name)], outfile)
179 |
180 | # Calculate the AP for each class
181 | sum_AP = 0.0
182 | ap_dictionary = {}
183 | # open file to store the results
184 | with open("mAP/results.txt", 'w') as results_file:
185 | results_file.write("# AP and precision/recall per class\n")
186 | count_true_positives = {}
187 | for class_index, class_name in enumerate(gt_classes):
188 | count_true_positives[class_name] = 0
189 | # Load predictions of that class
190 | predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json'
191 | predictions_data = json.load(open(predictions_file))
192 |
193 | # Assign predictions to ground truth objects
194 | nd = len(predictions_data)
195 | tp = [0] * nd # creates an array of zeros of size nd
196 | fp = [0] * nd
197 | for idx, prediction in enumerate(predictions_data):
198 | file_id = prediction["file_id"]
199 | # assign prediction to ground truth object if any
200 | # open ground-truth with that file_id
201 | gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json'
202 | ground_truth_data = json.load(open(gt_file))
203 | ovmax = -1
204 | gt_match = -1
205 | # load prediction bounding-box
206 | bb = [ float(x) for x in prediction["bbox"].split() ] # bounding box of prediction
207 | for obj in ground_truth_data:
208 | # look for a class_name match
209 | if obj["class_name"] == class_name:
210 | bbgt = [ float(x) for x in obj["bbox"].split() ] # bounding box of ground truth
211 | bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])]
212 | iw = bi[2] - bi[0] + 1
213 | ih = bi[3] - bi[1] + 1
214 | if iw > 0 and ih > 0:
215 | # compute overlap (IoU) = area of intersection / area of union
216 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0]
217 | + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih
218 | ov = iw * ih / ua
219 | if ov > ovmax:
220 | ovmax = ov
221 | gt_match = obj
222 |
223 | # assign prediction as true positive/don't care/false positive
224 | if ovmax >= MINOVERLAP: # best overlap clears the minimum threshold
225 | if not bool(gt_match["used"]):
226 | # true positive
227 | tp[idx] = 1
228 | gt_match["used"] = True
229 | count_true_positives[class_name] += 1
230 | # update the ".json" file
231 | with open(gt_file, 'w') as f:
232 | f.write(json.dumps(ground_truth_data))
233 | else:
234 | # false positive (multiple detection)
235 | fp[idx] = 1
236 | else:
237 | # false positive
238 | fp[idx] = 1
239 |
240 | # compute precision/recall
241 | cumsum = 0
242 | for idx, val in enumerate(fp):
243 | fp[idx] += cumsum
244 | cumsum += val
245 | cumsum = 0
246 | for idx, val in enumerate(tp):
247 | tp[idx] += cumsum
248 | cumsum += val
249 | #print(tp)
250 | rec = tp[:]
251 | for idx, val in enumerate(tp):
252 | rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name]
253 | #print(rec)
254 | prec = tp[:]
255 | for idx, val in enumerate(tp):
256 | prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx])
257 | #print(prec)
258 |
259 | ap, mrec, mprec = voc_ap(rec, prec)
260 | sum_AP += ap
261 | text = "{0:.3f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100)
262 |
263 | rounded_prec = [ '%.3f' % elem for elem in prec ]
264 | rounded_rec = [ '%.3f' % elem for elem in rec ]
265 | # Write to results.txt
266 | results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n")
267 |
268 | print(text)
269 | ap_dictionary[class_name] = ap
270 |
271 | results_file.write("\n# mAP of all classes\n")
272 | mAP = sum_AP / n_classes
273 |
274 | text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP*100, fps)
275 | results_file.write(text + "\n")
276 | print(text)
277 |
278 | return mAP*100
279 |
280 | if __name__ == '__main__':
281 | if YOLO_FRAMEWORK == "tf": # TensorFlow detection
282 | if YOLO_TYPE == "yolov4":
283 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS
284 | if YOLO_TYPE == "yolov3":
285 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS
286 |
287 | if YOLO_CUSTOM_WEIGHTS == False:
288 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
289 | load_yolo_weights(yolo, Darknet_weights) # use Darknet weights
290 | else:
291 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
292 | yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}") # use custom weights
293 |
294 | elif YOLO_FRAMEWORK == "trt": # TensorRT detection
295 | saved_model_loaded = tf.saved_model.load(f"./checkpoints/{TRAIN_MODEL_NAME}", tags=[tag_constants.SERVING])
296 | signature_keys = list(saved_model_loaded.signatures.keys())
297 | yolo = saved_model_loaded.signatures['serving_default']
298 |
299 | testset = Dataset('test', TEST_INPUT_SIZE=YOLO_INPUT_SIZE)
300 | get_mAP(yolo, testset, score_threshold=0.05, iou_threshold=0.50, TEST_INPUT_SIZE=YOLO_INPUT_SIZE)
301 |
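Note: voc_ap can be sanity-checked by hand. For a toy curve that holds precision 1.0 up to recall 0.5 and precision 0.5 out to full recall, the area under the monotone envelope is 0.5*1.0 + 0.5*0.5 = 0.75 (voc_ap assumed in scope):

    ap, mrec, mpre = voc_ap([0.5, 1.0], [1.0, 0.5])
    print(ap)  # 0.75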
--------------------------------------------------------------------------------
/mnist/make_data.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : make_data.py
4 | # Author : PyLessons
5 | # Created date: 2020-04-20
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : create mnist example dataset to train custom yolov3
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
13 | import cv2
14 | import numpy as np
15 | import shutil
16 | import random
17 | from zipfile import ZipFile
18 |
19 | SIZE = 416
20 | images_num_train = 1000
21 | images_num_test = 200
22 |
23 | image_sizes = [3, 6, 3] # small, medium, big
24 |
25 | # this helps to run script both from terminal and python IDLE
26 | add_path = "mnist"
27 | if os.getcwd().split(os.sep)[-1] != "mnist":
28 | add_path = "mnist"
29 | os.chdir(add_path)
30 | else:
31 | add_path = ""
32 |
33 | def compute_iou(box1, box2):
34 | # xmin, ymin, xmax, ymax
35 | A1 = (box1[2] - box1[0])*(box1[3] - box1[1])
36 | A2 = (box2[2] - box2[0])*(box2[3] - box2[1])
37 |
38 | xmin = max(box1[0], box2[0])
39 | ymin = max(box1[1], box2[1])
40 | xmax = min(box1[2], box2[2])
41 | ymax = min(box1[3], box2[3])
42 |
43 | if ymin >= ymax or xmin >= xmax: return 0
44 | inter = (xmax - xmin) * (ymax - ymin); return inter / (A1 + A2 - inter) # IoU = intersection / union
45 |
46 |
47 | def make_image(data, image_path, ratio=1):
48 | blank = data[0]
49 | boxes = data[1]
50 | label = data[2]
51 |
52 | ID = image_path.split("/")[-1][0]
53 | image = cv2.imread(image_path)
54 | image = cv2.resize(image, (int(28*ratio), int(28*ratio)))
55 | h, w, c = image.shape
56 |
57 | while True:
58 | xmin = np.random.randint(0, SIZE-w, 1)[0]
59 | ymin = np.random.randint(0, SIZE-h, 1)[0]
60 | xmax = xmin + w
61 | ymax = ymin + h
62 | box = [xmin, ymin, xmax, ymax]
63 |
64 | iou = [compute_iou(box, b) for b in boxes]
65 | if max(iou) < 0.02:
66 | boxes.append(box)
67 | label.append(ID)
68 | break
69 |
70 | for i in range(w):
71 | for j in range(h):
72 | x = xmin + i
73 | y = ymin + j
74 | blank[y][x] = image[j][i]
75 |
76 | # cv2.rectangle(blank, (xmin, ymin), (xmax, ymax), [0, 0, 255], 2)
77 | return blank
78 |
79 |
80 | for file in ["train", "test"]:
81 | if not os.path.exists(f"mnist/{file}"):
82 | with ZipFile(f"mnist/{file}.zip", 'r') as zf: # avoid shadowing the builtin zip
83 | # extracting all the files
84 | print(f'Extracting all {file} files now...')
85 | zf.extractall()
86 | shutil.move(file, "mnist")
87 | print('Done!')
88 |
89 | for file in ['train','test']:
90 | images_path = os.getcwd()+f"/mnist_{file}"
91 | labels_txt = os.getcwd()+f"/mnist_{file}.txt"
92 |
93 | if file == 'train': images_num = images_num_train
94 | if file == 'test': images_num = images_num_test
95 |
96 | if os.path.exists(images_path): shutil.rmtree(images_path)
97 | os.mkdir(images_path)
98 |
99 | image_paths = [os.path.join(os.path.realpath("."), os.getcwd()+f"/mnist/{file}/" + image_name)
100 | for image_name in os.listdir(os.getcwd()+f"/mnist/{file}")]
101 |
102 | with open(labels_txt, "w") as wf:
103 | image_num = 0
104 | while image_num < images_num:
105 | image_path = os.path.realpath(os.path.join(images_path, "%06d.jpg" %(image_num+1)))
106 | #print(image_path)
107 | annotation = image_path
108 | blanks = np.ones(shape=[SIZE, SIZE, 3]) * 255
109 | bboxes = [[0,0,1,1]]
110 | labels = [0]
111 | data = [blanks, bboxes, labels]
112 | bboxes_num = 0
113 |
114 | # ratios small, medium, big objects
115 | ratios = [[0.5, 0.8], [1., 1.5, 2.], [3., 4.]]
116 | for i in range(len(ratios)):
117 | N = random.randint(0, image_sizes[i])
118 | if N !=0: bboxes_num += 1
119 | for _ in range(N):
120 | ratio = random.choice(ratios[i])
121 | idx = random.randint(0, len(image_paths)-1)
122 | data[0] = make_image(data, image_paths[idx], ratio)
123 |
124 | if bboxes_num == 0: continue
125 | cv2.imwrite(image_path, data[0])
126 | for i in range(len(labels)):
127 | if i == 0: continue
128 | xmin = str(bboxes[i][0])
129 | ymin = str(bboxes[i][1])
130 | xmax = str(bboxes[i][2])
131 | ymax = str(bboxes[i][3])
132 | class_ind = str(labels[i])
133 | annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)])
134 | image_num += 1
135 | print("=> %s" %annotation)
136 | wf.write(annotation + "\n")
137 |
138 | if add_path != "": os.chdir("..")
139 |
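Note: with the union denominator (A1 + A2 - intersection) used above, compute_iou is easy to spot-check: two 10x10 boxes sharing a 5x5 patch give 25 / (100 + 100 - 25) ~= 0.143, and identical boxes give exactly 1.0:

    print(compute_iou([0, 0, 10, 10], [5, 5, 15, 15]))   # ~0.1428
    print(compute_iou([0, 0, 10, 10], [0, 0, 10, 10]))   # 1.0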
--------------------------------------------------------------------------------
/mnist/mnist.names:
--------------------------------------------------------------------------------
1 | 0
2 | 1
3 | 2
4 | 3
5 | 4
6 | 5
7 | 6
8 | 7
9 | 8
10 | 9
11 |
--------------------------------------------------------------------------------
/mnist/mnist/test.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/mnist/mnist/test.zip
--------------------------------------------------------------------------------
/mnist/mnist/train.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/mnist/mnist/train.zip
--------------------------------------------------------------------------------
/mnist/show_image.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : show_image.py
4 | # Author : PyLessons
5 | # Created date: 2020-04-20
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : show random image from created dataset
9 | #
10 | #================================================================
11 | import random
12 | import cv2
13 | import numpy as np
14 | from PIL import Image
15 |
16 | ID = random.randint(0, 200)
17 | label_txt = "./mnist_train.txt"
18 | image_info = open(label_txt).readlines()[ID].split()
19 |
20 | image_path = image_info[0]
21 | image = cv2.imread(image_path)
22 | for bbox in image_info[1:]:
23 | bbox = bbox.split(",")
24 | image = cv2.rectangle(image,(int(float(bbox[0])),
25 | int(float(bbox[1]))),
26 | (int(float(bbox[2])),
27 | int(float(bbox[3]))), (255,0,0), 2)
28 |
29 | image = Image.fromarray(np.uint8(image))
30 | image.show()
31 |
--------------------------------------------------------------------------------
/model_data/coco/coco.names:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic-light
11 | fire-hydrant
12 | stop-sign
13 | parking-meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports-ball
34 | kite
35 | baseball-bat
36 | baseball-glove
37 | skateboard
38 | surfboard
39 | tennis-racket
40 | bottle
41 | wine-glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot-dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell-phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy-bear
79 | hair-drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/model_data/mars-small128.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/model_data/mars-small128.pb
--------------------------------------------------------------------------------
/object_tracker.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : object_tracker.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : code to track detected object from video or webcam
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from yolov3.utils import Load_Yolo_model, image_preprocess, postprocess_boxes, nms, draw_bbox, read_class_names
17 | from yolov3.configs import *
18 | import time
19 |
20 | from deep_sort import nn_matching
21 | from deep_sort.detection import Detection
22 | from deep_sort.tracker import Tracker
23 | from deep_sort import generate_detections as gdet
24 |
25 | video_path = "./IMAGES/test.mp4"
26 |
27 | def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = []):
28 | # Definition of the parameters
29 | max_cosine_distance = 0.7
30 | nn_budget = None
31 |
32 | #initialize deep sort object
33 | model_filename = 'model_data/mars-small128.pb'
34 | encoder = gdet.create_box_encoder(model_filename, batch_size=1)
35 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
36 | tracker = Tracker(metric)
37 |
38 | times, times_2 = [], []
39 |
40 | if video_path:
41 | vid = cv2.VideoCapture(video_path) # detect on video
42 | else:
43 | vid = cv2.VideoCapture(0) # detect from webcam
44 |
45 | # by default VideoCapture returns float instead of int
46 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
47 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
48 | fps = int(vid.get(cv2.CAP_PROP_FPS))
49 | codec = cv2.VideoWriter_fourcc(*'XVID')
50 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4
51 |
52 | NUM_CLASS = read_class_names(CLASSES)
53 | key_list = list(NUM_CLASS.keys())
54 | val_list = list(NUM_CLASS.values())
55 | while True:
56 | _, frame = vid.read()
57 |
58 | try:
59 | original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
60 | original_frame = cv2.cvtColor(original_frame, cv2.COLOR_RGB2BGR) # net effect: frame stays BGR for the cv2 display/writer below
61 | except:
62 | break
63 |
64 | image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
65 | #image_data = tf.expand_dims(image_data, 0)
66 | image_data = image_data[np.newaxis, ...].astype(np.float32)
67 |
68 | t1 = time.time()
69 | if YOLO_FRAMEWORK == "tf":
70 | pred_bbox = Yolo.predict(image_data)
71 | elif YOLO_FRAMEWORK == "trt":
72 | batched_input = tf.constant(image_data)
73 | result = Yolo(batched_input)
74 | pred_bbox = []
75 | for key, value in result.items():
76 | value = value.numpy()
77 | pred_bbox.append(value)
78 |
79 | #t1 = time.time()
80 | #pred_bbox = Yolo.predict(image_data)
81 | t2 = time.time()
82 |
83 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
84 | pred_bbox = tf.concat(pred_bbox, axis=0)
85 |
86 | bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
87 | bboxes = nms(bboxes, iou_threshold, method='nms')
88 |
89 | # extract bboxes to boxes (x, y, width, height), scores and names
90 | boxes, scores, names = [], [], []
91 | for bbox in bboxes:
92 | if len(Track_only) == 0 or NUM_CLASS[int(bbox[5])] in Track_only:
93 | boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)])
94 | scores.append(bbox[4])
95 | names.append(NUM_CLASS[int(bbox[5])])
96 |
97 | # Obtain all the detections for the given frame.
98 | boxes = np.array(boxes)
99 | names = np.array(names)
100 | scores = np.array(scores)
101 | features = np.array(encoder(original_frame, boxes))
102 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)]
103 |
104 | # Pass detections to the deepsort object and obtain the track information.
105 | tracker.predict()
106 | tracker.update(detections)
107 |
108 | # Obtain info from the tracks
109 | tracked_bboxes = []
110 | for track in tracker.tracks:
111 | if not track.is_confirmed() or track.time_since_update > 5:
112 | continue
113 | bbox = track.to_tlbr() # Get the corrected/predicted bounding box
114 | class_name = track.get_class() #Get the class name of particular object
115 | tracking_id = track.track_id # Get the ID for the particular track
116 | index = key_list[val_list.index(class_name)] # Get predicted object index by object name
117 | tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function
118 |
119 | # draw detection on frame
120 | image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True)
121 |
122 | t3 = time.time()
123 | times.append(t2-t1)
124 | times_2.append(t3-t1)
125 |
126 | times = times[-20:]
127 | times_2 = times_2[-20:]
128 |
129 | ms = sum(times)/len(times)*1000
130 | fps = 1000 / ms
131 | fps2 = 1000 / (sum(times_2)/len(times_2)*1000)
132 |
133 | image = cv2.putText(image, "Detection FPS: {:.1f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
134 |
135 | # draw original yolo detection
136 | #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True)
137 |
138 | print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
139 | if output_path != '': out.write(image)
140 | if show:
141 | cv2.imshow('output', image)
142 |
143 | if cv2.waitKey(25) & 0xFF == ord("q"):
144 | cv2.destroyAllWindows()
145 | break
146 |
147 | cv2.destroyAllWindows()
148 |
149 |
150 | yolo = Load_Yolo_model()
151 | Object_tracking(yolo, video_path, "detection.mp4", input_size=YOLO_INPUT_SIZE, show=True, iou_threshold=0.1, rectangle_colors=(255,0,0), Track_only = ["person"])
152 |
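Note: per the VideoCapture branch above, an empty video_path falls back to the webcam and an empty output_path skips the video writer, so the same entry point covers both modes (a sketch):

    yolo = Load_Yolo_model()
    # file input, writing a tracked video, people only (as in the call above):
    # Object_tracking(yolo, video_path, "detection.mp4", input_size=YOLO_INPUT_SIZE, show=True, Track_only=["person"])
    # webcam input, nothing written to disk, every COCO class tracked:
    Object_tracking(yolo, "", "", input_size=YOLO_INPUT_SIZE, show=True, Track_only=[])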
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.18.2
2 | scipy>=1.4.1
3 | wget>=3.2
4 | seaborn>=0.10.0
5 | tensorflow
6 | opencv-python==4.4.0.46
7 | tqdm==4.43.0
8 | pandas
9 | awscli
10 | urllib3
11 | mss
12 |
--------------------------------------------------------------------------------
/tools/Convert_to_TRT.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Convert_to_TRT.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : convert TF frozen graph to TensorRT model
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import sys
14 |
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools":
17 | os.chdir("..")
18 | sys.path.insert(1, os.getcwd())
19 |
20 | import tensorflow as tf
21 | import numpy as np
22 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
23 | if len(physical_devices) > 0:
24 | tf.config.experimental.set_memory_growth(physical_devices[0], True)
25 | from yolov3.configs import *
26 | from tensorflow.python.compiler.tensorrt import trt_convert as trt
27 |
28 | def calibration_input():
29 | for i in range(100):
30 | batched_input = np.random.random((1, YOLO_INPUT_SIZE, YOLO_INPUT_SIZE, 3)).astype(np.float32)
31 | batched_input = tf.constant(batched_input)
32 | yield (batched_input,)
33 |
34 | conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS
35 | conversion_params = conversion_params._replace(max_workspace_size_bytes=4000000000)
36 | conversion_params = conversion_params._replace(precision_mode=YOLO_TRT_QUANTIZE_MODE)
37 | conversion_params = conversion_params._replace(max_batch_size=1)
38 | if YOLO_TRT_QUANTIZE_MODE == 'INT8':
39 | conversion_params = conversion_params._replace(use_calibration=True)
40 |
41 | converter = trt.TrtGraphConverterV2(input_saved_model_dir=f'./checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}', conversion_params=conversion_params)
42 | if YOLO_TRT_QUANTIZE_MODE == 'INT8':
43 | converter.convert(calibration_input_fn=calibration_input)
44 | else:
45 | converter.convert()
46 |
47 | converter.save(output_saved_model_dir=f'./checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}')
48 | print(f'Done Converting to TensorRT, model saved to: /checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}')
49 |
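Note: loading the converted model back mirrors the "trt" branch of evaluate_mAP.py; a sketch, reusing the output path written above:

    import numpy as np
    import tensorflow as tf
    from tensorflow.python.saved_model import tag_constants
    from yolov3.configs import *

    saved_model_loaded = tf.saved_model.load(
        f'./checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}',
        tags=[tag_constants.SERVING])
    infer = saved_model_loaded.signatures['serving_default']

    dummy = tf.constant(np.random.random((1, YOLO_INPUT_SIZE, YOLO_INPUT_SIZE, 3)).astype(np.float32))
    result = infer(dummy)  # dict of output tensors, one entry per YOLO scale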
--------------------------------------------------------------------------------
/tools/Convert_to_pb.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Convert_to_pb.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to freeze tf model to .pb model
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import sys
14 |
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools":
17 | os.chdir("..")
18 | sys.path.insert(1, os.getcwd())
19 |
20 | import tensorflow as tf
21 | from yolov3.yolov4 import Create_Yolo
22 | from yolov3.utils import load_yolo_weights
23 | from yolov3.configs import *
24 |
25 | if YOLO_TYPE == "yolov4":
26 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS
27 | if YOLO_TYPE == "yolov3":
28 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS
29 |
30 | if YOLO_CUSTOM_WEIGHTS == False:
31 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
32 | load_yolo_weights(yolo, Darknet_weights) # use Darknet weights
33 | else:
34 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
35 | yolo.load_weights(YOLO_CUSTOM_WEIGHTS) # use custom weights
36 |
37 | yolo.summary()
38 | yolo.save(f'./checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}')
39 |
40 | print(f"model saves to /checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}")
41 |
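Note: the directory written by yolo.save(...) is a TensorFlow SavedModel, so it can be reloaded without rebuilding the architecture (a sketch, path as above):

    import tensorflow as tf
    from yolov3.configs import *

    yolo = tf.keras.models.load_model(f'./checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}', compile=False)
    yolo.summary()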
--------------------------------------------------------------------------------
/tools/Detection_to_XML.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Detection_to_XML.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-27
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : converts YOLO detection to XML file
9 | #
10 | #===============================================================
11 | from textwrap import dedent
12 | from lxml import etree
13 | import glob
14 | import os
15 | import cv2
16 | import time
17 |
18 | def CreateXMLfile(path, file_name, image, bboxes, NUM_CLASS):
19 | boxes = []
20 | for bbox in bboxes:
21 | boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int), bbox[3].astype(int), NUM_CLASS[int(bbox[5])]])#, bbox[4], NUM_CLASS[int(bbox[5])]])
22 |
23 | if not os.path.exists(path):
24 | os.makedirs(path)
25 | os.chdir(path)
26 |
27 | img_name = "XML_"+file_name+".png"
28 |
29 | cv2.imwrite(img_name,image)
30 |
31 | annotation = etree.Element("annotation")
32 |
33 | folder = etree.Element("folder")
34 | folder.text = os.path.basename(os.getcwd())
35 | annotation.append(folder)
36 |
37 | filename_xml = etree.Element("filename")
38 | filename_str = img_name.split(".")[0]
39 | filename_xml.text = img_name
40 | annotation.append(filename_xml)
41 |
42 | path = etree.Element("path")
43 | path.text = os.path.join(os.getcwd(), filename_str + ".png") # the image was saved as .png above
44 | annotation.append(path)
45 |
46 | source = etree.Element("source")
47 | annotation.append(source)
48 |
49 | database = etree.Element("database")
50 | database.text = "Unknown"
51 | source.append(database)
52 |
53 | size = etree.Element("size")
54 | annotation.append(size)
55 |
56 | width = etree.Element("width")
57 | height = etree.Element("height")
58 | depth = etree.Element("depth")
59 |
60 | img = cv2.imread(filename_xml.text)
61 |
62 | width.text = str(img.shape[1])
63 | height.text = str(img.shape[0])
64 | depth.text = str(img.shape[2])
65 |
66 | size.append(width)
67 | size.append(height)
68 | size.append(depth)
69 |
70 | segmented = etree.Element("segmented")
71 | segmented.text = "0"
72 | annotation.append(segmented)
73 |
74 | for Object in boxes:
75 | class_name = Object[4]
76 | xmin_l = str(int(float(Object[0])))
77 | ymin_l = str(int(float(Object[1])))
78 | xmax_l = str(int(float(Object[2])))
79 | ymax_l = str(int(float(Object[3])))
80 |
81 | obj = etree.Element("object")
82 | annotation.append(obj)
83 |
84 | name = etree.Element("name")
85 | name.text = class_name
86 | obj.append(name)
87 |
88 | pose = etree.Element("pose")
89 | pose.text = "Unspecified"
90 | obj.append(pose)
91 |
92 | truncated = etree.Element("truncated")
93 | truncated.text = "0"
94 | obj.append(truncated)
95 |
96 | difficult = etree.Element("difficult")
97 | difficult.text = "0"
98 | obj.append(difficult)
99 |
100 | bndbox = etree.Element("bndbox")
101 | obj.append(bndbox)
102 |
103 | xmin = etree.Element("xmin")
104 | xmin.text = xmin_l
105 | bndbox.append(xmin)
106 |
107 | ymin = etree.Element("ymin")
108 | ymin.text = ymin_l
109 | bndbox.append(ymin)
110 |
111 | xmax = etree.Element("xmax")
112 | xmax.text = xmax_l
113 | bndbox.append(xmax)
114 |
115 | ymax = etree.Element("ymax")
116 | ymax.text = ymax_l
117 | bndbox.append(ymax)
118 |
119 | # write xml to file
120 | s = etree.tostring(annotation, pretty_print=True)
121 | with open(filename_str + ".xml", 'wb') as f:
122 | f.write(s)
123 | f.close()
124 |
125 | os.chdir("..")
126 |
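Note: a hypothetical call to CreateXMLfile. It expects numpy bboxes (it calls .astype) laid out as [x1, y1, x2, y2, score, class_id] plus a {class_id: name} mapping; values below are made up for illustration:

    import numpy as np
    from tools.Detection_to_XML import CreateXMLfile

    image = np.zeros((416, 416, 3), dtype=np.uint8)             # placeholder frame
    bboxes = [np.array([48.0, 40.0, 120.0, 200.0, 0.97, 0.0])]  # one fake detection
    NUM_CLASS = {0: "person"}
    CreateXMLfile("XML_Detections", "000001", image, bboxes, NUM_CLASS)
    # -> writes XML_Detections/XML_000001.png and XML_Detections/XML_000001.xml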
--------------------------------------------------------------------------------
/tools/XML_to_YOLOv3.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : XML_to_YOLOv3.py
4 | # Author : PyLessons
5 | # Created date: 2020-06-04
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to convert XML labels to YOLOv3 training labels
9 | #
10 | #================================================================
11 | import xml.etree.ElementTree as ET
12 | import os
13 | import glob
14 |
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools": os.chdir("..")
17 |
18 |
19 | data_dir = '/custom_dataset/'
20 | Dataset_names_path = "model_data/license_plate_names.txt"
21 | Dataset_train = "model_data/license_plate_train.txt"
22 | Dataset_test = "model_data/license_plate_test.txt"
23 | is_subfolder = False
24 |
25 | Dataset_names = []
26 |
27 | def ParseXML(img_folder, file):
28 | for xml_file in glob.glob(img_folder+'/*.xml'):
29 | tree=ET.parse(open(xml_file))
30 | root = tree.getroot()
31 | image_name = root.find('filename').text
32 | img_path = img_folder+'/'+image_name
33 | for i, obj in enumerate(root.iter('object')):
34 | difficult = obj.find('difficult').text
35 | cls = obj.find('name').text
36 | if cls not in Dataset_names:
37 | Dataset_names.append(cls)
38 | cls_id = Dataset_names.index(cls)
39 | xmlbox = obj.find('bndbox')
40 | OBJECT = (str(int(float(xmlbox.find('xmin').text)))+','
41 | +str(int(float(xmlbox.find('ymin').text)))+','
42 | +str(int(float(xmlbox.find('xmax').text)))+','
43 | +str(int(float(xmlbox.find('ymax').text)))+','
44 | +str(cls_id))
45 | img_path += ' '+OBJECT
46 | print(img_path)
47 | file.write(img_path+'\n')
48 |
49 | def run_XML_to_YOLOv3():
50 | for i, folder in enumerate(['train','test']):
51 | with open([Dataset_train,Dataset_test][i], "w") as file:
52 | print(os.getcwd()+data_dir+folder)
53 | img_path = os.path.join(os.getcwd()+data_dir+folder)
54 | if is_subfolder:
55 | for directory in os.listdir(img_path):
56 | xml_path = os.path.join(img_path, directory)
57 | ParseXML(xml_path, file)
58 | else:
59 | ParseXML(img_path, file)
60 |
61 | print("Dataset_names:", Dataset_names)
62 | with open(Dataset_names_path, "w") as file:
63 | for name in Dataset_names:
64 | file.write(str(name)+'\n')
65 |
66 | run_XML_to_YOLOv3()
67 |
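Note: each line the converter writes is "<image path> x1,y1,x2,y2,class_id ..." with one comma-joined group per object; a toy parse of such a line (path made up for illustration):

    line = "/custom_dataset/train/plate_1.jpg 48,40,120,200,0 10,12,60,100,1"
    image_path, *objects = line.split()
    for obj in objects:
        x1, y1, x2, y2, cls_id = map(int, obj.split(","))
        print(image_path, (x1, y1, x2, y2), "class", cls_id)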
--------------------------------------------------------------------------------
/tools/oid_to_pascal_voc_xml.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : oid_to_pascal_voc_xml.py
4 | # Author : PyLessons
5 | # Created date: 2020-06-04
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to convert OID labels to PASCAL VOC XML
9 | #
10 | #================================================================
11 | import os
12 | from tqdm import tqdm
13 | from sys import exit
14 | import argparse
15 | import cv2
16 | from textwrap import dedent
17 | from lxml import etree
18 |
19 | foldername = os.path.basename(os.getcwd())
20 | if foldername == "tools": os.chdir("..")
21 |
22 | Dataset_path = "OIDv4_ToolKit/OID/Dataset"
23 |
24 | def convert_to_xml():
25 | current_path = os.getcwd()
26 | os.chdir(Dataset_path)
27 | DIRS = os.listdir(os.getcwd())
28 |
29 | for DIR in DIRS:
30 | if os.path.isdir(DIR):
31 | os.chdir(DIR)
32 |
33 | print("Currently in Subdirectory:", DIR)
34 | CLASS_DIRS = os.listdir(os.getcwd())
35 | for CLASS_DIR in CLASS_DIRS:
36 | if " " in CLASS_DIR:
37 | os.rename(CLASS_DIR, CLASS_DIR.replace(" ", "_"))
38 |
39 | CLASS_DIRS = os.listdir(os.getcwd())
40 | for CLASS_DIR in CLASS_DIRS:
41 | if os.path.isdir(CLASS_DIR):
42 | os.chdir(CLASS_DIR)
43 |
44 | print("\n" + "Creating PASCAL VOC XML Files for Class:", CLASS_DIR)
45 | # Create Directory for annotations if it does not exist yet
46 |
47 | #Read Labels from OIDv4 ToolKit
48 | os.chdir("Label")
49 |
50 | #Create PASCAL XML
51 | for filename in tqdm(os.listdir(os.getcwd())):
52 | if filename.endswith(".txt"):
53 | filename_str = str.split(filename, ".")[0]
54 |
55 |
56 | annotation = etree.Element("annotation")
57 |
58 | os.chdir("..")
59 | folder = etree.Element("folder")
60 | folder.text = os.path.basename(os.getcwd())
61 | annotation.append(folder)
62 |
63 | filename_xml = etree.Element("filename")
64 | filename_xml.text = filename_str + ".jpg"
65 | annotation.append(filename_xml)
66 |
67 | path = etree.Element("path")
68 | path.text = os.path.join(os.path.dirname(os.path.abspath(filename)), filename_str + ".jpg")
69 | annotation.append(path)
70 |
71 | source = etree.Element("source")
72 | annotation.append(source)
73 |
74 | database = etree.Element("database")
75 | database.text = "Unknown"
76 | source.append(database)
77 |
78 | size = etree.Element("size")
79 | annotation.append(size)
80 |
81 | width = etree.Element("width")
82 | height = etree.Element("height")
83 | depth = etree.Element("depth")
84 |
85 | img = cv2.imread(filename_xml.text)
86 |
87 | try:
88 | width.text = str(img.shape[1])
89 | except AttributeError:
90 | os.chdir("Label")
91 | continue
92 | height.text = str(img.shape[0])
93 | depth.text = str(img.shape[2])
94 |
95 | size.append(width)
96 | size.append(height)
97 | size.append(depth)
98 |
99 | segmented = etree.Element("segmented")
100 | segmented.text = "0"
101 | annotation.append(segmented)
102 |
103 | os.chdir("Label")
104 | label_original = open(filename, 'r')
105 |
106 | # Labels from OIDv4 Toolkit: name_of_class X_min Y_min X_max Y_max
107 | for line in label_original:
108 | line = line.strip()
109 | l = line.split(' ')
110 |
111 | class_name_len = len(l) - 4 # 4 coordinates
112 | class_name = l[0]
113 | for i in range(1,class_name_len):
114 | class_name = f"{class_name}_{l[i]}"
115 |
116 | addi = class_name_len
117 |
118 | xmin_l = str(int(round(float(l[0+addi]))))
119 | ymin_l = str(int(round(float(l[1+addi]))))
120 | xmax_l = str(int(round(float(l[2+addi]))))
121 | ymax_l = str(int(round(float(l[3+addi]))))
122 |
123 | obj = etree.Element("object")
124 | annotation.append(obj)
125 |
126 | name = etree.Element("name")
127 | name.text = class_name
128 | obj.append(name)
129 |
130 | pose = etree.Element("pose")
131 | pose.text = "Unspecified"
132 | obj.append(pose)
133 |
134 | truncated = etree.Element("truncated")
135 | truncated.text = "0"
136 | obj.append(truncated)
137 |
138 | difficult = etree.Element("difficult")
139 | difficult.text = "0"
140 | obj.append(difficult)
141 |
142 | bndbox = etree.Element("bndbox")
143 | obj.append(bndbox)
144 |
145 | xmin = etree.Element("xmin")
146 | xmin.text = xmin_l
147 | bndbox.append(xmin)
148 |
149 | ymin = etree.Element("ymin")
150 | ymin.text = ymin_l
151 | bndbox.append(ymin)
152 |
153 | xmax = etree.Element("xmax")
154 | xmax.text = xmax_l
155 | bndbox.append(xmax)
156 |
157 | ymax = etree.Element("ymax")
158 | ymax.text = ymax_l
159 | bndbox.append(ymax)
160 |
161 | os.chdir("..")
162 | # write xml to file
163 | s = etree.tostring(annotation, pretty_print=True)
164 | with open(filename_str + ".xml", 'wb') as f:
165 | f.write(s)
166 | f.close()
167 |
168 | os.chdir("Label")
169 |
170 | os.chdir("..")
171 | os.chdir("..")
172 |
173 | os.chdir("..")
174 |
175 | convert_to_xml()
176 |
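Note: the Label/*.txt lines follow "name_of_class X_min Y_min X_max Y_max", where a multi-word class name is space separated; the loop above re-joins it with underscores. A standalone sketch of that parsing step (label line made up for illustration):

    line = "Vehicle registration plate 585.0 385.0 958.4 524.9"
    parts = line.strip().split(' ')
    class_name = "_".join(parts[:-4])                             # "Vehicle_registration_plate"
    xmin, ymin, xmax, ymax = (int(round(float(v))) for v in parts[-4:])
    print(class_name, xmin, ymin, xmax, ymax)                     # 585 385 958 525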
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : train.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-06
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to train custom object detector
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
14 | from tensorflow.python.client import device_lib
15 | print(device_lib.list_local_devices())
16 | import shutil
17 | import numpy as np
18 | import tensorflow as tf
19 | #from tensorflow.keras.utils import plot_model
20 | from yolov3.dataset import Dataset
21 | from yolov3.yolov4 import Create_Yolo, compute_loss
22 | from yolov3.utils import load_yolo_weights
23 | from yolov3.configs import *
24 | from evaluate_mAP import get_mAP
25 |
26 | if YOLO_TYPE == "yolov4":
27 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS
28 | if YOLO_TYPE == "yolov3":
29 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS
30 | if TRAIN_YOLO_TINY: TRAIN_MODEL_NAME += "_Tiny"
31 |
32 | def main():
33 | global TRAIN_FROM_CHECKPOINT
34 |
35 | gpus = tf.config.experimental.list_physical_devices('GPU')
36 | print(f'GPUs {gpus}')
37 | if len(gpus) > 0:
38 | try: tf.config.experimental.set_memory_growth(gpus[0], True)
39 | except RuntimeError: pass
40 |
41 | if os.path.exists(TRAIN_LOGDIR): shutil.rmtree(TRAIN_LOGDIR)
42 | writer = tf.summary.create_file_writer(TRAIN_LOGDIR)
43 |
44 | trainset = Dataset('train')
45 | testset = Dataset('test')
46 |
47 | steps_per_epoch = len(trainset)
48 | global_steps = tf.Variable(1, trainable=False, dtype=tf.int64)
49 | warmup_steps = TRAIN_WARMUP_EPOCHS * steps_per_epoch
50 | total_steps = TRAIN_EPOCHS * steps_per_epoch
51 |
52 | if TRAIN_TRANSFER:
53 | Darknet = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
54 | load_yolo_weights(Darknet, Darknet_weights) # use darknet weights
55 |
56 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, training=True, CLASSES=TRAIN_CLASSES)
57 | if TRAIN_FROM_CHECKPOINT:
58 | try:
59 | yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}")
60 | except ValueError:
61 |             print("Shapes are incompatible, transferring Darknet weights")
62 | TRAIN_FROM_CHECKPOINT = False
63 |
64 | if TRAIN_TRANSFER and not TRAIN_FROM_CHECKPOINT:
65 | for i, l in enumerate(Darknet.layers):
66 | layer_weights = l.get_weights()
67 | if layer_weights != []:
68 | try:
69 | yolo.layers[i].set_weights(layer_weights)
70 |                 except ValueError: # incompatible layer shapes (e.g. the detection heads)
71 | print("skipping", yolo.layers[i].name)
72 |
73 | optimizer = tf.keras.optimizers.Adam()
74 |
75 |
76 | def train_step(image_data, target):
77 | with tf.GradientTape() as tape:
78 | pred_result = yolo(image_data, training=True)
79 | giou_loss=conf_loss=prob_loss=0
80 |
81 | # optimizing process
82 | grid = 3 if not TRAIN_YOLO_TINY else 2
83 | for i in range(grid):
84 | conv, pred = pred_result[i*2], pred_result[i*2+1]
85 | loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
86 | giou_loss += loss_items[0]
87 | conf_loss += loss_items[1]
88 | prob_loss += loss_items[2]
89 |
90 | total_loss = giou_loss + conf_loss + prob_loss
91 |
92 | gradients = tape.gradient(total_loss, yolo.trainable_variables)
93 | optimizer.apply_gradients(zip(gradients, yolo.trainable_variables))
94 |
95 | # update learning rate
96 |             # about warmup: https://arxiv.org/pdf/1812.01187.pdf
97 | global_steps.assign_add(1)
98 | if global_steps < warmup_steps:# and not TRAIN_TRANSFER:
99 | lr = global_steps / warmup_steps * TRAIN_LR_INIT
100 | else:
101 | lr = TRAIN_LR_END + 0.5 * (TRAIN_LR_INIT - TRAIN_LR_END)*(
102 | (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
103 | optimizer.lr.assign(lr.numpy())
104 |
105 | # writing summary data
106 | with writer.as_default():
107 | tf.summary.scalar("lr", optimizer.lr, step=global_steps)
108 | tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
109 | tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
110 | tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
111 | tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
112 | writer.flush()
113 |
114 | return global_steps.numpy(), optimizer.lr.numpy(), giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()
115 |
116 | validate_writer = tf.summary.create_file_writer(TRAIN_LOGDIR)
117 | def validate_step(image_data, target):
118 | with tf.GradientTape() as tape:
119 | pred_result = yolo(image_data, training=False)
120 | giou_loss=conf_loss=prob_loss=0
121 |
122 |             # accumulate validation losses over the output scales
123 | grid = 3 if not TRAIN_YOLO_TINY else 2
124 | for i in range(grid):
125 | conv, pred = pred_result[i*2], pred_result[i*2+1]
126 | loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
127 | giou_loss += loss_items[0]
128 | conf_loss += loss_items[1]
129 | prob_loss += loss_items[2]
130 |
131 | total_loss = giou_loss + conf_loss + prob_loss
132 |
133 | return giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()
134 |
135 | mAP_model = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES) # create second model to measure mAP
136 |
137 | best_val_loss = 1000 # should be large at start
138 | for epoch in range(TRAIN_EPOCHS):
139 | for image_data, target in trainset:
140 | results = train_step(image_data, target)
141 | cur_step = results[0]%steps_per_epoch
142 | print("epoch:{:2.0f} step:{:5.0f}/{}, lr:{:.6f}, giou_loss:{:7.2f}, conf_loss:{:7.2f}, prob_loss:{:7.2f}, total_loss:{:7.2f}"
143 | .format(epoch, cur_step, steps_per_epoch, results[1], results[2], results[3], results[4], results[5]))
144 |
145 | if len(testset) == 0:
146 | print("configure TEST options to validate model")
147 | yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME))
148 | continue
149 |
150 | count, giou_val, conf_val, prob_val, total_val = 0., 0, 0, 0, 0
151 | for image_data, target in testset:
152 | results = validate_step(image_data, target)
153 | count += 1
154 | giou_val += results[0]
155 | conf_val += results[1]
156 | prob_val += results[2]
157 | total_val += results[3]
158 | # writing validate summary data
159 | with validate_writer.as_default():
160 | tf.summary.scalar("validate_loss/total_val", total_val/count, step=epoch)
161 | tf.summary.scalar("validate_loss/giou_val", giou_val/count, step=epoch)
162 | tf.summary.scalar("validate_loss/conf_val", conf_val/count, step=epoch)
163 | tf.summary.scalar("validate_loss/prob_val", prob_val/count, step=epoch)
164 | validate_writer.flush()
165 |
166 | print("\n\ngiou_val_loss:{:7.2f}, conf_val_loss:{:7.2f}, prob_val_loss:{:7.2f}, total_val_loss:{:7.2f}\n\n".
167 | format(giou_val/count, conf_val/count, prob_val/count, total_val/count))
168 |
169 | if TRAIN_SAVE_CHECKPOINT and not TRAIN_SAVE_BEST_ONLY:
170 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME+"_val_loss_{:7.2f}".format(total_val/count))
171 | yolo.save_weights(save_directory)
172 | if TRAIN_SAVE_BEST_ONLY and best_val_loss>total_val/count:
173 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME)
174 | yolo.save_weights(save_directory)
175 | best_val_loss = total_val/count
176 | if not TRAIN_SAVE_BEST_ONLY and not TRAIN_SAVE_CHECKPOINT:
177 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME)
178 | yolo.save_weights(save_directory)
179 |
180 | # measure mAP of trained custom model
181 | try:
182 | mAP_model.load_weights(save_directory) # use keras weights
183 | get_mAP(mAP_model, testset, score_threshold=TEST_SCORE_THRESHOLD, iou_threshold=TEST_IOU_THRESHOLD)
184 | except UnboundLocalError:
185 | print("You don't have saved model weights to measure mAP, check TRAIN_SAVE_BEST_ONLY and TRAIN_SAVE_CHECKPOINT lines in configs.py")
186 |
187 | if __name__ == '__main__':
188 | main()
189 |
--------------------------------------------------------------------------------
/yolov3/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 |
--------------------------------------------------------------------------------
/yolov3/__pycache__/configs.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/configs.cpython-36.pyc
--------------------------------------------------------------------------------
/yolov3/__pycache__/dataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/dataset.cpython-36.pyc
--------------------------------------------------------------------------------
/yolov3/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/yolov3/__pycache__/yolov3.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/yolov3.cpython-36.pyc
--------------------------------------------------------------------------------
/yolov3/configs.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : configs.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-18
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : yolov3 configuration file
9 | #
10 | #================================================================
11 |
12 | # YOLO options
13 | YOLO_TYPE = "yolov3" # yolov4 or yolov3
14 | YOLO_FRAMEWORK = "tf" # "tf" or "trt"
15 | YOLO_V3_WEIGHTS = "model_data/yolov3.weights"
16 | YOLO_V4_WEIGHTS = "model_data/yolov4.weights"
17 | YOLO_V3_TINY_WEIGHTS = "model_data/yolov3-tiny.weights"
18 | YOLO_V4_TINY_WEIGHTS = "model_data/yolov4-tiny.weights"
19 | YOLO_TRT_QUANTIZE_MODE = "INT8" # INT8, FP16, FP32
20 | YOLO_CUSTOM_WEIGHTS = False # "checkpoints/yolov3_custom" # used in evaluate_mAP.py and for custom model detection; if not used, leave False
21 | # YOLO_CUSTOM_WEIGHTS also used with TensorRT and custom model detection
22 | YOLO_COCO_CLASSES = "model_data/coco/coco.names"
23 | YOLO_STRIDES = [8, 16, 32]
24 | YOLO_IOU_LOSS_THRESH = 0.5
25 | YOLO_ANCHOR_PER_SCALE = 3
26 | YOLO_MAX_BBOX_PER_SCALE = 100
27 | YOLO_INPUT_SIZE = 416
28 | if YOLO_TYPE == "yolov4":
29 | YOLO_ANCHORS = [[[12, 16], [19, 36], [40, 28]],
30 | [[36, 75], [76, 55], [72, 146]],
31 | [[142,110], [192, 243], [459, 401]]]
32 | if YOLO_TYPE == "yolov3":
33 | YOLO_ANCHORS = [[[10, 13], [16, 30], [33, 23]],
34 | [[30, 61], [62, 45], [59, 119]],
35 | [[116, 90], [156, 198], [373, 326]]]
36 | # Train options
37 | TRAIN_YOLO_TINY = False
38 | TRAIN_SAVE_BEST_ONLY = True # saves only the best model according to validation loss (True recommended)
39 | TRAIN_SAVE_CHECKPOINT = False # saves every improved checkpoint during training (may require a lot of disk space) (False recommended)
40 | TRAIN_CLASSES = "mnist/mnist.names"
41 | TRAIN_ANNOT_PATH = "mnist/mnist_train.txt"
42 | TRAIN_LOGDIR = "log"
43 | TRAIN_CHECKPOINTS_FOLDER = "checkpoints"
44 | TRAIN_MODEL_NAME = f"{YOLO_TYPE}_custom"
45 | TRAIN_LOAD_IMAGES_TO_RAM = True # True speeds up training but needs more RAM
46 | TRAIN_BATCH_SIZE = 4
47 | TRAIN_INPUT_SIZE = 416
48 | TRAIN_DATA_AUG = True
49 | TRAIN_TRANSFER = True
50 | TRAIN_FROM_CHECKPOINT = False # "checkpoints/yolov3_custom"
51 | TRAIN_LR_INIT = 1e-4
52 | TRAIN_LR_END = 1e-6
53 | TRAIN_WARMUP_EPOCHS = 2
54 | TRAIN_EPOCHS = 100
55 |
56 | # TEST options
57 | TEST_ANNOT_PATH = "mnist/mnist_test.txt"
58 | TEST_BATCH_SIZE = 4
59 | TEST_INPUT_SIZE = 416
60 | TEST_DATA_AUG = False
61 | TEST_DECTECTED_IMAGE_PATH = ""
62 | TEST_SCORE_THRESHOLD = 0.3
63 | TEST_IOU_THRESHOLD = 0.45
64 |
65 | if TRAIN_YOLO_TINY:
66 | YOLO_STRIDES = [16, 32]
67 | # YOLO_ANCHORS = [[[23, 27], [37, 58], [81, 82]], # this line can be uncommented for default coco weights
68 | YOLO_ANCHORS = [[[10, 14], [23, 27], [37, 58]],
69 | [[81, 82], [135, 169], [344, 319]]]
70 |
--------------------------------------------------------------------------------
/yolov3/dataset.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : dataset.py
4 | # Author : PyLessons
5 | # Created date: 2020-07-31
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : functions used to prepare dataset for custom training
9 | #
10 | #================================================================
11 | # TODO: port the numpy operations to tensorflow
12 | import os
13 | import cv2
14 | import random
15 | import numpy as np
16 | import tensorflow as tf
17 | from yolov3.utils import read_class_names, image_preprocess
18 | from yolov3.yolov3 import bbox_iou
19 | from yolov3.configs import *
20 |
21 |
22 | class Dataset(object):
23 | # Dataset preprocess implementation
24 | def __init__(self, dataset_type, TEST_INPUT_SIZE=TEST_INPUT_SIZE):
25 | self.annot_path = TRAIN_ANNOT_PATH if dataset_type == 'train' else TEST_ANNOT_PATH
26 | self.input_sizes = TRAIN_INPUT_SIZE if dataset_type == 'train' else TEST_INPUT_SIZE
27 | self.batch_size = TRAIN_BATCH_SIZE if dataset_type == 'train' else TEST_BATCH_SIZE
28 | self.data_aug = TRAIN_DATA_AUG if dataset_type == 'train' else TEST_DATA_AUG
29 |
30 | self.train_yolo_tiny = TRAIN_YOLO_TINY
31 | self.train_input_sizes = TRAIN_INPUT_SIZE
32 | self.strides = np.array(YOLO_STRIDES)
33 | self.classes = read_class_names(TRAIN_CLASSES)
34 | self.num_classes = len(self.classes)
35 | self.anchors = (np.array(YOLO_ANCHORS).T/self.strides).T
36 | self.anchor_per_scale = YOLO_ANCHOR_PER_SCALE
37 | self.max_bbox_per_scale = YOLO_MAX_BBOX_PER_SCALE
38 |
39 | self.annotations = self.load_annotations(dataset_type)
40 | self.num_samples = len(self.annotations)
41 | self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
42 | self.batch_count = 0
43 |
44 |
45 | def load_annotations(self, dataset_type):
46 | final_annotations = []
47 | with open(self.annot_path, 'r') as f:
48 | txt = f.read().splitlines()
49 | annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
50 | np.random.shuffle(annotations)
51 |
52 | # for annotation in annotations:
53 | # image_extension = '.jpg'
54 | # extension_index = annotation.find(image_extension)
55 | # image_path = annotation[:extension_index+len(image_extension)]
56 | # line = annotation[extension_index+len(image_extension):].split()
57 | # if not os.path.exists(image_path):
58 | # raise KeyError("%s does not exist ... " %image_path)
59 | # if TRAIN_LOAD_IMAGES_TO_RAM:
60 | # image = cv2.imread(image_path)
61 | # else:
62 | # image = ''
63 | # final_annotations.append([image_path, line, image])
64 | # return final_annotations
65 | for annotation in annotations:
66 | # fully parse annotations
67 | line = annotation.split()
68 | image_path, index = "", 1
69 | for i, one_line in enumerate(line):
70 | if not one_line.replace(",","").isnumeric():
71 | if image_path != "": image_path += " "
72 | image_path += one_line
73 | else:
74 | index = i
75 | break
76 | if not os.path.exists(image_path):
77 | raise KeyError("%s does not exist ... " %image_path)
78 | if TRAIN_LOAD_IMAGES_TO_RAM:
79 | image = cv2.imread(image_path)
80 | else:
81 | image = ''
82 | final_annotations.append([image_path, line[index:], image])
83 | return final_annotations
84 |
85 | def __iter__(self):
86 | return self
87 |
88 | def Delete_bad_annotation(self, bad_annotation):
89 | print(f'Deleting {bad_annotation} annotation line')
90 | bad_image_path = bad_annotation[0]
91 | bad_image_name = bad_annotation[0].split('/')[-1] # can be used to delete bad image
92 | bad_xml_path = bad_annotation[0][:-3]+'xml' # can be used to delete bad xml file
93 |
94 | # remove bad annotation line from annotation file
95 | with open(self.annot_path, "r+") as f:
96 | d = f.readlines()
97 | f.seek(0)
98 | for i in d:
99 | if bad_image_name not in i:
100 | f.write(i)
101 | f.truncate()
102 |
103 | def __next__(self):
104 | with tf.device('/cpu:0'):
105 | self.train_input_size = random.choice([self.train_input_sizes])
106 | self.train_output_sizes = self.train_input_size // self.strides
107 |
108 | batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32)
109 |
110 | if self.train_yolo_tiny:
111 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
112 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
113 | else:
114 | batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
115 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
116 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32)
117 |
118 | batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
119 |
120 | batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
121 | batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
122 |
123 | exceptions = False
124 | num = 0
125 | if self.batch_count < self.num_batchs:
126 | while num < self.batch_size:
127 | index = self.batch_count * self.batch_size + num
128 | if index >= self.num_samples: index -= self.num_samples
129 | annotation = self.annotations[index]
130 | image, bboxes = self.parse_annotation(annotation)
131 | try:
132 | if self.train_yolo_tiny:
133 | label_mbbox, label_lbbox, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes)
134 | else:
135 | label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes)
136 | except IndexError:
137 | exceptions = True
138 | self.Delete_bad_annotation(annotation)
139 |                     print("IndexError: something is wrong with", annotation[0], "- removed this line from the annotation file")
140 |
141 | batch_image[num, :, :, :] = image
142 | batch_label_mbbox[num, :, :, :, :] = label_mbbox
143 | batch_label_lbbox[num, :, :, :, :] = label_lbbox
144 | batch_mbboxes[num, :, :] = mbboxes
145 | batch_lbboxes[num, :, :] = lbboxes
146 | if not self.train_yolo_tiny:
147 | batch_label_sbbox[num, :, :, :, :] = label_sbbox
148 | batch_sbboxes[num, :, :] = sbboxes
149 |
150 | num += 1
151 |
152 | if exceptions:
153 | print('\n')
154 |                     raise Exception("There were problems with the dataset; the bad lines were removed. Restart the training process.")
155 | self.batch_count += 1
156 | if not self.train_yolo_tiny:
157 | batch_smaller_target = batch_label_sbbox, batch_sbboxes
158 | batch_medium_target = batch_label_mbbox, batch_mbboxes
159 | batch_larger_target = batch_label_lbbox, batch_lbboxes
160 |
161 | if self.train_yolo_tiny:
162 | return batch_image, (batch_medium_target, batch_larger_target)
163 | return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
164 | else:
165 | self.batch_count = 0
166 | np.random.shuffle(self.annotations)
167 | raise StopIteration
168 |
169 | def random_horizontal_flip(self, image, bboxes):
170 | if random.random() < 0.5:
171 | _, w, _ = image.shape
172 | image = image[:, ::-1, :]
173 | bboxes[:, [0,2]] = w - bboxes[:, [2,0]]
174 |
175 | return image, bboxes
176 |
177 | def random_crop(self, image, bboxes):
178 | if random.random() < 0.5:
179 | h, w, _ = image.shape
180 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
181 |
182 | max_l_trans = max_bbox[0]
183 | max_u_trans = max_bbox[1]
184 | max_r_trans = w - max_bbox[2]
185 | max_d_trans = h - max_bbox[3]
186 |
187 | crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
188 | crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
189 |             crop_xmax = min(w, int(max_bbox[2] + random.uniform(0, max_r_trans))) # clamp the crop to the image bounds
190 |             crop_ymax = min(h, int(max_bbox[3] + random.uniform(0, max_d_trans)))
191 |
192 | image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax]
193 |
194 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
195 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
196 |
197 | return image, bboxes
198 |
199 | def random_translate(self, image, bboxes):
200 | if random.random() < 0.5:
201 | h, w, _ = image.shape
202 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1)
203 |
204 | max_l_trans = max_bbox[0]
205 | max_u_trans = max_bbox[1]
206 | max_r_trans = w - max_bbox[2]
207 | max_d_trans = h - max_bbox[3]
208 |
209 | tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
210 | ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))
211 |
212 | M = np.array([[1, 0, tx], [0, 1, ty]])
213 | image = cv2.warpAffine(image, M, (w, h))
214 |
215 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
216 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
217 |
218 | return image, bboxes
219 |
220 |     def parse_annotation(self, annotation, mAP=False):
221 | if TRAIN_LOAD_IMAGES_TO_RAM:
222 | image_path = annotation[0]
223 | image = annotation[2]
224 | else:
225 | image_path = annotation[0]
226 | image = cv2.imread(image_path)
227 |
228 | bboxes = np.array([list(map(int, box.split(','))) for box in annotation[1]])
229 |
230 | if self.data_aug:
231 | image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes))
232 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes))
233 | image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes))
234 |
235 | #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
236 |         if mAP: # return the un-resized image and boxes for mAP evaluation
237 | return image, bboxes
238 |
239 | image, bboxes = image_preprocess(np.copy(image), [self.input_sizes, self.input_sizes], np.copy(bboxes))
240 | return image, bboxes
241 |
242 | def preprocess_true_boxes(self, bboxes):
243 | OUTPUT_LEVELS = len(self.strides)
244 |
245 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale,
246 | 5 + self.num_classes)) for i in range(OUTPUT_LEVELS)]
247 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(OUTPUT_LEVELS)]
248 | bbox_count = np.zeros((OUTPUT_LEVELS,))
249 |
250 | for bbox in bboxes:
251 | bbox_coor = bbox[:4]
252 | bbox_class_ind = bbox[4]
253 |
254 |             onehot = np.zeros(self.num_classes, dtype=np.float32)
255 | onehot[bbox_class_ind] = 1.0
256 | uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
257 |             delta = 0.01
258 |             smooth_onehot = onehot * (1 - delta) + delta * uniform_distribution # label smoothing
259 |
260 | bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
261 | bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
262 |
263 | iou = []
264 | exist_positive = False
265 | for i in range(OUTPUT_LEVELS):#range(3):
266 | anchors_xywh = np.zeros((self.anchor_per_scale, 4))
267 | anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5
268 | anchors_xywh[:, 2:4] = self.anchors[i]
269 |
270 | iou_scale = bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
271 | iou.append(iou_scale)
272 | iou_mask = iou_scale > 0.3
273 |
274 | if np.any(iou_mask):
275 | xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
276 |
277 | label[i][yind, xind, iou_mask, :] = 0
278 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
279 | label[i][yind, xind, iou_mask, 4:5] = 1.0
280 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot
281 |
282 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
283 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
284 | bbox_count[i] += 1
285 |
286 | exist_positive = True
287 |
288 | if not exist_positive:
289 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)
290 | best_detect = int(best_anchor_ind / self.anchor_per_scale)
291 | best_anchor = int(best_anchor_ind % self.anchor_per_scale)
292 | xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
293 |
294 | label[best_detect][yind, xind, best_anchor, :] = 0
295 | label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
296 | label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
297 | label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
298 |
299 | bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
300 | bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
301 | bbox_count[best_detect] += 1
302 |
303 | if self.train_yolo_tiny:
304 | label_mbbox, label_lbbox = label
305 | mbboxes, lbboxes = bboxes_xywh
306 | return label_mbbox, label_lbbox, mbboxes, lbboxes
307 |
308 | label_sbbox, label_mbbox, label_lbbox = label
309 | sbboxes, mbboxes, lbboxes = bboxes_xywh
310 | return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
311 |
312 | def __len__(self):
313 | return self.num_batchs
314 |
--------------------------------------------------------------------------------
/yolov3/utils.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : utils.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-27
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : additional yolov3 and yolov4 functions
9 | #
10 | #================================================================
11 | from multiprocessing import Process, Queue, Pipe
12 | import cv2
13 | import time
14 | import random
15 | import colorsys
16 | import numpy as np
17 | import tensorflow as tf
18 | from yolov3.configs import *
19 | from yolov3.yolov4 import *
20 | from tensorflow.python.saved_model import tag_constants
21 |
22 | def load_yolo_weights(model, weights_file):
23 | tf.keras.backend.clear_session() # used to reset layer names
24 | # load Darknet original weights to TensorFlow model
25 | if YOLO_TYPE == "yolov3":
26 | range1 = 75 if not TRAIN_YOLO_TINY else 13
27 | range2 = [58, 66, 74] if not TRAIN_YOLO_TINY else [9, 12]
28 | if YOLO_TYPE == "yolov4":
29 | range1 = 110 if not TRAIN_YOLO_TINY else 21
30 | range2 = [93, 101, 109] if not TRAIN_YOLO_TINY else [17, 20]
31 |
32 | with open(weights_file, 'rb') as wf:
33 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
34 |
35 | j = 0
36 | for i in range(range1):
37 | if i > 0:
38 | conv_layer_name = 'conv2d_%d' %i
39 | else:
40 | conv_layer_name = 'conv2d'
41 |
42 | if j > 0:
43 | bn_layer_name = 'batch_normalization_%d' %j
44 | else:
45 | bn_layer_name = 'batch_normalization'
46 |
47 | conv_layer = model.get_layer(conv_layer_name)
48 | filters = conv_layer.filters
49 | k_size = conv_layer.kernel_size[0]
50 | in_dim = conv_layer.input_shape[-1]
51 |
52 | if i not in range2:
53 | # darknet weights: [beta, gamma, mean, variance]
54 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters)
55 | # tf weights: [gamma, beta, mean, variance]
56 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
57 | bn_layer = model.get_layer(bn_layer_name)
58 | j += 1
59 | else:
60 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
61 |
62 | # darknet shape (out_dim, in_dim, height, width)
63 | conv_shape = (filters, in_dim, k_size, k_size)
64 |             conv_weights = np.fromfile(wf, dtype=np.float32, count=np.prod(conv_shape))
65 | # tf shape (height, width, in_dim, out_dim)
66 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0])
67 |
68 | if i not in range2:
69 | conv_layer.set_weights([conv_weights])
70 | bn_layer.set_weights(bn_weights)
71 | else:
72 | conv_layer.set_weights([conv_weights, conv_bias])
73 |
74 | assert len(wf.read()) == 0, 'failed to read all data'
75 |
76 | def Load_Yolo_model():
77 | gpus = tf.config.experimental.list_physical_devices('GPU')
78 | if len(gpus) > 0:
79 | print(f'GPUs {gpus}')
80 | try: tf.config.experimental.set_memory_growth(gpus[0], True)
81 | except RuntimeError: pass
82 |
83 | if YOLO_FRAMEWORK == "tf": # TensorFlow detection
84 | if YOLO_TYPE == "yolov4":
85 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS
86 | if YOLO_TYPE == "yolov3":
87 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS
88 |
89 | if YOLO_CUSTOM_WEIGHTS == False:
90 | print("Loading Darknet_weights from:", Darknet_weights)
91 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
92 | load_yolo_weights(yolo, Darknet_weights) # use Darknet weights
93 | else:
94 | checkpoint = f"./checkpoints/{TRAIN_MODEL_NAME}"
95 | if TRAIN_YOLO_TINY:
96 | checkpoint += "_Tiny"
97 | print("Loading custom weights from:", checkpoint)
98 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
99 | yolo.load_weights(checkpoint) # use custom weights
100 |
101 | elif YOLO_FRAMEWORK == "trt": # TensorRT detection
102 | saved_model_loaded = tf.saved_model.load(YOLO_CUSTOM_WEIGHTS, tags=[tag_constants.SERVING])
103 | signature_keys = list(saved_model_loaded.signatures.keys())
104 | yolo = saved_model_loaded.signatures['serving_default']
105 |
106 | return yolo
107 |
108 | def image_preprocess(image, target_size, gt_boxes=None):
109 | ih, iw = target_size
110 | h, w, _ = image.shape
111 |
112 | scale = min(iw/w, ih/h)
113 | nw, nh = int(scale * w), int(scale * h)
114 | image_resized = cv2.resize(image, (nw, nh))
115 |
116 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0)
117 | dw, dh = (iw - nw) // 2, (ih-nh) // 2
118 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized
119 | image_paded = image_paded / 255.
120 |
121 | if gt_boxes is None:
122 | return image_paded
123 |
124 | else:
125 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw
126 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
127 | return image_paded, gt_boxes
128 |
129 |
130 | def draw_bbox(image, bboxes, CLASSES=YOLO_COCO_CLASSES, show_label=True, show_confidence = True, Text_colors=(255,255,0), rectangle_colors='', tracking=False):
131 | NUM_CLASS = read_class_names(CLASSES)
132 | num_classes = len(NUM_CLASS)
133 | image_h, image_w, _ = image.shape
134 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
135 | #print("hsv_tuples", hsv_tuples)
136 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
137 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
138 |
139 | random.seed(0)
140 | random.shuffle(colors)
141 | random.seed(None)
142 |
143 | for i, bbox in enumerate(bboxes):
144 | coor = np.array(bbox[:4], dtype=np.int32)
145 | score = bbox[4]
146 | class_ind = int(bbox[5])
147 | bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind]
148 | bbox_thick = int(0.6 * (image_h + image_w) / 1000)
149 | if bbox_thick < 1: bbox_thick = 1
150 | fontScale = 0.75 * bbox_thick
151 | (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3])
152 |
153 | # put object rectangle
154 | cv2.rectangle(image, (x1, y1), (x2, y2), bbox_color, bbox_thick*2)
155 |
156 | if show_label:
157 | # get text label
158 | score_str = " {:.2f}".format(score) if show_confidence else ""
159 |
160 | if tracking: score_str = " "+str(score)
161 |
162 | try:
163 | label = "{}".format(NUM_CLASS[class_ind]) + score_str
164 | except KeyError:
165 |                 print("KeyError: the original YOLO weights may be in use with custom classes; set YOLO_CUSTOM_WEIGHTS in configs.py")
166 |                 label = str(class_ind) + score_str # fall back to the raw class index
167 |
168 | # get text size
169 | (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX_SMALL,
170 | fontScale, thickness=bbox_thick)
171 | # put filled text rectangle
172 | cv2.rectangle(image, (x1, y1), (x1 + text_width, y1 - text_height - baseline), bbox_color, thickness=cv2.FILLED)
173 |
174 | # put text above rectangle
175 | cv2.putText(image, label, (x1, y1-4), cv2.FONT_HERSHEY_COMPLEX_SMALL,
176 | fontScale, Text_colors, bbox_thick, lineType=cv2.LINE_AA)
177 |
178 | return image
179 |
180 |
181 | def bboxes_iou(boxes1, boxes2):
182 | boxes1 = np.array(boxes1)
183 | boxes2 = np.array(boxes2)
184 |
185 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
186 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
187 |
188 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2])
189 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:])
190 |
191 | inter_section = np.maximum(right_down - left_up, 0.0)
192 | inter_area = inter_section[..., 0] * inter_section[..., 1]
193 | union_area = boxes1_area + boxes2_area - inter_area
194 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps)
195 |
196 | return ious
197 |
198 |
199 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'):
200 | """
201 | :param bboxes: (xmin, ymin, xmax, ymax, score, class)
202 |
203 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf
204 | https://github.com/bharatsingh430/soft-nms
205 | """
206 | classes_in_img = list(set(bboxes[:, 5]))
207 | best_bboxes = []
208 |
209 | for cls in classes_in_img:
210 | cls_mask = (bboxes[:, 5] == cls)
211 | cls_bboxes = bboxes[cls_mask]
212 | # Process 1: Determine whether the number of bounding boxes is greater than 0
213 | while len(cls_bboxes) > 0:
214 |             # Process 2: Select the bounding box with the highest score
215 | max_ind = np.argmax(cls_bboxes[:, 4])
216 | best_bbox = cls_bboxes[max_ind]
217 | best_bboxes.append(best_bbox)
218 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]])
219 |             # Process 3: Compute the IoU between the selected box and the remaining boxes,
220 |             # then suppress the boxes whose IoU exceeds the threshold
221 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4])
222 | weight = np.ones((len(iou),), dtype=np.float32)
223 |
224 | assert method in ['nms', 'soft-nms']
225 |
226 | if method == 'nms':
227 | iou_mask = iou > iou_threshold
228 | weight[iou_mask] = 0.0
229 |
230 | if method == 'soft-nms':
231 | weight = np.exp(-(1.0 * iou ** 2 / sigma))
232 |
233 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight
234 | score_mask = cls_bboxes[:, 4] > 0.
235 | cls_bboxes = cls_bboxes[score_mask]
236 |
237 | return best_bboxes
238 |
239 |
240 | def postprocess_boxes(pred_bbox, original_image, input_size, score_threshold):
241 | valid_scale=[0, np.inf]
242 | pred_bbox = np.array(pred_bbox)
243 |
244 | pred_xywh = pred_bbox[:, 0:4]
245 | pred_conf = pred_bbox[:, 4]
246 | pred_prob = pred_bbox[:, 5:]
247 |
248 | # 1. (x, y, w, h) --> (xmin, ymin, xmax, ymax)
249 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5,
250 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1)
251 | # 2. (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org)
252 | org_h, org_w = original_image.shape[:2]
253 | resize_ratio = min(input_size / org_w, input_size / org_h)
254 |
255 | dw = (input_size - resize_ratio * org_w) / 2
256 | dh = (input_size - resize_ratio * org_h) / 2
257 |
258 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio
259 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio
260 |
261 |     # 3. clip boxes that fall outside the image
262 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]),
263 | np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1)
264 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3]))
265 | pred_coor[invalid_mask] = 0
266 |
267 | # 4. discard some invalid boxes
268 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1))
269 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1]))
270 |
271 | # 5. discard boxes with low scores
272 | classes = np.argmax(pred_prob, axis=-1)
273 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes]
274 | score_mask = scores > score_threshold
275 | mask = np.logical_and(scale_mask, score_mask)
276 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask]
277 |
278 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1)
279 |
280 |
281 | def detect_image(Yolo, image_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
282 | original_image = cv2.imread(image_path)
283 |     # convert from OpenCV's BGR colour order to RGB once
284 |     original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
285 |
286 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
287 | image_data = image_data[np.newaxis, ...].astype(np.float32)
288 |
289 | if YOLO_FRAMEWORK == "tf":
290 | pred_bbox = Yolo.predict(image_data)
291 | elif YOLO_FRAMEWORK == "trt":
292 | batched_input = tf.constant(image_data)
293 | result = Yolo(batched_input)
294 | pred_bbox = []
295 | for key, value in result.items():
296 | value = value.numpy()
297 | pred_bbox.append(value)
298 |
299 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
300 | pred_bbox = tf.concat(pred_bbox, axis=0)
301 |
302 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
303 | bboxes = nms(bboxes, iou_threshold, method='nms')
304 |
305 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
306 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES))
307 |
308 | if output_path != '': cv2.imwrite(output_path, image)
309 | if show:
310 | # Show the image
311 | cv2.imshow("predicted image", image)
312 | # Load and hold the image
313 | cv2.waitKey(0)
314 | # To close the window after the required kill value was provided
315 | cv2.destroyAllWindows()
316 |
317 | return image
318 |
319 | def Predict_bbox_mp(Frames_data, Predicted_data, Processing_times):
320 | gpus = tf.config.experimental.list_physical_devices('GPU')
321 | if len(gpus) > 0:
322 | try: tf.config.experimental.set_memory_growth(gpus[0], True)
323 |         except RuntimeError: print("RuntimeError in tf.config.experimental.set_memory_growth")
324 | Yolo = Load_Yolo_model()
325 | times = []
326 | while True:
327 | if Frames_data.qsize()>0:
328 | image_data = Frames_data.get()
329 | t1 = time.time()
330 | Processing_times.put(time.time())
331 |
332 | if YOLO_FRAMEWORK == "tf":
333 |             if tuple(map(int, tf.__version__.split('.')[:2])) >= (2, 4): # numeric version check ('2.10' > '2.4' fails as a string)
334 | pred_bbox = Yolo(image_data)
335 | else:
336 | pred_bbox = Yolo.predict(image_data)
337 | elif YOLO_FRAMEWORK == "trt":
338 | batched_input = tf.constant(image_data)
339 | result = Yolo(batched_input)
340 | pred_bbox = []
341 | for key, value in result.items():
342 | value = value.numpy()
343 | pred_bbox.append(value)
344 |
345 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
346 | pred_bbox = tf.concat(pred_bbox, axis=0)
347 |
348 | Predicted_data.put(pred_bbox)
349 |
350 |
351 | def postprocess_mp(Predicted_data, original_frames, Processed_frames, Processing_times, input_size, CLASSES, score_threshold, iou_threshold, rectangle_colors, realtime):
352 | times = []
353 | while True:
354 | if Predicted_data.qsize()>0:
355 | pred_bbox = Predicted_data.get()
356 | if realtime:
357 | while original_frames.qsize() > 1:
358 | original_image = original_frames.get()
359 | else:
360 | original_image = original_frames.get()
361 |
362 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
363 | bboxes = nms(bboxes, iou_threshold, method='nms')
364 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
365 | times.append(time.time()-Processing_times.get())
366 | times = times[-20:]
367 |
368 | ms = sum(times)/len(times)*1000
369 | fps = 1000 / ms
370 |                 image = cv2.putText(image, "FPS: {:.1f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
371 | #print("Time: {:.2f}ms, Final FPS: {:.1f}".format(ms, fps))
372 |
373 | Processed_frames.put(image)
374 |
375 | def Show_Image_mp(Processed_frames, show, Final_frames):
376 | while True:
377 | if Processed_frames.qsize()>0:
378 | image = Processed_frames.get()
379 | Final_frames.put(image)
380 | if show:
381 | cv2.imshow('output', image)
382 | if cv2.waitKey(25) & 0xFF == ord("q"):
383 | cv2.destroyAllWindows()
384 | break
385 |
386 | # detect from a video file or webcam using a multiprocessing pipeline
387 | def detect_video_realtime_mp(video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', realtime=False):
388 | if realtime:
389 | vid = cv2.VideoCapture(0)
390 | else:
391 | vid = cv2.VideoCapture(video_path)
392 |
393 | # by default VideoCapture returns float instead of int
394 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
395 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
396 | fps = int(vid.get(cv2.CAP_PROP_FPS))
397 | codec = cv2.VideoWriter_fourcc(*'XVID')
398 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4
399 | no_of_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
400 |
401 | original_frames = Queue()
402 | Frames_data = Queue()
403 | Predicted_data = Queue()
404 | Processed_frames = Queue()
405 | Processing_times = Queue()
406 | Final_frames = Queue()
407 |
408 | p1 = Process(target=Predict_bbox_mp, args=(Frames_data, Predicted_data, Processing_times))
409 | p2 = Process(target=postprocess_mp, args=(Predicted_data, original_frames, Processed_frames, Processing_times, input_size, CLASSES, score_threshold, iou_threshold, rectangle_colors, realtime))
410 | p3 = Process(target=Show_Image_mp, args=(Processed_frames, show, Final_frames))
411 | p1.start()
412 | p2.start()
413 | p3.start()
414 |
415 | while True:
416 | ret, img = vid.read()
417 | if not ret:
418 | break
419 |
420 |         # convert from OpenCV's BGR colour order to RGB once
421 |         original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
422 | original_frames.put(original_image)
423 |
424 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
425 | image_data = image_data[np.newaxis, ...].astype(np.float32)
426 | Frames_data.put(image_data)
427 |
428 | while True:
429 | if original_frames.qsize() == 0 and Frames_data.qsize() == 0 and Predicted_data.qsize() == 0 and Processed_frames.qsize() == 0 and Processing_times.qsize() == 0 and Final_frames.qsize() == 0:
430 | p1.terminate()
431 | p2.terminate()
432 | p3.terminate()
433 | break
434 | elif Final_frames.qsize()>0:
435 | image = Final_frames.get()
436 | if output_path != '': out.write(image)
437 |
438 | cv2.destroyAllWindows()
439 |
440 | def detect_video(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
441 | times, times_2 = [], []
442 | vid = cv2.VideoCapture(video_path)
443 |
444 | # by default VideoCapture returns float instead of int
445 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
446 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
447 | fps = int(vid.get(cv2.CAP_PROP_FPS))
448 | codec = cv2.VideoWriter_fourcc(*'XVID')
449 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4
450 |
451 | while True:
452 | _, img = vid.read()
453 |
454 | try:
455 |             # convert from OpenCV's BGR colour order to RGB once
456 |             original_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
457 |         except cv2.error:
458 |             break # end of video
459 |
460 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size])
461 | image_data = image_data[np.newaxis, ...].astype(np.float32)
462 |
463 | t1 = time.time()
464 | if YOLO_FRAMEWORK == "tf":
465 |             if tuple(map(int, tf.__version__.split('.')[:2])) >= (2, 4): # numeric version check
466 | pred_bbox = Yolo(image_data, training=False)
467 | else:
468 | pred_bbox = Yolo.predict(image_data)
469 | elif YOLO_FRAMEWORK == "trt":
470 | batched_input = tf.constant(image_data)
471 | result = Yolo(batched_input)
472 | pred_bbox = []
473 | for key, value in result.items():
474 | value = value.numpy()
475 | pred_bbox.append(value)
476 |
477 | t2 = time.time()
478 |
479 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
480 | pred_bbox = tf.concat(pred_bbox, axis=0)
481 |
482 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold)
483 | bboxes = nms(bboxes, iou_threshold, method='nms')
484 |
485 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
486 |
487 | t3 = time.time()
488 | times.append(t2-t1)
489 | times_2.append(t3-t1)
490 |
491 | times = times[-20:]
492 | times_2 = times_2[-20:]
493 |
494 | ms = sum(times)/len(times)*1000
495 | fps = 1000 / ms
496 | fps2 = 1000 / (sum(times_2)/len(times_2)*1000)
497 |
498 |         image = cv2.putText(image, "FPS: {:.1f}".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
499 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES))
500 |
501 | print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2))
502 | if output_path != '': out.write(image)
503 | if show:
504 | cv2.imshow('output', image)
505 | if cv2.waitKey(25) & 0xFF == ord("q"):
506 | cv2.destroyAllWindows()
507 | break
508 |
509 | cv2.destroyAllWindows()
510 |
511 | # detect from webcam
512 | def detect_realtime(Yolo, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''):
513 | times = []
514 |     vid = cv2.VideoCapture(1) # camera index 1; use 0 for the default webcam
515 |
516 | if output_path:
517 | # by default VideoCapture returns float instead of int
518 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
519 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
520 | fps = int(vid.get(cv2.CAP_PROP_FPS))
521 | codec = cv2.VideoWriter_fourcc(*'XVID')
522 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4
523 |
524 | while True:
525 | ret, frame = vid.read()
526 |
527 | try:
528 |             # convert from OpenCV's BGR colour order to RGB once
529 |             original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
530 |         except cv2.error:
531 |             break # no frame available from the camera
532 | image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
533 | image_data = image_data[np.newaxis, ...].astype(np.float32)
534 |
535 | t1 = time.time()
536 | if YOLO_FRAMEWORK == "tf":
537 |             if tuple(map(int, tf.__version__.split('.')[:2])) >= (2, 4): # numeric version check
538 | pred_bbox = Yolo(image_data, training=False)
539 | else:
540 | pred_bbox = Yolo.predict(image_data)
541 | # if True:
542 | # pred_bbox = Yolo.predict(image_data)
543 | elif YOLO_FRAMEWORK == "trt":
544 | batched_input = tf.constant(image_data)
545 | result = Yolo(batched_input)
546 | pred_bbox = []
547 | for key, value in result.items():
548 | value = value.numpy()
549 | pred_bbox.append(value)
550 |
551 | t2 = time.time()
552 |
553 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
554 | pred_bbox = tf.concat(pred_bbox, axis=0)
555 |
556 | bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold)
557 | bboxes = nms(bboxes, iou_threshold, method='nms')
558 |
559 | times.append(t2-t1)
560 | times = times[-20:]
561 |
562 | ms = sum(times)/len(times)*1000
563 | fps = 1000 / ms
564 |
565 | print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps))
566 |
567 | frame = draw_bbox(original_frame, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors)
568 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_frame, bboxes, read_class_names(CLASSES))
569 |         image = cv2.putText(frame, "FPS: {:.1f}".format(fps), (0, 30),
570 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
571 |
572 | if output_path != '': out.write(frame)
573 | if show:
574 | cv2.imshow('output', frame)
575 | if cv2.waitKey(25) & 0xFF == ord("q"):
576 | cv2.destroyAllWindows()
577 | break
578 |
579 | cv2.destroyAllWindows()
580 |
--------------------------------------------------------------------------------
/yolov3/yolov3.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : yolov3.py
4 | # Author : PyLessons
5 | # Created date: 2020-06-04
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : main yolov3 functions
9 | #
10 | #================================================================
11 | import numpy as np
12 | import tensorflow as tf
13 | from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, ZeroPadding2D, BatchNormalization, MaxPool2D
14 | from tensorflow.keras.regularizers import l2
15 | from yolov3.utils import read_class_names
16 | from yolov3.configs import *
17 |
18 | STRIDES = np.array(YOLO_STRIDES)
19 | ANCHORS = (np.array(YOLO_ANCHORS).T/STRIDES).T
20 |
21 | class BatchNormalization(BatchNormalization):
22 | # "Frozen state" and "inference mode" are two separate concepts.
23 | # `layer.trainable = False` is to freeze the layer, so the layer will use
24 |     # stored moving `var` and `mean` in the "inference mode", and both `gamma`
25 |     # and `beta` will not be updated!
26 | def call(self, x, training=False):
27 | if not training:
28 | training = tf.constant(False)
29 | training = tf.logical_and(training, self.trainable)
30 | return super().call(x, training)
31 |
32 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True):
33 | if downsample:
34 | input_layer = ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
35 | padding = 'valid'
36 | strides = 2
37 | else:
38 | strides = 1
39 | padding = 'same'
40 |
41 | conv = Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides,
42 | padding=padding, use_bias=not bn, kernel_regularizer=l2(0.0005),
43 | kernel_initializer=tf.random_normal_initializer(stddev=0.01),
44 | bias_initializer=tf.constant_initializer(0.))(input_layer)
45 | if bn:
46 | conv = BatchNormalization()(conv)
47 |     if activate:
48 | conv = LeakyReLU(alpha=0.1)(conv)
49 |
50 | return conv
51 |
52 | def residual_block(input_layer, input_channel, filter_num1, filter_num2):
53 | short_cut = input_layer
54 | conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1))
55 | conv = convolutional(conv , filters_shape=(3, 3, filter_num1, filter_num2))
56 |
57 | residual_output = short_cut + conv
58 | return residual_output
59 |
60 | def upsample(input_layer):
61 | return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
62 |
63 |
64 | def darknet53(input_data):
65 | input_data = convolutional(input_data, (3, 3, 3, 32))
66 | input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)
67 |
68 | for i in range(1):
69 | input_data = residual_block(input_data, 64, 32, 64)
70 |
71 | input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True)
72 |
73 | for i in range(2):
74 | input_data = residual_block(input_data, 128, 64, 128)
75 |
76 | input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True)
77 |
78 | for i in range(8):
79 | input_data = residual_block(input_data, 256, 128, 256)
80 |
81 | route_1 = input_data
82 | input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True)
83 |
84 | for i in range(8):
85 | input_data = residual_block(input_data, 512, 256, 512)
86 |
87 | route_2 = input_data
88 | input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True)
89 |
90 | for i in range(4):
91 | input_data = residual_block(input_data, 1024, 512, 1024)
92 |
93 | return route_1, route_2, input_data
94 |
95 | def darknet19_tiny(input_data):
96 | input_data = convolutional(input_data, (3, 3, 3, 16))
97 | input_data = MaxPool2D(2, 2, 'same')(input_data)
98 | input_data = convolutional(input_data, (3, 3, 16, 32))
99 | input_data = MaxPool2D(2, 2, 'same')(input_data)
100 | input_data = convolutional(input_data, (3, 3, 32, 64))
101 | input_data = MaxPool2D(2, 2, 'same')(input_data)
102 | input_data = convolutional(input_data, (3, 3, 64, 128))
103 | input_data = MaxPool2D(2, 2, 'same')(input_data)
104 | input_data = convolutional(input_data, (3, 3, 128, 256))
105 | route_1 = input_data
106 | input_data = MaxPool2D(2, 2, 'same')(input_data)
107 | input_data = convolutional(input_data, (3, 3, 256, 512))
108 | input_data = MaxPool2D(2, 1, 'same')(input_data)
109 | input_data = convolutional(input_data, (3, 3, 512, 1024))
110 |
111 | return route_1, input_data
112 |
113 | def YOLOv3(input_layer, NUM_CLASS):
114 | # After the input layer enters the Darknet-53 network, we get three branches
115 | route_1, route_2, conv = darknet53(input_layer)
116 |     # five consecutive DBL (conv + BN + LeakyReLU) convolution blocks
117 | conv = convolutional(conv, (1, 1, 1024, 512))
118 | conv = convolutional(conv, (3, 3, 512, 1024))
119 | conv = convolutional(conv, (1, 1, 1024, 512))
120 | conv = convolutional(conv, (3, 3, 512, 1024))
121 | conv = convolutional(conv, (1, 1, 1024, 512))
122 | conv_lobj_branch = convolutional(conv, (3, 3, 512, 1024))
123 |
124 |     # conv_lbbox is used to predict large-sized objects, shape = [None, 13, 13, 255]
125 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
126 |
127 | conv = convolutional(conv, (1, 1, 512, 256))
128 | # upsample here uses the nearest neighbor interpolation method, which has the advantage that the
129 |     # upsampling process does not need to be learned, thereby reducing the number of network parameters
130 | conv = upsample(conv)
131 |
132 | conv = tf.concat([conv, route_2], axis=-1)
133 | conv = convolutional(conv, (1, 1, 768, 256))
134 | conv = convolutional(conv, (3, 3, 256, 512))
135 | conv = convolutional(conv, (1, 1, 512, 256))
136 | conv = convolutional(conv, (3, 3, 256, 512))
137 | conv = convolutional(conv, (1, 1, 512, 256))
138 | conv_mobj_branch = convolutional(conv, (3, 3, 256, 512))
139 |
140 | # conv_mbbox is used to predict medium-sized objects, shape = [None, 26, 26, 255]
141 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
142 |
143 | conv = convolutional(conv, (1, 1, 256, 128))
144 | conv = upsample(conv)
145 |
146 | conv = tf.concat([conv, route_1], axis=-1)
147 | conv = convolutional(conv, (1, 1, 384, 128))
148 | conv = convolutional(conv, (3, 3, 128, 256))
149 | conv = convolutional(conv, (1, 1, 256, 128))
150 | conv = convolutional(conv, (3, 3, 128, 256))
151 | conv = convolutional(conv, (1, 1, 256, 128))
152 | conv_sobj_branch = convolutional(conv, (3, 3, 128, 256))
153 |
154 | # conv_sbbox is used to predict small size objects, shape = [None, 52, 52, 255]
155 | conv_sbbox = convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS +5)), activate=False, bn=False)
156 |
157 | return [conv_sbbox, conv_mbbox, conv_lbbox]
158 |
159 | def YOLOv3_tiny(input_layer, NUM_CLASS):
160 |     # After the input layer passes through the Darknet19-tiny network, we get two branches
161 | route_1, conv = darknet19_tiny(input_layer)
162 |
163 | conv = convolutional(conv, (1, 1, 1024, 256))
164 | conv_lobj_branch = convolutional(conv, (3, 3, 256, 512))
165 |
166 |     # conv_lbbox is used to predict large-sized objects, shape = [None, 13, 13, 255]
167 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
168 |
169 | conv = convolutional(conv, (1, 1, 256, 128))
170 | # upsample here uses the nearest neighbor interpolation method, which has the advantage that the
171 |     # upsampling process does not need to be learned, thereby reducing the number of network parameters
172 | conv = upsample(conv)
173 |
174 | conv = tf.concat([conv, route_1], axis=-1)
175 | conv_mobj_branch = convolutional(conv, (3, 3, 128, 256))
176 | # conv_mbbox is used to predict medium-sized objects; shape = [None, 26, 26, 255]
177 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
178 |
179 | return [conv_mbbox, conv_lbbox]
180 |
181 | def Create_Yolov3(input_size=416, channels=3, training=False, CLASSES=YOLO_COCO_CLASSES):
182 | NUM_CLASS = len(read_class_names(CLASSES))
183 | input_layer = Input([input_size, input_size, channels])
184 |
185 | if TRAIN_YOLO_TINY:
186 | conv_tensors = YOLOv3_tiny(input_layer, NUM_CLASS)
187 | else:
188 | conv_tensors = YOLOv3(input_layer, NUM_CLASS)
189 |
190 | output_tensors = []
191 | for i, conv_tensor in enumerate(conv_tensors):
192 | pred_tensor = decode(conv_tensor, NUM_CLASS, i)
193 | if training: output_tensors.append(conv_tensor)
194 | output_tensors.append(pred_tensor)
195 |
196 | YoloV3 = tf.keras.Model(input_layer, output_tensors)
197 | return YoloV3
198 |
199 | def decode(conv_output, NUM_CLASS, i=0):
200 | # where i = 0, 1 or 2 to correspond to the three grid scales
201 | conv_shape = tf.shape(conv_output)
202 | batch_size = conv_shape[0]
203 | output_size = conv_shape[1]
204 |
205 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
206 |
207 | conv_raw_dxdy = conv_output[:, :, :, :, 0:2] # offset of the box center
208 | conv_raw_dwdh = conv_output[:, :, :, :, 2:4] # width and height offsets of the prediction box
209 | conv_raw_conf = conv_output[:, :, :, :, 4:5] # confidence of the prediction box
210 | conv_raw_prob = conv_output[:, :, :, :, 5: ] # class probabilities of the prediction box
211 |
212 | # Next, draw the grid; output_size equals 13, 26 or 52 depending on the scale
213 | y = tf.range(output_size, dtype=tf.int32)
214 | y = tf.expand_dims(y, -1)
215 | y = tf.tile(y, [1, output_size])
216 | x = tf.range(output_size, dtype=tf.int32)
217 | x = tf.expand_dims(x, 0)
218 | x = tf.tile(x, [output_size, 1])
219 |
220 | xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
221 | xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
222 | xy_grid = tf.cast(xy_grid, tf.float32)
223 |
224 | # Calculate the center position of the prediction box:
225 | pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
226 | # Calculate the length and width of the prediction box:
227 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
228 |
229 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
230 | pred_conf = tf.sigmoid(conv_raw_conf) # predicted confidence of the box
231 | pred_prob = tf.sigmoid(conv_raw_prob) # predicted class probabilities of the box
232 |
233 | # concatenate box coordinates, confidence and class probabilities
234 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
235 |
236 | def bbox_iou(boxes1, boxes2):
237 | boxes1_area = boxes1[..., 2] * boxes1[..., 3]
238 | boxes2_area = boxes2[..., 2] * boxes2[..., 3]
239 |
240 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
241 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
242 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
243 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
244 |
245 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
246 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
247 |
248 | inter_section = tf.maximum(right_down - left_up, 0.0)
249 | inter_area = inter_section[..., 0] * inter_section[..., 1]
250 | union_area = boxes1_area + boxes2_area - inter_area
251 |
252 | return 1.0 * inter_area / union_area
253 |
254 | def bbox_giou(boxes1, boxes2):
255 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
256 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
257 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
258 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
259 |
260 | boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
261 | tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
262 | boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
263 | tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)
264 |
265 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
266 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
267 |
268 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
269 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
270 |
271 | inter_section = tf.maximum(right_down - left_up, 0.0)
272 | inter_area = inter_section[..., 0] * inter_section[..., 1]
273 | union_area = boxes1_area + boxes2_area - inter_area
274 |
275 | # Calculate the IoU between the two bounding boxes
276 | iou = inter_area / union_area
277 |
278 | # Coordinates of the top-left and bottom-right corners of the smallest enclosing box
279 | enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
280 | enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
281 | enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
282 |
283 | # Area of the smallest enclosing box C
284 | enclose_area = enclose[..., 0] * enclose[..., 1]
285 |
286 | # GIoU = IoU - (area of C not covered by the union) / (area of C)
287 | giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
288 |
289 | return giou
290 |
291 | # experimental CIoU (expected to perform better than GIoU)
292 | def bbox_ciou(boxes1, boxes2):
293 | boxes1_coor = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
294 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
295 | boxes2_coor = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
296 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
297 |
298 | left = tf.minimum(boxes1_coor[..., 0], boxes2_coor[..., 0])  # corners of the smallest enclosing box
299 | up = tf.minimum(boxes1_coor[..., 1], boxes2_coor[..., 1])
300 | right = tf.maximum(boxes1_coor[..., 2], boxes2_coor[..., 2])
301 | down = tf.maximum(boxes1_coor[..., 3], boxes2_coor[..., 3])
302 | 
303 | c = (right - left) * (right - left) + (up - down) * (up - down)  # squared diagonal of the enclosing box
304 | iou = bbox_iou(boxes1, boxes2)
305 |
306 | u = (boxes1[..., 0] - boxes2[..., 0]) * (boxes1[..., 0] - boxes2[..., 0]) + (boxes1[..., 1] - boxes2[..., 1]) * (boxes1[..., 1] - boxes2[..., 1])
307 | d = u / c
308 |
309 | ar_gt = boxes2[..., 2] / boxes2[..., 3]
310 | ar_pred = boxes1[..., 2] / boxes1[..., 3]
311 |
312 | ar_loss = 4 / (np.pi * np.pi) * (tf.atan(ar_gt) - tf.atan(ar_pred)) * (tf.atan(ar_gt) - tf.atan(ar_pred))
313 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001)
314 | ciou_term = d + alpha * ar_loss
315 |
316 | return iou - ciou_term
317 |
318 |
319 | def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
320 | NUM_CLASS = len(read_class_names(CLASSES))
321 | conv_shape = tf.shape(conv)
322 | batch_size = conv_shape[0]
323 | output_size = conv_shape[1]
324 | input_size = STRIDES[i] * output_size
325 | conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
326 |
327 | conv_raw_conf = conv[:, :, :, :, 4:5]
328 | conv_raw_prob = conv[:, :, :, :, 5:]
329 |
330 | pred_xywh = pred[:, :, :, :, 0:4]
331 | pred_conf = pred[:, :, :, :, 4:5]
332 |
333 | label_xywh = label[:, :, :, :, 0:4]
334 | respond_bbox = label[:, :, :, :, 4:5]
335 | label_prob = label[:, :, :, :, 5:]
336 |
337 | giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
338 | input_size = tf.cast(input_size, tf.float32)
339 |
340 | bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
341 | giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)
342 |
343 | iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
344 | # For each prediction box, find the largest IoU with any ground-truth box
345 | max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
346 |
347 | # If the largest IoU is below the threshold, the prediction box is treated as background (no object)
348 | respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < YOLO_IOU_LOSS_THRESH, tf.float32 )
349 |
350 | conf_focal = tf.pow(respond_bbox - pred_conf, 2)
351 |
352 | # Calculate the confidence loss:
353 | # the predicted confidence should be 1 when the grid cell contains an object and 0 otherwise.
354 | conf_loss = conf_focal * (
355 | respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
356 | +
357 | respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
358 | )
359 |
360 | prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
361 |
362 | giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
363 | conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
364 | prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))
365 |
366 | return giou_loss, conf_loss, prob_loss
367 |
--------------------------------------------------------------------------------
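A note on decode() above: the raw convolution output is turned into pixel-space boxes as pred_xy = (sigmoid(dxdy) + grid) * stride and pred_wh = exp(dwdh) * anchor * stride. The following minimal NumPy sketch (an annotation, not part of the repository) traces that arithmetic for a single cell of the 13x13 grid; the raw offsets are made-up illustration values, and the anchor is the 116x90 COCO anchor expressed in grid units (116/32 = 3.625, 90/32 = 2.8125):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

stride = 32                         # stride of the 13x13 scale for a 416x416 input
anchor = np.array([3.625, 2.8125])  # 116x90 COCO anchor divided by the stride
grid_xy = np.array([6.0, 6.0])      # grid cell (6, 6) on the 13x13 grid

raw_dxdy = np.array([0.2, -0.1])    # made-up raw center offsets
raw_dwdh = np.array([0.3, 0.1])     # made-up raw width/height offsets

# sigmoid keeps the center offset inside its grid cell; the stride maps grid units to pixels
pred_xy = (sigmoid(raw_dxdy) + grid_xy) * stride
# exp scales the anchor prior, so the network predicts log-space size corrections
pred_wh = np.exp(raw_dwdh) * anchor * stride

print(pred_xy)  # ~[209.6, 207.2] -- box center in input-image pixels
print(pred_wh)  # ~[156.6,  99.5] -- box width and height in pixels

The same equations run at strides 16 and 8 for the 26x26 and 52x52 scales, each with its own three anchors.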
/yolov3/yolov4.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : yolov4.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-31
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : main yolov3 & yolov4 functions
9 | #
10 | #================================================================
11 | import numpy as np
12 | import tensorflow as tf
13 | from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, ZeroPadding2D, BatchNormalization, MaxPool2D
14 | from tensorflow.keras.regularizers import l2
15 | from yolov3.configs import *
16 |
17 | STRIDES = np.array(YOLO_STRIDES)
18 | ANCHORS = (np.array(YOLO_ANCHORS).T/STRIDES).T
19 |
20 | def read_class_names(class_file_name):
21 | # loads class names from a file
22 | names = {}
23 | with open(class_file_name, 'r') as data:
24 | for ID, name in enumerate(data):
25 | names[ID] = name.strip('\n')
26 | return names
27 |
28 | class BatchNormalization(BatchNormalization):
29 | # "Frozen state" and "inference mode" are two separate concepts.
30 | # `layer.trainable = False` freezes the layer, so the layer uses the
31 | # stored moving `var` and `mean` in "inference mode", and neither `gamma`
32 | # nor `beta` will be updated!
33 | def call(self, x, training=False):
34 | if not training:
35 | training = tf.constant(False)
36 | training = tf.logical_and(training, self.trainable)
37 | return super().call(x, training)
38 |
39 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True, activate_type='leaky'):
40 | if downsample:
41 | input_layer = ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
42 | padding = 'valid'
43 | strides = 2
44 | else:
45 | strides = 1
46 | padding = 'same'
47 |
48 | conv = Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides,
49 | padding=padding, use_bias=not bn, kernel_regularizer=l2(0.0005),
50 | kernel_initializer=tf.random_normal_initializer(stddev=0.01),
51 | bias_initializer=tf.constant_initializer(0.))(input_layer)
52 | if bn:
53 | conv = BatchNormalization()(conv)
54 | if activate:
55 | if activate_type == "leaky":
56 | conv = LeakyReLU(alpha=0.1)(conv)
57 | elif activate_type == "mish":
58 | conv = mish(conv)
59 |
60 | return conv
61 |
62 | def mish(x):
63 | return x * tf.math.tanh(tf.math.softplus(x))
64 |
65 | def residual_block(input_layer, input_channel, filter_num1, filter_num2, activate_type='leaky'):
66 | short_cut = input_layer
67 | conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1), activate_type=activate_type)
68 | conv = convolutional(conv , filters_shape=(3, 3, filter_num1, filter_num2), activate_type=activate_type)
69 |
70 | residual_output = short_cut + conv
71 | return residual_output
72 |
73 | def upsample(input_layer):
74 | return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')
75 |
76 | def route_group(input_layer, groups, group_id):
77 | convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
78 | return convs[group_id]
79 |
80 | def darknet53(input_data):
81 | input_data = convolutional(input_data, (3, 3, 3, 32))
82 | input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)
83 |
84 | for i in range(1):
85 | input_data = residual_block(input_data, 64, 32, 64)
86 |
87 | input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True)
88 |
89 | for i in range(2):
90 | input_data = residual_block(input_data, 128, 64, 128)
91 |
92 | input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True)
93 |
94 | for i in range(8):
95 | input_data = residual_block(input_data, 256, 128, 256)
96 |
97 | route_1 = input_data
98 | input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True)
99 |
100 | for i in range(8):
101 | input_data = residual_block(input_data, 512, 256, 512)
102 |
103 | route_2 = input_data
104 | input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True)
105 |
106 | for i in range(4):
107 | input_data = residual_block(input_data, 1024, 512, 1024)
108 |
109 | return route_1, route_2, input_data
110 |
111 | def cspdarknet53(input_data):
112 | input_data = convolutional(input_data, (3, 3, 3, 32), activate_type="mish")
113 | input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True, activate_type="mish")
114 |
115 | route = input_data
116 | route = convolutional(route, (1, 1, 64, 64), activate_type="mish")
117 | input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
118 | for i in range(1):
119 | input_data = residual_block(input_data, 64, 32, 64, activate_type="mish")
120 | input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
121 |
122 | input_data = tf.concat([input_data, route], axis=-1)
123 | input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
124 | input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True, activate_type="mish")
125 | route = input_data
126 | route = convolutional(route, (1, 1, 128, 64), activate_type="mish")
127 | input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
128 | for i in range(2):
129 | input_data = residual_block(input_data, 64, 64, 64, activate_type="mish")
130 | input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
131 | input_data = tf.concat([input_data, route], axis=-1)
132 |
133 | input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
134 | input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True, activate_type="mish")
135 | route = input_data
136 | route = convolutional(route, (1, 1, 256, 128), activate_type="mish")
137 | input_data = convolutional(input_data, (1, 1, 256, 128), activate_type="mish")
138 | for i in range(8):
139 | input_data = residual_block(input_data, 128, 128, 128, activate_type="mish")
140 | input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
141 | input_data = tf.concat([input_data, route], axis=-1)
142 |
143 | input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
144 | route_1 = input_data
145 | input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True, activate_type="mish")
146 | route = input_data
147 | route = convolutional(route, (1, 1, 512, 256), activate_type="mish")
148 | input_data = convolutional(input_data, (1, 1, 512, 256), activate_type="mish")
149 | for i in range(8):
150 | input_data = residual_block(input_data, 256, 256, 256, activate_type="mish")
151 | input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
152 | input_data = tf.concat([input_data, route], axis=-1)
153 |
154 | input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
155 | route_2 = input_data
156 | input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True, activate_type="mish")
157 | route = input_data
158 | route = convolutional(route, (1, 1, 1024, 512), activate_type="mish")
159 | input_data = convolutional(input_data, (1, 1, 1024, 512), activate_type="mish")
160 | for i in range(4):
161 | input_data = residual_block(input_data, 512, 512, 512, activate_type="mish")
162 | input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
163 | input_data = tf.concat([input_data, route], axis=-1)
164 |
165 | input_data = convolutional(input_data, (1, 1, 1024, 1024), activate_type="mish")
166 | input_data = convolutional(input_data, (1, 1, 1024, 512))
167 | input_data = convolutional(input_data, (3, 3, 512, 1024))
168 | input_data = convolutional(input_data, (1, 1, 1024, 512))
169 |
170 | max_pooling_1 = tf.keras.layers.MaxPool2D(pool_size=13, padding='SAME', strides=1)(input_data)
171 | max_pooling_2 = tf.keras.layers.MaxPool2D(pool_size=9, padding='SAME', strides=1)(input_data)
172 | max_pooling_3 = tf.keras.layers.MaxPool2D(pool_size=5, padding='SAME', strides=1)(input_data)
173 | input_data = tf.concat([max_pooling_1, max_pooling_2, max_pooling_3, input_data], axis=-1)
174 |
175 | input_data = convolutional(input_data, (1, 1, 2048, 512))
176 | input_data = convolutional(input_data, (3, 3, 512, 1024))
177 | input_data = convolutional(input_data, (1, 1, 1024, 512))
178 |
179 | return route_1, route_2, input_data
180 |
181 | def darknet19_tiny(input_data):
182 | input_data = convolutional(input_data, (3, 3, 3, 16))
183 | input_data = MaxPool2D(2, 2, 'same')(input_data)
184 | input_data = convolutional(input_data, (3, 3, 16, 32))
185 | input_data = MaxPool2D(2, 2, 'same')(input_data)
186 | input_data = convolutional(input_data, (3, 3, 32, 64))
187 | input_data = MaxPool2D(2, 2, 'same')(input_data)
188 | input_data = convolutional(input_data, (3, 3, 64, 128))
189 | input_data = MaxPool2D(2, 2, 'same')(input_data)
190 | input_data = convolutional(input_data, (3, 3, 128, 256))
191 | route_1 = input_data
192 | input_data = MaxPool2D(2, 2, 'same')(input_data)
193 | input_data = convolutional(input_data, (3, 3, 256, 512))
194 | input_data = MaxPool2D(2, 1, 'same')(input_data)
195 | input_data = convolutional(input_data, (3, 3, 512, 1024))
196 |
197 | return route_1, input_data
198 |
199 | def cspdarknet53_tiny(input_data): # note: unsure of the proper name for this backbone
200 | input_data = convolutional(input_data, (3, 3, 3, 32), downsample=True)
201 | input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)
202 | input_data = convolutional(input_data, (3, 3, 64, 64))
203 |
204 | route = input_data
205 | input_data = route_group(input_data, 2, 1)
206 | input_data = convolutional(input_data, (3, 3, 32, 32))
207 | route_1 = input_data
208 | input_data = convolutional(input_data, (3, 3, 32, 32))
209 | input_data = tf.concat([input_data, route_1], axis=-1)
210 | input_data = convolutional(input_data, (1, 1, 32, 64))
211 | input_data = tf.concat([route, input_data], axis=-1)
212 | input_data = MaxPool2D(2, 2, 'same')(input_data)
213 |
214 | input_data = convolutional(input_data, (3, 3, 64, 128))
215 | route = input_data
216 | input_data = route_group(input_data, 2, 1)
217 | input_data = convolutional(input_data, (3, 3, 64, 64))
218 | route_1 = input_data
219 | input_data = convolutional(input_data, (3, 3, 64, 64))
220 | input_data = tf.concat([input_data, route_1], axis=-1)
221 | input_data = convolutional(input_data, (1, 1, 64, 128))
222 | input_data = tf.concat([route, input_data], axis=-1)
223 | input_data = MaxPool2D(2, 2, 'same')(input_data)
224 |
225 | input_data = convolutional(input_data, (3, 3, 128, 256))
226 | route = input_data
227 | input_data = route_group(input_data, 2, 1)
228 | input_data = convolutional(input_data, (3, 3, 128, 128))
229 | route_1 = input_data
230 | input_data = convolutional(input_data, (3, 3, 128, 128))
231 | input_data = tf.concat([input_data, route_1], axis=-1)
232 | input_data = convolutional(input_data, (1, 1, 128, 256))
233 | route_1 = input_data
234 | input_data = tf.concat([route, input_data], axis=-1)
235 | input_data = MaxPool2D(2, 2, 'same')(input_data)
236 |
237 | input_data = convolutional(input_data, (3, 3, 512, 512))
238 |
239 | return route_1, input_data
240 |
241 | def YOLOv3(input_layer, NUM_CLASS):
242 | # Passing the input through the Darknet-53 backbone yields three feature branches
243 | route_1, route_2, conv = darknet53(input_layer)
244 | # Five successive convolution operations (the orange DBL blocks in the reference diagram)
245 | conv = convolutional(conv, (1, 1, 1024, 512))
246 | conv = convolutional(conv, (3, 3, 512, 1024))
247 | conv = convolutional(conv, (1, 1, 1024, 512))
248 | conv = convolutional(conv, (3, 3, 512, 1024))
249 | conv = convolutional(conv, (1, 1, 1024, 512))
250 | conv_lobj_branch = convolutional(conv, (3, 3, 512, 1024))
251 |
252 | # conv_lbbox is used to predict large-sized objects; shape = [None, 13, 13, 255]
253 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)
254 |
255 | conv = convolutional(conv, (1, 1, 512, 256))
256 | # upsample uses nearest-neighbor interpolation, which has the advantage that the
257 | # upsampling step needs no learned weights, thereby reducing the network's parameter count
258 | conv = upsample(conv)
259 |
260 | conv = tf.concat([conv, route_2], axis=-1)
261 | conv = convolutional(conv, (1, 1, 768, 256))
262 | conv = convolutional(conv, (3, 3, 256, 512))
263 | conv = convolutional(conv, (1, 1, 512, 256))
264 | conv = convolutional(conv, (3, 3, 256, 512))
265 | conv = convolutional(conv, (1, 1, 512, 256))
266 | conv_mobj_branch = convolutional(conv, (3, 3, 256, 512))
267 |
268 | # conv_mbbox is used to predict medium-sized objects, shape = [None, 26, 26, 255]
269 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
270 |
271 | conv = convolutional(conv, (1, 1, 256, 128))
272 | conv = upsample(conv)
273 |
274 | conv = tf.concat([conv, route_1], axis=-1)
275 | conv = convolutional(conv, (1, 1, 384, 128))
276 | conv = convolutional(conv, (3, 3, 128, 256))
277 | conv = convolutional(conv, (1, 1, 256, 128))
278 | conv = convolutional(conv, (3, 3, 128, 256))
279 | conv = convolutional(conv, (1, 1, 256, 128))
280 | conv_sobj_branch = convolutional(conv, (3, 3, 128, 256))
281 |
282 | # conv_sbbox is used to predict small-sized objects; shape = [None, 52, 52, 255]
283 | conv_sbbox = convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)
284 |
285 | return [conv_sbbox, conv_mbbox, conv_lbbox]
286 |
287 | def YOLOv4(input_layer, NUM_CLASS):
288 | route_1, route_2, conv = cspdarknet53(input_layer)
289 |
290 | route = conv
291 | conv = convolutional(conv, (1, 1, 512, 256))
292 | conv = upsample(conv)
293 | route_2 = convolutional(route_2, (1, 1, 512, 256))
294 | conv = tf.concat([route_2, conv], axis=-1)
295 |
296 | conv = convolutional(conv, (1, 1, 512, 256))
297 | conv = convolutional(conv, (3, 3, 256, 512))
298 | conv = convolutional(conv, (1, 1, 512, 256))
299 | conv = convolutional(conv, (3, 3, 256, 512))
300 | conv = convolutional(conv, (1, 1, 512, 256))
301 |
302 | route_2 = conv
303 | conv = convolutional(conv, (1, 1, 256, 128))
304 | conv = upsample(conv)
305 | route_1 = convolutional(route_1, (1, 1, 256, 128))
306 | conv = tf.concat([route_1, conv], axis=-1)
307 |
308 | conv = convolutional(conv, (1, 1, 256, 128))
309 | conv = convolutional(conv, (3, 3, 128, 256))
310 | conv = convolutional(conv, (1, 1, 256, 128))
311 | conv = convolutional(conv, (3, 3, 128, 256))
312 | conv = convolutional(conv, (1, 1, 256, 128))
313 |
314 | route_1 = conv
315 | conv = convolutional(conv, (3, 3, 128, 256))
316 | conv_sbbox = convolutional(conv, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
317 |
318 | conv = convolutional(route_1, (3, 3, 128, 256), downsample=True)
319 | conv = tf.concat([conv, route_2], axis=-1)
320 |
321 | conv = convolutional(conv, (1, 1, 512, 256))
322 | conv = convolutional(conv, (3, 3, 256, 512))
323 | conv = convolutional(conv, (1, 1, 512, 256))
324 | conv = convolutional(conv, (3, 3, 256, 512))
325 | conv = convolutional(conv, (1, 1, 512, 256))
326 |
327 | route_2 = conv
328 | conv = convolutional(conv, (3, 3, 256, 512))
329 | conv_mbbox = convolutional(conv, (1, 1, 512, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
330 |
331 | conv = convolutional(route_2, (3, 3, 256, 512), downsample=True)
332 | conv = tf.concat([conv, route], axis=-1)
333 |
334 | conv = convolutional(conv, (1, 1, 1024, 512))
335 | conv = convolutional(conv, (3, 3, 512, 1024))
336 | conv = convolutional(conv, (1, 1, 1024, 512))
337 | conv = convolutional(conv, (3, 3, 512, 1024))
338 | conv = convolutional(conv, (1, 1, 1024, 512))
339 |
340 | conv = convolutional(conv, (3, 3, 512, 1024))
341 | conv_lbbox = convolutional(conv, (1, 1, 1024, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
342 |
343 | return [conv_sbbox, conv_mbbox, conv_lbbox]
344 |
345 | def YOLOv3_tiny(input_layer, NUM_CLASS):
346 | # Passing the input through the Darknet-19 tiny backbone yields two feature branches
347 | route_1, conv = darknet19_tiny(input_layer)
348 |
349 | conv = convolutional(conv, (1, 1, 1024, 256))
350 | conv_lobj_branch = convolutional(conv, (3, 3, 256, 512))
351 |
352 | # conv_lbbox is used to predict large-sized objects; shape = [None, 13, 13, 255]
353 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)
354 |
355 | conv = convolutional(conv, (1, 1, 256, 128))
356 | # upsample uses nearest-neighbor interpolation, which has the advantage that the
357 | # upsampling step needs no learned weights, thereby reducing the network's parameter count
358 | conv = upsample(conv)
359 |
360 | conv = tf.concat([conv, route_1], axis=-1)
361 | conv_mobj_branch = convolutional(conv, (3, 3, 128, 256))
362 | # conv_mbbox is used to predict medium-sized objects; shape = [None, 26, 26, 255]
363 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
364 |
365 | return [conv_mbbox, conv_lbbox]
366 |
367 | def YOLOv4_tiny(input_layer, NUM_CLASS):
368 | route_1, conv = cspdarknet53_tiny(input_layer)
369 |
370 | conv = convolutional(conv, (1, 1, 512, 256))
371 |
372 | conv_lobj_branch = convolutional(conv, (3, 3, 256, 512))
373 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
374 |
375 | conv = convolutional(conv, (1, 1, 256, 128))
376 | conv = upsample(conv)
377 | conv = tf.concat([conv, route_1], axis=-1)
378 |
379 | conv_mobj_branch = convolutional(conv, (3, 3, 128, 256))
380 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)
381 |
382 | return [conv_mbbox, conv_lbbox]
383 |
384 | def Create_Yolo(input_size=416, channels=3, training=False, CLASSES=YOLO_COCO_CLASSES):
385 | NUM_CLASS = len(read_class_names(CLASSES))
386 | input_layer = Input([input_size, input_size, channels])
387 |
388 | if TRAIN_YOLO_TINY:
389 | if YOLO_TYPE == "yolov4":
390 | conv_tensors = YOLOv4_tiny(input_layer, NUM_CLASS)
391 | elif YOLO_TYPE == "yolov3":
392 | conv_tensors = YOLOv3_tiny(input_layer, NUM_CLASS)
393 | else:
394 | if YOLO_TYPE == "yolov4":
395 | conv_tensors = YOLOv4(input_layer, NUM_CLASS)
396 | elif YOLO_TYPE == "yolov3":
397 | conv_tensors = YOLOv3(input_layer, NUM_CLASS)
398 |
399 | output_tensors = []
400 | for i, conv_tensor in enumerate(conv_tensors):
401 | pred_tensor = decode(conv_tensor, NUM_CLASS, i)
402 | if training: output_tensors.append(conv_tensor)
403 | output_tensors.append(pred_tensor)
404 |
405 | Yolo = tf.keras.Model(input_layer, output_tensors)
406 | return Yolo
407 |
408 |
409 | def decode(conv_output, NUM_CLASS, i=0):
410 | # where i = 0, 1 or 2 to correspond to the three grid scales
411 | conv_shape = tf.shape(conv_output)
412 | batch_size = conv_shape[0]
413 | output_size = conv_shape[1]
414 |
415 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
416 |
417 | #conv_raw_dxdy = conv_output[:, :, :, :, 0:2] # offset of center position
418 | #conv_raw_dwdh = conv_output[:, :, :, :, 2:4] # Prediction box length and width offset
419 | #conv_raw_conf = conv_output[:, :, :, :, 4:5] # confidence of the prediction box
420 | #conv_raw_prob = conv_output[:, :, :, :, 5: ] # category probability of the prediction box
421 | conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)
422 |
423 | # Next, draw the grid; output_size equals 13, 26 or 52 depending on the scale
424 | #y = tf.range(output_size, dtype=tf.int32)
425 | #y = tf.expand_dims(y, -1)
426 | #y = tf.tile(y, [1, output_size])
427 | #x = tf.range(output_size,dtype=tf.int32)
428 | #x = tf.expand_dims(x, 0)
429 | #x = tf.tile(x, [output_size, 1])
430 | xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size))
431 | xy_grid = tf.expand_dims(tf.stack(xy_grid, axis=-1), axis=2) # [gx, gy, 1, 2]
432 | xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [batch_size, 1, 1, 3, 1])
433 | xy_grid = tf.cast(xy_grid, tf.float32)
434 |
435 | #xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1)
436 | #xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
437 | #xy_grid = tf.cast(xy_grid, tf.float32)
438 |
439 | # Calculate the center position of the prediction box:
440 | pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
441 | # Calculate the length and width of the prediction box:
442 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]
443 |
444 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
445 | pred_conf = tf.sigmoid(conv_raw_conf) # predicted confidence of the box
446 | pred_prob = tf.sigmoid(conv_raw_prob) # predicted class probabilities of the box
447 |
448 | # concatenate box coordinates, confidence and class probabilities
449 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
450 |
451 |
452 | def bbox_iou(boxes1, boxes2):
453 | boxes1_area = boxes1[..., 2] * boxes1[..., 3]
454 | boxes2_area = boxes2[..., 2] * boxes2[..., 3]
455 |
456 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
457 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
458 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
459 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
460 |
461 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
462 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
463 |
464 | inter_section = tf.maximum(right_down - left_up, 0.0)
465 | inter_area = inter_section[..., 0] * inter_section[..., 1]
466 | union_area = boxes1_area + boxes2_area - inter_area
467 |
468 | return 1.0 * inter_area / union_area
469 |
470 | def bbox_giou(boxes1, boxes2):
471 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
472 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
473 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
474 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
475 |
476 | boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
477 | tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
478 | boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
479 | tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)
480 |
481 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
482 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])
483 |
484 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
485 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
486 |
487 | inter_section = tf.maximum(right_down - left_up, 0.0)
488 | inter_area = inter_section[..., 0] * inter_section[..., 1]
489 | union_area = boxes1_area + boxes2_area - inter_area
490 |
491 | # Calculate the IoU between the two bounding boxes
492 | iou = inter_area / union_area
493 |
494 | # Coordinates of the top-left and bottom-right corners of the smallest enclosing box
495 | enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
496 | enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
497 | enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
498 |
499 | # Area of the smallest enclosing box C
500 | enclose_area = enclose[..., 0] * enclose[..., 1]
501 |
502 | # GIoU = IoU - (area of C not covered by the union) / (area of C)
503 | giou = iou - 1.0 * (enclose_area - union_area) / enclose_area
504 |
505 | return giou
506 |
507 | # experimental CIoU (expected to perform better than GIoU)
508 | def bbox_ciou(boxes1, boxes2):
509 | boxes1_coor = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
510 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
511 | boxes2_coor = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
512 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)
513 |
514 | left = tf.minimum(boxes1_coor[..., 0], boxes2_coor[..., 0])  # corners of the smallest enclosing box
515 | up = tf.minimum(boxes1_coor[..., 1], boxes2_coor[..., 1])
516 | right = tf.maximum(boxes1_coor[..., 2], boxes2_coor[..., 2])
517 | down = tf.maximum(boxes1_coor[..., 3], boxes2_coor[..., 3])
518 | 
519 | c = (right - left) * (right - left) + (up - down) * (up - down)  # squared diagonal of the enclosing box
520 | iou = bbox_iou(boxes1, boxes2)
521 |
522 | u = (boxes1[..., 0] - boxes2[..., 0]) * (boxes1[..., 0] - boxes2[..., 0]) + (boxes1[..., 1] - boxes2[..., 1]) * (boxes1[..., 1] - boxes2[..., 1])
523 | d = u / c
524 |
525 | ar_gt = boxes2[..., 2] / boxes2[..., 3]
526 | ar_pred = boxes1[..., 2] / boxes1[..., 3]
527 |
528 | ar_loss = 4 / (np.pi * np.pi) * (tf.atan(ar_gt) - tf.atan(ar_pred)) * (tf.atan(ar_gt) - tf.atan(ar_pred))
529 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001)
530 | ciou_term = d + alpha * ar_loss
531 |
532 | return iou - ciou_term
533 |
534 |
535 | def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
536 | NUM_CLASS = len(read_class_names(CLASSES))
537 | conv_shape = tf.shape(conv)
538 | batch_size = conv_shape[0]
539 | output_size = conv_shape[1]
540 | input_size = STRIDES[i] * output_size
541 | conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))
542 |
543 | conv_raw_conf = conv[:, :, :, :, 4:5]
544 | conv_raw_prob = conv[:, :, :, :, 5:]
545 |
546 | pred_xywh = pred[:, :, :, :, 0:4]
547 | pred_conf = pred[:, :, :, :, 4:5]
548 |
549 | label_xywh = label[:, :, :, :, 0:4]
550 | respond_bbox = label[:, :, :, :, 4:5]
551 | label_prob = label[:, :, :, :, 5:]
552 |
553 | giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
554 | input_size = tf.cast(input_size, tf.float32)
555 |
556 | bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
557 | giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)
558 |
559 | iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
560 | # For each prediction box, find the largest IoU with any ground-truth box
561 | max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
562 |
563 | # If the largest IoU is below the threshold, the prediction box is treated as background (no object)
564 | respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < YOLO_IOU_LOSS_THRESH, tf.float32 )
565 |
566 | conf_focal = tf.pow(respond_bbox - pred_conf, 2)
567 |
568 | # Calculate the confidence loss:
569 | # the predicted confidence should be 1 when the grid cell contains an object and 0 otherwise.
570 | conf_loss = conf_focal * (
571 | respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
572 | +
573 | respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
574 | )
575 |
576 | prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)
577 |
578 | giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
579 | conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
580 | prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))
581 |
582 | return giou_loss, conf_loss, prob_loss
583 |
--------------------------------------------------------------------------------
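As a sanity check on the GIoU used by compute_loss() (GIoU = IoU - (C - U)/C, where C is the area of the smallest enclosing box and U the union area), the sketch below (an annotation, not part of the repository) mirrors the arithmetic of bbox_giou() on two hand-picked (x, y, w, h) boxes, so the result can be verified by hand:

import tensorflow as tf

def giou_xywh(b1, b2):
    # convert (center x, center y, w, h) to corner form, as bbox_giou() does
    b1 = tf.concat([b1[..., :2] - b1[..., 2:] * 0.5, b1[..., :2] + b1[..., 2:] * 0.5], axis=-1)
    b2 = tf.concat([b2[..., :2] - b2[..., 2:] * 0.5, b2[..., :2] + b2[..., 2:] * 0.5], axis=-1)
    area1 = (b1[..., 2] - b1[..., 0]) * (b1[..., 3] - b1[..., 1])
    area2 = (b2[..., 2] - b2[..., 0]) * (b2[..., 3] - b2[..., 1])
    # intersection and union
    lu = tf.maximum(b1[..., :2], b2[..., :2])
    rd = tf.minimum(b1[..., 2:], b2[..., 2:])
    inter = tf.reduce_prod(tf.maximum(rd - lu, 0.0), axis=-1)
    union = area1 + area2 - inter
    iou = inter / union
    # smallest enclosing box C
    enc_lu = tf.minimum(b1[..., :2], b2[..., :2])
    enc_rd = tf.maximum(b1[..., 2:], b2[..., 2:])
    enclose = tf.reduce_prod(enc_rd - enc_lu, axis=-1)
    return iou - (enclose - union) / enclose

b1 = tf.constant([[5.0, 5.0, 4.0, 4.0]])  # corners (3,3)-(7,7), area 16
b2 = tf.constant([[6.0, 6.0, 4.0, 4.0]])  # corners (4,4)-(8,8), area 16
# intersection (4,4)-(7,7) = 9, union = 23, enclosing box (3,3)-(8,8) = 25
# IoU = 9/23 ~= 0.391, GIoU = 9/23 - (25 - 23)/25 ~= 0.311
print(giou_xywh(b1, b2).numpy())

Because the loss term is respond_bbox * bbox_loss_scale * (1 - giou), a perfectly overlapping box (GIoU = 1) contributes zero loss, while disjoint boxes (GIoU approaching -1) are penalized harder than a plain IoU loss would allow.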