├── .gitignore ├── Collect_training_data.py ├── IMAGES ├── city.jpg ├── city_pred.jpg ├── kite.jpg ├── kite_pred.jpg ├── mnist_test.jpg ├── street.jpg ├── street_pred.jpg ├── tensorboard.png ├── test.mp4 └── tracking_results.gif ├── LICENSE ├── README.md ├── YOLOv3_colab_training.ipynb ├── checkpoints └── checkpoint ├── deep_sort ├── detection.py ├── generate_detections.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── test_tracking.gif ├── track.py └── tracker.py ├── detect_mnist.py ├── detection_custom.py ├── detection_demo.py ├── evaluate_mAP.py ├── mnist ├── make_data.py ├── mnist.names ├── mnist │ ├── test.zip │ └── train.zip └── show_image.py ├── model_data ├── coco │ ├── coco.names │ ├── train2017.txt │ └── val2017.txt └── mars-small128.pb ├── object_tracker.py ├── requirements.txt ├── tools ├── Convert_to_TRT.py ├── Convert_to_pb.py ├── Detection_to_XML.py ├── XML_to_YOLOv3.py └── oid_to_pascal_voc_xml.py ├── train.py └── yolov3 ├── __ init __.py ├── __pycache__ ├── configs.cpython-36.pyc ├── dataset.cpython-36.pyc ├── utils.cpython-36.pyc └── yolov3.cpython-36.pyc ├── configs.py ├── dataset.py ├── utils.py ├── yolov3.py └── yolov4.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | model_data 3 | configs.py -------------------------------------------------------------------------------- /Collect_training_data.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : Collect_training_data.py 4 | # Author : PyLessons 5 | # Created date: 2020-09-27 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : YOLO detection to XML example script 9 | # 10 | #================================================================ 11 | import os 12 | import subprocess 13 | import time 14 | from datetime import datetime 15 | import cv2 16 | import mss 17 | import numpy as np 18 | import tensorflow as tf 19 | from yolov3.utils import * 20 | from yolov3.configs import * 21 | from yolov3.yolov4 import read_class_names 22 | from tools.Detection_to_XML import CreateXMLfile 23 | import random 24 | 25 | def draw_enemy(image, bboxes, CLASSES=YOLO_COCO_CLASSES, show_label=True, show_confidence = True, Text_colors=(255,255,0), rectangle_colors='', tracking=False): 26 | NUM_CLASS = read_class_names(CLASSES) 27 | num_classes = len(NUM_CLASS) 28 | image_h, image_w, _ = image.shape 29 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) 
for x in range(num_classes)] 30 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 31 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 32 | 33 | random.seed(0) 34 | random.shuffle(colors) 35 | random.seed(None) 36 | 37 | detection_list = [] 38 | 39 | for i, bbox in enumerate(bboxes): 40 | coor = np.array(bbox[:4], dtype=np.int32) 41 | score = bbox[4] 42 | class_ind = int(bbox[5]) 43 | bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind] 44 | bbox_thick = int(0.6 * (image_h + image_w) / 1000) 45 | if bbox_thick < 1: bbox_thick = 1 46 | fontScale = 0.75 * bbox_thick 47 | (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3]) 48 | 49 | # put object rectangle 50 | cv2.rectangle(image, (x1, y1), (x2, y2), bbox_color, bbox_thick*2) 51 | 52 | x, y = int(x1+(x2-x1)/2), int(y1+(y2-y1)/2) 53 | 54 | if show_label: 55 | # get text label 56 | score_str = " {:.2f}".format(score) if show_confidence else "" 57 | 58 | if tracking: score_str = " "+str(score) 59 | 60 | label = "{}".format(NUM_CLASS[class_ind]) + score_str 61 | 62 | # get text size 63 | (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX_SMALL, 64 | fontScale, thickness=bbox_thick) 65 | # put filled text rectangle 66 | cv2.rectangle(image, (x1, y1), (x1 + text_width, y1 - text_height - baseline), bbox_color, thickness=cv2.FILLED) 67 | 68 | # put text above rectangle 69 | cv2.putText(image, label, (x1, y1-4), cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale, Text_colors, bbox_thick, lineType=cv2.LINE_AA) 70 | 71 | return image 72 | 73 | def detect_enemy(Yolo, original_image, input_size=416, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''): 74 | image_data = image_preprocess(original_image, [input_size, input_size]) 75 | image_data = image_data[np.newaxis, ...].astype(np.float32) 76 | 77 | if YOLO_FRAMEWORK == "tf": 78 | pred_bbox = Yolo.predict(image_data) 79 | 80 | elif YOLO_FRAMEWORK == "trt": 81 | batched_input = tf.constant(image_data) 82 | result = Yolo(batched_input) 83 | pred_bbox = [] 84 | for key, value in result.items(): 85 | value = value.numpy() 86 | pred_bbox.append(value) 87 | 88 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 89 | pred_bbox = tf.concat(pred_bbox, axis=0) 90 | 91 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold) 92 | bboxes = nms(bboxes, iou_threshold, method='nms') 93 | 94 | image = draw_enemy(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) 95 | 96 | return image, bboxes 97 | 98 | offset = 30 99 | times = [] 100 | sct = mss.mss() 101 | yolo = Load_Yolo_model() 102 | while True: 103 | t1 = time.time() 104 | img = np.array(sct.grab({"top": 87-offset, "left": 1920, "width": 1280, "height": 720, "mon": -1})) 105 | img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB) 106 | image, bboxes = detect_enemy(yolo, np.copy(img), input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0)) 107 | if len(bboxes) > 0: 108 | CreateXMLfile("XML_Detections", str(int(time.time())), img, bboxes, read_class_names(TRAIN_CLASSES)) 109 | print("got it") 110 | time.sleep(2) 111 | 112 | t2 = time.time() 113 | times.append(t2-t1) 114 | times = times[-20:] 115 | ms = sum(times)/len(times)*1000 116 | fps = 1000 / ms 117 | print("FPS", fps) 118 | 119 | #cv2.imshow("Detection image", img) 120 | #if cv2.waitKey(25) & 0xFF == ord("q"): 121 | #cv2.destroyAllWindows() 122 | #break 123 | 
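# To sanity-check what this script saves, here is a minimal sketch of reading one
# generated annotation back. It assumes CreateXMLfile (defined in
# tools/Detection_to_XML.py, whose source is not shown here) writes Pascal-VOC-style
# XML, as the companion tools/XML_to_YOLOv3.py and oid_to_pascal_voc_xml.py suggest;
# the tag names and the file name below are assumptions, not verified against that tool.
import xml.etree.ElementTree as ET

root = ET.parse("XML_Detections/1601222222.xml").getroot()  # placeholder file name
for obj in root.iter("object"):
    box = obj.find("bndbox")
    print(obj.find("name").text,
          int(box.find("xmin").text), int(box.find("ymin").text),
          int(box.find("xmax").text), int(box.find("ymax").text))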
-------------------------------------------------------------------------------- /IMAGES/city.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/city.jpg -------------------------------------------------------------------------------- /IMAGES/city_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/city_pred.jpg -------------------------------------------------------------------------------- /IMAGES/kite.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/kite.jpg -------------------------------------------------------------------------------- /IMAGES/kite_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/kite_pred.jpg -------------------------------------------------------------------------------- /IMAGES/mnist_test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/mnist_test.jpg -------------------------------------------------------------------------------- /IMAGES/street.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/street.jpg -------------------------------------------------------------------------------- /IMAGES/street_pred.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/street_pred.jpg -------------------------------------------------------------------------------- /IMAGES/tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/tensorboard.png -------------------------------------------------------------------------------- /IMAGES/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/test.mp4 -------------------------------------------------------------------------------- /IMAGES/tracking_results.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/IMAGES/tracking_results.gif -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 pythonlessons 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 
7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # TensorFlow-2.x-YOLOv3 and YOLOv4 tutorials 2 | 3 | YOLOv3 and YOLOv4 implementation in TensorFlow 2.x, with support for training, transfer training, object tracking mAP and so on... 4 | Code was tested with following specs: 5 | - i7-7700k CPU and Nvidia 1080TI GPU 6 | - OS Ubuntu 18.04 7 | - CUDA 10.1 8 | - cuDNN v7.6.5 9 | - TensorRT-6.0.1.5 10 | - Tensorflow-GPU 2.3.1 11 | - Code was tested on Ubuntu and Windows 10 (TensorRT not supported officially) 12 | 13 | ## Installation 14 | First, clone or download this GitHub repository. 15 | Install requirements and download pretrained weights: 16 | ``` 17 | pip install -r ./requirements.txt 18 | 19 | # yolov3 20 | wget -P model_data https://pjreddie.com/media/files/yolov3.weights 21 | 22 | # yolov3-tiny 23 | wget -P model_data https://pjreddie.com/media/files/yolov3-tiny.weights 24 | 25 | # yolov4 26 | wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v3_optimal/yolov4.weights 27 | 28 | # yolov4-tiny 29 | wget -P model_data https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights 30 | ``` 31 | 32 | ## Quick start 33 | Start with using pretrained weights to test predictions on both image and video: 34 | ``` 35 | python detection_demo.py 36 | ``` 37 | 38 |

39 | 40 |
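To script a single image prediction instead of the bundled demo, here is a minimal sketch, assuming the `detect_image` helper in `yolov3/utils.py` keeps the signature that `detection_demo.py` relies on (check your copy if it differs):
```
from yolov3.utils import Load_Yolo_model, detect_image
from yolov3.configs import *

yolo = Load_Yolo_model()  # picks weights and framework from yolov3/configs.py
# writes the annotated image to the output path and also returns it
detect_image(yolo, "IMAGES/kite.jpg", "IMAGES/kite_pred.jpg",
             input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255, 0, 0))
```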

41 | 42 | ## Quick training for the custom mnist dataset 43 | The mnist folder contains mnist images; create the training data with: 44 | ``` 45 | python mnist/make_data.py 46 | ``` 47 | The `./yolov3/configs.py` file is already configured for mnist training. 48 | 49 | Now you can train the model and then evaluate it: 50 | ``` 51 | python train.py 52 | tensorboard --logdir=log 53 | ``` 54 | Track training progress in Tensorboard at http://localhost:6006/: 55 |

56 | 57 |

58 | 59 | Test detection with the `detect_mnist.py` script: 60 | ``` 61 | python detect_mnist.py 62 | ``` 63 | Results: 64 |

65 | 66 |
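Before moving on to fully custom data, it helps to know what the annotation files look like. Judging by how boxes are handled elsewhere in this implementation, each annotation line should hold an image path followed by one `x_min,y_min,x_max,y_max,class_id` group per object; the exact file name and numbers below are illustrative, not taken from `make_data.py`:
```
# one image per line: <image_path> <x_min>,<y_min>,<x_max>,<y_max>,<class_id> ...
mnist/mnist_train/000001.jpg 112,5,162,55,3 339,368,389,418,7
```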

67 | 68 | ## Custom YOLOv3 & YOLOv4 object detection training 69 | Custom training requires preparing a dataset first; how to prepare a dataset and train a custom model is covered in the following link:
70 | https://pylessons.com/YOLOv3-TF2-custrom-train/
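In practice, pointing the trainer at your own dataset comes down to a few values in `yolov3/configs.py`. `TRAIN_CLASSES` and the `f"{YOLO_TYPE}_custom"` model name appear later in this README; the annotation-path option names are an assumption about this implementation's config, and all paths are placeholders:
```
# yolov3/configs.py - typical custom-training edits
TRAIN_CLASSES    = "model_data/custom.names"      # one class name per line
TRAIN_ANNOT_PATH = "model_data/custom_train.txt"  # lines in the format shown above
TEST_ANNOT_PATH  = "model_data/custom_test.txt"
```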
71 | You can read more about YOLOv4 training [on this link](https://pylessons.com/YOLOv4-TF2-training/). I didn't have time to implement all of the YOLOv4 Bag-of-Freebies to improve the training process… Maybe later I'll find time to do that, but for now I leave it as it is. I recommend using [Alex's Darknet](https://github.com/AlexeyAB/darknet) to train your custom model if you need maximum performance; otherwise, you can use my implementation. 72 | 73 | ## Google Colab Custom Yolo v3 training 74 | To learn more about Google Colab free GPU training, visit my [text version tutorial](https://pylessons.com/YOLOv3-TF2-GoogleColab/). 75 | 76 | ## Yolo v3 Tiny train and detection 77 | For detailed instructions on how to use YOLOv3-Tiny, follow my text version tutorial [YOLOv3-Tiny support](https://pylessons.com/YOLOv3-TF2-Tiny/). Short instructions: 78 | - Get YOLOv3-Tiny weights: ```wget -P model_data https://pjreddie.com/media/files/yolov3-tiny.weights``` 79 | - In `yolov3/configs.py`, change `TRAIN_YOLO_TINY` from `False` to `True` 80 | - Run the `detection_demo.py` script. 81 | 82 | ## Yolo v3 Object tracking 83 | To learn more about object tracking with Deep SORT, visit [the following link](https://pylessons.com/YOLOv3-TF2-DeepSort/). 84 | Quick test: 85 | - Clone this repository; 86 | - Make sure object detection works for you; 87 | - Run the `object_tracker.py` script (a stripped-down sketch of what it wires together follows the tracking results below) 88 |

89 | 90 |
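The tracking script is mostly glue around the modules under `deep_sort/` (included in full further down this file). Here is a stripped-down sketch of that wiring, with the YOLO side omitted; `frame` is a BGR image and `boxes`/`scores`/`names` are its post-NMS detections:
```
import numpy as np
from deep_sort import nn_matching, generate_detections as gdet
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker

encoder = gdet.create_box_encoder("model_data/mars-small128.pb", batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.7, budget=100)
tracker = Tracker(metric)

def track_frame(frame, boxes, scores, names):
    # Detection expects boxes as (top-left x, top-left y, width, height)
    features = encoder(frame, np.asarray(boxes))
    detections = [Detection(b, s, n, f) for b, s, n, f in zip(boxes, scores, names, features)]
    tracker.predict()               # Kalman prediction step for every track
    tracker.update(detections)      # matching cascade + track management
    return [(t.track_id, t.get_class(), t.to_tlbr())
            for t in tracker.tracks if t.is_confirmed() and t.time_since_update <= 1]
```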

91 | 92 | ## YOLOv3 vs YOLOv4 comparison on 1080TI: 93 | 94 | YOLO FPS on COCO 2017 Dataset: 95 | | Detection | 320x320 | 416x416 | 512x512 | 96 | |--------------|---------|---------|---------| 97 | | YoloV3 FPS | 24.38 | 20.94 | 18.57 | 98 | | YoloV4 FPS | 22.15 | 18.69 | 16.50 | 99 | 100 | TensorRT FPS on COCO 2017 Dataset: 101 | | Detection | 320x320 | 416x416 | 512x512 | 608x608 | 102 | |-----------------|---------|---------|---------|---------| 103 | | YoloV4 FP32 FPS | 31.23 | 27.30 | 22.63 | 18.17 | 104 | | YoloV4 FP16 FPS | 30.33 | 25.44 | 21.94 | 17.99 | 105 | | YoloV4 INT8 FPS | 85.18 | 62.02 | 47.50 | 37.32 | 106 | | YoloV3 INT8 FPS | 84.65 | 52.72 | 38.22 | 28.75 | 107 | 108 | mAP on COCO 2017 Dataset: 109 | | Detection | 320x320 | 416x416 | 512x512 | 110 | |------------------|---------|---------|---------| 111 | | YoloV3 mAP50 | 49.85 | 55.31 | 57.48 | 112 | | YoloV4 mAP50 | 48.58 | 56.92 | 61.71 | 113 | 114 | TensorRT mAP on COCO 2017 Dataset: 115 | | Detection | 320x320 | 416x416 | 512x512 | 608x608 | 116 | |-------------------|---------|---------|---------|---------| 117 | | YoloV4 FP32 mAP50 | 48.58 | 56.92 | 61.71 | 63.92 | 118 | | YoloV4 FP16 mAP50 | 48.57 | 56.92 | 61.69 | 63.92 | 119 | | YoloV4 INT8 mAP50 | 40.61 | 48.36 | 52.84 | 54.53 | 120 | | YoloV3 INT8 mAP50 | 44.19 | 48.64 | 50.10 | 50.69 | 121 | 122 | ## Converting YOLO to TensorRT 123 | I will give two examples, both for the YOLOv4 model with quantize_mode=INT8 and a model input size of 608. A detailed tutorial is on this [link](https://pylessons.com/YOLOv4-TF2-TensorRT/); a sketch of the underlying TF-TRT conversion call follows the instructions below. 124 | ### Default weights from COCO dataset: 125 | - Download weights from the links above; 126 | - In the `configs.py` script, choose your `YOLO_TYPE`; 127 | - In the `configs.py` script, set `YOLO_INPUT_SIZE = 608`; 128 | - In the `configs.py` script, set `YOLO_FRAMEWORK = "trt"`; 129 | - From the main directory, in a terminal, type `python tools/Convert_to_pb.py`; 130 | - From the main directory, in a terminal, type `python tools/Convert_to_TRT.py`; 131 | - In the `configs.py` script, set `YOLO_CUSTOM_WEIGHTS = f'checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}'`; 132 | - Now you can run `detection_demo.py`; it's best to test with the `detect_video` function. 133 | 134 | ### Custom trained YOLO weights: 135 | - Download weights from the links above; 136 | - In the `configs.py` script, choose your `YOLO_TYPE`; 137 | - In the `configs.py` script, set `YOLO_INPUT_SIZE = 608`; 138 | - Train a custom YOLO model with the instructions above; 139 | - In the `configs.py` script, set `YOLO_CUSTOM_WEIGHTS = f"{YOLO_TYPE}_custom"`; 140 | - In the `configs.py` script, make sure that `TRAIN_CLASSES` points to your custom classes text file; 141 | - From the main directory, in a terminal, type `python tools/Convert_to_pb.py`; 142 | - From the main directory, in a terminal, type `python tools/Convert_to_TRT.py`; 143 | - In the `configs.py` script, set `YOLO_FRAMEWORK = "trt"`; 144 | - In the `configs.py` script, set `YOLO_CUSTOM_WEIGHTS = f'checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}'`; 145 | - Now you can run `detection_custom.py` to test your custom-trained and converted TensorRT model.
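For orientation, the heart of such a conversion is TensorFlow's TF-TRT converter. The sketch below shows the standard `TrtGraphConverterV2` flow for INT8 as of TF 2.3; it is not a copy of `tools/Convert_to_TRT.py`, and the SavedModel paths are placeholders:
```
import numpy as np
from tensorflow.python.compiler.tensorrt import trt_convert as trt

params = trt.DEFAULT_TRT_CONVERSION_PARAMS._replace(precision_mode=trt.TrtPrecisionMode.INT8)
converter = trt.TrtGraphConverterV2(input_saved_model_dir="checkpoints/yolov4-608",  # .pb export from Convert_to_pb.py
                                    conversion_params=params)

def calibration_input_fn():
    # INT8 needs representative inputs; real images calibrate better than zeros
    yield (np.zeros((1, 608, 608, 3), dtype=np.float32),)

converter.convert(calibration_input_fn=calibration_input_fn)
converter.save("checkpoints/yolov4-trt-INT8-608")
```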
146 | 147 | What is done: 148 | -------------------- 149 | - [x] Detection with original weights [Tutorial link](https://pylessons.com/YOLOv3-TF2-introduction/) 150 | - [x] Mnist detection training [Tutorial link](https://pylessons.com/YOLOv3-TF2-mnist/) 151 | - [x] Custom detection training [Tutorial link1](https://pylessons.com/YOLOv3-TF2-custrom-train/), [link2](https://pylessons.com/YOLOv3-TF2-custrom-images/) 152 | - [x] Google Colab training [Tutorial link](https://pylessons.com/YOLOv3-TF2-GoogleColab/) 153 | - [x] YOLOv3-Tiny support [Tutorial link](https://pylessons.com/YOLOv3-TF2-Tiny/) 154 | - [x] Object tracking [Tutorial link](https://pylessons.com/YOLOv3-TF2-DeepSort/) 155 | - [x] Mean Average Precision (mAP) [Tutorial link](https://pylessons.com/YOLOv3-TF2-mAP/) 156 | - [x] Yolo v3 on Raspberry Pi [Tutorial link](https://pylessons.com/YOLOv3-TF2-RaspberryPi/) 157 | - [x] YOLOv4 and YOLOv4-tiny detection [Tutorial link](https://pylessons.com/YOLOv4-TF2-introduction/) 158 | - [x] YOLOv4 and YOLOv4-tiny detection training (not fully) [Tutorial link](https://pylessons.com/YOLOv4-TF2-training/) 159 | - [x] Convert to TensorRT model [Tutorial link](https://pylessons.com/YOLOv4-TF2-TensorRT/) 160 | - [x] Add multiprocessing after detection (drawing bbox) [Tutorial link](https://pylessons.com/YOLOv4-TF2-multiprocessing/) 161 | - [x] Generate YOLO Object Detection training data from its own results [Tutorial link](https://pylessons.com/YOLOv4-TF2-CreateXML/) 162 | - [x] Counter-Strike: Global Offensive real-time YOLOv4 Object Detection aimbot [Tutorial link](https://pylessons.com/YOLOv4-TF2-CSGO-aimbot/) 163 | 164 | To be continued... (not anytime soon) 165 | -------------------- 166 | - [ ] Converting to TensorFlow Lite 167 | - [ ] YOLO on Android (Leaving it for the future; will need to convert everything to Java... not ready for this) 168 | - [ ] Generating anchors 169 | - [ ] YOLACT: Real-time Instance Segmentation 170 | - [ ] Model pruning (Pruning is a technique in deep learning that aids in the development of smaller and more efficient neural networks. It's a model optimization technique that involves eliminating unnecessary values in the weight tensor.) 171 | -------------------------------------------------------------------------------- /checkpoints/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "yolov3_custom_2" 2 | all_model_checkpoint_paths: "yolov3_custom_2" 3 | -------------------------------------------------------------------------------- /deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : float 23 | Detector confidence score. 24 | class_name : str 25 | Detector class. 26 | feature : ndarray | NoneType 27 | A feature vector that describes the object contained in this image.
28 | 29 | """ 30 | 31 | def __init__(self, tlwh, confidence, class_name, feature): 32 | self.tlwh = np.asarray(tlwh, dtype=np.float) 33 | self.confidence = float(confidence) 34 | self.class_name = class_name 35 | self.feature = np.asarray(feature, dtype=np.float32) 36 | 37 | def get_class(self): 38 | return self.class_name 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /deep_sort/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | 9 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 10 | if len(physical_devices) > 0: 11 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 12 | 13 | def _run_in_batches(f, data_dict, out, batch_size): 14 | data_len = len(out) 15 | num_batches = int(data_len / batch_size) 16 | 17 | s, e = 0, 0 18 | for i in range(num_batches): 19 | s, e = i * batch_size, (i + 1) * batch_size 20 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 21 | out[s:e] = f(batch_data_dict) 22 | if e < len(out): 23 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 24 | out[e:] = f(batch_data_dict) 25 | 26 | 27 | def extract_image_patch(image, bbox, patch_shape): 28 | """Extract image patch from bounding box. 29 | 30 | Parameters 31 | ---------- 32 | image : ndarray 33 | The full image. 34 | bbox : array_like 35 | The bounding box in format (x, y, width, height). 36 | patch_shape : Optional[array_like] 37 | This parameter can be used to enforce a desired patch shape 38 | (height, width). First, the `bbox` is adapted to the aspect ratio 39 | of the patch shape, then it is clipped at the image boundaries. 40 | If None, the shape is computed from :arg:`bbox`. 41 | 42 | Returns 43 | ------- 44 | ndarray | NoneType 45 | An image patch showing the :arg:`bbox`, optionally reshaped to 46 | :arg:`patch_shape`. 47 | Returns None if the bounding box is empty or fully outside of the image 48 | boundaries. 
49 | 50 | """ 51 | bbox = np.array(bbox) 52 | if patch_shape is not None: 53 | # correct aspect ratio to patch shape 54 | target_aspect = float(patch_shape[1]) / patch_shape[0] 55 | new_width = target_aspect * bbox[3] 56 | bbox[0] -= (new_width - bbox[2]) / 2 57 | bbox[2] = new_width 58 | 59 | # convert to top left, bottom right 60 | bbox[2:] += bbox[:2] 61 | bbox = bbox.astype(np.int) 62 | 63 | # clip at image boundaries 64 | bbox[:2] = np.maximum(0, bbox[:2]) 65 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 66 | if np.any(bbox[:2] >= bbox[2:]): 67 | return None 68 | sx, sy, ex, ey = bbox 69 | image = image[sy:ey, sx:ex] 70 | image = cv2.resize(image, tuple(patch_shape[::-1])) 71 | return image 72 | 73 | 74 | class ImageEncoder(object): 75 | 76 | def __init__(self, checkpoint_filename, input_name="images", output_name="features"): 77 | self.session = tf.Session() 78 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 79 | graph_def = tf.GraphDef() 80 | graph_def.ParseFromString(file_handle.read()) 81 | tf.import_graph_def(graph_def) 82 | try: 83 | self.input_var = tf.get_default_graph().get_tensor_by_name(input_name) 84 | self.output_var = tf.get_default_graph().get_tensor_by_name(output_name) 85 | except KeyError: 86 | layers = [i.name for i in tf.get_default_graph().get_operations()] 87 | self.input_var = tf.get_default_graph().get_tensor_by_name(layers[0]+':0') 88 | self.output_var = tf.get_default_graph().get_tensor_by_name(layers[-1]+':0') 89 | 90 | assert len(self.output_var.get_shape()) == 2 91 | assert len(self.input_var.get_shape()) == 4 92 | self.feature_dim = self.output_var.get_shape().as_list()[-1] 93 | self.image_shape = self.input_var.get_shape().as_list()[1:] 94 | 95 | def __call__(self, data_x, batch_size=32): 96 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 97 | _run_in_batches( 98 | lambda x: self.session.run(self.output_var, feed_dict=x), 99 | {self.input_var: data_x}, out, batch_size) 100 | return out 101 | 102 | 103 | def create_box_encoder(model_filename, input_name="images:0", output_name="features:0", batch_size=32): 104 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 105 | image_shape = image_encoder.image_shape 106 | 107 | def encoder(image, boxes): 108 | image_patches = [] 109 | for box in boxes: 110 | patch = extract_image_patch(image, box, image_shape[:2]) 111 | if patch is None: 112 | print("WARNING: Failed to extract image patch: %s." % str(box)) 113 | patch = np.random.uniform(0., 255., image_shape).astype(np.uint8) 114 | image_patches.append(patch) 115 | image_patches = np.asarray(image_patches) 116 | return image_encoder(image_patches, batch_size) 117 | 118 | return encoder 119 | 120 | 121 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 122 | """Generate detections with features. 123 | 124 | Parameters 125 | ---------- 126 | encoder : Callable[image, ndarray] -> ndarray 127 | The encoder function takes as input a BGR color image and a matrix of 128 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 129 | corresponding feature vectors. 130 | mot_dir : str 131 | Path to the MOTChallenge directory (can be either train or test). 132 | output_dir 133 | Path to the output directory. Will be created if it does not exist. 134 | detection_dir 135 | Path to custom detections. The directory structure should be the default 136 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 137 | standard MOTChallenge detections. 
138 | 139 | """ 140 | if detection_dir is None: 141 | detection_dir = mot_dir 142 | try: 143 | os.makedirs(output_dir) 144 | except OSError as exception: 145 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 146 | pass 147 | else: 148 | raise ValueError( 149 | "Failed to created output directory '%s'" % output_dir) 150 | 151 | for sequence in os.listdir(mot_dir): 152 | print("Processing %s" % sequence) 153 | sequence_dir = os.path.join(mot_dir, sequence) 154 | 155 | image_dir = os.path.join(sequence_dir, "img1") 156 | image_filenames = { 157 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 158 | for f in os.listdir(image_dir)} 159 | 160 | detection_file = os.path.join( 161 | detection_dir, sequence, "det/det.txt") 162 | detections_in = np.loadtxt(detection_file, delimiter=',') 163 | detections_out = [] 164 | 165 | frame_indices = detections_in[:, 0].astype(np.int) 166 | min_frame_idx = frame_indices.astype(np.int).min() 167 | max_frame_idx = frame_indices.astype(np.int).max() 168 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 169 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 170 | mask = frame_indices == frame_idx 171 | rows = detections_in[mask] 172 | 173 | if frame_idx not in image_filenames: 174 | print("WARNING could not find image for frame %d" % frame_idx) 175 | continue 176 | bgr_image = cv2.imread( 177 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 178 | features = encoder(bgr_image, rows[:, 2:6].copy()) 179 | detections_out += [np.r_[(row, feature)] for row, feature 180 | in zip(rows, features)] 181 | 182 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 183 | np.save( 184 | output_filename, np.asarray(detections_out), allow_pickle=False) 185 | 186 | 187 | def parse_args(): 188 | """Parse command line arguments. 189 | """ 190 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 191 | parser.add_argument( 192 | "--model", 193 | default="resources/networks/mars-small128.pb", 194 | help="Path to freezed inference graph protobuf.") 195 | parser.add_argument( 196 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 197 | required=True) 198 | parser.add_argument( 199 | "--detection_dir", help="Path to custom detections. Defaults to " 200 | "standard MOT detections Directory structure should be the default " 201 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 202 | parser.add_argument( 203 | "--output_dir", help="Output directory. Will be created if it does not" 204 | " exist.", default="detections") 205 | return parser.parse_args() 206 | 207 | 208 | def main(): 209 | args = parse_args() 210 | encoder = create_box_encoder(args.model, batch_size=32) 211 | generate_detections(encoder, args.mot_dir, args.output_dir, 212 | args.detection_dir) 213 | 214 | 215 | if __name__ == "__main__": 216 | main() 217 | -------------------------------------------------------------------------------- /deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 
17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. 
The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 
140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 
216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 
54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | indices = linear_sum_assignment(cost_matrix) 59 | indices = np.asarray(indices) 60 | indices = np.transpose(indices) 61 | matches, unmatched_tracks, unmatched_detections = [], [], [] 62 | for col, detection_idx in enumerate(detection_indices): 63 | if col not in indices[:, 1]: 64 | unmatched_detections.append(detection_idx) 65 | for row, track_idx in enumerate(track_indices): 66 | if row not in indices[:, 0]: 67 | unmatched_tracks.append(track_idx) 68 | for row, col in indices: 69 | track_idx = track_indices[row] 70 | detection_idx = detection_indices[col] 71 | if cost_matrix[row, col] > max_distance: 72 | unmatched_tracks.append(track_idx) 73 | unmatched_detections.append(detection_idx) 74 | else: 75 | matches.append((track_idx, detection_idx)) 76 | return matches, unmatched_tracks, unmatched_detections 77 | 78 | 79 | def matching_cascade( 80 | distance_metric, max_distance, cascade_depth, tracks, detections, 81 | track_indices=None, detection_indices=None): 82 | """Run matching cascade. 83 | 84 | Parameters 85 | ---------- 86 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 87 | The distance metric is given a list of tracks and detections as well as 88 | a list of N track indices and M detection indices. The metric should 89 | return the NxM dimensional cost matrix, where element (i, j) is the 90 | association cost between the i-th track in the given track indices and 91 | the j-th detection in the given detection indices. 92 | max_distance : float 93 | Gating threshold. Associations with cost larger than this value are 94 | disregarded. 95 | cascade_depth: int 96 | The cascade depth, should be se to the maximum track age. 97 | tracks : List[track.Track] 98 | A list of predicted tracks at the current time step. 99 | detections : List[detection.Detection] 100 | A list of detections at the current time step. 101 | track_indices : Optional[List[int]] 102 | List of track indices that maps rows in `cost_matrix` to tracks in 103 | `tracks` (see description above). Defaults to all tracks. 104 | detection_indices : Optional[List[int]] 105 | List of detection indices that maps columns in `cost_matrix` to 106 | detections in `detections` (see description above). Defaults to all 107 | detections. 108 | 109 | Returns 110 | ------- 111 | (List[(int, int)], List[int], List[int]) 112 | Returns a tuple with the following three entries: 113 | * A list of matched track and detection indices. 114 | * A list of unmatched track indices. 115 | * A list of unmatched detection indices. 
116 | 117 | """ 118 | if track_indices is None: 119 | track_indices = list(range(len(tracks))) 120 | if detection_indices is None: 121 | detection_indices = list(range(len(detections))) 122 | 123 | unmatched_detections = detection_indices 124 | matches = [] 125 | for level in range(cascade_depth): 126 | if len(unmatched_detections) == 0: # No detections left 127 | break 128 | 129 | track_indices_l = [ 130 | k for k in track_indices 131 | if tracks[k].time_since_update == 1 + level 132 | ] 133 | if len(track_indices_l) == 0: # Nothing to match at this level 134 | continue 135 | 136 | matches_l, _, unmatched_detections = \ 137 | min_cost_matching( 138 | distance_metric, max_distance, tracks, detections, 139 | track_indices_l, unmatched_detections) 140 | matches += matches_l 141 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 142 | return matches, unmatched_tracks, unmatched_detections 143 | 144 | 145 | def gate_cost_matrix( 146 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 147 | gated_cost=INFTY_COST, only_position=False): 148 | """Invalidate infeasible entries in cost matrix based on the state 149 | distributions obtained by Kalman filtering. 150 | 151 | Parameters 152 | ---------- 153 | kf : The Kalman filter. 154 | cost_matrix : ndarray 155 | The NxM dimensional cost matrix, where N is the number of track indices 156 | and M is the number of detection indices, such that entry (i, j) is the 157 | association cost between `tracks[track_indices[i]]` and 158 | `detections[detection_indices[j]]`. 159 | tracks : List[track.Track] 160 | A list of predicted tracks at the current time step. 161 | detections : List[detection.Detection] 162 | A list of detections at the current time step. 163 | track_indices : List[int] 164 | List of track indices that maps rows in `cost_matrix` to tracks in 165 | `tracks` (see description above). 166 | detection_indices : List[int] 167 | List of detection indices that maps columns in `cost_matrix` to 168 | detections in `detections` (see description above). 169 | gated_cost : Optional[float] 170 | Entries in the cost matrix corresponding to infeasible associations are 171 | set this value. Defaults to a very large value. 172 | only_position : Optional[bool] 173 | If True, only the x, y position of the state distribution is considered 174 | during gating. Defaults to False. 175 | 176 | Returns 177 | ------- 178 | ndarray 179 | Returns the modified cost matrix. 180 | 181 | """ 182 | gating_dim = 2 if only_position else 4 183 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 184 | measurements = np.asarray( 185 | [detections[i].to_xyah() for i in detection_indices]) 186 | for row, track_idx in enumerate(track_indices): 187 | track = tracks[track_idx] 188 | gating_distance = kf.gating_distance( 189 | track.mean, track.covariance, measurements, only_position) 190 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 191 | return cost_matrix 192 | -------------------------------------------------------------------------------- /deep_sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | distances = _cosine_distance(x, y) 96 | return distances.min(axis=0) 97 | 98 | 99 | class NearestNeighborDistanceMetric(object): 100 | """ 101 | A nearest neighbor distance metric that, for each target, returns 102 | the closest distance to any sample that has been observed so far. 103 | 104 | Parameters 105 | ---------- 106 | metric : str 107 | Either "euclidean" or "cosine". 108 | matching_threshold: float 109 | The matching threshold. Samples with larger distance are considered an 110 | invalid match. 111 | budget : Optional[int] 112 | If not None, fix samples per class to at most this number. Removes 113 | the oldest samples when the budget is reached. 114 | 115 | Attributes 116 | ---------- 117 | samples : Dict[int -> List[ndarray]] 118 | A dictionary that maps from target identities to the list of samples 119 | that have been observed so far. 
120 | 121 | """ 122 | 123 | def __init__(self, metric, matching_threshold, budget=None): 124 | 125 | 126 | if metric == "euclidean": 127 | self._metric = _nn_euclidean_distance 128 | elif metric == "cosine": 129 | self._metric = _nn_cosine_distance 130 | else: 131 | raise ValueError( 132 | "Invalid metric; must be either 'euclidean' or 'cosine'") 133 | self.matching_threshold = matching_threshold 134 | self.budget = budget 135 | self.samples = {} 136 | 137 | def partial_fit(self, features, targets, active_targets): 138 | """Update the distance metric with new data. 139 | 140 | Parameters 141 | ---------- 142 | features : ndarray 143 | An NxM matrix of N features of dimensionality M. 144 | targets : ndarray 145 | An integer array of associated target identities. 146 | active_targets : List[int] 147 | A list of targets that are currently present in the scene. 148 | 149 | """ 150 | for feature, target in zip(features, targets): 151 | self.samples.setdefault(target, []).append(feature) 152 | if self.budget is not None: 153 | self.samples[target] = self.samples[target][-self.budget:] 154 | self.samples = {k: self.samples[k] for k in active_targets} 155 | 156 | def distance(self, features, targets): 157 | """Compute distance between features and targets. 158 | 159 | Parameters 160 | ---------- 161 | features : ndarray 162 | An NxM matrix of N features of dimensionality M. 163 | targets : List[int] 164 | A list of targets to match the given `features` against. 165 | 166 | Returns 167 | ------- 168 | ndarray 169 | Returns a cost matrix of shape len(targets), len(features), where 170 | element (i, j) contains the closest squared distance between 171 | `targets[i]` and `features[j]`. 172 | 173 | """ 174 | cost_matrix = np.zeros((len(targets), len(features))) 175 | for i, target in enumerate(targets): 176 | cost_matrix[i, :] = self._metric(self.samples[target], features) 177 | return cost_matrix 178 | -------------------------------------------------------------------------------- /deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> classes = [d.classes for d in detections] 19 | >>> scores = [d.confidence for d in detections] 20 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 21 | >>> detections = [detections[i] for i in indices] 22 | 23 | Parameters 24 | ---------- 25 | boxes : ndarray 26 | Array of ROIs (x, y, width, height). 27 | max_bbox_overlap : float 28 | ROIs that overlap more than this values are suppressed. 29 | scores : Optional[array_like] 30 | Detector confidence score. 31 | 32 | Returns 33 | ------- 34 | List[int] 35 | Returns indices of detections that have survived non-maxima suppression. 
36 | 37 | """ 38 | if len(boxes) == 0: 39 | return [] 40 | 41 | boxes = boxes.astype(np.float) 42 | pick = [] 43 | 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] + boxes[:, 0] 47 | y2 = boxes[:, 3] + boxes[:, 1] 48 | 49 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 50 | if scores is not None: 51 | idxs = np.argsort(scores) 52 | else: 53 | idxs = np.argsort(y2) 54 | 55 | while len(idxs) > 0: 56 | last = len(idxs) - 1 57 | i = idxs[last] 58 | pick.append(i) 59 | 60 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 61 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 62 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 63 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 64 | 65 | w = np.maximum(0, xx2 - xx1 + 1) 66 | h = np.maximum(0, yy2 - yy1 + 1) 67 | 68 | overlap = (w * h) / area[idxs[:last]] 69 | 70 | idxs = np.delete( 71 | idxs, np.concatenate( 72 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deep_sort/test_tracking.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/deep_sort/test_tracking.gif -------------------------------------------------------------------------------- /deep_sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | 3 | 4 | class TrackState: 5 | """ 6 | Enumeration type for the single target track state. Newly created tracks are 7 | classified as `tentative` until enough evidence has been collected. Then, 8 | the track state is changed to `confirmed`. Tracks that are no longer alive 9 | are classified as `deleted` to mark them for removal from the set of active 10 | tracks. 11 | 12 | """ 13 | 14 | Tentative = 1 15 | Confirmed = 2 16 | Deleted = 3 17 | 18 | 19 | class Track: 20 | """ 21 | A single target track with state space `(x, y, a, h)` and associated 22 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 23 | aspect ratio and `h` is the height. 24 | 25 | Parameters 26 | ---------- 27 | mean : ndarray 28 | Mean vector of the initial state distribution. 29 | covariance : ndarray 30 | Covariance matrix of the initial state distribution. 31 | track_id : int 32 | A unique track identifier. 33 | n_init : int 34 | Number of consecutive detections before the track is confirmed. The 35 | track state is set to `Deleted` if a miss occurs within the first 36 | `n_init` frames. 37 | max_age : int 38 | The maximum number of consecutive misses before the track state is 39 | set to `Deleted`. 40 | feature : Optional[ndarray] 41 | Feature vector of the detection this track originates from. If not None, 42 | this feature is added to the `features` cache. 43 | 44 | Attributes 45 | ---------- 46 | mean : ndarray 47 | Mean vector of the initial state distribution. 48 | covariance : ndarray 49 | Covariance matrix of the initial state distribution. 50 | track_id : int 51 | A unique track identifier. 52 | hits : int 53 | Total number of measurement updates. 54 | age : int 55 | Total number of frames since first occurance. 56 | time_since_update : int 57 | Total number of frames since last measurement update. 58 | state : TrackState 59 | The current track state. 60 | features : List[ndarray] 61 | A cache of features. On each measurement update, the associated feature 62 | vector is added to this list. 
63 | 64 | """ 65 | 66 | def __init__(self, mean, covariance, track_id, n_init, max_age, 67 | feature=None, class_name=None): 68 | self.mean = mean 69 | self.covariance = covariance 70 | self.track_id = track_id 71 | self.hits = 1 72 | self.age = 1 73 | self.time_since_update = 0 74 | 75 | self.state = TrackState.Tentative 76 | self.features = [] 77 | if feature is not None: 78 | self.features.append(feature) 79 | 80 | self._n_init = n_init 81 | self._max_age = max_age 82 | self.class_name = class_name 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def get_class(self): 114 | return self.class_name 115 | 116 | def predict(self, kf): 117 | """Propagate the state distribution to the current time step using a 118 | Kalman filter prediction step. 119 | 120 | Parameters 121 | ---------- 122 | kf : kalman_filter.KalmanFilter 123 | The Kalman filter. 124 | 125 | """ 126 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 127 | self.age += 1 128 | self.time_since_update += 1 129 | 130 | def update(self, kf, detection): 131 | """Perform Kalman filter measurement update step and update the feature 132 | cache. 133 | 134 | Parameters 135 | ---------- 136 | kf : kalman_filter.KalmanFilter 137 | The Kalman filter. 138 | detection : Detection 139 | The associated detection. 140 | 141 | """ 142 | self.mean, self.covariance = kf.update( 143 | self.mean, self.covariance, detection.to_xyah()) 144 | self.features.append(detection.feature) 145 | 146 | self.hits += 1 147 | self.time_since_update = 0 148 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 149 | self.state = TrackState.Confirmed 150 | 151 | def mark_missed(self): 152 | """Mark this track as missed (no association at the current time step). 153 | """ 154 | if self.state == TrackState.Tentative: 155 | self.state = TrackState.Deleted 156 | elif self.time_since_update > self._max_age: 157 | self.state = TrackState.Deleted 158 | 159 | def is_tentative(self): 160 | """Returns True if this track is tentative (unconfirmed). 161 | """ 162 | return self.state == TrackState.Tentative 163 | 164 | def is_confirmed(self): 165 | """Returns True if this track is confirmed.""" 166 | return self.state == TrackState.Confirmed 167 | 168 | def is_deleted(self): 169 | """Returns True if this track is dead and should be deleted.""" 170 | return self.state == TrackState.Deleted 171 | -------------------------------------------------------------------------------- /deep_sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 
13 | 
14 |     Parameters
15 |     ----------
16 |     metric : nn_matching.NearestNeighborDistanceMetric
17 |         A distance metric for measurement-to-track association.
18 |     max_age : int
19 |         Maximum number of consecutive misses before a track is deleted.
20 |     n_init : int
21 |         Number of consecutive detections before the track is confirmed. The
22 |         track state is set to `Deleted` if a miss occurs within the first
23 |         `n_init` frames.
24 | 
25 |     Attributes
26 |     ----------
27 |     metric : nn_matching.NearestNeighborDistanceMetric
28 |         The distance metric used for measurement to track association.
29 |     max_age : int
30 |         Maximum number of consecutive misses before a track is deleted.
31 |     n_init : int
32 |         Number of frames that a track remains in initialization phase.
33 |     kf : kalman_filter.KalmanFilter
34 |         A Kalman filter to filter target trajectories in image space.
35 |     tracks : List[Track]
36 |         The list of active tracks at the current time step.
37 | 
38 |     """
39 | 
40 |     def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3):
41 |         self.metric = metric
42 |         self.max_iou_distance = max_iou_distance
43 |         self.max_age = max_age
44 |         self.n_init = n_init
45 | 
46 |         self.kf = kalman_filter.KalmanFilter()
47 |         self.tracks = []
48 |         self._next_id = 1
49 | 
50 |     def predict(self):
51 |         """Propagate track state distributions one time step forward.
52 | 
53 |         This function should be called once every time step, before `update`.
54 |         """
55 |         for track in self.tracks:
56 |             track.predict(self.kf)
57 | 
58 |     def update(self, detections):
59 |         """Perform measurement update and track management.
60 | 
61 |         Parameters
62 |         ----------
63 |         detections : List[deep_sort.detection.Detection]
64 |             A list of detections at the current time step.
65 | 
66 |         """
67 |         # Run matching cascade.
68 |         matches, unmatched_tracks, unmatched_detections = \
69 |             self._match(detections)
70 | 
71 |         # Update track set.
72 |         for track_idx, detection_idx in matches:
73 |             self.tracks[track_idx].update(
74 |                 self.kf, detections[detection_idx])
75 |         for track_idx in unmatched_tracks:
76 |             self.tracks[track_idx].mark_missed()
77 |         for detection_idx in unmatched_detections:
78 |             self._initiate_track(detections[detection_idx])
79 |         self.tracks = [t for t in self.tracks if not t.is_deleted()]
80 | 
81 |         # Update distance metric.
82 |         active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
83 |         features, targets = [], []
84 |         for track in self.tracks:
85 |             if not track.is_confirmed():
86 |                 continue
87 |             features += track.features
88 |             targets += [track.track_id for _ in track.features]
89 |             track.features = []
90 |         self.metric.partial_fit(
91 |             np.asarray(features), np.asarray(targets), active_targets)
92 | 
93 |     def _match(self, detections):
94 | 
95 |         def gated_metric(tracks, dets, track_indices, detection_indices):
96 |             features = np.array([dets[i].feature for i in detection_indices])
97 |             targets = np.array([tracks[i].track_id for i in track_indices])
98 |             cost_matrix = self.metric.distance(features, targets)
99 |             cost_matrix = linear_assignment.gate_cost_matrix(
100 |                 self.kf, cost_matrix, tracks, dets, track_indices,
101 |                 detection_indices)
102 | 
103 |             return cost_matrix
104 | 
105 |         # Split track set into confirmed and unconfirmed tracks.
106 |         confirmed_tracks = [
107 |             i for i, t in enumerate(self.tracks) if t.is_confirmed()]
108 |         unconfirmed_tracks = [
109 |             i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
110 | 
111 |         # Associate confirmed tracks using appearance features.
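# The cascade tries tracks with the smallest `time_since_update` first, so recently updated tracks get priority over long-missed ones.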
112 |         matches_a, unmatched_tracks_a, unmatched_detections = \
113 |             linear_assignment.matching_cascade(
114 |                 gated_metric, self.metric.matching_threshold, self.max_age,
115 |                 self.tracks, detections, confirmed_tracks)
116 | 
117 |         # Associate remaining tracks together with unconfirmed tracks using IOU.
118 |         iou_track_candidates = unconfirmed_tracks + [
119 |             k for k in unmatched_tracks_a if
120 |             self.tracks[k].time_since_update == 1]
121 |         unmatched_tracks_a = [
122 |             k for k in unmatched_tracks_a if
123 |             self.tracks[k].time_since_update != 1]
124 |         matches_b, unmatched_tracks_b, unmatched_detections = \
125 |             linear_assignment.min_cost_matching(
126 |                 iou_matching.iou_cost, self.max_iou_distance, self.tracks,
127 |                 detections, iou_track_candidates, unmatched_detections)
128 | 
129 |         matches = matches_a + matches_b
130 |         unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
131 |         return matches, unmatched_tracks, unmatched_detections
132 | 
133 |     def _initiate_track(self, detection):
134 |         mean, covariance = self.kf.initiate(detection.to_xyah())
135 |         class_name = detection.get_class()
136 |         self.tracks.append(Track(
137 |             mean, covariance, self._next_id, self.n_init, self.max_age,
138 |             detection.feature, class_name))
139 |         self._next_id += 1
140 | 
--------------------------------------------------------------------------------
/detect_mnist.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : detect_mnist.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-12
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : mnist object detection example
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import random
16 | import time
17 | import tensorflow as tf
18 | from yolov3.yolov4 import Create_Yolo
19 | from yolov3.utils import detect_image
20 | from yolov3.configs import *
21 | 
22 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES) # create the model once, not on every loop iteration
23 | yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}") # use keras weights
24 | 
25 | while True:
26 |     ID = random.randint(0, 200)
27 |     label_txt = "mnist/mnist_test.txt"
28 |     image_info = open(label_txt).readlines()[ID].split()
29 | 
30 |     image_path = image_info[0]
31 | 
32 |     detect_image(yolo, image_path, "mnist_test.jpg", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0))
33 | 
--------------------------------------------------------------------------------
/detection_custom.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : detection_custom.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : object detection image and video example
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from yolov3.utils import detect_image, detect_realtime, detect_video, Load_Yolo_model, detect_video_realtime_mp
17 | from yolov3.configs import *
18 | 
image_path = "./IMAGES/plate_2.jpg" 20 | video_path = "./IMAGES/test.mp4" 21 | 22 | yolo = Load_Yolo_model() 23 | detect_image(yolo, image_path, "./IMAGES/plate_1_detect.jpg", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0)) 24 | #detect_video(yolo, video_path, './IMAGES/detected.mp4', input_size=YOLO_INPUT_SIZE, show=False, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0)) 25 | #detect_realtime(yolo, '', input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255, 0, 0)) 26 | 27 | #detect_video_realtime_mp(video_path, "Output.mp4", input_size=YOLO_INPUT_SIZE, show=True, CLASSES=TRAIN_CLASSES, rectangle_colors=(255,0,0), realtime=False) 28 | -------------------------------------------------------------------------------- /detection_demo.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : detection_demo.py 4 | # Author : PyLessons 5 | # Created date: 2020-09-27 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : object detection image and video example 9 | # 10 | #================================================================ 11 | import os 12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 13 | import cv2 14 | import numpy as np 15 | import tensorflow as tf 16 | from yolov3.utils import detect_image, detect_realtime, detect_video, Load_Yolo_model, detect_video_realtime_mp 17 | from yolov3.configs import * 18 | 19 | image_path = "./IMAGES/kite.jpg" 20 | video_path = "./IMAGES/test.mp4" 21 | 22 | yolo = Load_Yolo_model() 23 | detect_image(yolo, image_path, "./IMAGES/kite_pred.jpg", input_size=YOLO_INPUT_SIZE, show=True, rectangle_colors=(255,0,0)) 24 | #detect_video(yolo, video_path, "", input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255,0,0)) 25 | #detect_realtime(yolo, '', input_size=YOLO_INPUT_SIZE, show=True, rectangle_colors=(255, 0, 0)) 26 | 27 | #detect_video_realtime_mp(video_path, "Output.mp4", input_size=YOLO_INPUT_SIZE, show=False, rectangle_colors=(255,0,0), realtime=False) 28 | -------------------------------------------------------------------------------- /evaluate_mAP.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : evaluate_mAP.py 4 | # Author : PyLessons 5 | # Created date: 2020-08-17 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : used to evaluate model mAP and FPS 9 | # 10 | #================================================================ 11 | import os 12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 13 | import cv2 14 | import numpy as np 15 | import tensorflow as tf 16 | from tensorflow.python.saved_model import tag_constants 17 | from yolov3.dataset import Dataset 18 | from yolov3.yolov4 import Create_Yolo 19 | from yolov3.utils import load_yolo_weights, detect_image, image_preprocess, postprocess_boxes, nms, read_class_names 20 | from yolov3.configs import * 21 | import shutil 22 | import json 23 | import time 24 | 25 | gpus = tf.config.experimental.list_physical_devices('GPU') 26 | if len(gpus) > 0: 27 | try: tf.config.experimental.set_memory_growth(gpus[0], True) 28 | except RuntimeError: print("RuntimeError in tf.config.experimental.list_physical_devices('GPU')") 29 | 30 | 31 | def 
32 |     """
33 |     --- Official matlab code VOC2012---
34 |     mrec=[0 ; rec ; 1];
35 |     mpre=[0 ; prec ; 0];
36 |     for i=numel(mpre)-1:-1:1
37 |         mpre(i)=max(mpre(i),mpre(i+1));
38 |     end
39 |     i=find(mrec(2:end)~=mrec(1:end-1))+1;
40 |     ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
41 |     """
42 |     rec.insert(0, 0.0) # insert 0.0 at beginning of list
43 |     rec.append(1.0) # insert 1.0 at end of list
44 |     mrec = rec[:]
45 |     prec.insert(0, 0.0) # insert 0.0 at beginning of list
46 |     prec.append(0.0) # insert 0.0 at end of list
47 |     mpre = prec[:]
48 |     """
49 |     This part makes the precision monotonically decreasing
50 |     (goes from the end to the beginning)
51 |     matlab: for i=numel(mpre)-1:-1:1
52 |                 mpre(i)=max(mpre(i),mpre(i+1));
53 |     """
54 |     # matlab indexes start in 1 but python in 0, so I have to do:
55 |     # range(start=(len(mpre) - 2), end=0, step=-1)
56 |     # also the python function range excludes the end, resulting in:
57 |     # range(start=(len(mpre) - 2), end=-1, step=-1)
58 |     for i in range(len(mpre)-2, -1, -1):
59 |         mpre[i] = max(mpre[i], mpre[i+1])
60 |     """
61 |     This part creates a list of indexes where the recall changes
62 |     matlab: i=find(mrec(2:end)~=mrec(1:end-1))+1;
63 |     """
64 |     i_list = []
65 |     for i in range(1, len(mrec)):
66 |         if mrec[i] != mrec[i-1]:
67 |             i_list.append(i) # if it was matlab would be i + 1
68 |     """
69 |     The Average Precision (AP) is the area under the curve
70 |     (numerical integration)
71 |     matlab: ap=sum((mrec(i)-mrec(i-1)).*mpre(i));
72 |     """
73 |     ap = 0.0
74 |     for i in i_list:
75 |         ap += ((mrec[i]-mrec[i-1])*mpre[i])
76 |     return ap, mrec, mpre
77 | 
78 | 
79 | def get_mAP(Yolo, dataset, score_threshold=0.25, iou_threshold=0.50, TEST_INPUT_SIZE=TEST_INPUT_SIZE):
80 |     MINOVERLAP = 0.5 # default value (defined in the PASCAL VOC2012 challenge)
81 |     NUM_CLASS = read_class_names(TRAIN_CLASSES)
82 | 
83 |     ground_truth_dir_path = 'mAP/ground-truth'
84 |     if os.path.exists(ground_truth_dir_path): shutil.rmtree(ground_truth_dir_path)
85 | 
86 |     if not os.path.exists('mAP'): os.mkdir('mAP')
87 |     os.mkdir(ground_truth_dir_path)
88 | 
89 |     print(f'\ncalculating mAP{int(iou_threshold*100)}...\n')
90 | 
91 |     gt_counter_per_class = {}
92 |     for index in range(dataset.num_samples):
93 |         ann_dataset = dataset.annotations[index]
94 | 
95 |         original_image, bbox_data_gt = dataset.parse_annotation(ann_dataset, True)
96 | 
97 |         if len(bbox_data_gt) == 0:
98 |             bboxes_gt = []
99 |             classes_gt = []
100 |         else:
101 |             bboxes_gt, classes_gt = bbox_data_gt[:, :4], bbox_data_gt[:, 4]
102 |         ground_truth_path = os.path.join(ground_truth_dir_path, str(index) + '.txt')
103 |         num_bbox_gt = len(bboxes_gt)
104 | 
105 |         bounding_boxes = []
106 |         for i in range(num_bbox_gt):
107 |             class_name = NUM_CLASS[classes_gt[i]]
108 |             xmin, ymin, xmax, ymax = list(map(str, bboxes_gt[i]))
109 |             bbox = xmin + " " + ymin + " " + xmax + " " + ymax
110 |             bounding_boxes.append({"class_name":class_name, "bbox":bbox, "used":False})
111 | 
112 |             # count that object
113 |             if class_name in gt_counter_per_class:
114 |                 gt_counter_per_class[class_name] += 1
115 |             else:
116 |                 # if class didn't exist yet
117 |                 gt_counter_per_class[class_name] = 1
118 |             bbox_mess = ' '.join([class_name, xmin, ymin, xmax, ymax]) + '\n'
119 |         with open(f'{ground_truth_dir_path}/{str(index)}_ground_truth.json', 'w') as outfile:
120 |             json.dump(bounding_boxes, outfile)
121 | 
122 |     gt_classes = list(gt_counter_per_class.keys())
123 |     # sort the classes alphabetically
124 |     gt_classes = sorted(gt_classes)
125 |     n_classes = len(gt_classes)
126 | 
127 |     times = []
128
| json_pred = [[] for i in range(n_classes)] 129 | for index in range(dataset.num_samples): 130 | ann_dataset = dataset.annotations[index] 131 | 132 | image_name = ann_dataset[0].split('/')[-1] 133 | original_image, bbox_data_gt = dataset.parse_annotation(ann_dataset, True) 134 | 135 | image = image_preprocess(np.copy(original_image), [TEST_INPUT_SIZE, TEST_INPUT_SIZE]) 136 | image_data = image[np.newaxis, ...].astype(np.float32) 137 | 138 | t1 = time.time() 139 | if YOLO_FRAMEWORK == "tf": 140 | if tf.__version__ > '2.4.0': 141 | pred_bbox = Yolo(image_data) 142 | else: 143 | pred_bbox = Yolo.predict(image_data) 144 | elif YOLO_FRAMEWORK == "trt": 145 | batched_input = tf.constant(image_data) 146 | result = Yolo(batched_input) 147 | pred_bbox = [] 148 | for key, value in result.items(): 149 | value = value.numpy() 150 | pred_bbox.append(value) 151 | 152 | t2 = time.time() 153 | 154 | times.append(t2-t1) 155 | 156 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 157 | pred_bbox = tf.concat(pred_bbox, axis=0) 158 | 159 | bboxes = postprocess_boxes(pred_bbox, original_image, TEST_INPUT_SIZE, score_threshold) 160 | bboxes = nms(bboxes, iou_threshold, method='nms') 161 | 162 | for bbox in bboxes: 163 | coor = np.array(bbox[:4], dtype=np.int32) 164 | score = bbox[4] 165 | class_ind = int(bbox[5]) 166 | class_name = NUM_CLASS[class_ind] 167 | score = '%.4f' % score 168 | xmin, ymin, xmax, ymax = list(map(str, coor)) 169 | bbox = xmin + " " + ymin + " " + xmax + " " +ymax 170 | json_pred[gt_classes.index(class_name)].append({"confidence": str(score), "file_id": str(index), "bbox": str(bbox)}) 171 | 172 | ms = sum(times)/len(times)*1000 173 | fps = 1000 / ms 174 | 175 | for class_name in gt_classes: 176 | json_pred[gt_classes.index(class_name)].sort(key=lambda x:float(x['confidence']), reverse=True) 177 | with open(f'{ground_truth_dir_path}/{class_name}_predictions.json', 'w') as outfile: 178 | json.dump(json_pred[gt_classes.index(class_name)], outfile) 179 | 180 | # Calculate the AP for each class 181 | sum_AP = 0.0 182 | ap_dictionary = {} 183 | # open file to store the results 184 | with open("mAP/results.txt", 'w') as results_file: 185 | results_file.write("# AP and precision/recall per class\n") 186 | count_true_positives = {} 187 | for class_index, class_name in enumerate(gt_classes): 188 | count_true_positives[class_name] = 0 189 | # Load predictions of that class 190 | predictions_file = f'{ground_truth_dir_path}/{class_name}_predictions.json' 191 | predictions_data = json.load(open(predictions_file)) 192 | 193 | # Assign predictions to ground truth objects 194 | nd = len(predictions_data) 195 | tp = [0] * nd # creates an array of zeros of size nd 196 | fp = [0] * nd 197 | for idx, prediction in enumerate(predictions_data): 198 | file_id = prediction["file_id"] 199 | # assign prediction to ground truth object if any 200 | # open ground-truth with that file_id 201 | gt_file = f'{ground_truth_dir_path}/{str(file_id)}_ground_truth.json' 202 | ground_truth_data = json.load(open(gt_file)) 203 | ovmax = -1 204 | gt_match = -1 205 | # load prediction bounding-box 206 | bb = [ float(x) for x in prediction["bbox"].split() ] # bounding box of prediction 207 | for obj in ground_truth_data: 208 | # look for a class_name match 209 | if obj["class_name"] == class_name: 210 | bbgt = [ float(x) for x in obj["bbox"].split() ] # bounding box of ground truth 211 | bi = [max(bb[0],bbgt[0]), max(bb[1],bbgt[1]), min(bb[2],bbgt[2]), min(bb[3],bbgt[3])] 212 | iw = bi[2] - bi[0] + 1 213 | ih 
= bi[3] - bi[1] + 1 214 | if iw > 0 and ih > 0: 215 | # compute overlap (IoU) = area of intersection / area of union 216 | ua = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1) + (bbgt[2] - bbgt[0] 217 | + 1) * (bbgt[3] - bbgt[1] + 1) - iw * ih 218 | ov = iw * ih / ua 219 | if ov > ovmax: 220 | ovmax = ov 221 | gt_match = obj 222 | 223 | # assign prediction as true positive/don't care/false positive 224 | if ovmax >= MINOVERLAP:# if ovmax > minimum overlap 225 | if not bool(gt_match["used"]): 226 | # true positive 227 | tp[idx] = 1 228 | gt_match["used"] = True 229 | count_true_positives[class_name] += 1 230 | # update the ".json" file 231 | with open(gt_file, 'w') as f: 232 | f.write(json.dumps(ground_truth_data)) 233 | else: 234 | # false positive (multiple detection) 235 | fp[idx] = 1 236 | else: 237 | # false positive 238 | fp[idx] = 1 239 | 240 | # compute precision/recall 241 | cumsum = 0 242 | for idx, val in enumerate(fp): 243 | fp[idx] += cumsum 244 | cumsum += val 245 | cumsum = 0 246 | for idx, val in enumerate(tp): 247 | tp[idx] += cumsum 248 | cumsum += val 249 | #print(tp) 250 | rec = tp[:] 251 | for idx, val in enumerate(tp): 252 | rec[idx] = float(tp[idx]) / gt_counter_per_class[class_name] 253 | #print(rec) 254 | prec = tp[:] 255 | for idx, val in enumerate(tp): 256 | prec[idx] = float(tp[idx]) / (fp[idx] + tp[idx]) 257 | #print(prec) 258 | 259 | ap, mrec, mprec = voc_ap(rec, prec) 260 | sum_AP += ap 261 | text = "{0:.3f}%".format(ap*100) + " = " + class_name + " AP " #class_name + " AP = {0:.2f}%".format(ap*100) 262 | 263 | rounded_prec = [ '%.3f' % elem for elem in prec ] 264 | rounded_rec = [ '%.3f' % elem for elem in rec ] 265 | # Write to results.txt 266 | results_file.write(text + "\n Precision: " + str(rounded_prec) + "\n Recall :" + str(rounded_rec) + "\n\n") 267 | 268 | print(text) 269 | ap_dictionary[class_name] = ap 270 | 271 | results_file.write("\n# mAP of all classes\n") 272 | mAP = sum_AP / n_classes 273 | 274 | text = "mAP = {:.3f}%, {:.2f} FPS".format(mAP*100, fps) 275 | results_file.write(text + "\n") 276 | print(text) 277 | 278 | return mAP*100 279 | 280 | if __name__ == '__main__': 281 | if YOLO_FRAMEWORK == "tf": # TensorFlow detection 282 | if YOLO_TYPE == "yolov4": 283 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS 284 | if YOLO_TYPE == "yolov3": 285 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS 286 | 287 | if YOLO_CUSTOM_WEIGHTS == False: 288 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES) 289 | load_yolo_weights(yolo, Darknet_weights) # use Darknet weights 290 | else: 291 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES) 292 | yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}") # use custom weights 293 | 294 | elif YOLO_FRAMEWORK == "trt": # TensorRT detection 295 | saved_model_loaded = tf.saved_model.load(f"./checkpoints/{TRAIN_MODEL_NAME}", tags=[tag_constants.SERVING]) 296 | signature_keys = list(saved_model_loaded.signatures.keys()) 297 | yolo = saved_model_loaded.signatures['serving_default'] 298 | 299 | testset = Dataset('test', TEST_INPUT_SIZE=YOLO_INPUT_SIZE) 300 | get_mAP(yolo, testset, score_threshold=0.05, iou_threshold=0.50, TEST_INPUT_SIZE=YOLO_INPUT_SIZE) 301 | -------------------------------------------------------------------------------- /mnist/make_data.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # 
File name : make_data.py
4 | # Author : PyLessons
5 | # Created date: 2020-04-20
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : create mnist example dataset to train custom yolov3
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
13 | import cv2
14 | import numpy as np
15 | import shutil
16 | import random
17 | from zipfile import ZipFile
18 | 
19 | SIZE = 416
20 | images_num_train = 1000
21 | images_num_test = 200
22 | 
23 | image_sizes = [3, 6, 3] # small, medium, big
24 | 
25 | # this helps to run the script both from a terminal and from the Python IDLE shell
26 | add_path = "mnist"
27 | if os.getcwd().split(os.sep)[-1] != "mnist":
28 |     add_path = "mnist"
29 |     os.chdir(add_path)
30 | else:
31 |     add_path = ""
32 | 
33 | def compute_iou(box1, box2):
34 |     # xmin, ymin, xmax, ymax
35 |     A1 = (box1[2] - box1[0])*(box1[3] - box1[1])
36 |     A2 = (box2[2] - box2[0])*(box2[3] - box2[1])
37 | 
38 |     xmin = max(box1[0], box2[0])
39 |     ymin = max(box1[1], box2[1])
40 |     xmax = min(box1[2], box2[2])
41 |     ymax = min(box1[3], box2[3])
42 | 
43 |     if ymin >= ymax or xmin >= xmax: return 0
44 |     return ((xmax-xmin) * (ymax - ymin)) / (A1 + A2) # intersection over the sum of areas, not the true IoU (a real union would subtract the intersection); good enough as a placement-overlap test
45 | 
46 | 
47 | def make_image(data, image_path, ratio=1):
48 |     blank = data[0]
49 |     boxes = data[1]
50 |     label = data[2]
51 | 
52 |     ID = image_path.split("/")[-1][0]
53 |     image = cv2.imread(image_path)
54 |     image = cv2.resize(image, (int(28*ratio), int(28*ratio)))
55 |     h, w, c = image.shape
56 | 
57 |     while True:
58 |         xmin = np.random.randint(0, SIZE-w, 1)[0]
59 |         ymin = np.random.randint(0, SIZE-h, 1)[0]
60 |         xmax = xmin + w
61 |         ymax = ymin + h
62 |         box = [xmin, ymin, xmax, ymax]
63 | 
64 |         iou = [compute_iou(box, b) for b in boxes]
65 |         if max(iou) < 0.02:
66 |             boxes.append(box)
67 |             label.append(ID)
68 |             break
69 | 
70 |     for i in range(w):
71 |         for j in range(h):
72 |             x = xmin + i
73 |             y = ymin + j
74 |             blank[y][x] = image[j][i]
75 | 
76 |     # cv2.rectangle(blank, (xmin, ymin), (xmax, ymax), [0, 0, 255], 2)
77 |     return blank
78 | 
79 | 
80 | for file in ["train", "test"]:
81 |     if not os.path.exists(f"mnist/{file}"):
82 |         with ZipFile(f"mnist/{file}.zip", 'r') as zip_file: # renamed from 'zip' to avoid shadowing the builtin
83 |             # extracting all the files
84 |             print(f'Extracting all {file} files now...')
85 |             zip_file.extractall()
86 |         shutil.move(file, "mnist")
87 |         print('Done!')
88 | 
89 | for file in ['train','test']:
90 |     images_path = os.getcwd()+f"/mnist_{file}"
91 |     labels_txt = os.getcwd()+f"/mnist_{file}.txt"
92 | 
93 |     if file == 'train': images_num = images_num_train
94 |     if file == 'test': images_num = images_num_test
95 | 
96 |     if os.path.exists(images_path): shutil.rmtree(images_path)
97 |     os.mkdir(images_path)
98 | 
99 |     image_paths = [os.path.join(os.path.realpath("."), os.getcwd()+f"/mnist/{file}/" + image_name)
100 |                    for image_name in os.listdir(os.getcwd()+f"/mnist/{file}")]
101 | 
102 |     with open(labels_txt, "w") as wf:
103 |         image_num = 0
104 |         while image_num < images_num:
105 |             image_path = os.path.realpath(os.path.join(images_path, "%06d.jpg" %(image_num+1)))
106 |             #print(image_path)
107 |             annotation = image_path
108 |             blanks = np.ones(shape=[SIZE, SIZE, 3]) * 255
109 |             bboxes = [[0,0,1,1]] # sentinel box so compute_iou always has something to compare against; skipped when labels are written
110 |             labels = [0]
111 |             data = [blanks, bboxes, labels]
112 |             bboxes_num = 0
113 | 
114 |             # ratios small, medium, big objects
115 |             ratios = [[0.5, 0.8], [1., 1.5, 2.], [3., 4.]]
116 |             for i in range(len(ratios)):
117 |                 N = random.randint(0, image_sizes[i])
118 |                 if N != 0: bboxes_num += 1
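# count scale groups that place at least one digit; images where every group comes up empty are redrawn below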
119 | for _ in range(N): 120 | ratio = random.choice(ratios[i]) 121 | idx = random.randint(0, len(image_paths)-1) 122 | data[0] = make_image(data, image_paths[idx], ratio) 123 | 124 | if bboxes_num == 0: continue 125 | cv2.imwrite(image_path, data[0]) 126 | for i in range(len(labels)): 127 | if i == 0: continue 128 | xmin = str(bboxes[i][0]) 129 | ymin = str(bboxes[i][1]) 130 | xmax = str(bboxes[i][2]) 131 | ymax = str(bboxes[i][3]) 132 | class_ind = str(labels[i]) 133 | annotation += ' ' + ','.join([xmin, ymin, xmax, ymax, str(class_ind)]) 134 | image_num += 1 135 | print("=> %s" %annotation) 136 | wf.write(annotation + "\n") 137 | 138 | if add_path != "": os.chdir("..") 139 | -------------------------------------------------------------------------------- /mnist/mnist.names: -------------------------------------------------------------------------------- 1 | 0 2 | 1 3 | 2 4 | 3 5 | 4 6 | 5 7 | 6 8 | 7 9 | 8 10 | 9 11 | -------------------------------------------------------------------------------- /mnist/mnist/test.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/mnist/mnist/test.zip -------------------------------------------------------------------------------- /mnist/mnist/train.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/mnist/mnist/train.zip -------------------------------------------------------------------------------- /mnist/show_image.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : show_image.py 4 | # Author : PyLessons 5 | # Created date: 2020-04-20 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : show random image from created dataset 9 | # 10 | #================================================================ 11 | import random 12 | import cv2 13 | import numpy as np 14 | from PIL import Image 15 | 16 | ID = random.randint(0, 200) 17 | label_txt = "./mnist_train.txt" 18 | image_info = open(label_txt).readlines()[ID].split() 19 | 20 | image_path = image_info[0] 21 | image = cv2.imread(image_path) 22 | for bbox in image_info[1:]: 23 | bbox = bbox.split(",") 24 | image = cv2.rectangle(image,(int(float(bbox[0])), 25 | int(float(bbox[1]))), 26 | (int(float(bbox[2])), 27 | int(float(bbox[3]))), (255,0,0), 2) 28 | 29 | image = Image.fromarray(np.uint8(image)) 30 | image.show() 31 | -------------------------------------------------------------------------------- /model_data/coco/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic-light 11 | fire-hydrant 12 | stop-sign 13 | parking-meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports-ball 34 | kite 35 | baseball-bat 36 | baseball-glove 37 | skateboard 38 | surfboard 39 | tennis-racket 40 | bottle 41 | wine-glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | 
orange
51 | broccoli
52 | carrot
53 | hot-dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell-phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy-bear
79 | hair-drier
80 | toothbrush
81 | 
--------------------------------------------------------------------------------
/model_data/mars-small128.pb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/model_data/mars-small128.pb
--------------------------------------------------------------------------------
/object_tracker.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : object_tracker.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : code to track detected objects from video or webcam
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import cv2
14 | import numpy as np
15 | import tensorflow as tf
16 | from yolov3.utils import Load_Yolo_model, image_preprocess, postprocess_boxes, nms, draw_bbox, read_class_names
17 | from yolov3.configs import *
18 | import time
19 | 
20 | from deep_sort import nn_matching
21 | from deep_sort.detection import Detection
22 | from deep_sort.tracker import Tracker
23 | from deep_sort import generate_detections as gdet
24 | 
25 | video_path = "./IMAGES/test.mp4"
26 | 
27 | def Object_tracking(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', Track_only = []):
28 |     # Definition of the parameters
29 |     max_cosine_distance = 0.7
30 |     nn_budget = None
31 | 
32 |     # initialize deep sort object
33 |     model_filename = 'model_data/mars-small128.pb'
34 |     encoder = gdet.create_box_encoder(model_filename, batch_size=1)
35 |     metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
36 |     tracker = Tracker(metric)
37 | 
38 |     times, times_2 = [], []
39 | 
40 |     if video_path:
41 |         vid = cv2.VideoCapture(video_path) # detect on video
42 |     else:
43 |         vid = cv2.VideoCapture(0) # detect from webcam
44 | 
45 |     # by default VideoCapture returns float instead of int
46 |     width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
47 |     height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
48 |     fps = int(vid.get(cv2.CAP_PROP_FPS))
49 |     codec = cv2.VideoWriter_fourcc(*'XVID')
50 |     out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4
51 | 
52 |     NUM_CLASS = read_class_names(CLASSES)
53 |     key_list = list(NUM_CLASS.keys())
54 |     val_list = list(NUM_CLASS.values())
55 |     while True:
56 |         _, frame = vid.read()
57 | 
58 |         try:
59 |             original_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
60 |             original_frame = cv2.cvtColor(original_frame, cv2.COLOR_BGR2RGB) # swapping channels twice restores BGR order, so this pair effectively copies the frame; its real job is to raise inside the try block when the stream ends
61 |         except: # frame is None once the video ends, so cvtColor raises and we stop
62 |             break
63 | 
64 |         image_data = image_preprocess(np.copy(original_frame), [input_size, input_size])
65 |         #image_data = tf.expand_dims(image_data, 0)
66 |         image_data = image_data[np.newaxis, ...].astype(np.float32)
67 | 
68 |         t1 = time.time()
if YOLO_FRAMEWORK == "tf": 70 | pred_bbox = Yolo.predict(image_data) 71 | elif YOLO_FRAMEWORK == "trt": 72 | batched_input = tf.constant(image_data) 73 | result = Yolo(batched_input) 74 | pred_bbox = [] 75 | for key, value in result.items(): 76 | value = value.numpy() 77 | pred_bbox.append(value) 78 | 79 | #t1 = time.time() 80 | #pred_bbox = Yolo.predict(image_data) 81 | t2 = time.time() 82 | 83 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 84 | pred_bbox = tf.concat(pred_bbox, axis=0) 85 | 86 | bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) 87 | bboxes = nms(bboxes, iou_threshold, method='nms') 88 | 89 | # extract bboxes to boxes (x, y, width, height), scores and names 90 | boxes, scores, names = [], [], [] 91 | for bbox in bboxes: 92 | if len(Track_only) !=0 and NUM_CLASS[int(bbox[5])] in Track_only or len(Track_only) == 0: 93 | boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int)-bbox[0].astype(int), bbox[3].astype(int)-bbox[1].astype(int)]) 94 | scores.append(bbox[4]) 95 | names.append(NUM_CLASS[int(bbox[5])]) 96 | 97 | # Obtain all the detections for the given frame. 98 | boxes = np.array(boxes) 99 | names = np.array(names) 100 | scores = np.array(scores) 101 | features = np.array(encoder(original_frame, boxes)) 102 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(boxes, scores, names, features)] 103 | 104 | # Pass detections to the deepsort object and obtain the track information. 105 | tracker.predict() 106 | tracker.update(detections) 107 | 108 | # Obtain info from the tracks 109 | tracked_bboxes = [] 110 | for track in tracker.tracks: 111 | if not track.is_confirmed() or track.time_since_update > 5: 112 | continue 113 | bbox = track.to_tlbr() # Get the corrected/predicted bounding box 114 | class_name = track.get_class() #Get the class name of particular object 115 | tracking_id = track.track_id # Get the ID for the particular track 116 | index = key_list[val_list.index(class_name)] # Get predicted object index by object name 117 | tracked_bboxes.append(bbox.tolist() + [tracking_id, index]) # Structure data, that we could use it with our draw_bbox function 118 | 119 | # draw detection on frame 120 | image = draw_bbox(original_frame, tracked_bboxes, CLASSES=CLASSES, tracking=True) 121 | 122 | t3 = time.time() 123 | times.append(t2-t1) 124 | times_2.append(t3-t1) 125 | 126 | times = times[-20:] 127 | times_2 = times_2[-20:] 128 | 129 | ms = sum(times)/len(times)*1000 130 | fps = 1000 / ms 131 | fps2 = 1000 / (sum(times_2)/len(times_2)*1000) 132 | 133 | image = cv2.putText(image, "Time: {:.1f} FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 134 | 135 | # draw original yolo detection 136 | #image = draw_bbox(image, bboxes, CLASSES=CLASSES, show_label=False, rectangle_colors=rectangle_colors, tracking=True) 137 | 138 | print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2)) 139 | if output_path != '': out.write(image) 140 | if show: 141 | cv2.imshow('output', image) 142 | 143 | if cv2.waitKey(25) & 0xFF == ord("q"): 144 | cv2.destroyAllWindows() 145 | break 146 | 147 | cv2.destroyAllWindows() 148 | 149 | 150 | yolo = Load_Yolo_model() 151 | Object_tracking(yolo, video_path, "detection.mp4", input_size=YOLO_INPUT_SIZE, show=True, iou_threshold=0.1, rectangle_colors=(255,0,0), Track_only = ["person"]) 152 | -------------------------------------------------------------------------------- 
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy>=1.18.2
2 | scipy>=1.4.1
3 | wget>=3.2
4 | seaborn>=0.10.0
5 | tensorflow
6 | opencv-python==4.4.0.46
7 | tqdm==4.43.0
8 | pandas
9 | awscli
10 | urllib3
11 | mss
12 | 
--------------------------------------------------------------------------------
/tools/Convert_to_TRT.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Convert_to_TRT.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : convert a TF SavedModel to a TensorRT model
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import sys
14 | 
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools":
17 |     os.chdir("..")
18 |     sys.path.insert(1, os.getcwd())
19 | 
20 | import tensorflow as tf
21 | import numpy as np
22 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
23 | if len(physical_devices) > 0:
24 |     tf.config.experimental.set_memory_growth(physical_devices[0], True)
25 | from yolov3.configs import *
26 | from tensorflow.python.compiler.tensorrt import trt_convert as trt
27 | 
28 | def calibration_input():
29 |     for _ in range(100):
30 |         batched_input = np.random.random((1, YOLO_INPUT_SIZE, YOLO_INPUT_SIZE, 3)).astype(np.float32)
31 |         batched_input = tf.constant(batched_input)
32 |         yield (batched_input,)
33 | 
34 | conversion_params = trt.DEFAULT_TRT_CONVERSION_PARAMS
35 | conversion_params = conversion_params._replace(max_workspace_size_bytes=4000000000)
36 | conversion_params = conversion_params._replace(precision_mode=YOLO_TRT_QUANTIZE_MODE)
37 | conversion_params = conversion_params._replace(max_batch_size=1)
38 | if YOLO_TRT_QUANTIZE_MODE == 'INT8':
39 |     conversion_params = conversion_params._replace(use_calibration=True)
40 | 
41 | converter = trt.TrtGraphConverterV2(input_saved_model_dir=f'./checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}', conversion_params=conversion_params)
42 | if YOLO_TRT_QUANTIZE_MODE == 'INT8':
43 |     converter.convert(calibration_input_fn=calibration_input)
44 | else:
45 |     converter.convert()
46 | 
47 | converter.save(output_saved_model_dir=f'./checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}')
48 | print(f'Done Converting to TensorRT, model saved to: /checkpoints/{YOLO_TYPE}-trt-{YOLO_TRT_QUANTIZE_MODE}-{YOLO_INPUT_SIZE}')
49 | 
--------------------------------------------------------------------------------
/tools/Convert_to_pb.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Convert_to_pb.py
4 | # Author : PyLessons
5 | # Created date: 2020-08-17
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to freeze tf model to .pb model
9 | #
10 | #================================================================
11 | import os
12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
13 | import sys
14 | 
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools":
17 |     os.chdir("..")
18 |     sys.path.insert(1, os.getcwd())
19 | 
20 | import tensorflow as tf
21 | from yolov3.yolov4 import Create_Yolo
22 | from yolov3.utils import load_yolo_weights
23 | from yolov3.configs import *
24 | 
25 | if YOLO_TYPE == "yolov4":
26 |     Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS
27 | if YOLO_TYPE == "yolov3":
28 |     Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS
29 | 
30 | if YOLO_CUSTOM_WEIGHTS == False:
31 |     yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
32 |     load_yolo_weights(yolo, Darknet_weights) # use Darknet weights
33 | else:
34 |     yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES)
35 |     yolo.load_weights(YOLO_CUSTOM_WEIGHTS) # use custom weights
36 | 
37 | yolo.summary()
38 | yolo.save(f'./checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}')
39 | 
40 | print(f"model saved to /checkpoints/{YOLO_TYPE}-{YOLO_INPUT_SIZE}")
41 | 
--------------------------------------------------------------------------------
/tools/Detection_to_XML.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : Detection_to_XML.py
4 | # Author : PyLessons
5 | # Created date: 2020-09-27
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : converts YOLO detection to XML file
9 | #
10 | #================================================================
11 | from textwrap import dedent
12 | from lxml import etree
13 | import glob
14 | import os
15 | import cv2
16 | import time
17 | 
18 | def CreateXMLfile(path, file_name, image, bboxes, NUM_CLASS):
19 |     boxes = []
20 |     for bbox in bboxes:
21 |         boxes.append([bbox[0].astype(int), bbox[1].astype(int), bbox[2].astype(int), bbox[3].astype(int), NUM_CLASS[int(bbox[5])]]) #, bbox[4], NUM_CLASS[int(bbox[5])]])
22 | 
23 |     if not os.path.exists(path):
24 |         os.makedirs(path)
25 |     os.chdir(path)
26 | 
27 |     img_name = "XML_"+file_name+".png"
28 | 
29 |     cv2.imwrite(img_name, image)
30 | 
31 |     annotation = etree.Element("annotation")
32 | 
33 |     folder = etree.Element("folder")
34 |     folder.text = os.path.basename(os.getcwd())
35 |     annotation.append(folder)
36 | 
37 |     filename_xml = etree.Element("filename")
38 |     filename_str = img_name.split(".")[0]
39 |     filename_xml.text = img_name
40 |     annotation.append(filename_xml)
41 | 
42 |     path = etree.Element("path")
43 |     path.text = os.path.join(os.getcwd(), filename_str + ".png") # match the .png image written above
44 |     annotation.append(path)
45 | 
46 |     source = etree.Element("source")
47 |     annotation.append(source)
48 | 
49 |     database = etree.Element("database")
50 |     database.text = "Unknown"
51 |     source.append(database)
52 | 
53 |     size = etree.Element("size")
54 |     annotation.append(size)
55 | 
56 |     width = etree.Element("width")
57 |     height = etree.Element("height")
58 |     depth = etree.Element("depth")
59 | 
60 |     img = cv2.imread(filename_xml.text)
61 | 
62 |     width.text = str(img.shape[1])
63 |     height.text = str(img.shape[0])
64 |     depth.text = str(img.shape[2])
65 | 
66 |     size.append(width)
67 |     size.append(height)
68 |     size.append(depth)
69 | 
70 |     segmented = etree.Element("segmented")
71 |     segmented.text = "0"
72 |     annotation.append(segmented)
73 | 
74 |     for Object in boxes:
75 |         class_name = Object[4]
76 |         xmin_l = str(int(float(Object[0])))
77 |         ymin_l = str(int(float(Object[1])))
78 |         xmax_l = str(int(float(Object[2])))
79 |         ymax_l = str(int(float(Object[3])))
80 | 
81 |         obj = etree.Element("object")
82 |         annotation.append(obj)
83 | 
84 |         name = etree.Element("name")
85 |         name.text = class_name
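# class label resolved from the detection's class index when the boxes list was built above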
86 |         obj.append(name)
87 | 
88 |         pose = etree.Element("pose")
89 |         pose.text = "Unspecified"
90 |         obj.append(pose)
91 | 
92 |         truncated = etree.Element("truncated")
93 |         truncated.text = "0"
94 |         obj.append(truncated)
95 | 
96 |         difficult = etree.Element("difficult")
97 |         difficult.text = "0"
98 |         obj.append(difficult)
99 | 
100 |         bndbox = etree.Element("bndbox")
101 |         obj.append(bndbox)
102 | 
103 |         xmin = etree.Element("xmin")
104 |         xmin.text = xmin_l
105 |         bndbox.append(xmin)
106 | 
107 |         ymin = etree.Element("ymin")
108 |         ymin.text = ymin_l
109 |         bndbox.append(ymin)
110 | 
111 |         xmax = etree.Element("xmax")
112 |         xmax.text = xmax_l
113 |         bndbox.append(xmax)
114 | 
115 |         ymax = etree.Element("ymax")
116 |         ymax.text = ymax_l
117 |         bndbox.append(ymax)
118 | 
119 |     # write xml to file
120 |     s = etree.tostring(annotation, pretty_print=True)
121 |     with open(filename_str + ".xml", 'wb') as f:
122 |         f.write(s)
123 |     # no explicit close needed; the with-statement closes the file
124 | 
125 |     os.chdir("..")
126 | 
--------------------------------------------------------------------------------
/tools/XML_to_YOLOv3.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : XML_to_YOLOv3.py
4 | # Author : PyLessons
5 | # Created date: 2020-06-04
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to convert XML labels to YOLOv3 training labels
9 | #
10 | #================================================================
11 | import xml.etree.ElementTree as ET
12 | import os
13 | import glob
14 | 
15 | foldername = os.path.basename(os.getcwd())
16 | if foldername == "tools": os.chdir("..")
17 | 
18 | 
19 | data_dir = '/custom_dataset/'
20 | Dataset_names_path = "model_data/license_plate_names.txt"
21 | Dataset_train = "model_data/license_plate_train.txt"
22 | Dataset_test = "model_data/license_plate_test.txt"
23 | is_subfolder = False
24 | 
25 | Dataset_names = []
26 | 
27 | def ParseXML(img_folder, file):
28 |     for xml_file in glob.glob(img_folder+'/*.xml'):
29 |         tree = ET.parse(xml_file) # ET.parse accepts a path directly, so no file handle is leaked
30 |         root = tree.getroot()
31 |         image_name = root.find('filename').text
32 |         img_path = img_folder+'/'+image_name
33 |         for i, obj in enumerate(root.iter('object')):
34 |             difficult = obj.find('difficult').text
35 |             cls = obj.find('name').text
36 |             if cls not in Dataset_names:
37 |                 Dataset_names.append(cls)
38 |             cls_id = Dataset_names.index(cls)
39 |             xmlbox = obj.find('bndbox')
40 |             OBJECT = (str(int(float(xmlbox.find('xmin').text)))+','
41 |                       +str(int(float(xmlbox.find('ymin').text)))+','
42 |                       +str(int(float(xmlbox.find('xmax').text)))+','
43 |                       +str(int(float(xmlbox.find('ymax').text)))+','
44 |                       +str(cls_id))
45 |             img_path += ' '+OBJECT
46 |         print(img_path)
47 |         file.write(img_path+'\n')
48 | 
49 | def run_XML_to_YOLOv3():
50 |     for i, folder in enumerate(['train','test']):
51 |         with open([Dataset_train,Dataset_test][i], "w") as file:
52 |             print(os.getcwd()+data_dir+folder)
53 |             img_path = os.path.join(os.getcwd()+data_dir+folder)
54 |             if is_subfolder:
55 |                 for directory in os.listdir(img_path):
56 |                     xml_path = os.path.join(img_path, directory)
57 |                     ParseXML(xml_path, file)
58 |             else:
59 |                 ParseXML(img_path, file)
60 | 
61 |     print("Dataset_names:", Dataset_names)
62 |     with open(Dataset_names_path, "w") as file:
63 |         for name in Dataset_names:
64 |             file.write(str(name)+'\n')
65 | 
66 | run_XML_to_YOLOv3()
67 |
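# Each line of the generated train/test files pairs an image path with
68 | # "xmin,ymin,xmax,ymax,class_id" boxes, e.g. (hypothetical values):
69 | # /custom_dataset/train/plate_1.jpg 48,240,195,371,0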
--------------------------------------------------------------------------------
/tools/oid_to_pascal_voc_xml.py:
--------------------------------------------------------------------------------
1 | #================================================================
2 | #
3 | # File name : oid_to_pascal_voc_xml.py
4 | # Author : PyLessons
5 | # Created date: 2020-06-04
6 | # Website : https://pylessons.com/
7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
8 | # Description : used to convert OID labels to PASCAL VOC XML
9 | #
10 | #================================================================
11 | import os
12 | from tqdm import tqdm
13 | from sys import exit
14 | import argparse
15 | import cv2
16 | from textwrap import dedent
17 | from lxml import etree
18 | 
19 | foldername = os.path.basename(os.getcwd())
20 | if foldername == "tools": os.chdir("..")
21 | 
22 | Dataset_path = "OIDv4_ToolKit/OID/Dataset"
23 | 
24 | def convert_to_xml():
25 |     current_path = os.getcwd()
26 |     os.chdir(Dataset_path)
27 |     DIRS = os.listdir(os.getcwd())
28 | 
29 |     for DIR in DIRS:
30 |         if os.path.isdir(DIR):
31 |             os.chdir(DIR)
32 | 
33 |             print("Currently in Subdirectory:", DIR)
34 |             CLASS_DIRS = os.listdir(os.getcwd())
35 |             for CLASS_DIR in CLASS_DIRS:
36 |                 if " " in CLASS_DIR:
37 |                     os.rename(CLASS_DIR, CLASS_DIR.replace(" ", "_"))
38 | 
39 |             CLASS_DIRS = os.listdir(os.getcwd())
40 |             for CLASS_DIR in CLASS_DIRS:
41 |                 if os.path.isdir(CLASS_DIR):
42 |                     os.chdir(CLASS_DIR)
43 | 
44 |                     print("\n" + "Creating PASCAL VOC XML Files for Class:", CLASS_DIR)
45 |                     # Create Directory for annotations if it does not exist yet
46 | 
47 |                     # Read Labels from OIDv4 ToolKit
48 |                     os.chdir("Label")
49 | 
50 |                     # Create PASCAL XML
51 |                     for filename in tqdm(os.listdir(os.getcwd())):
52 |                         if filename.endswith(".txt"):
53 |                             filename_str = str.split(filename, ".")[0]
54 | 
55 | 
56 |                             annotation = etree.Element("annotation")
57 | 
58 |                             os.chdir("..")
59 |                             folder = etree.Element("folder")
60 |                             folder.text = os.path.basename(os.getcwd())
61 |                             annotation.append(folder)
62 | 
63 |                             filename_xml = etree.Element("filename")
64 |                             filename_xml.text = filename_str + ".jpg"
65 |                             annotation.append(filename_xml)
66 | 
67 |                             path = etree.Element("path")
68 |                             path.text = os.path.join(os.path.dirname(os.path.abspath(filename)), filename_str + ".jpg")
69 |                             annotation.append(path)
70 | 
71 |                             source = etree.Element("source")
72 |                             annotation.append(source)
73 | 
74 |                             database = etree.Element("database")
75 |                             database.text = "Unknown"
76 |                             source.append(database)
77 | 
78 |                             size = etree.Element("size")
79 |                             annotation.append(size)
80 | 
81 |                             width = etree.Element("width")
82 |                             height = etree.Element("height")
83 |                             depth = etree.Element("depth")
84 | 
85 |                             img = cv2.imread(filename_xml.text)
86 | 
87 |                             try:
88 |                                 width.text = str(img.shape[1])
89 |                             except AttributeError:
90 |                                 os.chdir("Label")
91 |                                 continue
92 |                             height.text = str(img.shape[0])
93 |                             depth.text = str(img.shape[2])
94 | 
95 |                             size.append(width)
96 |                             size.append(height)
97 |                             size.append(depth)
98 | 
99 |                             segmented = etree.Element("segmented")
100 |                             segmented.text = "0"
101 |                             annotation.append(segmented)
102 | 
103 |                             os.chdir("Label")
104 |                             label_original = open(filename, 'r')
105 | 
106 |                             # Labels from OIDv4 Toolkit: name_of_class X_min Y_min X_max Y_max
107 |                             for line in label_original:
108 |                                 line = line.strip()
109 |                                 l = line.split(' ')
110 | 
111 |                                 class_name_len = len(l) - 4 # 4 coordinates
112 |                                 class_name = l[0]
113 |                                 for i in range(1,class_name_len):
114 |                                     class_name = f"{class_name}_{l[i]}"
115 |
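# multi-token OID class names were joined with underscores above; the last four tokens of the line are the box coordinates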
116 | addi = class_name_len 117 | 118 | xmin_l = str(int(round(float(l[0+addi])))) 119 | ymin_l = str(int(round(float(l[1+addi])))) 120 | xmax_l = str(int(round(float(l[2+addi])))) 121 | ymax_l = str(int(round(float(l[3+addi])))) 122 | 123 | obj = etree.Element("object") 124 | annotation.append(obj) 125 | 126 | name = etree.Element("name") 127 | name.text = class_name 128 | obj.append(name) 129 | 130 | pose = etree.Element("pose") 131 | pose.text = "Unspecified" 132 | obj.append(pose) 133 | 134 | truncated = etree.Element("truncated") 135 | truncated.text = "0" 136 | obj.append(truncated) 137 | 138 | difficult = etree.Element("difficult") 139 | difficult.text = "0" 140 | obj.append(difficult) 141 | 142 | bndbox = etree.Element("bndbox") 143 | obj.append(bndbox) 144 | 145 | xmin = etree.Element("xmin") 146 | xmin.text = xmin_l 147 | bndbox.append(xmin) 148 | 149 | ymin = etree.Element("ymin") 150 | ymin.text = ymin_l 151 | bndbox.append(ymin) 152 | 153 | xmax = etree.Element("xmax") 154 | xmax.text = xmax_l 155 | bndbox.append(xmax) 156 | 157 | ymax = etree.Element("ymax") 158 | ymax.text = ymax_l 159 | bndbox.append(ymax) 160 | 161 | os.chdir("..") 162 | # write xml to file 163 | s = etree.tostring(annotation, pretty_print=True) 164 | with open(filename_str + ".xml", 'wb') as f: 165 | f.write(s) 166 | f.close() 167 | 168 | os.chdir("Label") 169 | 170 | os.chdir("..") 171 | os.chdir("..") 172 | 173 | os.chdir("..") 174 | 175 | convert_to_xml() 176 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : train.py 4 | # Author : PyLessons 5 | # Created date: 2020-08-06 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : used to train custom object detector 9 | # 10 | #================================================================ 11 | import os 12 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 13 | os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' 14 | from tensorflow.python.client import device_lib 15 | print(device_lib.list_local_devices()) 16 | import shutil 17 | import numpy as np 18 | import tensorflow as tf 19 | #from tensorflow.keras.utils import plot_model 20 | from yolov3.dataset import Dataset 21 | from yolov3.yolov4 import Create_Yolo, compute_loss 22 | from yolov3.utils import load_yolo_weights 23 | from yolov3.configs import * 24 | from evaluate_mAP import get_mAP 25 | 26 | if YOLO_TYPE == "yolov4": 27 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS 28 | if YOLO_TYPE == "yolov3": 29 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS 30 | if TRAIN_YOLO_TINY: TRAIN_MODEL_NAME += "_Tiny" 31 | 32 | def main(): 33 | global TRAIN_FROM_CHECKPOINT 34 | 35 | gpus = tf.config.experimental.list_physical_devices('GPU') 36 | print(f'GPUs {gpus}') 37 | if len(gpus) > 0: 38 | try: tf.config.experimental.set_memory_growth(gpus[0], True) 39 | except RuntimeError: pass 40 | 41 | if os.path.exists(TRAIN_LOGDIR): shutil.rmtree(TRAIN_LOGDIR) 42 | writer = tf.summary.create_file_writer(TRAIN_LOGDIR) 43 | 44 | trainset = Dataset('train') 45 | testset = Dataset('test') 46 | 47 | steps_per_epoch = len(trainset) 48 | global_steps = tf.Variable(1, trainable=False, dtype=tf.int64) 49 | warmup_steps = TRAIN_WARMUP_EPOCHS * steps_per_epoch 50 | total_steps = 
TRAIN_EPOCHS * steps_per_epoch
51 | 
52 |     if TRAIN_TRANSFER:
53 |         Darknet = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES)
54 |         load_yolo_weights(Darknet, Darknet_weights) # use darknet weights
55 | 
56 |     yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, training=True, CLASSES=TRAIN_CLASSES)
57 |     if TRAIN_FROM_CHECKPOINT:
58 |         try:
59 |             yolo.load_weights(f"./checkpoints/{TRAIN_MODEL_NAME}")
60 |         except ValueError:
61 |             print("Shapes are incompatible, transferring Darknet weights")
62 |             TRAIN_FROM_CHECKPOINT = False
63 | 
64 |     if TRAIN_TRANSFER and not TRAIN_FROM_CHECKPOINT:
65 |         for i, l in enumerate(Darknet.layers):
66 |             layer_weights = l.get_weights()
67 |             if layer_weights != []:
68 |                 try:
69 |                     yolo.layers[i].set_weights(layer_weights)
70 |                 except ValueError: # incompatible shapes, e.g. the class-dependent head layers
71 |                     print("skipping", yolo.layers[i].name)
72 | 
73 |     optimizer = tf.keras.optimizers.Adam()
74 | 
75 | 
76 |     def train_step(image_data, target):
77 |         with tf.GradientTape() as tape:
78 |             pred_result = yolo(image_data, training=True)
79 |             giou_loss=conf_loss=prob_loss=0
80 | 
81 |             # optimizing process
82 |             grid = 3 if not TRAIN_YOLO_TINY else 2
83 |             for i in range(grid):
84 |                 conv, pred = pred_result[i*2], pred_result[i*2+1]
85 |                 loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
86 |                 giou_loss += loss_items[0]
87 |                 conf_loss += loss_items[1]
88 |                 prob_loss += loss_items[2]
89 | 
90 |             total_loss = giou_loss + conf_loss + prob_loss
91 | 
92 |             gradients = tape.gradient(total_loss, yolo.trainable_variables)
93 |             optimizer.apply_gradients(zip(gradients, yolo.trainable_variables))
94 | 
95 |             # update learning rate
96 |             # about warmup: https://arxiv.org/pdf/1812.01187.pdf
97 |             global_steps.assign_add(1)
98 |             if global_steps < warmup_steps: # and not TRAIN_TRANSFER:
99 |                 lr = global_steps / warmup_steps * TRAIN_LR_INIT
100 |             else:
101 |                 lr = TRAIN_LR_END + 0.5 * (TRAIN_LR_INIT - TRAIN_LR_END)*(
102 |                     (1 + tf.cos((global_steps - warmup_steps) / (total_steps - warmup_steps) * np.pi)))
103 |             optimizer.lr.assign(lr.numpy())
104 | 
105 |             # writing summary data
106 |             with writer.as_default():
107 |                 tf.summary.scalar("lr", optimizer.lr, step=global_steps)
108 |                 tf.summary.scalar("loss/total_loss", total_loss, step=global_steps)
109 |                 tf.summary.scalar("loss/giou_loss", giou_loss, step=global_steps)
110 |                 tf.summary.scalar("loss/conf_loss", conf_loss, step=global_steps)
111 |                 tf.summary.scalar("loss/prob_loss", prob_loss, step=global_steps)
112 |             writer.flush()
113 | 
114 |         return global_steps.numpy(), optimizer.lr.numpy(), giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()
115 | 
116 |     validate_writer = tf.summary.create_file_writer(TRAIN_LOGDIR)
117 |     def validate_step(image_data, target):
118 |         # no gradient tape needed here; validation only computes the losses
119 |         pred_result = yolo(image_data, training=False)
120 |         giou_loss=conf_loss=prob_loss=0
121 | 
122 |         # loss computation (mirrors train_step, without the optimizer update)
123 |         grid = 3 if not TRAIN_YOLO_TINY else 2
124 |         for i in range(grid):
125 |             conv, pred = pred_result[i*2], pred_result[i*2+1]
126 |             loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
127 |             giou_loss += loss_items[0]
128 |             conf_loss += loss_items[1]
129 |             prob_loss += loss_items[2]
130 | 
131 |         total_loss = giou_loss + conf_loss + prob_loss
132 | 
133 |         return giou_loss.numpy(), conf_loss.numpy(), prob_loss.numpy(), total_loss.numpy()
134 | 
135 |     mAP_model = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES) # create second model to measure mAP
136 | 
137 |     best_val_loss = 1000 # should be large at the start
be large at start 138 | for epoch in range(TRAIN_EPOCHS): 139 | for image_data, target in trainset: 140 | results = train_step(image_data, target) 141 | cur_step = results[0]%steps_per_epoch 142 | print("epoch:{:2.0f} step:{:5.0f}/{}, lr:{:.6f}, giou_loss:{:7.2f}, conf_loss:{:7.2f}, prob_loss:{:7.2f}, total_loss:{:7.2f}" 143 | .format(epoch, cur_step, steps_per_epoch, results[1], results[2], results[3], results[4], results[5])) 144 | 145 | if len(testset) == 0: 146 | print("configure TEST options to validate model") 147 | yolo.save_weights(os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME)) 148 | continue 149 | 150 | count, giou_val, conf_val, prob_val, total_val = 0., 0, 0, 0, 0 151 | for image_data, target in testset: 152 | results = validate_step(image_data, target) 153 | count += 1 154 | giou_val += results[0] 155 | conf_val += results[1] 156 | prob_val += results[2] 157 | total_val += results[3] 158 | # writing validate summary data 159 | with validate_writer.as_default(): 160 | tf.summary.scalar("validate_loss/total_val", total_val/count, step=epoch) 161 | tf.summary.scalar("validate_loss/giou_val", giou_val/count, step=epoch) 162 | tf.summary.scalar("validate_loss/conf_val", conf_val/count, step=epoch) 163 | tf.summary.scalar("validate_loss/prob_val", prob_val/count, step=epoch) 164 | validate_writer.flush() 165 | 166 | print("\n\ngiou_val_loss:{:7.2f}, conf_val_loss:{:7.2f}, prob_val_loss:{:7.2f}, total_val_loss:{:7.2f}\n\n". 167 | format(giou_val/count, conf_val/count, prob_val/count, total_val/count)) 168 | 169 | if TRAIN_SAVE_CHECKPOINT and not TRAIN_SAVE_BEST_ONLY: 170 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME+"_val_loss_{:7.2f}".format(total_val/count)) 171 | yolo.save_weights(save_directory) 172 | if TRAIN_SAVE_BEST_ONLY and best_val_loss>total_val/count: 173 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME) 174 | yolo.save_weights(save_directory) 175 | best_val_loss = total_val/count 176 | if not TRAIN_SAVE_BEST_ONLY and not TRAIN_SAVE_CHECKPOINT: 177 | save_directory = os.path.join(TRAIN_CHECKPOINTS_FOLDER, TRAIN_MODEL_NAME) 178 | yolo.save_weights(save_directory) 179 | 180 | # measure mAP of trained custom model 181 | try: 182 | mAP_model.load_weights(save_directory) # use keras weights 183 | get_mAP(mAP_model, testset, score_threshold=TEST_SCORE_THRESHOLD, iou_threshold=TEST_IOU_THRESHOLD) 184 | except UnboundLocalError: 185 | print("You don't have saved model weights to measure mAP, check TRAIN_SAVE_BEST_ONLY and TRAIN_SAVE_CHECKPOINT lines in configs.py") 186 | 187 | if __name__ == '__main__': 188 | main() 189 | -------------------------------------------------------------------------------- /yolov3/__ init __.py: -------------------------------------------------------------------------------- 1 | # 2 | -------------------------------------------------------------------------------- /yolov3/__pycache__/configs.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/configs.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3/__pycache__/yolov3.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pythonlessons/TensorFlow-2.x-YOLOv3/9f29d73ee24cd5db4ead280f95ff06f66d538fc2/yolov3/__pycache__/yolov3.cpython-36.pyc -------------------------------------------------------------------------------- /yolov3/configs.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : configs.py 4 | # Author : PyLessons 5 | # Created date: 2020-08-18 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : yolov3 configuration file 9 | # 10 | #================================================================ 11 | 12 | # YOLO options 13 | YOLO_TYPE = "yolov3" # yolov4 or yolov3 14 | YOLO_FRAMEWORK = "tf" # "tf" or "trt" 15 | YOLO_V3_WEIGHTS = "model_data/yolov3.weights" 16 | YOLO_V4_WEIGHTS = "model_data/yolov4.weights" 17 | YOLO_V3_TINY_WEIGHTS = "model_data/yolov3-tiny.weights" 18 | YOLO_V4_TINY_WEIGHTS = "model_data/yolov4-tiny.weights" 19 | YOLO_TRT_QUANTIZE_MODE = "INT8" # INT8, FP16, FP32 20 | YOLO_CUSTOM_WEIGHTS = False # "checkpoints/yolov3_custom" # used in evaluate_mAP.py and custom model detection; leave False if not used 21 | # YOLO_CUSTOM_WEIGHTS is also used with TensorRT and custom model detection 22 | YOLO_COCO_CLASSES = "model_data/coco/coco.names" 23 | YOLO_STRIDES = [8, 16, 32] 24 | YOLO_IOU_LOSS_THRESH = 0.5 25 | YOLO_ANCHOR_PER_SCALE = 3 26 | YOLO_MAX_BBOX_PER_SCALE = 100 27 | YOLO_INPUT_SIZE = 416 28 | if YOLO_TYPE == "yolov4": 29 | YOLO_ANCHORS = [[[12, 16], [19, 36], [40, 28]], 30 | [[36, 75], [76, 55], [72, 146]], 31 | [[142,110], [192, 243], [459, 401]]] 32 | if YOLO_TYPE == "yolov3": 33 | YOLO_ANCHORS = [[[10, 13], [16, 30], [33, 23]], 34 | [[30, 61], [62, 45], [59, 119]], 35 | [[116, 90], [156, 198], [373, 326]]] 36 | # Train options 37 | TRAIN_YOLO_TINY = False 38 | TRAIN_SAVE_BEST_ONLY = True # saves only the best model according to validation loss (True recommended) 39 | TRAIN_SAVE_CHECKPOINT = False # saves every best-validated checkpoint during training (may require a lot of disk space) (False recommended) 40 | TRAIN_CLASSES = "mnist/mnist.names" 41 | TRAIN_ANNOT_PATH = "mnist/mnist_train.txt" 42 | TRAIN_LOGDIR = "log" 43 | TRAIN_CHECKPOINTS_FOLDER = "checkpoints" 44 | TRAIN_MODEL_NAME = f"{YOLO_TYPE}_custom" 45 | TRAIN_LOAD_IMAGES_TO_RAM = True # True gives faster training but needs more RAM 46 | TRAIN_BATCH_SIZE = 4 47 | TRAIN_INPUT_SIZE = 416 48 | TRAIN_DATA_AUG = True 49 | TRAIN_TRANSFER = True 50 | TRAIN_FROM_CHECKPOINT = False # "checkpoints/yolov3_custom" 51 | TRAIN_LR_INIT = 1e-4 52 | TRAIN_LR_END = 1e-6 53 | TRAIN_WARMUP_EPOCHS = 2 54 | TRAIN_EPOCHS = 100 55 | 56 | # TEST options 57 | TEST_ANNOT_PATH = "mnist/mnist_test.txt" 58 | TEST_BATCH_SIZE = 4
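# The TEST_* values mirror their TRAIN_* counterparts; Dataset('test') in
# yolov3/dataset.py picks them up for the validation split.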
59 | TEST_INPUT_SIZE = 416 60 | TEST_DATA_AUG = False 61 | TEST_DECTECTED_IMAGE_PATH = "" 62 | TEST_SCORE_THRESHOLD = 0.3 63 | TEST_IOU_THRESHOLD = 0.45 64 | 65 | if TRAIN_YOLO_TINY: 66 | YOLO_STRIDES = [16, 32] 67 | # YOLO_ANCHORS = [[[23, 27], [37, 58], [81, 82]], # this line can be uncommented for default coco weights 68 | YOLO_ANCHORS = [[[10, 14], [23, 27], [37, 58]], 69 | [[81, 82], [135, 169], [344, 319]]] 70 | -------------------------------------------------------------------------------- /yolov3/dataset.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : dataset.py 4 | # Author : PyLessons 5 | # Created date: 2020-07-31 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : functions used to prepare dataset for custom training 9 | # 10 | #================================================================ 11 | # TODO: transfer numpy to tensorflow operations 12 | import os 13 | import cv2 14 | import random 15 | import numpy as np 16 | import tensorflow as tf 17 | from yolov3.utils import read_class_names, image_preprocess 18 | from yolov3.yolov3 import bbox_iou 19 | from yolov3.configs import * 20 | 21 | 22 | class Dataset(object): 23 | # Dataset preprocess implementation 24 | def __init__(self, dataset_type, TEST_INPUT_SIZE=TEST_INPUT_SIZE): 25 | self.annot_path = TRAIN_ANNOT_PATH if dataset_type == 'train' else TEST_ANNOT_PATH 26 | self.input_sizes = TRAIN_INPUT_SIZE if dataset_type == 'train' else TEST_INPUT_SIZE 27 | self.batch_size = TRAIN_BATCH_SIZE if dataset_type == 'train' else TEST_BATCH_SIZE 28 | self.data_aug = TRAIN_DATA_AUG if dataset_type == 'train' else TEST_DATA_AUG 29 | 30 | self.train_yolo_tiny = TRAIN_YOLO_TINY 31 | self.train_input_sizes = TRAIN_INPUT_SIZE 32 | self.strides = np.array(YOLO_STRIDES) 33 | self.classes = read_class_names(TRAIN_CLASSES) 34 | self.num_classes = len(self.classes) 35 | self.anchors = (np.array(YOLO_ANCHORS).T/self.strides).T 36 | self.anchor_per_scale = YOLO_ANCHOR_PER_SCALE 37 | self.max_bbox_per_scale = YOLO_MAX_BBOX_PER_SCALE 38 | 39 | self.annotations = self.load_annotations(dataset_type) 40 | self.num_samples = len(self.annotations) 41 | self.num_batchs = int(np.ceil(self.num_samples / self.batch_size)) 42 | self.batch_count = 0 43 | 44 | 45 | def load_annotations(self, dataset_type): 46 | final_annotations = [] 47 | with open(self.annot_path, 'r') as f: 48 | txt = f.read().splitlines() 49 | annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0] 50 | np.random.shuffle(annotations) 51 | 52 | # for annotation in annotations: 53 | # image_extension = '.jpg' 54 | # extension_index = annotation.find(image_extension) 55 | # image_path = annotation[:extension_index+len(image_extension)] 56 | # line = annotation[extension_index+len(image_extension):].split() 57 | # if not os.path.exists(image_path): 58 | # raise KeyError("%s does not exist ... 
" %image_path) 59 | # if TRAIN_LOAD_IMAGES_TO_RAM: 60 | # image = cv2.imread(image_path) 61 | # else: 62 | # image = '' 63 | # final_annotations.append([image_path, line, image]) 64 | # return final_annotations 65 | for annotation in annotations: 66 | # fully parse annotations 67 | line = annotation.split() 68 | image_path, index = "", 1 69 | for i, one_line in enumerate(line): 70 | if not one_line.replace(",","").isnumeric(): 71 | if image_path != "": image_path += " " 72 | image_path += one_line 73 | else: 74 | index = i 75 | break 76 | if not os.path.exists(image_path): 77 | raise KeyError("%s does not exist ... " %image_path) 78 | if TRAIN_LOAD_IMAGES_TO_RAM: 79 | image = cv2.imread(image_path) 80 | else: 81 | image = '' 82 | final_annotations.append([image_path, line[index:], image]) 83 | return final_annotations 84 | 85 | def __iter__(self): 86 | return self 87 | 88 | def Delete_bad_annotation(self, bad_annotation): 89 | print(f'Deleting {bad_annotation} annotation line') 90 | bad_image_path = bad_annotation[0] 91 | bad_image_name = bad_annotation[0].split('/')[-1] # can be used to delete bad image 92 | bad_xml_path = bad_annotation[0][:-3]+'xml' # can be used to delete bad xml file 93 | 94 | # remove bad annotation line from annotation file 95 | with open(self.annot_path, "r+") as f: 96 | d = f.readlines() 97 | f.seek(0) 98 | for i in d: 99 | if bad_image_name not in i: 100 | f.write(i) 101 | f.truncate() 102 | 103 | def __next__(self): 104 | with tf.device('/cpu:0'): 105 | self.train_input_size = random.choice([self.train_input_sizes]) 106 | self.train_output_sizes = self.train_input_size // self.strides 107 | 108 | batch_image = np.zeros((self.batch_size, self.train_input_size, self.train_input_size, 3), dtype=np.float32) 109 | 110 | if self.train_yolo_tiny: 111 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 112 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 113 | else: 114 | batch_label_sbbox = np.zeros((self.batch_size, self.train_output_sizes[0], self.train_output_sizes[0], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 115 | batch_label_mbbox = np.zeros((self.batch_size, self.train_output_sizes[1], self.train_output_sizes[1], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 116 | batch_label_lbbox = np.zeros((self.batch_size, self.train_output_sizes[2], self.train_output_sizes[2], self.anchor_per_scale, 5 + self.num_classes), dtype=np.float32) 117 | 118 | batch_sbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 119 | 120 | batch_mbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 121 | batch_lbboxes = np.zeros((self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32) 122 | 123 | exceptions = False 124 | num = 0 125 | if self.batch_count < self.num_batchs: 126 | while num < self.batch_size: 127 | index = self.batch_count * self.batch_size + num 128 | if index >= self.num_samples: index -= self.num_samples 129 | annotation = self.annotations[index] 130 | image, bboxes = self.parse_annotation(annotation) 131 | try: 132 | if self.train_yolo_tiny: 133 | label_mbbox, label_lbbox, mbboxes, lbboxes = self.preprocess_true_boxes(bboxes) 134 | else: 135 | label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes = 
self.preprocess_true_boxes(bboxes) 136 | except IndexError: 137 | exceptions = True 138 | self.Delete_bad_annotation(annotation) 139 | print("IndexError, something is wrong with", annotation[0], "- removed this line from the annotation file") 140 | num += 1; continue # skip the bad sample (its labels were never set); its batch slot stays zero-filled and training is restarted below 141 | batch_image[num, :, :, :] = image 142 | batch_label_mbbox[num, :, :, :, :] = label_mbbox 143 | batch_label_lbbox[num, :, :, :, :] = label_lbbox 144 | batch_mbboxes[num, :, :] = mbboxes 145 | batch_lbboxes[num, :, :] = lbboxes 146 | if not self.train_yolo_tiny: 147 | batch_label_sbbox[num, :, :, :, :] = label_sbbox 148 | batch_sbboxes[num, :, :] = sbboxes 149 | 150 | num += 1 151 | 152 | if exceptions: 153 | print('\n') 154 | raise Exception("There were problems with the dataset; the bad annotation lines were removed - restart the training process.") 155 | self.batch_count += 1 156 | batch_medium_target = batch_label_mbbox, batch_mbboxes 157 | batch_larger_target = batch_label_lbbox, batch_lbboxes 158 | if not self.train_yolo_tiny: # the small-object scale exists only in the full model 159 | batch_smaller_target = batch_label_sbbox, batch_sbboxes 160 | 161 | if self.train_yolo_tiny: 162 | return batch_image, (batch_medium_target, batch_larger_target) 163 | return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target) 164 | else: 165 | self.batch_count = 0 166 | np.random.shuffle(self.annotations) 167 | raise StopIteration 168 | 169 | def random_horizontal_flip(self, image, bboxes): 170 | if random.random() < 0.5: 171 | _, w, _ = image.shape 172 | image = image[:, ::-1, :] 173 | bboxes[:, [0,2]] = w - bboxes[:, [2,0]] 174 | 175 | return image, bboxes 176 | 177 | def random_crop(self, image, bboxes): 178 | if random.random() < 0.5: 179 | h, w, _ = image.shape 180 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) 181 | 182 | max_l_trans = max_bbox[0] 183 | max_u_trans = max_bbox[1] 184 | max_r_trans = w - max_bbox[2] 185 | max_d_trans = h - max_bbox[3] 186 | 187 | crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans))) 188 | crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans))) 189 | crop_xmax = min(w, int(max_bbox[2] + random.uniform(0, max_r_trans))) # clamp with min, not max, so the right edge can actually be cropped 190 | crop_ymax = min(h, int(max_bbox[3] + random.uniform(0, max_d_trans))) # same for the bottom edge 191 | 192 | image = image[crop_ymin : crop_ymax, crop_xmin : crop_xmax] 193 | 194 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin 195 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin 196 | 197 | return image, bboxes 198 | 199 | def random_translate(self, image, bboxes): 200 | if random.random() < 0.5: 201 | h, w, _ = image.shape 202 | max_bbox = np.concatenate([np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)], axis=-1) 203 | 204 | max_l_trans = max_bbox[0] 205 | max_u_trans = max_bbox[1] 206 | max_r_trans = w - max_bbox[2] 207 | max_d_trans = h - max_bbox[3] 208 | 209 | tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1)) 210 | ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1)) 211 | 212 | M = np.array([[1, 0, tx], [0, 1, ty]]) 213 | image = cv2.warpAffine(image, M, (w, h)) 214 | 215 | bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx 216 | bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty 217 | 218 | return image, bboxes 219 | 220 | def parse_annotation(self, annotation, mAP=False): 221 | if TRAIN_LOAD_IMAGES_TO_RAM: 222 | image_path = annotation[0] 223 | image = annotation[2] 224 | else: 225 | image_path = annotation[0] 226 | image = cv2.imread(image_path) 227 | 228 | bboxes = np.array([list(map(int, box.split(','))) for box in annotation[1]]) 229 | 230 | if self.data_aug: 231 | image, bboxes = self.random_horizontal_flip(np.copy(image), np.copy(bboxes)) 232 | image, bboxes = self.random_crop(np.copy(image), np.copy(bboxes)) 233 | image, bboxes = self.random_translate(np.copy(image), np.copy(bboxes)) 234 | 235 | #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 236 | if mAP: 237 | return image, bboxes 238 | 239 | image, bboxes = image_preprocess(np.copy(image), [self.input_sizes, self.input_sizes], np.copy(bboxes)) 240 | return image, bboxes 241 | 242 | def preprocess_true_boxes(self, bboxes): 243 | OUTPUT_LEVELS = len(self.strides) 244 | 245 | label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale, 246 | 5 + self.num_classes)) for i in range(OUTPUT_LEVELS)] 247 | bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(OUTPUT_LEVELS)] 248 | bbox_count = np.zeros((OUTPUT_LEVELS,)) 249 | 250 | for bbox in bboxes: 251 | bbox_coor = bbox[:4] 252 | bbox_class_ind = bbox[4] 253 | 254 | onehot = np.zeros(self.num_classes, dtype=np.float32) # np.float is deprecated in NumPy 255 | onehot[bbox_class_ind] = 1.0 256 | uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes) 257 | delta = 0.01 258 | smooth_onehot = onehot * (1 - delta) + delta * uniform_distribution 259 | 260 | bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1) 261 | bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis] 262 | 263 | iou = [] 264 | exist_positive = False 265 | for i in range(OUTPUT_LEVELS): 266 | anchors_xywh = np.zeros((self.anchor_per_scale, 4)) 267 | anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5 268 | anchors_xywh[:, 2:4] = self.anchors[i] 269 | 270 | iou_scale = bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh) 271 | iou.append(iou_scale) 272 | iou_mask = iou_scale > 0.3 273 | 274 | if np.any(iou_mask): 275 | xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) 276 | 277 | label[i][yind, xind, iou_mask, :] = 0 278 | label[i][yind, xind, iou_mask, 0:4] = bbox_xywh 279 | label[i][yind, xind, iou_mask, 4:5] = 1.0 280 | label[i][yind, xind, iou_mask, 5:] = smooth_onehot 281 | 282 | bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale) 283 | bboxes_xywh[i][bbox_ind, :4] = bbox_xywh 284 | bbox_count[i] += 1 285 | 286 | exist_positive = True 287 | 288 | if not exist_positive: 289 | best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1) 290 | best_detect = int(best_anchor_ind / self.anchor_per_scale) 291 | best_anchor = int(best_anchor_ind % self.anchor_per_scale) 292 | xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32) 293 | 294 | label[best_detect][yind, xind, best_anchor, :] = 0 295 | label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh 296 | label[best_detect][yind, xind, best_anchor, 4:5] = 1.0 297 | label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot 298 | 299 | bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale) 300 | bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh 301 | bbox_count[best_detect] += 1 302 | 303 | if self.train_yolo_tiny: 304 | label_mbbox, label_lbbox = label 305 | mbboxes, lbboxes = bboxes_xywh 306 | return label_mbbox, label_lbbox, mbboxes, lbboxes 307 | 308 | label_sbbox, label_mbbox, label_lbbox = label 309 | sbboxes, mbboxes, lbboxes = bboxes_xywh 310 | return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes 311 | 312 | def __len__(self): 313 | return self.num_batchs 314 |
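# --- Illustrative usage sketch (not part of the original file) ---
# Assuming TRAIN_ANNOT_PATH and TRAIN_CLASSES in yolov3/configs.py point at a
# prepared dataset, the loader can be exercised on its own:
#
#   from yolov3.dataset import Dataset
#   trainset = Dataset('train')
#   print(len(trainset))                  # number of batches per epoch
#   batch_image, targets = next(trainset) # one preprocessed, augmented batch
#   print(batch_image.shape)              # (TRAIN_BATCH_SIZE, TRAIN_INPUT_SIZE, TRAIN_INPUT_SIZE, 3)
#   # targets holds one (label, bboxes) pair per output scale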
-------------------------------------------------------------------------------- /yolov3/utils.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : utils.py 4 | # Author : PyLessons 5 | # Created date: 2020-09-27 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : additional yolov3 and yolov4 functions 9 | # 10 | #================================================================ 11 | from multiprocessing import Process, Queue, Pipe 12 | import cv2 13 | import time 14 | import random 15 | import colorsys 16 | import numpy as np 17 | import tensorflow as tf 18 | from yolov3.configs import * 19 | from yolov3.yolov4 import * 20 | from tensorflow.python.saved_model import tag_constants 21 | 22 | def load_yolo_weights(model, weights_file): 23 | tf.keras.backend.clear_session() # used to reset layer names 24 | # load Darknet original weights to TensorFlow model 25 | if YOLO_TYPE == "yolov3": 26 | range1 = 75 if not TRAIN_YOLO_TINY else 13 27 | range2 = [58, 66, 74] if not TRAIN_YOLO_TINY else [9, 12] 28 | if YOLO_TYPE == "yolov4": 29 | range1 = 110 if not TRAIN_YOLO_TINY else 21 30 | range2 = [93, 101, 109] if not TRAIN_YOLO_TINY else [17, 20] 31 | 32 | with open(weights_file, 'rb') as wf: 33 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5) 34 | 35 | j = 0 36 | for i in range(range1): 37 | if i > 0: 38 | conv_layer_name = 'conv2d_%d' %i 39 | else: 40 | conv_layer_name = 'conv2d' 41 | 42 | if j > 0: 43 | bn_layer_name = 'batch_normalization_%d' %j 44 | else: 45 | bn_layer_name = 'batch_normalization' 46 | 47 | conv_layer = model.get_layer(conv_layer_name) 48 | filters = conv_layer.filters 49 | k_size = conv_layer.kernel_size[0] 50 | in_dim = conv_layer.input_shape[-1] 51 | 52 | if i not in range2: 53 | # darknet weights: [beta, gamma, mean, variance] 54 | bn_weights = np.fromfile(wf, dtype=np.float32, count=4 * filters) 55 | # tf weights: [gamma, beta, mean, variance] 56 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 57 | bn_layer = model.get_layer(bn_layer_name) 58 | j += 1 59 | else: 60 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters) 61 | 62 | # darknet shape (out_dim, in_dim, height, width) 63 | conv_shape = (filters, in_dim, k_size, k_size) 64 | conv_weights = np.fromfile(wf, dtype=np.float32, count=np.product(conv_shape)) 65 | # tf shape (height, width, in_dim, out_dim) 66 | conv_weights = conv_weights.reshape(conv_shape).transpose([2, 3, 1, 0]) 67 | 68 | if i not in range2: 69 | conv_layer.set_weights([conv_weights]) 70 | bn_layer.set_weights(bn_weights) 71 | else: 72 | conv_layer.set_weights([conv_weights, conv_bias]) 73 | 74 | assert len(wf.read()) == 0, 'failed to read all data' 75 | 76 | def Load_Yolo_model(): 77 | gpus = tf.config.experimental.list_physical_devices('GPU') 78 | if len(gpus) > 0: 79 | print(f'GPUs {gpus}') 80 | try: tf.config.experimental.set_memory_growth(gpus[0], True) 81 | except RuntimeError: pass 82 | 83 | if YOLO_FRAMEWORK == "tf": # TensorFlow detection 84 | if YOLO_TYPE == "yolov4": 85 | Darknet_weights = YOLO_V4_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V4_WEIGHTS 86 | if YOLO_TYPE == "yolov3": 87 | Darknet_weights = YOLO_V3_TINY_WEIGHTS if TRAIN_YOLO_TINY else YOLO_V3_WEIGHTS 88 | 89 | if YOLO_CUSTOM_WEIGHTS == False: 90 | print("Loading Darknet_weights from:", Darknet_weights) 91 | yolo = 
Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=YOLO_COCO_CLASSES) 92 | load_yolo_weights(yolo, Darknet_weights) # use Darknet weights 93 | else: 94 | checkpoint = f"./checkpoints/{TRAIN_MODEL_NAME}" 95 | if TRAIN_YOLO_TINY: 96 | checkpoint += "_Tiny" 97 | print("Loading custom weights from:", checkpoint) 98 | yolo = Create_Yolo(input_size=YOLO_INPUT_SIZE, CLASSES=TRAIN_CLASSES) 99 | yolo.load_weights(checkpoint) # use custom weights 100 | 101 | elif YOLO_FRAMEWORK == "trt": # TensorRT detection 102 | saved_model_loaded = tf.saved_model.load(YOLO_CUSTOM_WEIGHTS, tags=[tag_constants.SERVING]) 103 | signature_keys = list(saved_model_loaded.signatures.keys()) 104 | yolo = saved_model_loaded.signatures['serving_default'] 105 | 106 | return yolo 107 | 108 | def image_preprocess(image, target_size, gt_boxes=None): 109 | ih, iw = target_size 110 | h, w, _ = image.shape 111 | 112 | scale = min(iw/w, ih/h) 113 | nw, nh = int(scale * w), int(scale * h) 114 | image_resized = cv2.resize(image, (nw, nh)) 115 | 116 | image_paded = np.full(shape=[ih, iw, 3], fill_value=128.0) 117 | dw, dh = (iw - nw) // 2, (ih-nh) // 2 118 | image_paded[dh:nh+dh, dw:nw+dw, :] = image_resized 119 | image_paded = image_paded / 255. 120 | 121 | if gt_boxes is None: 122 | return image_paded 123 | 124 | else: 125 | gt_boxes[:, [0, 2]] = gt_boxes[:, [0, 2]] * scale + dw 126 | gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh 127 | return image_paded, gt_boxes 128 | 129 | 130 | def draw_bbox(image, bboxes, CLASSES=YOLO_COCO_CLASSES, show_label=True, show_confidence = True, Text_colors=(255,255,0), rectangle_colors='', tracking=False): 131 | NUM_CLASS = read_class_names(CLASSES) 132 | num_classes = len(NUM_CLASS) 133 | image_h, image_w, _ = image.shape 134 | hsv_tuples = [(1.0 * x / num_classes, 1., 1.) 
for x in range(num_classes)] 135 | #print("hsv_tuples", hsv_tuples) 136 | colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 137 | colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors)) 138 | 139 | random.seed(0) 140 | random.shuffle(colors) 141 | random.seed(None) 142 | 143 | for i, bbox in enumerate(bboxes): 144 | coor = np.array(bbox[:4], dtype=np.int32) 145 | score = bbox[4] 146 | class_ind = int(bbox[5]) 147 | bbox_color = rectangle_colors if rectangle_colors != '' else colors[class_ind] 148 | bbox_thick = int(0.6 * (image_h + image_w) / 1000) 149 | if bbox_thick < 1: bbox_thick = 1 150 | fontScale = 0.75 * bbox_thick 151 | (x1, y1), (x2, y2) = (coor[0], coor[1]), (coor[2], coor[3]) 152 | 153 | # put object rectangle 154 | cv2.rectangle(image, (x1, y1), (x2, y2), bbox_color, bbox_thick*2) 155 | 156 | if show_label: 157 | # get text label 158 | score_str = " {:.2f}".format(score) if show_confidence else "" 159 | 160 | if tracking: score_str = " "+str(score) 161 | 162 | try: 163 | label = "{}".format(NUM_CLASS[class_ind]) + score_str 164 | except KeyError: 165 | print("You received a KeyError; this usually means the original YOLO weights are being run") 166 | print("with custom classes - if you use a custom model, set YOLO_CUSTOM_WEIGHTS in configs.py") 167 | label = str(class_ind) + score_str # fall back to the raw class index so drawing does not crash 168 | # get text size 169 | (text_width, text_height), baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_COMPLEX_SMALL, 170 | fontScale, thickness=bbox_thick) 171 | # put filled text rectangle 172 | cv2.rectangle(image, (x1, y1), (x1 + text_width, y1 - text_height - baseline), bbox_color, thickness=cv2.FILLED) 173 | 174 | # put text above rectangle 175 | cv2.putText(image, label, (x1, y1-4), cv2.FONT_HERSHEY_COMPLEX_SMALL, 176 | fontScale, Text_colors, bbox_thick, lineType=cv2.LINE_AA) 177 | 178 | return image 179 | 180 | 181 | def bboxes_iou(boxes1, boxes2): 182 | boxes1 = np.array(boxes1) 183 | boxes2 = np.array(boxes2) 184 | 185 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) 186 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 187 | 188 | left_up = np.maximum(boxes1[..., :2], boxes2[..., :2]) 189 | right_down = np.minimum(boxes1[..., 2:], boxes2[..., 2:]) 190 | 191 | inter_section = np.maximum(right_down - left_up, 0.0) 192 | inter_area = inter_section[..., 0] * inter_section[..., 1] 193 | union_area = boxes1_area + boxes2_area - inter_area 194 | ious = np.maximum(1.0 * inter_area / union_area, np.finfo(np.float32).eps) 195 | 196 | return ious 197 | 198 | 199 | def nms(bboxes, iou_threshold, sigma=0.3, method='nms'): 200 | """ 201 | :param bboxes: (xmin, ymin, xmax, ymax, score, class) 202 | 203 | Note: soft-nms, https://arxiv.org/pdf/1704.04503.pdf 204 | https://github.com/bharatsingh430/soft-nms 205 | """ 206 | classes_in_img = list(set(bboxes[:, 5])) 207 | best_bboxes = [] 208 | 209 | for cls in classes_in_img: 210 | cls_mask = (bboxes[:, 5] == cls) 211 | cls_bboxes = bboxes[cls_mask] 212 | # Process 1: keep looping while bounding boxes of this class remain 213 | while len(cls_bboxes) > 0: 214 | # Process 2: select the bounding box with the highest score (box A) 215 | max_ind = np.argmax(cls_bboxes[:, 4]) 216 | best_bbox = cls_bboxes[max_ind] 217 | best_bboxes.append(best_bbox) 218 | cls_bboxes = np.concatenate([cls_bboxes[: max_ind], cls_bboxes[max_ind + 1:]]) 219 | # Process 3: compute the IoU between box A and every remaining box, then 220 | # suppress (or down-weight, for soft-nms) the boxes whose IoU exceeds the threshold 221 | iou = bboxes_iou(best_bbox[np.newaxis, :4], cls_bboxes[:, :4]) 222 | weight = np.ones((len(iou),), dtype=np.float32) 223 | 224 | assert method in ['nms', 'soft-nms'] 225 | 226 | if method == 'nms': 227 | iou_mask = iou > iou_threshold 228 | weight[iou_mask] = 0.0 229 | 230 | if method == 'soft-nms': 231 | weight = np.exp(-(1.0 * iou ** 2 / sigma)) 232 | 233 | cls_bboxes[:, 4] = cls_bboxes[:, 4] * weight 234 | score_mask = cls_bboxes[:, 4] > 0. 235 | cls_bboxes = cls_bboxes[score_mask] 236 | 237 | return best_bboxes 238 | 239 | 240 | def postprocess_boxes(pred_bbox, original_image, input_size, score_threshold): 241 | valid_scale=[0, np.inf] 242 | pred_bbox = np.array(pred_bbox) 243 | 244 | pred_xywh = pred_bbox[:, 0:4] 245 | pred_conf = pred_bbox[:, 4] 246 | pred_prob = pred_bbox[:, 5:] 247 | 248 | # 1. (x, y, w, h) --> (xmin, ymin, xmax, ymax) 249 | pred_coor = np.concatenate([pred_xywh[:, :2] - pred_xywh[:, 2:] * 0.5, 250 | pred_xywh[:, :2] + pred_xywh[:, 2:] * 0.5], axis=-1) 251 | # 2. (xmin, ymin, xmax, ymax) -> (xmin_org, ymin_org, xmax_org, ymax_org) 252 | org_h, org_w = original_image.shape[:2] 253 | resize_ratio = min(input_size / org_w, input_size / org_h) 254 | 255 | dw = (input_size - resize_ratio * org_w) / 2 256 | dh = (input_size - resize_ratio * org_h) / 2 257 | 258 | pred_coor[:, 0::2] = 1.0 * (pred_coor[:, 0::2] - dw) / resize_ratio 259 | pred_coor[:, 1::2] = 1.0 * (pred_coor[:, 1::2] - dh) / resize_ratio 260 | 261 | # 3. clip boxes that reach outside the original image 262 | pred_coor = np.concatenate([np.maximum(pred_coor[:, :2], [0, 0]), 263 | np.minimum(pred_coor[:, 2:], [org_w - 1, org_h - 1])], axis=-1) 264 | invalid_mask = np.logical_or((pred_coor[:, 0] > pred_coor[:, 2]), (pred_coor[:, 1] > pred_coor[:, 3])) 265 | pred_coor[invalid_mask] = 0 266 | 267 | # 4. discard boxes that became degenerate after clipping 268 | bboxes_scale = np.sqrt(np.multiply.reduce(pred_coor[:, 2:4] - pred_coor[:, 0:2], axis=-1)) 269 | scale_mask = np.logical_and((valid_scale[0] < bboxes_scale), (bboxes_scale < valid_scale[1])) 270 | 271 | # 5. discard boxes with low scores 272 | classes = np.argmax(pred_prob, axis=-1) 273 | scores = pred_conf * pred_prob[np.arange(len(pred_coor)), classes] 274 | score_mask = scores > score_threshold 275 | mask = np.logical_and(scale_mask, score_mask) 276 | coors, scores, classes = pred_coor[mask], scores[mask], classes[mask] 277 | 278 | return np.concatenate([coors, scores[:, np.newaxis], classes[:, np.newaxis]], axis=-1) 279 | 280 | 281 | def detect_image(Yolo, image_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''): 282 | original_image = cv2.imread(image_path) 283 | # the original applied cv2.COLOR_BGR2RGB twice in a row here, which cancels out; 284 | # the redundant pair is dropped, so the image stays BGR end-to-end, exactly as before 285 | 286 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size]) 287 | image_data = image_data[np.newaxis, ...].astype(np.float32) 288 | 289 | if YOLO_FRAMEWORK == "tf": 290 | pred_bbox = Yolo.predict(image_data) 291 | elif YOLO_FRAMEWORK == "trt": 292 | batched_input = tf.constant(image_data) 293 | result = Yolo(batched_input) 294 | pred_bbox = [] 295 | for key, value in result.items(): 296 | value = value.numpy() 297 | pred_bbox.append(value) 298 | 299 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 300 | pred_bbox = tf.concat(pred_bbox, axis=0) 301 | 302 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold) 303 | bboxes = nms(bboxes, iou_threshold, method='nms') 304 | 305 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) 306 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES)) 307 | 308 | if output_path != '': cv2.imwrite(output_path, image) 309 | if show: 310 | # show the image in a window 311 | cv2.imshow("predicted image", image) 312 | # block until any key is pressed 313 | cv2.waitKey(0) 314 | # then close the window 315 | cv2.destroyAllWindows() 316 | 317 | return image 318 | 319 | def Predict_bbox_mp(Frames_data, Predicted_data, Processing_times): 320 | gpus = tf.config.experimental.list_physical_devices('GPU') 321 | if len(gpus) > 0: 322 | try: tf.config.experimental.set_memory_growth(gpus[0], True) 323 | except RuntimeError: print("RuntimeError in tf.config.experimental.set_memory_growth") 324 | Yolo = Load_Yolo_model() 325 | times = [] 326 | while True: 327 | if Frames_data.qsize()>0: 328 | image_data = Frames_data.get() 329 | t1 = time.time() 330 | Processing_times.put(time.time()) 331 | 332 | if YOLO_FRAMEWORK == "tf": 333 | if tf.__version__ > '2.4.0': # note: lexicographic compare - misorders versions like '2.10.0' 334 | pred_bbox = Yolo(image_data) 335 | else: 336 | pred_bbox = Yolo.predict(image_data) 337 | elif YOLO_FRAMEWORK == "trt": 338 | batched_input = tf.constant(image_data) 339 | result = Yolo(batched_input) 340 | pred_bbox = [] 341 | for key, value in result.items(): 342 | value = value.numpy() 343 | pred_bbox.append(value) 344 | 345 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 346 | pred_bbox = tf.concat(pred_bbox, axis=0) 347 | 348 | Predicted_data.put(pred_bbox) 349 | 350 | 351 | def postprocess_mp(Predicted_data, original_frames, Processed_frames, Processing_times, input_size, CLASSES, score_threshold, iou_threshold, rectangle_colors, realtime): 352 | times = [] 353 | while True: 354 | if Predicted_data.qsize()>0: 355 | pred_bbox = Predicted_data.get() 356 | if realtime: 357 | while
original_frames.qsize() > 1: 358 | original_image = original_frames.get() 359 | else: 360 | original_image = original_frames.get() 361 | 362 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold) 363 | bboxes = nms(bboxes, iou_threshold, method='nms') 364 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) 365 | times.append(time.time()-Processing_times.get()) 366 | times = times[-20:] 367 | 368 | ms = sum(times)/len(times)*1000 369 | fps = 1000 / ms 370 | image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 371 | #print("Time: {:.2f}ms, Final FPS: {:.1f}".format(ms, fps)) 372 | 373 | Processed_frames.put(image) 374 | 375 | def Show_Image_mp(Processed_frames, show, Final_frames): 376 | while True: 377 | if Processed_frames.qsize()>0: 378 | image = Processed_frames.get() 379 | Final_frames.put(image) 380 | if show: 381 | cv2.imshow('output', image) 382 | if cv2.waitKey(25) & 0xFF == ord("q"): 383 | cv2.destroyAllWindows() 384 | break 385 | 386 | # detect from a webcam or a video file using three worker processes (predict, post-process, display) 387 | def detect_video_realtime_mp(video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors='', realtime=False): 388 | if realtime: 389 | vid = cv2.VideoCapture(0) 390 | else: 391 | vid = cv2.VideoCapture(video_path) 392 | 393 | # by default VideoCapture returns float instead of int 394 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 395 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 396 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 397 | codec = cv2.VideoWriter_fourcc(*'XVID') 398 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 399 | no_of_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) 400 | 401 | original_frames = Queue() 402 | Frames_data = Queue() 403 | Predicted_data = Queue() 404 | Processed_frames = Queue() 405 | Processing_times = Queue() 406 | Final_frames = Queue() 407 | 408 | p1 = Process(target=Predict_bbox_mp, args=(Frames_data, Predicted_data, Processing_times)) 409 | p2 = Process(target=postprocess_mp, args=(Predicted_data, original_frames, Processed_frames, Processing_times, input_size, CLASSES, score_threshold, iou_threshold, rectangle_colors, realtime)) 410 | p3 = Process(target=Show_Image_mp, args=(Processed_frames, show, Final_frames)) 411 | p1.start() 412 | p2.start() 413 | p3.start() 414 | 415 | while True: 416 | ret, img = vid.read() 417 | if not ret: 418 | break 419 | 420 | original_image = img # the back-to-back BGR2RGB conversions in the original cancelled out, so the frame is used as-is 421 | 422 | original_frames.put(original_image) 423 | 424 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size]) 425 | image_data = image_data[np.newaxis, ...].astype(np.float32) 426 | Frames_data.put(image_data) 427 | 428 | while True: 429 | if original_frames.qsize() == 0 and Frames_data.qsize() == 0 and Predicted_data.qsize() == 0 and Processed_frames.qsize() == 0 and Processing_times.qsize() == 0 and Final_frames.qsize() == 0: 430 | p1.terminate() 431 | p2.terminate() 432 | p3.terminate() 433 | break 434 | elif Final_frames.qsize()>0: 435 | image = Final_frames.get() 436 | if output_path != '': out.write(image) 437 | 438 | cv2.destroyAllWindows() 439 | 440 | def detect_video(Yolo, video_path, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''): 
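# times below tracks pure model inference (t2 - t1), while times_2 also includes
# post-processing and drawing (t3 - t1); both keep only the last 20 frames so the
# FPS printed and drawn on the output is a smoothed moving average.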
times, times_2 = [], [] 442 | vid = cv2.VideoCapture(video_path) 443 | 444 | # by default VideoCapture returns float instead of int 445 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 446 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 447 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 448 | codec = cv2.VideoWriter_fourcc(*'XVID') 449 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 450 | 451 | while True: 452 | ret, img = vid.read() 453 | 454 | # stop when the stream ends (the original wrapped a no-op double BGR2RGB conversion in a bare try/except for this) 455 | if not ret or img is None: 456 | break 457 | original_image = img 458 | 459 | 460 | image_data = image_preprocess(np.copy(original_image), [input_size, input_size]) 461 | image_data = image_data[np.newaxis, ...].astype(np.float32) 462 | 463 | t1 = time.time() 464 | if YOLO_FRAMEWORK == "tf": 465 | if tf.__version__ > '2.4.0': # note: lexicographic compare - misorders versions like '2.10.0' 466 | pred_bbox = Yolo(image_data, training=False) 467 | else: 468 | pred_bbox = Yolo.predict(image_data) 469 | elif YOLO_FRAMEWORK == "trt": 470 | batched_input = tf.constant(image_data) 471 | result = Yolo(batched_input) 472 | pred_bbox = [] 473 | for key, value in result.items(): 474 | value = value.numpy() 475 | pred_bbox.append(value) 476 | 477 | t2 = time.time() 478 | 479 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 480 | pred_bbox = tf.concat(pred_bbox, axis=0) 481 | 482 | bboxes = postprocess_boxes(pred_bbox, original_image, input_size, score_threshold) 483 | bboxes = nms(bboxes, iou_threshold, method='nms') 484 | 485 | image = draw_bbox(original_image, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) 486 | 487 | t3 = time.time() 488 | times.append(t2-t1) 489 | times_2.append(t3-t1) 490 | 491 | times = times[-20:] 492 | times_2 = times_2[-20:] 493 | 494 | ms = sum(times)/len(times)*1000 495 | fps = 1000 / ms 496 | fps2 = 1000 / (sum(times_2)/len(times_2)*1000) 497 | 498 | image = cv2.putText(image, "Time: {:.1f}FPS".format(fps), (0, 30), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 499 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_image, bboxes, read_class_names(CLASSES)) 500 | 501 | print("Time: {:.2f}ms, Detection FPS: {:.1f}, total FPS: {:.1f}".format(ms, fps, fps2)) 502 | if output_path != '': out.write(image) 503 | if show: 504 | cv2.imshow('output', image) 505 | if cv2.waitKey(25) & 0xFF == ord("q"): 506 | cv2.destroyAllWindows() 507 | break 508 | 509 | cv2.destroyAllWindows() 510 | 511 | # detect from webcam 512 | def detect_realtime(Yolo, output_path, input_size=416, show=False, CLASSES=YOLO_COCO_CLASSES, score_threshold=0.3, iou_threshold=0.45, rectangle_colors=''): 513 | times = [] 514 | vid = cv2.VideoCapture(1) # camera index 1; use 0 for the default webcam 515 | 516 | if output_path: 517 | # by default VideoCapture returns float instead of int 518 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 519 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 520 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 521 | codec = cv2.VideoWriter_fourcc(*'XVID') 522 | out = cv2.VideoWriter(output_path, codec, fps, (width, height)) # output_path must be .mp4 523 | 524 | while True: 525 | ret, frame = vid.read() 526 | 527 | # stop when the camera stops delivering frames 528 | if not ret or frame is None: 529 | break 530 | original_frame = frame # the duplicated BGR2RGB pair cancelled out, so keep the frame as-is 531 | 532 | image_data = image_preprocess(np.copy(original_frame), [input_size, input_size]) 533 | image_data = image_data[np.newaxis, ...].astype(np.float32) 534 | 535 | t1 = time.time() 536 | if YOLO_FRAMEWORK == 
"tf": 537 | if tf.__version__ > '2.4.0': 538 | pred_bbox = Yolo(image_data, training=False) 539 | else: 540 | pred_bbox = Yolo.predict(image_data) 541 | # if True: 542 | # pred_bbox = Yolo.predict(image_data) 543 | elif YOLO_FRAMEWORK == "trt": 544 | batched_input = tf.constant(image_data) 545 | result = Yolo(batched_input) 546 | pred_bbox = [] 547 | for key, value in result.items(): 548 | value = value.numpy() 549 | pred_bbox.append(value) 550 | 551 | t2 = time.time() 552 | 553 | pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox] 554 | pred_bbox = tf.concat(pred_bbox, axis=0) 555 | 556 | bboxes = postprocess_boxes(pred_bbox, original_frame, input_size, score_threshold) 557 | bboxes = nms(bboxes, iou_threshold, method='nms') 558 | 559 | times.append(t2-t1) 560 | times = times[-20:] 561 | 562 | ms = sum(times)/len(times)*1000 563 | fps = 1000 / ms 564 | 565 | print("Time: {:.2f}ms, {:.1f} FPS".format(ms, fps)) 566 | 567 | frame = draw_bbox(original_frame, bboxes, CLASSES=CLASSES, rectangle_colors=rectangle_colors) 568 | # CreateXMLfile("XML_Detections", str(int(time.time())), original_frame, bboxes, read_class_names(CLASSES)) 569 | image = cv2.putText(frame, "Time: {:.1f}FPS".format(fps), (0, 30), 570 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 571 | 572 | if output_path != '': out.write(frame) 573 | if show: 574 | cv2.imshow('output', frame) 575 | if cv2.waitKey(25) & 0xFF == ord("q"): 576 | cv2.destroyAllWindows() 577 | break 578 | 579 | cv2.destroyAllWindows() 580 | -------------------------------------------------------------------------------- /yolov3/yolov3.py: -------------------------------------------------------------------------------- 1 | #================================================================ 2 | # 3 | # File name : yolov3.py 4 | # Author : PyLessons 5 | # Created date: 2020-06-04 6 | # Website : https://pylessons.com/ 7 | # GitHub : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3 8 | # Description : main yolov3 functions 9 | # 10 | #================================================================ 11 | import numpy as np 12 | import tensorflow as tf 13 | from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, ZeroPadding2D, BatchNormalization, MaxPool2D 14 | from tensorflow.keras.regularizers import l2 15 | from yolov3.utils import read_class_names 16 | from yolov3.configs import * 17 | 18 | STRIDES = np.array(YOLO_STRIDES) 19 | ANCHORS = (np.array(YOLO_ANCHORS).T/STRIDES).T 20 | 21 | class BatchNormalization(BatchNormalization): 22 | # "Frozen state" and "inference mode" are two separate concepts. 23 | # `layer.trainable = False` is to freeze the layer, so the layer will use 24 | # stored moving `var` and `mean` in the "inference mode", and both `gama` 25 | # and `beta` will not be updated ! 
26 | def call(self, x, training=False): 27 | if not training: 28 | training = tf.constant(False) 29 | training = tf.logical_and(training, self.trainable) 30 | return super().call(x, training) 31 | 32 | def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True): 33 | if downsample: 34 | input_layer = ZeroPadding2D(((1, 0), (1, 0)))(input_layer) 35 | padding = 'valid' 36 | strides = 2 37 | else: 38 | strides = 1 39 | padding = 'same' 40 | 41 | conv = Conv2D(filters=filters_shape[-1], kernel_size = filters_shape[0], strides=strides, 42 | padding=padding, use_bias=not bn, kernel_regularizer=l2(0.0005), 43 | kernel_initializer=tf.random_normal_initializer(stddev=0.01), 44 | bias_initializer=tf.constant_initializer(0.))(input_layer) 45 | if bn: 46 | conv = BatchNormalization()(conv) 47 | if activate == True: 48 | conv = LeakyReLU(alpha=0.1)(conv) 49 | 50 | return conv 51 | 52 | def residual_block(input_layer, input_channel, filter_num1, filter_num2): 53 | short_cut = input_layer 54 | conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1)) 55 | conv = convolutional(conv , filters_shape=(3, 3, filter_num1, filter_num2)) 56 | 57 | residual_output = short_cut + conv 58 | return residual_output 59 | 60 | def upsample(input_layer): 61 | return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest') 62 | 63 | 64 | def darknet53(input_data): 65 | input_data = convolutional(input_data, (3, 3, 3, 32)) 66 | input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True) 67 | 68 | for i in range(1): 69 | input_data = residual_block(input_data, 64, 32, 64) 70 | 71 | input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True) 72 | 73 | for i in range(2): 74 | input_data = residual_block(input_data, 128, 64, 128) 75 | 76 | input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True) 77 | 78 | for i in range(8): 79 | input_data = residual_block(input_data, 256, 128, 256) 80 | 81 | route_1 = input_data 82 | input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True) 83 | 84 | for i in range(8): 85 | input_data = residual_block(input_data, 512, 256, 512) 86 | 87 | route_2 = input_data 88 | input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True) 89 | 90 | for i in range(4): 91 | input_data = residual_block(input_data, 1024, 512, 1024) 92 | 93 | return route_1, route_2, input_data 94 | 95 | def darknet19_tiny(input_data): 96 | input_data = convolutional(input_data, (3, 3, 3, 16)) 97 | input_data = MaxPool2D(2, 2, 'same')(input_data) 98 | input_data = convolutional(input_data, (3, 3, 16, 32)) 99 | input_data = MaxPool2D(2, 2, 'same')(input_data) 100 | input_data = convolutional(input_data, (3, 3, 32, 64)) 101 | input_data = MaxPool2D(2, 2, 'same')(input_data) 102 | input_data = convolutional(input_data, (3, 3, 64, 128)) 103 | input_data = MaxPool2D(2, 2, 'same')(input_data) 104 | input_data = convolutional(input_data, (3, 3, 128, 256)) 105 | route_1 = input_data 106 | input_data = MaxPool2D(2, 2, 'same')(input_data) 107 | input_data = convolutional(input_data, (3, 3, 256, 512)) 108 | input_data = MaxPool2D(2, 1, 'same')(input_data) 109 | input_data = convolutional(input_data, (3, 3, 512, 1024)) 110 | 111 | return route_1, input_data 112 | 113 | def YOLOv3(input_layer, NUM_CLASS): 114 | # After the input layer enters the Darknet-53 network, we get three branches 115 | route_1, route_2, conv = darknet53(input_layer) 116 | # See the orange module (DBL) 
in the reference diagram: five successive convolution operations in total 117 | conv = convolutional(conv, (1, 1, 1024, 512)) 118 | conv = convolutional(conv, (3, 3, 512, 1024)) 119 | conv = convolutional(conv, (1, 1, 1024, 512)) 120 | conv = convolutional(conv, (3, 3, 512, 1024)) 121 | conv = convolutional(conv, (1, 1, 1024, 512)) 122 | conv_lobj_branch = convolutional(conv, (3, 3, 512, 1024)) 123 | 124 | # conv_lbbox is used to predict large-sized objects, shape = [None, 13, 13, 255] 125 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False) 126 | 127 | conv = convolutional(conv, (1, 1, 512, 256)) 128 | # upsample here uses the nearest neighbor interpolation method, which has the advantage that the 129 | # upsampling process does not need to learn, thereby reducing the network parameters 130 | conv = upsample(conv) 131 | 132 | conv = tf.concat([conv, route_2], axis=-1) 133 | conv = convolutional(conv, (1, 1, 768, 256)) 134 | conv = convolutional(conv, (3, 3, 256, 512)) 135 | conv = convolutional(conv, (1, 1, 512, 256)) 136 | conv = convolutional(conv, (3, 3, 256, 512)) 137 | conv = convolutional(conv, (1, 1, 512, 256)) 138 | conv_mobj_branch = convolutional(conv, (3, 3, 256, 512)) 139 | 140 | # conv_mbbox is used to predict medium-sized objects, shape = [None, 26, 26, 255] 141 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) 142 | 143 | conv = convolutional(conv, (1, 1, 256, 128)) 144 | conv = upsample(conv) 145 | 146 | conv = tf.concat([conv, route_1], axis=-1) 147 | conv = convolutional(conv, (1, 1, 384, 128)) 148 | conv = convolutional(conv, (3, 3, 128, 256)) 149 | conv = convolutional(conv, (1, 1, 256, 128)) 150 | conv = convolutional(conv, (3, 3, 128, 256)) 151 | conv = convolutional(conv, (1, 1, 256, 128)) 152 | conv_sobj_branch = convolutional(conv, (3, 3, 128, 256)) 153 | 154 | # conv_sbbox is used to predict small-sized objects, shape = [None, 52, 52, 255] 155 | conv_sbbox = convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False) 156 | 157 | return [conv_sbbox, conv_mbbox, conv_lbbox] 158 | 159 | def YOLOv3_tiny(input_layer, NUM_CLASS): 160 | # After the input layer enters the Darknet19-tiny network, we get two branches 161 | route_1, conv = darknet19_tiny(input_layer) 162 | 163 | conv = convolutional(conv, (1, 1, 1024, 256)) 164 | conv_lobj_branch = convolutional(conv, (3, 3, 256, 512)) 165 | 166 | # conv_lbbox is used to predict large-sized objects, shape = [None, 13, 13, 255] (stride 32 on a 416 input gives a 13x13 grid) 167 | conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False) 168 | 169 | conv = convolutional(conv, (1, 1, 256, 128)) 170 | # upsample here uses the nearest neighbor interpolation method, which has the advantage that the 171 | # upsampling process does not need to learn, thereby reducing the network parameters 172 | conv = upsample(conv) 173 | 174 | conv = tf.concat([conv, route_1], axis=-1) 175 | conv_mobj_branch = convolutional(conv, (3, 3, 128, 256)) 176 | # conv_mbbox is used to predict medium-sized objects, shape = [None, 26, 26, 255] 177 | conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False) 178 | 179 | return [conv_mbbox, conv_lbbox] 180 | 181 | def Create_Yolov3(input_size=416, channels=3, training=False, CLASSES=YOLO_COCO_CLASSES): 182 | NUM_CLASS = len(read_class_names(CLASSES)) 183 | input_layer = Input([input_size, input_size, channels]) 184 | 185 | if TRAIN_YOLO_TINY: 186 | 
conv_tensors = YOLOv3_tiny(input_layer, NUM_CLASS) 187 | else: 188 | conv_tensors = YOLOv3(input_layer, NUM_CLASS) 189 | 190 | output_tensors = [] 191 | for i, conv_tensor in enumerate(conv_tensors): 192 | pred_tensor = decode(conv_tensor, NUM_CLASS, i) 193 | if training: output_tensors.append(conv_tensor) 194 | output_tensors.append(pred_tensor) 195 | 196 | YoloV3 = tf.keras.Model(input_layer, output_tensors) 197 | return YoloV3 198 | 199 | def decode(conv_output, NUM_CLASS, i=0): 200 | # where i = 0, 1 or 2 to correspond to the three grid scales 201 | conv_shape = tf.shape(conv_output) 202 | batch_size = conv_shape[0] 203 | output_size = conv_shape[1] 204 | 205 | conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 206 | 207 | conv_raw_dxdy = conv_output[:, :, :, :, 0:2] # offset of center position 208 | conv_raw_dwdh = conv_output[:, :, :, :, 2:4] # Prediction box length and width offset 209 | conv_raw_conf = conv_output[:, :, :, :, 4:5] # confidence of the prediction box 210 | conv_raw_prob = conv_output[:, :, :, :, 5: ] # category probability of the prediction box 211 | 212 | # next need Draw the grid. Where output_size is equal to 13, 26 or 52 213 | y = tf.range(output_size, dtype=tf.int32) 214 | y = tf.expand_dims(y, -1) 215 | y = tf.tile(y, [1, output_size]) 216 | x = tf.range(output_size,dtype=tf.int32) 217 | x = tf.expand_dims(x, 0) 218 | x = tf.tile(x, [output_size, 1]) 219 | 220 | xy_grid = tf.concat([x[:, :, tf.newaxis], y[:, :, tf.newaxis]], axis=-1) 221 | xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1]) 222 | xy_grid = tf.cast(xy_grid, tf.float32) 223 | 224 | # Calculate the center position of the prediction box: 225 | pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i] 226 | # Calculate the length and width of the prediction box: 227 | pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i] 228 | 229 | pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1) 230 | pred_conf = tf.sigmoid(conv_raw_conf) # object box calculates the predicted confidence 231 | pred_prob = tf.sigmoid(conv_raw_prob) # calculating the predicted probability category box object 232 | 233 | # calculating the predicted probability category box object 234 | return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1) 235 | 236 | def bbox_iou(boxes1, boxes2): 237 | boxes1_area = boxes1[..., 2] * boxes1[..., 3] 238 | boxes2_area = boxes2[..., 2] * boxes2[..., 3] 239 | 240 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 241 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 242 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 243 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 244 | 245 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) 246 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) 247 | 248 | inter_section = tf.maximum(right_down - left_up, 0.0) 249 | inter_area = inter_section[..., 0] * inter_section[..., 1] 250 | union_area = boxes1_area + boxes2_area - inter_area 251 | 252 | return 1.0 * inter_area / union_area 253 | 254 | def bbox_giou(boxes1, boxes2): 255 | boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 256 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 257 | boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 258 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 259 | 260 | boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]), 261 | tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1) 262 | boxes2 = 
tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]), 263 | tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1) 264 | 265 | boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1]) 266 | boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1]) 267 | 268 | left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2]) 269 | right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:]) 270 | 271 | inter_section = tf.maximum(right_down - left_up, 0.0) 272 | inter_area = inter_section[..., 0] * inter_section[..., 1] 273 | union_area = boxes1_area + boxes2_area - inter_area 274 | 275 | # Calculate the iou value between the two bounding boxes 276 | iou = inter_area / union_area 277 | 278 | # Calculate the coordinates of the upper left corner and the lower right corner of the smallest closed convex surface 279 | enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2]) 280 | enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:]) 281 | enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0) 282 | 283 | # Calculate the area of the smallest closed convex surface C 284 | enclose_area = enclose[..., 0] * enclose[..., 1] 285 | 286 | # Calculate the GIoU value according to the GioU formula 287 | giou = iou - 1.0 * (enclose_area - union_area) / enclose_area 288 | 289 | return giou 290 | 291 | # testing (should be better than giou) 292 | def bbox_ciou(boxes1, boxes2): 293 | boxes1_coor = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5, 294 | boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1) 295 | boxes2_coor = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5, 296 | boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1) 297 | 298 | left = tf.maximum(boxes1_coor[..., 0], boxes2_coor[..., 0]) 299 | up = tf.maximum(boxes1_coor[..., 1], boxes2_coor[..., 1]) 300 | right = tf.maximum(boxes1_coor[..., 2], boxes2_coor[..., 2]) 301 | down = tf.maximum(boxes1_coor[..., 3], boxes2_coor[..., 3]) 302 | 303 | c = (right - left) * (right - left) + (up - down) * (up - down) 304 | iou = bbox_iou(boxes1, boxes2) 305 | 306 | u = (boxes1[..., 0] - boxes2[..., 0]) * (boxes1[..., 0] - boxes2[..., 0]) + (boxes1[..., 1] - boxes2[..., 1]) * (boxes1[..., 1] - boxes2[..., 1]) 307 | d = u / c 308 | 309 | ar_gt = boxes2[..., 2] / boxes2[..., 3] 310 | ar_pred = boxes1[..., 2] / boxes1[..., 3] 311 | 312 | ar_loss = 4 / (np.pi * np.pi) * (tf.atan(ar_gt) - tf.atan(ar_pred)) * (tf.atan(ar_gt) - tf.atan(ar_pred)) 313 | alpha = ar_loss / (1 - iou + ar_loss + 0.000001) 314 | ciou_term = d + alpha * ar_loss 315 | 316 | return iou - ciou_term 317 | 318 | 319 | def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES): 320 | NUM_CLASS = len(read_class_names(CLASSES)) 321 | conv_shape = tf.shape(conv) 322 | batch_size = conv_shape[0] 323 | output_size = conv_shape[1] 324 | input_size = STRIDES[i] * output_size 325 | conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS)) 326 | 327 | conv_raw_conf = conv[:, :, :, :, 4:5] 328 | conv_raw_prob = conv[:, :, :, :, 5:] 329 | 330 | pred_xywh = pred[:, :, :, :, 0:4] 331 | pred_conf = pred[:, :, :, :, 4:5] 332 | 333 | label_xywh = label[:, :, :, :, 0:4] 334 | respond_bbox = label[:, :, :, :, 4:5] 335 | label_prob = label[:, :, :, :, 5:] 336 | 337 | giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1) 338 | input_size = tf.cast(input_size, tf.float32) 339 | 340 | bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2) 341 | 
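# bbox_loss_scale weights small boxes more heavily: it equals 2 - (ground-truth box
# area / image area), so tiny ground-truth boxes contribute up to twice as much to
# the GIoU term computed next.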
def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    conv_shape  = tf.shape(conv)
    batch_size  = conv_shape[0]
    output_size = conv_shape[1]
    input_size  = STRIDES[i] * output_size
    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh   = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]
    label_prob   = label[:, :, :, :, 5:]

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    input_size = tf.cast(input_size, tf.float32)

    # give smaller boxes a larger loss weight so they are not drowned out by large ones
    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    # For each predicted box, find the largest IoU with any ground-truth box
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # If the largest IoU is below the threshold, the predicted box is treated as
    # containing no object, i.e. as background
    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < YOLO_IOU_LOSS_THRESH, tf.float32 )

    # focal-style weighting that down-weights easy, confident examples
    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Confidence loss: the network should predict a confidence of 1 when the grid
    # cell contains an object, and 0 when it does not.
    conf_loss = conf_focal * (
            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
            +
            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
    )

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))

    return giou_loss, conf_loss, prob_loss
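# Sketch of how compute_loss is typically driven from a custom training step.
# Names such as `yolo`, `image_data` and `target` are illustrative; the repo's
# train.py wires this up with its own dataset objects. With training=True the
# model returns raw conv outputs and decoded predictions in alternating pairs.
def _train_step_sketch(yolo, optimizer, image_data, target):
    import tensorflow as tf
    with tf.GradientTape() as tape:
        pred_result = yolo(image_data, training=True)
        giou_loss = conf_loss = prob_loss = 0.0
        for i in range(3):  # three grid scales (two for the tiny variants)
            conv, pred = pred_result[i * 2], pred_result[i * 2 + 1]
            loss_items = compute_loss(pred, conv, *target[i], i, CLASSES=TRAIN_CLASSES)
            giou_loss += loss_items[0]
            conf_loss += loss_items[1]
            prob_loss += loss_items[2]
        total_loss = giou_loss + conf_loss + prob_loss
    gradients = tape.gradient(total_loss, yolo.trainable_variables)
    optimizer.apply_gradients(zip(gradients, yolo.trainable_variables))
    return total_loss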
--------------------------------------------------------------------------------
/yolov3/yolov4.py:
--------------------------------------------------------------------------------
#================================================================
#
#   File name   : yolov4.py
#   Author      : PyLessons
#   Created date: 2020-09-31
#   Website     : https://pylessons.com/
#   GitHub      : https://github.com/pythonlessons/TensorFlow-2.x-YOLOv3
#   Description : main yolov3 & yolov4 functions
#
#================================================================
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, ZeroPadding2D, BatchNormalization, MaxPool2D
from tensorflow.keras.regularizers import l2
from yolov3.configs import *

STRIDES = np.array(YOLO_STRIDES)
ANCHORS = (np.array(YOLO_ANCHORS).T / STRIDES).T

def read_class_names(class_file_name):
    # loads class names from a file
    names = {}
    with open(class_file_name, 'r') as data:
        for ID, name in enumerate(data):
            names[ID] = name.strip('\n')
    return names

class BatchNormalization(BatchNormalization):
    # "Frozen state" and "inference mode" are two separate concepts.
    # `layer.trainable = False` freezes the layer, so in "inference mode" it
    # uses the stored moving `var` and `mean`, and neither `gamma` nor `beta`
    # will be updated!
    def call(self, x, training=False):
        if not training:
            training = tf.constant(False)
        training = tf.logical_and(training, self.trainable)
        return super().call(x, training)
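# Minimal sketch of what the BatchNormalization override buys us (illustrative,
# not part of the model code): with trainable=False the layer always runs in
# inference mode with frozen statistics, even when the surrounding model is
# called with training=True -- the behavior wanted when fine-tuning from
# pretrained darknet weights.
def _frozen_bn_sketch():
    import tensorflow as tf
    bn = BatchNormalization()
    bn.trainable = False
    x = tf.random.normal((1, 4, 4, 8))
    # training is forced to False internally, so moving mean/var are used
    return bn(x, training=True)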
def convolutional(input_layer, filters_shape, downsample=False, activate=True, bn=True, activate_type='leaky'):
    if downsample:
        input_layer = ZeroPadding2D(((1, 0), (1, 0)))(input_layer)
        padding = 'valid'
        strides = 2
    else:
        strides = 1
        padding = 'same'

    conv = Conv2D(filters=filters_shape[-1], kernel_size=filters_shape[0], strides=strides,
                  padding=padding, use_bias=not bn, kernel_regularizer=l2(0.0005),
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01),
                  bias_initializer=tf.constant_initializer(0.))(input_layer)
    if bn:
        conv = BatchNormalization()(conv)
    if activate == True:
        if activate_type == "leaky":
            conv = LeakyReLU(alpha=0.1)(conv)
        elif activate_type == "mish":
            conv = mish(conv)

    return conv

def mish(x):
    return x * tf.math.tanh(tf.math.softplus(x))

def residual_block(input_layer, input_channel, filter_num1, filter_num2, activate_type='leaky'):
    short_cut = input_layer
    conv = convolutional(input_layer, filters_shape=(1, 1, input_channel, filter_num1), activate_type=activate_type)
    conv = convolutional(conv, filters_shape=(3, 3, filter_num1, filter_num2), activate_type=activate_type)

    residual_output = short_cut + conv
    return residual_output

def upsample(input_layer):
    return tf.image.resize(input_layer, (input_layer.shape[1] * 2, input_layer.shape[2] * 2), method='nearest')

def route_group(input_layer, groups, group_id):
    convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1)
    return convs[group_id]

def darknet53(input_data):
    input_data = convolutional(input_data, (3, 3, 3, 32))
    input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)

    for i in range(1):
        input_data = residual_block(input_data, 64, 32, 64)

    input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True)

    for i in range(2):
        input_data = residual_block(input_data, 128, 64, 128)

    input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True)

    for i in range(8):
        input_data = residual_block(input_data, 256, 128, 256)

    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True)

    for i in range(8):
        input_data = residual_block(input_data, 512, 256, 512)

    route_2 = input_data
    input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True)

    for i in range(4):
        input_data = residual_block(input_data, 1024, 512, 1024)

    return route_1, route_2, input_data

def cspdarknet53(input_data):
    input_data = convolutional(input_data, (3, 3, 3, 32), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True, activate_type="mish")

    route = input_data
    route = convolutional(route, (1, 1, 64, 64), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
    for i in range(1):
        input_data = residual_block(input_data, 64, 32, 64, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")

    input_data = tf.concat([input_data, route], axis=-1)
    input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 64, 128), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 128, 64), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 128, 64), activate_type="mish")
    for i in range(2):
        input_data = residual_block(input_data, 64, 64, 64, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 64, 64), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
    input_data = convolutional(input_data, (3, 3, 128, 256), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 256, 128), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 256, 128), activate_type="mish")
    for i in range(8):
        input_data = residual_block(input_data, 128, 128, 128, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 128, 128), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 256, 512), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 512, 256), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 512, 256), activate_type="mish")
    for i in range(8):
        input_data = residual_block(input_data, 256, 256, 256, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 256, 256), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
    route_2 = input_data
    input_data = convolutional(input_data, (3, 3, 512, 1024), downsample=True, activate_type="mish")
    route = input_data
    route = convolutional(route, (1, 1, 1024, 512), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 1024, 512), activate_type="mish")
    for i in range(4):
        input_data = residual_block(input_data, 512, 512, 512, activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 512, 512), activate_type="mish")
    input_data = tf.concat([input_data, route], axis=-1)

    input_data = convolutional(input_data, (1, 1, 1024, 1024), activate_type="mish")
    input_data = convolutional(input_data, (1, 1, 1024, 512))
    input_data = convolutional(input_data, (3, 3, 512, 1024))
    input_data = convolutional(input_data, (1, 1, 1024, 512))

    # SPP block: pool at three scales and concatenate with the input
    max_pooling_1 = tf.keras.layers.MaxPool2D(pool_size=13, padding='SAME', strides=1)(input_data)
    max_pooling_2 = tf.keras.layers.MaxPool2D(pool_size=9, padding='SAME', strides=1)(input_data)
    max_pooling_3 = tf.keras.layers.MaxPool2D(pool_size=5, padding='SAME', strides=1)(input_data)
    input_data = tf.concat([max_pooling_1, max_pooling_2, max_pooling_3, input_data], axis=-1)

    input_data = convolutional(input_data, (1, 1, 2048, 512))
    input_data = convolutional(input_data, (3, 3, 512, 1024))
    input_data = convolutional(input_data, (1, 1, 1024, 512))

    return route_1, route_2, input_data
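# Quick shape sketch (illustrative): for a 416x416x3 input, the three branches
# returned by darknet53 feed the 52x52, 26x26 and 13x13 detection scales.
def _darknet53_shapes_sketch():
    import tensorflow as tf
    inputs = tf.keras.layers.Input([416, 416, 3])
    route_1, route_2, conv = darknet53(inputs)
    # route_1: (None, 52, 52, 256), route_2: (None, 26, 26, 512), conv: (None, 13, 13, 1024)
    return route_1.shape, route_2.shape, conv.shape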
def darknet19_tiny(input_data):
    input_data = convolutional(input_data, (3, 3, 3, 16))
    input_data = MaxPool2D(2, 2, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 16, 32))
    input_data = MaxPool2D(2, 2, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 32, 64))
    input_data = MaxPool2D(2, 2, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 64, 128))
    input_data = MaxPool2D(2, 2, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 128, 256))
    route_1 = input_data
    input_data = MaxPool2D(2, 2, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 256, 512))
    input_data = MaxPool2D(2, 1, 'same')(input_data)
    input_data = convolutional(input_data, (3, 3, 512, 1024))

    return route_1, input_data

def cspdarknet53_tiny(input_data): # not sure how this should be called
    input_data = convolutional(input_data, (3, 3, 3, 32), downsample=True)
    input_data = convolutional(input_data, (3, 3, 32, 64), downsample=True)
    input_data = convolutional(input_data, (3, 3, 64, 64))

    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, (3, 3, 32, 32))
    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 32, 32))
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, (1, 1, 32, 64))
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, (3, 3, 64, 128))
    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, (3, 3, 64, 64))
    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 64, 64))
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, (1, 1, 64, 128))
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, (3, 3, 128, 256))
    route = input_data
    input_data = route_group(input_data, 2, 1)
    input_data = convolutional(input_data, (3, 3, 128, 128))
    route_1 = input_data
    input_data = convolutional(input_data, (3, 3, 128, 128))
    input_data = tf.concat([input_data, route_1], axis=-1)
    input_data = convolutional(input_data, (1, 1, 128, 256))
    route_1 = input_data
    input_data = tf.concat([route, input_data], axis=-1)
    input_data = MaxPool2D(2, 2, 'same')(input_data)

    input_data = convolutional(input_data, (3, 3, 512, 512))

    return route_1, input_data
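# Small sketch of what route_group does in the tiny CSP blocks above: it splits
# the feature map along the channel axis and keeps one group, here the second
# half of a 4-channel tensor (illustrative values).
def _route_group_sketch():
    import tensorflow as tf
    x = tf.reshape(tf.range(16, dtype=tf.float32), (1, 2, 2, 4))
    half = route_group(x, 2, 1)  # keeps channels 2..3 of 0..3
    return half.shape             # (1, 2, 2, 2)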
def YOLOv3(input_layer, NUM_CLASS):
    # After the input layer passes through the Darknet-53 backbone, we get three branches
    route_1, route_2, conv = darknet53(input_layer)
    # See the orange modules (DBL) in the reference figure: a total of 5 convolution operations
    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))
    conv_lobj_branch = convolutional(conv, (3, 3, 512, 1024))

    # conv_lbbox is used to predict large objects, shape = [None, 13, 13, 255]
    conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 1024, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(conv, (1, 1, 512, 256))
    # upsample uses nearest-neighbor interpolation, which has the advantage that
    # the upsampling does not need to be learned, reducing the number of network parameters
    conv = upsample(conv)

    conv = tf.concat([conv, route_2], axis=-1)
    conv = convolutional(conv, (1, 1, 768, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv_mobj_branch = convolutional(conv, (3, 3, 256, 512))

    # conv_mbbox is used to predict medium objects, shape = [None, 26, 26, 255]
    conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(conv, (1, 1, 256, 128))
    conv = upsample(conv)

    conv = tf.concat([conv, route_1], axis=-1)
    conv = convolutional(conv, (1, 1, 384, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))
    conv_sobj_branch = convolutional(conv, (3, 3, 128, 256))

    # conv_sbbox is used to predict small objects, shape = [None, 52, 52, 255]
    conv_sbbox = convolutional(conv_sobj_branch, (1, 1, 256, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]

def YOLOv4(input_layer, NUM_CLASS):
    route_1, route_2, conv = cspdarknet53(input_layer)

    route = conv
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = upsample(conv)
    route_2 = convolutional(route_2, (1, 1, 512, 256))
    conv = tf.concat([route_2, conv], axis=-1)

    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))

    route_2 = conv
    conv = convolutional(conv, (1, 1, 256, 128))
    conv = upsample(conv)
    route_1 = convolutional(route_1, (1, 1, 256, 128))
    conv = tf.concat([route_1, conv], axis=-1)

    conv = convolutional(conv, (1, 1, 256, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))
    conv = convolutional(conv, (3, 3, 128, 256))
    conv = convolutional(conv, (1, 1, 256, 128))

    route_1 = conv
    conv = convolutional(conv, (3, 3, 128, 256))
    conv_sbbox = convolutional(conv, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(route_1, (3, 3, 128, 256), downsample=True)
    conv = tf.concat([conv, route_2], axis=-1)

    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))
    conv = convolutional(conv, (3, 3, 256, 512))
    conv = convolutional(conv, (1, 1, 512, 256))

    route_2 = conv
    conv = convolutional(conv, (3, 3, 256, 512))
    conv_mbbox = convolutional(conv, (1, 1, 512, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(route_2, (3, 3, 256, 512), downsample=True)
    conv = tf.concat([conv, route], axis=-1)

    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))
    conv = convolutional(conv, (3, 3, 512, 1024))
    conv = convolutional(conv, (1, 1, 1024, 512))

    conv = convolutional(conv, (3, 3, 512, 1024))
    conv_lbbox = convolutional(conv, (1, 1, 1024, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]
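# Where the "255" in the shape comments above comes from: each scale predicts 3
# anchor boxes per cell, and every anchor carries 4 box offsets, 1 objectness
# score and NUM_CLASS class scores (illustrative helper, not used elsewhere).
def _head_channels(num_class=80):
    return 3 * (num_class + 5)  # 3 * 85 = 255 channels per detection scale for COCO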
def YOLOv3_tiny(input_layer, NUM_CLASS):
    # After the input layer passes through the Darknet19-tiny backbone, we get two branches
    route_1, conv = darknet19_tiny(input_layer)

    conv = convolutional(conv, (1, 1, 1024, 256))
    conv_lobj_branch = convolutional(conv, (3, 3, 256, 512))

    # conv_lbbox is used to predict large objects, shape = [None, 13, 13, 255]
    conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3*(NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(conv, (1, 1, 256, 128))
    # upsample uses nearest-neighbor interpolation, which has the advantage that
    # the upsampling does not need to be learned, reducing the number of network parameters
    conv = upsample(conv)

    conv = tf.concat([conv, route_1], axis=-1)
    conv_mobj_branch = convolutional(conv, (3, 3, 128, 256))
    # conv_mbbox is used to predict medium objects, shape = [None, 26, 26, 255]
    conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_mbbox, conv_lbbox]

def YOLOv4_tiny(input_layer, NUM_CLASS):
    route_1, conv = cspdarknet53_tiny(input_layer)

    conv = convolutional(conv, (1, 1, 512, 256))

    conv_lobj_branch = convolutional(conv, (3, 3, 256, 512))
    conv_lbbox = convolutional(conv_lobj_branch, (1, 1, 512, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    conv = convolutional(conv, (1, 1, 256, 128))
    conv = upsample(conv)
    conv = tf.concat([conv, route_1], axis=-1)

    conv_mobj_branch = convolutional(conv, (3, 3, 128, 256))
    conv_mbbox = convolutional(conv_mobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)), activate=False, bn=False)

    return [conv_mbbox, conv_lbbox]

def Create_Yolo(input_size=416, channels=3, training=False, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    input_layer = Input([input_size, input_size, channels])

    if TRAIN_YOLO_TINY:
        if YOLO_TYPE == "yolov4":
            conv_tensors = YOLOv4_tiny(input_layer, NUM_CLASS)
        if YOLO_TYPE == "yolov3":
            conv_tensors = YOLOv3_tiny(input_layer, NUM_CLASS)
    else:
        if YOLO_TYPE == "yolov4":
            conv_tensors = YOLOv4(input_layer, NUM_CLASS)
        if YOLO_TYPE == "yolov3":
            conv_tensors = YOLOv3(input_layer, NUM_CLASS)

    output_tensors = []
    for i, conv_tensor in enumerate(conv_tensors):
        pred_tensor = decode(conv_tensor, NUM_CLASS, i)
        if training: output_tensors.append(conv_tensor)
        output_tensors.append(pred_tensor)

    Yolo = tf.keras.Model(input_layer, output_tensors)
    return Yolo
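# Illustrative usage sketch for Create_Yolo (assumes YOLO_TYPE and friends are
# set in yolov3/configs.py): build an inference model and push a dummy image
# through it to inspect the decoded output shapes.
def _create_yolo_sketch():
    import numpy as np
    yolo = Create_Yolo(input_size=416, training=False)
    dummy = np.zeros((1, 416, 416, 3), dtype=np.float32)
    preds = yolo(dummy)  # one decoded tensor per grid scale
    return [p.shape for p in preds]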
def decode(conv_output, NUM_CLASS, i=0):
    # where i = 0, 1 or 2 corresponds to the three grid scales
    conv_shape = tf.shape(conv_output)
    batch_size = conv_shape[0]
    output_size = conv_shape[1]

    conv_output = tf.reshape(conv_output, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    # split into center offset, width/height offset, objectness and class scores
    conv_raw_dxdy, conv_raw_dwdh, conv_raw_conf, conv_raw_prob = tf.split(conv_output, (2, 2, 1, NUM_CLASS), axis=-1)

    # Next, draw the grid, where output_size is 13, 26 or 52
    xy_grid = tf.meshgrid(tf.range(output_size), tf.range(output_size))
    xy_grid = tf.expand_dims(tf.stack(xy_grid, axis=-1), axis=2)  # [gx, gy, 1, 2]
    xy_grid = tf.tile(tf.expand_dims(xy_grid, axis=0), [batch_size, 1, 1, 3, 1])
    xy_grid = tf.cast(xy_grid, tf.float32)

    # Calculate the center position of the predicted box:
    pred_xy = (tf.sigmoid(conv_raw_dxdy) + xy_grid) * STRIDES[i]
    # Calculate the width and height of the predicted box:
    pred_wh = (tf.exp(conv_raw_dwdh) * ANCHORS[i]) * STRIDES[i]

    pred_xywh = tf.concat([pred_xy, pred_wh], axis=-1)
    pred_conf = tf.sigmoid(conv_raw_conf)  # predicted confidence that the box contains an object
    pred_prob = tf.sigmoid(conv_raw_prob)  # predicted class probabilities

    return tf.concat([pred_xywh, pred_conf, pred_prob], axis=-1)
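# Worked numeric sketch of the decoding above (illustrative values only): at
# scale i=0 (stride 8), a cell at grid position (5, 7) with raw center offsets
# of 0.0 decodes to ((sigmoid(0) + 5) * 8, (sigmoid(0) + 7) * 8) = (44, 60) in
# input-image pixels; raw width/height offsets of 0.0 give exp(0) = 1, i.e.
# the anchor size times the stride.
def _decode_math_sketch():
    import numpy as np
    sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))
    cx, cy, stride = 5, 7, 8
    px = (sigmoid(0.0) + cx) * stride  # 44.0
    py = (sigmoid(0.0) + cy) * stride  # 60.0
    return px, py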
def bbox_iou(boxes1, boxes2):
    # boxes are given as (x_center, y_center, w, h)
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # convert to (x_min, y_min, x_max, y_max)
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    left_up    = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    return 1.0 * inter_area / union_area

def bbox_giou(boxes1, boxes2):
    # convert from (x_center, y_center, w, h) to (x_min, y_min, x_max, y_max)
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # make sure (x_min, y_min) really is the upper-left corner and (x_max, y_max) the lower-right
    boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
                        tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
    boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
                        tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    left_up    = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])

    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # Calculate the IoU value between the two bounding boxes
    iou = inter_area / union_area

    # Calculate the corners of the smallest enclosing box
    enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)

    # Calculate the area of the smallest enclosing box C
    enclose_area = enclose[..., 0] * enclose[..., 1]

    # Calculate GIoU according to the GIoU formula
    giou = iou - 1.0 * (enclose_area - union_area) / enclose_area

    return giou

# experimental (expected to work better than GIoU)
def bbox_ciou(boxes1, boxes2):
    boxes1_coor = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                             boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2_coor = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                             boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # corners of the smallest enclosing box
    left  = tf.minimum(boxes1_coor[..., 0], boxes2_coor[..., 0])
    up    = tf.minimum(boxes1_coor[..., 1], boxes2_coor[..., 1])
    right = tf.maximum(boxes1_coor[..., 2], boxes2_coor[..., 2])
    down  = tf.maximum(boxes1_coor[..., 3], boxes2_coor[..., 3])

    # squared diagonal length of the enclosing box
    c = (right - left) * (right - left) + (up - down) * (up - down)
    iou = bbox_iou(boxes1, boxes2)

    # squared distance between the two box centers
    u = (boxes1[..., 0] - boxes2[..., 0]) * (boxes1[..., 0] - boxes2[..., 0]) + (boxes1[..., 1] - boxes2[..., 1]) * (boxes1[..., 1] - boxes2[..., 1])
    d = u / c

    ar_gt = boxes2[..., 2] / boxes2[..., 3]
    ar_pred = boxes1[..., 2] / boxes1[..., 3]

    # aspect-ratio consistency term: 4/pi^2 * (arctan(ar_gt) - arctan(ar_pred))^2
    ar_loss = 4 / (np.pi * np.pi) * (tf.atan(ar_gt) - tf.atan(ar_pred)) * (tf.atan(ar_gt) - tf.atan(ar_pred))
    alpha = ar_loss / (1 - iou + ar_loss + 0.000001)
    ciou_term = d + alpha * ar_loss

    return iou - ciou_term
def compute_loss(pred, conv, label, bboxes, i=0, CLASSES=YOLO_COCO_CLASSES):
    NUM_CLASS = len(read_class_names(CLASSES))
    conv_shape  = tf.shape(conv)
    batch_size  = conv_shape[0]
    output_size = conv_shape[1]
    input_size  = STRIDES[i] * output_size
    conv = tf.reshape(conv, (batch_size, output_size, output_size, 3, 5 + NUM_CLASS))

    conv_raw_conf = conv[:, :, :, :, 4:5]
    conv_raw_prob = conv[:, :, :, :, 5:]

    pred_xywh = pred[:, :, :, :, 0:4]
    pred_conf = pred[:, :, :, :, 4:5]

    label_xywh   = label[:, :, :, :, 0:4]
    respond_bbox = label[:, :, :, :, 4:5]
    label_prob   = label[:, :, :, :, 5:]

    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    input_size = tf.cast(input_size, tf.float32)

    # give smaller boxes a larger loss weight so they are not drowned out by large ones
    bbox_loss_scale = 2.0 - 1.0 * label_xywh[:, :, :, :, 2:3] * label_xywh[:, :, :, :, 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    iou = bbox_iou(pred_xywh[:, :, :, :, np.newaxis, :], bboxes[:, np.newaxis, np.newaxis, np.newaxis, :, :])
    # For each predicted box, find the largest IoU with any ground-truth box
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)

    # If the largest IoU is below the threshold, the predicted box is treated as
    # containing no object, i.e. as background
    respond_bgd = (1.0 - respond_bbox) * tf.cast( max_iou < YOLO_IOU_LOSS_THRESH, tf.float32 )

    # focal-style weighting that down-weights easy, confident examples
    conf_focal = tf.pow(respond_bbox - pred_conf, 2)

    # Confidence loss: the network should predict a confidence of 1 when the grid
    # cell contains an object, and 0 when it does not.
    conf_loss = conf_focal * (
            respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
            +
            respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=conv_raw_conf)
    )

    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=conv_raw_prob)

    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1,2,3,4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1,2,3,4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1,2,3,4]))

    return giou_loss, conf_loss, prob_loss
--------------------------------------------------------------------------------