├── yolo3 ├── __init__.py ├── utils.py ├── generate_detections.py └── model.py ├── deepsort ├── __init__.py ├── detection.py ├── preprocessing.py ├── iou_matching.py ├── tracker.py ├── track.py ├── nn_matching.py ├── linear_assignment.py └── kalman_filter.py ├── model_h5 └── h5 files can be located in here.txt ├── model_data ├── tiny_yolo_anchors.txt ├── yolo_anchors.txt ├── cifar_classes.txt ├── voc_classes.txt └── coco_classes.txt ├── font └── times.ttf ├── input └── Demo1.jpg ├── output └── Demo1.png ├── openh264-1.8.0-win64.dll ├── LICENSE ├── .gitignore ├── tracker_func.py ├── yolo_video.py ├── kmeans_anchors.py ├── README.md ├── read_data_cifar100.py ├── sort.py ├── train.py └── yolo.py /yolo3/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /deepsort/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model_h5/h5 files can be located in here.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /model_data/tiny_yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 2 | -------------------------------------------------------------------------------- /font/times.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/font/times.ttf -------------------------------------------------------------------------------- /input/Demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/input/Demo1.jpg -------------------------------------------------------------------------------- /output/Demo1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/output/Demo1.png -------------------------------------------------------------------------------- /model_data/yolo_anchors.txt: -------------------------------------------------------------------------------- 1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 2 | -------------------------------------------------------------------------------- /openh264-1.8.0-win64.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/openh264-1.8.0-win64.dll -------------------------------------------------------------------------------- /model_data/cifar_classes.txt: -------------------------------------------------------------------------------- 1 | boy 2 | girl 3 | man 4 | woman 5 | bicycle 6 | bus 7 | motorcycle 8 | pickuptruck 9 | streetcar 10 | tank 11 | -------------------------------------------------------------------------------- /model_data/voc_classes.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | 
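The `*_anchors.txt` files above hold a single line of comma-separated `width,height` pairs, and the `*_classes.txt` files hold one class name per line. A minimal parsing sketch (the helper names are illustrative, not functions of this repo):

```
import numpy as np

def load_class_names(path):
    # one class name per line
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

def load_anchors(path):
    # a single line of comma-separated floats -> (N, 2) array of (width, height)
    with open(path) as f:
        values = [float(v) for v in f.readline().split(',')]
    return np.array(values).reshape(-1, 2)
```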
-------------------------------------------------------------------------------- /model_data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 ImLaoBJie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /deepsort/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Detection(object): 5 | """ 6 | This class represents a bounding box detection in a single image. 7 | 8 | Parameters 9 | ---------- 10 | tlwh : array_like 11 | Bounding box in format `(x, y, w, h)`. 12 | confidence : float 13 | Detector confidence score. 14 | feature : array_like 15 | A feature vector that describes the object contained in this image. 16 | 17 | Attributes 18 | ---------- 19 | tlwh : ndarray 20 | Bounding box in format `(top left x, top left y, width, height)`. 21 | confidence : ndarray 22 | Detector confidence score. 23 | class_name : ndarray 24 | Detector class. 25 | feature : ndarray | NoneType 26 | A feature vector that describes the object contained in this image. 
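    Examples
    --------
    A minimal usage sketch (the argument values are made up for illustration):

    >>> det = Detection([10., 20., 30., 40.], 0.9, 'person', np.zeros(128))
    >>> det.to_tlbr()
    array([10., 20., 40., 60.])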
27 | 28 | """ 29 | 30 | def __init__(self, tlwh, confidence, class_name, feature): 31 | self.tlwh = np.asarray(tlwh, dtype=np.float) 32 | self.confidence = float(confidence) 33 | self.class_name = class_name 34 | self.feature = np.asarray(feature, dtype=np.float32) 35 | 36 | def get_class(self): 37 | return self.class_name 38 | 39 | def to_tlbr(self): 40 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 41 | `(top left, bottom right)`. 42 | """ 43 | ret = self.tlwh.copy() 44 | ret[2:] += ret[:2] 45 | return ret 46 | 47 | def to_xyah(self): 48 | """Convert bounding box to format `(center x, center y, aspect ratio, 49 | height)`, where the aspect ratio is `width / height`. 50 | """ 51 | ret = self.tlwh.copy() 52 | ret[:2] += ret[2:] / 2 53 | ret[2] /= ret[3] 54 | return ret 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # IPython 77 | profile_default/ 78 | ipython_config.py 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .dmypy.json 111 | dmypy.json 112 | 113 | # Pyre type checker 114 | .pyre/ 115 | 116 | # Demo 117 | *.mp4 118 | *.weights 119 | *.dat 120 | *.webm 121 | *.png 122 | 123 | # Weights 124 | *.h5 125 | *.pb 126 | 127 | __pycache__/ 128 | .idea/ 129 | cifar-100-python/ 130 | -------------------------------------------------------------------------------- /deepsort/preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 6 | """Suppress overlapping detections. 7 | 8 | Original code from [1]_ has been adapted to include confidence score. 9 | 10 | .. 
[1] http://www.pyimagesearch.com/2015/02/16/
11 |         faster-non-maximum-suppression-python/
12 | 
13 |     Examples
14 |     --------
15 | 
16 |     >>> boxes = [d.tlwh for d in detections]
17 |     >>> classes = [d.class_name for d in detections]
18 |     >>> scores = [d.confidence for d in detections]
19 |     >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
20 |     >>> detections = [detections[i] for i in indices]
21 | 
22 |     Parameters
23 |     ----------
24 |     boxes : ndarray
25 |         Array of ROIs (x, y, width, height).
26 |     classes : ndarray
27 |         Detector class labels (carried along; not used by the suppression itself).
28 |     max_bbox_overlap : float
29 |         ROIs that overlap more than this value are suppressed.
30 |     scores : Optional[array_like]
31 |         Detector confidence scores.
32 | 
33 |     Returns
34 |     -------
35 |     List[int]
36 |         Returns indices of detections that have survived non-maxima suppression.
37 | 
38 |     """
39 |     if len(boxes) == 0:
40 |         return []
41 | 
42 |     boxes = boxes.astype(np.float)
43 |     pick = []
44 | 
45 |     x1 = boxes[:, 0]
46 |     y1 = boxes[:, 1]
47 |     x2 = boxes[:, 2] + boxes[:, 0]
48 |     y2 = boxes[:, 3] + boxes[:, 1]
49 | 
50 |     area = (x2 - x1 + 1) * (y2 - y1 + 1)
51 |     if scores is not None:
52 |         idxs = np.argsort(scores)
53 |     else:
54 |         idxs = np.argsort(y2)
55 | 
56 |     while len(idxs) > 0:
57 |         last = len(idxs) - 1
58 |         i = idxs[last]
59 |         pick.append(i)
60 | 
61 |         xx1 = np.maximum(x1[i], x1[idxs[:last]])
62 |         yy1 = np.maximum(y1[i], y1[idxs[:last]])
63 |         xx2 = np.minimum(x2[i], x2[idxs[:last]])
64 |         yy2 = np.minimum(y2[i], y2[idxs[:last]])
65 | 
66 |         w = np.maximum(0, xx2 - xx1 + 1)
67 |         h = np.maximum(0, yy2 - yy1 + 1)
68 | 
69 |         overlap = (w * h) / area[idxs[:last]]
70 | 
71 |         idxs = np.delete(
72 |             idxs, np.concatenate(
73 |                 ([last], np.where(overlap > max_bbox_overlap)[0])))
74 | 
75 |     return pick
76 | 
--------------------------------------------------------------------------------
/tracker_func.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from sort import Sort
4 | from deepsort.tracker import Tracker
5 | from deepsort.detection import Detection
6 | from deepsort import preprocessing
7 | from yolo3.utils import convert_boxes
8 | 
9 | 
10 | def sort_image(sort_class: Sort, out_boxes, out_scores, out_classes):
11 |     dets = []
12 | 
13 |     for i in range(0, len(out_boxes)):
14 |         dets.append([out_boxes[i][1], out_boxes[i][0], out_boxes[i][3], out_boxes[i][2], out_scores[i], out_classes[i]])
15 | 
16 |     dets = np.array(dets)
17 |     # update
18 |     trackers = sort_class.update(dets)
19 | 
20 |     out_boxes = []
21 |     out_scores = []
22 |     out_classes = []
23 |     object_id = []
24 |     # d [x1,y1,x2,y2,object_id,score,type]
25 |     for d in trackers:
26 |         out_boxes.append(list([d[1], d[0], d[3], d[2]]))
27 |         object_id.append(int(d[4]))
28 |         out_scores.append(float(d[5]))
29 |         out_classes.append(int(d[6]))
30 | 
31 |     return np.array(out_boxes), np.array(out_scores), np.array(out_classes), np.array(object_id)
32 | 
33 | 
34 | def deepsort_image(deepsort_class: Tracker, encoder, frame, out_boxes, out_scores, out_classes,
35 |                    nms_max_overlap=1.0):
36 | 
37 |     converted_boxes = convert_boxes(out_boxes)
38 |     features = encoder(frame, converted_boxes)
39 |     detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
40 |                   zip(converted_boxes, out_scores, out_classes, features)]
41 | 
42 |     # run non-maxima suppression
43 |     boxs = np.array([d.tlwh for d in detections])
44 |     scores = np.array([d.confidence for d in detections])
45 |     classes = np.array([d.class_name for d in detections])
46 |     indices = preprocessing.non_max_suppression(boxs, classes,
nms_max_overlap, scores)
47 |     detections = [detections[i] for i in indices]
48 | 
49 |     deepsort_class.predict()
50 |     deepsort_class.update(detections)
51 | 
52 |     num_trackers = len(deepsort_class.tracks)
53 |     out_boxes = []
54 |     out_classes = []
55 |     out_scores = []
56 |     object_id = []
57 |     # d [x1,y1,x2,y2,object_id,score,type]
58 |     for index, track in enumerate(deepsort_class.tracks):
59 |         if not track.is_confirmed() or track.time_since_update > 1:
60 |             continue
61 |         out_boxes.append(track.to_tlbr())
62 |         out_classes.append(int(track.get_class()))
63 |         out_scores.append(float(track.get_score()))
64 |         object_id.append(int(track.track_id))
65 | 
66 |     return np.array(out_boxes), np.array(out_scores), np.array(out_classes), np.array(object_id)
67 | 
--------------------------------------------------------------------------------
/yolo_video.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 | 
4 | import numpy as np
5 | 
6 | from yolo import YOLO, detect_video
7 | from PIL import Image
8 | 
9 | 
10 | DEFAULTS = {
11 |     "model_path": './model_h5/yolo.h5',
12 |     "anchors_path": './model_data/yolo_anchors.txt',
13 |     "classes_path": './model_data/coco_classes.txt',
14 |     "deepsort_model": './model_data/mars-small128.pb',
15 |     "gpu_num": 1,
16 |     "image": False,  # if this is set to True, "tracker" is ignored
17 |     "tracker": 'deepsort',  # 'sort' or 'deepsort', as needed
18 |     "write_to_file": True,
19 |     "input": './input/your_video.format',
20 |     "output": './output/your_video.format',
21 |     "output_path": './output/',
22 |     "score": 0.4,  # threshold
23 |     "iou": 0.4,  # threshold
24 |     "repeat_iou": 0.95,  # threshold
25 | }
26 | 
27 | 
28 | def getvalue(FLAGS, defaults):
29 | 
30 |     args = vars(FLAGS)
31 | 
32 |     for key in defaults:
33 |         args[key] = defaults[key]
34 | 
35 |     return FLAGS
36 | 
37 | 
38 | def detect_img(yolo):
39 |     while True:
40 | 
41 |         img = input('Input image filename:')
42 |         try:
43 |             image = Image.open(img)
44 |             image = np.asarray(image)
45 |         except Exception:
46 |             print('Open Error! Try again!')
47 |             continue
48 |         else:
49 |             # Initialization
50 |             # mot_tracker = sort.Sort()
51 |             # yolo.mot_tracker = mot_tracker
52 |             yolo.frame = 1
53 | 
54 |             if yolo.write_to_file:
55 |                 emptyFile = open(yolo.output_path + 'result.dat', 'w')
56 |             else:
57 |                 emptyFile = None
58 |             r_image = yolo.detect_image(image, emptyFile)
59 |             if yolo.write_to_file:
60 |                 emptyFile.close()
61 |             r_image.save(yolo.output_path + 'output.png', 'png')
62 |     yolo.close_session()
63 | 
64 | 
65 | FLAGS = None
66 | 
67 | if __name__ == '__main__':
68 | 
69 |     FLAGS = argparse.Namespace()
70 |     FLAGS = getvalue(FLAGS, DEFAULTS)
71 | 
72 |     if FLAGS.image:
73 |         """
74 |         Image detection mode, disregard any remaining command line arguments
75 |         """
76 |         print("Image detection mode")
77 |         if "input" in FLAGS:
78 |             print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
79 |         detect_img(YOLO(**vars(FLAGS)))
80 |     elif "input" in FLAGS:
81 |         detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output)
82 |     else:
83 |         print("Must specify at least video_input_path. See usage with --help.")
84 | 
--------------------------------------------------------------------------------
/deepsort/iou_matching.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | from . import linear_assignment
4 | 
5 | 
6 | def iou(bbox, candidates):
7 |     """Compute intersection over union.
8 | 9 | Parameters 10 | ---------- 11 | bbox : ndarray 12 | A bounding box in format `(top left x, top left y, width, height)`. 13 | candidates : ndarray 14 | A matrix of candidate bounding boxes (one per row) in the same format 15 | as `bbox`. 16 | 17 | Returns 18 | ------- 19 | ndarray 20 | The intersection over union in [0, 1] between the `bbox` and each 21 | candidate. A higher score means a larger fraction of the `bbox` is 22 | occluded by the candidate. 23 | 24 | """ 25 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 26 | candidates_tl = candidates[:, :2] 27 | candidates_br = candidates[:, :2] + candidates[:, 2:] 28 | 29 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 30 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 31 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 32 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 33 | wh = np.maximum(0., br - tl) 34 | 35 | area_intersection = wh.prod(axis=1) 36 | area_bbox = bbox[2:].prod() 37 | area_candidates = candidates[:, 2:].prod(axis=1) 38 | return area_intersection / (area_bbox + area_candidates - area_intersection) 39 | 40 | 41 | def iou_cost(tracks, detections, track_indices=None, 42 | detection_indices=None): 43 | """An intersection over union distance metric. 44 | 45 | Parameters 46 | ---------- 47 | tracks : List[deep_sort.track.Track] 48 | A list of tracks. 49 | detections : List[deep_sort.detection.Detection] 50 | A list of detections. 51 | track_indices : Optional[List[int]] 52 | A list of indices to tracks that should be matched. Defaults to 53 | all `tracks`. 54 | detection_indices : Optional[List[int]] 55 | A list of indices to detections that should be matched. Defaults 56 | to all `detections`. 57 | 58 | Returns 59 | ------- 60 | ndarray 61 | Returns a cost matrix of shape 62 | len(track_indices), len(detection_indices) where entry (i, j) is 63 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 64 | 65 | """ 66 | if track_indices is None: 67 | track_indices = np.arange(len(tracks)) 68 | if detection_indices is None: 69 | detection_indices = np.arange(len(detections)) 70 | 71 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 72 | for row, track_idx in enumerate(track_indices): 73 | if tracks[track_idx].time_since_update > 1: 74 | cost_matrix[row, :] = linear_assignment.INFTY_COST 75 | continue 76 | 77 | bbox = tracks[track_idx].to_tlwh() 78 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 79 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 80 | return cost_matrix 81 | -------------------------------------------------------------------------------- /kmeans_anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class YOLO_Kmeans: 5 | 6 | def __init__(self, cluster_number, filename): 7 | self.cluster_number = cluster_number 8 | self.filename = filename 9 | 10 | def iou(self, boxes, clusters): # 1 box -> k clusters 11 | n = boxes.shape[0] 12 | k = self.cluster_number 13 | 14 | box_area = boxes[:, 0] * boxes[:, 1] 15 | box_area = box_area.repeat(k) 16 | box_area = np.reshape(box_area, (n, k)) 17 | 18 | cluster_area = clusters[:, 0] * clusters[:, 1] 19 | cluster_area = np.tile(cluster_area, [1, n]) 20 | cluster_area = np.reshape(cluster_area, (n, k)) 21 | 22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) 23 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) 24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) 25 | 26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) 27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) 28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) 29 | inter_area = np.multiply(min_w_matrix, min_h_matrix) 30 | 31 | result = inter_area / (box_area + cluster_area - inter_area) 32 | return result 33 | 34 | def avg_iou(self, boxes, clusters): 35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) 36 | return accuracy 37 | 38 | def kmeans(self, boxes, k, dist=np.median): 39 | box_number = boxes.shape[0] 40 | distances = np.empty((box_number, k)) 41 | last_nearest = np.zeros((box_number,)) 42 | np.random.seed() 43 | clusters = boxes[np.random.choice( 44 | box_number, k, replace=False)] # init k clusters 45 | while True: 46 | 47 | distances = 1 - self.iou(boxes, clusters) 48 | 49 | current_nearest = np.argmin(distances, axis=1) 50 | if (last_nearest == current_nearest).all(): 51 | break # clusters won't change 52 | for cluster in range(k): 53 | clusters[cluster] = dist( # update clusters 54 | boxes[current_nearest == cluster], axis=0) 55 | 56 | last_nearest = current_nearest 57 | 58 | return clusters 59 | 60 | def result2txt(self, data): 61 | f = open('output/yolo_anchors.txt', 'w') 62 | row = np.shape(data)[0] 63 | for i in range(row): 64 | if i == 0: 65 | x_y = "%d,%d" % (data[i][0], data[i][1]) 66 | else: 67 | x_y = ", %d,%d" % (data[i][0], data[i][1]) 68 | f.write(x_y) 69 | f.close() 70 | 71 | def txt2boxes(self): 72 | f = open(self.filename, 'r') 73 | dataSet = [] 74 | for line in f: 75 | infos = line.split(', ') 76 | length = len(infos) 77 | for i in range(1, length): 78 | width = int(abs(float(infos[4]) - float(infos[2]))) 79 | height = int(abs(float(infos[5]) - float(infos[3]))) 80 | dataSet.append([width, height]) 81 | result = np.array(dataSet) 82 | f.close() 83 | return result 84 | 85 | def txt2clusters(self): 86 | all_boxes = self.txt2boxes() 87 | result = self.kmeans(all_boxes, k=self.cluster_number) 88 | result = result[np.lexsort(result.T[0, None])] 89 | self.result2txt(result) 90 | print("K anchors:\n {}".format(result)) 91 | print("Accuracy: {:.2f}%".format( 92 | self.avg_iou(all_boxes, result) * 100)) 93 | 94 | 95 | if __name__ == "__main__": 96 | cluster_number = 9 97 | filename = 'output/result.dat' 98 | kmeans = YOLO_Kmeans(cluster_number, filename) 99 | kmeans.txt2clusters() 100 | -------------------------------------------------------------------------------- /README.md: 
--------------------------------------------------------------------------------
1 | # YOLOv3+SORT+DeepSort
2 | 
3 | * Update 2020.7.16: added DeepSort and made extensive adjustments
4 | 
5 | # Introduction
6 | 
7 | The implementation of YOLOv3 and its training is adapted from: [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3)
8 | 
9 | The implementation of SORT is adapted from: [abewley/sort](https://github.com/abewley/sort)
10 | 
11 | The implementation of DeepSort is adapted from: [theAIGuysCode/yolov3_deepsort](https://github.com/theAIGuysCode/yolov3_deepsort)
12 | 
13 | References:
14 | 
15 | 1. [SIMPLE ONLINE AND REALTIME TRACKING](https://arxiv.org/pdf/1602.00763.pdf)
16 | 
17 | 2. [SIMPLE ONLINE AND REALTIME TRACKING WITH A DEEP ASSOCIATION METRIC](https://arxiv.org/pdf/1703.07402.pdf)
18 | 
19 | Demo videos: [SORT](https://www.bilibili.com/video/av56450343/)
20 | [DEEPSORT](https://www.bilibili.com/video/BV16A411e7ih/)
21 | 
22 | ---
23 | 
24 | # Quick Start
25 | 
26 | 1. Open `yolo_video.py`
27 | 
28 | 2. Modify `DEFAULTS` (personally I am not a big fan of `argparse`)
29 | 
30 | ```
31 | DEFAULTS = {
32 |     "model_path": './model_h5/yolo.h5',
33 |     "anchors_path": './model_data/yolo_anchors.txt',
34 |     "classes_path": './model_data/coco_classes.txt',
35 |     "deepsort_model": './model_data/mars-small128.pb',
36 |     "gpu_num": 1,
37 |     "image": False,  # if this is set to True, "tracker" is ignored
38 |     "tracker": 'deepsort',  # 'sort' or 'deepsort', as needed
39 |     "write_to_file": True,
40 |     "input": './input/your_video.format',
41 |     "output": './output/your_video.format',
42 |     "output_path": './output/',
43 |     "score": 0.4,  # threshold
44 |     "iou": 0.4,  # threshold
45 |     "repeat_iou": 0.95,  # threshold
46 | }
47 | ```
48 | 
49 | 3. Run `yolo_video.py`; the results can be found in the folder specified by `"output_path"`
50 | 
51 | ```
52 | python yolo_video.py
53 | ```
54 | 
55 | 4. To use the lightweight tiny-YOLOv3 model, just change `"model_path"` and `"anchors_path"`
56 | 
57 | *For more about YOLOv3, see the [YOLO WEBSITE](https://pjreddie.com/darknet/yolo/)
58 | 
59 | *tiny-YOLOv3 download: [tiny-YOLOv3](https://pjreddie.com/media/files/yolov3-tiny.weights)
60 | 
61 | *YOLOv3 download: [YOLOv3](https://pjreddie.com/media/files/yolov3.weights)
62 | 
63 | *Pretrained DeepSort network: Google Drive: [DeepSort](https://drive.google.com/open?id=18fKzfqnqhqW3s9zwsCbnVJ5XF2JFeqMp), BaiduDisk: [DeepSort](https://pan.baidu.com/s/1B4xKXYWckM4TLIg6WGW6uw) pw:9i6p
64 | 
65 | ---
66 | 
67 | # Parameters
68 | 
69 | ```
70 | model_path      # path of the .h5 model file
71 | anchors_path    # path of the anchors file
72 | classes_path    # path of the file listing the object classes to recognize
73 | deepsort_model  # path of the pretrained DeepSort weights
74 | gpu_num         # number of GPUs
75 | image           # process a video (False) or an image (True)
76 | tracker         # which tracker to use
77 | write_to_file   # whether to write the results to a file
78 | input           # path of the input video
79 | output          # path of the output video
80 | output_path     # path for the other output files
81 | score           # objects with a score below this threshold are ignored
82 | iou             # objects with an IoU below this threshold are ignored
83 | repeat_iou      # threshold for removing duplicate bounding boxes
84 | ```
85 | 
86 | *The format written to the file is:
87 | 
88 | ```
89 | , , , , , , , , , 
90 | ```
91 | 
92 | ---
93 | 
94 | # Training Your Own Model
95 | 
96 | The training images are taken from the CIFAR-100 dataset. Since the main research subject is traffic, the
97 | selected object classes center on vehicles and people; see `model_data/cifar_classes.txt` for the detailed classes.
98 | 
99 | The CIFAR datasets are described at: [The CIFAR-10 and CIFAR-100](http://www.cs.toronto.edu/~kriz/cifar.html)
100 | 
101 | CIFAR-100 dataset download: [CIFAR-100 python version](http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz)
102 | 
103 | 1. The object classes of the training set can be chosen by modifying `read_data_cifar100.py`
104 | 
105 | ```
106 | REMAIN = list(np.concatenate([[11, 35, 46, 98], [8, 13, 48, 58], [81, 85]]))
107 | ```
108 | 
109 | 2. Run train.py
110 | 
111 | ```
112 | python train.py
113 | ```
114 | 
115 | `epochs` and `batch_size` can be adjusted as needed
116 | 
117 | 3. You can first use the trained YOLOv3 model `yolo.h5` to collect bounding-box data, then use `kmeans_anchors.py`
118 | to compute the anchors (a usage sketch follows)
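A minimal sketch of this last step, using the class and the default paths from `kmeans_anchors.py` in this repo:

```
from kmeans_anchors import YOLO_Kmeans

# cluster the collected bounding boxes into 9 anchors;
# the result is written to output/yolo_anchors.txt
kmeans = YOLO_Kmeans(cluster_number=9, filename='output/result.dat')
kmeans.txt2clusters()
```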
119 | 
120 | ---
121 | 
122 | # TIPS
123 | 
124 | 1. Environment
125 | 
126 |    * Main dependencies
127 | 
128 |    * python 3.6
129 |    * Keras 2.3.1
130 |    * tensorflow-gpu 1.13.0
131 |    * numpy 1.17.0
132 | 
133 |    (lower versions also seem to work)
134 | 
135 | 2. A missing `openh264-1.8.0-win64.dll` may cause unknown errors, so place this file in the same
136 |    directory as `yolo_video.py` (although it also seems to work fine without it)
137 | 
138 | 3. DeepSort can handle short-term occlusion, but not objects that disappear or stay occluded for a long time
139 | 
140 | 4. **DEMO** uploaded to [BaiduDisk](https://pan.baidu.com/s/1VLKI8OGDbzsfqtzMe1amxg) PW: pb34
141 | 
142 | 5. **MOT_DEMO** [Multiple Object Tracking Benchmark](https://motchallenge.net/data/MOT16/)
143 | 
144 | 
145 | 
--------------------------------------------------------------------------------
/read_data_cifar100.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import os
4 | import matplotlib.pyplot as plt
5 | 
6 | PATH = 'cifar-100-python/'
7 | REMOVE = list(range(0, 100))
8 | REMAIN = list(np.concatenate([[11, 35, 46, 98], [8, 13, 48, 58], [81, 85]]))
9 | for i in REMAIN:
10 |     REMOVE.remove(i)
11 | 
12 | 
13 | def filter(image, label):
14 |     # filter
15 |     remove_index = []
16 |     for index, element in enumerate(label):
17 |         if int(element) in REMOVE:
18 |             remove_index.append(index)
19 | 
20 |     label = np.delete(label, remove_index)
21 |     image = np.delete(image, remove_index, 0)
22 | 
23 |     if not REMAIN == []:
24 |         value = 0
25 |         for index in REMAIN:
26 |             label[label == np.int32(index)] = np.int32(value)
27 |             value = value + 1
28 | 
29 |     return image, label
30 | 
31 | 
32 | def load_CIFAR_batch(filename, N, data_filter: bool):
33 |     # a single batch
34 |     # load single batch of cifar
35 |     with open(filename, 'rb') as f:
36 |         datadict = pickle.load(f, encoding='latin1')  # dict
37 |         image = datadict['data']  # X, ndarray, pixel values
38 |         label = datadict['fine_labels']  # Y, list, fine-grained class labels
39 | 
40 |         # check the id of fine_labels relevant to the coarse_labels
41 |         # label = np.array(label)
42 |         # coarse = np.array(datadict['coarse_labels'])
43 |         # print(np.unique(label[np.array(np.where(coarse == 19))[0]]))
44 | 
45 |         # reshape: the flat array becomes an (N, 3, 32, 32) matrix; each entry is 32x32
46 |         # transpose: reorder the axes to (N, 32, 32, 3)
47 |         # astype: copy while casting the dtype
48 |         image = image.reshape(N, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')
49 |         label = np.array(label)
50 | 
51 |         if data_filter:
52 |             image, label = filter(image, label)
53 | 
54 |         return image, label
55 | 
56 | 
57 | def load_CIFAR100(path, data_filter: bool):
58 |     # all batches
59 |     # load all of cifar
60 |     images = []  # list
61 |     labels = []
62 | 
63 |     # training set
64 |     f = os.path.join(path, 'train')
65 |     image, label = load_CIFAR_batch(f, 50000, data_filter)
66 |     images.append(image)
67 |     labels.append(label)
68 | 
69 |     images = np.concatenate(images)  # merge [ndarray, ndarray] into a single ndarray
70 |     labels = np.concatenate(labels)
71 | 
72 |     # test set
73 |     img_val, lab_val = load_CIFAR_batch(os.path.join(path, 'test'), 10000, data_filter)
74 |     return images, labels, img_val, lab_val
75 | 
76 | 
77 | # WARNING: Using this function may cause out-of-memory errors and OS breakdown;
78 | # reduce the amount of augmentation if necessary
79 | def creat_more_data(images):
80 |     # expand the dataset through rotation and mirroring
81 |     images_rot90 = []
82 |     images_rot180 = []
83 |     images_rot270 = []
84 |     img_lr = []
85 |     img_ud = []
86 | 
87 |     for index in range(0, images.shape[0]):
88 |         band_1 = images[index, :, :, 0]
89 |         band_2 = images[index, :, :, 1]
90 |         band_3 = images[index, :, :, 2]
91 | 
92 |         # rotate 90 degrees
93 |         band_1_rot90 = np.rot90(band_1)
94 |         band_2_rot90 = np.rot90(band_2)
95 |         band_3_rot90 = np.rot90(band_3)
96 |         
images_rot90.append(np.dstack((band_1_rot90, band_2_rot90, band_3_rot90))) 97 | 98 | # 180 99 | band_1_rot180 = np.rot90(band_1_rot90) 100 | band_2_rot180 = np.rot90(band_2_rot90) 101 | band_3_rot180 = np.rot90(band_3_rot90) 102 | images_rot180.append(np.dstack((band_1_rot180, band_2_rot180, band_3_rot180))) 103 | 104 | # 270 105 | band_1_rot270 = np.rot90(band_1_rot180) 106 | band_2_rot270 = np.rot90(band_2_rot180) 107 | band_3_rot270 = np.rot90(band_3_rot180) 108 | images_rot270.append(np.dstack((band_1_rot270, band_2_rot270, band_3_rot270))) 109 | 110 | # 左右翻转 flip horizontally 111 | lr1 = np.flip(band_1, 0) 112 | lr2 = np.flip(band_2, 0) 113 | lr3 = np.flip(band_3, 0) 114 | img_lr.append(np.dstack((lr1, lr2, lr3))) 115 | 116 | # 上下反转 flip vertical 117 | ud1 = np.flip(band_1, 1) 118 | ud2 = np.flip(band_2, 1) 119 | ud3 = np.flip(band_3, 1) 120 | img_ud.append(np.dstack((ud1, ud2, ud3))) 121 | 122 | rot90 = np.array(images_rot90) 123 | rot180 = np.array(images_rot180) 124 | rot270 = np.array(images_rot270) 125 | lr = np.array(img_lr) 126 | ud = np.array(img_ud) 127 | 128 | images = np.concatenate((rot90, rot180, rot270, lr, ud)) 129 | 130 | return images 131 | 132 | 133 | def shuffle(images, labels): 134 | permutation = np.random.permutation(images.shape[0]) 135 | shuffled_dataset = images[permutation, :, :, :] 136 | shuffled_labels = labels[permutation] 137 | return shuffled_dataset, shuffled_labels 138 | 139 | 140 | def data(path, more_data: bool, shuffle_data: bool, data_filter: bool): 141 | images, labels, img_val, lab_val = load_CIFAR100(path, data_filter) 142 | 143 | if more_data: 144 | # 扩充数据 expand dataset 145 | images = creat_more_data(np.array(images)) 146 | # 扩充标签 expend labels 147 | labels = np.concatenate((labels, labels, labels, labels, labels, labels)) 148 | 149 | if shuffle_data: 150 | images, labels = shuffle(images, labels) 151 | img_val, lab_val = shuffle(img_val, lab_val) 152 | 153 | return images, labels, img_val, lab_val 154 | 155 | 156 | def main(): 157 | images, labels, img_val, lab_val = data(PATH, False, True, True) 158 | # test 159 | print(len(images)) 160 | print(len(labels)) 161 | plt.imshow(images[0] / 255) 162 | print(images[0]) 163 | print(labels[0]) 164 | plt.show() 165 | 166 | 167 | if __name__ == '__main__': 168 | main() 169 | -------------------------------------------------------------------------------- /deepsort/tracker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from . import kalman_filter 4 | from . import linear_assignment 5 | from . import iou_matching 6 | from .track import Track 7 | 8 | 9 | class Tracker: 10 | """ 11 | This is the multi-target tracker. 12 | 13 | Parameters 14 | ---------- 15 | metric : nn_matching.NearestNeighborDistanceMetric 16 | A distance metric for measurement-to-track association. 17 | max_age : int 18 | Maximum number of missed misses before a track is deleted. 19 | n_init : int 20 | Number of consecutive detections before the track is confirmed. The 21 | track state is set to `Deleted` if a miss occurs within the first 22 | `n_init` frames. 23 | 24 | Attributes 25 | ---------- 26 | metric : nn_matching.NearestNeighborDistanceMetric 27 | The distance metric used for measurement to track association. 28 | max_age : int 29 | Maximum number of missed misses before a track is deleted. 30 | n_init : int 31 | Number of frames that a track remains in initialization phase. 
32 | kf : kalman_filter.KalmanFilter 33 | A Kalman filter to filter target trajectories in image space. 34 | tracks : List[Track] 35 | The list of active tracks at the current time step. 36 | 37 | """ 38 | 39 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3): 40 | self.metric = metric 41 | self.max_iou_distance = max_iou_distance 42 | self.max_age = max_age 43 | self.n_init = n_init 44 | 45 | self.kf = kalman_filter.KalmanFilter() 46 | self.tracks = [] 47 | self._next_id = 1 48 | 49 | def predict(self): 50 | """Propagate track state distributions one time step forward. 51 | 52 | This function should be called once every time step, before `update`. 53 | """ 54 | for track in self.tracks: 55 | track.predict(self.kf) 56 | 57 | def update(self, detections): 58 | """Perform measurement update and track management. 59 | 60 | Parameters 61 | ---------- 62 | detections : List[deep_sort.detection.Detection] 63 | A list of detections at the current time step. 64 | 65 | """ 66 | # Run matching cascade. 67 | matches, unmatched_tracks, unmatched_detections = \ 68 | self._match(detections) 69 | 70 | # Update track set. 71 | for track_idx, detection_idx in matches: 72 | self.tracks[track_idx].update( 73 | self.kf, detections[detection_idx]) 74 | for track_idx in unmatched_tracks: 75 | self.tracks[track_idx].mark_missed() 76 | for detection_idx in unmatched_detections: 77 | self._initiate_track(detections[detection_idx]) 78 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 79 | 80 | # Update distance metric. 81 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 82 | features, targets = [], [] 83 | for track in self.tracks: 84 | if not track.is_confirmed(): 85 | continue 86 | features += track.features 87 | targets += [track.track_id for _ in track.features] 88 | track.features = [] 89 | self.metric.partial_fit( 90 | np.asarray(features), np.asarray(targets), active_targets) 91 | 92 | def _match(self, detections): 93 | 94 | def gated_metric(tracks, dets, track_indices, detection_indices): 95 | features = np.array([dets[i].feature for i in detection_indices]) 96 | targets = np.array([tracks[i].track_id for i in track_indices]) 97 | cost_matrix = self.metric.distance(features, targets) 98 | cost_matrix = linear_assignment.gate_cost_matrix( 99 | self.kf, cost_matrix, tracks, dets, track_indices, 100 | detection_indices) 101 | 102 | return cost_matrix 103 | 104 | # Split track set into confirmed and unconfirmed tracks. 105 | confirmed_tracks = [ 106 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 107 | unconfirmed_tracks = [ 108 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 109 | 110 | # Associate confirmed tracks using appearance features. 111 | matches_a, unmatched_tracks_a, unmatched_detections = \ 112 | linear_assignment.matching_cascade( 113 | gated_metric, self.metric.matching_threshold, self.max_age, 114 | self.tracks, detections, confirmed_tracks) 115 | 116 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
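        # Only tracks that were matched in the previous frame
        # (time_since_update == 1) take part in the IoU match, together with
        # the still-tentative tracks; tracks missed for longer keep whatever
        # the appearance cascade above decided.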
117 | iou_track_candidates = unconfirmed_tracks + [ 118 | k for k in unmatched_tracks_a if 119 | self.tracks[k].time_since_update == 1] 120 | unmatched_tracks_a = [ 121 | k for k in unmatched_tracks_a if 122 | self.tracks[k].time_since_update != 1] 123 | matches_b, unmatched_tracks_b, unmatched_detections = \ 124 | linear_assignment.min_cost_matching( 125 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 126 | detections, iou_track_candidates, unmatched_detections) 127 | 128 | matches = matches_a + matches_b 129 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 130 | return matches, unmatched_tracks, unmatched_detections 131 | 132 | def _initiate_track(self, detection): 133 | mean, covariance = self.kf.initiate(detection.to_xyah()) 134 | class_name = detection.get_class() 135 | self.tracks.append(Track( 136 | mean, covariance, self._next_id, self.n_init, self.max_age, 137 | detection.feature, class_name, detection.confidence)) 138 | self._next_id += 1 139 | -------------------------------------------------------------------------------- /deepsort/track.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class TrackState: 4 | """ 5 | Enumeration type for the single target track state. Newly created tracks are 6 | classified as `tentative` until enough evidence has been collected. Then, 7 | the track state is changed to `confirmed`. Tracks that are no longer alive 8 | are classified as `deleted` to mark them for removal from the set of active 9 | tracks. 10 | 11 | """ 12 | 13 | Tentative = 1 14 | Confirmed = 2 15 | Deleted = 3 16 | 17 | 18 | class Track: 19 | """ 20 | A single target track with state space `(x, y, a, h)` and associated 21 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 22 | aspect ratio and `h` is the height. 23 | 24 | Parameters 25 | ---------- 26 | mean : ndarray 27 | Mean vector of the initial state distribution. 28 | covariance : ndarray 29 | Covariance matrix of the initial state distribution. 30 | track_id : int 31 | A unique track identifier. 32 | n_init : int 33 | Number of consecutive detections before the track is confirmed. The 34 | track state is set to `Deleted` if a miss occurs within the first 35 | `n_init` frames. 36 | max_age : int 37 | The maximum number of consecutive misses before the track state is 38 | set to `Deleted`. 39 | feature : Optional[ndarray] 40 | Feature vector of the detection this track originates from. If not None, 41 | this feature is added to the `features` cache. 42 | 43 | Attributes 44 | ---------- 45 | mean : ndarray 46 | Mean vector of the initial state distribution. 47 | covariance : ndarray 48 | Covariance matrix of the initial state distribution. 49 | track_id : int 50 | A unique track identifier. 51 | hits : int 52 | Total number of measurement updates. 53 | age : int 54 | Total number of frames since first occurance. 55 | time_since_update : int 56 | Total number of frames since last measurement update. 57 | state : TrackState 58 | The current track state. 59 | features : List[ndarray] 60 | A cache of features. On each measurement update, the associated feature 61 | vector is added to this list. 
62 | 63 | """ 64 | 65 | def __init__(self, mean, covariance, track_id, n_init, max_age, 66 | feature=None, class_name=None, confidence=None): 67 | self.mean = mean 68 | self.covariance = covariance 69 | self.track_id = track_id 70 | self.hits = 1 71 | self.age = 1 72 | self.time_since_update = 0 73 | 74 | self.state = TrackState.Tentative 75 | self.features = [] 76 | if feature is not None: 77 | self.features.append(feature) 78 | 79 | self._n_init = n_init 80 | self._max_age = max_age 81 | self.class_name = class_name 82 | self.confidence = confidence 83 | 84 | def to_tlwh(self): 85 | """Get current position in bounding box format `(top left x, top left y, 86 | width, height)`. 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | The bounding box. 92 | 93 | """ 94 | ret = self.mean[:4].copy() 95 | ret[2] *= ret[3] 96 | ret[:2] -= ret[2:] / 2 97 | return ret 98 | 99 | def to_tlbr(self): 100 | """Get current position in bounding box format `(min x, miny, max x, 101 | max y)`. 102 | 103 | Returns 104 | ------- 105 | ndarray 106 | The bounding box. 107 | 108 | """ 109 | ret = self.to_tlwh() 110 | ret[2:] = ret[:2] + ret[2:] 111 | return ret 112 | 113 | def get_class(self): 114 | return self.class_name 115 | 116 | def get_score(self): 117 | return self.confidence 118 | 119 | def predict(self, kf): 120 | """Propagate the state distribution to the current time step using a 121 | Kalman filter prediction step. 122 | 123 | Parameters 124 | ---------- 125 | kf : kalman_filter.KalmanFilter 126 | The Kalman filter. 127 | 128 | """ 129 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 130 | self.age += 1 131 | self.time_since_update += 1 132 | 133 | def update(self, kf, detection): 134 | """Perform Kalman filter measurement update step and update the feature 135 | cache. 136 | 137 | Parameters 138 | ---------- 139 | kf : kalman_filter.KalmanFilter 140 | The Kalman filter. 141 | detection : Detection 142 | The associated detection. 143 | 144 | """ 145 | self.mean, self.covariance = kf.update( 146 | self.mean, self.covariance, detection.to_xyah()) 147 | self.features.append(detection.feature) 148 | 149 | self.hits += 1 150 | self.time_since_update = 0 151 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 152 | self.state = TrackState.Confirmed 153 | 154 | def mark_missed(self): 155 | """Mark this track as missed (no association at the current time step). 156 | """ 157 | if self.state == TrackState.Tentative: 158 | self.state = TrackState.Deleted 159 | elif self.time_since_update > self._max_age: 160 | self.state = TrackState.Deleted 161 | 162 | def is_tentative(self): 163 | """Returns True if this track is tentative (unconfirmed). 164 | """ 165 | return self.state == TrackState.Tentative 166 | 167 | def is_confirmed(self): 168 | """Returns True if this track is confirmed.""" 169 | return self.state == TrackState.Confirmed 170 | 171 | def is_deleted(self): 172 | """Returns True if this track is dead and should be deleted.""" 173 | return self.state == TrackState.Deleted 174 | -------------------------------------------------------------------------------- /yolo3/utils.py: -------------------------------------------------------------------------------- 1 | """Miscellaneous utility functions.""" 2 | 3 | from functools import reduce 4 | 5 | from PIL import Image 6 | import numpy as np 7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb 8 | 9 | from sort import iou 10 | 11 | 12 | def compose(*funcs): 13 | """Compose arbitrarily many functions, evaluated left to right. 
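    For example, `compose(f, g)(x)` evaluates `g(f(x))`.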
14 | 15 | Reference: https://mathieularose.com/function-composition-in-python/ 16 | """ 17 | # return lambda x: reduce(lambda v, f: f(v), funcs, x) 18 | if funcs: 19 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs) 20 | else: 21 | raise ValueError('Composition of empty sequence not supported.') 22 | 23 | 24 | def letterbox_image(image, size): 25 | '''resize image with unchanged aspect ratio using padding''' 26 | iw, ih = image.size 27 | w, h = size 28 | scale = min(w / iw, h / ih) 29 | nw = int(iw * scale) 30 | nh = int(ih * scale) 31 | 32 | image = image.resize((nw, nh), Image.BICUBIC) 33 | new_image = Image.new('RGB', size, (128, 128, 128)) 34 | new_image.paste(image, ((w - nw) // 2, (h - nh) // 2)) 35 | return new_image 36 | 37 | 38 | def rand(a=0, b=1): 39 | return np.random.rand() * (b - a) + a 40 | 41 | 42 | def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5, 43 | proc_img=True): 44 | '''random preprocessing for real-time data augmentation''' 45 | line = annotation_line.split() 46 | image = Image.open(line[0]) 47 | iw, ih = image.size 48 | h, w = input_shape 49 | box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]]) 50 | 51 | if not random: 52 | # resize image 53 | scale = min(w / iw, h / ih) 54 | nw = int(iw * scale) 55 | nh = int(ih * scale) 56 | dx = (w - nw) // 2 57 | dy = (h - nh) // 2 58 | image_data = 0 59 | if proc_img: 60 | image = image.resize((nw, nh), Image.BICUBIC) 61 | new_image = Image.new('RGB', (w, h), (128, 128, 128)) 62 | new_image.paste(image, (dx, dy)) 63 | image_data = np.array(new_image) / 255. 64 | 65 | # correct boxes 66 | box_data = np.zeros((max_boxes, 5)) 67 | if len(box) > 0: 68 | np.random.shuffle(box) 69 | if len(box) > max_boxes: box = box[:max_boxes] 70 | box[:, [0, 2]] = box[:, [0, 2]] * scale + dx 71 | box[:, [1, 3]] = box[:, [1, 3]] * scale + dy 72 | box_data[:len(box)] = box 73 | 74 | return image_data, box_data 75 | 76 | # resize image 77 | new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter) 78 | scale = rand(.25, 2) 79 | if new_ar < 1: 80 | nh = int(scale * h) 81 | nw = int(nh * new_ar) 82 | else: 83 | nw = int(scale * w) 84 | nh = int(nw / new_ar) 85 | image = image.resize((nw, nh), Image.BICUBIC) 86 | 87 | # place image 88 | dx = int(rand(0, w - nw)) 89 | dy = int(rand(0, h - nh)) 90 | new_image = Image.new('RGB', (w, h), (128, 128, 128)) 91 | new_image.paste(image, (dx, dy)) 92 | image = new_image 93 | 94 | # flip image or not 95 | flip = rand() < .5 96 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT) 97 | 98 | # distort image 99 | hue = rand(-hue, hue) 100 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat) 101 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val) 102 | x = rgb_to_hsv(np.array(image) / 255.) 
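    # the hue shift is additive and wrapped back into [0, 1]; saturation and
    # value are scaled multiplicatively, and the result is clipped to [0, 1]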
103 | x[..., 0] += hue 104 | x[..., 0][x[..., 0] > 1] -= 1 105 | x[..., 0][x[..., 0] < 0] += 1 106 | x[..., 1] *= sat 107 | x[..., 2] *= val 108 | x[x > 1] = 1 109 | x[x < 0] = 0 110 | image_data = hsv_to_rgb(x) # numpy array, 0 to 1 111 | 112 | # correct boxes 113 | box_data = np.zeros((max_boxes, 5)) 114 | if len(box) > 0: 115 | np.random.shuffle(box) 116 | box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx 117 | box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy 118 | if flip: box[:, [0, 2]] = w - box[:, [2, 0]] 119 | box[:, 0:2][box[:, 0:2] < 0] = 0 120 | box[:, 2][box[:, 2] > w] = w 121 | box[:, 3][box[:, 3] > h] = h 122 | box_w = box[:, 2] - box[:, 0] 123 | box_h = box[:, 3] - box[:, 1] 124 | box = box[np.logical_and(box_w > 1, box_h > 1)] # discard invalid box 125 | if len(box) > max_boxes: box = box[:max_boxes] 126 | box_data[:len(box)] = box 127 | 128 | return image_data, box_data 129 | 130 | 131 | def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold): 132 | '''Delete the same bboxes marked as different classes''' 133 | to_del = [] 134 | for i in range(0, len(out_classes) - 1): 135 | for j in range(i + 1, len(out_classes)): 136 | if (i not in to_del) and (j not in to_del): 137 | # bounding box 1 138 | y1_1, x1_1, y2_1, x2_1 = out_boxes[i] 139 | # bounding box 2 140 | y1_2, x1_2, y2_2, x2_2 = out_boxes[j] 141 | if iou([x1_1, y1_1, x2_1, y2_1], [x1_2, y1_2, x2_2, y2_2]) >= iou_threshold: 142 | if out_scores[i] >= out_scores[j]: 143 | to_del.append(j) 144 | else: 145 | to_del.append(i) 146 | 147 | to_del = sorted(to_del) 148 | 149 | for t in reversed(to_del): 150 | out_boxes.pop(t) 151 | out_scores.pop(t) 152 | out_classes.pop(t) 153 | 154 | return np.array(out_boxes), np.array(out_scores), np.array(out_classes) 155 | 156 | 157 | # boxes: np.array 158 | def convert_boxes(boxes): 159 | # [x1, y1, x2, y2] -> 160 | returned_boxes = [] 161 | for box in boxes: 162 | box = box.astype(int) 163 | box[2] = int(box[2]-box[0]) # width 164 | box[3] = int(box[3]-box[1]) # height 165 | box = box.astype(int) 166 | box = box.tolist() 167 | if box != [0, 0, 0, 0]: 168 | returned_boxes.append(box) 169 | return returned_boxes -------------------------------------------------------------------------------- /deepsort/nn_matching.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def _pdist(a, b): 5 | """Compute pair-wise squared distance between points in `a` and `b`. 6 | 7 | Parameters 8 | ---------- 9 | a : array_like 10 | An NxM matrix of N samples of dimensionality M. 11 | b : array_like 12 | An LxM matrix of L samples of dimensionality M. 13 | 14 | Returns 15 | ------- 16 | ndarray 17 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 18 | contains the squared distance between `a[i]` and `b[j]`. 19 | 20 | """ 21 | a, b = np.asarray(a), np.asarray(b) 22 | if len(a) == 0 or len(b) == 0: 23 | return np.zeros((len(a), len(b))) 24 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 25 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 26 | r2 = np.clip(r2, 0., float(np.inf)) 27 | return r2 28 | 29 | 30 | def _cosine_distance(a, b, data_is_normalized=False): 31 | """Compute pair-wise cosine distance between points in `a` and `b`. 32 | 33 | Parameters 34 | ---------- 35 | a : array_like 36 | An NxM matrix of N samples of dimensionality M. 37 | b : array_like 38 | An LxM matrix of L samples of dimensionality M. 
39 | data_is_normalized : Optional[bool] 40 | If True, assumes rows in a and b are unit length vectors. 41 | Otherwise, a and b are explicitly normalized to lenght 1. 42 | 43 | Returns 44 | ------- 45 | ndarray 46 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 47 | contains the squared distance between `a[i]` and `b[j]`. 48 | 49 | """ 50 | if not data_is_normalized: 51 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 52 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 53 | return 1. - np.dot(a, b.T) 54 | 55 | 56 | def _nn_euclidean_distance(x, y): 57 | """ Helper function for nearest neighbor distance metric (Euclidean). 58 | 59 | Parameters 60 | ---------- 61 | x : ndarray 62 | A matrix of N row-vectors (sample points). 63 | y : ndarray 64 | A matrix of M row-vectors (query points). 65 | 66 | Returns 67 | ------- 68 | ndarray 69 | A vector of length M that contains for each entry in `y` the 70 | smallest Euclidean distance to a sample in `x`. 71 | 72 | """ 73 | distances = _pdist(x, y) 74 | return np.maximum(0.0, distances.min(axis=0)) 75 | 76 | 77 | def _nn_cosine_distance(x, y): 78 | """ Helper function for nearest neighbor distance metric (cosine). 79 | 80 | Parameters 81 | ---------- 82 | x : ndarray 83 | A matrix of N row-vectors (sample points). 84 | y : ndarray 85 | A matrix of M row-vectors (query points). 86 | 87 | Returns 88 | ------- 89 | ndarray 90 | A vector of length M that contains for each entry in `y` the 91 | smallest cosine distance to a sample in `x`. 92 | 93 | """ 94 | distances = _cosine_distance(x, y) 95 | return distances.min(axis=0) 96 | 97 | 98 | class NearestNeighborDistanceMetric(object): 99 | """ 100 | A nearest neighbor distance metric that, for each target, returns 101 | the closest distance to any sample that has been observed so far. 102 | 103 | Parameters 104 | ---------- 105 | metric : str 106 | Either "euclidean" or "cosine". 107 | matching_threshold: float 108 | The matching threshold. Samples with larger distance are considered an 109 | invalid match. 110 | budget : Optional[int] 111 | If not None, fix samples per class to at most this number. Removes 112 | the oldest samples when the budget is reached. 113 | 114 | Attributes 115 | ---------- 116 | samples : Dict[int -> List[ndarray]] 117 | A dictionary that maps from target identities to the list of samples 118 | that have been observed so far. 119 | 120 | """ 121 | 122 | def __init__(self, metric, matching_threshold, budget=None): 123 | 124 | 125 | if metric == "euclidean": 126 | self._metric = _nn_euclidean_distance 127 | elif metric == "cosine": 128 | self._metric = _nn_cosine_distance 129 | else: 130 | raise ValueError( 131 | "Invalid metric; must be either 'euclidean' or 'cosine'") 132 | self.matching_threshold = matching_threshold 133 | self.budget = budget 134 | self.samples = {} 135 | 136 | def partial_fit(self, features, targets, active_targets): 137 | """Update the distance metric with new data. 138 | 139 | Parameters 140 | ---------- 141 | features : ndarray 142 | An NxM matrix of N features of dimensionality M. 143 | targets : ndarray 144 | An integer array of associated target identities. 145 | active_targets : List[int] 146 | A list of targets that are currently present in the scene. 
147 | 148 | """ 149 | for feature, target in zip(features, targets): 150 | self.samples.setdefault(target, []).append(feature) 151 | if self.budget is not None: 152 | self.samples[target] = self.samples[target][-self.budget:] 153 | self.samples = {k: self.samples[k] for k in active_targets} 154 | 155 | def distance(self, features, targets): 156 | """Compute distance between features and targets. 157 | 158 | Parameters 159 | ---------- 160 | features : ndarray 161 | An NxM matrix of N features of dimensionality M. 162 | targets : List[int] 163 | A list of targets to match the given `features` against. 164 | 165 | Returns 166 | ------- 167 | ndarray 168 | Returns a cost matrix of shape len(targets), len(features), where 169 | element (i, j) contains the closest squared distance between 170 | `targets[i]` and `features[j]`. 171 | 172 | """ 173 | cost_matrix = np.zeros((len(targets), len(features))) 174 | for i, target in enumerate(targets): 175 | cost_matrix[i, :] = self._metric(self.samples[target], features) 176 | return cost_matrix 177 | -------------------------------------------------------------------------------- /yolo3/generate_detections.py: -------------------------------------------------------------------------------- 1 | import os 2 | import errno 3 | import argparse 4 | import numpy as np 5 | import cv2 6 | import tensorflow.compat.v1 as tf 7 | 8 | 9 | def _run_in_batches(f, data_dict, out, batch_size): 10 | data_len = len(out) 11 | num_batches = int(data_len / batch_size) 12 | 13 | s, e = 0, 0 14 | for i in range(num_batches): 15 | s, e = i * batch_size, (i + 1) * batch_size 16 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 17 | out[s:e] = f(batch_data_dict) 18 | if e < len(out): 19 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 20 | out[e:] = f(batch_data_dict) 21 | 22 | 23 | def extract_image_patch(image, bbox, patch_shape): 24 | """Extract image patch from bounding box. 25 | Parameters 26 | ---------- 27 | image : ndarray 28 | The full image. 29 | bbox : array_like 30 | The bounding box in format (x, y, width, height). 31 | patch_shape : Optional[array_like] 32 | This parameter can be used to enforce a desired patch shape 33 | (height, width). First, the `bbox` is adapted to the aspect ratio 34 | of the patch shape, then it is clipped at the image boundaries. 35 | If None, the shape is computed from :arg:`bbox`. 36 | Returns 37 | ------- 38 | ndarray | NoneType 39 | An image patch showing the :arg:`bbox`, optionally reshaped to 40 | :arg:`patch_shape`. 41 | Returns None if the bounding box is empty or fully outside of the image 42 | boundaries. 
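    For example, with `patch_shape=(128, 64)` the box is first adjusted to the
    aspect ratio 64/128 = 0.5 (width / height), clipped to the image
    boundaries, and the patch is then resized to 128x64 (height x width).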
43 | """ 44 | bbox = np.array(bbox) 45 | if patch_shape is not None: 46 | # correct aspect ratio to patch shape 47 | target_aspect = float(patch_shape[1]) / patch_shape[0] 48 | new_width = target_aspect * bbox[3] 49 | bbox[0] -= (new_width - bbox[2]) / 2 50 | bbox[2] = new_width 51 | 52 | # convert to top left, bottom right 53 | bbox[2:] += bbox[:2] 54 | bbox = bbox.astype(np.int) 55 | 56 | # clip at image boundaries 57 | bbox[:2] = np.maximum(0, bbox[:2]) 58 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 59 | if np.any(bbox[:2] >= bbox[2:]): 60 | return None 61 | sx, sy, ex, ey = bbox 62 | image = image[sy:ey, sx:ex] 63 | image = cv2.resize(image, tuple(patch_shape[::-1])) 64 | return image 65 | 66 | 67 | class ImageEncoder(object): 68 | 69 | def __init__(self, checkpoint_filename, input_name="images", 70 | output_name="features"): 71 | self.session = tf.Session() 72 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 73 | graph_def = tf.GraphDef() 74 | graph_def.ParseFromString(file_handle.read()) 75 | tf.import_graph_def(graph_def, name="net") 76 | self.input_var = tf.get_default_graph().get_tensor_by_name( 77 | "net/%s:0" % input_name) 78 | self.output_var = tf.get_default_graph().get_tensor_by_name( 79 | "net/%s:0" % output_name) 80 | 81 | assert len(self.output_var.get_shape()) == 2 82 | assert len(self.input_var.get_shape()) == 4 83 | self.feature_dim = self.output_var.get_shape().as_list()[-1] 84 | self.image_shape = self.input_var.get_shape().as_list()[1:] 85 | 86 | def __call__(self, data_x, batch_size=32): 87 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 88 | _run_in_batches( 89 | lambda x: self.session.run(self.output_var, feed_dict=x), 90 | {self.input_var: data_x}, out, batch_size) 91 | return out 92 | 93 | 94 | def create_box_encoder(model_filename, input_name="images", 95 | output_name="features", batch_size=32): 96 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 97 | image_shape = image_encoder.image_shape 98 | 99 | def encoder(image, boxes): 100 | image_patches = [] 101 | for box in boxes: 102 | patch = extract_image_patch(image, box, image_shape[:2]) 103 | if patch is None: 104 | print("WARNING: Failed to extract image patch: %s." % str(box)) 105 | patch = np.random.uniform( 106 | 0., 255., image_shape).astype(np.uint8) 107 | image_patches.append(patch) 108 | image_patches = np.asarray(image_patches) 109 | return image_encoder(image_patches, batch_size) 110 | 111 | return encoder 112 | 113 | 114 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 115 | """Generate detections with features. 116 | Parameters 117 | ---------- 118 | encoder : Callable[image, ndarray] -> ndarray 119 | The encoder function takes as input a BGR color image and a matrix of 120 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 121 | corresponding feature vectors. 122 | mot_dir : str 123 | Path to the MOTChallenge directory (can be either train or test). 124 | output_dir 125 | Path to the output directory. Will be created if it does not exist. 126 | detection_dir 127 | Path to custom detections. The directory structure should be the default 128 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 129 | standard MOTChallenge detections. 
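    Example (the paths here are illustrative):

    >>> encoder = create_box_encoder('model_data/mars-small128.pb')
    >>> generate_detections(encoder, 'MOT16/train', './detections')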
130 | """ 131 | if detection_dir is None: 132 | detection_dir = mot_dir 133 | try: 134 | os.makedirs(output_dir) 135 | except OSError as exception: 136 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 137 | pass 138 | else: 139 | raise ValueError( 140 | "Failed to created output directory '%s'" % output_dir) 141 | 142 | for sequence in os.listdir(mot_dir): 143 | print("Processing %s" % sequence) 144 | sequence_dir = os.path.join(mot_dir, sequence) 145 | 146 | image_dir = os.path.join(sequence_dir, "img1") 147 | image_filenames = { 148 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 149 | for f in os.listdir(image_dir)} 150 | 151 | detection_file = os.path.join( 152 | detection_dir, sequence, "det/det.txt") 153 | detections_in = np.loadtxt(detection_file, delimiter=',') 154 | detections_out = [] 155 | 156 | frame_indices = detections_in[:, 0].astype(np.int) 157 | min_frame_idx = frame_indices.astype(np.int).min() 158 | max_frame_idx = frame_indices.astype(np.int).max() 159 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 160 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 161 | mask = frame_indices == frame_idx 162 | rows = detections_in[mask] 163 | 164 | if frame_idx not in image_filenames: 165 | print("WARNING could not find image for frame %d" % frame_idx) 166 | continue 167 | bgr_image = cv2.imread( 168 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 169 | features = encoder(bgr_image, rows[:, 2:6].copy()) 170 | detections_out += [np.r_[(row, feature)] for row, feature 171 | in zip(rows, features)] 172 | 173 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 174 | np.save( 175 | output_filename, np.asarray(detections_out), allow_pickle=False) -------------------------------------------------------------------------------- /deepsort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | import numpy as np 3 | from scipy.optimize import linear_sum_assignment 4 | from . import kalman_filter 5 | 6 | 7 | INFTY_COST = 1e+5 8 | 9 | 10 | def min_cost_matching( 11 | distance_metric, max_distance, tracks, detections, track_indices=None, 12 | detection_indices=None): 13 | """Solve linear assignment problem. 14 | 15 | Parameters 16 | ---------- 17 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 18 | The distance metric is given a list of tracks and detections as well as 19 | a list of N track indices and M detection indices. The metric should 20 | return the NxM dimensional cost matrix, where element (i, j) is the 21 | association cost between the i-th track in the given track indices and 22 | the j-th detection in the given detection_indices. 23 | max_distance : float 24 | Gating threshold. Associations with cost larger than this value are 25 | disregarded. 26 | tracks : List[track.Track] 27 | A list of predicted tracks at the current time step. 28 | detections : List[detection.Detection] 29 | A list of detections at the current time step. 30 | track_indices : List[int] 31 | List of track indices that maps rows in `cost_matrix` to tracks in 32 | `tracks` (see description above). 33 | detection_indices : List[int] 34 | List of detection indices that maps columns in `cost_matrix` to 35 | detections in `detections` (see description above). 
36 | 
37 |     Returns
38 |     -------
39 |     (List[(int, int)], List[int], List[int])
40 |         Returns a tuple with the following three entries:
41 |         * A list of matched track and detection indices.
42 |         * A list of unmatched track indices.
43 |         * A list of unmatched detection indices.
44 | 
45 |     """
46 |     if track_indices is None:
47 |         track_indices = np.arange(len(tracks))
48 |     if detection_indices is None:
49 |         detection_indices = np.arange(len(detections))
50 | 
51 |     if len(detection_indices) == 0 or len(track_indices) == 0:
52 |         return [], track_indices, detection_indices  # Nothing to match.
53 | 
54 |     cost_matrix = distance_metric(
55 |         tracks, detections, track_indices, detection_indices)
56 |     cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
57 |     indices = linear_sum_assignment(cost_matrix)
58 |     indices = np.asarray(indices)
59 |     indices = np.transpose(indices)
60 |     matches, unmatched_tracks, unmatched_detections = [], [], []
61 |     for col, detection_idx in enumerate(detection_indices):
62 |         if col not in indices[:, 1]:
63 |             unmatched_detections.append(detection_idx)
64 |     for row, track_idx in enumerate(track_indices):
65 |         if row not in indices[:, 0]:
66 |             unmatched_tracks.append(track_idx)
67 |     for row, col in indices:
68 |         track_idx = track_indices[row]
69 |         detection_idx = detection_indices[col]
70 |         if cost_matrix[row, col] > max_distance:
71 |             unmatched_tracks.append(track_idx)
72 |             unmatched_detections.append(detection_idx)
73 |         else:
74 |             matches.append((track_idx, detection_idx))
75 |     return matches, unmatched_tracks, unmatched_detections
76 | 
77 | 
78 | def matching_cascade(
79 |         distance_metric, max_distance, cascade_depth, tracks, detections,
80 |         track_indices=None, detection_indices=None):
81 |     """Run matching cascade.
82 | 
83 |     Parameters
84 |     ----------
85 |     distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
86 |         The distance metric is given a list of tracks and detections as well as
87 |         a list of N track indices and M detection indices. The metric should
88 |         return the NxM dimensional cost matrix, where element (i, j) is the
89 |         association cost between the i-th track in the given track indices and
90 |         the j-th detection in the given detection indices.
91 |     max_distance : float
92 |         Gating threshold. Associations with cost larger than this value are
93 |         disregarded.
94 |     cascade_depth: int
95 |         The cascade depth, should be set to the maximum track age.
96 |     tracks : List[track.Track]
97 |         A list of predicted tracks at the current time step.
98 |     detections : List[detection.Detection]
99 |         A list of detections at the current time step.
100 |     track_indices : Optional[List[int]]
101 |         List of track indices that maps rows in `cost_matrix` to tracks in
102 |         `tracks` (see description above). Defaults to all tracks.
103 |     detection_indices : Optional[List[int]]
104 |         List of detection indices that maps columns in `cost_matrix` to
105 |         detections in `detections` (see description above). Defaults to all
106 |         detections.
107 | 
108 |     Returns
109 |     -------
110 |     (List[(int, int)], List[int], List[int])
111 |         Returns a tuple with the following three entries:
112 |         * A list of matched track and detection indices.
113 |         * A list of unmatched track indices.
114 |         * A list of unmatched detection indices.
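    Notes
    -----
    Level `level` of the cascade only considers tracks with
    `time_since_update == 1 + level`, so tracks that were updated most
    recently get first claim on the detections; older tracks compete only
    for whatever is left unmatched.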
115 | 
116 |     """
117 |     if track_indices is None:
118 |         track_indices = list(range(len(tracks)))
119 |     if detection_indices is None:
120 |         detection_indices = list(range(len(detections)))
121 | 
122 |     unmatched_detections = detection_indices
123 |     matches = []
124 |     for level in range(cascade_depth):
125 |         if len(unmatched_detections) == 0:  # No detections left
126 |             break
127 | 
128 |         track_indices_l = [
129 |             k for k in track_indices
130 |             if tracks[k].time_since_update == 1 + level
131 |         ]
132 |         if len(track_indices_l) == 0:  # Nothing to match at this level
133 |             continue
134 | 
135 |         matches_l, _, unmatched_detections = \
136 |             min_cost_matching(
137 |                 distance_metric, max_distance, tracks, detections,
138 |                 track_indices_l, unmatched_detections)
139 |         matches += matches_l
140 |     unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
141 |     return matches, unmatched_tracks, unmatched_detections
142 | 
143 | 
144 | def gate_cost_matrix(
145 |         kf, cost_matrix, tracks, detections, track_indices, detection_indices,
146 |         gated_cost=INFTY_COST, only_position=False):
147 |     """Invalidate infeasible entries in cost matrix based on the state
148 |     distributions obtained by Kalman filtering.
149 | 
150 |     Parameters
151 |     ----------
152 |     kf : The Kalman filter.
153 |     cost_matrix : ndarray
154 |         The NxM dimensional cost matrix, where N is the number of track indices
155 |         and M is the number of detection indices, such that entry (i, j) is the
156 |         association cost between `tracks[track_indices[i]]` and
157 |         `detections[detection_indices[j]]`.
158 |     tracks : List[track.Track]
159 |         A list of predicted tracks at the current time step.
160 |     detections : List[detection.Detection]
161 |         A list of detections at the current time step.
162 |     track_indices : List[int]
163 |         List of track indices that maps rows in `cost_matrix` to tracks in
164 |         `tracks` (see description above).
165 |     detection_indices : List[int]
166 |         List of detection indices that maps columns in `cost_matrix` to
167 |         detections in `detections` (see description above).
168 |     gated_cost : Optional[float]
169 |         Entries in the cost matrix corresponding to infeasible associations are
170 |         set to this value. Defaults to a very large value.
171 |     only_position : Optional[bool]
172 |         If True, only the x, y position of the state distribution is considered
173 |         during gating. Defaults to False.
174 | 
175 |     Returns
176 |     -------
177 |     ndarray
178 |         Returns the modified cost matrix.
179 | 
180 |     """
181 |     gating_dim = 2 if only_position else 4
182 |     gating_threshold = kalman_filter.chi2inv95[gating_dim]
183 |     measurements = np.asarray(
184 |         [detections[i].to_xyah() for i in detection_indices])
185 |     for row, track_idx in enumerate(track_indices):
186 |         track = tracks[track_idx]
187 |         gating_distance = kf.gating_distance(
188 |             track.mean, track.covariance, measurements, only_position)
189 |         cost_matrix[row, gating_distance > gating_threshold] = gated_cost
190 |     return cost_matrix
191 | 
--------------------------------------------------------------------------------
/deepsort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.linalg
3 | 
4 | 
5 | """
6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
8 | function and used as Mahalanobis gating threshold.
9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919} 20 | 21 | 22 | class KalmanFilter(object): 23 | """ 24 | A simple Kalman filter for tracking bounding boxes in image space. 25 | 26 | The 8-dimensional state space 27 | 28 | x, y, a, h, vx, vy, va, vh 29 | 30 | contains the bounding box center position (x, y), aspect ratio a, height h, 31 | and their respective velocities. 32 | 33 | Object motion follows a constant velocity model. The bounding box location 34 | (x, y, a, h) is taken as direct observation of the state space (linear 35 | observation model). 36 | 37 | """ 38 | 39 | def __init__(self): 40 | ndim, dt = 4, 1. 41 | 42 | # Create Kalman filter model matrices. 43 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 44 | for i in range(ndim): 45 | self._motion_mat[i, ndim + i] = dt 46 | self._update_mat = np.eye(ndim, 2 * ndim) 47 | 48 | # Motion and observation uncertainty are chosen relative to the current 49 | # state estimate. These weights control the amount of uncertainty in 50 | # the model. This is a bit hacky. 51 | self._std_weight_position = 1. / 20 52 | self._std_weight_velocity = 1. / 160 53 | 54 | def initiate(self, measurement): 55 | """Create track from unassociated measurement. 56 | 57 | Parameters 58 | ---------- 59 | measurement : ndarray 60 | Bounding box coordinates (x, y, a, h) with center position (x, y), 61 | aspect ratio a, and height h. 62 | 63 | Returns 64 | ------- 65 | (ndarray, ndarray) 66 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 67 | dimensional) of the new track. Unobserved velocities are initialized 68 | to 0 mean. 69 | 70 | """ 71 | mean_pos = measurement 72 | mean_vel = np.zeros_like(mean_pos) 73 | mean = np.r_[mean_pos, mean_vel] 74 | 75 | std = [ 76 | 2 * self._std_weight_position * measurement[3], 77 | 2 * self._std_weight_position * measurement[3], 78 | 1e-2, 79 | 2 * self._std_weight_position * measurement[3], 80 | 10 * self._std_weight_velocity * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 1e-5, 83 | 10 * self._std_weight_velocity * measurement[3]] 84 | covariance = np.diag(np.square(std)) 85 | return mean, covariance 86 | 87 | def predict(self, mean, covariance): 88 | """Run Kalman filter prediction step. 89 | 90 | Parameters 91 | ---------- 92 | mean : ndarray 93 | The 8 dimensional mean vector of the object state at the previous 94 | time step. 95 | covariance : ndarray 96 | The 8x8 dimensional covariance matrix of the object state at the 97 | previous time step. 98 | 99 | Returns 100 | ------- 101 | (ndarray, ndarray) 102 | Returns the mean vector and covariance matrix of the predicted 103 | state. Unobserved velocities are initialized to 0 mean. 104 | 105 | """ 106 | std_pos = [ 107 | self._std_weight_position * mean[3], 108 | self._std_weight_position * mean[3], 109 | 1e-2, 110 | self._std_weight_position * mean[3]] 111 | std_vel = [ 112 | self._std_weight_velocity * mean[3], 113 | self._std_weight_velocity * mean[3], 114 | 1e-5, 115 | self._std_weight_velocity * mean[3]] 116 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 117 | 118 | mean = np.dot(self._motion_mat, mean) 119 | covariance = np.linalg.multi_dot(( 120 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 121 | 122 | return mean, covariance 123 | 124 | def project(self, mean, covariance): 125 | """Project state distribution to measurement space. 
126 | 127 | Parameters 128 | ---------- 129 | mean : ndarray 130 | The state's mean vector (8 dimensional array). 131 | covariance : ndarray 132 | The state's covariance matrix (8x8 dimensional). 133 | 134 | Returns 135 | ------- 136 | (ndarray, ndarray) 137 | Returns the projected mean and covariance matrix of the given state 138 | estimate. 139 | 140 | """ 141 | std = [ 142 | self._std_weight_position * mean[3], 143 | self._std_weight_position * mean[3], 144 | 1e-1, 145 | self._std_weight_position * mean[3]] 146 | innovation_cov = np.diag(np.square(std)) 147 | 148 | mean = np.dot(self._update_mat, mean) 149 | covariance = np.linalg.multi_dot(( 150 | self._update_mat, covariance, self._update_mat.T)) 151 | return mean, covariance + innovation_cov 152 | 153 | def update(self, mean, covariance, measurement): 154 | """Run Kalman filter correction step. 155 | 156 | Parameters 157 | ---------- 158 | mean : ndarray 159 | The predicted state's mean vector (8 dimensional). 160 | covariance : ndarray 161 | The state's covariance matrix (8x8 dimensional). 162 | measurement : ndarray 163 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 164 | is the center position, a the aspect ratio, and h the height of the 165 | bounding box. 166 | 167 | Returns 168 | ------- 169 | (ndarray, ndarray) 170 | Returns the measurement-corrected state distribution. 171 | 172 | """ 173 | projected_mean, projected_cov = self.project(mean, covariance) 174 | 175 | chol_factor, lower = scipy.linalg.cho_factor( 176 | projected_cov, lower=True, check_finite=False) 177 | kalman_gain = scipy.linalg.cho_solve( 178 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 179 | check_finite=False).T 180 | innovation = measurement - projected_mean 181 | 182 | new_mean = mean + np.dot(innovation, kalman_gain.T) 183 | new_covariance = covariance - np.linalg.multi_dot(( 184 | kalman_gain, projected_cov, kalman_gain.T)) 185 | return new_mean, new_covariance 186 | 187 | def gating_distance(self, mean, covariance, measurements, 188 | only_position=False): 189 | """Compute gating distance between state distribution and measurements. 190 | 191 | A suitable distance threshold can be obtained from `chi2inv95`. If 192 | `only_position` is False, the chi-square distribution has 4 degrees of 193 | freedom, otherwise 2. 194 | 195 | Parameters 196 | ---------- 197 | mean : ndarray 198 | Mean vector over the state distribution (8 dimensional). 199 | covariance : ndarray 200 | Covariance of the state distribution (8x8 dimensional). 201 | measurements : ndarray 202 | An Nx4 dimensional matrix of N measurements, each in 203 | format (x, y, a, h) where (x, y) is the bounding box center 204 | position, a the aspect ratio, and h the height. 205 | only_position : Optional[bool] 206 | If True, distance computation is done with respect to the bounding 207 | box center position only. 208 | 209 | Returns 210 | ------- 211 | ndarray 212 | Returns an array of length N, where the i-th element contains the 213 | squared Mahalanobis distance between (mean, covariance) and 214 | `measurements[i]`. 
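        Example
        -------
        With `only_position=False` the distance is chi-square distributed
        with 4 degrees of freedom, so `gating_distance(...) > chi2inv95[4]`
        (9.4877) marks a measurement as an infeasible association; this is
        how `linear_assignment.gate_cost_matrix` uses this method.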
215 | 
216 |         """
217 |         mean, covariance = self.project(mean, covariance)
218 |         if only_position:
219 |             mean, covariance = mean[:2], covariance[:2, :2]
220 |             measurements = measurements[:, :2]
221 | 
222 |         cholesky_factor = np.linalg.cholesky(covariance)
223 |         d = measurements - mean
224 |         z = scipy.linalg.solve_triangular(
225 |             cholesky_factor, d.T, lower=True, check_finite=False,
226 |             overwrite_b=True)
227 |         squared_maha = np.sum(z * z, axis=0)
228 |         return squared_maha
229 | 
--------------------------------------------------------------------------------
/sort.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | 
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment
5 | from filterpy.kalman import KalmanFilter
6 | 
7 | 
8 | def iou(bb_test, bb_gt):
9 |     """
10 |     Computes IOU between two bboxes in the form [x1,y1,x2,y2]
11 |     """
12 |     xx1 = np.maximum(bb_test[0], bb_gt[0])
13 |     yy1 = np.maximum(bb_test[1], bb_gt[1])
14 |     xx2 = np.minimum(bb_test[2], bb_gt[2])
15 |     yy2 = np.minimum(bb_test[3], bb_gt[3])
16 |     w = np.maximum(0., xx2 - xx1)
17 |     h = np.maximum(0., yy2 - yy1)
18 |     wh = w * h
19 |     o = wh / ((bb_test[2] - bb_test[0]) * (bb_test[3] - bb_test[1])
20 |               + (bb_gt[2] - bb_gt[0]) * (bb_gt[3] - bb_gt[1]) - wh)
21 |     return o
22 | 
23 | 
24 | def convert_bbox_to_z(bbox):
25 |     """
26 |     Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
27 |     [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
28 |     the aspect ratio
29 |     """
30 |     w = bbox[2] - bbox[0]
31 |     h = bbox[3] - bbox[1]
32 |     x = bbox[0] + w / 2.
33 |     y = bbox[1] + h / 2.
34 |     s = w * h  # scale is just area
35 |     r = w / float(h)
36 |     return np.array([x, y, s, r]).reshape((4, 1))
37 | 
38 | 
39 | def convert_x_to_bbox(x, score=None):
40 |     """
41 |     Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
42 |     [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
43 |     """
44 |     w = np.sqrt(x[2] * x[3])
45 |     h = x[2] / w
46 |     if score is None:
47 |         return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
48 |     else:
49 |         return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
50 | 
51 | 
52 | class KalmanBoxTracker(object):
53 |     """
54 |     This class represents the internal state of individual tracked objects observed as bbox.
55 |     """
56 |     count = 0
57 | 
58 |     def __init__(self, bbox):
59 |         """
60 |         Initialises a tracker using initial bounding box.
61 |         """
62 |         # define constant velocity model
63 |         self.kf = KalmanFilter(dim_x=7, dim_z=4)
64 |         self.kf.F = np.array(
65 |             [[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0],
66 |              [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1]])
67 |         self.kf.H = np.array(
68 |             [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]])
69 | 
70 |         self.kf.R[2:, 2:] *= 10.
71 |         self.kf.P[4:, 4:] *= 1000.  # give high uncertainty to the unobservable initial velocities
72 |         self.kf.P *= 10.
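        # State layout is [x, y, s, r, vx, vy, vs]: box centre, scale (area)
        # and aspect ratio plus their velocities; r is assumed constant, so F
        # gives it no velocity term. R inflates measurement noise on s and r,
        # and the P scalings above start the filter with high uncertainty,
        # especially on the unobserved velocities.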
73 |         self.kf.Q[-1, -1] *= 0.01
74 |         self.kf.Q[4:, 4:] *= 0.01
75 | 
76 |         self.kf.x[:4] = convert_bbox_to_z(bbox)
77 |         self.time_since_update = 0
78 |         self.id = KalmanBoxTracker.count
79 |         KalmanBoxTracker.count += 1
80 |         self.history = []
81 |         self.hits = 0
82 |         self.hit_streak = 0
83 |         self.age = 0
84 | 
85 |     def update(self, bbox):
86 |         """
87 |         Updates the state vector with observed bbox.
88 |         """
89 |         self.time_since_update = 0
90 |         self.history = []
91 |         self.hits += 1
92 |         self.hit_streak += 1
93 |         self.kf.update(convert_bbox_to_z(bbox))
94 | 
95 |     def predict(self):
96 |         """
97 |         Advances the state vector and returns the predicted bounding box estimate.
98 |         """
99 |         if (self.kf.x[6] + self.kf.x[2]) <= 0:
100 |             self.kf.x[6] *= 0.0
101 |         self.kf.predict()
102 |         self.age += 1
103 |         if self.time_since_update > 0:
104 |             self.hit_streak = 0
105 |         self.time_since_update += 1
106 |         self.history.append(convert_x_to_bbox(self.kf.x))
107 |         return self.history[-1]
108 | 
109 |     def get_state(self):
110 |         """
111 |         Returns the current bounding box estimate.
112 |         """
113 |         return convert_x_to_bbox(self.kf.x)
114 | 
115 | 
116 | def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
117 |     """
118 |     Assigns detections to tracked objects (both represented as bounding boxes)
119 |     Returns 3 lists of matches, unmatched_detections and unmatched_trackers
120 |     """
121 |     if len(trackers) == 0:
122 |         return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 1), dtype=int)
123 |     iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)
124 | 
125 |     for d, det in enumerate(detections):
126 |         for t, trk in enumerate(trackers):
127 |             iou_matrix[d, t] = iou(det, trk)
128 |     row_ind, col_ind = linear_sum_assignment(-iou_matrix)
129 |     matched_indices = np.zeros(shape=(row_ind.shape[0], 2), dtype=np.int64)
130 |     matched_indices[:, 0] = row_ind
131 |     matched_indices[:, 1] = col_ind
132 | 
133 |     unmatched_detections = []
134 |     for d, det in enumerate(detections):
135 |         if d not in matched_indices[:, 0]:
136 |             unmatched_detections.append(d)
137 |     unmatched_trackers = []
138 |     for t, trk in enumerate(trackers):
139 |         if t not in matched_indices[:, 1]:
140 |             unmatched_trackers.append(t)
141 | 
142 |     # reject matches that have both a low IOU and a class mismatch
143 |     matches = []
144 |     for m in matched_indices:
145 |         if (iou_matrix[m[0], m[1]] < iou_threshold) and (int(detections[m[0]][5]) != int(trackers[m[1]][5])):
146 |             unmatched_detections.append(m[0])
147 |             unmatched_trackers.append(m[1])
148 |         else:
149 |             matches.append(m.reshape(1, 2))
150 |     if len(matches) == 0:
151 |         matches = np.empty((0, 2), dtype=int)
152 |     else:
153 |         matches = np.concatenate(matches, axis=0)
154 | 
155 |     return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
156 | 
157 | 
158 | class Sort(object):
159 |     def __init__(self, max_age=2, min_hits=3):
160 |         """
161 |         Sets key parameters for SORT
162 |         """
163 |         self.max_age = max_age
164 |         self.min_hits = min_hits
165 |         self.trackers = []
166 |         self.scores = []
167 |         self.types = []
168 |         self.frame_count = 0
169 | 
170 |     def update(self, dets):
171 |         """
172 |         Params:
173 |         dets - a numpy array of detections in the format [[x1,y1,x2,y2,score,type],[x1,y1,x2,y2,score,type],...]
174 |         Requires: this method must be called once for each frame even with empty detections.
175 |         Returns a numpy array in the format [x1,y1,x2,y2,object_id,score,type]
176 |         NOTE: The number of objects returned may differ from the number of detections provided.
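        A minimal usage sketch (the detection numbers here are made up):

            mot_tracker = Sort()
            dets = np.array([[10., 20., 50., 80., 0.9, 2.]])  # x1,y1,x2,y2,score,type
            tracks = mot_tracker.update(dets)     # rows: x1,y1,x2,y2,id,score,type
            mot_tracker.update(np.empty((0, 6)))  # call even on empty frames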
177 | """ 178 | self.frame_count += 1 179 | # get predicted locations from existing trackers. 180 | trks = np.zeros((len(self.trackers), 6)) 181 | to_del = [] 182 | ret = [] 183 | for t, trk in enumerate(trks): 184 | pos = self.trackers[t].predict()[0] 185 | trk[:] = [pos[0], pos[1], pos[2], pos[3], self.scores[t], self.types[t]] 186 | if np.any(np.isnan(pos)): 187 | to_del.append(t) 188 | trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) 189 | for t in reversed(to_del): 190 | self.trackers.pop(t) 191 | self.scores.pop(t) 192 | self.types.pop(t) 193 | matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks) 194 | 195 | # update matched trackers with assigned detections 196 | for t, trk in enumerate(self.trackers): 197 | if t not in unmatched_trks: 198 | d = matched[np.where(matched[:, 1] == t)[0], 0] 199 | trk.update(dets[d, :][0]) 200 | self.scores[t] = dets[d, :][0][4] 201 | self.types[t] = dets[d, :][0][5] 202 | 203 | # create and initialise new trackers for unmatched detections 204 | for i in unmatched_dets: 205 | trk = KalmanBoxTracker(dets[i, 0:5]) 206 | self.trackers.append(trk) 207 | self.scores.append(dets[i, :][4]) 208 | self.types.append(dets[i, :][5]) 209 | i = len(self.trackers) 210 | for trk in reversed(self.trackers): 211 | pos = trk.get_state()[0] 212 | i -= 1 213 | if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits): 214 | ret.append(np.concatenate((pos, [trk.id + 1], [self.scores[i]], [self.types[i]])).reshape(1, -1)) # +1 as MOT benchmark requires positive 215 | # remove dead tracklet 216 | if trk.time_since_update > self.max_age: 217 | self.trackers.pop(i) 218 | self.scores.pop(i) 219 | self.types.pop(i) 220 | 221 | if len(ret) > 0: 222 | return np.concatenate(ret) 223 | else: 224 | return np.empty((0, 5)) 225 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | Retrain the YOLO model for your own dataset. 
3 | """ 4 | 5 | import numpy as np 6 | import keras.backend as K 7 | from keras.layers import Input, Lambda 8 | from keras.models import Model 9 | from keras.optimizers import Adam 10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping 11 | 12 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss 13 | 14 | import read_data_cifar100 15 | from PIL import Image 16 | 17 | 18 | def _main(): 19 | # constant 20 | path = 'cifar-100-python/' 21 | log_dir = 'output/' 22 | classes_path = 'model_data/cifar_classes.txt' 23 | anchors_path = 'model_data/yolo_anchors.txt' 24 | # anchors_path = 'model_data/tiny_yolo_anchors.txt' 25 | pretrained_weight = 'model_h5/yolo.h5' 26 | pretrained_weight_tiny = 'model_h5/yolo-tiny.h5' 27 | pretrained = True 28 | 29 | # epoch 30 | epoch_first = 10 31 | epoch_second = 20 32 | 33 | class_names = get_classes(classes_path) 34 | num_classes = len(class_names) 35 | anchors = get_anchors(anchors_path) 36 | 37 | input_shape = (416, 416) # multiple of 32, hw; (32, 32) in cifar-100 38 | 39 | is_tiny_version = len(anchors) == 6 # default setting 40 | if is_tiny_version: 41 | model = create_tiny_model(input_shape, anchors, num_classes, load_pretrained=pretrained, freeze_body=2, 42 | weights_path=pretrained_weight_tiny) 43 | else: 44 | model = create_model(input_shape, anchors, num_classes, load_pretrained=pretrained, freeze_body=2, 45 | weights_path=pretrained_weight) # make sure you know what you freeze 46 | 47 | logging = TensorBoard(log_dir=log_dir) 48 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', 49 | monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) 50 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1) 51 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1) 52 | 53 | # load cifar dataset 54 | images, labels, img_val, lab_val = read_data_cifar100.data(path, False, True, True) 55 | 56 | num_val = len(img_val) 57 | num_train = len(images) 58 | 59 | # Train with frozen layers first, to get a stable loss. 60 | # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. 61 | if True: 62 | model.compile(optimizer=Adam(lr=1e-3), loss={ 63 | # use custom yolo_loss Lambda layer. 64 | 'yolo_loss': lambda y_true, y_pred: y_pred}, metrics=['accuracy']) 65 | 66 | batch_size = 32 67 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 68 | model.fit_generator(data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes), 69 | steps_per_epoch=max(1, num_train // batch_size), 70 | validation_data=data_generator_wrapper(img_val, lab_val, batch_size, input_shape, anchors, 71 | num_classes), 72 | validation_steps=max(1, num_val // batch_size), 73 | epochs=epoch_first, 74 | initial_epoch=0, 75 | callbacks=[logging, checkpoint]) 76 | model.save_weights(log_dir + 'trained_weights_stage_1.h5') 77 | 78 | # Unfreeze and continue training, to fine-tune. 79 | # Train longer if the result is not good. 
80 | if True: 81 | for i in range(len(model.layers)): 82 | model.layers[i].trainable = True 83 | model.compile(optimizer=Adam(lr=1e-4), 84 | loss={'yolo_loss': lambda y_true, y_pred: y_pred}, metrics=['accuracy']) 85 | # recompile to apply the change 86 | print('Unfreeze all of the layers.') 87 | 88 | batch_size = 32 # note that more GPU memory is required after unfreezing the body 89 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) 90 | model.fit_generator(data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes), 91 | steps_per_epoch=max(1, num_train // batch_size), 92 | validation_data=data_generator_wrapper(img_val, lab_val, batch_size, input_shape, anchors, 93 | num_classes), 94 | validation_steps=max(1, num_val // batch_size), 95 | epochs=epoch_second, 96 | initial_epoch=epoch_first, 97 | callbacks=[logging, checkpoint, reduce_lr, early_stopping]) 98 | model.save_weights(log_dir + 'trained_weights_final.h5') 99 | 100 | # Further training if needed. 101 | 102 | 103 | def get_classes(classes_path): 104 | """loads the classes""" 105 | with open(classes_path) as f: 106 | class_names = f.readlines() 107 | class_names = [c.strip() for c in class_names] 108 | return class_names 109 | 110 | 111 | def get_anchors(anchors_path): 112 | """loads the anchors from a file""" 113 | with open(anchors_path) as f: 114 | anchors = f.readline() 115 | anchors = [float(x) for x in anchors.split(',')] 116 | return np.array(anchors).reshape(-1, 2) 117 | 118 | 119 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, 120 | weights_path='model_data/yolo_weights.h5'): 121 | """create the training model""" 122 | K.clear_session() # get a new session 123 | image_input = Input(shape=(None, None, 3)) 124 | h, w = input_shape 125 | num_anchors = len(anchors) 126 | 127 | y_true = [ 128 | Input(shape=(h // {0: 32, 1: 16, 2: 8}[l], w // {0: 32, 1: 16, 2: 8}[l], num_anchors // 3, num_classes + 5)) for 129 | l in range(3)] 130 | 131 | model_body = yolo_body(image_input, num_anchors // 3, num_classes) 132 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) 133 | 134 | if load_pretrained: 135 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) 136 | print('Load weights {}.'.format(weights_path)) 137 | if freeze_body in [1, 2]: 138 | # Freeze darknet53 body or freeze all but 3 output layers. 
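            # freeze_body == 1 keeps the first 185 layers (the Darknet-53
            # backbone) frozen; freeze_body == 2 freezes everything except
            # the three detection heads.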
139 |             num = (185, len(model_body.layers) - 3)[freeze_body - 1]
140 |             for i in range(num):
141 |                 model_body.layers[i].trainable = False
142 |             print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
143 | 
144 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
145 |                         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
146 |         [*model_body.output, *y_true])
147 |     model = Model([model_body.input, *y_true], model_loss)
148 | 
149 |     return model
150 | 
151 | 
152 | def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
153 |                       weights_path='model_data/tiny_yolo_weights.h5'):
154 |     """create the training model, for Tiny YOLOv3"""
155 |     K.clear_session()  # get a new session
156 |     image_input = Input(shape=(None, None, 3))
157 |     h, w = input_shape
158 |     num_anchors = len(anchors)
159 | 
160 |     y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
161 |               range(2)]
162 | 
163 |     model_body = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
164 |     print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
165 | 
166 |     if load_pretrained:
167 |         model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
168 |         print('Load weights {}.'.format(weights_path))
169 |         if freeze_body in [1, 2]:
170 |             # Freeze the darknet body or freeze all but 2 output layers.
171 |             num = (20, len(model_body.layers) - 2)[freeze_body - 1]
172 |             for i in range(num):
173 |                 model_body.layers[i].trainable = False
174 |             print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
175 | 
176 |     model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
177 |                         arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
178 |         [*model_body.output, *y_true])
179 |     model = Model([model_body.input, *y_true], model_loss)
180 | 
181 |     return model
182 | 
183 | 
184 | def get_data(single_image, single_label, input_shape, proc_img=True):
185 |     """
186 |     pre-processing for real-time data augmentation
187 |     """
188 |     image = Image.fromarray(np.uint8(single_image))
189 |     iw, ih = image.size
190 |     h, w = input_shape
191 |     # the CIFAR dataset is already cropped to the target area, so the box covers the whole image
192 |     box = np.array([[0, 0, 32, 32, int(single_label)]])
193 | 
194 |     # resize image
195 |     scale = min(w / iw, h / ih)
196 |     nw = int(iw * scale)
197 |     nh = int(ih * scale)
198 |     dx = (w - nw) // 2
199 |     dy = (h - nh) // 2
200 |     image_data = 0
201 |     if proc_img:
202 |         image = image.resize((nw, nh), Image.BICUBIC)
203 |         new_image = Image.new('RGB', (w, h), (128, 128, 128))
204 |         new_image.paste(image, (dx, dy))
205 |         image_data = np.array(new_image) / 255.
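    # Worked example for CIFAR input: iw = ih = 32 and w = h = 416, so
    # scale = min(416/32, 416/32) = 13, nw = nh = 416 and dx = dy = 0;
    # the box [0, 0, 32, 32] is then mapped to [0, 0, 416, 416] below.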
206 | 207 | # correct boxes 208 | box_data = np.zeros((1, 5)) 209 | box[:, [0, 2]] = box[:, [0, 2]] * scale + dx 210 | box[:, [1, 3]] = box[:, [1, 3]] * scale + dy 211 | box_data[:len(box)] = box 212 | 213 | return image_data, box_data 214 | 215 | 216 | def data_generator(images, labels, batch_size, input_shape, anchors, num_classes): 217 | """data generator for fit_generator""" 218 | n = len(images) 219 | i = 0 220 | while True: 221 | image_data = [] 222 | box_data = [] 223 | for b in range(batch_size): 224 | image, box = get_data(images[i], labels[i], input_shape, proc_img=True) 225 | image_data.append(image) 226 | box_data.append(box) 227 | # avoid IndexError 228 | i = (i + 1) % n 229 | image_data = np.array(image_data) 230 | box_data = np.array(box_data) 231 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes) 232 | yield [image_data, *y_true], np.zeros(batch_size) 233 | 234 | 235 | def data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes): 236 | n = len(images) 237 | if n == 0 or batch_size <= 0: 238 | return None 239 | return data_generator(images, labels, batch_size, input_shape, anchors, num_classes) 240 | 241 | 242 | if __name__ == '__main__': 243 | _main() 244 | -------------------------------------------------------------------------------- /yolo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Class definition of YOLO_v3 style detection model on image and video 4 | """ 5 | 6 | import colorsys 7 | from timeit import default_timer as timer 8 | 9 | import numpy as np 10 | from keras import backend as K 11 | from keras.models import load_model 12 | from keras.layers import Input 13 | from PIL import Image, ImageFont, ImageDraw 14 | 15 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body 16 | from yolo3.utils import letterbox_image 17 | from yolo3.utils import delete_repeat_bbox 18 | import sort 19 | import os 20 | from keras.utils import multi_gpu_model 21 | 22 | from tracker_func import sort_image 23 | from sort import KalmanBoxTracker 24 | 25 | from tracker_func import deepsort_image 26 | from deepsort.tracker import Tracker as deepsort_Tracker 27 | from yolo3 import generate_detections as gdet 28 | from deepsort import nn_matching 29 | 30 | 31 | class YOLO(object): 32 | _defaults = { 33 | "model_image_size": (416, 416), 34 | } 35 | 36 | @classmethod 37 | def get_defaults(cls, n): 38 | if n in cls._defaults: 39 | return cls._defaults[n] 40 | else: 41 | return "Unrecognized attribute name '" + n + "'" 42 | 43 | def __init__(self, **kwargs): 44 | self.__dict__.update(self._defaults) # set up default values 45 | self.__dict__.update(kwargs) # and update with user overrides 46 | self.class_names = self._get_class() 47 | self.anchors = self._get_anchors() 48 | self.sess = K.get_session() 49 | self.boxes, self.scores, self.classes = self.generate() 50 | self.frame = 1 51 | self.mot_tracker, self.encoder = self._initialize_tracker() 52 | 53 | def _initialize_tracker(self): 54 | if not self.image: 55 | if self.tracker == 'sort': 56 | tracker = sort.Sort() 57 | return tracker, None 58 | elif self.tracker == 'deepsort': 59 | # initialize deep sort 60 | model_filename = self.deepsort_model 61 | encoder = gdet.create_box_encoder(model_filename, batch_size=1) 62 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.5, budget=None) 63 | tracker = deepsort_Tracker(metric) 64 | return tracker, encoder 65 | else: 66 | raise 
ValueError('The variable \"tracker\" must be \"sort\" or \"deepsort\".') 67 | else: 68 | return None, None 69 | 70 | def _get_class(self): 71 | classes_path = os.path.expanduser(self.classes_path) 72 | with open(classes_path) as f: 73 | class_names = f.readlines() 74 | class_names = [c.strip() for c in class_names] 75 | return class_names 76 | 77 | def _get_anchors(self): 78 | anchors_path = os.path.expanduser(self.anchors_path) 79 | with open(anchors_path) as f: 80 | anchors = f.readline() 81 | anchors = [float(x) for x in anchors.split(',')] 82 | return np.array(anchors).reshape(-1, 2) 83 | 84 | def generate(self): 85 | model_path = os.path.expanduser(self.model_path) 86 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 87 | 88 | # Load model, or construct model and load weights. 89 | num_anchors = len(self.anchors) 90 | num_classes = len(self.class_names) 91 | is_tiny_version = num_anchors == 6 # default setting 92 | try: 93 | self.yolo_model = load_model(model_path, compile=False) 94 | except: 95 | self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors // 2, num_classes) \ 96 | if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors // 3, num_classes) 97 | self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match 98 | else: 99 | try: 100 | assert self.yolo_model.layers[-1].output_shape[-1] == \ 101 | num_anchors / len(self.yolo_model.output) * (num_classes + 5), \ 102 | 'Mismatch between model and given anchor and class sizes' 103 | except TypeError: 104 | # the number of yolo_model.output(Tensor) may be just one 105 | assert self.yolo_model.layers[-1].output_shape[-1] == \ 106 | num_anchors / 1 * (num_classes + 5), 'Mismatch between model and given anchor and class sizes' 107 | 108 | print('{} model, anchors, and classes loaded.'.format(model_path)) 109 | 110 | # Generate colors for drawing bounding boxes. 111 | hsv_tuples = [(x / len(self.class_names), 1., 1.) 112 | for x in range(len(self.class_names))] 113 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) 114 | self.colors = list( 115 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), 116 | self.colors)) 117 | np.random.seed(10101) # Fixed seed for consistent colors across runs. 118 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes. 119 | np.random.seed(None) # Reset seed to default. 120 | 121 | # Generate output tensor targets for filtered bounding boxes. 
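        # The placeholder carries the original (height, width) of each frame
        # so yolo_eval can undo the letterbox resize; score_threshold and
        # iou_threshold come from the user-supplied self.score and self.iou.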
122 | self.input_image_shape = K.placeholder(shape=(2,)) 123 | if self.gpu_num >= 2: 124 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num) 125 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, 126 | len(self.class_names), self.input_image_shape, 127 | score_threshold=self.score, iou_threshold=self.iou) 128 | return boxes, scores, classes 129 | 130 | def detect_image(self, frame, fo=None): 131 | image = Image.fromarray(frame) 132 | 133 | start = timer() 134 | 135 | if self.model_image_size != (None, None): 136 | assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required' 137 | assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required' 138 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size))) 139 | else: 140 | new_image_size = (image.width - (image.width % 32), 141 | image.height - (image.height % 32)) 142 | boxed_image = letterbox_image(image, new_image_size) 143 | image_data = np.array(boxed_image, dtype='float32') 144 | 145 | print(image_data.shape) 146 | image_data /= 255. 147 | image_data = np.expand_dims(image_data, 0) # Add batch dimension. 148 | 149 | out_boxes, out_scores, out_classes = self.sess.run( 150 | [self.boxes, self.scores, self.classes], 151 | feed_dict={ 152 | self.yolo_model.input: image_data, 153 | self.input_image_shape: [image.size[1], image.size[0]], 154 | K.learning_phase(): 0 155 | }) 156 | 157 | # print(type(out_boxes), type(out_scores), type(out_classes)) 158 | # print(out_boxes, out_scores, out_classes) 159 | 160 | # delete repeat bbox 161 | out_boxes, out_scores, out_classes = \ 162 | delete_repeat_bbox(list(out_boxes), list(out_scores), list(out_classes), self.repeat_iou) 163 | 164 | # open or close tracker 165 | if self.mot_tracker is not None: 166 | if self.tracker == 'sort': 167 | out_boxes, out_scores, out_classes, object_id = \ 168 | sort_image(self.mot_tracker, out_boxes, out_scores, out_classes) 169 | elif self.tracker == 'deepsort': 170 | out_boxes, out_scores, out_classes, object_id = \ 171 | deepsort_image(self.mot_tracker, self.encoder, frame, out_boxes, out_scores, out_classes, 172 | nms_max_overlap=1.0) 173 | else: 174 | raise ValueError('The variable \"tracker\" must be \"sort\" or \"deepsort\".') 175 | else: 176 | KalmanBoxTracker.count = 0 177 | object_id = np.concatenate(np.zeros((1, len(out_boxes)))) 178 | 179 | # write to file 180 | if self.write_to_file: 181 | for i in reversed(range(0, len(out_boxes))): 182 | result = [self.frame, object_id[i], out_boxes[i][0], out_boxes[i][1], 183 | abs(out_boxes[i][2] - out_boxes[i][0]), abs(out_boxes[i][3] - out_boxes[i][1]), out_scores[i], 184 | -1, -1, -1] 185 | fo.write(', '.join(map(str, result))) 186 | fo.write('\n') 187 | 188 | print('Found {} boxes for {}'.format(len(out_boxes), 'img')) 189 | 190 | font = ImageFont.truetype(font='font/times.ttf', 191 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) 192 | thickness = (image.size[0] + image.size[1]) // 300 193 | 194 | for i, c in reversed(list(enumerate(out_classes))): 195 | predicted_class = self.class_names[c] 196 | box = out_boxes[i] 197 | score = out_scores[i] 198 | id = int(object_id[i]) 199 | 200 | # bounding box 201 | top, left, bottom, right = box 202 | top = max(0, np.floor(top + 0.5).astype('int32')) 203 | left = max(0, np.floor(left + 0.5).astype('int32')) 204 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) 205 | right = min(image.size[0], np.floor(right + 0.5).astype('int32')) 206 | 207 | label = '{} {:.2f} 
id:{}'.format(predicted_class, score, id) 208 | draw = ImageDraw.Draw(image) 209 | label_size = draw.textsize(label, font) 210 | 211 | print(label, (left, top), (right, bottom)) 212 | 213 | if top - label_size[1] >= 0: 214 | text_origin = np.array([left, top - label_size[1]]) 215 | else: 216 | text_origin = np.array([left, top + 1]) 217 | 218 | # My kingdom for a good redistributable image drawing library. 219 | for i in range(thickness): 220 | draw.rectangle( 221 | [left + i, top + i, right - i, bottom - i], 222 | outline=self.colors[c]) 223 | draw.rectangle( 224 | [tuple(text_origin), tuple(text_origin + label_size)], 225 | fill=self.colors[c]) 226 | draw.text(text_origin, label, fill=(0, 0, 0), font=font) 227 | del draw 228 | 229 | end = timer() 230 | print('time:', end - start, 's') 231 | self.frame = self.frame + 1 232 | return image 233 | 234 | def close_session(self): 235 | self.sess.close() 236 | 237 | 238 | def detect_video(yolo, video_path, output_path=""): 239 | import cv2 240 | vid = cv2.VideoCapture(video_path) 241 | if not vid.isOpened(): 242 | raise IOError("Couldn't open webcam or video") 243 | video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC)) 244 | video_fps = vid.get(cv2.CAP_PROP_FPS) 245 | video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), 246 | int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))) 247 | isOutput = True if output_path != "" else False 248 | if isOutput: 249 | print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size)) 250 | out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size) 251 | accum_time = 0 252 | curr_fps = 0 253 | fps = "FPS: ??" 254 | prev_time = timer() 255 | 256 | if yolo.write_to_file: 257 | emptyFile = open(yolo.output_path + 'result.dat', 'w') 258 | else: 259 | emptyFile = None 260 | 261 | while True: 262 | return_value, frame = vid.read() 263 | try: 264 | image = Image.fromarray(frame) 265 | except AttributeError: 266 | break 267 | image = yolo.detect_image(frame, emptyFile) 268 | result = np.asarray(image) 269 | curr_time = timer() 270 | exec_time = curr_time - prev_time 271 | prev_time = curr_time 272 | accum_time = accum_time + exec_time 273 | curr_fps = curr_fps + 1 274 | if accum_time > 1: 275 | accum_time = accum_time - 1 276 | fps = "FPS: " + str(curr_fps) 277 | curr_fps = 0 278 | cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, 279 | fontScale=0.50, color=(255, 0, 0), thickness=2) 280 | cv2.namedWindow("result", cv2.WINDOW_NORMAL) 281 | cv2.imshow("result", result) 282 | if isOutput: 283 | out.write(result) 284 | if cv2.waitKey(1) & 0xFF == ord('q'): 285 | break 286 | if yolo.write_to_file: 287 | emptyFile.close() 288 | yolo.close_session() 289 | -------------------------------------------------------------------------------- /yolo3/model.py: -------------------------------------------------------------------------------- 1 | """YOLO_v3 Model Defined in Keras.""" 2 | 3 | from functools import wraps 4 | 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D 9 | from keras.layers.advanced_activations import LeakyReLU 10 | from keras.layers.normalization import BatchNormalization 11 | from keras.models import Model 12 | from keras.regularizers import l2 13 | 14 | from yolo3.utils import compose 15 | 16 | 17 | @wraps(Conv2D) 18 | def DarknetConv2D(*args, **kwargs): 19 | """Wrapper to set Darknet parameters for Convolution2D.""" 20 | 
darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
21 |     darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == (2, 2) else 'same'
22 |     darknet_conv_kwargs.update(kwargs)
23 |     return Conv2D(*args, **darknet_conv_kwargs)
24 | 
25 | 
26 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
27 |     """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
28 |     no_bias_kwargs = {'use_bias': False}
29 |     no_bias_kwargs.update(kwargs)
30 |     return compose(
31 |         DarknetConv2D(*args, **no_bias_kwargs),
32 |         BatchNormalization(),
33 |         LeakyReLU(alpha=0.1))
34 | 
35 | 
36 | def resblock_body(x, num_filters, num_blocks):
37 |     '''A series of resblocks starting with a downsampling Convolution2D'''
38 |     # Darknet uses left and top padding instead of 'same' mode
39 |     x = ZeroPadding2D(((1, 0), (1, 0)))(x)
40 |     x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x)
41 |     for i in range(num_blocks):
42 |         y = compose(
43 |             DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)),
44 |             DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x)
45 |         x = Add()([x, y])
46 |     return x
47 | 
48 | 
49 | def darknet_body(x):
50 |     '''Darknet body having 52 Convolution2D layers'''
51 |     x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
52 |     x = resblock_body(x, 64, 1)
53 |     x = resblock_body(x, 128, 2)
54 |     x = resblock_body(x, 256, 8)
55 |     x = resblock_body(x, 512, 8)
56 |     x = resblock_body(x, 1024, 4)
57 |     return x
58 | 
59 | 
60 | def make_last_layers(x, num_filters, out_filters):
61 |     '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
62 |     x = compose(
63 |         DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
64 |         DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
65 |         DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
66 |         DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
67 |         DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x)
68 |     y = compose(
69 |         DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
70 |         DarknetConv2D(out_filters, (1, 1)))(x)
71 |     return x, y
72 | 
73 | 
74 | def yolo_body(inputs, num_anchors, num_classes):
75 |     """Create YOLO_V3 model CNN body in Keras."""
76 |     darknet = Model(inputs, darknet_body(inputs))
77 |     x, y1 = make_last_layers(darknet.output, 512, num_anchors * (num_classes + 5))
78 | 
79 |     x = compose(
80 |         DarknetConv2D_BN_Leaky(256, (1, 1)),
81 |         UpSampling2D(2))(x)
82 |     x = Concatenate()([x, darknet.layers[152].output])
83 |     x, y2 = make_last_layers(x, 256, num_anchors * (num_classes + 5))
84 | 
85 |     x = compose(
86 |         DarknetConv2D_BN_Leaky(128, (1, 1)),
87 |         UpSampling2D(2))(x)
88 |     x = Concatenate()([x, darknet.layers[92].output])
89 |     x, y3 = make_last_layers(x, 128, num_anchors * (num_classes + 5))
90 | 
91 |     return Model(inputs, [y1, y2, y3])
92 | 
93 | 
94 | def tiny_yolo_body(inputs, num_anchors, num_classes):
95 |     '''Create Tiny YOLO_v3 model CNN body in keras.'''
96 |     x1 = compose(
97 |         DarknetConv2D_BN_Leaky(16, (3, 3)),
98 |         MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
99 |         DarknetConv2D_BN_Leaky(32, (3, 3)),
100 |         MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
101 |         DarknetConv2D_BN_Leaky(64, (3, 3)),
102 |         MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
103 |         DarknetConv2D_BN_Leaky(128, (3, 3)),
104 |         MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
105 |         DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
106 |     x2 = compose(
107 |         MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
108 |         DarknetConv2D_BN_Leaky(512, (3, 3)),
109 |         MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
110 |         DarknetConv2D_BN_Leaky(1024, (3, 3)),
111 |         DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
112 |     y1 = compose(
113 |         DarknetConv2D_BN_Leaky(512, (3, 3)),
114 |         DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)
115 | 
116 |     x2 = compose(
117 |         DarknetConv2D_BN_Leaky(128, (1, 1)),
118 |         UpSampling2D(2))(x2)
119 |     y2 = compose(
120 |         Concatenate(),
121 |         DarknetConv2D_BN_Leaky(256, (3, 3)),
122 |         DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])
123 | 
124 |     return Model(inputs, [y1, y2])
125 | 
126 | 
127 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
128 |     """Convert final layer features to bounding box parameters."""
129 |     num_anchors = len(anchors)
130 |     # Reshape to batch, height, width, num_anchors, box_params.
131 |     anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
132 | 
133 |     grid_shape = K.shape(feats)[1:3]  # height, width
134 |     grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
135 |                     [1, grid_shape[1], 1, 1])
136 |     grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
137 |                     [grid_shape[0], 1, 1, 1])
138 |     grid = K.concatenate([grid_x, grid_y])
139 |     grid = K.cast(grid, K.dtype(feats))
140 | 
141 |     feats = K.reshape(
142 |         feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
143 | 
144 |     # Adjust predictions to each spatial grid point and anchor size.
145 |     box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
146 |     box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
147 |     box_confidence = K.sigmoid(feats[..., 4:5])
148 |     box_class_probs = K.sigmoid(feats[..., 5:])
149 | 
150 |     if calc_loss:
151 |         return grid, feats, box_xy, box_wh
152 |     return box_xy, box_wh, box_confidence, box_class_probs
153 | 
154 | 
155 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
156 |     '''Get corrected boxes'''
157 |     box_yx = box_xy[..., ::-1]
158 |     box_hw = box_wh[..., ::-1]
159 |     input_shape = K.cast(input_shape, K.dtype(box_yx))
160 |     image_shape = K.cast(image_shape, K.dtype(box_yx))
161 |     new_shape = K.round(image_shape * K.min(input_shape / image_shape))
162 |     offset = (input_shape - new_shape) / 2. / input_shape
163 |     scale = input_shape / new_shape
164 |     box_yx = (box_yx - offset) * scale
165 |     box_hw *= scale
166 | 
167 |     box_mins = box_yx - (box_hw / 2.)
168 |     box_maxes = box_yx + (box_hw / 2.)
169 |     boxes = K.concatenate([
170 |         box_mins[..., 0:1],  # y_min
171 |         box_mins[..., 1:2],  # x_min
172 |         box_maxes[..., 0:1],  # y_max
173 |         box_maxes[..., 1:2]  # x_max
174 |     ])
175 | 
176 |     # Scale boxes back to original image shape.
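    # boxes holds (y_min, x_min, y_max, x_max) normalized by the original
    # image, so multiplying by the concatenated (h, w, h, w) yields pixels.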
177 | boxes *= K.concatenate([image_shape, image_shape]) 178 | return boxes 179 | 180 | 181 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): 182 | '''Process Conv layer output''' 183 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, 184 | anchors, num_classes, input_shape) 185 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) 186 | boxes = K.reshape(boxes, [-1, 4]) 187 | box_scores = box_confidence * box_class_probs 188 | box_scores = K.reshape(box_scores, [-1, num_classes]) 189 | return boxes, box_scores 190 | 191 | 192 | def yolo_eval(yolo_outputs, 193 | anchors, 194 | num_classes, 195 | image_shape, 196 | max_boxes=20, 197 | score_threshold=.6, 198 | iou_threshold=.5): 199 | """Evaluate YOLO model on given input and return filtered boxes.""" 200 | num_layers = len(yolo_outputs) 201 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] # default setting 202 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32 203 | boxes = [] 204 | box_scores = [] 205 | for l in range(num_layers): 206 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], 207 | anchors[anchor_mask[l]], num_classes, input_shape, image_shape) 208 | boxes.append(_boxes) 209 | box_scores.append(_box_scores) 210 | boxes = K.concatenate(boxes, axis=0) 211 | box_scores = K.concatenate(box_scores, axis=0) 212 | 213 | mask = box_scores >= score_threshold 214 | max_boxes_tensor = K.constant(max_boxes, dtype='int32') 215 | boxes_ = [] 216 | scores_ = [] 217 | classes_ = [] 218 | for c in range(num_classes): 219 | # TODO: use keras backend instead of tf. 220 | class_boxes = tf.boolean_mask(boxes, mask[:, c]) 221 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) 222 | nms_index = tf.image.non_max_suppression( 223 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) 224 | class_boxes = K.gather(class_boxes, nms_index) 225 | class_box_scores = K.gather(class_box_scores, nms_index) 226 | classes = K.ones_like(class_box_scores, 'int32') * c 227 | boxes_.append(class_boxes) 228 | scores_.append(class_box_scores) 229 | classes_.append(classes) 230 | boxes_ = K.concatenate(boxes_, axis=0) 231 | scores_ = K.concatenate(scores_, axis=0) 232 | classes_ = K.concatenate(classes_, axis=0) 233 | 234 | return boxes_, scores_, classes_ 235 | 236 | 237 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): 238 | '''Preprocess true boxes to training input format 239 | 240 | Parameters 241 | ---------- 242 | true_boxes: array, shape=(m, T, 5) 243 | Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape. 
244 |     input_shape: array-like, hw, multiples of 32
245 |     anchors: array, shape=(N, 2), wh
246 |     num_classes: integer
247 | 
248 |     Returns
249 |     -------
250 |     y_true: list of array, shape like yolo_outputs, xywh are relative values
251 | 
252 |     '''
253 |     assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
254 |     num_layers = len(anchors) // 3  # default setting
255 |     anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
256 | 
257 |     true_boxes = np.array(true_boxes, dtype='float32')
258 |     input_shape = np.array(input_shape, dtype='int32')
259 |     boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
260 |     boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
261 |     true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
262 |     true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
263 | 
264 |     m = true_boxes.shape[0]
265 |     grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
266 |     y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
267 |                        dtype='float32') for l in range(num_layers)]
268 | 
269 |     # Expand dim to apply broadcasting.
270 |     anchors = np.expand_dims(anchors, 0)
271 |     anchor_maxes = anchors / 2.
272 |     anchor_mins = -anchor_maxes
273 |     valid_mask = boxes_wh[..., 0] > 0
274 | 
275 |     for b in range(m):
276 |         # Discard zero rows.
277 |         wh = boxes_wh[b, valid_mask[b]]
278 |         if len(wh) == 0: continue
279 |         # Expand dim to apply broadcasting.
280 |         wh = np.expand_dims(wh, -2)
281 |         box_maxes = wh / 2.
282 |         box_mins = -box_maxes
283 | 
284 |         intersect_mins = np.maximum(box_mins, anchor_mins)
285 |         intersect_maxes = np.minimum(box_maxes, anchor_maxes)
286 |         intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
287 |         intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
288 |         box_area = wh[..., 0] * wh[..., 1]
289 |         anchor_area = anchors[..., 0] * anchors[..., 1]
290 |         iou = intersect_area / (box_area + anchor_area - intersect_area)
291 | 
292 |         # Find best anchor for each true box
293 |         best_anchor = np.argmax(iou, axis=-1)
294 | 
295 |         for t, n in enumerate(best_anchor):
296 |             for l in range(num_layers):
297 |                 if n in anchor_mask[l]:
298 |                     i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
299 |                     j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
300 |                     k = anchor_mask[l].index(n)
301 |                     c = true_boxes[b, t, 4].astype('int32')
302 |                     y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
303 |                     y_true[l][b, j, i, k, 4] = 1
304 |                     y_true[l][b, j, i, k, 5 + c] = 1
305 | 
306 |     return y_true
307 | 
308 | 
309 | def box_iou(b1, b2):
310 |     '''Return iou tensor
311 | 
312 |     Parameters
313 |     ----------
314 |     b1: tensor, shape=(i1,...,iN, 4), xywh
315 |     b2: tensor, shape=(j, 4), xywh
316 | 
317 |     Returns
318 |     -------
319 |     iou: tensor, shape=(i1,...,iN, j)
320 | 
321 |     '''
322 | 
323 |     # Expand dim to apply broadcasting.
324 |     b1 = K.expand_dims(b1, -2)
325 |     b1_xy = b1[..., :2]
326 |     b1_wh = b1[..., 2:4]
327 |     b1_wh_half = b1_wh / 2.
328 |     b1_mins = b1_xy - b1_wh_half
329 |     b1_maxes = b1_xy + b1_wh_half
330 | 
331 |     # Expand dim to apply broadcasting.
332 |     b2 = K.expand_dims(b2, 0)
333 |     b2_xy = b2[..., :2]
334 |     b2_wh = b2[..., 2:4]
335 |     b2_wh_half = b2_wh / 2.
336 |     b2_mins = b2_xy - b2_wh_half
337 |     b2_maxes = b2_xy + b2_wh_half
338 | 
339 |     intersect_mins = K.maximum(b1_mins, b2_mins)
340 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
341 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
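    # b1 now has shape (i1,...,iN, 1, 4) and b2 has shape (1, j, 4), so the
    # intersection terms computed here broadcast to a pairwise (i1,...,iN, j)
    # grid of IOU values.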
309 | def box_iou(b1, b2):
310 |     '''Return a pairwise IoU tensor.
311 | 
312 |     Parameters
313 |     ----------
314 |     b1: tensor, shape=(i1,...,iN, 4), xywh
315 |     b2: tensor, shape=(j, 4), xywh
316 | 
317 |     Returns
318 |     -------
319 |     iou: tensor, shape=(i1,...,iN, j)
320 | 
321 |     '''
322 | 
323 |     # Expand dim to apply broadcasting.
324 |     b1 = K.expand_dims(b1, -2)
325 |     b1_xy = b1[..., :2]
326 |     b1_wh = b1[..., 2:4]
327 |     b1_wh_half = b1_wh / 2.
328 |     b1_mins = b1_xy - b1_wh_half
329 |     b1_maxes = b1_xy + b1_wh_half
330 | 
331 |     # Expand dim to apply broadcasting.
332 |     b2 = K.expand_dims(b2, 0)
333 |     b2_xy = b2[..., :2]
334 |     b2_wh = b2[..., 2:4]
335 |     b2_wh_half = b2_wh / 2.
336 |     b2_mins = b2_xy - b2_wh_half
337 |     b2_maxes = b2_xy + b2_wh_half
338 | 
339 |     intersect_mins = K.maximum(b1_mins, b2_mins)
340 |     intersect_maxes = K.minimum(b1_maxes, b2_maxes)
341 |     intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
342 |     intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
343 |     b1_area = b1_wh[..., 0] * b1_wh[..., 1]
344 |     b2_area = b2_wh[..., 0] * b2_wh[..., 1]
345 |     iou = intersect_area / (b1_area + b2_area - intersect_area)
346 | 
347 |     return iou
348 | 
349 | 
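box_iou gets its pairwise (i1,...,iN, j) output purely from broadcasting: b1 gains a trailing axis, b2 a leading one, so every b1 box is compared against every b2 box in one shot. The same trick in NumPy, as a minimal sketch with two made-up center-format (x, y, w, h) boxes per side:

import numpy as np

def box_iou_np(b1, b2):
    # b1: (..., 4), b2: (j, 4), both xywh; returns (..., j) pairwise IoU.
    b1 = np.expand_dims(b1, -2)                   # (..., 1, 4)
    b1_mins = b1[..., :2] - b1[..., 2:4] / 2.
    b1_maxes = b1[..., :2] + b1[..., 2:4] / 2.
    b2 = np.expand_dims(b2, 0)                    # (1, j, 4)
    b2_mins = b2[..., :2] - b2[..., 2:4] / 2.
    b2_maxes = b2[..., :2] + b2[..., 2:4] / 2.

    intersect_mins = np.maximum(b1_mins, b2_mins)
    intersect_maxes = np.minimum(b1_maxes, b2_maxes)
    intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1[..., 2] * b1[..., 3]
    b2_area = b2[..., 2] * b2[..., 3]
    return intersect_area / (b1_area + b2_area - intersect_area)

b1 = np.array([[5., 5., 10., 10.], [50., 50., 10., 10.]])
b2 = np.array([[5., 5., 10., 10.], [8., 5., 10., 10.]])
print(box_iou_np(b1, b2).round(3))  # (2, 2): row i is b1[i] against every b2 box

The first row comes out [1.0, 0.538] (identical box, then a partial overlap); the second row is all zeros since that box is disjoint from both.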
350 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
351 |     '''Return the yolo_loss tensor.
352 | 
353 |     Parameters
354 |     ----------
355 |     yolo_outputs: list of tensors, the output of yolo_body or tiny_yolo_body
356 |     y_true: list of arrays, the output of preprocess_true_boxes
357 |     anchors: array, shape=(N, 2), wh
358 |     num_classes: integer
359 |     ignore_thresh: float, IoU threshold below which an unmatched prediction is ignored in the confidence loss
360 | 
361 |     Returns
362 |     -------
363 |     loss: tensor, shape=(1,)
364 | 
365 |     '''
366 |     num_layers = len(anchors) // 3  # default setting
367 |     yolo_outputs = args[:num_layers]
368 |     y_true = args[num_layers:]
369 |     anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
370 |     input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
371 |     grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
372 |     loss = 0
373 |     m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
374 |     mf = K.cast(m, K.dtype(yolo_outputs[0]))
375 | 
376 |     for l in range(num_layers):
377 |         object_mask = y_true[l][..., 4:5]
378 |         true_class_probs = y_true[l][..., 5:]
379 | 
380 |         grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
381 |                                                      anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
382 |         pred_box = K.concatenate([pred_xy, pred_wh])
383 | 
384 |         # Darknet raw box to calculate loss.
385 |         raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
386 |         raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
387 |         raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
388 |         box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]
389 | 
390 |         # Find the ignore mask, iterating over each batch item.
391 |         ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
392 |         object_mask_bool = K.cast(object_mask, 'bool')
393 | 
394 |         def loop_body(b, ignore_mask):
395 |             true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
396 |             iou = box_iou(pred_box[b], true_box)
397 |             best_iou = K.max(iou, axis=-1)
398 |             ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
399 |             return b + 1, ignore_mask
400 | 
401 |         # tf.while_loop replaces K.control_flow_ops.while_loop, a private alias newer Keras versions no longer expose.
402 |         _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])
403 |         ignore_mask = ignore_mask.stack()
404 |         ignore_mask = K.expand_dims(ignore_mask, -1)
405 | 
406 |         # K.binary_crossentropy with from_logits=True avoids exp overflow.
407 |         xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
408 |                                                                        from_logits=True)
409 |         wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
410 |         confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
411 |                           (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
412 |                                                                     from_logits=True) * ignore_mask
413 |         class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)
414 | 
415 |         xy_loss = K.sum(xy_loss) / mf
416 |         wh_loss = K.sum(wh_loss) / mf
417 |         confidence_loss = K.sum(confidence_loss) / mf
418 |         class_loss = K.sum(class_loss) / mf
419 |         loss += xy_loss + wh_loss + confidence_loss + class_loss
420 |         if print_loss:
421 |             loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
422 |                             message='loss: ')
423 |     return loss
424 | 
--------------------------------------------------------------------------------
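Two details of yolo_loss are worth seeing in isolation: the confidence/class terms rely on from_logits=True cross-entropy being computed in a numerically stable form, and box_loss_scale = 2 - w*h up-weights small boxes so their localization errors are not drowned out by large ones. A standalone NumPy sketch of both, assuming made-up values for the box size and the target/logit pair; the stable formula is the one TensorFlow documents for sigmoid cross-entropy:

import numpy as np

def bce_with_logits(target, logit):
    # Numerically stable sigmoid cross-entropy on raw logits:
    # max(x, 0) - x*z + log(1 + exp(-|x|)), which never exponentiates
    # a large positive number.
    return np.maximum(logit, 0) - logit * target + np.log1p(np.exp(-np.abs(logit)))

# A made-up 0.2 x 0.3 box (w, h relative to the input image): the smaller
# the box, the closer its weight gets to the maximum of 2.
w, h = 0.2, 0.3
box_loss_scale = 2 - w * h           # -> 1.94

target, logit = 1.0, 2.0             # objectness target and raw prediction
print(box_loss_scale, bce_with_logits(target, logit))  # ~1.94, ~0.127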