├── yolo3
│   ├── __init__.py
│   ├── utils.py
│   ├── generate_detections.py
│   └── model.py
├── deepsort
│   ├── __init__.py
│   ├── detection.py
│   ├── preprocessing.py
│   ├── iou_matching.py
│   ├── tracker.py
│   ├── track.py
│   ├── nn_matching.py
│   ├── linear_assignment.py
│   └── kalman_filter.py
├── model_h5
│   └── h5 files can be located in here.txt
├── model_data
│   ├── tiny_yolo_anchors.txt
│   ├── yolo_anchors.txt
│   ├── cifar_classes.txt
│   ├── voc_classes.txt
│   └── coco_classes.txt
├── font
│   └── times.ttf
├── input
│   └── Demo1.jpg
├── output
│   └── Demo1.png
├── openh264-1.8.0-win64.dll
├── LICENSE
├── .gitignore
├── tracker_func.py
├── yolo_video.py
├── kmeans_anchors.py
├── README.md
├── read_data_cifar100.py
├── sort.py
├── train.py
└── yolo.py
/yolo3/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/deepsort/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/model_h5/h5 files can be located in here.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/model_data/tiny_yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
2 |
--------------------------------------------------------------------------------
/font/times.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/font/times.ttf
--------------------------------------------------------------------------------
/input/Demo1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/input/Demo1.jpg
--------------------------------------------------------------------------------
/output/Demo1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/output/Demo1.png
--------------------------------------------------------------------------------
/model_data/yolo_anchors.txt:
--------------------------------------------------------------------------------
1 | 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
2 |
--------------------------------------------------------------------------------
/openh264-1.8.0-win64.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ImLaoBJie/yolo3_sort_deepsort/HEAD/openh264-1.8.0-win64.dll
--------------------------------------------------------------------------------
/model_data/cifar_classes.txt:
--------------------------------------------------------------------------------
1 | boy
2 | girl
3 | man
4 | woman
5 | bicycle
6 | bus
7 | motorcycle
8 | pickuptruck
9 | streetcar
10 | tank
11 |
--------------------------------------------------------------------------------
/model_data/voc_classes.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
5 | bottle
6 | bus
7 | car
8 | cat
9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 |
--------------------------------------------------------------------------------
/model_data/coco_classes.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorbike
5 | aeroplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 ImLaoBJie
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/deepsort/detection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Detection(object):
5 | """
6 | This class represents a bounding box detection in a single image.
7 |
8 | Parameters
9 | ----------
10 | tlwh : array_like
11 | Bounding box in format `(x, y, w, h)`.
12 | confidence : float
13 | Detector confidence score.
14 | feature : array_like
15 | A feature vector that describes the object contained in this image.
16 |
17 | Attributes
18 | ----------
19 | tlwh : ndarray
20 | Bounding box in format `(top left x, top left y, width, height)`.
21 | confidence : float
22 | Detector confidence score.
23 | class_name : str
24 | Detector class label.
25 | feature : ndarray | NoneType
26 | A feature vector that describes the object contained in this image.
27 |
28 | """
29 |
30 | def __init__(self, tlwh, confidence, class_name, feature):
31 | self.tlwh = np.asarray(tlwh, dtype=float)
32 | self.confidence = float(confidence)
33 | self.class_name = class_name
34 | self.feature = np.asarray(feature, dtype=np.float32)
35 |
36 | def get_class(self):
37 | return self.class_name
38 |
39 | def to_tlbr(self):
40 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
41 | `(top left, bottom right)`.
42 | """
43 | ret = self.tlwh.copy()
44 | ret[2:] += ret[:2]
45 | return ret
46 |
47 | def to_xyah(self):
48 | """Convert bounding box to format `(center x, center y, aspect ratio,
49 | height)`, where the aspect ratio is `width / height`.
50 | """
51 | ret = self.tlwh.copy()
52 | ret[:2] += ret[2:] / 2
53 | ret[2] /= ret[3]
54 | return ret
55 |
--------------------------------------------------------------------------------
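A minimal sketch of the two box conversions above (assuming the repo root is on the Python path; the values are illustrative):

```
import numpy as np
from deepsort.detection import Detection

# a 100x50 box whose top-left corner is at (10, 20)
det = Detection(tlwh=[10, 20, 100, 50], confidence=0.9,
                class_name='car', feature=np.zeros(128))
print(det.to_tlbr())  # [ 10.  20. 110.  70.] -> (min x, min y, max x, max y)
print(det.to_xyah())  # [60. 45.  2. 50.] -> (center x, center y, w/h, height)
```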
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .nox/
42 | .coverage
43 | .coverage.*
44 | .cache
45 | nosetests.xml
46 | coverage.xml
47 | *.cover
48 | .hypothesis/
49 | .pytest_cache/
50 |
51 | # Translations
52 | *.mo
53 | *.pot
54 |
55 | # Django stuff:
56 | *.log
57 | local_settings.py
58 | db.sqlite3
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # IPython
77 | profile_default/
78 | ipython_config.py
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # Environments
90 | .env
91 | .venv
92 | env/
93 | venv/
94 | ENV/
95 | env.bak/
96 | venv.bak/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 | .dmypy.json
111 | dmypy.json
112 |
113 | # Pyre type checker
114 | .pyre/
115 |
116 | # Demo
117 | *.mp4
118 | *.weights
119 | *.dat
120 | *.webm
121 | *.png
122 |
123 | # Weights
124 | *.h5
125 | *.pb
126 |
127 | __pycache__/
128 | .idea/
129 | cifar-100-python/
130 |
--------------------------------------------------------------------------------
/deepsort/preprocessing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 |
5 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None):
6 | """Suppress overlapping detections.
7 |
8 | Original code from [1]_ has been adapted to include confidence score.
9 |
10 | .. [1] http://www.pyimagesearch.com/2015/02/16/
11 | faster-non-maximum-suppression-python/
12 |
13 | Examples
14 | --------
15 |
16 | >>> boxes = np.array([d.tlwh for d in detections])
17 | >>> classes = np.array([d.class_name for d in detections])
18 | >>> scores = np.array([d.confidence for d in detections])
19 | >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
20 | >>> detections = [detections[i] for i in indices]
21 |
22 | Parameters
23 | ----------
24 | boxes : ndarray
25 | Array of ROIs (x, y, width, height).
26 | max_bbox_overlap : float
27 | ROIs that overlap more than this value are suppressed.
28 | scores : Optional[array_like]
29 | Detector confidence score.
30 |
31 | Returns
32 | -------
33 | List[int]
34 | Returns indices of detections that have survived non-maxima suppression.
35 |
36 | """
37 | if len(boxes) == 0:
38 | return []
39 |
40 | boxes = boxes.astype(float)
41 | pick = []
42 |
43 | x1 = boxes[:, 0]
44 | y1 = boxes[:, 1]
45 | x2 = boxes[:, 2] + boxes[:, 0]
46 | y2 = boxes[:, 3] + boxes[:, 1]
47 |
48 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
49 | if scores is not None:
50 | idxs = np.argsort(scores)
51 | else:
52 | idxs = np.argsort(y2)
53 |
54 | while len(idxs) > 0:
55 | last = len(idxs) - 1
56 | i = idxs[last]
57 | pick.append(i)
58 |
59 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
60 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
61 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
62 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
63 |
64 | w = np.maximum(0, xx2 - xx1 + 1)
65 | h = np.maximum(0, yy2 - yy1 + 1)
66 |
67 | overlap = (w * h) / area[idxs[:last]]
68 |
69 | idxs = np.delete(
70 | idxs, np.concatenate(
71 | ([last], np.where(overlap > max_bbox_overlap)[0])))
72 |
73 | return pick
74 |
--------------------------------------------------------------------------------
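A minimal sketch of the suppression routine above on two heavily overlapping boxes (the boxes, classes, and scores are illustrative):

```
import numpy as np
from deepsort import preprocessing

boxes = np.array([[10, 10, 50, 50],     # (x, y, w, h)
                  [12, 12, 50, 50],     # almost the same box, lower score
                  [200, 200, 40, 40]])  # far away from the others
classes = np.array(['car', 'car', 'person'])
scores = np.array([0.9, 0.6, 0.8])

# keeps the higher-scoring of the two overlapping boxes, plus the distant one
indices = preprocessing.non_max_suppression(boxes, classes, 0.5, scores)
print(sorted(indices))  # [0, 2]
```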
/tracker_func.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from sort import Sort
4 | from deepsort.tracker import Tracker
5 | from deepsort.detection import Detection
6 | from deepsort import preprocessing
7 | from yolo3.utils import convert_boxes
8 |
9 |
10 | def sort_image(sort_class: Sort, out_boxes, out_scores, out_classes):
11 | dets = []
12 |
13 | for i in range(0, len(out_boxes)):
14 | dets.append([out_boxes[i][1], out_boxes[i][0], out_boxes[i][3], out_boxes[i][2], out_scores[i], out_classes[i]])
15 |
16 | dets = np.array(dets)
17 | # update
18 | trackers = sort_class.update(dets)
19 |
20 | out_boxes = []
21 | out_scores = []
22 | out_classes = []
23 | object_id = []
24 | # d [x1,y1,x2,y2,object_id,score,type]
25 | for d in trackers:
26 | out_boxes.append(list([d[1], d[0], d[3], d[2]]))
27 | object_id.append(int(d[4]))
28 | out_scores.append(float(d[5]))
29 | out_classes.append(int(d[6]))
30 |
31 | return np.array(out_boxes), np.array(out_scores), np.array(out_classes), np.array(object_id)
32 |
33 |
34 | def deepsort_image(deepsort_class: Tracker, encoder, frame, out_boxes, out_scores, out_classes,
35 | nms_max_overlap=1.0):
36 |
37 | converted_boxes = convert_boxes(out_boxes)
38 | features = encoder(frame, converted_boxes)
39 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
40 | zip(converted_boxes, out_scores, out_classes, features)]
41 |
42 | # run non-maxima suppression
43 | boxs = np.array([d.tlwh for d in detections])
44 | scores = np.array([d.confidence for d in detections])
45 | classes = np.array([d.class_name for d in detections])
46 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
47 | detections = [detections[i] for i in indices]
48 |
49 | deepsort_class.predict()
50 | deepsort_class.update(detections)
51 |
52 | num_trackers = len(deepsort_class.tracks)
53 | out_boxes = []
54 | out_classes = []
55 | out_scores = []
56 | object_id = []
57 | # d [x1,y1,x2,y2,object_id,score,type]
58 | for index, track in enumerate(deepsort_class.tracks):
59 | if not track.is_confirmed() or track.time_since_update > 1:
60 | continue
61 | out_boxes.append(track.to_tlbr())
62 | out_classes.append(int(track.get_class()))
63 | out_scores.append(float(track.get_score()))
64 | object_id.append(int(track.track_id))
65 |
66 | return np.array(out_boxes), np.array(out_scores), np.array(out_classes), np.array(object_id)
67 |
--------------------------------------------------------------------------------
/yolo_video.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import argparse
3 |
4 | import numpy as np
5 |
6 | from yolo import YOLO, detect_video
7 | from PIL import Image
8 |
9 |
10 | DEFAULTS = {
11 | "model_path": './model_h5/yolo.h5',
12 | "anchors_path": './model_data/yolo_anchors.txt',
13 | "classes_path": './model_data/coco_classes.txt',
14 | "deepsort_model": './model_data/mars-small128.pb',
15 | "gpu_num": 1,
16 | "image": False, # if set to True, "tracker" is ignored
17 | "tracker": 'deepsort', # 'sort' or 'deepsort', as needed
18 | "write_to_file": True,
19 | "input": './input/your_video.format',
20 | "output": './output/your_video.format',
21 | "output_path": './output/',
22 | "score": 0.4, # threshold
23 | "iou": 0.4, # threshold
24 | "repeat_iou": 0.95, # threshold
25 | }
26 |
27 |
28 | def getvalue(FLAGS, defaults):
29 |
30 | args = vars(FLAGS)
31 |
32 | for value in defaults:
33 | args[value] = defaults[value]
34 |
35 | return FLAGS
36 |
37 |
38 | def detect_img(yolo):
39 | while True:
40 |
41 | img = input('Input image filename:')
42 | try:
43 | image = Image.open(img)
44 | image = np.asarray(image)
45 | except Exception:
46 | print('Open Error! Try again!')
47 | continue
48 | else:
49 | # Initialization
50 | # mot_tracker = sort.Sort()
51 | # yolo.mot_tracker = mot_tracker
52 | yolo.frame = 1
53 |
54 | if yolo.write_to_file:
55 | emptyFile = open(yolo.output_path + 'result.dat', 'w')
56 | else:
57 | emptyFile = None
58 | r_image = yolo.detect_image(image, emptyFile)
59 | if yolo.write_to_file:
60 | emptyFile.close()
61 | r_image.save(yolo.output_path + 'output.png', 'png')
62 | yolo.close_session()
63 |
64 |
65 | FLAGS = None
66 |
67 | if __name__ == '__main__':
68 |
69 | FLAGS = argparse.Namespace()
70 | FLAGS = getvalue(FLAGS, DEFAULTS)
71 |
72 | if FLAGS.image:
73 | """
74 | Image detection mode, disregard any remaining command line arguments
75 | """
76 | print("Image detection mode")
77 | if "input" in FLAGS:
78 | print(" Ignoring remaining command line arguments: " + FLAGS.input + "," + FLAGS.output)
79 | detect_img(YOLO(**vars(FLAGS)))
80 | elif "input" in FLAGS:
81 | detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output)
82 | else:
83 | print("Must specify at least video_input_path. See usage with --help.")
84 |
--------------------------------------------------------------------------------
/deepsort/iou_matching.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | from . import linear_assignment
4 |
5 |
6 | def iou(bbox, candidates):
7 | """Compute intersection over union.
8 |
9 | Parameters
10 | ----------
11 | bbox : ndarray
12 | A bounding box in format `(top left x, top left y, width, height)`.
13 | candidates : ndarray
14 | A matrix of candidate bounding boxes (one per row) in the same format
15 | as `bbox`.
16 |
17 | Returns
18 | -------
19 | ndarray
20 | The intersection over union in [0, 1] between the `bbox` and each
21 | candidate. A higher score means a larger fraction of the `bbox` is
22 | occluded by the candidate.
23 |
24 | """
25 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
26 | candidates_tl = candidates[:, :2]
27 | candidates_br = candidates[:, :2] + candidates[:, 2:]
28 |
29 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
30 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
31 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
32 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
33 | wh = np.maximum(0., br - tl)
34 |
35 | area_intersection = wh.prod(axis=1)
36 | area_bbox = bbox[2:].prod()
37 | area_candidates = candidates[:, 2:].prod(axis=1)
38 | return area_intersection / (area_bbox + area_candidates - area_intersection)
39 |
40 |
41 | def iou_cost(tracks, detections, track_indices=None,
42 | detection_indices=None):
43 | """An intersection over union distance metric.
44 |
45 | Parameters
46 | ----------
47 | tracks : List[deep_sort.track.Track]
48 | A list of tracks.
49 | detections : List[deep_sort.detection.Detection]
50 | A list of detections.
51 | track_indices : Optional[List[int]]
52 | A list of indices to tracks that should be matched. Defaults to
53 | all `tracks`.
54 | detection_indices : Optional[List[int]]
55 | A list of indices to detections that should be matched. Defaults
56 | to all `detections`.
57 |
58 | Returns
59 | -------
60 | ndarray
61 | Returns a cost matrix of shape
62 | len(track_indices), len(detection_indices) where entry (i, j) is
63 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
64 |
65 | """
66 | if track_indices is None:
67 | track_indices = np.arange(len(tracks))
68 | if detection_indices is None:
69 | detection_indices = np.arange(len(detections))
70 |
71 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
72 | for row, track_idx in enumerate(track_indices):
73 | if tracks[track_idx].time_since_update > 1:
74 | cost_matrix[row, :] = linear_assignment.INFTY_COST
75 | continue
76 |
77 | bbox = tracks[track_idx].to_tlwh()
78 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
79 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
80 | return cost_matrix
81 |
--------------------------------------------------------------------------------
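A quick worked example of `iou` above: two 100x100 boxes offset by 50 pixels share a 50x50 intersection, so IOU = 2500 / (10000 + 10000 - 2500) ≈ 0.143:

```
import numpy as np
from deepsort.iou_matching import iou

bbox = np.array([0., 0., 100., 100.])            # (top left x, top left y, w, h)
candidates = np.array([[50., 50., 100., 100.]])  # one candidate per row
print(iou(bbox, candidates))  # [0.14285714]
```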
/kmeans_anchors.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class YOLO_Kmeans:
5 |
6 | def __init__(self, cluster_number, filename):
7 | self.cluster_number = cluster_number
8 | self.filename = filename
9 |
10 | def iou(self, boxes, clusters): # 1 box -> k clusters
11 | n = boxes.shape[0]
12 | k = self.cluster_number
13 |
14 | box_area = boxes[:, 0] * boxes[:, 1]
15 | box_area = box_area.repeat(k)
16 | box_area = np.reshape(box_area, (n, k))
17 |
18 | cluster_area = clusters[:, 0] * clusters[:, 1]
19 | cluster_area = np.tile(cluster_area, [1, n])
20 | cluster_area = np.reshape(cluster_area, (n, k))
21 |
22 | box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k))
23 | cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k))
24 | min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix)
25 |
26 | box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k))
27 | cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k))
28 | min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix)
29 | inter_area = np.multiply(min_w_matrix, min_h_matrix)
30 |
31 | result = inter_area / (box_area + cluster_area - inter_area)
32 | return result
33 |
34 | def avg_iou(self, boxes, clusters):
35 | accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)])
36 | return accuracy
37 |
38 | def kmeans(self, boxes, k, dist=np.median):
39 | box_number = boxes.shape[0]
40 | distances = np.empty((box_number, k))
41 | last_nearest = np.zeros((box_number,))
42 | np.random.seed()
43 | clusters = boxes[np.random.choice(
44 | box_number, k, replace=False)] # init k clusters
45 | while True:
46 |
47 | distances = 1 - self.iou(boxes, clusters)
48 |
49 | current_nearest = np.argmin(distances, axis=1)
50 | if (last_nearest == current_nearest).all():
51 | break # clusters won't change
52 | for cluster in range(k):
53 | clusters[cluster] = dist( # update clusters
54 | boxes[current_nearest == cluster], axis=0)
55 |
56 | last_nearest = current_nearest
57 |
58 | return clusters
59 |
60 | def result2txt(self, data):
61 | f = open('output/yolo_anchors.txt', 'w')
62 | row = np.shape(data)[0]
63 | for i in range(row):
64 | if i == 0:
65 | x_y = "%d,%d" % (data[i][0], data[i][1])
66 | else:
67 | x_y = ", %d,%d" % (data[i][0], data[i][1])
68 | f.write(x_y)
69 | f.close()
70 |
71 | def txt2boxes(self):
72 | f = open(self.filename, 'r')
73 | dataSet = []
74 | for line in f:
75 | infos = line.split(', ')
76 | length = len(infos)
77 | for i in range(1, length):
78 | width = int(abs(float(infos[4]) - float(infos[2])))
79 | height = int(abs(float(infos[5]) - float(infos[3])))
80 | dataSet.append([width, height])
81 | result = np.array(dataSet)
82 | f.close()
83 | return result
84 |
85 | def txt2clusters(self):
86 | all_boxes = self.txt2boxes()
87 | result = self.kmeans(all_boxes, k=self.cluster_number)
88 | result = result[np.lexsort(result.T[0, None])]
89 | self.result2txt(result)
90 | print("K anchors:\n {}".format(result))
91 | print("Accuracy: {:.2f}%".format(
92 | self.avg_iou(all_boxes, result) * 100))
93 |
94 |
95 | if __name__ == "__main__":
96 | cluster_number = 9
97 | filename = 'output/result.dat'
98 | kmeans = YOLO_Kmeans(cluster_number, filename)
99 | kmeans.txt2clusters()
100 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # YOLOv3+SORT+DeepSort
2 |
3 | * Update 2020.7.16: added DeepSort and made extensive adjustments
4 |
5 | # Introduction
6 |
7 | The YOLOv3 implementation and its training are adapted from [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3)
8 |
9 | The SORT implementation is adapted from [abewley/sort](https://github.com/abewley/sort)
10 |
11 | The DeepSort implementation is adapted from [theAIGuysCode/yolov3_deepsort](https://github.com/theAIGuysCode/yolov3_deepsort)
12 |
13 | References:
14 |
15 | 1. [SIMPLE ONLINE AND REALTIME TRACKING](https://arxiv.org/pdf/1602.00763.pdf)
16 |
17 | 2. [SIMPLE ONLINE AND REALTIME TRACKING WITH A DEEP ASSOCIATION METRIC](https://arxiv.org/pdf/1703.07402.pdf)
18 |
19 | Demo videos: [SORT](https://www.bilibili.com/video/av56450343/)
20 | [DEEPSORT](https://www.bilibili.com/video/BV16A411e7ih/)
21 |
22 | ---
23 |
24 | # Quick Start
25 |
26 | 1. Open `yolo_video.py`
27 |
28 | 2. Edit `DEFAULTS` (for personal reasons I'm not fond of `argparse`)
29 |
30 | ```
31 | DEFAULTS = {
32 | "model_path": './model_h5/yolo.h5',
33 | "anchors_path": './model_data/yolo_anchors.txt',
34 | "classes_path": './model_data/coco_classes.txt',
35 | "deepsort_model": './model_data/mars-small128.pb',
36 | "gpu_num": 1,
37 | "image": False, # if set to True, "tracker" is ignored
38 | "tracker": 'deepsort', # 'sort' or 'deepsort', as needed
39 | "write_to_file": True,
40 | "input": './input/your_video.format',
41 | "output": './output/your_video.format',
42 | "output_path": './output/',
43 | "score": 0.4, # threshold
44 | "iou": 0.4, # threshold
45 | "repeat_iou": 0.95, # threshold
46 | }
47 | ```
48 |
49 | 3. Run `yolo_video.py`; the results can be viewed in the folder specified by `"output_path"`
50 |
51 | ```
52 | python yolo_video.py
53 | ```
54 |
55 | 4. To use the lightweight tiny-YOLOv3 model, just change `"model_path"` and `"anchors_path"`
56 |
57 | *For more about YOLOv3, see the [YOLO WEBSITE](https://pjreddie.com/darknet/yolo/)
58 |
59 | *Download tiny-YOLOv3: [tiny-YOLOv3](https://pjreddie.com/media/files/yolov3-tiny.weights)
60 |
61 | *Download YOLOv3: [YOLOv3](https://pjreddie.com/media/files/yolov3.weights)
62 |
63 | *Pre-trained DeepSort network: Google Drive: [DeepSort](https://drive.google.com/open?id=18fKzfqnqhqW3s9zwsCbnVJ5XF2JFeqMp), BaiduDisk: [DeepSort](https://pan.baidu.com/s/1B4xKXYWckM4TLIg6WGW6uw) password: 9i6p
64 |
65 | ---
66 |
67 | # Parameters
68 |
69 | ```
70 | model_path # path to the .h5 model file
71 | anchors_path # path to the anchors file
72 | classes_path # path to the file listing the detectable object classes
73 | deepsort_model # path to the pre-trained DeepSort weights
74 | gpu_num # number of GPUs
75 | image # process a video (False) or a single image (True)
76 | tracker # which tracker to use
77 | write_to_file # whether to write results to a file
78 | input # path of the input video
79 | output # path of the output video
80 | output_path # path for other output files
81 | score # objects scoring below this threshold are ignored
82 | iou # objects whose IOU is below this threshold are ignored
83 | repeat_iou # threshold for removing duplicate bounding boxes
84 | ```
85 |
86 | *The format written to the file is:
87 |
88 | ```
89 | , , , , , , , , ,
90 | ```
91 |
92 | ---
93 |
94 | # Training Your Own Model
95 |
96 | The training images are extracted from the CIFAR-100 dataset. Since the main research focus is traffic,
97 | the selected classes center on vehicles and people; see `model_data/cifar_classes.txt` for the full list
98 |
99 | The CIFAR datasets are documented at: [The CIFAR-10 and CIFAR-100](http://www.cs.toronto.edu/~kriz/cifar.html)
100 |
101 | Download the CIFAR-100 dataset: [CIFAR-100 python version](http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz)
102 |
103 | 1. Edit `read_data_cifar100.py` to choose which object classes go into the training set (see the sketch below)
104 |
105 | ```
106 | REMAIN = list(np.concatenate([[11, 35, 46, 98], [8, 13, 48, 58], [81, 85]]))
107 | ```
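The kept CIFAR-100 fine-label ids are remapped by `filter()` in `read_data_cifar100.py` to consecutive class ids 0-9, in the order they appear in `REMAIN`, matching the order of `model_data/cifar_classes.txt`. A minimal sketch of that mapping:

```
REMAIN = [11, 35, 46, 98, 8, 13, 48, 58, 81, 85]
CLASSES = ['boy', 'girl', 'man', 'woman', 'bicycle',
           'bus', 'motorcycle', 'pickuptruck', 'streetcar', 'tank']
for new_id, fine_id in enumerate(REMAIN):
    print(fine_id, '->', new_id, CLASSES[new_id])
```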
108 |
109 | 2. Run train.py
110 |
111 | ```
112 | python train.py
113 | ```
114 |
115 | You can adjust `epochs` and `batch_size` as needed
116 |
117 | 3. You can first run the trained YOLOv3 model `yolo.h5` to collect bounding-box data, then use
118 | `kmeans_anchors.py` to compute the anchors, as in the sketch below
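
A minimal usage sketch, mirroring the script's `__main__` (it reads the boxes from `output/result.dat` and writes `output/yolo_anchors.txt`):

```
from kmeans_anchors import YOLO_Kmeans

kmeans = YOLO_Kmeans(cluster_number=9, filename='output/result.dat')
kmeans.txt2clusters()  # prints the 9 anchors and the average IOU
```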
119 |
120 | ---
121 |
122 | # TIPS
123 |
124 | 1. Environment
125 |
126 | * Main dependencies
127 |
128 | * python 3.6
129 | * Keras 2.3.1
130 | * tensorflow-gpu 1.13.0
131 | * numpy 1.17.0
132 |
133 | (Lower versions seem to work too)
134 |
135 | 2. A missing `openh264-1.8.0-win64.dll` may cause unexpected errors, so place this file in the same
136 | directory where you run `python yolo_video.py` (although it also seems to work fine without it)
137 |
138 | 3. DeepSort can handle short-term occlusion, but not objects that disappear or stay occluded for a long time
139 |
140 | 4. **DEMO** uploaded to [Baidu Cloud](https://pan.baidu.com/s/1VLKI8OGDbzsfqtzMe1amxg) password: pb34
141 |
142 | 5. **MOT_DEMO** [Multiple Object Tracking Benchmark](https://motchallenge.net/data/MOT16/)
143 |
144 |
145 |
--------------------------------------------------------------------------------
/read_data_cifar100.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import os
4 | import matplotlib.pyplot as plt
5 |
6 | PATH = 'cifar-100-python/'
7 | REMOVE = list(range(0, 100))
8 | REMAIN = list(np.concatenate([[11, 35, 46, 98], [8, 13, 48, 58], [81, 85]]))
9 | for i in REMAIN:
10 | REMOVE.remove(i)
11 |
12 |
13 | def filter(image, label):
14 | # filter
15 | remove_index = []
16 | for index, element in enumerate(label):
17 | if int(element) in REMOVE:
18 | remove_index.append(index)
19 |
20 | label = np.delete(label, remove_index)
21 | image = np.delete(image, remove_index, 0)
22 |
23 | if not REMAIN == []:
24 | value = 0
25 | for index in REMAIN:
26 | label[label == np.int32(index)] = np.int32(value)
27 | value = value + 1
28 |
29 | return image, label
30 |
31 |
32 | def load_CIFAR_batch(filename, N, data_filter: bool):
33 | # a single batch
34 | # load a single batch of cifar
35 | with open(filename, 'rb') as f:
36 | datadict = pickle.load(f, encoding='latin1') # a dict
37 | image = datadict['data'] # X, ndarray, pixel values
38 | label = datadict['fine_labels'] # Y, list, fine class labels
39 |
40 | # check the id of fine_labels relevant to the coarse_labels
41 | # label = np.array(label)
42 | # coarse = np.array(datadict['coarse_labels'])
43 | # print(np.unique(label[np.array(np.where(coarse == 19))[0]]))
44 |
45 | # reshape: flat rows to (N, 3, 32, 32)
46 | # transpose: channels-first to channels-last
47 | # astype: copy while casting the dtype
48 | image = image.reshape(N, 3, 32, 32).transpose(0, 2, 3, 1).astype('float')
49 | label = np.array(label)
50 |
51 | if data_filter:
52 | image, label = filter(image, label)
53 |
54 | return image, label
55 |
56 |
57 | def load_CIFAR100(path, data_filter: bool):
58 | # all batches
59 | # load all of cifar
60 | images = [] # list
61 | labels = []
62 |
63 | # training set
64 | f = os.path.join(path, 'train')
65 | image, label = load_CIFAR_batch(f, 50000, data_filter)
66 | images.append(image)
67 | labels.append(label)
68 |
69 | images = np.concatenate(images) # merge the list of ndarrays into one ndarray
70 | labels = np.concatenate(labels)
71 |
72 | # test set
73 | img_val, lab_val = load_CIFAR_batch(os.path.join(path, 'test'), 10000, data_filter)
74 | return images, labels, img_val, lab_val
75 |
76 |
77 | # WARNING: using this function may run out of memory and crash the OS;
78 | # reduce the amount of augmentation if needed
79 | def creat_more_data(images):
80 | # expand the dataset through rotation and mirroring
81 | images_rot90 = []
82 | images_rot180 = []
83 | images_rot270 = []
84 | img_lr = []
85 | img_ud = []
86 |
87 | for index in range(0, images.shape[0]):
88 | band_1 = images[index, :, :, 0]
89 | band_2 = images[index, :, :, 1]
90 | band_3 = images[index, :, :, 2]
91 |
92 | # rotate 90 degrees
93 | band_1_rot90 = np.rot90(band_1)
94 | band_2_rot90 = np.rot90(band_2)
95 | band_3_rot90 = np.rot90(band_3)
96 | images_rot90.append(np.dstack((band_1_rot90, band_2_rot90, band_3_rot90)))
97 |
98 | # 180
99 | band_1_rot180 = np.rot90(band_1_rot90)
100 | band_2_rot180 = np.rot90(band_2_rot90)
101 | band_3_rot180 = np.rot90(band_3_rot90)
102 | images_rot180.append(np.dstack((band_1_rot180, band_2_rot180, band_3_rot180)))
103 |
104 | # 270
105 | band_1_rot270 = np.rot90(band_1_rot180)
106 | band_2_rot270 = np.rot90(band_2_rot180)
107 | band_3_rot270 = np.rot90(band_3_rot180)
108 | images_rot270.append(np.dstack((band_1_rot270, band_2_rot270, band_3_rot270)))
109 |
110 | # flip along axis 0 (up-down)
111 | lr1 = np.flip(band_1, 0)
112 | lr2 = np.flip(band_2, 0)
113 | lr3 = np.flip(band_3, 0)
114 | img_lr.append(np.dstack((lr1, lr2, lr3)))
115 |
116 | # flip along axis 1 (left-right)
117 | ud1 = np.flip(band_1, 1)
118 | ud2 = np.flip(band_2, 1)
119 | ud3 = np.flip(band_3, 1)
120 | img_ud.append(np.dstack((ud1, ud2, ud3)))
121 |
122 | rot90 = np.array(images_rot90)
123 | rot180 = np.array(images_rot180)
124 | rot270 = np.array(images_rot270)
125 | lr = np.array(img_lr)
126 | ud = np.array(img_ud)
127 |
128 | images = np.concatenate((images, rot90, rot180, rot270, lr, ud)) # keep originals so the 6x labels match
129 |
130 | return images
131 |
132 |
133 | def shuffle(images, labels):
134 | permutation = np.random.permutation(images.shape[0])
135 | shuffled_dataset = images[permutation, :, :, :]
136 | shuffled_labels = labels[permutation]
137 | return shuffled_dataset, shuffled_labels
138 |
139 |
140 | def data(path, more_data: bool, shuffle_data: bool, data_filter: bool):
141 | images, labels, img_val, lab_val = load_CIFAR100(path, data_filter)
142 |
143 | if more_data:
144 | # expand the dataset
145 | images = creat_more_data(np.array(images))
146 | # expand labels: one copy per set (original + 5 augmentations)
147 | labels = np.concatenate((labels, labels, labels, labels, labels, labels))
148 |
149 | if shuffle_data:
150 | images, labels = shuffle(images, labels)
151 | img_val, lab_val = shuffle(img_val, lab_val)
152 |
153 | return images, labels, img_val, lab_val
154 |
155 |
156 | def main():
157 | images, labels, img_val, lab_val = data(PATH, False, True, True)
158 | # test
159 | print(len(images))
160 | print(len(labels))
161 | plt.imshow(images[0] / 255)
162 | print(images[0])
163 | print(labels[0])
164 | plt.show()
165 |
166 |
167 | if __name__ == '__main__':
168 | main()
169 |
--------------------------------------------------------------------------------
/deepsort/tracker.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | from . import kalman_filter
4 | from . import linear_assignment
5 | from . import iou_matching
6 | from .track import Track
7 |
8 |
9 | class Tracker:
10 | """
11 | This is the multi-target tracker.
12 |
13 | Parameters
14 | ----------
15 | metric : nn_matching.NearestNeighborDistanceMetric
16 | A distance metric for measurement-to-track association.
17 | max_age : int
18 | Maximum number of consecutive misses before a track is deleted.
19 | n_init : int
20 | Number of consecutive detections before the track is confirmed. The
21 | track state is set to `Deleted` if a miss occurs within the first
22 | `n_init` frames.
23 |
24 | Attributes
25 | ----------
26 | metric : nn_matching.NearestNeighborDistanceMetric
27 | The distance metric used for measurement to track association.
28 | max_age : int
29 | Maximum number of consecutive misses before a track is deleted.
30 | n_init : int
31 | Number of frames that a track remains in initialization phase.
32 | kf : kalman_filter.KalmanFilter
33 | A Kalman filter to filter target trajectories in image space.
34 | tracks : List[Track]
35 | The list of active tracks at the current time step.
36 |
37 | """
38 |
39 | def __init__(self, metric, max_iou_distance=0.7, max_age=30, n_init=3):
40 | self.metric = metric
41 | self.max_iou_distance = max_iou_distance
42 | self.max_age = max_age
43 | self.n_init = n_init
44 |
45 | self.kf = kalman_filter.KalmanFilter()
46 | self.tracks = []
47 | self._next_id = 1
48 |
49 | def predict(self):
50 | """Propagate track state distributions one time step forward.
51 |
52 | This function should be called once every time step, before `update`.
53 | """
54 | for track in self.tracks:
55 | track.predict(self.kf)
56 |
57 | def update(self, detections):
58 | """Perform measurement update and track management.
59 |
60 | Parameters
61 | ----------
62 | detections : List[deep_sort.detection.Detection]
63 | A list of detections at the current time step.
64 |
65 | """
66 | # Run matching cascade.
67 | matches, unmatched_tracks, unmatched_detections = \
68 | self._match(detections)
69 |
70 | # Update track set.
71 | for track_idx, detection_idx in matches:
72 | self.tracks[track_idx].update(
73 | self.kf, detections[detection_idx])
74 | for track_idx in unmatched_tracks:
75 | self.tracks[track_idx].mark_missed()
76 | for detection_idx in unmatched_detections:
77 | self._initiate_track(detections[detection_idx])
78 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
79 |
80 | # Update distance metric.
81 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
82 | features, targets = [], []
83 | for track in self.tracks:
84 | if not track.is_confirmed():
85 | continue
86 | features += track.features
87 | targets += [track.track_id for _ in track.features]
88 | track.features = []
89 | self.metric.partial_fit(
90 | np.asarray(features), np.asarray(targets), active_targets)
91 |
92 | def _match(self, detections):
93 |
94 | def gated_metric(tracks, dets, track_indices, detection_indices):
95 | features = np.array([dets[i].feature for i in detection_indices])
96 | targets = np.array([tracks[i].track_id for i in track_indices])
97 | cost_matrix = self.metric.distance(features, targets)
98 | cost_matrix = linear_assignment.gate_cost_matrix(
99 | self.kf, cost_matrix, tracks, dets, track_indices,
100 | detection_indices)
101 |
102 | return cost_matrix
103 |
104 | # Split track set into confirmed and unconfirmed tracks.
105 | confirmed_tracks = [
106 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
107 | unconfirmed_tracks = [
108 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
109 |
110 | # Associate confirmed tracks using appearance features.
111 | matches_a, unmatched_tracks_a, unmatched_detections = \
112 | linear_assignment.matching_cascade(
113 | gated_metric, self.metric.matching_threshold, self.max_age,
114 | self.tracks, detections, confirmed_tracks)
115 |
116 | # Associate remaining tracks together with unconfirmed tracks using IOU.
117 | iou_track_candidates = unconfirmed_tracks + [
118 | k for k in unmatched_tracks_a if
119 | self.tracks[k].time_since_update == 1]
120 | unmatched_tracks_a = [
121 | k for k in unmatched_tracks_a if
122 | self.tracks[k].time_since_update != 1]
123 | matches_b, unmatched_tracks_b, unmatched_detections = \
124 | linear_assignment.min_cost_matching(
125 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
126 | detections, iou_track_candidates, unmatched_detections)
127 |
128 | matches = matches_a + matches_b
129 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
130 | return matches, unmatched_tracks, unmatched_detections
131 |
132 | def _initiate_track(self, detection):
133 | mean, covariance = self.kf.initiate(detection.to_xyah())
134 | class_name = detection.get_class()
135 | self.tracks.append(Track(
136 | mean, covariance, self._next_id, self.n_init, self.max_age,
137 | detection.feature, class_name, detection.confidence))
138 | self._next_id += 1
139 |
--------------------------------------------------------------------------------
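A minimal per-frame driver sketch for the `Tracker` above, mirroring how `tracker_func.deepsort_image` uses it; the metric threshold and budget here are illustrative, not values prescribed by this repo:

```
from deepsort import nn_matching
from deepsort.tracker import Tracker

metric = nn_matching.NearestNeighborDistanceMetric('cosine', 0.5, budget=100)
tracker = Tracker(metric)

for frame_idx in range(3):
    detections = []  # plug in deepsort.detection.Detection objects per frame
    tracker.predict()           # propagate every track one step forward
    tracker.update(detections)  # matching cascade, updates, births, deletions
    for track in tracker.tracks:
        if track.is_confirmed() and track.time_since_update <= 1:
            print(track.track_id, track.to_tlbr())
```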
/deepsort/track.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | class TrackState:
4 | """
5 | Enumeration type for the single target track state. Newly created tracks are
6 | classified as `tentative` until enough evidence has been collected. Then,
7 | the track state is changed to `confirmed`. Tracks that are no longer alive
8 | are classified as `deleted` to mark them for removal from the set of active
9 | tracks.
10 |
11 | """
12 |
13 | Tentative = 1
14 | Confirmed = 2
15 | Deleted = 3
16 |
17 |
18 | class Track:
19 | """
20 | A single target track with state space `(x, y, a, h)` and associated
21 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
22 | aspect ratio and `h` is the height.
23 |
24 | Parameters
25 | ----------
26 | mean : ndarray
27 | Mean vector of the initial state distribution.
28 | covariance : ndarray
29 | Covariance matrix of the initial state distribution.
30 | track_id : int
31 | A unique track identifier.
32 | n_init : int
33 | Number of consecutive detections before the track is confirmed. The
34 | track state is set to `Deleted` if a miss occurs within the first
35 | `n_init` frames.
36 | max_age : int
37 | The maximum number of consecutive misses before the track state is
38 | set to `Deleted`.
39 | feature : Optional[ndarray]
40 | Feature vector of the detection this track originates from. If not None,
41 | this feature is added to the `features` cache.
42 |
43 | Attributes
44 | ----------
45 | mean : ndarray
46 | Mean vector of the initial state distribution.
47 | covariance : ndarray
48 | Covariance matrix of the initial state distribution.
49 | track_id : int
50 | A unique track identifier.
51 | hits : int
52 | Total number of measurement updates.
53 | age : int
54 | Total number of frames since first occurrence.
55 | time_since_update : int
56 | Total number of frames since last measurement update.
57 | state : TrackState
58 | The current track state.
59 | features : List[ndarray]
60 | A cache of features. On each measurement update, the associated feature
61 | vector is added to this list.
62 |
63 | """
64 |
65 | def __init__(self, mean, covariance, track_id, n_init, max_age,
66 | feature=None, class_name=None, confidence=None):
67 | self.mean = mean
68 | self.covariance = covariance
69 | self.track_id = track_id
70 | self.hits = 1
71 | self.age = 1
72 | self.time_since_update = 0
73 |
74 | self.state = TrackState.Tentative
75 | self.features = []
76 | if feature is not None:
77 | self.features.append(feature)
78 |
79 | self._n_init = n_init
80 | self._max_age = max_age
81 | self.class_name = class_name
82 | self.confidence = confidence
83 |
84 | def to_tlwh(self):
85 | """Get current position in bounding box format `(top left x, top left y,
86 | width, height)`.
87 |
88 | Returns
89 | -------
90 | ndarray
91 | The bounding box.
92 |
93 | """
94 | ret = self.mean[:4].copy()
95 | ret[2] *= ret[3]
96 | ret[:2] -= ret[2:] / 2
97 | return ret
98 |
99 | def to_tlbr(self):
100 | """Get current position in bounding box format `(min x, min y, max x,
101 | max y)`.
102 |
103 | Returns
104 | -------
105 | ndarray
106 | The bounding box.
107 |
108 | """
109 | ret = self.to_tlwh()
110 | ret[2:] = ret[:2] + ret[2:]
111 | return ret
112 |
113 | def get_class(self):
114 | return self.class_name
115 |
116 | def get_score(self):
117 | return self.confidence
118 |
119 | def predict(self, kf):
120 | """Propagate the state distribution to the current time step using a
121 | Kalman filter prediction step.
122 |
123 | Parameters
124 | ----------
125 | kf : kalman_filter.KalmanFilter
126 | The Kalman filter.
127 |
128 | """
129 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
130 | self.age += 1
131 | self.time_since_update += 1
132 |
133 | def update(self, kf, detection):
134 | """Perform Kalman filter measurement update step and update the feature
135 | cache.
136 |
137 | Parameters
138 | ----------
139 | kf : kalman_filter.KalmanFilter
140 | The Kalman filter.
141 | detection : Detection
142 | The associated detection.
143 |
144 | """
145 | self.mean, self.covariance = kf.update(
146 | self.mean, self.covariance, detection.to_xyah())
147 | self.features.append(detection.feature)
148 |
149 | self.hits += 1
150 | self.time_since_update = 0
151 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
152 | self.state = TrackState.Confirmed
153 |
154 | def mark_missed(self):
155 | """Mark this track as missed (no association at the current time step).
156 | """
157 | if self.state == TrackState.Tentative:
158 | self.state = TrackState.Deleted
159 | elif self.time_since_update > self._max_age:
160 | self.state = TrackState.Deleted
161 |
162 | def is_tentative(self):
163 | """Returns True if this track is tentative (unconfirmed).
164 | """
165 | return self.state == TrackState.Tentative
166 |
167 | def is_confirmed(self):
168 | """Returns True if this track is confirmed."""
169 | return self.state == TrackState.Confirmed
170 |
171 | def is_deleted(self):
172 | """Returns True if this track is dead and should be deleted."""
173 | return self.state == TrackState.Deleted
174 |
--------------------------------------------------------------------------------
/yolo3/utils.py:
--------------------------------------------------------------------------------
1 | """Miscellaneous utility functions."""
2 |
3 | from functools import reduce
4 |
5 | from PIL import Image
6 | import numpy as np
7 | from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
8 |
9 | from sort import iou
10 |
11 |
12 | def compose(*funcs):
13 | """Compose arbitrarily many functions, evaluated left to right.
14 |
15 | Reference: https://mathieularose.com/function-composition-in-python/
16 | """
17 | # return lambda x: reduce(lambda v, f: f(v), funcs, x)
18 | if funcs:
19 | return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
20 | else:
21 | raise ValueError('Composition of empty sequence not supported.')
22 |
23 |
24 | def letterbox_image(image, size):
25 | '''resize image with unchanged aspect ratio using padding'''
26 | iw, ih = image.size
27 | w, h = size
28 | scale = min(w / iw, h / ih)
29 | nw = int(iw * scale)
30 | nh = int(ih * scale)
31 |
32 | image = image.resize((nw, nh), Image.BICUBIC)
33 | new_image = Image.new('RGB', size, (128, 128, 128))
34 | new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
35 | return new_image
36 |
37 |
38 | def rand(a=0, b=1):
39 | return np.random.rand() * (b - a) + a
40 |
41 |
42 | def get_random_data(annotation_line, input_shape, random=True, max_boxes=20, jitter=.3, hue=.1, sat=1.5, val=1.5,
43 | proc_img=True):
44 | '''random preprocessing for real-time data augmentation'''
45 | line = annotation_line.split()
46 | image = Image.open(line[0])
47 | iw, ih = image.size
48 | h, w = input_shape
49 | box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])
50 |
51 | if not random:
52 | # resize image
53 | scale = min(w / iw, h / ih)
54 | nw = int(iw * scale)
55 | nh = int(ih * scale)
56 | dx = (w - nw) // 2
57 | dy = (h - nh) // 2
58 | image_data = 0
59 | if proc_img:
60 | image = image.resize((nw, nh), Image.BICUBIC)
61 | new_image = Image.new('RGB', (w, h), (128, 128, 128))
62 | new_image.paste(image, (dx, dy))
63 | image_data = np.array(new_image) / 255.
64 |
65 | # correct boxes
66 | box_data = np.zeros((max_boxes, 5))
67 | if len(box) > 0:
68 | np.random.shuffle(box)
69 | if len(box) > max_boxes: box = box[:max_boxes]
70 | box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
71 | box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
72 | box_data[:len(box)] = box
73 |
74 | return image_data, box_data
75 |
76 | # resize image
77 | new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
78 | scale = rand(.25, 2)
79 | if new_ar < 1:
80 | nh = int(scale * h)
81 | nw = int(nh * new_ar)
82 | else:
83 | nw = int(scale * w)
84 | nh = int(nw / new_ar)
85 | image = image.resize((nw, nh), Image.BICUBIC)
86 |
87 | # place image
88 | dx = int(rand(0, w - nw))
89 | dy = int(rand(0, h - nh))
90 | new_image = Image.new('RGB', (w, h), (128, 128, 128))
91 | new_image.paste(image, (dx, dy))
92 | image = new_image
93 |
94 | # flip image or not
95 | flip = rand() < .5
96 | if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
97 |
98 | # distort image
99 | hue = rand(-hue, hue)
100 | sat = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
101 | val = rand(1, val) if rand() < .5 else 1 / rand(1, val)
102 | x = rgb_to_hsv(np.array(image) / 255.)
103 | x[..., 0] += hue
104 | x[..., 0][x[..., 0] > 1] -= 1
105 | x[..., 0][x[..., 0] < 0] += 1
106 | x[..., 1] *= sat
107 | x[..., 2] *= val
108 | x[x > 1] = 1
109 | x[x < 0] = 0
110 | image_data = hsv_to_rgb(x) # numpy array, 0 to 1
111 |
112 | # correct boxes
113 | box_data = np.zeros((max_boxes, 5))
114 | if len(box) > 0:
115 | np.random.shuffle(box)
116 | box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
117 | box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
118 | if flip: box[:, [0, 2]] = w - box[:, [2, 0]]
119 | box[:, 0:2][box[:, 0:2] < 0] = 0
120 | box[:, 2][box[:, 2] > w] = w
121 | box[:, 3][box[:, 3] > h] = h
122 | box_w = box[:, 2] - box[:, 0]
123 | box_h = box[:, 3] - box[:, 1]
124 | box = box[np.logical_and(box_w > 1, box_h > 1)] # discard invalid box
125 | if len(box) > max_boxes: box = box[:max_boxes]
126 | box_data[:len(box)] = box
127 |
128 | return image_data, box_data
129 |
130 |
131 | def delete_repeat_bbox(out_boxes, out_scores, out_classes, iou_threshold):
132 | '''Delete the same bboxes marked as different classes'''
133 | to_del = []
134 | for i in range(0, len(out_classes) - 1):
135 | for j in range(i + 1, len(out_classes)):
136 | if (i not in to_del) and (j not in to_del):
137 | # bounding box 1
138 | y1_1, x1_1, y2_1, x2_1 = out_boxes[i]
139 | # bounding box 2
140 | y1_2, x1_2, y2_2, x2_2 = out_boxes[j]
141 | if iou([x1_1, y1_1, x2_1, y2_1], [x1_2, y1_2, x2_2, y2_2]) >= iou_threshold:
142 | if out_scores[i] >= out_scores[j]:
143 | to_del.append(j)
144 | else:
145 | to_del.append(i)
146 |
147 | to_del = sorted(to_del)
148 |
149 | for t in reversed(to_del):
150 | out_boxes.pop(t)
151 | out_scores.pop(t)
152 | out_classes.pop(t)
153 |
154 | return np.array(out_boxes), np.array(out_scores), np.array(out_classes)
155 |
156 |
157 | # boxes: np.array
158 | def convert_boxes(boxes):
159 | # [x1, y1, x2, y2] -> [x, y, w, h]
160 | returned_boxes = []
161 | for box in boxes:
162 | box = box.astype(int)
163 | box[2] = int(box[2]-box[0]) # width
164 | box[3] = int(box[3]-box[1]) # height
165 | box = box.astype(int)
166 | box = box.tolist()
167 | if box != [0, 0, 0, 0]:
168 | returned_boxes.append(box)
169 | return returned_boxes
--------------------------------------------------------------------------------
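A minimal sketch of `letterbox_image` above (run from the repo root so the module's `from sort import iou` resolves): a 640x480 image scaled into a 416x416 canvas keeps its aspect ratio and is padded with gray:

```
from PIL import Image
from yolo3.utils import letterbox_image

img = Image.new('RGB', (640, 480))
boxed = letterbox_image(img, (416, 416))
print(boxed.size)  # (416, 416): the content fills 416x312, the rest is (128, 128, 128) padding
```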
/deepsort/nn_matching.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def _pdist(a, b):
5 | """Compute pair-wise squared distance between points in `a` and `b`.
6 |
7 | Parameters
8 | ----------
9 | a : array_like
10 | An NxM matrix of N samples of dimensionality M.
11 | b : array_like
12 | An LxM matrix of L samples of dimensionality M.
13 |
14 | Returns
15 | -------
16 | ndarray
17 | Returns a matrix of size len(a), len(b) such that element (i, j)
18 | contains the squared distance between `a[i]` and `b[j]`.
19 |
20 | """
21 | a, b = np.asarray(a), np.asarray(b)
22 | if len(a) == 0 or len(b) == 0:
23 | return np.zeros((len(a), len(b)))
24 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
25 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
26 | r2 = np.clip(r2, 0., float(np.inf))
27 | return r2
28 |
29 |
30 | def _cosine_distance(a, b, data_is_normalized=False):
31 | """Compute pair-wise cosine distance between points in `a` and `b`.
32 |
33 | Parameters
34 | ----------
35 | a : array_like
36 | An NxM matrix of N samples of dimensionality M.
37 | b : array_like
38 | An LxM matrix of L samples of dimensionality M.
39 | data_is_normalized : Optional[bool]
40 | If True, assumes rows in a and b are unit length vectors.
41 | Otherwise, a and b are explicitly normalized to length 1.
42 |
43 | Returns
44 | -------
45 | ndarray
46 | Returns a matrix of size len(a), len(b) such that element (i, j)
47 | contains the cosine distance between `a[i]` and `b[j]`.
48 |
49 | """
50 | if not data_is_normalized:
51 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
52 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
53 | return 1. - np.dot(a, b.T)
54 |
55 |
56 | def _nn_euclidean_distance(x, y):
57 | """ Helper function for nearest neighbor distance metric (Euclidean).
58 |
59 | Parameters
60 | ----------
61 | x : ndarray
62 | A matrix of N row-vectors (sample points).
63 | y : ndarray
64 | A matrix of M row-vectors (query points).
65 |
66 | Returns
67 | -------
68 | ndarray
69 | A vector of length M that contains for each entry in `y` the
70 | smallest Euclidean distance to a sample in `x`.
71 |
72 | """
73 | distances = _pdist(x, y)
74 | return np.maximum(0.0, distances.min(axis=0))
75 |
76 |
77 | def _nn_cosine_distance(x, y):
78 | """ Helper function for nearest neighbor distance metric (cosine).
79 |
80 | Parameters
81 | ----------
82 | x : ndarray
83 | A matrix of N row-vectors (sample points).
84 | y : ndarray
85 | A matrix of M row-vectors (query points).
86 |
87 | Returns
88 | -------
89 | ndarray
90 | A vector of length M that contains for each entry in `y` the
91 | smallest cosine distance to a sample in `x`.
92 |
93 | """
94 | distances = _cosine_distance(x, y)
95 | return distances.min(axis=0)
96 |
97 |
98 | class NearestNeighborDistanceMetric(object):
99 | """
100 | A nearest neighbor distance metric that, for each target, returns
101 | the closest distance to any sample that has been observed so far.
102 |
103 | Parameters
104 | ----------
105 | metric : str
106 | Either "euclidean" or "cosine".
107 | matching_threshold: float
108 | The matching threshold. Samples with larger distance are considered an
109 | invalid match.
110 | budget : Optional[int]
111 | If not None, fix samples per class to at most this number. Removes
112 | the oldest samples when the budget is reached.
113 |
114 | Attributes
115 | ----------
116 | samples : Dict[int -> List[ndarray]]
117 | A dictionary that maps from target identities to the list of samples
118 | that have been observed so far.
119 |
120 | """
121 |
122 | def __init__(self, metric, matching_threshold, budget=None):
123 |
124 |
125 | if metric == "euclidean":
126 | self._metric = _nn_euclidean_distance
127 | elif metric == "cosine":
128 | self._metric = _nn_cosine_distance
129 | else:
130 | raise ValueError(
131 | "Invalid metric; must be either 'euclidean' or 'cosine'")
132 | self.matching_threshold = matching_threshold
133 | self.budget = budget
134 | self.samples = {}
135 |
136 | def partial_fit(self, features, targets, active_targets):
137 | """Update the distance metric with new data.
138 |
139 | Parameters
140 | ----------
141 | features : ndarray
142 | An NxM matrix of N features of dimensionality M.
143 | targets : ndarray
144 | An integer array of associated target identities.
145 | active_targets : List[int]
146 | A list of targets that are currently present in the scene.
147 |
148 | """
149 | for feature, target in zip(features, targets):
150 | self.samples.setdefault(target, []).append(feature)
151 | if self.budget is not None:
152 | self.samples[target] = self.samples[target][-self.budget:]
153 | self.samples = {k: self.samples[k] for k in active_targets}
154 |
155 | def distance(self, features, targets):
156 | """Compute distance between features and targets.
157 |
158 | Parameters
159 | ----------
160 | features : ndarray
161 | An NxM matrix of N features of dimensionality M.
162 | targets : List[int]
163 | A list of targets to match the given `features` against.
164 |
165 | Returns
166 | -------
167 | ndarray
168 | Returns a cost matrix of shape len(targets), len(features), where
169 | element (i, j) contains the closest squared distance between
170 | `targets[i]` and `features[j]`.
171 |
172 | """
173 | cost_matrix = np.zeros((len(targets), len(features)))
174 | for i, target in enumerate(targets):
175 | cost_matrix[i, :] = self._metric(self.samples[target], features)
176 | return cost_matrix
177 |
--------------------------------------------------------------------------------
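A small worked example of the metric class above, using cosine distance on unit vectors (the targets and features are illustrative):

```
import numpy as np
from deepsort.nn_matching import NearestNeighborDistanceMetric

metric = NearestNeighborDistanceMetric('cosine', matching_threshold=0.5)
metric.partial_fit(np.array([[1., 0.], [0., 1.]]),  # one feature per target
                   np.array([1, 2]), active_targets=[1, 2])

query = np.array([[1., 0.]])
print(metric.distance(query, targets=[1, 2]))
# [[0.]
#  [1.]]  -> target 1 matches the query exactly, target 2 is orthogonal
```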
/yolo3/generate_detections.py:
--------------------------------------------------------------------------------
1 | import os
2 | import errno
3 | import argparse
4 | import numpy as np
5 | import cv2
6 | import tensorflow.compat.v1 as tf
7 |
8 |
9 | def _run_in_batches(f, data_dict, out, batch_size):
10 | data_len = len(out)
11 | num_batches = int(data_len / batch_size)
12 |
13 | s, e = 0, 0
14 | for i in range(num_batches):
15 | s, e = i * batch_size, (i + 1) * batch_size
16 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
17 | out[s:e] = f(batch_data_dict)
18 | if e < len(out):
19 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
20 | out[e:] = f(batch_data_dict)
21 |
22 |
23 | def extract_image_patch(image, bbox, patch_shape):
24 | """Extract image patch from bounding box.
25 | Parameters
26 | ----------
27 | image : ndarray
28 | The full image.
29 | bbox : array_like
30 | The bounding box in format (x, y, width, height).
31 | patch_shape : Optional[array_like]
32 | This parameter can be used to enforce a desired patch shape
33 | (height, width). First, the `bbox` is adapted to the aspect ratio
34 | of the patch shape, then it is clipped at the image boundaries.
35 | If None, the shape is computed from :arg:`bbox`.
36 | Returns
37 | -------
38 | ndarray | NoneType
39 | An image patch showing the :arg:`bbox`, optionally reshaped to
40 | :arg:`patch_shape`.
41 | Returns None if the bounding box is empty or fully outside of the image
42 | boundaries.
43 | """
44 | bbox = np.array(bbox)
45 | if patch_shape is not None:
46 | # correct aspect ratio to patch shape
47 | target_aspect = float(patch_shape[1]) / patch_shape[0]
48 | new_width = target_aspect * bbox[3]
49 | bbox[0] -= (new_width - bbox[2]) / 2
50 | bbox[2] = new_width
51 |
52 | # convert to top left, bottom right
53 | bbox[2:] += bbox[:2]
54 | bbox = bbox.astype(int)
55 |
56 | # clip at image boundaries
57 | bbox[:2] = np.maximum(0, bbox[:2])
58 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
59 | if np.any(bbox[:2] >= bbox[2:]):
60 | return None
61 | sx, sy, ex, ey = bbox
62 | image = image[sy:ey, sx:ex]
63 | image = cv2.resize(image, tuple(patch_shape[::-1]))
64 | return image
65 |
66 |
67 | class ImageEncoder(object):
68 |
69 | def __init__(self, checkpoint_filename, input_name="images",
70 | output_name="features"):
71 | self.session = tf.Session()
72 | with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
73 | graph_def = tf.GraphDef()
74 | graph_def.ParseFromString(file_handle.read())
75 | tf.import_graph_def(graph_def, name="net")
76 | self.input_var = tf.get_default_graph().get_tensor_by_name(
77 | "net/%s:0" % input_name)
78 | self.output_var = tf.get_default_graph().get_tensor_by_name(
79 | "net/%s:0" % output_name)
80 |
81 | assert len(self.output_var.get_shape()) == 2
82 | assert len(self.input_var.get_shape()) == 4
83 | self.feature_dim = self.output_var.get_shape().as_list()[-1]
84 | self.image_shape = self.input_var.get_shape().as_list()[1:]
85 |
86 | def __call__(self, data_x, batch_size=32):
87 | out = np.zeros((len(data_x), self.feature_dim), np.float32)
88 | _run_in_batches(
89 | lambda x: self.session.run(self.output_var, feed_dict=x),
90 | {self.input_var: data_x}, out, batch_size)
91 | return out
92 |
93 |
94 | def create_box_encoder(model_filename, input_name="images",
95 | output_name="features", batch_size=32):
96 | image_encoder = ImageEncoder(model_filename, input_name, output_name)
97 | image_shape = image_encoder.image_shape
98 |
99 | def encoder(image, boxes):
100 | image_patches = []
101 | for box in boxes:
102 | patch = extract_image_patch(image, box, image_shape[:2])
103 | if patch is None:
104 | print("WARNING: Failed to extract image patch: %s." % str(box))
105 | patch = np.random.uniform(
106 | 0., 255., image_shape).astype(np.uint8)
107 | image_patches.append(patch)
108 | image_patches = np.asarray(image_patches)
109 | return image_encoder(image_patches, batch_size)
110 |
111 | return encoder
112 |
113 |
114 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
115 | """Generate detections with features.
116 | Parameters
117 | ----------
118 | encoder : Callable[image, ndarray] -> ndarray
119 | The encoder function takes as input a BGR color image and a matrix of
120 | bounding boxes in format `(x, y, w, h)` and returns a matrix of
121 | corresponding feature vectors.
122 | mot_dir : str
123 | Path to the MOTChallenge directory (can be either train or test).
124 | output_dir
125 | Path to the output directory. Will be created if it does not exist.
126 | detection_dir
127 | Path to custom detections. The directory structure should be the default
128 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
129 | standard MOTChallenge detections.
130 | """
131 | if detection_dir is None:
132 | detection_dir = mot_dir
133 | try:
134 | os.makedirs(output_dir)
135 | except OSError as exception:
136 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
137 | pass
138 | else:
139 | raise ValueError(
140 | "Failed to created output directory '%s'" % output_dir)
141 |
142 | for sequence in os.listdir(mot_dir):
143 | print("Processing %s" % sequence)
144 | sequence_dir = os.path.join(mot_dir, sequence)
145 |
146 | image_dir = os.path.join(sequence_dir, "img1")
147 | image_filenames = {
148 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
149 | for f in os.listdir(image_dir)}
150 |
151 | detection_file = os.path.join(
152 | detection_dir, sequence, "det/det.txt")
153 | detections_in = np.loadtxt(detection_file, delimiter=',')
154 | detections_out = []
155 |
156 |         frame_indices = detections_in[:, 0].astype(int)
157 |         min_frame_idx = frame_indices.min()
158 |         max_frame_idx = frame_indices.max()
159 | for frame_idx in range(min_frame_idx, max_frame_idx + 1):
160 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
161 | mask = frame_indices == frame_idx
162 | rows = detections_in[mask]
163 |
164 | if frame_idx not in image_filenames:
165 | print("WARNING could not find image for frame %d" % frame_idx)
166 | continue
167 | bgr_image = cv2.imread(
168 | image_filenames[frame_idx], cv2.IMREAD_COLOR)
169 | features = encoder(bgr_image, rows[:, 2:6].copy())
170 | detections_out += [np.r_[(row, feature)] for row, feature
171 | in zip(rows, features)]
172 |
173 | output_filename = os.path.join(output_dir, "%s.npy" % sequence)
174 | np.save(
175 | output_filename, np.asarray(detections_out), allow_pickle=False)
--------------------------------------------------------------------------------
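Usage sketch for `create_box_encoder` above (illustrative; the frozen appearance-model path `model_h5/mars-small128.pb` is an assumption based on the standard Deep SORT release, and the input image path is a placeholder):

import cv2
import numpy as np
from yolo3 import generate_detections as gdet

encoder = gdet.create_box_encoder('model_h5/mars-small128.pb', batch_size=32)
frame = cv2.imread('some_frame.jpg')          # hypothetical input; BGR, as the encoder expects
boxes = np.array([[50., 60., 120., 240.]])    # one (x, y, w, h) box
features = encoder(frame, boxes)              # -> (1, feature_dim) float32 matrix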
/deepsort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | import numpy as np
3 | from scipy.optimize import linear_sum_assignment
4 | from . import kalman_filter
5 |
6 |
7 | INFTY_COST = 1e+5
8 |
9 |
10 | def min_cost_matching(
11 | distance_metric, max_distance, tracks, detections, track_indices=None,
12 | detection_indices=None):
13 | """Solve linear assignment problem.
14 |
15 | Parameters
16 | ----------
17 |     distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
18 | The distance metric is given a list of tracks and detections as well as
19 | a list of N track indices and M detection indices. The metric should
20 | return the NxM dimensional cost matrix, where element (i, j) is the
21 | association cost between the i-th track in the given track indices and
22 | the j-th detection in the given detection_indices.
23 | max_distance : float
24 | Gating threshold. Associations with cost larger than this value are
25 | disregarded.
26 | tracks : List[track.Track]
27 | A list of predicted tracks at the current time step.
28 | detections : List[detection.Detection]
29 | A list of detections at the current time step.
30 | track_indices : List[int]
31 | List of track indices that maps rows in `cost_matrix` to tracks in
32 | `tracks` (see description above).
33 | detection_indices : List[int]
34 | List of detection indices that maps columns in `cost_matrix` to
35 | detections in `detections` (see description above).
36 |
37 | Returns
38 | -------
39 | (List[(int, int)], List[int], List[int])
40 | Returns a tuple with the following three entries:
41 | * A list of matched track and detection indices.
42 | * A list of unmatched track indices.
43 | * A list of unmatched detection indices.
44 |
45 | """
46 | if track_indices is None:
47 | track_indices = np.arange(len(tracks))
48 | if detection_indices is None:
49 | detection_indices = np.arange(len(detections))
50 |
51 | if len(detection_indices) == 0 or len(track_indices) == 0:
52 | return [], track_indices, detection_indices # Nothing to match.
53 |
54 | cost_matrix = distance_metric(
55 | tracks, detections, track_indices, detection_indices)
56 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
57 | indices = linear_sum_assignment(cost_matrix)
58 | indices = np.asarray(indices)
59 | indices = np.transpose(indices)
60 | matches, unmatched_tracks, unmatched_detections = [], [], []
61 | for col, detection_idx in enumerate(detection_indices):
62 | if col not in indices[:, 1]:
63 | unmatched_detections.append(detection_idx)
64 | for row, track_idx in enumerate(track_indices):
65 | if row not in indices[:, 0]:
66 | unmatched_tracks.append(track_idx)
67 | for row, col in indices:
68 | track_idx = track_indices[row]
69 | detection_idx = detection_indices[col]
70 | if cost_matrix[row, col] > max_distance:
71 | unmatched_tracks.append(track_idx)
72 | unmatched_detections.append(detection_idx)
73 | else:
74 | matches.append((track_idx, detection_idx))
75 | return matches, unmatched_tracks, unmatched_detections
76 |
77 |
78 | def matching_cascade(
79 | distance_metric, max_distance, cascade_depth, tracks, detections,
80 | track_indices=None, detection_indices=None):
81 | """Run matching cascade.
82 |
83 | Parameters
84 | ----------
85 |     distance_metric : Callable[List[Track], List[Detection], List[int], List[int]] -> ndarray
86 | The distance metric is given a list of tracks and detections as well as
87 | a list of N track indices and M detection indices. The metric should
88 | return the NxM dimensional cost matrix, where element (i, j) is the
89 | association cost between the i-th track in the given track indices and
90 | the j-th detection in the given detection indices.
91 | max_distance : float
92 | Gating threshold. Associations with cost larger than this value are
93 | disregarded.
94 | cascade_depth: int
95 |         The cascade depth; should be set to the maximum track age.
96 | tracks : List[track.Track]
97 | A list of predicted tracks at the current time step.
98 | detections : List[detection.Detection]
99 | A list of detections at the current time step.
100 | track_indices : Optional[List[int]]
101 | List of track indices that maps rows in `cost_matrix` to tracks in
102 | `tracks` (see description above). Defaults to all tracks.
103 | detection_indices : Optional[List[int]]
104 | List of detection indices that maps columns in `cost_matrix` to
105 | detections in `detections` (see description above). Defaults to all
106 | detections.
107 |
108 | Returns
109 | -------
110 | (List[(int, int)], List[int], List[int])
111 | Returns a tuple with the following three entries:
112 | * A list of matched track and detection indices.
113 | * A list of unmatched track indices.
114 | * A list of unmatched detection indices.
115 |
116 | """
117 | if track_indices is None:
118 | track_indices = list(range(len(tracks)))
119 | if detection_indices is None:
120 | detection_indices = list(range(len(detections)))
121 |
122 | unmatched_detections = detection_indices
123 | matches = []
124 | for level in range(cascade_depth):
125 | if len(unmatched_detections) == 0: # No detections left
126 | break
127 |
128 | track_indices_l = [
129 | k for k in track_indices
130 | if tracks[k].time_since_update == 1 + level
131 | ]
132 | if len(track_indices_l) == 0: # Nothing to match at this level
133 | continue
134 |
135 | matches_l, _, unmatched_detections = \
136 | min_cost_matching(
137 | distance_metric, max_distance, tracks, detections,
138 | track_indices_l, unmatched_detections)
139 | matches += matches_l
140 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
141 | return matches, unmatched_tracks, unmatched_detections
142 |
143 |
144 | def gate_cost_matrix(
145 | kf, cost_matrix, tracks, detections, track_indices, detection_indices,
146 | gated_cost=INFTY_COST, only_position=False):
147 | """Invalidate infeasible entries in cost matrix based on the state
148 | distributions obtained by Kalman filtering.
149 |
150 | Parameters
151 | ----------
152 | kf : The Kalman filter.
153 | cost_matrix : ndarray
154 | The NxM dimensional cost matrix, where N is the number of track indices
155 | and M is the number of detection indices, such that entry (i, j) is the
156 | association cost between `tracks[track_indices[i]]` and
157 | `detections[detection_indices[j]]`.
158 | tracks : List[track.Track]
159 | A list of predicted tracks at the current time step.
160 | detections : List[detection.Detection]
161 | A list of detections at the current time step.
162 | track_indices : List[int]
163 | List of track indices that maps rows in `cost_matrix` to tracks in
164 | `tracks` (see description above).
165 | detection_indices : List[int]
166 | List of detection indices that maps columns in `cost_matrix` to
167 | detections in `detections` (see description above).
168 | gated_cost : Optional[float]
169 | Entries in the cost matrix corresponding to infeasible associations are
170 |         set to this value. Defaults to a very large value.
171 | only_position : Optional[bool]
172 | If True, only the x, y position of the state distribution is considered
173 | during gating. Defaults to False.
174 |
175 | Returns
176 | -------
177 | ndarray
178 | Returns the modified cost matrix.
179 |
180 | """
181 | gating_dim = 2 if only_position else 4
182 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
183 | measurements = np.asarray(
184 | [detections[i].to_xyah() for i in detection_indices])
185 | for row, track_idx in enumerate(track_indices):
186 | track = tracks[track_idx]
187 | gating_distance = kf.gating_distance(
188 | track.mean, track.covariance, measurements, only_position)
189 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
190 | return cost_matrix
191 |
--------------------------------------------------------------------------------
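The threshold handling in `min_cost_matching` above is worth isolating: entries over `max_distance` are clamped to a common value before the Hungarian solve, and any assigned pair whose true cost still exceeds the threshold is rejected afterwards. A self-contained sketch of that two-step gating:

import numpy as np
from scipy.optimize import linear_sum_assignment

max_distance = 0.7
cost = np.array([[0.2, 0.9],
                 [0.8, 0.3]])
clamped = cost.copy()
clamped[clamped > max_distance] = max_distance + 1e-5  # same clamp as min_cost_matching
rows, cols = linear_sum_assignment(clamped)
matches = [(r, c) for r, c in zip(rows, cols) if cost[r, c] <= max_distance]
print(matches)  # [(0, 0), (1, 1)] -- both assignments survive the threshold check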
/deepsort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.linalg
3 |
4 |
5 | """
6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
8 | function and used as Mahalanobis gating threshold.
9 | """
10 | chi2inv95 = {
11 | 1: 3.8415,
12 | 2: 5.9915,
13 | 3: 7.8147,
14 | 4: 9.4877,
15 | 5: 11.070,
16 | 6: 12.592,
17 | 7: 14.067,
18 | 8: 15.507,
19 | 9: 16.919}
20 |
21 |
22 | class KalmanFilter(object):
23 | """
24 | A simple Kalman filter for tracking bounding boxes in image space.
25 |
26 | The 8-dimensional state space
27 |
28 | x, y, a, h, vx, vy, va, vh
29 |
30 | contains the bounding box center position (x, y), aspect ratio a, height h,
31 | and their respective velocities.
32 |
33 | Object motion follows a constant velocity model. The bounding box location
34 | (x, y, a, h) is taken as direct observation of the state space (linear
35 | observation model).
36 |
37 | """
38 |
39 | def __init__(self):
40 | ndim, dt = 4, 1.
41 |
42 | # Create Kalman filter model matrices.
43 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
44 | for i in range(ndim):
45 | self._motion_mat[i, ndim + i] = dt
46 | self._update_mat = np.eye(ndim, 2 * ndim)
47 |
48 | # Motion and observation uncertainty are chosen relative to the current
49 | # state estimate. These weights control the amount of uncertainty in
50 | # the model. This is a bit hacky.
51 | self._std_weight_position = 1. / 20
52 | self._std_weight_velocity = 1. / 160
53 |
54 | def initiate(self, measurement):
55 | """Create track from unassociated measurement.
56 |
57 | Parameters
58 | ----------
59 | measurement : ndarray
60 | Bounding box coordinates (x, y, a, h) with center position (x, y),
61 | aspect ratio a, and height h.
62 |
63 | Returns
64 | -------
65 | (ndarray, ndarray)
66 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
67 | dimensional) of the new track. Unobserved velocities are initialized
68 | to 0 mean.
69 |
70 | """
71 | mean_pos = measurement
72 | mean_vel = np.zeros_like(mean_pos)
73 | mean = np.r_[mean_pos, mean_vel]
74 |
75 | std = [
76 | 2 * self._std_weight_position * measurement[3],
77 | 2 * self._std_weight_position * measurement[3],
78 | 1e-2,
79 | 2 * self._std_weight_position * measurement[3],
80 | 10 * self._std_weight_velocity * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 1e-5,
83 | 10 * self._std_weight_velocity * measurement[3]]
84 | covariance = np.diag(np.square(std))
85 | return mean, covariance
86 |
87 | def predict(self, mean, covariance):
88 | """Run Kalman filter prediction step.
89 |
90 | Parameters
91 | ----------
92 | mean : ndarray
93 | The 8 dimensional mean vector of the object state at the previous
94 | time step.
95 | covariance : ndarray
96 | The 8x8 dimensional covariance matrix of the object state at the
97 | previous time step.
98 |
99 | Returns
100 | -------
101 | (ndarray, ndarray)
102 | Returns the mean vector and covariance matrix of the predicted
103 | state. Unobserved velocities are initialized to 0 mean.
104 |
105 | """
106 | std_pos = [
107 | self._std_weight_position * mean[3],
108 | self._std_weight_position * mean[3],
109 | 1e-2,
110 | self._std_weight_position * mean[3]]
111 | std_vel = [
112 | self._std_weight_velocity * mean[3],
113 | self._std_weight_velocity * mean[3],
114 | 1e-5,
115 | self._std_weight_velocity * mean[3]]
116 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
117 |
118 | mean = np.dot(self._motion_mat, mean)
119 | covariance = np.linalg.multi_dot((
120 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
121 |
122 | return mean, covariance
123 |
124 | def project(self, mean, covariance):
125 | """Project state distribution to measurement space.
126 |
127 | Parameters
128 | ----------
129 | mean : ndarray
130 | The state's mean vector (8 dimensional array).
131 | covariance : ndarray
132 | The state's covariance matrix (8x8 dimensional).
133 |
134 | Returns
135 | -------
136 | (ndarray, ndarray)
137 | Returns the projected mean and covariance matrix of the given state
138 | estimate.
139 |
140 | """
141 | std = [
142 | self._std_weight_position * mean[3],
143 | self._std_weight_position * mean[3],
144 | 1e-1,
145 | self._std_weight_position * mean[3]]
146 | innovation_cov = np.diag(np.square(std))
147 |
148 | mean = np.dot(self._update_mat, mean)
149 | covariance = np.linalg.multi_dot((
150 | self._update_mat, covariance, self._update_mat.T))
151 | return mean, covariance + innovation_cov
152 |
153 | def update(self, mean, covariance, measurement):
154 | """Run Kalman filter correction step.
155 |
156 | Parameters
157 | ----------
158 | mean : ndarray
159 | The predicted state's mean vector (8 dimensional).
160 | covariance : ndarray
161 | The state's covariance matrix (8x8 dimensional).
162 | measurement : ndarray
163 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
164 | is the center position, a the aspect ratio, and h the height of the
165 | bounding box.
166 |
167 | Returns
168 | -------
169 | (ndarray, ndarray)
170 | Returns the measurement-corrected state distribution.
171 |
172 | """
173 | projected_mean, projected_cov = self.project(mean, covariance)
174 |
175 | chol_factor, lower = scipy.linalg.cho_factor(
176 | projected_cov, lower=True, check_finite=False)
177 | kalman_gain = scipy.linalg.cho_solve(
178 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
179 | check_finite=False).T
180 | innovation = measurement - projected_mean
181 |
182 | new_mean = mean + np.dot(innovation, kalman_gain.T)
183 | new_covariance = covariance - np.linalg.multi_dot((
184 | kalman_gain, projected_cov, kalman_gain.T))
185 | return new_mean, new_covariance
186 |
187 | def gating_distance(self, mean, covariance, measurements,
188 | only_position=False):
189 | """Compute gating distance between state distribution and measurements.
190 |
191 | A suitable distance threshold can be obtained from `chi2inv95`. If
192 | `only_position` is False, the chi-square distribution has 4 degrees of
193 | freedom, otherwise 2.
194 |
195 | Parameters
196 | ----------
197 | mean : ndarray
198 | Mean vector over the state distribution (8 dimensional).
199 | covariance : ndarray
200 | Covariance of the state distribution (8x8 dimensional).
201 | measurements : ndarray
202 | An Nx4 dimensional matrix of N measurements, each in
203 | format (x, y, a, h) where (x, y) is the bounding box center
204 | position, a the aspect ratio, and h the height.
205 | only_position : Optional[bool]
206 | If True, distance computation is done with respect to the bounding
207 | box center position only.
208 |
209 | Returns
210 | -------
211 | ndarray
212 | Returns an array of length N, where the i-th element contains the
213 | squared Mahalanobis distance between (mean, covariance) and
214 | `measurements[i]`.
215 |
216 | """
217 | mean, covariance = self.project(mean, covariance)
218 | if only_position:
219 | mean, covariance = mean[:2], covariance[:2, :2]
220 | measurements = measurements[:, :2]
221 |
222 | cholesky_factor = np.linalg.cholesky(covariance)
223 | d = measurements - mean
224 | z = scipy.linalg.solve_triangular(
225 | cholesky_factor, d.T, lower=True, check_finite=False,
226 | overwrite_b=True)
227 | squared_maha = np.sum(z * z, axis=0)
228 | return squared_maha
229 |
--------------------------------------------------------------------------------
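A minimal end-to-end pass through the filter above (illustrative measurement values):

import numpy as np
from deepsort.kalman_filter import KalmanFilter

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([320., 240., 0.5, 120.]))  # (x, y, a, h)
mean, cov = kf.predict(mean, cov)                           # constant-velocity time update
mean, cov = kf.update(mean, cov, np.array([324., 238., 0.5, 122.]))
d = kf.gating_distance(mean, cov, np.array([[324., 238., 0.5, 122.]]))
print(mean[:4], d)  # corrected (x, y, a, h) and its squared Mahalanobis distance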
/sort.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment
5 | from filterpy.kalman import KalmanFilter
6 |
7 |
8 | def iou(bb_test, bb_gt):
9 | """
10 | Computes IOU between two bboxes in the form [x1,y1,x2,y2]
11 | """
12 | xx1 = np.maximum(bb_test[0], bb_gt[0])
13 | yy1 = np.maximum(bb_test[1], bb_gt[1])
14 | xx2 = np.minimum(bb_test[2], bb_gt[2])
15 | yy2 = np.minimum(bb_test[3], bb_gt[3])
16 | w = np.maximum(0., xx2 - xx1)
17 | h = np.maximum(0., yy2 - yy1)
18 | wh = w * h
19 | o = wh / ((bb_test[2] - bb_test[0]) * (bb_test[3] - bb_test[1])
20 | + (bb_gt[2] - bb_gt[0]) * (bb_gt[3] - bb_gt[1]) - wh)
21 | return o
22 |
23 |
24 | def convert_bbox_to_z(bbox):
25 | """
26 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
27 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
28 | the aspect ratio
29 | """
30 | w = bbox[2] - bbox[0]
31 | h = bbox[3] - bbox[1]
32 | x = bbox[0] + w / 2.
33 | y = bbox[1] + h / 2.
34 | s = w * h # scale is just area
35 | r = w / float(h)
36 | return np.array([x, y, s, r]).reshape((4, 1))
37 |
38 |
39 | def convert_x_to_bbox(x, score=None):
40 | """
41 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
42 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
43 | """
44 | w = np.sqrt(x[2] * x[3])
45 | h = x[2] / w
46 | if score is None:
47 | return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2.]).reshape((1, 4))
48 | else:
49 | return np.array([x[0] - w / 2., x[1] - h / 2., x[0] + w / 2., x[1] + h / 2., score]).reshape((1, 5))
50 |
51 |
52 | class KalmanBoxTracker(object):
53 | """
54 |     This class represents the internal state of individual tracked objects observed as bbox.
55 | """
56 | count = 0
57 |
58 | def __init__(self, bbox):
59 | """
60 | Initialises a tracker using initial bounding box.
61 | """
62 | # define constant velocity model
63 | self.kf = KalmanFilter(dim_x=7, dim_z=4)
64 | self.kf.F = np.array(
65 | [[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0],
66 | [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1]])
67 | self.kf.H = np.array(
68 | [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]])
69 |
70 | self.kf.R[2:, 2:] *= 10.
71 | self.kf.P[4:, 4:] *= 1000. # give high uncertainty to the unobservable initial velocities
72 | self.kf.P *= 10.
73 | self.kf.Q[-1, -1] *= 0.01
74 | self.kf.Q[4:, 4:] *= 0.01
75 |
76 | self.kf.x[:4] = convert_bbox_to_z(bbox)
77 | self.time_since_update = 0
78 | self.id = KalmanBoxTracker.count
79 | KalmanBoxTracker.count += 1
80 | self.history = []
81 | self.hits = 0
82 | self.hit_streak = 0
83 | self.age = 0
84 |
85 | def update(self, bbox):
86 | """
87 | Updates the state vector with observed bbox.
88 | """
89 | self.time_since_update = 0
90 | self.history = []
91 | self.hits += 1
92 | self.hit_streak += 1
93 | self.kf.update(convert_bbox_to_z(bbox))
94 |
95 | def predict(self):
96 | """
97 | Advances the state vector and returns the predicted bounding box estimate.
98 | """
99 | if (self.kf.x[6] + self.kf.x[2]) <= 0:
100 | self.kf.x[6] *= 0.0
101 | self.kf.predict()
102 | self.age += 1
103 | if self.time_since_update > 0:
104 | self.hit_streak = 0
105 | self.time_since_update += 1
106 | self.history.append(convert_x_to_bbox(self.kf.x))
107 | return self.history[-1]
108 |
109 | def get_state(self):
110 | """
111 | Returns the current bounding box estimate.
112 | """
113 | return convert_x_to_bbox(self.kf.x)
114 |
115 |
116 | def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
117 | """
118 | Assigns detections to tracked object (both represented as bounding boxes)
119 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers
120 | """
121 | if len(trackers) == 0:
122 | return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 1), dtype=int)
123 | iou_matrix = np.zeros((len(detections), len(trackers)), dtype=np.float32)
124 |
125 | for d, det in enumerate(detections):
126 | for t, trk in enumerate(trackers):
127 | iou_matrix[d, t] = iou(det, trk)
128 | row_ind, col_ind = linear_sum_assignment(-iou_matrix)
129 | matched_indices = np.zeros(shape=(row_ind.shape[0], 2), dtype=np.int64)
130 | matched_indices[:, 0] = row_ind
131 | matched_indices[:, 1] = col_ind
132 |
133 | unmatched_detections = []
134 | for d, det in enumerate(detections):
135 | if d not in matched_indices[:, 0]:
136 | unmatched_detections.append(d)
137 | unmatched_trackers = []
138 | for t, trk in enumerate(trackers):
139 | if t not in matched_indices[:, 1]:
140 | unmatched_trackers.append(t)
141 |
142 |     # reject a match only when its IOU is low and the detection class disagrees with the tracker's class
143 | matches = []
144 | for m in matched_indices:
145 | if (iou_matrix[m[0], m[1]] < iou_threshold) and (not int(detections[m[0]][5]) == int(trackers[m[1]][5])):
146 | unmatched_detections.append(m[0])
147 | unmatched_trackers.append(m[1])
148 | else:
149 | matches.append(m.reshape(1, 2))
150 | if len(matches) == 0:
151 | matches = np.empty((0, 2), dtype=int)
152 | else:
153 | matches = np.concatenate(matches, axis=0)
154 |
155 | return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
156 |
157 |
158 | class Sort(object):
159 | def __init__(self, max_age=2, min_hits=3):
160 | """
161 | Sets key parameters for SORT
162 | """
163 | self.max_age = max_age
164 | self.min_hits = min_hits
165 | self.trackers = []
166 | self.scores = []
167 | self.types = []
168 | self.frame_count = 0
169 |
170 | def update(self, dets):
171 | """
172 | Params:
173 | dets - a numpy array of detections in the format [[x1,y1,x2,y2,score,type],[x1,y1,x2,y2,score,type],...]
174 | Requires: this method must be called once for each frame even with empty detections.
175 |         Returns a numpy array in the format [x1,y1,x2,y2,object_id,score,type]
176 | NOTE: The number of objects returned may differ from the number of detections provided.
177 | """
178 | self.frame_count += 1
179 | # get predicted locations from existing trackers.
180 | trks = np.zeros((len(self.trackers), 6))
181 | to_del = []
182 | ret = []
183 | for t, trk in enumerate(trks):
184 | pos = self.trackers[t].predict()[0]
185 | trk[:] = [pos[0], pos[1], pos[2], pos[3], self.scores[t], self.types[t]]
186 | if np.any(np.isnan(pos)):
187 | to_del.append(t)
188 | trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
189 | for t in reversed(to_del):
190 | self.trackers.pop(t)
191 | self.scores.pop(t)
192 | self.types.pop(t)
193 | matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks)
194 |
195 | # update matched trackers with assigned detections
196 | for t, trk in enumerate(self.trackers):
197 | if t not in unmatched_trks:
198 | d = matched[np.where(matched[:, 1] == t)[0], 0]
199 | trk.update(dets[d, :][0])
200 | self.scores[t] = dets[d, :][0][4]
201 | self.types[t] = dets[d, :][0][5]
202 |
203 | # create and initialise new trackers for unmatched detections
204 | for i in unmatched_dets:
205 | trk = KalmanBoxTracker(dets[i, 0:5])
206 | self.trackers.append(trk)
207 | self.scores.append(dets[i, :][4])
208 | self.types.append(dets[i, :][5])
209 | i = len(self.trackers)
210 | for trk in reversed(self.trackers):
211 | pos = trk.get_state()[0]
212 | i -= 1
213 | if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
214 | ret.append(np.concatenate((pos, [trk.id + 1], [self.scores[i]], [self.types[i]])).reshape(1, -1)) # +1 as MOT benchmark requires positive
215 | # remove dead tracklet
216 | if trk.time_since_update > self.max_age:
217 | self.trackers.pop(i)
218 | self.scores.pop(i)
219 | self.types.pop(i)
220 |
221 | if len(ret) > 0:
222 | return np.concatenate(ret)
223 | else:
224 |             return np.empty((0, 7))  # keep the documented 7-column layout even when empty
225 |
--------------------------------------------------------------------------------
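Usage sketch for the tracker above, feeding the 6-column detections `[x1, y1, x2, y2, score, type]` that `update` documents (illustrative box values):

import numpy as np
from sort import Sort

tracker = Sort(max_age=2, min_hits=3)
frames = [
    np.array([[100., 80., 160., 200., 0.9, 0.]]),  # frame 1: one detection
    np.array([[102., 82., 162., 202., 0.9, 0.]]),  # frame 2: same object, shifted
]
for dets in frames:
    tracks = tracker.update(dets)
    print(tracks)  # rows of [x1, y1, x2, y2, object_id, score, type]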
/train.py:
--------------------------------------------------------------------------------
1 | """
2 | Retrain the YOLO model for your own dataset.
3 | """
4 |
5 | import numpy as np
6 | import keras.backend as K
7 | from keras.layers import Input, Lambda
8 | from keras.models import Model
9 | from keras.optimizers import Adam
10 | from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
11 |
12 | from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
13 |
14 | import read_data_cifar100
15 | from PIL import Image
16 |
17 |
18 | def _main():
19 | # constant
20 | path = 'cifar-100-python/'
21 | log_dir = 'output/'
22 | classes_path = 'model_data/cifar_classes.txt'
23 | anchors_path = 'model_data/yolo_anchors.txt'
24 | # anchors_path = 'model_data/tiny_yolo_anchors.txt'
25 | pretrained_weight = 'model_h5/yolo.h5'
26 | pretrained_weight_tiny = 'model_h5/yolo-tiny.h5'
27 | pretrained = True
28 |
29 | # epoch
30 | epoch_first = 10
31 | epoch_second = 20
32 |
33 | class_names = get_classes(classes_path)
34 | num_classes = len(class_names)
35 | anchors = get_anchors(anchors_path)
36 |
37 | input_shape = (416, 416) # multiple of 32, hw; (32, 32) in cifar-100
38 |
39 | is_tiny_version = len(anchors) == 6 # default setting
40 | if is_tiny_version:
41 | model = create_tiny_model(input_shape, anchors, num_classes, load_pretrained=pretrained, freeze_body=2,
42 | weights_path=pretrained_weight_tiny)
43 | else:
44 | model = create_model(input_shape, anchors, num_classes, load_pretrained=pretrained, freeze_body=2,
45 | weights_path=pretrained_weight) # make sure you know what you freeze
46 |
47 | logging = TensorBoard(log_dir=log_dir)
48 | checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
49 | monitor='val_loss', save_weights_only=True, save_best_only=True, period=3)
50 | reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
51 | early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
52 |
53 | # load cifar dataset
54 | images, labels, img_val, lab_val = read_data_cifar100.data(path, False, True, True)
55 |
56 | num_val = len(img_val)
57 | num_train = len(images)
58 |
59 | # Train with frozen layers first, to get a stable loss.
60 |     # Adjust num epochs to your dataset. This step is enough to obtain a reasonably good model.
61 | if True:
62 | model.compile(optimizer=Adam(lr=1e-3), loss={
63 | # use custom yolo_loss Lambda layer.
64 | 'yolo_loss': lambda y_true, y_pred: y_pred}, metrics=['accuracy'])
65 |
66 | batch_size = 32
67 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
68 | model.fit_generator(data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes),
69 | steps_per_epoch=max(1, num_train // batch_size),
70 | validation_data=data_generator_wrapper(img_val, lab_val, batch_size, input_shape, anchors,
71 | num_classes),
72 | validation_steps=max(1, num_val // batch_size),
73 | epochs=epoch_first,
74 | initial_epoch=0,
75 | callbacks=[logging, checkpoint])
76 | model.save_weights(log_dir + 'trained_weights_stage_1.h5')
77 |
78 | # Unfreeze and continue training, to fine-tune.
79 | # Train longer if the result is not good.
80 | if True:
81 | for i in range(len(model.layers)):
82 | model.layers[i].trainable = True
83 | model.compile(optimizer=Adam(lr=1e-4),
84 | loss={'yolo_loss': lambda y_true, y_pred: y_pred}, metrics=['accuracy'])
85 | # recompile to apply the change
86 | print('Unfreeze all of the layers.')
87 |
88 | batch_size = 32 # note that more GPU memory is required after unfreezing the body
89 | print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
90 | model.fit_generator(data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes),
91 | steps_per_epoch=max(1, num_train // batch_size),
92 | validation_data=data_generator_wrapper(img_val, lab_val, batch_size, input_shape, anchors,
93 | num_classes),
94 | validation_steps=max(1, num_val // batch_size),
95 | epochs=epoch_second,
96 | initial_epoch=epoch_first,
97 | callbacks=[logging, checkpoint, reduce_lr, early_stopping])
98 | model.save_weights(log_dir + 'trained_weights_final.h5')
99 |
100 | # Further training if needed.
101 |
102 |
103 | def get_classes(classes_path):
104 | """loads the classes"""
105 | with open(classes_path) as f:
106 | class_names = f.readlines()
107 | class_names = [c.strip() for c in class_names]
108 | return class_names
109 |
110 |
111 | def get_anchors(anchors_path):
112 | """loads the anchors from a file"""
113 | with open(anchors_path) as f:
114 | anchors = f.readline()
115 | anchors = [float(x) for x in anchors.split(',')]
116 | return np.array(anchors).reshape(-1, 2)
117 |
118 |
119 | def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
120 | weights_path='model_data/yolo_weights.h5'):
121 | """create the training model"""
122 | K.clear_session() # get a new session
123 | image_input = Input(shape=(None, None, 3))
124 | h, w = input_shape
125 | num_anchors = len(anchors)
126 |
127 | y_true = [
128 | Input(shape=(h // {0: 32, 1: 16, 2: 8}[l], w // {0: 32, 1: 16, 2: 8}[l], num_anchors // 3, num_classes + 5)) for
129 | l in range(3)]
130 |
131 | model_body = yolo_body(image_input, num_anchors // 3, num_classes)
132 | print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
133 |
134 | if load_pretrained:
135 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
136 | print('Load weights {}.'.format(weights_path))
137 | if freeze_body in [1, 2]:
138 | # Freeze darknet53 body or freeze all but 3 output layers.
139 | num = (185, len(model_body.layers) - 3)[freeze_body - 1]
140 | for i in range(num):
141 | model_body.layers[i].trainable = False
142 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
143 |
144 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
145 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
146 | [*model_body.output, *y_true])
147 | model = Model([model_body.input, *y_true], model_loss)
148 |
149 | return model
150 |
151 |
152 | def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
153 | weights_path='model_data/tiny_yolo_weights.h5'):
154 | """create the training model, for Tiny YOLOv3"""
155 | K.clear_session() # get a new session
156 | image_input = Input(shape=(None, None, 3))
157 | h, w = input_shape
158 | num_anchors = len(anchors)
159 |
160 | y_true = [Input(shape=(h // {0: 32, 1: 16}[l], w // {0: 32, 1: 16}[l], num_anchors // 2, num_classes + 5)) for l in
161 | range(2)]
162 |
163 | model_body = tiny_yolo_body(image_input, num_anchors // 2, num_classes)
164 | print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
165 |
166 | if load_pretrained:
167 | model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
168 | print('Load weights {}.'.format(weights_path))
169 | if freeze_body in [1, 2]:
170 | # Freeze the darknet body or freeze all but 2 output layers.
171 | num = (20, len(model_body.layers) - 2)[freeze_body - 1]
172 | for i in range(num):
173 | model_body.layers[i].trainable = False
174 | print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
175 |
176 | model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
177 | arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
178 | [*model_body.output, *y_true])
179 | model = Model([model_body.input, *y_true], model_loss)
180 |
181 | return model
182 |
183 |
184 | def get_data(single_image, single_label, input_shape, proc_img=True):
185 | """
186 | pre-processing for real-time data augmentation
187 | """
188 | image = Image.fromarray(np.uint8(single_image))
189 | iw, ih = image.size
190 | h, w = input_shape
191 |     # the CIFAR dataset images are already cropped to the target area
192 | box = np.array([[0, 0, 32, 32, int(single_label)]])
193 |
194 | # resize image
195 | scale = min(w / iw, h / ih)
196 | nw = int(iw * scale)
197 | nh = int(ih * scale)
198 | dx = (w - nw) // 2
199 | dy = (h - nh) // 2
200 | image_data = 0
201 | if proc_img:
202 | image = image.resize((nw, nh), Image.BICUBIC)
203 | new_image = Image.new('RGB', (w, h), (128, 128, 128))
204 | new_image.paste(image, (dx, dy))
205 | image_data = np.array(new_image) / 255.
206 |
207 | # correct boxes
208 | box_data = np.zeros((1, 5))
209 | box[:, [0, 2]] = box[:, [0, 2]] * scale + dx
210 | box[:, [1, 3]] = box[:, [1, 3]] * scale + dy
211 | box_data[:len(box)] = box
212 |
213 | return image_data, box_data
214 |
215 |
216 | def data_generator(images, labels, batch_size, input_shape, anchors, num_classes):
217 | """data generator for fit_generator"""
218 | n = len(images)
219 | i = 0
220 | while True:
221 | image_data = []
222 | box_data = []
223 | for b in range(batch_size):
224 | image, box = get_data(images[i], labels[i], input_shape, proc_img=True)
225 | image_data.append(image)
226 | box_data.append(box)
227 | # avoid IndexError
228 | i = (i + 1) % n
229 | image_data = np.array(image_data)
230 | box_data = np.array(box_data)
231 | y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
232 | yield [image_data, *y_true], np.zeros(batch_size)
233 |
234 |
235 | def data_generator_wrapper(images, labels, batch_size, input_shape, anchors, num_classes):
236 | n = len(images)
237 | if n == 0 or batch_size <= 0:
238 | return None
239 | return data_generator(images, labels, batch_size, input_shape, anchors, num_classes)
240 |
241 |
242 | if __name__ == '__main__':
243 | _main()
244 |
--------------------------------------------------------------------------------
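Illustrative check of the anchor-file format `get_anchors` above consumes: a single comma-separated line of alternating width,height values, reshaped to (N, 2). The numbers below are hypothetical; per `_main`, a 6-anchor file selects the tiny model:

import numpy as np

line = '12,16, 19,36, 40,28, 36,75, 76,55, 72,146'  # hypothetical 6-anchor line
anchors = np.array([float(x) for x in line.split(',')]).reshape(-1, 2)
print(anchors.shape)  # (6, 2) -> len(anchors) == 6 triggers create_tiny_model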
/yolo.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Class definition of YOLO_v3 style detection model on image and video
4 | """
5 |
6 | import colorsys
7 | from timeit import default_timer as timer
8 |
9 | import numpy as np
10 | from keras import backend as K
11 | from keras.models import load_model
12 | from keras.layers import Input
13 | from PIL import Image, ImageFont, ImageDraw
14 |
15 | from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
16 | from yolo3.utils import letterbox_image
17 | from yolo3.utils import delete_repeat_bbox
18 | import sort
19 | import os
20 | from keras.utils import multi_gpu_model
21 |
22 | from tracker_func import sort_image
23 | from sort import KalmanBoxTracker
24 |
25 | from tracker_func import deepsort_image
26 | from deepsort.tracker import Tracker as deepsort_Tracker
27 | from yolo3 import generate_detections as gdet
28 | from deepsort import nn_matching
29 |
30 |
31 | class YOLO(object):
32 | _defaults = {
33 | "model_image_size": (416, 416),
34 | }
35 |
36 | @classmethod
37 | def get_defaults(cls, n):
38 | if n in cls._defaults:
39 | return cls._defaults[n]
40 | else:
41 | return "Unrecognized attribute name '" + n + "'"
42 |
43 | def __init__(self, **kwargs):
44 | self.__dict__.update(self._defaults) # set up default values
45 | self.__dict__.update(kwargs) # and update with user overrides
46 | self.class_names = self._get_class()
47 | self.anchors = self._get_anchors()
48 | self.sess = K.get_session()
49 | self.boxes, self.scores, self.classes = self.generate()
50 | self.frame = 1
51 | self.mot_tracker, self.encoder = self._initialize_tracker()
52 |
53 | def _initialize_tracker(self):
54 | if not self.image:
55 | if self.tracker == 'sort':
56 | tracker = sort.Sort()
57 | return tracker, None
58 | elif self.tracker == 'deepsort':
59 | # initialize deep sort
60 | model_filename = self.deepsort_model
61 | encoder = gdet.create_box_encoder(model_filename, batch_size=1)
62 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.5, budget=None)
63 | tracker = deepsort_Tracker(metric)
64 | return tracker, encoder
65 | else:
66 | raise ValueError('The variable \"tracker\" must be \"sort\" or \"deepsort\".')
67 | else:
68 | return None, None
69 |
70 | def _get_class(self):
71 | classes_path = os.path.expanduser(self.classes_path)
72 | with open(classes_path) as f:
73 | class_names = f.readlines()
74 | class_names = [c.strip() for c in class_names]
75 | return class_names
76 |
77 | def _get_anchors(self):
78 | anchors_path = os.path.expanduser(self.anchors_path)
79 | with open(anchors_path) as f:
80 | anchors = f.readline()
81 | anchors = [float(x) for x in anchors.split(',')]
82 | return np.array(anchors).reshape(-1, 2)
83 |
84 | def generate(self):
85 | model_path = os.path.expanduser(self.model_path)
86 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
87 |
88 | # Load model, or construct model and load weights.
89 | num_anchors = len(self.anchors)
90 | num_classes = len(self.class_names)
91 | is_tiny_version = num_anchors == 6 # default setting
92 | try:
93 | self.yolo_model = load_model(model_path, compile=False)
94 |         except Exception:
95 | self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors // 2, num_classes) \
96 | if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors // 3, num_classes)
97 | self.yolo_model.load_weights(self.model_path) # make sure model, anchors and classes match
98 | else:
99 | try:
100 | assert self.yolo_model.layers[-1].output_shape[-1] == \
101 | num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
102 | 'Mismatch between model and given anchor and class sizes'
103 | except TypeError:
104 | # the number of yolo_model.output(Tensor) may be just one
105 | assert self.yolo_model.layers[-1].output_shape[-1] == \
106 | num_anchors / 1 * (num_classes + 5), 'Mismatch between model and given anchor and class sizes'
107 |
108 | print('{} model, anchors, and classes loaded.'.format(model_path))
109 |
110 | # Generate colors for drawing bounding boxes.
111 | hsv_tuples = [(x / len(self.class_names), 1., 1.)
112 | for x in range(len(self.class_names))]
113 | self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
114 | self.colors = list(
115 | map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
116 | self.colors))
117 | np.random.seed(10101) # Fixed seed for consistent colors across runs.
118 | np.random.shuffle(self.colors) # Shuffle colors to decorrelate adjacent classes.
119 | np.random.seed(None) # Reset seed to default.
120 |
121 | # Generate output tensor targets for filtered bounding boxes.
122 | self.input_image_shape = K.placeholder(shape=(2,))
123 | if self.gpu_num >= 2:
124 | self.yolo_model = multi_gpu_model(self.yolo_model, gpus=self.gpu_num)
125 | boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
126 | len(self.class_names), self.input_image_shape,
127 | score_threshold=self.score, iou_threshold=self.iou)
128 | return boxes, scores, classes
129 |
130 | def detect_image(self, frame, fo=None):
131 | image = Image.fromarray(frame)
132 |
133 | start = timer()
134 |
135 | if self.model_image_size != (None, None):
136 | assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
137 | assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'
138 | boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
139 | else:
140 | new_image_size = (image.width - (image.width % 32),
141 | image.height - (image.height % 32))
142 | boxed_image = letterbox_image(image, new_image_size)
143 | image_data = np.array(boxed_image, dtype='float32')
144 |
145 | print(image_data.shape)
146 | image_data /= 255.
147 | image_data = np.expand_dims(image_data, 0) # Add batch dimension.
148 |
149 | out_boxes, out_scores, out_classes = self.sess.run(
150 | [self.boxes, self.scores, self.classes],
151 | feed_dict={
152 | self.yolo_model.input: image_data,
153 | self.input_image_shape: [image.size[1], image.size[0]],
154 | K.learning_phase(): 0
155 | })
156 |
157 | # print(type(out_boxes), type(out_scores), type(out_classes))
158 | # print(out_boxes, out_scores, out_classes)
159 |
160 | # delete repeat bbox
161 | out_boxes, out_scores, out_classes = \
162 | delete_repeat_bbox(list(out_boxes), list(out_scores), list(out_classes), self.repeat_iou)
163 |
164 | # open or close tracker
165 | if self.mot_tracker is not None:
166 | if self.tracker == 'sort':
167 | out_boxes, out_scores, out_classes, object_id = \
168 | sort_image(self.mot_tracker, out_boxes, out_scores, out_classes)
169 | elif self.tracker == 'deepsort':
170 | out_boxes, out_scores, out_classes, object_id = \
171 | deepsort_image(self.mot_tracker, self.encoder, frame, out_boxes, out_scores, out_classes,
172 | nms_max_overlap=1.0)
173 | else:
174 | raise ValueError('The variable \"tracker\" must be \"sort\" or \"deepsort\".')
175 | else:
176 | KalmanBoxTracker.count = 0
177 |             object_id = np.zeros(len(out_boxes))
178 |
179 | # write to file
180 | if self.write_to_file:
181 | for i in reversed(range(0, len(out_boxes))):
182 | result = [self.frame, object_id[i], out_boxes[i][0], out_boxes[i][1],
183 | abs(out_boxes[i][2] - out_boxes[i][0]), abs(out_boxes[i][3] - out_boxes[i][1]), out_scores[i],
184 | -1, -1, -1]
185 | fo.write(', '.join(map(str, result)))
186 | fo.write('\n')
187 |
188 | print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
189 |
190 | font = ImageFont.truetype(font='font/times.ttf',
191 | size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
192 | thickness = (image.size[0] + image.size[1]) // 300
193 |
194 | for i, c in reversed(list(enumerate(out_classes))):
195 | predicted_class = self.class_names[c]
196 | box = out_boxes[i]
197 | score = out_scores[i]
198 | id = int(object_id[i])
199 |
200 | # bounding box
201 | top, left, bottom, right = box
202 | top = max(0, np.floor(top + 0.5).astype('int32'))
203 | left = max(0, np.floor(left + 0.5).astype('int32'))
204 | bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
205 | right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
206 |
207 | label = '{} {:.2f} id:{}'.format(predicted_class, score, id)
208 | draw = ImageDraw.Draw(image)
209 | label_size = draw.textsize(label, font)
210 |
211 | print(label, (left, top), (right, bottom))
212 |
213 | if top - label_size[1] >= 0:
214 | text_origin = np.array([left, top - label_size[1]])
215 | else:
216 | text_origin = np.array([left, top + 1])
217 |
218 | # My kingdom for a good redistributable image drawing library.
219 | for i in range(thickness):
220 | draw.rectangle(
221 | [left + i, top + i, right - i, bottom - i],
222 | outline=self.colors[c])
223 | draw.rectangle(
224 | [tuple(text_origin), tuple(text_origin + label_size)],
225 | fill=self.colors[c])
226 | draw.text(text_origin, label, fill=(0, 0, 0), font=font)
227 | del draw
228 |
229 | end = timer()
230 | print('time:', end - start, 's')
231 | self.frame = self.frame + 1
232 | return image
233 |
234 | def close_session(self):
235 | self.sess.close()
236 |
237 |
238 | def detect_video(yolo, video_path, output_path=""):
239 | import cv2
240 | vid = cv2.VideoCapture(video_path)
241 | if not vid.isOpened():
242 | raise IOError("Couldn't open webcam or video")
243 | video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC))
244 | video_fps = vid.get(cv2.CAP_PROP_FPS)
245 | video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)),
246 | int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
247 |     isOutput = output_path != ""
248 | if isOutput:
249 | print("!!! TYPE:", type(output_path), type(video_FourCC), type(video_fps), type(video_size))
250 | out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size)
251 | accum_time = 0
252 | curr_fps = 0
253 | fps = "FPS: ??"
254 | prev_time = timer()
255 |
256 | if yolo.write_to_file:
257 | emptyFile = open(yolo.output_path + 'result.dat', 'w')
258 | else:
259 | emptyFile = None
260 |
261 | while True:
262 | return_value, frame = vid.read()
263 | try:
264 | image = Image.fromarray(frame)
265 | except AttributeError:
266 | break
267 | image = yolo.detect_image(frame, emptyFile)
268 | result = np.asarray(image)
269 | curr_time = timer()
270 | exec_time = curr_time - prev_time
271 | prev_time = curr_time
272 | accum_time = accum_time + exec_time
273 | curr_fps = curr_fps + 1
274 | if accum_time > 1:
275 | accum_time = accum_time - 1
276 | fps = "FPS: " + str(curr_fps)
277 | curr_fps = 0
278 | cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
279 | fontScale=0.50, color=(255, 0, 0), thickness=2)
280 | cv2.namedWindow("result", cv2.WINDOW_NORMAL)
281 | cv2.imshow("result", result)
282 | if isOutput:
283 | out.write(result)
284 | if cv2.waitKey(1) & 0xFF == ord('q'):
285 | break
286 | if yolo.write_to_file:
287 | emptyFile.close()
288 | yolo.close_session()
289 |
--------------------------------------------------------------------------------
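Instantiation sketch for the class above (illustrative values; the keyword names match the attributes the class reads from **kwargs, and the file paths are placeholders):

from yolo import YOLO, detect_video

yolo = YOLO(
    model_path='model_h5/yolo.h5',
    anchors_path='model_data/yolo_anchors.txt',
    classes_path='model_data/coco_classes.txt',
    score=0.3, iou=0.45, repeat_iou=0.9,
    gpu_num=1, image=False, tracker='sort',
    write_to_file=False, output_path='output/',
    deepsort_model='model_h5/mars-small128.pb',
)
detect_video(yolo, video_path='demo.mp4', output_path='demo_out.mp4')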
/yolo3/model.py:
--------------------------------------------------------------------------------
1 | """YOLO_v3 Model Defined in Keras."""
2 |
3 | from functools import wraps
4 |
5 | import numpy as np
6 | import tensorflow as tf
7 | from keras import backend as K
8 | from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D
9 | from keras.layers.advanced_activations import LeakyReLU
10 | from keras.layers.normalization import BatchNormalization
11 | from keras.models import Model
12 | from keras.regularizers import l2
13 |
14 | from yolo3.utils import compose
15 |
16 |
17 | @wraps(Conv2D)
18 | def DarknetConv2D(*args, **kwargs):
19 | """Wrapper to set Darknet parameters for Convolution2D."""
20 | darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
21 | darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides') == (2, 2) else 'same'
22 | darknet_conv_kwargs.update(kwargs)
23 | return Conv2D(*args, **darknet_conv_kwargs)
24 |
25 |
26 | def DarknetConv2D_BN_Leaky(*args, **kwargs):
27 | """Darknet Convolution2D followed by BatchNormalization and LeakyReLU."""
28 | no_bias_kwargs = {'use_bias': False}
29 | no_bias_kwargs.update(kwargs)
30 | return compose(
31 | DarknetConv2D(*args, **no_bias_kwargs),
32 | BatchNormalization(),
33 | LeakyReLU(alpha=0.1))
34 |
35 |
36 | def resblock_body(x, num_filters, num_blocks):
37 | '''A series of resblocks starting with a downsampling Convolution2D'''
38 | # Darknet uses left and top padding instead of 'same' mode
39 | x = ZeroPadding2D(((1, 0), (1, 0)))(x)
40 | x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x)
41 | for i in range(num_blocks):
42 | y = compose(
43 | DarknetConv2D_BN_Leaky(num_filters // 2, (1, 1)),
44 | DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x)
45 | x = Add()([x, y])
46 | return x
47 |
48 |
49 | def darknet_body(x):
50 |     '''Darknet body having 52 Convolution2D layers'''
51 | x = DarknetConv2D_BN_Leaky(32, (3, 3))(x)
52 | x = resblock_body(x, 64, 1)
53 | x = resblock_body(x, 128, 2)
54 | x = resblock_body(x, 256, 8)
55 | x = resblock_body(x, 512, 8)
56 | x = resblock_body(x, 1024, 4)
57 | return x
58 |
59 |
60 | def make_last_layers(x, num_filters, out_filters):
61 | '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer'''
62 | x = compose(
63 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
64 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
65 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)),
66 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
67 | DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x)
68 | y = compose(
69 | DarknetConv2D_BN_Leaky(num_filters * 2, (3, 3)),
70 | DarknetConv2D(out_filters, (1, 1)))(x)
71 | return x, y
72 |
73 |
74 | def yolo_body(inputs, num_anchors, num_classes):
75 | """Create YOLO_V3 model CNN body in Keras."""
76 | darknet = Model(inputs, darknet_body(inputs))
77 | x, y1 = make_last_layers(darknet.output, 512, num_anchors * (num_classes + 5))
78 |
79 | x = compose(
80 | DarknetConv2D_BN_Leaky(256, (1, 1)),
81 | UpSampling2D(2))(x)
82 | x = Concatenate()([x, darknet.layers[152].output])
83 | x, y2 = make_last_layers(x, 256, num_anchors * (num_classes + 5))
84 |
85 | x = compose(
86 | DarknetConv2D_BN_Leaky(128, (1, 1)),
87 | UpSampling2D(2))(x)
88 | x = Concatenate()([x, darknet.layers[92].output])
89 | x, y3 = make_last_layers(x, 128, num_anchors * (num_classes + 5))
90 |
91 | return Model(inputs, [y1, y2, y3])
92 |
93 |
94 | def tiny_yolo_body(inputs, num_anchors, num_classes):
95 | '''Create Tiny YOLO_v3 model CNN body in keras.'''
96 | x1 = compose(
97 | DarknetConv2D_BN_Leaky(16, (3, 3)),
98 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
99 | DarknetConv2D_BN_Leaky(32, (3, 3)),
100 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
101 | DarknetConv2D_BN_Leaky(64, (3, 3)),
102 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
103 | DarknetConv2D_BN_Leaky(128, (3, 3)),
104 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
105 | DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs)
106 | x2 = compose(
107 | MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
108 | DarknetConv2D_BN_Leaky(512, (3, 3)),
109 | MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'),
110 | DarknetConv2D_BN_Leaky(1024, (3, 3)),
111 | DarknetConv2D_BN_Leaky(256, (1, 1)))(x1)
112 | y1 = compose(
113 | DarknetConv2D_BN_Leaky(512, (3, 3)),
114 | DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))(x2)
115 |
116 | x2 = compose(
117 | DarknetConv2D_BN_Leaky(128, (1, 1)),
118 | UpSampling2D(2))(x2)
119 | y2 = compose(
120 | Concatenate(),
121 | DarknetConv2D_BN_Leaky(256, (3, 3)),
122 | DarknetConv2D(num_anchors * (num_classes + 5), (1, 1)))([x2, x1])
123 |
124 | return Model(inputs, [y1, y2])
125 |
126 |
127 | def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
128 | """Convert final layer features to bounding box parameters."""
129 | num_anchors = len(anchors)
130 | # Reshape to batch, height, width, num_anchors, box_params.
131 | anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
132 |
133 | grid_shape = K.shape(feats)[1:3] # height, width
134 | grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
135 | [1, grid_shape[1], 1, 1])
136 | grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
137 | [grid_shape[0], 1, 1, 1])
138 | grid = K.concatenate([grid_x, grid_y])
139 | grid = K.cast(grid, K.dtype(feats))
140 |
141 | feats = K.reshape(
142 | feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
143 |
144 |     # Adjust predictions to each spatial grid point and anchor size.
145 | box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
146 | box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
147 | box_confidence = K.sigmoid(feats[..., 4:5])
148 | box_class_probs = K.sigmoid(feats[..., 5:])
149 |
150 |     if calc_loss:
151 | return grid, feats, box_xy, box_wh
152 | return box_xy, box_wh, box_confidence, box_class_probs
153 |
154 |
155 | def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
156 | '''Get corrected boxes'''
157 | box_yx = box_xy[..., ::-1]
158 | box_hw = box_wh[..., ::-1]
159 | input_shape = K.cast(input_shape, K.dtype(box_yx))
160 | image_shape = K.cast(image_shape, K.dtype(box_yx))
161 | new_shape = K.round(image_shape * K.min(input_shape / image_shape))
162 | offset = (input_shape - new_shape) / 2. / input_shape
163 | scale = input_shape / new_shape
164 | box_yx = (box_yx - offset) * scale
165 | box_hw *= scale
166 |
167 | box_mins = box_yx - (box_hw / 2.)
168 | box_maxes = box_yx + (box_hw / 2.)
169 | boxes = K.concatenate([
170 | box_mins[..., 0:1], # y_min
171 | box_mins[..., 1:2], # x_min
172 | box_maxes[..., 0:1], # y_max
173 | box_maxes[..., 1:2] # x_max
174 | ])
175 |
176 | # Scale boxes back to original image shape.
177 | boxes *= K.concatenate([image_shape, image_shape])
178 | return boxes
179 |
180 |
181 | def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
182 | '''Process Conv layer output'''
183 | box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
184 | anchors, num_classes, input_shape)
185 | boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
186 | boxes = K.reshape(boxes, [-1, 4])
187 | box_scores = box_confidence * box_class_probs
188 | box_scores = K.reshape(box_scores, [-1, num_classes])
189 | return boxes, box_scores
190 |
191 |
192 | def yolo_eval(yolo_outputs,
193 | anchors,
194 | num_classes,
195 | image_shape,
196 | max_boxes=20,
197 | score_threshold=.6,
198 | iou_threshold=.5):
199 | """Evaluate YOLO model on given input and return filtered boxes."""
200 | num_layers = len(yolo_outputs)
201 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]] # default setting
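    | # Layer 0 (stride 32, coarsest grid) takes the largest anchors (6-8); finer
    | # grids take smaller anchors. The two-layer mask [[3, 4, 5], [1, 2, 3]] mirrors
    | # the masks reportedly used in the official yolov3-tiny.cfg.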
202 | input_shape = K.shape(yolo_outputs[0])[1:3] * 32
203 | boxes = []
204 | box_scores = []
205 | for l in range(num_layers):
206 | _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
207 | anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
208 | boxes.append(_boxes)
209 | box_scores.append(_box_scores)
210 | boxes = K.concatenate(boxes, axis=0)
211 | box_scores = K.concatenate(box_scores, axis=0)
212 |
213 | mask = box_scores >= score_threshold
214 | max_boxes_tensor = K.constant(max_boxes, dtype='int32')
215 | boxes_ = []
216 | scores_ = []
217 | classes_ = []
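    | # NMS is applied per class: each class's boxes are suppressed independently,
    | # so overlapping detections of *different* classes can both survive.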
218 | for c in range(num_classes):
219 | # TODO: use keras backend instead of tf.
220 | class_boxes = tf.boolean_mask(boxes, mask[:, c])
221 | class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
222 | nms_index = tf.image.non_max_suppression(
223 | class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
224 | class_boxes = K.gather(class_boxes, nms_index)
225 | class_box_scores = K.gather(class_box_scores, nms_index)
226 | classes = K.ones_like(class_box_scores, 'int32') * c
227 | boxes_.append(class_boxes)
228 | scores_.append(class_box_scores)
229 | classes_.append(classes)
230 | boxes_ = K.concatenate(boxes_, axis=0)
231 | scores_ = K.concatenate(scores_, axis=0)
232 | classes_ = K.concatenate(classes_, axis=0)
233 |
234 | return boxes_, scores_, classes_
235 |
236 |
237 | def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
238 | '''Preprocess true boxes to training input format
239 |
240 | Parameters
241 | ----------
242 | true_boxes: array, shape=(m, T, 5)
243 |         Absolute x_min, y_min, x_max, y_max, class_id, in pixels of the input_shape coordinate frame.
244 | input_shape: array-like, hw, multiples of 32
245 | anchors: array, shape=(N, 2), wh
246 | num_classes: integer
247 |
248 | Returns
249 | -------
250 |     y_true: list of array, shape like yolo_outputs, xywh are relative values
251 |
252 | '''
253 | assert (true_boxes[..., 4] < num_classes).all(), 'class id must be less than num_classes'
254 | num_layers = len(anchors) // 3 # default setting
255 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
256 |
257 | true_boxes = np.array(true_boxes, dtype='float32')
258 | input_shape = np.array(input_shape, dtype='int32')
259 | boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
260 | boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
261 | true_boxes[..., 0:2] = boxes_xy / input_shape[::-1]
262 | true_boxes[..., 2:4] = boxes_wh / input_shape[::-1]
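    | # true_boxes now holds normalized center-xy and wh in [0, 1]; input_shape is
    | # (h, w), so [::-1] gives (w, h) to match the (x, y) coordinate order. Note
    | # that `// 2` above floors the box center to whole pixels before normalizing.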
263 |
264 | m = true_boxes.shape[0]
265 | grid_shapes = [input_shape // {0: 32, 1: 16, 2: 8}[l] for l in range(num_layers)]
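    | # Strides 32/16/8 per output layer: e.g. a 416x416 input yields 13x13, 26x26
    | # and 52x52 grids.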
266 | y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5 + num_classes),
267 | dtype='float32') for l in range(num_layers)]
268 |
269 | # Expand dim to apply broadcasting.
270 | anchors = np.expand_dims(anchors, 0)
271 | anchor_maxes = anchors / 2.
272 | anchor_mins = -anchor_maxes
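    | # Anchors are treated as boxes centered at the origin; ground-truth boxes are
    | # centered the same way below, so the IoU computed here compares shapes
    | # (width/height) only, ignoring location.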
273 | valid_mask = boxes_wh[..., 0] > 0
274 |
275 | for b in range(m):
276 | # Discard zero rows.
277 | wh = boxes_wh[b, valid_mask[b]]
278 | if len(wh) == 0: continue
279 | # Expand dim to apply broadcasting.
280 | wh = np.expand_dims(wh, -2)
281 | box_maxes = wh / 2.
282 | box_mins = -box_maxes
283 |
284 | intersect_mins = np.maximum(box_mins, anchor_mins)
285 | intersect_maxes = np.minimum(box_maxes, anchor_maxes)
286 | intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
287 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
288 | box_area = wh[..., 0] * wh[..., 1]
289 | anchor_area = anchors[..., 0] * anchors[..., 1]
290 | iou = intersect_area / (box_area + anchor_area - intersect_area)
291 |
292 | # Find best anchor for each true box
293 | best_anchor = np.argmax(iou, axis=-1)
294 |
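    | # Route each ground-truth box to the output layer whose anchor mask contains
    | # its best-matching anchor; (i, j) is the grid cell holding the box center
    | # and k is that anchor's slot within the layer.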
295 | for t, n in enumerate(best_anchor):
296 | for l in range(num_layers):
297 | if n in anchor_mask[l]:
298 | i = np.floor(true_boxes[b, t, 0] * grid_shapes[l][1]).astype('int32')
299 | j = np.floor(true_boxes[b, t, 1] * grid_shapes[l][0]).astype('int32')
300 | k = anchor_mask[l].index(n)
301 | c = true_boxes[b, t, 4].astype('int32')
302 | y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
303 | y_true[l][b, j, i, k, 4] = 1
304 | y_true[l][b, j, i, k, 5 + c] = 1
305 |
306 | return y_true
307 |
308 |
309 | def box_iou(b1, b2):
310 | '''Return iou tensor
311 |
312 | Parameters
313 | ----------
314 | b1: tensor, shape=(i1,...,iN, 4), xywh
315 | b2: tensor, shape=(j, 4), xywh
316 |
317 | Returns
318 | -------
319 | iou: tensor, shape=(i1,...,iN, j)
320 |
321 | '''
322 |
323 | # Expand dim to apply broadcasting.
324 | b1 = K.expand_dims(b1, -2)
325 | b1_xy = b1[..., :2]
326 | b1_wh = b1[..., 2:4]
327 | b1_wh_half = b1_wh / 2.
328 | b1_mins = b1_xy - b1_wh_half
329 | b1_maxes = b1_xy + b1_wh_half
330 |
331 | # Expand dim to apply broadcasting.
332 | b2 = K.expand_dims(b2, 0)
333 | b2_xy = b2[..., :2]
334 | b2_wh = b2[..., 2:4]
335 | b2_wh_half = b2_wh / 2.
336 | b2_mins = b2_xy - b2_wh_half
337 | b2_maxes = b2_xy + b2_wh_half
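    | # After the expand_dims above, b1 has shape (i1,...,iN, 1, 4) and b2 has
    | # (1, j, 4); the elementwise min/max below broadcasts over every (b1, b2) pair.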
338 |
339 | intersect_mins = K.maximum(b1_mins, b2_mins)
340 | intersect_maxes = K.minimum(b1_maxes, b2_maxes)
341 | intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
342 | intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
343 | b1_area = b1_wh[..., 0] * b1_wh[..., 1]
344 | b2_area = b2_wh[..., 0] * b2_wh[..., 1]
345 | iou = intersect_area / (b1_area + b2_area - intersect_area)
346 |
347 | return iou
348 |
349 |
350 | def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
351 | '''Return yolo_loss tensor
352 |
353 | Parameters
354 | ----------
355 |     yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body (the first num_layers entries of args)
356 |     y_true: list of array, the output of preprocess_true_boxes (the remaining entries of args)
357 | anchors: array, shape=(N, 2), wh
358 | num_classes: integer
359 |     ignore_thresh: float, predictions whose best iou with any true box exceeds this threshold are excluded from the no-object confidence loss
360 |
361 | Returns
362 | -------
363 | loss: tensor, shape=(1,)
364 |
365 | '''
366 | num_layers = len(anchors) // 3 # default setting
367 | yolo_outputs = args[:num_layers]
368 | y_true = args[num_layers:]
369 | anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
370 | input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
371 | grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
372 | loss = 0
373 | m = K.shape(yolo_outputs[0])[0] # batch size, tensor
374 | mf = K.cast(m, K.dtype(yolo_outputs[0]))
375 |
376 | for l in range(num_layers):
377 | object_mask = y_true[l][..., 4:5]
378 | true_class_probs = y_true[l][..., 5:]
379 |
380 | grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
381 | anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
382 | pred_box = K.concatenate([pred_xy, pred_wh])
383 |
384 | # Darknet raw box to calculate loss.
385 | raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
386 | raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
387 | raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
388 | box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]
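    | # box_loss_scale lies in (1, 2]: since w and h are normalized to [0, 1], small
    | # boxes get a weight near 2, up-weighting localization loss for small objects.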
389 |
390 | # Find ignore mask, iterate over each of batch.
391 | ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
392 | object_mask_bool = K.cast(object_mask, 'bool')
393 |
394 | def loop_body(b, ignore_mask):
395 | true_box = tf.boolean_mask(y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
396 | iou = box_iou(pred_box[b], true_box)
397 | best_iou = K.max(iou, axis=-1)
398 | ignore_mask = ignore_mask.write(b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
399 | return b + 1, ignore_mask
400 |
401 |         _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body, [0, ignore_mask])  # K.control_flow_ops is not a public API; tf.while_loop is the equivalent call
402 | ignore_mask = ignore_mask.stack()
403 | ignore_mask = K.expand_dims(ignore_mask, -1)
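    | # ignore_mask is 1 where a prediction's best IoU against every ground-truth
    | # box falls below ignore_thresh; predictions that overlap some object well
    | # are thus excluded from the no-object confidence penalty below.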
404 |
405 | # K.binary_crossentropy is helpful to avoid exp overflow.
406 | xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[..., 0:2],
407 | from_logits=True)
408 | wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh - raw_pred[..., 2:4])
409 | confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
410 | (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
411 | from_logits=True) * ignore_mask
412 | class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[..., 5:], from_logits=True)
413 |
414 | xy_loss = K.sum(xy_loss) / mf
415 | wh_loss = K.sum(wh_loss) / mf
416 | confidence_loss = K.sum(confidence_loss) / mf
417 | class_loss = K.sum(class_loss) / mf
418 | loss += xy_loss + wh_loss + confidence_loss + class_loss
419 | if print_loss:
420 | loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum(ignore_mask)],
421 | message='loss: ')
422 | return loss
423 |
--------------------------------------------------------------------------------