├── .gitignore
├── .vscode
│   ├── launch.json
│   └── settings.json
├── README.md
├── conda-cpu.yml
├── conda-gpu.yml
├── convert.py
├── deep_sort
│   ├── __init__.py
│   ├── detection.py
│   ├── iou_matching.py
│   ├── kalman_filter.py
│   ├── linear_assignment.py
│   ├── nn_matching.py
│   ├── preprocessing.py
│   ├── track.py
│   └── tracker.py
├── deepface
│   ├── DeepFace.py
│   ├── __init__.py
│   ├── basemodels
│   │   ├── ArcFace.py
│   │   ├── Boosting.py
│   │   ├── DeepID.py
│   │   ├── DlibResNet.py
│   │   ├── DlibWrapper.py
│   │   ├── Facenet.py
│   │   ├── FbDeepFace.py
│   │   ├── OpenFace.py
│   │   ├── VGGFace.py
│   │   └── __init__.py
│   ├── commons
│   │   ├── __init__.py
│   │   ├── distance.py
│   │   ├── functions.py
│   │   └── realtime.py
│   ├── detectors
│   │   ├── DlibWrapper.py
│   │   ├── FaceDetector.py
│   │   ├── MtcnnWrapper.py
│   │   ├── OpenCvWrapper.py
│   │   ├── RetinaFaceWrapper.py
│   │   ├── SsdWrapper.py
│   │   └── __init__.py
│   ├── extendedmodels
│   │   ├── Age.py
│   │   ├── Emotion.py
│   │   ├── Gender.py
│   │   ├── Race.py
│   │   └── __init__.py
│   └── models
│       ├── __init__.py
│       └── face-recognition-ensemble-model.txt
├── detection.txt
├── evaluation.py
├── generate_face.py
├── object_tracker copy.py
├── object_tracker.py
├── requirements.txt
├── resources
│   ├── database
│   │   ├── 1
│   │   │   └── ironman
│   │   │       ├── 1.jpg
│   │   │       ├── 2.jpg
│   │   │       └── 3.jpg
│   │   └── 2
│   │       ├── chimchakman
│   │       │   ├── 1.jpg
│   │       │   └── 2.jpg
│   │       ├── juhomin
│   │       │   ├── 1.jpg
│   │       │   └── 2.jpg
│   │       └── kimpoong
│   │           ├── 1.jpg
│   │           └── 2.jpg
│   ├── fonts
│   │   └── futur.ttf
│   └── gt
│       ├── Apink_gt.xml
│       ├── BrunoMars_gt.xml
│       ├── Darling_gt.xml
│       ├── GirlsAloud_gt.xml
│       ├── HelloBubble_gt.xml
│       ├── README.txt
│       └── Westlife_gt.xml
├── test.md
├── tools
│   ├── freeze_model.py
│   └── generate_detections.py
├── xml2txt.py
└── yolov3_tf2
    ├── __init__.py
    ├── dataset.py
    ├── models.py
    └── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.h5
2 | *.weights
3 | *.tar
4 | *.tfrecord
5 | /checkpoints/*
6 | /serving/*
7 | /logs/
8 | /Untitled.ipynb
9 | /output.jpg
10 | /data/voc2012_raw/
11 |
12 | # Created by https://www.gitignore.io/api/python
13 | # Edit at https://www.gitignore.io/?templates=python
14 |
15 | ### Python ###
16 | # Byte-compiled / optimized / DLL files
17 | __pycache__/
18 | *.py[cod]
19 | *$py.class
20 |
21 | # C extensions
22 | *.so
23 |
24 | # Distribution / packaging
25 | .Python
26 | build/
27 | develop-eggs/
28 | dist/
29 | downloads/
30 | eggs/
31 | .eggs/
32 | lib/
33 | lib64/
34 | parts/
35 | sdist/
36 | var/
37 | wheels/
38 | pip-wheel-metadata/
39 | share/python-wheels/
40 | *.egg-info/
41 | .installed.cfg
42 | *.egg
43 | MANIFEST
44 |
45 | # PyInstaller
46 | # Usually these files are written by a python script from a template
47 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
48 | *.manifest
49 | *.spec
50 |
51 | # Installer logs
52 | pip-log.txt
53 | pip-delete-this-directory.txt
54 |
55 | # Unit test / coverage reports
56 | htmlcov/
57 | .tox/
58 | .nox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | .hypothesis/
66 | .pytest_cache/
67 |
68 | # Translations
69 | *.mo
70 | *.pot
71 |
72 | # Django stuff:
73 | *.log
74 | local_settings.py
75 | db.sqlite3
76 |
77 | # Flask stuff:
78 | instance/
79 | .webassets-cache
80 |
81 | # Scrapy stuff:
82 | .scrapy
83 |
84 | # Sphinx documentation
85 | docs/_build/
86 |
87 | # PyBuilder
88 | target/
89 |
90 | # Jupyter Notebook
91 | .ipynb_checkpoints
92 |
93 | # IPython
94 | profile_default/
95 | ipython_config.py
96 |
97 | # pyenv
98 | .python-version
99 |
100 | # pipenv
101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
103 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not
104 | # install all needed dependencies.
105 | #Pipfile.lock
106 |
107 | # celery beat schedule file
108 | celerybeat-schedule
109 |
110 | # SageMath parsed files
111 | *.sage.py
112 |
113 | # Environments
114 | .env
115 | .venv
116 | env/
117 | venv/
118 | ENV/
119 | env.bak/
120 | venv.bak/
121 |
122 | # Spyder project settings
123 | .spyderproject
124 | .spyproject
125 |
126 | # Rope project settings
127 | .ropeproject
128 |
129 | # mkdocs documentation
130 | /site
131 |
132 | # mypy
133 | .mypy_cache/
134 | .dmypy.json
135 | dmypy.json
136 |
137 | # Pyre type checker
138 | .pyre/
139 |
140 | # End of https://www.gitignore.io/api/python
141 |
142 | # video
143 | resources/video
144 | *.mp4
145 | *.avi
146 | *.mov
147 |
148 | # weight
149 | model_data/
150 | weights/
151 | resources/
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Python: Current File",
9 | "type": "python",
10 | "request": "launch",
11 | "program": "${file}",
12 | "console": "integratedTerminal",
13 | "args": [
14 | "--classes", "./model_data/labels/widerface.names",
15 | "--video", "./resources/video/in/T-ara.mov",
16 | "--weights", "./weights/yolov3-wider_16000.tf",
17 | "--output_format", "MP4V",
18 | "--database", "./resources/database/T-ara",
19 | "--output", "./resources/video/out/T-ara.mp4",
20 | "--num_classes", "1",
21 | "--max_face_threshold", "0.68",
22 | "--eval", "./resources/gt/T-ara_pred.txt",
23 | ]
24 | }
25 | ]
26 | }
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.pythonPath": "C:\\Users\\choiwansik\\anaconda3\\envs\\face_tracker\\python.exe"
3 | }
--------------------------------------------------------------------------------
/conda-cpu.yml:
--------------------------------------------------------------------------------
1 | name: tracker-cpu
2 |
3 | dependencies:
4 | - python==3.7
5 | - pip
6 | - matplotlib
7 | - opencv
8 | - pip:
9 | - tensorflow==2.4.1
10 | - lxml
11 | - tqdm
12 | - seaborn
13 | - pillow
14 |
--------------------------------------------------------------------------------
/conda-gpu.yml:
--------------------------------------------------------------------------------
1 | name: yolov3-tf2-gpu
2 |
3 | dependencies:
4 | - python==3.7
5 | - pip
6 | - matplotlib
7 | - opencv
8 | - cudnn
9 | - cudatoolkit==10.1.243
10 | - pip:
11 | - tensorflow==2.4.1
12 | - lxml
13 | - tqdm
14 | - -e .
15 |
--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
1 | from absl import app, flags, logging
2 | from absl.flags import FLAGS
3 | import numpy as np
4 | from yolov3_tf2.models import YoloV3, YoloV3Tiny
5 | from yolov3_tf2.utils import load_darknet_weights
6 | import tensorflow as tf
7 |
8 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.weights', 'path to weights file')
9 | flags.DEFINE_string('output', './weights/yolov3-wider_16000.tf', 'path to output')
10 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
11 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
12 |
13 |
14 | def main(_argv):
15 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
16 | if len(physical_devices) > 0:
17 | tf.config.experimental.set_memory_growth(physical_devices[0], True)
18 |
19 | if FLAGS.tiny:
20 | yolo = YoloV3Tiny(classes=FLAGS.num_classes)
21 | else:
22 | yolo = YoloV3(classes=FLAGS.num_classes)
23 | yolo.summary()
24 | logging.info('model created')
25 |
26 | load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny)
27 | logging.info('weights loaded')
28 |
29 | img = np.random.random((1, 320, 320, 3)).astype(np.float32)
30 | output = yolo(img)
31 | logging.info('sanity check passed')
32 |
33 | yolo.save_weights(FLAGS.output)
34 | logging.info('weights saved')
35 |
36 |
37 | if __name__ == '__main__':
38 | try:
39 | app.run(main)
40 | except SystemExit:
41 | pass
42 |
43 | """
44 | python convert.py --weights ./weights/yolov3-wider_16000.weights \
45 | --output ./weights/yolov3-wider_16000.tf \
46 | --num_classes 1
47 | """
--------------------------------------------------------------------------------
/deep_sort/__init__.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
--------------------------------------------------------------------------------
/deep_sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | class Detection(object):
6 | """
7 | This class represents a bounding box detection in a single image.
8 |
9 | Parameters
10 | ----------
11 | tlwh : array_like
12 | Bounding box in format `(x, y, w, h)`.
13 | confidence : float
14 | Detector confidence score.
15 | feature : array_like
16 | A feature vector that describes the object contained in this image.
17 |
18 | Attributes
19 | ----------
20 | tlwh : ndarray
21 | Bounding box in format `(top left x, top left y, width, height)`.
22 | confidence : ndarray
23 | Detector confidence score.
24 | class_name : ndarray
25 | Detector class.
26 | feature : ndarray | NoneType
27 | A feature vector that describes the object contained in this image.
28 |
29 | """
30 |
31 | def __init__(self, tlwh, confidence, class_name, feature):
32 | self.tlwh = np.asarray(tlwh, dtype=np.float)
33 | self.confidence = float(confidence)
34 | self.class_name = class_name
35 | self.feature = np.asarray(feature, dtype=np.float32)
36 |
37 | def get_class(self):
38 | return self.class_name
39 |
40 | def to_tlbr(self):
41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
42 | `(top left, bottom right)`.
43 | """
44 | ret = self.tlwh.copy()
45 | ret[2:] += ret[:2]
46 | return ret
47 |
48 | def to_xyah(self):
49 | """Convert bounding box to format `(center x, center y, aspect ratio,
50 | height)`, where the aspect ratio is `width / height`.
51 | """
52 | ret = self.tlwh.copy()
53 | ret[:2] += ret[2:] / 2
54 | ret[2] /= ret[3]
55 | return ret
56 |
--------------------------------------------------------------------------------
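A minimal usage sketch for `Detection` (the box, score, and feature values below are made up; the 512-d feature length matches the ArcFace embeddings used elsewhere in this project):

    import numpy as np
    from deep_sort.detection import Detection

    # (top-left x, top-left y, width, height), detector score, class label, appearance feature
    det = Detection([10, 20, 50, 100], 0.9, "face", np.random.rand(512))
    print(det.to_tlbr())   # [ 10.  20.  60. 120.]  -> (min x, min y, max x, max y)
    print(det.to_xyah())   # [ 35.  70.   0.5 100.] -> (center x, center y, w/h, h)
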
/deep_sort/iou_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import linear_assignment
5 |
6 |
7 | def iou(bbox, candidates):
8 | """Computer intersection over union.
9 |
10 | Parameters
11 | ----------
12 | bbox : ndarray
13 | A bounding box in format `(top left x, top left y, width, height)`.
14 | candidates : ndarray
15 | A matrix of candidate bounding boxes (one per row) in the same format
16 | as `bbox`.
17 |
18 | Returns
19 | -------
20 | ndarray
21 | The intersection over union in [0, 1] between the `bbox` and each
22 | candidate. A higher score means a larger fraction of the `bbox` is
23 | occluded by the candidate.
24 |
25 | """
26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
27 | candidates_tl = candidates[:, :2]
28 | candidates_br = candidates[:, :2] + candidates[:, 2:]
29 |
30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
34 | wh = np.maximum(0., br - tl)
35 |
36 | area_intersection = wh.prod(axis=1)
37 | area_bbox = bbox[2:].prod()
38 | area_candidates = candidates[:, 2:].prod(axis=1)
39 | return area_intersection / (area_bbox + area_candidates - area_intersection)
40 |
41 |
42 | def iou_cost(tracks, detections, track_indices=None,
43 | detection_indices=None):
44 | """An intersection over union distance metric.
45 |
46 | Parameters
47 | ----------
48 | tracks : List[deep_sort.track.Track]
49 | A list of tracks.
50 | detections : List[deep_sort.detection.Detection]
51 | A list of detections.
52 | track_indices : Optional[List[int]]
53 | A list of indices to tracks that should be matched. Defaults to
54 | all `tracks`.
55 | detection_indices : Optional[List[int]]
56 | A list of indices to detections that should be matched. Defaults
57 | to all `detections`.
58 |
59 | Returns
60 | -------
61 | ndarray
62 | Returns a cost matrix of shape
63 | len(track_indices), len(detection_indices) where entry (i, j) is
64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.
65 |
66 | """
67 | if track_indices is None:
68 | track_indices = np.arange(len(tracks))
69 | if detection_indices is None:
70 | detection_indices = np.arange(len(detections))
71 |
72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
73 | for row, track_idx in enumerate(track_indices):
74 | if tracks[track_idx].time_since_update > 1:
75 | cost_matrix[row, :] = linear_assignment.INFTY_COST
76 | continue
77 |
78 | bbox = tracks[track_idx].to_tlwh()
79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices])
80 | cost_matrix[row, :] = 1. - iou(bbox, candidates)
81 | return cost_matrix
82 |
--------------------------------------------------------------------------------
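A quick sanity check of `iou` with hand-made boxes:

    import numpy as np
    from deep_sort.iou_matching import iou

    bbox = np.array([0., 0., 10., 10.])            # (x, y, w, h)
    candidates = np.array([[0., 0., 10., 10.],     # identical box    -> IoU = 1.0
                           [5., 5., 10., 10.]])    # partial overlap  -> 25 / 175 ≈ 0.143
    print(iou(bbox, candidates))
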
/deep_sort/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 |
6 | """
7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
9 | function and used as Mahalanobis gating threshold.
10 | """
11 | chi2inv95 = {
12 | 1: 3.8415,
13 | 2: 5.9915,
14 | 3: 7.8147,
15 | 4: 9.4877,
16 | 5: 11.070,
17 | 6: 12.592,
18 | 7: 14.067,
19 | 8: 15.507,
20 | 9: 16.919}
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1. / 20
53 | self._std_weight_velocity = 1. / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3]]
85 | covariance = np.diag(np.square(std))
86 | return mean, covariance
87 |
88 | def predict(self, mean, covariance):
89 | """Run Kalman filter prediction step.
90 |
91 | Parameters
92 | ----------
93 | mean : ndarray
94 | The 8 dimensional mean vector of the object state at the previous
95 | time step.
96 | covariance : ndarray
97 | The 8x8 dimensional covariance matrix of the object state at the
98 | previous time step.
99 |
100 | Returns
101 | -------
102 | (ndarray, ndarray)
103 | Returns the mean vector and covariance matrix of the predicted
104 | state. Unobserved velocities are initialized to 0 mean.
105 |
106 | """
107 | std_pos = [
108 | self._std_weight_position * mean[3],
109 | self._std_weight_position * mean[3],
110 | 1e-2,
111 | self._std_weight_position * mean[3]]
112 | std_vel = [
113 | self._std_weight_velocity * mean[3],
114 | self._std_weight_velocity * mean[3],
115 | 1e-5,
116 | self._std_weight_velocity * mean[3]]
117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
118 |
119 | mean = np.dot(self._motion_mat, mean)
120 | covariance = np.linalg.multi_dot((
121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
122 |
123 | return mean, covariance
124 |
125 | def project(self, mean, covariance):
126 | """Project state distribution to measurement space.
127 |
128 | Parameters
129 | ----------
130 | mean : ndarray
131 | The state's mean vector (8 dimensional array).
132 | covariance : ndarray
133 | The state's covariance matrix (8x8 dimensional).
134 |
135 | Returns
136 | -------
137 | (ndarray, ndarray)
138 | Returns the projected mean and covariance matrix of the given state
139 | estimate.
140 |
141 | """
142 | std = [
143 | self._std_weight_position * mean[3],
144 | self._std_weight_position * mean[3],
145 | 1e-1,
146 | self._std_weight_position * mean[3]]
147 | innovation_cov = np.diag(np.square(std))
148 |
149 | mean = np.dot(self._update_mat, mean)
150 | covariance = np.linalg.multi_dot((
151 | self._update_mat, covariance, self._update_mat.T))
152 | return mean, covariance + innovation_cov
153 |
154 | def update(self, mean, covariance, measurement):
155 | """Run Kalman filter correction step.
156 |
157 | Parameters
158 | ----------
159 | mean : ndarray
160 | The predicted state's mean vector (8 dimensional).
161 | covariance : ndarray
162 | The state's covariance matrix (8x8 dimensional).
163 | measurement : ndarray
164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
165 | is the center position, a the aspect ratio, and h the height of the
166 | bounding box.
167 |
168 | Returns
169 | -------
170 | (ndarray, ndarray)
171 | Returns the measurement-corrected state distribution.
172 |
173 | """
174 | projected_mean, projected_cov = self.project(mean, covariance)
175 |
176 | chol_factor, lower = scipy.linalg.cho_factor(
177 | projected_cov, lower=True, check_finite=False)
178 | kalman_gain = scipy.linalg.cho_solve(
179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
180 | check_finite=False).T
181 | innovation = measurement - projected_mean
182 |
183 | new_mean = mean + np.dot(innovation, kalman_gain.T)
184 | new_covariance = covariance - np.linalg.multi_dot((
185 | kalman_gain, projected_cov, kalman_gain.T))
186 | return new_mean, new_covariance
187 |
188 | def gating_distance(self, mean, covariance, measurements,
189 | only_position=False):
190 | """Compute gating distance between state distribution and measurements.
191 |
192 | A suitable distance threshold can be obtained from `chi2inv95`. If
193 | `only_position` is False, the chi-square distribution has 4 degrees of
194 | freedom, otherwise 2.
195 |
196 | Parameters
197 | ----------
198 | mean : ndarray
199 | Mean vector over the state distribution (8 dimensional).
200 | covariance : ndarray
201 | Covariance of the state distribution (8x8 dimensional).
202 | measurements : ndarray
203 | An Nx4 dimensional matrix of N measurements, each in
204 | format (x, y, a, h) where (x, y) is the bounding box center
205 | position, a the aspect ratio, and h the height.
206 | only_position : Optional[bool]
207 | If True, distance computation is done with respect to the bounding
208 | box center position only.
209 |
210 | Returns
211 | -------
212 | ndarray
213 | Returns an array of length N, where the i-th element contains the
214 | squared Mahalanobis distance between (mean, covariance) and
215 | `measurements[i]`.
216 |
217 | """
218 | mean, covariance = self.project(mean, covariance)
219 | if only_position:
220 | mean, covariance = mean[:2], covariance[:2, :2]
221 | measurements = measurements[:, :2]
222 |
223 | cholesky_factor = np.linalg.cholesky(covariance)
224 | d = measurements - mean
225 | z = scipy.linalg.solve_triangular(
226 | cholesky_factor, d.T, lower=True, check_finite=False,
227 | overwrite_b=True)
228 | squared_maha = np.sum(z * z, axis=0)
229 | return squared_maha
230 |
--------------------------------------------------------------------------------
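The per-frame cycle this filter supports, sketched with made-up `(x, y, a, h)` measurements:

    import numpy as np
    from deep_sort.kalman_filter import KalmanFilter, chi2inv95

    kf = KalmanFilter()
    mean, cov = kf.initiate(np.array([320., 240., 0.7, 120.]))   # new track from an unassociated detection
    mean, cov = kf.predict(mean, cov)                            # constant-velocity prediction, one frame
    mean, cov = kf.update(mean, cov, np.array([322., 242., 0.7, 121.]))  # correction with the matched detection

    # Squared Mahalanobis distances used for gating, thresholded against chi2inv95[4]
    d = kf.gating_distance(mean, cov, np.array([[322., 242., 0.7, 121.]]))
    print(d < chi2inv95[4])
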
/deep_sort/linear_assignment.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from scipy.optimize import linear_sum_assignment
5 | from . import kalman_filter
6 |
7 |
8 | INFTY_COST = 1e+5
9 |
10 |
11 | def min_cost_matching(
12 | distance_metric, max_distance, tracks, detections, track_indices=None,
13 | detection_indices=None):
14 | """Solve linear assignment problem.
15 |
16 | Parameters
17 | ----------
18 | distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
19 | The distance metric is given a list of tracks and detections as well as
20 | a list of N track indices and M detection indices. The metric should
21 | return the NxM dimensional cost matrix, where element (i, j) is the
22 | association cost between the i-th track in the given track indices and
23 | the j-th detection in the given detection_indices.
24 | max_distance : float
25 | Gating threshold. Associations with cost larger than this value are
26 | disregarded.
27 | tracks : List[track.Track]
28 | A list of predicted tracks at the current time step.
29 | detections : List[detection.Detection]
30 | A list of detections at the current time step.
31 | track_indices : List[int]
32 | List of track indices that maps rows in `cost_matrix` to tracks in
33 | `tracks` (see description above).
34 | detection_indices : List[int]
35 | List of detection indices that maps columns in `cost_matrix` to
36 | detections in `detections` (see description above).
37 |
38 | Returns
39 | -------
40 | (List[(int, int)], List[int], List[int])
41 | Returns a tuple with the following three entries:
42 | * A list of matched track and detection indices.
43 | * A list of unmatched track indices.
44 | * A list of unmatched detection indices.
45 |
46 | """
47 | if track_indices is None:
48 | track_indices = np.arange(len(tracks))
49 | if detection_indices is None:
50 | detection_indices = np.arange(len(detections))
51 |
52 | if len(detection_indices) == 0 or len(track_indices) == 0:
53 | return [], track_indices, detection_indices # Nothing to match.
54 |
55 | cost_matrix = distance_metric(
56 | tracks, detections, track_indices, detection_indices)
57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
58 | indices = linear_sum_assignment(cost_matrix)
59 | indices = np.asarray(indices)
60 | indices = np.transpose(indices)
61 | matches, unmatched_tracks, unmatched_detections = [], [], []
62 | for col, detection_idx in enumerate(detection_indices):
63 | if col not in indices[:, 1]:
64 | unmatched_detections.append(detection_idx)
65 | for row, track_idx in enumerate(track_indices):
66 | if row not in indices[:, 0]:
67 | unmatched_tracks.append(track_idx)
68 | for row, col in indices:
69 | track_idx = track_indices[row]
70 | detection_idx = detection_indices[col]
71 | if cost_matrix[row, col] > max_distance:
72 | unmatched_tracks.append(track_idx)
73 | unmatched_detections.append(detection_idx)
74 | else:
75 | matches.append((track_idx, detection_idx))
76 | return matches, unmatched_tracks, unmatched_detections
77 |
78 |
79 | def matching_cascade(
80 | distance_metric, max_distance, cascade_depth, tracks, detections,
81 | track_indices=None, detection_indices=None):
82 | """Run matching cascade.
83 |
84 | Parameters
85 | ----------
86 | distance_metric : Callable[[List[Track], List[Detection], List[int], List[int]], ndarray]
87 | The distance metric is given a list of tracks and detections as well as
88 | a list of N track indices and M detection indices. The metric should
89 | return the NxM dimensional cost matrix, where element (i, j) is the
90 | association cost between the i-th track in the given track indices and
91 | the j-th detection in the given detection indices.
92 | max_distance : float
93 | Gating threshold. Associations with cost larger than this value are
94 | disregarded.
95 | cascade_depth: int
96 | The cascade depth, should be set to the maximum track age.
97 | tracks : List[track.Track]
98 | A list of predicted tracks at the current time step.
99 | detections : List[detection.Detection]
100 | A list of detections at the current time step.
101 | track_indices : Optional[List[int]]
102 | List of track indices that maps rows in `cost_matrix` to tracks in
103 | `tracks` (see description above). Defaults to all tracks.
104 | detection_indices : Optional[List[int]]
105 | List of detection indices that maps columns in `cost_matrix` to
106 | detections in `detections` (see description above). Defaults to all
107 | detections.
108 |
109 | Returns
110 | -------
111 | (List[(int, int)], List[int], List[int])
112 | Returns a tuple with the following three entries:
113 | * A list of matched track and detection indices.
114 | * A list of unmatched track indices.
115 | * A list of unmatched detection indices.
116 |
117 | """
118 | if track_indices is None:
119 | track_indices = list(range(len(tracks)))
120 | if detection_indices is None:
121 | detection_indices = list(range(len(detections)))
122 |
123 | unmatched_detections = detection_indices
124 | matches = []
125 | for level in range(cascade_depth):
126 | if len(unmatched_detections) == 0: # No detections left
127 | break
128 |
129 | track_indices_l = [
130 | k for k in track_indices
131 | if tracks[k].time_since_update == 1 + level
132 | ]
133 | if len(track_indices_l) == 0: # Nothing to match at this level
134 | continue
135 |
136 | matches_l, _, unmatched_detections = \
137 | min_cost_matching(
138 | distance_metric, max_distance, tracks, detections,
139 | track_indices_l, unmatched_detections)
140 | matches += matches_l
141 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
142 | return matches, unmatched_tracks, unmatched_detections
143 |
144 |
145 | def gate_cost_matrix(
146 | kf, cost_matrix, tracks, detections, track_indices, detection_indices,
147 | gated_cost=INFTY_COST, only_position=False):
148 | """Invalidate infeasible entries in cost matrix based on the state
149 | distributions obtained by Kalman filtering.
150 |
151 | Parameters
152 | ----------
153 | kf : The Kalman filter.
154 | cost_matrix : ndarray
155 | The NxM dimensional cost matrix, where N is the number of track indices
156 | and M is the number of detection indices, such that entry (i, j) is the
157 | association cost between `tracks[track_indices[i]]` and
158 | `detections[detection_indices[j]]`.
159 | tracks : List[track.Track]
160 | A list of predicted tracks at the current time step.
161 | detections : List[detection.Detection]
162 | A list of detections at the current time step.
163 | track_indices : List[int]
164 | List of track indices that maps rows in `cost_matrix` to tracks in
165 | `tracks` (see description above).
166 | detection_indices : List[int]
167 | List of detection indices that maps columns in `cost_matrix` to
168 | detections in `detections` (see description above).
169 | gated_cost : Optional[float]
170 | Entries in the cost matrix corresponding to infeasible associations are
171 | set this value. Defaults to a very large value.
172 | only_position : Optional[bool]
173 | If True, only the x, y position of the state distribution is considered
174 | during gating. Defaults to False.
175 |
176 | Returns
177 | -------
178 | ndarray
179 | Returns the modified cost matrix.
180 |
181 | """
182 | gating_dim = 2 if only_position else 4
183 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
184 | measurements = np.asarray(
185 | [detections[i].to_xyah() for i in detection_indices])
186 | for row, track_idx in enumerate(track_indices):
187 | track = tracks[track_idx]
188 | gating_distance = kf.gating_distance(
189 | track.mean, track.covariance, measurements, only_position)
190 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost
191 | return cost_matrix
192 |
--------------------------------------------------------------------------------
/deep_sort/nn_matching.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
5 | def _pdist(a, b):
6 | """Compute pair-wise squared distance between points in `a` and `b`.
7 |
8 | Parameters
9 | ----------
10 | a : array_like
11 | An NxM matrix of N samples of dimensionality M.
12 | b : array_like
13 | An LxM matrix of L samples of dimensionality M.
14 |
15 | Returns
16 | -------
17 | ndarray
18 | Returns a matrix of size len(a), len(b) such that element (i, j)
19 | contains the squared distance between `a[i]` and `b[j]`.
20 |
21 | """
22 | a, b = np.asarray(a), np.asarray(b)
23 | if len(a) == 0 or len(b) == 0:
24 | return np.zeros((len(a), len(b)))
25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
27 | r2 = np.clip(r2, 0., float(np.inf))
28 | return r2
29 |
30 |
31 | def _cosine_distance(a, b, data_is_normalized=False):
32 | """Compute pair-wise cosine distance between points in `a` and `b`.
33 |
34 | Parameters
35 | ----------
36 | a : array_like
37 | An NxM matrix of N samples of dimensionality M.
38 | b : array_like
39 | An LxM matrix of L samples of dimensionality M.
40 | data_is_normalized : Optional[bool]
41 | If True, assumes rows in a and b are unit length vectors.
42 | Otherwise, a and b are explicitly normalized to length 1.
43 |
44 | Returns
45 | -------
46 | ndarray
47 | Returns a matrix of size len(a), len(b) such that element (i, j)
48 | contains the cosine distance between `a[i]` and `b[j]`.
49 |
50 | """
51 | if not data_is_normalized:
52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
54 | return 1. - np.dot(a, b.T)
55 |
56 |
57 | def _nn_euclidean_distance(x, y):
58 | """ Helper function for nearest neighbor distance metric (Euclidean).
59 |
60 | Parameters
61 | ----------
62 | x : ndarray
63 | A matrix of N row-vectors (sample points).
64 | y : ndarray
65 | A matrix of M row-vectors (query points).
66 |
67 | Returns
68 | -------
69 | ndarray
70 | A vector of length M that contains for each entry in `y` the
71 | smallest Euclidean distance to a sample in `x`.
72 |
73 | """
74 | distances = _pdist(x, y)
75 | return np.maximum(0.0, distances.min(axis=0))
76 |
77 |
78 | def _nn_cosine_distance(x, y):
79 | """ Helper function for nearest neighbor distance metric (cosine).
80 |
81 | Parameters
82 | ----------
83 | x : ndarray
84 | A matrix of N row-vectors (sample points).
85 | y : ndarray
86 | A matrix of M row-vectors (query points).
87 |
88 | Returns
89 | -------
90 | ndarray
91 | A vector of length M that contains for each entry in `y` the
92 | smallest cosine distance to a sample in `x`.
93 |
94 | """
95 | # distances = _cosine_distance(x, y)  # changed: replaced with custom_cosine_similarity below
96 | distances = custom_cosine_similarity(x, y)
97 | return distances.min(axis=0)
98 |
99 |
100 | class NearestNeighborDistanceMetric(object):
101 | """
102 | A nearest neighbor distance metric that, for each target, returns
103 | the closest distance to any sample that has been observed so far.
104 |
105 | Parameters
106 | ----------
107 | metric : str
108 | Either "euclidean" or "cosine".
109 | matching_threshold: float
110 | The matching threshold. Samples with larger distance are considered an
111 | invalid match.
112 | budget : Optional[int]
113 | If not None, fix samples per class to at most this number. Removes
114 | the oldest samples when the budget is reached.
115 |
116 | Attributes
117 | ----------
118 | samples : Dict[int -> List[ndarray]]
119 | A dictionary that maps from target identities to the list of samples
120 | that have been observed so far.
121 |
122 | """
123 |
124 | def __init__(self, metric, matching_threshold, budget=None):
125 |
126 |
127 | if metric == "euclidean":
128 | self._metric = _nn_euclidean_distance
129 | elif metric == "cosine":
130 | self._metric = _nn_cosine_distance
131 | else:
132 | raise ValueError(
133 | "Invalid metric; must be either 'euclidean' or 'cosine'")
134 | self.matching_threshold = matching_threshold
135 | self.budget = budget
136 | self.samples = {}
137 |
138 | def partial_fit(self, features, targets, active_targets):
139 | """Update the distance metric with new data.
140 |
141 | Parameters
142 | ----------
143 | features : ndarray
144 | An NxM matrix of N features of dimensionality M.
145 | targets : ndarray
146 | An integer array of associated target identities.
147 | active_targets : List[int]
148 | A list of targets that are currently present in the scene.
149 |
150 | """
151 | for feature, target in zip(features, targets):
152 | self.samples.setdefault(target, []).append(feature)
153 | if self.budget is not None:
154 | self.samples[target] = self.samples[target][-self.budget:]
155 | self.samples = {k: self.samples[k] for k in active_targets}
156 |
157 | def distance(self, features, targets):
158 | """Compute distance between features and targets.
159 |
160 | Parameters
161 | ----------
162 | features : ndarray
163 | An NxM matrix of N features of dimensionality M.
164 | targets : List[int]
165 | A list of targets to match the given `features` against.
166 |
167 | Returns
168 | -------
169 | ndarray
170 | Returns a cost matrix of shape len(targets), len(features), where
171 | element (i, j) contains the closest squared distance between
172 | `targets[i]` and `features[j]`.
173 |
174 | """
175 | cost_matrix = np.zeros((len(targets), len(features)))
176 | for i, target in enumerate(targets):
177 | cost_matrix[i, :] = self._metric(self.samples[target], features)
178 | return cost_matrix
179 |
180 | def custom_cosine_similarity(a, b):
181 |
182 | # if not data_is_normalized:
183 | # a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
184 | # b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
185 | # return 1. - np.dot(a, b.T)
186 | # [[]] [[]]
187 |
188 | aa = np.linalg.norm(a, axis=1, keepdims=True)
189 | bb = np.linalg.norm(b, axis=1, keepdims=True)
190 | norm_mat = np.dot(aa, bb.T)
191 | return 1. - (np.dot(a, b.T) / norm_mat)
192 |
193 |
194 | # a = np.matmul(np.transpose(source_representation), test_representation)
195 | # b = np.sum(np.multiply(source_representation, source_representation))
196 | # c = np.sum(np.multiply(test_representation, test_representation))
197 | # return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
--------------------------------------------------------------------------------
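A rough sketch of how the tracker uses this metric: `partial_fit` caches features per confirmed track id, and `distance` builds the association cost matrix. Feature values and the threshold below are illustrative placeholders:

    import numpy as np
    from deep_sort.nn_matching import NearestNeighborDistanceMetric

    metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.4, budget=30)

    stored = np.random.rand(3, 512)                 # 3 features already matched to tracks
    metric.partial_fit(stored, targets=np.array([1, 1, 2]), active_targets=[1, 2])

    cost = metric.distance(np.random.rand(2, 512), targets=[1, 2])
    print(cost.shape)                               # (2, 2): rows = track ids, cols = new detections
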
/deep_sort/preprocessing.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import cv2
4 |
5 |
6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None):
7 | """Suppress overlapping detections.
8 |
9 | Original code from [1]_ has been adapted to include confidence score.
10 |
11 | .. [1] http://www.pyimagesearch.com/2015/02/16/
12 | faster-non-maximum-suppression-python/
13 |
14 | Examples
15 | --------
16 |
17 | >>> boxes = [d.roi for d in detections]
18 | >>> classes = [d.classes for d in detections]
19 | >>> scores = [d.confidence for d in detections]
20 | >>> indices = non_max_suppression(boxes, classes, max_bbox_overlap, scores)
21 | >>> detections = [detections[i] for i in indices]
22 |
23 | Parameters
24 | ----------
25 | boxes : ndarray
26 | Array of ROIs (x, y, width, height).
27 | max_bbox_overlap : float
28 | ROIs that overlap more than this value are suppressed.
29 | scores : Optional[array_like]
30 | Detector confidence score.
31 |
32 | Returns
33 | -------
34 | List[int]
35 | Returns indices of detections that have survived non-maxima suppression.
36 |
37 | """
38 | if len(boxes) == 0:
39 | return []
40 |
41 | boxes = boxes.astype(float)
42 | pick = []
43 |
44 | x1 = boxes[:, 0]
45 | y1 = boxes[:, 1]
46 | x2 = boxes[:, 2] + boxes[:, 0]
47 | y2 = boxes[:, 3] + boxes[:, 1]
48 |
49 | area = (x2 - x1 + 1) * (y2 - y1 + 1)
50 | if scores is not None:
51 | idxs = np.argsort(scores)
52 | else:
53 | idxs = np.argsort(y2)
54 |
55 | while len(idxs) > 0:
56 | last = len(idxs) - 1
57 | i = idxs[last]
58 | pick.append(i)
59 |
60 | xx1 = np.maximum(x1[i], x1[idxs[:last]])
61 | yy1 = np.maximum(y1[i], y1[idxs[:last]])
62 | xx2 = np.minimum(x2[i], x2[idxs[:last]])
63 | yy2 = np.minimum(y2[i], y2[idxs[:last]])
64 |
65 | w = np.maximum(0, xx2 - xx1 + 1)
66 | h = np.maximum(0, yy2 - yy1 + 1)
67 |
68 | overlap = (w * h) / area[idxs[:last]]
69 |
70 | idxs = np.delete(
71 | idxs, np.concatenate(
72 | ([last], np.where(overlap > max_bbox_overlap)[0])))
73 |
74 | return pick
75 |
--------------------------------------------------------------------------------
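An illustrative call with hand-made boxes; note that `classes` is accepted but not used by the suppression logic itself:

    import numpy as np
    from deep_sort.preprocessing import non_max_suppression

    boxes = np.array([[10, 10, 50, 50],        # (x, y, w, h)
                      [12, 12, 50, 50],        # near-duplicate of the first box
                      [200, 200, 40, 40]])
    scores = np.array([0.9, 0.8, 0.7])
    classes = ["face", "face", "face"]

    keep = non_max_suppression(boxes, classes, max_bbox_overlap=0.5, scores=scores)
    print(keep)                                # [0, 2]: the lower-scored duplicate is dropped
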
/deep_sort/track.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from .nn_matching import _cosine_distance, custom_cosine_similarity
3 | import statistics as st
4 | import numpy as np
5 | import time
6 | import sys
7 | import os
8 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
9 | from deepface.commons import functions, distance as dst
10 |
11 | def find_face(features, face_db, max_face_threshold):
12 | face_score = dict()
13 | for face in face_db:
14 | if face_db[face]["used"]:
15 | face_score[face] = 1
16 | continue
17 | cos_harmonic = []
18 | # cos_mat = _cosine_distance(features, face_db[face]["db"], data_is_normalized=True)
19 | cos_mat = custom_cosine_similarity(features, face_db[face]["db"])
20 | # print(cos_mat)
21 |
22 | # time.sleep(7)
23 |
24 | # for f in face_db[face]:
25 | # cos_harmonic.append(_nn_cosine_distance(feature, f))
26 | # cos_harmonic = st.harmonic_mean(list(cos_mat.flatten()))
27 | # cos_harmonic = np.mean(cos_mat.flatten())
28 | # face_score[face] = cos_harmonic
29 |
30 | # distance = dst.findCosineDistance(features[0], face_db[face]["db"][0])
31 | # print(distance)
32 |
33 | face_score[face] = cos_mat.min(axis=1)
34 | # print(face_score)
35 | print(face_score)
36 | ans_face = min(face_score,key=face_score.get)
37 |
38 | # print(ans_face, face_score)
39 |
40 | # time.sleep(5)
41 |
42 | if face_score[ans_face] < max_face_threshold:
43 | face_db[ans_face]["used"] = True
44 | return ans_face
45 | else:
46 | return ""
47 |
48 | class TrackState:
49 | """
50 | Enumeration type for the single target track state. Newly created tracks are
51 | classified as `tentative` until enough evidence has been collected. Then,
52 | the track state is changed to `confirmed`. Tracks that are no longer alive
53 | are classified as `deleted` to mark them for removal from the set of active
54 | tracks.
55 |
56 | """
57 |
58 | Tentative = 1
59 | Confirmed = 2
60 | Deleted = 3
61 |
62 |
63 | class Track:
64 | """
65 | A single target track with state space `(x, y, a, h)` and associated
66 | velocities, where `(x, y)` is the center of the bounding box, `a` is the
67 | aspect ratio and `h` is the height.
68 |
69 | Parameters
70 | ----------
71 | mean : ndarray
72 | Mean vector of the initial state distribution.
73 | covariance : ndarray
74 | Covariance matrix of the initial state distribution.
75 | track_id : int
76 | A unique track identifier.
77 | n_init : int
78 | Number of consecutive detections before the track is confirmed. The
79 | track state is set to `Deleted` if a miss occurs within the first
80 | `n_init` frames.
81 | max_age : int
82 | The maximum number of consecutive misses before the track state is
83 | set to `Deleted`.
84 | feature : Optional[ndarray]
85 | Feature vector of the detection this track originates from. If not None,
86 | this feature is added to the `features` cache.
87 |
88 | Attributes
89 | ----------
90 | mean : ndarray
91 | Mean vector of the initial state distribution.
92 | covariance : ndarray
93 | Covariance matrix of the initial state distribution.
94 | track_id : int
95 | A unique track identifier.
96 | hits : int
97 | Total number of measurement updates.
98 | age : int
99 | Total number of frames since first occurrence.
100 | time_since_update : int
101 | Total number of frames since last measurement update.
102 | state : TrackState
103 | The current track state.
104 | features : List[ndarray]
105 | A cache of features. On each measurement update, the associated feature
106 | vector is added to this list.
107 | face_name : string
108 | Name matched from the existing face database (empty string if none).
109 | """
110 |
111 | def __init__(self, mean, covariance, track_id, n_init, max_age, face_db, max_face_threshold,
112 | feature=None, class_name=None):
113 | self.mean = mean
114 | self.covariance = covariance
115 | self.track_id = track_id
116 | self.hits = 1
117 | self.age = 1
118 | self.time_since_update = 0
119 |
120 |
121 | self.state = TrackState.Tentative
122 | self.features = []
123 | self.face_name = ""
124 | if feature is not None:
125 | self.features.append(feature)
126 | self.face_name = find_face(self.features, face_db, max_face_threshold)
127 |
128 | self._n_init = n_init
129 | self._max_age = max_age
130 | self.class_name = class_name
131 |
132 | # def __init__(self, mean, covariance, track_id, n_init, max_age,
133 | # feature=None, class_name=None):
134 | # self.mean = mean
135 | # self.covariance = covariance
136 | # self.track_id = track_id
137 | # self.hits = 1
138 | # self.age = 1
139 | # self.time_since_update = 0
140 |
141 |
142 | # self.state = TrackState.Tentative
143 | # self.features = []
144 | # # self.face_name = ""
145 | # if feature is not None:
146 | # self.features.append(feature)
147 | # # self.face_name = find_face(self.features, face_db, max_face_threshold)
148 |
149 | # self._n_init = n_init
150 | # self._max_age = max_age
151 | # self.class_name = class_name
152 |
153 |
154 |
155 |
156 | def to_tlwh(self):
157 | """Get current position in bounding box format `(top left x, top left y,
158 | width, height)`.
159 |
160 | Returns
161 | -------
162 | ndarray
163 | The bounding box.
164 |
165 | """
166 | ret = self.mean[:4].copy()
167 | ret[2] *= ret[3]
168 | ret[:2] -= ret[2:] / 2
169 | return ret
170 |
171 | def to_tlbr(self):
172 | """Get current position in bounding box format `(min x, miny, max x,
173 | max y)`.
174 |
175 | Returns
176 | -------
177 | ndarray
178 | The bounding box.
179 |
180 | """
181 | ret = self.to_tlwh()
182 | ret[2:] = ret[:2] + ret[2:]
183 | return ret
184 |
185 | def get_class(self):
186 | return self.class_name
187 |
188 | def get_face_name(self):
189 | return self.face_name
190 |
191 | def predict(self, kf):
192 | """Propagate the state distribution to the current time step using a
193 | Kalman filter prediction step.
194 |
195 | Parameters
196 | ----------
197 | kf : kalman_filter.KalmanFilter
198 | The Kalman filter.
199 |
200 | """
201 | self.mean, self.covariance = kf.predict(self.mean, self.covariance)
202 | self.age += 1
203 | self.time_since_update += 1
204 |
205 | def update(self, kf, detection):
206 | """Perform Kalman filter measurement update step and update the feature
207 | cache.
208 |
209 | Parameters
210 | ----------
211 | kf : kalman_filter.KalmanFilter
212 | The Kalman filter.
213 | detection : Detection
214 | The associated detection.
215 |
216 | """
217 | self.mean, self.covariance = kf.update(
218 | self.mean, self.covariance, detection.to_xyah())
219 | self.features.append(detection.feature)
220 |
221 | self.hits += 1
222 | self.time_since_update = 0
223 | if self.state == TrackState.Tentative and self.hits >= self._n_init:
224 | self.state = TrackState.Confirmed
225 |
226 | def mark_missed(self, face_db):
227 | """Mark this track as missed (no association at the current time step).
228 | """
229 | if self.state == TrackState.Tentative:
230 | if self.face_name != "":
231 | face_db[self.face_name]["used"] = False
232 | self.state = TrackState.Deleted
233 | elif self.time_since_update > self._max_age:
234 | if self.face_name != "":
235 | face_db[self.face_name]["used"] = False
236 | self.state = TrackState.Deleted
237 |
238 | def is_tentative(self):
239 | """Returns True if this track is tentative (unconfirmed).
240 | """
241 | return self.state == TrackState.Tentative
242 |
243 | def is_confirmed(self):
244 | """Returns True if this track is confirmed."""
245 | return self.state == TrackState.Confirmed
246 |
247 | def is_deleted(self):
248 | """Returns True if this track is dead and should be deleted."""
249 | return self.state == TrackState.Deleted
250 |
251 | def find_face_name(self, face_db, max_face_threshold):
252 | face_score = dict()
253 | for face in face_db:
254 | if face_db[face]["used"]:
255 | face_score[face] = 1
256 | continue
257 | cos_mat = custom_cosine_similarity(self.features, face_db[face]["db"])
258 | # print(cos_mat)
259 |
260 | # time.sleep(7)
261 |
262 | face_score[face] = cos_mat.min(axis=1).min(axis=0)
263 | # print(face_score)
264 | ans_face = min(face_score,key=face_score.get)
265 |
266 | # print(ans_face, face_score)
267 |
268 | # time.sleep(5)
269 |
270 | if face_score[ans_face] < max_face_threshold:
271 | face_db[ans_face]["used"] = True
272 | self.face_name = ans_face
273 | else:
274 | self.face_name = ""
--------------------------------------------------------------------------------
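From `find_face` and `find_face_name`, the `face_db` passed around here appears to be a dict keyed by person name, holding that person's reference embeddings and a `used` flag. A minimal sketch of that assumed layout:

    import numpy as np

    # Assumed structure, one entry per folder in resources/database/<set>/<name>/
    face_db = {
        "ironman":  {"db": np.random.rand(3, 512), "used": False},   # one 512-d ArcFace embedding per reference image
        "kimpoong": {"db": np.random.rand(2, 512), "used": False},   # "used" is True while a live track holds the name
    }

    # find_face(features, face_db, max_face_threshold) returns the best-matching unused name
    # when its minimum cosine distance falls below max_face_threshold
    # (0.68 in .vscode/launch.json), and "" otherwise.
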
/deep_sort/tracker.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | from __future__ import absolute_import
3 | import numpy as np
4 | from . import kalman_filter
5 | from . import linear_assignment
6 | from . import iou_matching
7 | from .track import Track
8 |
9 |
10 | class Tracker:
11 | """
12 | This is the multi-target tracker.
13 |
14 | Parameters
15 | ----------
16 | metric : nn_matching.NearestNeighborDistanceMetric
17 | A distance metric for measurement-to-track association.
18 | max_age : int
19 | Maximum number of consecutive misses before a track is deleted.
20 | n_init : int
21 | Number of consecutive detections before the track is confirmed. The
22 | track state is set to `Deleted` if a miss occurs within the first
23 | `n_init` frames.
24 |
25 | Attributes
26 | ----------
27 | metric : nn_matching.NearestNeighborDistanceMetric
28 | The distance metric used for measurement to track association.
29 | max_age : int
30 | Maximum number of consecutive misses before a track is deleted.
31 | n_init : int
32 | Number of frames that a track remains in initialization phase.
33 | kf : kalman_filter.KalmanFilter
34 | A Kalman filter to filter target trajectories in image space.
35 | tracks : List[Track]
36 | The list of active tracks at the current time step.
37 |
38 | """
39 |
40 | def __init__(self, metric, max_iou_distance=0.7, max_age=3, n_init=3): # max_age lowered from 30 to 3 so stale tracks are removed quickly
41 | self.metric = metric
42 | self.max_iou_distance = max_iou_distance
43 | self.max_age = max_age
44 | self.n_init = n_init
45 |
46 | self.kf = kalman_filter.KalmanFilter()
47 | self.tracks = []
48 | self._next_id = 1
49 |
50 | def predict(self):
51 | """Propagate track state distributions one time step forward.
52 |
53 | This function should be called once every time step, before `update`.
54 | """
55 | for track in self.tracks:
56 | track.predict(self.kf)
57 |
58 |
59 | # Update step that also looks up names in the face db
60 | def update(self, detections, face_db, max_face_threshold):
61 | """Perform measurement update and track management.
62 |
63 | Parameters
64 | ----------
65 | detections : List[deep_sort.detection.Detection]
66 | A list of detections at the current time step.
67 |
68 | """
69 | # Run matching cascade.
70 | matches, unmatched_tracks, unmatched_detections = \
71 | self._match(detections)
72 |
73 | # Update track set.
74 | for i in face_db:
75 | face_db[i]["used"] = False # reset every identity to "not detected" for this frame
76 |
77 | for track_idx, detection_idx in matches:
78 | self.tracks[track_idx].update(
79 | self.kf, detections[detection_idx])
80 |
81 | if self.tracks[track_idx].get_face_name() == "":
82 | self.tracks[track_idx].find_face_name(face_db, max_face_threshold)
83 |
84 | for track_idx in unmatched_tracks:
85 | self.tracks[track_idx].mark_missed(face_db) # if the track is lost, release its name back to face_db
86 | for detection_idx in unmatched_detections:
87 | self._initiate_track(detections[detection_idx], face_db, max_face_threshold)
88 | self.tracks = [t for t in self.tracks if not t.is_deleted()]
89 |
90 | # Update distance metric.
91 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
92 | features, targets = [], []
93 | for track in self.tracks:
94 | if not track.is_confirmed():
95 | continue
96 | features += track.features
97 | targets += [track.track_id for _ in track.features]
98 | track.features = []
99 | self.metric.partial_fit(
100 | np.asarray(features), np.asarray(targets), active_targets)
101 |
102 | # def update(self, detections):
103 | # """Perform measurement update and track management.
104 |
105 | # Parameters
106 | # ----------
107 | # detections : List[deep_sort.detection.Detection]
108 | # A list of detections at the current time step.
109 |
110 | # """
111 | # # Run matching cascade.
112 | # matches, unmatched_tracks, unmatched_detections = \
113 | # self._match(detections)
114 |
115 | # # Update track set.
116 | # for track_idx, detection_idx in matches:
117 | # self.tracks[track_idx].update(
118 | # self.kf, detections[detection_idx])
119 | # for track_idx in unmatched_tracks:
120 | # self.tracks[track_idx].mark_missed()
121 | # for detection_idx in unmatched_detections:
122 | # self._initiate_track(detections[detection_idx])
123 | # self.tracks = [t for t in self.tracks if not t.is_deleted()]
124 |
125 | # # Update distance metric.
126 | # active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
127 | # features, targets = [], []
128 | # for track in self.tracks:
129 | # if not track.is_confirmed():
130 | # continue
131 | # features += track.features
132 | # targets += [track.track_id for _ in track.features]
133 | # track.features = []
134 | # self.metric.partial_fit(
135 | # np.asarray(features), np.asarray(targets), active_targets)
136 |
137 |
138 | def _match(self, detections):
139 |
140 | def gated_metric(tracks, dets, track_indices, detection_indices):
141 | features = np.array([dets[i].feature for i in detection_indices])
142 | targets = np.array([tracks[i].track_id for i in track_indices])
143 | cost_matrix = self.metric.distance(features, targets)
144 | cost_matrix = linear_assignment.gate_cost_matrix(
145 | self.kf, cost_matrix, tracks, dets, track_indices,
146 | detection_indices)
147 |
148 | return cost_matrix
149 |
150 | # Split track set into confirmed and unconfirmed tracks.
151 | confirmed_tracks = [
152 | i for i, t in enumerate(self.tracks) if t.is_confirmed()]
153 | unconfirmed_tracks = [
154 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
155 |
156 | # Associate confirmed tracks using appearance features.
157 | matches_a, unmatched_tracks_a, unmatched_detections = \
158 | linear_assignment.matching_cascade(
159 | gated_metric, self.metric.matching_threshold, self.max_age,
160 | self.tracks, detections, confirmed_tracks)
161 |
162 | # Associate remaining tracks together with unconfirmed tracks using IOU.
163 | iou_track_candidates = unconfirmed_tracks + [
164 | k for k in unmatched_tracks_a if
165 | self.tracks[k].time_since_update == 1]
166 | unmatched_tracks_a = [
167 | k for k in unmatched_tracks_a if
168 | self.tracks[k].time_since_update != 1]
169 | matches_b, unmatched_tracks_b, unmatched_detections = \
170 | linear_assignment.min_cost_matching(
171 | iou_matching.iou_cost, self.max_iou_distance, self.tracks,
172 | detections, iou_track_candidates, unmatched_detections)
173 |
174 | matches = matches_a + matches_b
175 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
176 | return matches, unmatched_tracks, unmatched_detections
177 |
178 |
179 | # Track creation; the face lookup runs when the Track is constructed
180 | def _initiate_track(self, detection, face_db, max_face_threshold):
181 | mean, covariance = self.kf.initiate(detection.to_xyah())
182 | class_name = detection.get_class()
183 | self.tracks.append(Track(
184 | mean, covariance, self._next_id, self.n_init, self.max_age,
185 | face_db, max_face_threshold, detection.feature, class_name))
186 | self._next_id += 1
187 |
188 | # def _initiate_track(self, detection):
189 | # mean, covariance = self.kf.initiate(detection.to_xyah())
190 | # class_name = detection.get_class()
191 | # self.tracks.append(Track(
192 | # mean, covariance, self._next_id, self.n_init, self.max_age,
193 | # detection.feature, class_name))
194 | # self._next_id += 1
195 |
--------------------------------------------------------------------------------
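A rough single-frame driver under the same assumptions (the detection box, score, and ArcFace feature below are fabricated; in the project they come from object_tracker.py):

    import numpy as np
    from deep_sort import nn_matching
    from deep_sort.detection import Detection
    from deep_sort.tracker import Tracker

    face_db = {"ironman": {"db": np.random.rand(3, 512), "used": False}}   # see the face_db sketch above
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", matching_threshold=0.4)
    tracker = Tracker(metric)

    # One fake frame: a single face box (x, y, w, h), its score, and a 512-d embedding.
    detections = [Detection([100, 80, 60, 75], 0.95, "face", np.random.rand(512))]

    tracker.predict()
    tracker.update(detections, face_db, max_face_threshold=0.68)   # 0.68 as in .vscode/launch.json
    for track in tracker.tracks:
        print(track.track_id, track.get_face_name(), track.to_tlwh())
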
/deepface/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/__init__.py
--------------------------------------------------------------------------------
/deepface/basemodels/ArcFace.py:
--------------------------------------------------------------------------------
1 | from tensorflow.python.keras import backend
2 | from tensorflow.python.keras.engine import training
3 | from tensorflow.python.keras.utils import data_utils
4 | from tensorflow.python.keras.utils import layer_utils
5 | from tensorflow.python.lib.io import file_io
6 | import tensorflow
7 | from tensorflow import keras
8 |
9 | import os
10 | from pathlib import Path
11 | import gdown
12 |
13 | def loadModel(model_path):
14 | base_model = ResNet34()
15 | inputs = base_model.inputs[0]
16 | arcface_model = base_model.outputs[0]
17 | arcface_model = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5)(arcface_model)
18 | arcface_model = keras.layers.Dropout(0.4)(arcface_model)
19 | arcface_model = keras.layers.Flatten()(arcface_model)
20 | arcface_model = keras.layers.Dense(512, activation=None, use_bias=True, kernel_initializer="glorot_normal")(arcface_model)
21 | embedding = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5, name="embedding", scale=True)(arcface_model)
22 | # embedding = tensorflow.reshape(embedding, [-1, 512, 1])
23 | # embedding = keras.layers.MaxPooling1D(pool_size=4, strides=4, padding="valid")(embedding)
24 | # embedding = keras.layers.Flatten()(embedding)
25 | model = keras.models.Model(inputs, embedding, name=base_model.name)
26 |
27 | #---------------------------------------
28 | #check the availability of pre-trained weights
29 |
30 | # home = str(Path.home())
31 | url = "https://drive.google.com/uc?id=1LVB3CdVejpmGHM28BpqqkbZP5hDEcdZY"
32 | # file_name = "arcface_weights.h5"
33 | # output = home+'/deepface/weights/'+file_name
34 | # print(output)
35 |
36 |
37 | # if os.path.isfile(model_path) != True:
38 |
39 | # print(file_name," will be downloaded to ",model_path)
40 | # gdown.download(url, model_path, quiet=False)
41 |
42 | #---------------------------------------
43 |
44 | try:
45 | model.load_weights(model_path)
46 | except:
47 | print("pre-trained weights could not be loaded.")
48 | # print("You might try to download it from the url ", url," and copy to ",output," manually")
49 |
50 | return model
51 |
52 | def ResNet34():
53 |
54 | img_input = tensorflow.keras.layers.Input(shape=(112, 112, 3))
55 |
56 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name='conv1_pad')(img_input)
57 | x = tensorflow.keras.layers.Conv2D(64, 3, strides=1, use_bias=False, kernel_initializer='glorot_normal', name='conv1_conv')(x)
58 | x = tensorflow.keras.layers.BatchNormalization(axis=3, epsilon=2e-5, momentum=0.9, name='conv1_bn')(x)
59 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name='conv1_prelu')(x)
60 | x = stack_fn(x)
61 |
62 | model = training.Model(img_input, x, name='ResNet34')
63 |
64 | return model
65 |
66 | def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None):
67 | bn_axis = 3
68 |
69 | if conv_shortcut:
70 | shortcut = tensorflow.keras.layers.Conv2D(filters, 1, strides=stride, use_bias=False, kernel_initializer='glorot_normal', name=name + '_0_conv')(x)
71 | shortcut = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_0_bn')(shortcut)
72 | else:
73 | shortcut = x
74 |
75 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_1_bn')(x)
76 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_1_pad')(x)
77 | x = tensorflow.keras.layers.Conv2D(filters, 3, strides=1, kernel_initializer='glorot_normal', use_bias=False, name=name + '_1_conv')(x)
78 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_2_bn')(x)
79 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name=name + '_1_prelu')(x)
80 |
81 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_2_pad')(x)
82 | x = tensorflow.keras.layers.Conv2D(filters, kernel_size, strides=stride, kernel_initializer='glorot_normal', use_bias=False, name=name + '_2_conv')(x)
83 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_3_bn')(x)
84 |
85 | x = tensorflow.keras.layers.Add(name=name + '_add')([shortcut, x])
86 | return x
87 |
88 | def stack1(x, filters, blocks, stride1=2, name=None):
89 | x = block1(x, filters, stride=stride1, name=name + '_block1')
90 | for i in range(2, blocks + 1):
91 | x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i))
92 | return x
93 |
94 | def stack_fn(x):
95 | x = stack1(x, 64, 3, name='conv2')
96 | x = stack1(x, 128, 4, name='conv3')
97 | x = stack1(x, 256, 6, name='conv4')
98 | return stack1(x, 512, 3, name='conv5')
--------------------------------------------------------------------------------
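
A minimal usage sketch of the ArcFace loader above (not part of the repository): it assumes the weights file has already been downloaded to a local path, and the [0, 1] pixel scaling mirrors functions.preprocess_face; both the weights path and the test image are hypothetical.

import cv2
import numpy as np
from deepface.basemodels import ArcFace

weights_path = "arcface_weights.h5"            # hypothetical local path to the pre-trained weights
model = ArcFace.loadModel(weights_path)        # expects 112x112x3 input, outputs a 512-d embedding

face = cv2.imread("face.jpg")                  # hypothetical, already cropped/aligned face image
face = cv2.resize(face, (112, 112))
face = np.expand_dims(face.astype("float32") / 255.0, axis=0)  # [0, 1] scaling is an assumption
embedding = model.predict(face)[0]             # shape (512,)
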
/deepface/basemodels/Boosting.py:
--------------------------------------------------------------------------------
1 | from deepface import DeepFace
2 | from tqdm import tqdm
3 | import os
4 | from os import path
5 | from pathlib import Path
6 | import numpy as np
7 | import gdown
8 | from deepface.commons import functions, distance as dst
9 |
10 | def loadModel():
11 |
12 | model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']
13 |
14 | model = {}
15 |
16 | model_pbar = tqdm(range(0, 4), desc='Face recognition models')
17 |
18 | for index in model_pbar:
19 |
20 | model_name = model_names[index]
21 |
22 | model_pbar.set_description("Loading %s" % (model_name))
23 | model[model_name] = DeepFace.build_model(model_name)
24 |
25 | return model
26 |
27 | def validate_model(model):
28 | #validate model dictionary because it might be passed from input as pre-trained
29 | found_models = []
30 | for key, value in model.items():
31 | found_models.append(key)
32 |
33 | if ('VGG-Face' in found_models) and ('Facenet' in found_models) and ('OpenFace' in found_models) and ('DeepFace' in found_models):
34 | #print("Ensemble learning will be applied for ", found_models," models")
35 | valid = True
36 | else:
37 |
38 | missing_ones = set(['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']) - set(found_models)
39 |
40 | raise ValueError("Ensemble learning requires the VGG-Face, Facenet, OpenFace and DeepFace models, but you passed "+str(found_models)+". Please pass the missing "+str(missing_ones)+" models as well.")
41 |
42 | def build_gbm():
43 |
44 | #this is not a must dependency
45 | import lightgbm as lgb #lightgbm==2.3.1
46 |
47 | home = str(Path.home())
48 |
49 | if os.path.isfile(home+'/.deepface/weights/face-recognition-ensemble-model.txt') != True:
50 | print("face-recognition-ensemble-model.txt will be downloaded...")
51 | url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt'
52 | output = home+'/.deepface/weights/face-recognition-ensemble-model.txt'
53 | gdown.download(url, output, quiet=False)
54 |
55 | ensemble_model_path = home+'/.deepface/weights/face-recognition-ensemble-model.txt'
56 |
57 | deepface_ensemble = lgb.Booster(model_file = ensemble_model_path)
58 |
59 | return deepface_ensemble
60 |
--------------------------------------------------------------------------------
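
A minimal sketch of how the ensemble helpers above fit together (lightgbm must be installed for build_gbm; the four recognition models are downloaded on first use).

from deepface.basemodels import Boosting

models = Boosting.loadModel()        # builds VGG-Face, Facenet, OpenFace and DeepFace
Boosting.validate_model(models)      # raises ValueError if any of the four is missing
gbm = Boosting.build_gbm()           # downloads and loads the LightGBM ensemble model
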
/deepface/basemodels/DeepID.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import gdown
4 | import zipfile
5 |
6 | from tensorflow import keras
7 | from tensorflow.keras.models import Model
8 | from tensorflow.keras.layers import Conv2D, Activation, Input, Add, MaxPooling2D, Flatten, Dense, Dropout
9 |
10 | #-------------------------------------
11 |
12 | def loadModel(url = 'https://drive.google.com/uc?id=1uRLtBCTQQAvHJ_KVrdbRJiCKxU8m5q2J'):
13 |
14 | myInput = Input(shape=(55, 47, 3))
15 |
16 | x = Conv2D(20, (4, 4), name='Conv1', activation='relu', input_shape=(55, 47, 3))(myInput)
17 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool1')(x)
18 | x = Dropout(rate=0.99, name='D1')(x)
19 |
20 | x = Conv2D(40, (3, 3), name='Conv2', activation='relu')(x)
21 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool2')(x)
22 | x = Dropout(rate=0.99, name='D2')(x)
23 |
24 | x = Conv2D(60, (3, 3), name='Conv3', activation='relu')(x)
25 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool3')(x)
26 | x = Dropout(rate=0.99, name='D3')(x)
27 |
28 | x1 = Flatten()(x)
29 | fc11 = Dense(160, name = 'fc11')(x1)
30 |
31 | x2 = Conv2D(80, (2, 2), name='Conv4', activation='relu')(x)
32 | x2 = Flatten()(x2)
33 | fc12 = Dense(160, name = 'fc12')(x2)
34 |
35 | y = Add()([fc11, fc12])
36 | y = Activation('relu', name = 'deepid')(y)
37 |
38 | model = Model(inputs=[myInput], outputs=y)
39 |
40 | #---------------------------------
41 |
42 | home = str(Path.home())
43 |
44 | if os.path.isfile(home+'/.deepface/weights/deepid_keras_weights.h5') != True:
45 | print("deepid_keras_weights.h5 will be downloaded...")
46 |
47 | output = home+'/.deepface/weights/deepid_keras_weights.h5'
48 | gdown.download(url, output, quiet=False)
49 |
50 | model.load_weights(home+'/.deepface/weights/deepid_keras_weights.h5')
51 |
52 | return model
--------------------------------------------------------------------------------
/deepface/basemodels/DlibResNet.py:
--------------------------------------------------------------------------------
1 | import os
2 | import zipfile
3 | import bz2
4 | import gdown
5 | import numpy as np
6 | from pathlib import Path
7 |
8 | class DlibResNet:
9 |
10 | def __init__(self):
11 |
12 | #this is not a must dependency
13 | import dlib #19.20.0
14 |
15 | self.layers = [DlibMetaData()]
16 |
17 | #---------------------
18 |
19 | home = str(Path.home())
20 | weight_file = home+'/.deepface/weights/dlib_face_recognition_resnet_model_v1.dat'
21 |
22 | #---------------------
23 |
24 | #download pre-trained model if it does not exist
25 | if os.path.isfile(weight_file) != True:
26 | print("dlib_face_recognition_resnet_model_v1.dat is going to be downloaded")
27 |
28 | url = "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2"
29 | output = home+'/.deepface/weights/'+url.split("/")[-1]
30 | gdown.download(url, output, quiet=False)
31 |
32 | bz2_file = bz2.BZ2File(output) #avoid shadowing the imported zipfile module
33 | data = bz2_file.read()
34 | newfilepath = output[:-4] #discard .bz2 extension
35 | open(newfilepath, 'wb').write(data)
36 |
37 | #---------------------
38 |
39 | model = dlib.face_recognition_model_v1(weight_file)
40 | self.__model = model
41 |
42 | #---------------------
43 |
44 | return None #__init__ must return None
45 |
46 | def predict(self, img_aligned):
47 |
48 | #functions.detectFace returns 4 dimensional images
49 | if len(img_aligned.shape) == 4:
50 | img_aligned = img_aligned[0]
51 |
52 | #functions.detectFace returns bgr images
53 | img_aligned = img_aligned[:,:,::-1] #bgr to rgb
54 |
55 | #deepface.detectFace returns an array in scale of [0, 1] but dlib expects in scale of [0, 255]
56 | if img_aligned.max() <= 1:
57 | img_aligned = img_aligned * 255
58 |
59 | img_aligned = img_aligned.astype(np.uint8)
60 |
61 | model = self.__model
62 |
63 | img_representation = model.compute_face_descriptor(img_aligned)
64 |
65 | img_representation = np.array(img_representation)
66 | img_representation = np.expand_dims(img_representation, axis = 0)
67 |
68 | return img_representation
69 |
70 | class DlibMetaData:
71 | def __init__(self):
72 | self.input_shape = [[1, 150, 150, 3]]
--------------------------------------------------------------------------------
/deepface/basemodels/DlibWrapper.py:
--------------------------------------------------------------------------------
1 | from deepface.basemodels.DlibResNet import DlibResNet
2 |
3 | def loadModel():
4 | return DlibResNet()
--------------------------------------------------------------------------------
/deepface/basemodels/FbDeepFace.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import gdown
4 | import zipfile
5 |
6 | from tensorflow import keras
7 | from tensorflow.keras.models import Model, Sequential
8 | from tensorflow.keras.layers import Convolution2D, LocallyConnected2D, MaxPooling2D, Flatten, Dense, Dropout
9 |
10 | #-------------------------------------
11 |
12 | def loadModel(url = 'https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip'):
13 | base_model = Sequential()
14 | base_model.add(Convolution2D(32, (11, 11), activation='relu', name='C1', input_shape=(152, 152, 3)))
15 | base_model.add(MaxPooling2D(pool_size=3, strides=2, padding='same', name='M2'))
16 | base_model.add(Convolution2D(16, (9, 9), activation='relu', name='C3'))
17 | base_model.add(LocallyConnected2D(16, (9, 9), activation='relu', name='L4'))
18 | base_model.add(LocallyConnected2D(16, (7, 7), strides=2, activation='relu', name='L5') )
19 | base_model.add(LocallyConnected2D(16, (5, 5), activation='relu', name='L6'))
20 | base_model.add(Flatten(name='F0'))
21 | base_model.add(Dense(4096, activation='relu', name='F7'))
22 | base_model.add(Dropout(rate=0.5, name='D0'))
23 | base_model.add(Dense(8631, activation='softmax', name='F8'))
24 |
25 | #---------------------------------
26 |
27 | home = str(Path.home())
28 |
29 | if os.path.isfile(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5') != True:
30 | print("VGGFace2_DeepFace_weights_val-0.9034.h5 will be downloaded...")
31 |
32 | output = home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5.zip'
33 |
34 | gdown.download(url, output, quiet=False)
35 |
36 | #unzip VGGFace2_DeepFace_weights_val-0.9034.h5.zip
37 | with zipfile.ZipFile(output, 'r') as zip_ref:
38 | zip_ref.extractall(home+'/.deepface/weights/')
39 |
40 | base_model.load_weights(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5')
41 |
42 | #drop F8 and D0. F7 is the representation layer.
43 | deepface_model = Model(inputs=base_model.layers[0].input, outputs=base_model.layers[-3].output)
44 |
45 | return deepface_model
--------------------------------------------------------------------------------
/deepface/basemodels/OpenFace.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import gdown
4 |
5 | import tensorflow as tf
6 | from tensorflow import keras
7 | from tensorflow.keras.models import Model, Sequential
8 | from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
9 | from tensorflow.keras.layers import Dense, Activation, Lambda, Flatten, BatchNormalization
10 | from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D
11 | from tensorflow.keras.models import load_model
12 | from tensorflow.keras import backend as K
13 |
14 | #---------------------------------------
15 |
16 | def loadModel(url = 'https://drive.google.com/uc?id=1LSe1YCV1x-BfNnfb7DFZTNpv_Q9jITxn'):
17 | myInput = Input(shape=(96, 96, 3))
18 |
19 | x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput)
20 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
21 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x)
22 | x = Activation('relu')(x)
23 | x = ZeroPadding2D(padding=(1, 1))(x)
24 | x = MaxPooling2D(pool_size=3, strides=2)(x)
25 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_1')(x)
26 | x = Conv2D(64, (1, 1), name='conv2')(x)
27 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x)
28 | x = Activation('relu')(x)
29 | x = ZeroPadding2D(padding=(1, 1))(x)
30 | x = Conv2D(192, (3, 3), name='conv3')(x)
31 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x)
32 | x = Activation('relu')(x)
33 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_2')(x) #local response normalization
34 | x = ZeroPadding2D(padding=(1, 1))(x)
35 | x = MaxPooling2D(pool_size=3, strides=2)(x)
36 |
37 | # Inception3a
38 | inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x)
39 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3)
40 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
41 | inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3)
42 | inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3)
43 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3)
44 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3)
45 |
46 | inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x)
47 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5)
48 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
49 | inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5)
50 | inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5)
51 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5)
52 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5)
53 |
54 | inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x)
55 | inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool)
56 | inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool)
57 | inception_3a_pool = Activation('relu')(inception_3a_pool)
58 | inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool)
59 |
60 | inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x)
61 | inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1)
62 | inception_3a_1x1 = Activation('relu')(inception_3a_1x1)
63 |
64 | inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3)
65 |
66 | # Inception3b
67 | inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a)
68 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3)
69 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
70 | inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3)
71 | inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3)
72 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3)
73 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3)
74 |
75 | inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a)
76 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5)
77 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
78 | inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5)
79 | inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5)
80 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5)
81 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5)
82 |
83 | inception_3b_pool = Lambda(lambda x: x**2, name='power2_3b')(inception_3a)
84 | inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3b_pool)
85 | inception_3b_pool = Lambda(lambda x: x*9, name='mult9_3b')(inception_3b_pool)
86 | inception_3b_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_3b')(inception_3b_pool)
87 | inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool)
88 | inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool)
89 | inception_3b_pool = Activation('relu')(inception_3b_pool)
90 | inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool)
91 |
92 | inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a)
93 | inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1)
94 | inception_3b_1x1 = Activation('relu')(inception_3b_1x1)
95 |
96 | inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3)
97 |
98 | # Inception3c
99 | inception_3c_3x3 = Conv2D(128, (1, 1), strides=(1, 1), name='inception_3c_3x3_conv1')(inception_3b)
100 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn1')(inception_3c_3x3)
101 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3)
102 | inception_3c_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3c_3x3)
103 | inception_3c_3x3 = Conv2D(256, (3, 3), strides=(2, 2), name='inception_3c_3x3_conv'+'2')(inception_3c_3x3)
104 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn'+'2')(inception_3c_3x3)
105 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3)
106 |
107 | inception_3c_5x5 = Conv2D(32, (1, 1), strides=(1, 1), name='inception_3c_5x5_conv1')(inception_3b)
108 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn1')(inception_3c_5x5)
109 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5)
110 | inception_3c_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3c_5x5)
111 | inception_3c_5x5 = Conv2D(64, (5, 5), strides=(2, 2), name='inception_3c_5x5_conv'+'2')(inception_3c_5x5)
112 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn'+'2')(inception_3c_5x5)
113 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5)
114 |
115 | inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b)
116 | inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool)
117 |
118 | inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3)
119 |
120 | #inception 4a
121 | inception_4a_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name='inception_4a_3x3_conv'+'1')(inception_3c)
122 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'1')(inception_4a_3x3)
123 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3)
124 | inception_4a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4a_3x3)
125 | inception_4a_3x3 = Conv2D(192, (3, 3), strides=(1, 1), name='inception_4a_3x3_conv'+'2')(inception_4a_3x3)
126 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'2')(inception_4a_3x3)
127 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3)
128 |
129 | inception_4a_5x5 = Conv2D(32, (1,1), strides=(1,1), name='inception_4a_5x5_conv1')(inception_3c)
130 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn1')(inception_4a_5x5)
131 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5)
132 | inception_4a_5x5 = ZeroPadding2D(padding=(2,2))(inception_4a_5x5)
133 | inception_4a_5x5 = Conv2D(64, (5,5), strides=(1,1), name='inception_4a_5x5_conv'+'2')(inception_4a_5x5)
134 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn'+'2')(inception_4a_5x5)
135 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5)
136 |
137 | inception_4a_pool = Lambda(lambda x: x**2, name='power2_4a')(inception_3c)
138 | inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4a_pool)
139 | inception_4a_pool = Lambda(lambda x: x*9, name='mult9_4a')(inception_4a_pool)
140 | inception_4a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_4a')(inception_4a_pool)
141 |
142 | inception_4a_pool = Conv2D(128, (1,1), strides=(1,1), name='inception_4a_pool_conv'+'')(inception_4a_pool)
143 | inception_4a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_pool_bn'+'')(inception_4a_pool)
144 | inception_4a_pool = Activation('relu')(inception_4a_pool)
145 | inception_4a_pool = ZeroPadding2D(padding=(2, 2))(inception_4a_pool)
146 |
147 | inception_4a_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name='inception_4a_1x1_conv'+'')(inception_3c)
148 | inception_4a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_1x1_bn'+'')(inception_4a_1x1)
149 | inception_4a_1x1 = Activation('relu')(inception_4a_1x1)
150 |
151 | inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3)
152 |
153 | #inception4e
154 | inception_4e_3x3 = Conv2D(160, (1,1), strides=(1,1), name='inception_4e_3x3_conv'+'1')(inception_4a)
155 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'1')(inception_4e_3x3)
156 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3)
157 | inception_4e_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4e_3x3)
158 | inception_4e_3x3 = Conv2D(256, (3,3), strides=(2,2), name='inception_4e_3x3_conv'+'2')(inception_4e_3x3)
159 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'2')(inception_4e_3x3)
160 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3)
161 |
162 | inception_4e_5x5 = Conv2D(64, (1,1), strides=(1,1), name='inception_4e_5x5_conv'+'1')(inception_4a)
163 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'1')(inception_4e_5x5)
164 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5)
165 | inception_4e_5x5 = ZeroPadding2D(padding=(2, 2))(inception_4e_5x5)
166 | inception_4e_5x5 = Conv2D(128, (5,5), strides=(2,2), name='inception_4e_5x5_conv'+'2')(inception_4e_5x5)
167 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'2')(inception_4e_5x5)
168 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5)
169 |
170 | inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a)
171 | inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool)
172 |
173 | inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3)
174 |
175 | #inception5a
176 | inception_5a_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_3x3_conv'+'1')(inception_4e)
177 | inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'1')(inception_5a_3x3)
178 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3)
179 | inception_5a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_5a_3x3)
180 | inception_5a_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5a_3x3_conv'+'2')(inception_5a_3x3)
181 | inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'2')(inception_5a_3x3)
182 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3)
183 |
184 | inception_5a_pool = Lambda(lambda x: x**2, name='power2_5a')(inception_4e)
185 | inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_5a_pool)
186 | inception_5a_pool = Lambda(lambda x: x*9, name='mult9_5a')(inception_5a_pool)
187 | inception_5a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_5a')(inception_5a_pool)
188 |
189 | inception_5a_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_pool_conv'+'')(inception_5a_pool)
190 | inception_5a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_pool_bn'+'')(inception_5a_pool)
191 | inception_5a_pool = Activation('relu')(inception_5a_pool)
192 | inception_5a_pool = ZeroPadding2D(padding=(1,1))(inception_5a_pool)
193 |
194 | inception_5a_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5a_1x1_conv'+'')(inception_4e)
195 | inception_5a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_1x1_bn'+'')(inception_5a_1x1)
196 | inception_5a_1x1 = Activation('relu')(inception_5a_1x1)
197 |
198 | inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3)
199 |
200 | #inception_5b
201 | inception_5b_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_3x3_conv'+'1')(inception_5a)
202 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'1')(inception_5b_3x3)
203 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3)
204 | inception_5b_3x3 = ZeroPadding2D(padding=(1,1))(inception_5b_3x3)
205 | inception_5b_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5b_3x3_conv'+'2')(inception_5b_3x3)
206 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'2')(inception_5b_3x3)
207 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3)
208 |
209 | inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a)
210 |
211 | inception_5b_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_pool_conv'+'')(inception_5b_pool)
212 | inception_5b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_pool_bn'+'')(inception_5b_pool)
213 | inception_5b_pool = Activation('relu')(inception_5b_pool)
214 |
215 | inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool)
216 |
217 | inception_5b_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5b_1x1_conv'+'')(inception_5a)
218 | inception_5b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_1x1_bn'+'')(inception_5b_1x1)
219 | inception_5b_1x1 = Activation('relu')(inception_5b_1x1)
220 |
221 | inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3)
222 |
223 | av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b)
224 | reshape_layer = Flatten()(av_pool)
225 | dense_layer = Dense(128, name='dense_layer')(reshape_layer)
226 | norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer)
227 |
228 | # Final Model
229 | model = Model(inputs=[myInput], outputs=norm_layer)
230 |
231 | #-----------------------------------
232 |
233 | home = str(Path.home())
234 |
235 | if os.path.isfile(home+'/.deepface/weights/openface_weights.h5') != True:
236 | print("openface_weights.h5 will be downloaded...")
237 |
238 | output = home+'/.deepface/weights/openface_weights.h5'
239 | gdown.download(url, output, quiet=False)
240 |
241 | #-----------------------------------
242 |
243 | model.load_weights(home+'/.deepface/weights/openface_weights.h5')
244 |
245 | #-----------------------------------
246 |
247 | return model
--------------------------------------------------------------------------------
/deepface/basemodels/VGGFace.py:
--------------------------------------------------------------------------------
1 | import os
2 | from pathlib import Path
3 | import gdown
4 |
5 | import tensorflow as tf
6 | tf_version = int(tf.__version__.split(".")[0])
7 |
8 | if tf_version == 1:
9 | from keras.models import Model, Sequential
10 | from keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
11 | else:
12 | from tensorflow import keras
13 | from tensorflow.keras.models import Model, Sequential
14 | from tensorflow.keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation
15 |
16 | #---------------------------------------
17 |
18 | def baseModel():
19 | model = Sequential()
20 | model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
21 | model.add(Convolution2D(64, (3, 3), activation='relu'))
22 | model.add(ZeroPadding2D((1,1)))
23 | model.add(Convolution2D(64, (3, 3), activation='relu'))
24 | model.add(MaxPooling2D((2,2), strides=(2,2)))
25 |
26 | model.add(ZeroPadding2D((1,1)))
27 | model.add(Convolution2D(128, (3, 3), activation='relu'))
28 | model.add(ZeroPadding2D((1,1)))
29 | model.add(Convolution2D(128, (3, 3), activation='relu'))
30 | model.add(MaxPooling2D((2,2), strides=(2,2)))
31 |
32 | model.add(ZeroPadding2D((1,1)))
33 | model.add(Convolution2D(256, (3, 3), activation='relu'))
34 | model.add(ZeroPadding2D((1,1)))
35 | model.add(Convolution2D(256, (3, 3), activation='relu'))
36 | model.add(ZeroPadding2D((1,1)))
37 | model.add(Convolution2D(256, (3, 3), activation='relu'))
38 | model.add(MaxPooling2D((2,2), strides=(2,2)))
39 |
40 | model.add(ZeroPadding2D((1,1)))
41 | model.add(Convolution2D(512, (3, 3), activation='relu'))
42 | model.add(ZeroPadding2D((1,1)))
43 | model.add(Convolution2D(512, (3, 3), activation='relu'))
44 | model.add(ZeroPadding2D((1,1)))
45 | model.add(Convolution2D(512, (3, 3), activation='relu'))
46 | model.add(MaxPooling2D((2,2), strides=(2,2)))
47 |
48 | model.add(ZeroPadding2D((1,1)))
49 | model.add(Convolution2D(512, (3, 3), activation='relu'))
50 | model.add(ZeroPadding2D((1,1)))
51 | model.add(Convolution2D(512, (3, 3), activation='relu'))
52 | model.add(ZeroPadding2D((1,1)))
53 | model.add(Convolution2D(512, (3, 3), activation='relu'))
54 | model.add(MaxPooling2D((2,2), strides=(2,2)))
55 |
56 | model.add(Convolution2D(4096, (7, 7), activation='relu'))
57 | model.add(Dropout(0.5))
58 | model.add(Convolution2D(4096, (1, 1), activation='relu'))
59 | model.add(Dropout(0.5))
60 | model.add(Convolution2D(2622, (1, 1)))
61 | model.add(Flatten())
62 | model.add(Activation('softmax'))
63 |
64 | return model
65 |
66 | def loadModel(url = 'https://drive.google.com/uc?id=1CPSeum3HpopfomUEK1gybeuIVoeJT_Eo'):
67 |
68 | model = baseModel()
69 |
70 | #-----------------------------------
71 |
72 | home = str(Path.home())
73 | output = home+'/.deepface/weights/vgg_face_weights.h5'
74 |
75 | if os.path.isfile(output) != True:
76 | print("vgg_face_weights.h5 will be downloaded...")
77 | gdown.download(url, output, quiet=False)
78 |
79 | #-----------------------------------
80 |
81 | try:
82 | model.load_weights(output)
83 | except Exception as err:
84 | print(str(err))
85 | print("Pre-trained weight could not be loaded.")
86 | print("You might try to download the pre-trained weights from the url ", url, " and copy it to the ", output)
87 |
88 | #-----------------------------------
89 |
90 | #drop the final softmax layer; the 2622-d output of the previous layer is used as the face representation
91 | vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output)
92 |
93 | return vgg_face_descriptor
--------------------------------------------------------------------------------
/deepface/basemodels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/basemodels/__init__.py
--------------------------------------------------------------------------------
/deepface/commons/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/commons/__init__.py
--------------------------------------------------------------------------------
/deepface/commons/distance.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def findCosineDistance(source_representation, test_representation):
4 | a = np.matmul(np.transpose(source_representation), test_representation)
5 | b = np.sum(np.multiply(source_representation, source_representation))
6 | c = np.sum(np.multiply(test_representation, test_representation))
7 | return 1 - (a / (np.sqrt(b) * np.sqrt(c)))
8 |
9 | def findEuclideanDistance(source_representation, test_representation):
10 | if type(source_representation) == list:
11 | source_representation = np.array(source_representation)
12 |
13 | if type(test_representation) == list:
14 | test_representation = np.array(test_representation)
15 |
16 | euclidean_distance = source_representation - test_representation
17 | euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance))
18 | euclidean_distance = np.sqrt(euclidean_distance)
19 | return euclidean_distance
20 |
21 | def l2_normalize(x):
22 | return x / np.sqrt(np.sum(np.multiply(x, x)))
23 |
24 | def findThreshold(model_name, distance_metric):
25 |
26 | base_threshold = {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75}
27 |
28 | thresholds = {
29 | 'VGG-Face': {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75},
30 | 'OpenFace': {'cosine': 0.10, 'euclidean': 0.55, 'euclidean_l2': 0.55},
31 | 'Facenet': {'cosine': 0.40, 'euclidean': 10, 'euclidean_l2': 0.80},
32 | 'DeepFace': {'cosine': 0.23, 'euclidean': 64, 'euclidean_l2': 0.64},
33 | 'DeepID': {'cosine': 0.015, 'euclidean': 45, 'euclidean_l2': 0.17},
34 | 'Dlib': {'cosine': 0.07, 'euclidean': 0.6, 'euclidean_l2': 0.6},
35 | 'ArcFace': {'cosine': 0.6871912959056619, 'euclidean': 4.1591468986978075, 'euclidean_l2': 1.1315718048269017}
36 | }
37 |
38 | threshold = thresholds.get(model_name, base_threshold).get(distance_metric, 0.4)
39 |
40 | return threshold
41 |
--------------------------------------------------------------------------------
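
A minimal sketch of how the helpers above are typically combined to decide whether two embeddings belong to the same identity (the embeddings here are random placeholders; in practice they come from a recognition model).

import numpy as np
from deepface.commons import distance as dst

emb1 = np.random.rand(512)                     # placeholder embeddings
emb2 = np.random.rand(512)

d = dst.findCosineDistance(emb1, emb2)
threshold = dst.findThreshold('ArcFace', 'cosine')
same_person = d <= threshold
print(d, threshold, same_person)
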
/deepface/commons/functions.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import cv2
5 | import base64
6 | from pathlib import Path
7 |
8 | from deepface.detectors import FaceDetector
9 |
10 | import tensorflow as tf
11 | tf_version = int(tf.__version__.split(".")[0])
12 |
13 | if tf_version == 1:
14 | import keras
15 | from keras.preprocessing.image import load_img, save_img, img_to_array
16 | from keras.applications.imagenet_utils import preprocess_input
17 | from keras.preprocessing import image
18 | elif tf_version == 2:
19 | from tensorflow import keras
20 | from tensorflow.keras.preprocessing.image import load_img, save_img, img_to_array
21 | from tensorflow.keras.applications.imagenet_utils import preprocess_input
22 | from tensorflow.keras.preprocessing import image
23 |
24 | #--------------------------------------------------
25 |
26 | def initialize_input(img1_path, img2_path = None):
27 |
28 | if type(img1_path) == list:
29 | bulkProcess = True
30 | img_list = img1_path.copy()
31 | else:
32 | bulkProcess = False
33 |
34 | if (
35 | (type(img2_path) == str and img2_path != None) #exact image path, base64 image
36 | or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array
37 | ):
38 | img_list = [[img1_path, img2_path]]
39 | else: #analyze function passes just img1_path
40 | img_list = [img1_path]
41 |
42 | return img_list, bulkProcess
43 |
44 | def initialize_detector(detector_backend):
45 |
46 | global face_detector
47 | face_detector = FaceDetector.build_model(detector_backend)
48 |
49 | def initializeFolder():
50 |
51 | home = str(Path.home())
52 |
53 | if not os.path.exists(home+"/.deepface"):
54 | os.mkdir(home+"/.deepface")
55 | print("Directory ",home,"/.deepface created")
56 |
57 | if not os.path.exists(home+"/.deepface/weights"):
58 | os.mkdir(home+"/.deepface/weights")
59 | print("Directory ",home,"/.deepface/weights created")
60 |
61 | def loadBase64Img(uri):
62 | encoded_data = uri.split(',')[1]
63 | nparr = np.frombuffer(base64.b64decode(encoded_data), np.uint8) #np.fromstring is deprecated
64 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
65 | return img
66 |
67 | def load_image(img):
68 |
69 | exact_image = False
70 | if type(img).__module__ == np.__name__:
71 | exact_image = True
72 |
73 | base64_img = False
74 | if exact_image != True and len(img) > 11 and img[0:11] == "data:image/": #skip the string check for numpy inputs
75 | base64_img = True
76 |
77 | #---------------------------
78 |
79 | if base64_img == True:
80 | img = loadBase64Img(img)
81 |
82 | elif exact_image != True: #image path passed as input
83 | if os.path.isfile(img) != True:
84 | raise ValueError("Confirm that ",img," exists")
85 |
86 | img = cv2.imread(img)
87 |
88 | return img
89 |
90 | def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True):
91 |
92 | img_region = [0, 0, img.shape[0], img.shape[1]]
93 |
94 | #if functions.preprocess_face is called directly, the face_detector global variable might not have been initialized.
95 | if not "face_detector" in globals():
96 | initialize_detector(detector_backend = detector_backend)
97 |
98 | detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align)
99 |
100 | if (isinstance(detected_face, np.ndarray)):
101 | return detected_face, img_region
102 | else:
103 | if detected_face == None:
104 | if enforce_detection != True:
105 | return img, img_region
106 | else:
107 | raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.")
108 |
109 | def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True):
110 |
111 | #img might be path, base64 or numpy array. Convert it to numpy whatever it is.
112 | img = load_image(img)
113 | base_img = img.copy()
114 |
115 | # img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align)
116 | region = [0, 0, img.shape[0], img.shape[1]] #detection is disabled in this fork, so the whole frame is treated as the region
117 | #--------------------------
118 |
119 | if img.shape[0] == 0 or img.shape[1] == 0:
120 | if enforce_detection == True:
121 | raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.")
122 | else: #restore base image
123 | img = base_img.copy()
124 |
125 | #--------------------------
126 |
127 | #post-processing
128 | if grayscale == True:
129 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
130 |
131 | img = cv2.resize(img, target_size)
132 | #TODO: a plain resize distorts the aspect ratio; pad with black pixels before resizing to target_size
133 |
134 | img_pixels = image.img_to_array(img)
135 | img_pixels = np.expand_dims(img_pixels, axis = 0)
136 | img_pixels /= 255 #normalize input in [0, 1]
137 |
138 | if return_region == True:
139 | return img_pixels, region
140 | else:
141 | return img_pixels
142 |
143 | def find_input_shape(model):
144 |
145 | #face recognition models have different size of inputs
146 | #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue.
147 |
148 | input_shape = model.layers[0].input_shape
149 |
150 | if type(input_shape) == list:
151 | input_shape = input_shape[0][1:3]
152 | else:
153 | input_shape = input_shape[1:3]
154 |
155 | if type(input_shape) == list: #issue 197: some people got array here instead of tuple
156 | input_shape = tuple(input_shape)
157 |
158 | return input_shape
159 |
--------------------------------------------------------------------------------
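
A minimal sketch of preprocess_face as used in this fork (face detection inside it is commented out above, so the whole frame is simply resized); the image path is hypothetical and enforce_detection is disabled accordingly.

from deepface.commons import functions

img_pixels = functions.preprocess_face('frame.jpg', target_size = (112, 112), enforce_detection = False, detector_backend = 'opencv')
# img_pixels has shape (1, 112, 112, 3) with values scaled to [0, 1]
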
/deepface/detectors/DlibWrapper.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import gdown
3 | import bz2
4 | import os
5 |
6 | def build_model():
7 |
8 | home = str(Path.home())
9 |
10 | import dlib #this requirement is not a must that's why imported here
11 |
12 | #check required file exists in the home/.deepface/weights folder
13 | if os.path.isfile(home+'/.deepface/weights/shape_predictor_5_face_landmarks.dat') != True:
14 |
15 | print("shape_predictor_5_face_landmarks.dat.bz2 is going to be downloaded")
16 |
17 | url = "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2"
18 | output = home+'/.deepface/weights/'+url.split("/")[-1]
19 |
20 | gdown.download(url, output, quiet=False)
21 |
22 | bz2_file = bz2.BZ2File(output)
23 | data = bz2_file.read()
24 | newfilepath = output[:-4] #discard .bz2 extension
25 | open(newfilepath, 'wb').write(data)
26 |
27 | face_detector = dlib.get_frontal_face_detector()
28 | sp = dlib.shape_predictor(home+"/.deepface/weights/shape_predictor_5_face_landmarks.dat")
29 |
30 | detector = {}
31 | detector["face_detector"] = face_detector
32 | detector["sp"] = sp
33 | return detector
34 |
35 | def detect_face(detector, img, align = True):
36 |
37 | import dlib #this requirement is not a must that's why imported here
38 |
39 | home = str(Path.home())
40 |
41 | sp = detector["sp"]
42 |
43 | detected_face = None
44 | img_region = [0, 0, img.shape[0], img.shape[1]]
45 |
46 | face_detector = detector["face_detector"]
47 | detections = face_detector(img, 1)
48 |
49 | if len(detections) > 0:
50 |
51 | for idx, d in enumerate(detections):
52 | left = d.left(); right = d.right()
53 | top = d.top(); bottom = d.bottom()
54 | detected_face = img[top:bottom, left:right]
55 | img_region = [left, top, right - left, bottom - top]
56 | break #get the first one
57 |
58 | if align:
59 | img_shape = sp(img, detections[0])
60 | detected_face = dlib.get_face_chip(img, img_shape, size = detected_face.shape[0])
61 |
62 | return detected_face, img_region
63 |
--------------------------------------------------------------------------------
/deepface/detectors/FaceDetector.py:
--------------------------------------------------------------------------------
1 | from deepface.detectors import OpenCvWrapper, SsdWrapper, DlibWrapper, MtcnnWrapper, RetinaFaceWrapper
2 | from PIL import Image
3 | import math
4 | import numpy as np
5 | from deepface.commons import distance
6 |
7 | def build_model(detector_backend):
8 |
9 | backends = {
10 | 'opencv': OpenCvWrapper.build_model,
11 | 'ssd': SsdWrapper.build_model,
12 | 'dlib': DlibWrapper.build_model,
13 | 'mtcnn': MtcnnWrapper.build_model,
14 | 'retinaface': RetinaFaceWrapper.build_model
15 | }
16 |
17 | face_detector = backends.get(detector_backend)
18 |
19 | if face_detector:
20 | face_detector = face_detector()
21 | else:
22 | raise ValueError("invalid detector_backend passed - " + detector_backend)
23 |
24 | return face_detector
25 |
26 | def detect_face(face_detector, detector_backend, img, align = True):
27 |
28 | backends = {
29 | 'opencv': OpenCvWrapper.detect_face,
30 | 'ssd': SsdWrapper.detect_face,
31 | 'dlib': DlibWrapper.detect_face,
32 | 'mtcnn': MtcnnWrapper.detect_face,
33 | 'retinaface': RetinaFaceWrapper.detect_face
34 | }
35 |
36 | detect_face = backends.get(detector_backend)
37 |
38 | if detect_face:
39 | face, region = detect_face(face_detector, img, align)
40 | else:
41 | raise ValueError("invalid detector_backend passed - " + detector_backend)
42 |
43 | return face, region
44 |
45 | def alignment_procedure(img, left_eye, right_eye):
46 |
47 | #this function aligns given face in img based on left and right eye coordinates
48 |
49 | left_eye_x, left_eye_y = left_eye
50 | right_eye_x, right_eye_y = right_eye
51 |
52 | #-----------------------
53 | #find rotation direction
54 |
55 | if left_eye_y > right_eye_y:
56 | point_3rd = (right_eye_x, left_eye_y)
57 | direction = -1 #rotate clockwise
58 | else:
59 | point_3rd = (left_eye_x, right_eye_y)
60 | direction = 1 #rotate counter-clockwise
61 |
62 | #-----------------------
63 | #find length of triangle edges
64 |
65 | a = distance.findEuclideanDistance(np.array(left_eye), np.array(point_3rd))
66 | b = distance.findEuclideanDistance(np.array(right_eye), np.array(point_3rd))
67 | c = distance.findEuclideanDistance(np.array(right_eye), np.array(left_eye))
68 |
69 | #-----------------------
70 |
71 | #apply cosine rule
72 |
73 | if b != 0 and c != 0: #this multiplication causes division by zero in cos_a calculation
74 |
75 | cos_a = (b*b + c*c - a*a)/(2*b*c)
76 | angle = np.arccos(cos_a) #angle in radian
77 | angle = (angle * 180) / math.pi #radian to degree
78 |
79 | #-----------------------
80 | #rotate base image
81 |
82 | if direction == -1:
83 | angle = 90 - angle
84 |
85 | img = Image.fromarray(img)
86 | img = np.array(img.rotate(direction * angle))
87 |
88 | #-----------------------
89 |
90 | return img #return img anyway
91 |
--------------------------------------------------------------------------------
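
A minimal sketch of the detector dispatch above, assuming an OpenCV-readable image at a hypothetical path.

import cv2
from deepface.detectors import FaceDetector

face_detector = FaceDetector.build_model('opencv')   # any of: opencv, ssd, dlib, mtcnn, retinaface
img = cv2.imread('frame.jpg')                        # hypothetical BGR frame
face, region = FaceDetector.detect_face(face_detector, 'opencv', img, align = True)
# face: cropped (and optionally aligned) BGR patch, or None if nothing was found
# region: [x, y, w, h] of the detection in the original frame
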
/deepface/detectors/MtcnnWrapper.py:
--------------------------------------------------------------------------------
1 | from mtcnn import MTCNN
2 | import cv2
3 | from deepface.detectors import FaceDetector
4 |
5 | def build_model():
6 | face_detector = MTCNN()
7 | return face_detector
8 |
9 | def detect_face(face_detector, img, align = True):
10 |
11 | detected_face = None
12 | img_region = [0, 0, img.shape[0], img.shape[1]]
13 |
14 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #mtcnn expects RGB but OpenCV read BGR
15 | detections = face_detector.detect_faces(img_rgb)
16 |
17 | if len(detections) > 0:
18 | detection = detections[0]
19 | x, y, w, h = detection["box"]
20 | detected_face = img[int(y):int(y+h), int(x):int(x+w)]
21 | img_region = [x, y, w, h]
22 |
23 | keypoints = detection["keypoints"]
24 | left_eye = keypoints["left_eye"]
25 | right_eye = keypoints["right_eye"]
26 |
27 | if align:
28 | detected_face = FaceDetector.alignment_procedure(detected_face, left_eye, right_eye)
29 |
30 | return detected_face, img_region
31 |
--------------------------------------------------------------------------------
/deepface/detectors/OpenCvWrapper.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import pandas as pd
4 | from deepface.detectors import FaceDetector
5 |
6 | def build_model():
7 |
8 | detector ={}
9 |
10 | detector["face_detector"] = build_cascade('haarcascade')
11 | detector["eye_detector"] = build_cascade('haarcascade_eye')
12 |
13 | return detector
14 |
15 | def build_cascade(model_name = 'haarcascade'):
16 | opencv_path = get_opencv_path()
17 |
18 | if model_name == 'haarcascade':
19 |
20 | face_detector_path = opencv_path+"haarcascade_frontalface_default.xml"
21 |
22 | if os.path.isfile(face_detector_path) != True:
23 | raise ValueError("Confirm that opencv is installed on your environment! Expected path ",face_detector_path," violated.")
24 |
25 |
26 | face_detector = cv2.CascadeClassifier(face_detector_path)
27 | return face_detector
28 |
29 | elif model_name == 'haarcascade_eye':
30 | eye_detector_path = opencv_path+"haarcascade_eye.xml"
31 |
32 | if os.path.isfile(eye_detector_path) != True:
33 | raise ValueError("Confirm that opencv is installed on your environment! Expected path ",eye_detector_path," violated.")
34 |
35 | eye_detector = cv2.CascadeClassifier(eye_detector_path)
36 | return eye_detector
37 |
38 | def detect_face(detector, img, align = True):
39 |
40 | detected_face = None
41 | img_region = [0, 0, img.shape[0], img.shape[1]]
42 |
43 | faces = []
44 | try:
45 | faces = detector["face_detector"].detectMultiScale(img, 1.3, 5)
46 | except:
47 | pass
48 |
49 | if len(faces) > 0:
50 | x,y,w,h = faces[0] #focus on the 1st face found in the image
51 | detected_face = img[int(y):int(y+h), int(x):int(x+w)]
52 |
53 | if align:
54 | detected_face = align_face(detector["eye_detector"], detected_face)
55 | img_region = [x, y, w, h]
56 |
57 | return detected_face, img_region
58 |
59 | def align_face(eye_detector, img):
60 |
61 | detected_face_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #eye detector expects gray scale image
62 |
63 | eyes = eye_detector.detectMultiScale(detected_face_gray)
64 |
65 | if len(eyes) >= 2:
66 |
67 | #find the two largest detected eyes
68 |
69 | base_eyes = eyes[:, 2]
70 |
71 | items = []
72 | for i in range(0, len(base_eyes)):
73 | item = (base_eyes[i], i)
74 | items.append(item)
75 |
76 | df = pd.DataFrame(items, columns = ["length", "idx"]).sort_values(by=['length'], ascending=False)
77 |
78 | eyes = eyes[df.idx.values[0:2]] #keep only the two largest eyes
79 |
80 | #-----------------------
81 | #decide left and right eye
82 |
83 | eye_1 = eyes[0]; eye_2 = eyes[1]
84 |
85 | if eye_1[0] < eye_2[0]:
86 | left_eye = eye_1; right_eye = eye_2
87 | else:
88 | left_eye = eye_2; right_eye = eye_1
89 |
90 | #-----------------------
91 | #find center of eyes
92 | left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2)))
93 | right_eye = (int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2)))
94 | img = FaceDetector.alignment_procedure(img, left_eye, right_eye)
95 | return img #return img anyway
96 |
97 | def get_opencv_path():
98 | opencv_home = cv2.__file__
99 | folders = opencv_home.split(os.path.sep)[0:-1]
100 |
101 | path = folders[0]
102 | for folder in folders[1:]:
103 | path = path + "/" + folder
104 |
105 | return path+"/data/"
106 |
--------------------------------------------------------------------------------
/deepface/detectors/RetinaFaceWrapper.py:
--------------------------------------------------------------------------------
1 | from retinaface import RetinaFace
2 | import cv2
3 |
4 | def build_model():
5 | face_detector = RetinaFace.build_model()
6 | return face_detector
7 |
8 | def detect_face(face_detector, img, align = True):
9 |
10 | face = None
11 | img_region = [0, 0, img.shape[0], img.shape[1]]
12 |
13 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #retinaface expects RGB but OpenCV read BGR
14 |
15 | faces = RetinaFace.extract_faces(img_rgb, model = face_detector, align = align)
16 |
17 | if len(faces) > 0:
18 | face = faces[0][:, :, ::-1]
19 |
20 | return face, img_region
21 |
--------------------------------------------------------------------------------
/deepface/detectors/SsdWrapper.py:
--------------------------------------------------------------------------------
1 | import gdown
2 | from pathlib import Path
3 | import os
4 | import cv2
5 | import pandas as pd
6 |
7 | from deepface.detectors import OpenCvWrapper
8 |
9 | def build_model():
10 |
11 | home = str(Path.home())
12 |
13 | #model structure
14 | if os.path.isfile(home+'/.deepface/weights/deploy.prototxt') != True:
15 |
16 | print("deploy.prototxt will be downloaded...")
17 |
18 | url = "https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt"
19 |
20 | output = home+'/.deepface/weights/deploy.prototxt'
21 |
22 | gdown.download(url, output, quiet=False)
23 |
24 | #pre-trained weights
25 | if os.path.isfile(home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel') != True:
26 |
27 | print("res10_300x300_ssd_iter_140000.caffemodel will be downloaded...")
28 |
29 | url = "https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel"
30 |
31 | output = home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel'
32 |
33 | gdown.download(url, output, quiet=False)
34 |
35 | face_detector = cv2.dnn.readNetFromCaffe(
36 | home+"/.deepface/weights/deploy.prototxt",
37 | home+"/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel"
38 | )
39 |
40 | eye_detector = OpenCvWrapper.build_cascade("haarcascade_eye")
41 |
42 | detector = {}
43 | detector["face_detector"] = face_detector
44 | detector["eye_detector"] = eye_detector
45 |
46 | return detector
47 |
48 | def detect_face(detector, img, align = True):
49 |
50 | detected_face = None
51 | img_region = [0, 0, img.shape[0], img.shape[1]]
52 |
53 | ssd_labels = ["img_id", "is_face", "confidence", "left", "top", "right", "bottom"]
54 |
55 | target_size = (300, 300)
56 |
57 | base_img = img.copy() #we will restore base_img to img later
58 |
59 | original_size = img.shape
60 |
61 | img = cv2.resize(img, target_size)
62 |
63 | aspect_ratio_x = (original_size[1] / target_size[1])
64 | aspect_ratio_y = (original_size[0] / target_size[0])
65 |
66 | imageBlob = cv2.dnn.blobFromImage(image = img)
67 |
68 | face_detector = detector["face_detector"]
69 | face_detector.setInput(imageBlob)
70 | detections = face_detector.forward()
71 |
72 | detections_df = pd.DataFrame(detections[0][0], columns = ssd_labels)
73 |
74 | detections_df = detections_df[detections_df['is_face'] == 1] #0: background, 1: face
75 | detections_df = detections_df[detections_df['confidence'] >= 0.90]
76 |
77 | detections_df['left'] = (detections_df['left'] * 300).astype(int)
78 | detections_df['bottom'] = (detections_df['bottom'] * 300).astype(int)
79 | detections_df['right'] = (detections_df['right'] * 300).astype(int)
80 | detections_df['top'] = (detections_df['top'] * 300).astype(int)
81 |
82 | if detections_df.shape[0] > 0:
83 |
84 | #TODO: sort detections_df
85 |
86 | #get the first face in the image
87 | instance = detections_df.iloc[0]
88 |
89 | left = instance["left"]
90 | right = instance["right"]
91 | bottom = instance["bottom"]
92 | top = instance["top"]
93 |
94 | detected_face = base_img[int(top*aspect_ratio_y):int(bottom*aspect_ratio_y), int(left*aspect_ratio_x):int(right*aspect_ratio_x)]
95 | img_region = [int(left*aspect_ratio_x), int(top*aspect_ratio_y), int(right*aspect_ratio_x) - int(left*aspect_ratio_x), int(bottom*aspect_ratio_y) - int(top*aspect_ratio_y)]
96 |
97 | if align:
98 | detected_face = OpenCvWrapper.align_face(detector["eye_detector"], detected_face)
99 |
100 | return detected_face, img_region
101 |
--------------------------------------------------------------------------------
/deepface/detectors/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/detectors/__init__.py
--------------------------------------------------------------------------------
/deepface/extendedmodels/Age.py:
--------------------------------------------------------------------------------
1 | from deepface.basemodels import VGGFace
2 | import os
3 | from pathlib import Path
4 | import gdown
5 | import numpy as np
6 |
7 | import tensorflow as tf
8 | tf_version = int(tf.__version__.split(".")[0])
9 |
10 | if tf_version == 1:
11 | import keras
12 | from keras.models import Model, Sequential
13 | from keras.layers import Convolution2D, Flatten, Activation
14 | elif tf_version == 2:
15 | from tensorflow import keras
16 | from tensorflow.keras.models import Model, Sequential
17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation
18 |
19 | def loadModel():
20 |
21 | model = VGGFace.baseModel()
22 |
23 | #--------------------------
24 |
25 | classes = 101
26 | base_model_output = Sequential()
27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output)
28 | base_model_output = Flatten()(base_model_output)
29 | base_model_output = Activation('softmax')(base_model_output)
30 |
31 | #--------------------------
32 |
33 | age_model = Model(inputs=model.input, outputs=base_model_output)
34 |
35 | #--------------------------
36 |
37 | #load weights
38 |
39 | home = str(Path.home())
40 |
41 | if os.path.isfile(home+'/.deepface/weights/age_model_weights.h5') != True:
42 | print("age_model_weights.h5 will be downloaded...")
43 |
44 | url = 'https://drive.google.com/uc?id=1YCox_4kJ-BYeXq27uUbasu--yz28zUMV'
45 | output = home+'/.deepface/weights/age_model_weights.h5'
46 | gdown.download(url, output, quiet=False)
47 |
48 | age_model.load_weights(home+'/.deepface/weights/age_model_weights.h5')
49 |
50 | return age_model
51 |
52 | #--------------------------
53 |
54 | def findApparentAge(age_predictions):
55 | output_indexes = np.array([i for i in range(0, 101)])
56 | apparent_age = np.sum(age_predictions * output_indexes)
57 | return apparent_age
--------------------------------------------------------------------------------
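
A small worked example of findApparentAge above: the age model outputs a softmax over 101 classes (ages 0-100) and the apparent age is the expected value of that distribution; the probabilities below are made up for illustration.

import numpy as np
from deepface.extendedmodels import Age

age_predictions = np.zeros(101)
age_predictions[30] = 0.5
age_predictions[40] = 0.5
print(Age.findApparentAge(age_predictions))   # 0.5*30 + 0.5*40 = 35.0
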
/deepface/extendedmodels/Emotion.py:
--------------------------------------------------------------------------------
1 | import os
2 | import gdown
3 | from pathlib import Path
4 | import zipfile
5 |
6 | import tensorflow as tf
7 | tf_version = int(tf.__version__.split(".")[0])
8 |
9 | if tf_version == 1:
10 | import keras
11 | from keras.models import Model, Sequential
12 | from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout
13 | elif tf_version == 2:
14 | from tensorflow import keras
15 | from tensorflow.keras.models import Model, Sequential
16 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout
17 |
18 | def loadModel():
19 |
20 | num_classes = 7
21 |
22 | model = Sequential()
23 |
24 | #1st convolution layer
25 | model.add(Conv2D(64, (5, 5), activation='relu', input_shape=(48,48,1)))
26 | model.add(MaxPooling2D(pool_size=(5,5), strides=(2, 2)))
27 |
28 | #2nd convolution layer
29 | model.add(Conv2D(64, (3, 3), activation='relu'))
30 | model.add(Conv2D(64, (3, 3), activation='relu'))
31 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2)))
32 |
33 | #3rd convolution layer
34 | model.add(Conv2D(128, (3, 3), activation='relu'))
35 | model.add(Conv2D(128, (3, 3), activation='relu'))
36 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2)))
37 |
38 | model.add(Flatten())
39 |
40 | #fully connected neural networks
41 | model.add(Dense(1024, activation='relu'))
42 | model.add(Dropout(0.2))
43 | model.add(Dense(1024, activation='relu'))
44 | model.add(Dropout(0.2))
45 |
46 | model.add(Dense(num_classes, activation='softmax'))
47 |
48 | #----------------------------
49 |
50 | home = str(Path.home())
51 |
52 | if os.path.isfile(home+'/.deepface/weights/facial_expression_model_weights.h5') != True:
53 | print("facial_expression_model_weights.h5 will be downloaded...")
54 |
55 | #TO-DO: upload weights to google drive
56 |
57 | #zip
58 | url = 'https://drive.google.com/uc?id=13iUHHP3SlNg53qSuQZDdHDSDNdBP9nwy'
59 | output = home+'/.deepface/weights/facial_expression_model_weights.zip'
60 | gdown.download(url, output, quiet=False)
61 |
62 | #unzip facial_expression_model_weights.zip
63 | with zipfile.ZipFile(output, 'r') as zip_ref:
64 | zip_ref.extractall(home+'/.deepface/weights/')
65 |
66 | model.load_weights(home+'/.deepface/weights/facial_expression_model_weights.h5')
67 |
68 | return model
69 |
70 | #----------------------------
71 |
72 |
--------------------------------------------------------------------------------
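
The emotion network above takes a 48x48 single-channel input and emits 7 softmax scores. A minimal usage sketch, assuming a face crop saved as face.jpg (the file name is hypothetical) and the repository's own import path:

    import cv2
    import numpy as np
    from deepface.extendedmodels import Emotion

    model = Emotion.loadModel()   # downloads facial_expression_model_weights.h5 on first use

    face = cv2.imread("face.jpg")                      # hypothetical face crop
    gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (48, 48)).astype("float32") / 255.0

    predictions = model.predict(gray.reshape(1, 48, 48, 1))[0]   # 7 class probabilities
    print(int(np.argmax(predictions)))                 # index of the most likely expression class
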
/deepface/extendedmodels/Gender.py:
--------------------------------------------------------------------------------
1 | from deepface.basemodels import VGGFace
2 | import os
3 | from pathlib import Path
4 | import gdown
5 | import numpy as np
6 |
7 | import tensorflow as tf
8 | tf_version = int(tf.__version__.split(".")[0])
9 |
10 | if tf_version == 1:
11 | from keras.models import Model, Sequential
12 | from keras.layers import Convolution2D, Flatten, Activation
13 | elif tf_version == 2:
14 | from tensorflow.keras.models import Model, Sequential
15 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation
16 |
17 | def loadModel():
18 |
19 | model = VGGFace.baseModel()
20 |
21 | #--------------------------
22 |
23 | classes = 2
24 | base_model_output = Sequential()
25 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output)
26 | base_model_output = Flatten()(base_model_output)
27 | base_model_output = Activation('softmax')(base_model_output)
28 |
29 | #--------------------------
30 |
31 | gender_model = Model(inputs=model.input, outputs=base_model_output)
32 |
33 | #--------------------------
34 |
35 | #load weights
36 |
37 | home = str(Path.home())
38 |
39 | if os.path.isfile(home+'/.deepface/weights/gender_model_weights.h5') != True:
40 | print("gender_model_weights.h5 will be downloaded...")
41 |
42 | url = 'https://drive.google.com/uc?id=1wUXRVlbsni2FN9-jkS_f4UTUrm1bRLyk'
43 | output = home+'/.deepface/weights/gender_model_weights.h5'
44 | gdown.download(url, output, quiet=False)
45 |
46 | gender_model.load_weights(home+'/.deepface/weights/gender_model_weights.h5')
47 |
48 | return gender_model
49 |
50 | #--------------------------
--------------------------------------------------------------------------------
/deepface/extendedmodels/Race.py:
--------------------------------------------------------------------------------
1 | from deepface.basemodels import VGGFace
2 |
3 | import os
4 | from pathlib import Path
5 | import gdown
6 | import numpy as np
7 | import zipfile
8 |
9 | import tensorflow as tf
10 | tf_version = int(tf.__version__.split(".")[0])
11 |
12 | if tf_version == 1:
13 | from keras.models import Model, Sequential
14 | from keras.layers import Convolution2D, Flatten, Activation
15 | elif tf_version == 2:
16 | from tensorflow.keras.models import Model, Sequential
17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation
18 |
19 | def loadModel():
20 |
21 | model = VGGFace.baseModel()
22 |
23 | #--------------------------
24 |
25 | classes = 6
26 | base_model_output = Sequential()
27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output)
28 | base_model_output = Flatten()(base_model_output)
29 | base_model_output = Activation('softmax')(base_model_output)
30 |
31 | #--------------------------
32 |
33 | race_model = Model(inputs=model.input, outputs=base_model_output)
34 |
35 | #--------------------------
36 |
37 | #load weights
38 |
39 | home = str(Path.home())
40 |
41 | if os.path.isfile(home+'/.deepface/weights/race_model_single_batch.h5') != True:
42 | print("race_model_single_batch.h5 will be downloaded...")
43 |
44 | #zip
45 | url = 'https://drive.google.com/uc?id=1nz-WDhghGQBC4biwShQ9kYjvQMpO6smj'
46 | output = home+'/.deepface/weights/race_model_single_batch.zip'
47 | gdown.download(url, output, quiet=False)
48 |
49 | #unzip race_model_single_batch.zip
50 | with zipfile.ZipFile(output, 'r') as zip_ref:
51 | zip_ref.extractall(home+'/.deepface/weights/')
52 |
53 | race_model.load_weights(home+'/.deepface/weights/race_model_single_batch.h5')
54 |
55 | return race_model
56 |
57 | #--------------------------
58 |
--------------------------------------------------------------------------------
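
Age.py, Gender.py and Race.py all follow the same pattern: take the shared VGGFace trunk, attach a 1x1 convolution head with 101, 2 or 6 outputs, and load the matching weight file. A hypothetical refactoring sketch of that shared pattern, assuming the TensorFlow 2 import paths used by these files:

    from deepface.basemodels import VGGFace
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import Convolution2D, Flatten, Activation

    def build_vggface_head(classes):
        """Hypothetical helper: attach a `classes`-way softmax head to the VGGFace trunk."""
        base = VGGFace.baseModel()
        x = Convolution2D(classes, (1, 1), name='predictions')(base.layers[-4].output)
        x = Flatten()(x)
        x = Activation('softmax')(x)
        return Model(inputs=base.input, outputs=x)

    # age_model    = build_vggface_head(101)   # then load age_model_weights.h5
    # gender_model = build_vggface_head(2)     # then load gender_model_weights.h5
    # race_model   = build_vggface_head(6)     # then load race_model_single_batch.h5
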
/deepface/extendedmodels/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/extendedmodels/__init__.py
--------------------------------------------------------------------------------
/deepface/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/models/__init__.py
--------------------------------------------------------------------------------
/detection.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/detection.txt
--------------------------------------------------------------------------------
/evaluation.py:
--------------------------------------------------------------------------------
1 | import motmetrics as mm
2 | import numpy as np
3 | import os
4 | from absl import app, flags, logging
5 | from absl.flags import FLAGS
6 |
7 | """
8 | python evaluation.py \
9 | --gt_file_path ./resources/gt/T-ara_gt.txt \
10 | --pred_file_path ./resources/gt/T-ara_pred.txt
11 |
12 | python evaluation.py \
13 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \
14 | --pred_file_path ./resources/gt/GirlsAloud_pred.txt
15 |
16 | python evaluation.py \
17 | --gt_file_path ./resources/gt/Darling_gt.txt \
18 | --pred_file_path ./resources/gt/Darling_pred.txt
19 |
20 | python evaluation.py \
21 | --gt_file_path ./resources/gt/Westlife_gt.txt \
22 | --pred_file_path ./resources/gt/Westlife_pred.txt
23 |
24 | python evaluation.py \
25 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \
26 | --pred_file_path ./resources/gt/BrunoMars_pred.txt
27 |
28 | python evaluation.py \
29 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \
30 | --pred_file_path ./resources/gt/HelloBubble_pred.txt
31 |
32 | python evaluation.py \
33 | --gt_file_path ./resources/gt/Apink_gt.txt \
34 | --pred_file_path ./resources/gt/Apink_pred.txt
35 | """
36 |
37 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to gt txt')
38 | flags.DEFINE_string('pred_file_path', './resources/gt/T-ara_pred.txt', 'path to predicted txt')
39 |
40 | def main(args):
41 |
42 | # home = os.getcwd()
43 | # gt_path = os.path.join(home, "resources", "gt")
44 |
45 | # gt_file_path = os.path.join(gt_path, "T-ara_gt.txt")
46 | # pred_file_path = os.path.join(gt_path, "T-ara_pred.txt")
47 |
48 | f = open(FLAGS.gt_file_path, "r")
49 | gt = []
50 | while True:
51 | line = f.readline()
52 | if not line: break
53 | a = list(map(int, line.split()))
54 | gt.append(a)
55 | gt = np.asarray(gt)
56 | f.close()
57 |
58 | f = open(FLAGS.pred_file_path, "r")
59 | pred = []
60 | while True:
61 | line = f.readline()
62 | if not line: break
63 | a = list(map(int, line.split()))
64 | pred.append(a)
65 | pred = np.asarray(pred)
66 | f.close()
67 |
68 | acc = mm.MOTAccumulator(auto_id=True)
69 | frame_idx = 0
70 | count = 0
71 | max_index = max(max(gt[:, 0]), max(pred[:, 0]))
72 |
73 | while frame_idx <= max_index:
74 | frame_idx += 1
75 |
76 | gt_indexs = gt[:, 0]
77 | pred_indexs = pred[:, 0]
78 |
79 | mask1 = frame_idx == gt_indexs
80 | mask2 = frame_idx == pred_indexs
81 |
82 | # if not gt[mask1].shape[0] and not pred[mask2].shape[0]:
83 | # break
84 |
85 | # gt_ids = sorted(list(set(gt[mask1][:, 1])))
86 | # pred_ids = sorted(list(set(pred[mask2][:, 1])))
87 |
88 | gt_ids = gt[mask1][:, 1]
89 | pred_ids = pred[mask2][:, 1]
90 | # print(gt_ids)
91 | # print(pred_ids)
92 |
93 | a = gt[mask1][:, 2:]
94 | b = pred[mask2][:, 2:]
95 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5))
96 |
97 | f = acc.update(
98 | gt_ids,
99 | pred_ids,
100 | mm.distances.iou_matrix(a, b, max_iou=0.5)
101 | )
102 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5))
103 | # print(acc.mot_events.loc[f])
104 |
105 |
106 | mh = mm.metrics.create()
107 | custom_metric = [
108 | "num_frames",
109 | "obj_frequencies",
110 | "pred_frequencies",
111 | "num_matches",
112 | "num_switches",
113 | "num_transfer",
114 | "num_ascend",
115 | "num_migrate",
116 | "num_false_positives",
117 | "num_misses",
118 | "num_detections",
119 | "num_objects",
120 | "num_predictions",
121 | "num_unique_objects",
122 | "track_ratios",
123 | "mostly_tracked",
124 | "partially_tracked",
125 | "mostly_lost",
126 | "num_fragmentations",
127 | "motp",
128 | "mota",
129 | "precision",
130 | "recall",
131 | ]
132 | summary = mh.compute_many(
133 | [acc, acc.mot_events],
134 | metrics=mm.metrics.motchallenge_metrics,
135 | )
136 |
137 | strsummary = mm.io.render_summary(
138 | summary,
139 | formatters=mh.formatters,
140 | namemap=mm.io.motchallenge_metric_names
141 | )
142 |
143 | print(strsummary)
144 |
145 |
146 | if __name__ == '__main__':
147 | try:
148 | app.run(main)
149 | except SystemExit:
150 | pass
151 |
152 |
--------------------------------------------------------------------------------
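
evaluation.py expects both the ground-truth and the prediction files to contain one whitespace-separated integer row per box: frame, id, x, y, width, height. A self-contained toy sketch of that format and of a single accumulator update (the numbers are made up, not from the dataset):

    import motmetrics as mm
    import numpy as np

    # One line per box: <frame> <id> <x> <y> <width> <height>
    gt_lines   = ["1 1 10 10 50 50", "1 2 200 40 60 60"]
    pred_lines = ["1 7 12 11 48 52", "1 8 205 42 58 61"]

    gt   = np.asarray([list(map(int, l.split())) for l in gt_lines])
    pred = np.asarray([list(map(int, l.split())) for l in pred_lines])

    acc = mm.MOTAccumulator(auto_id=True)
    # one update per frame: object ids, hypothesis ids, IoU-based distance matrix
    acc.update(gt[:, 1], pred[:, 1],
               mm.distances.iou_matrix(gt[:, 2:], pred[:, 2:], max_iou=0.5))

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=["num_frames", "mota", "motp", "idf1"], name="toy")
    print(mm.io.render_summary(summary, formatters=mh.formatters,
                               namemap=mm.io.motchallenge_metric_names))
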
/generate_face.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import os
3 | import numpy as np
4 | from absl import app, flags, logging
5 | from absl.flags import FLAGS
6 |
7 |
8 |
9 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to crop gt file')
10 | flags.DEFINE_string('video_file_path', './resources/video/in/T-ara.mov', 'path to video file')
11 | flags.DEFINE_string('face_data_path', './resources/database/T-ara', 'path to output face image directory')
12 |
13 | """
14 | python generate_face.py \
15 | --gt_file_path ./resources/gt/T-ara_gt.txt \
16 | --video_file_path ./resources/video/in/T-ara.mov \
17 | --face_data_path ./resources/database/T-ara
18 |
19 | python generate_face.py \
20 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \
21 | --video_file_path ./resources/video/in/GirlsAloud.mp4 \
22 | --face_data_path ./resources/database/GirlsAloud
23 |
24 | python generate_face.py \
25 | --gt_file_path ./resources/gt/Darling_gt.txt \
26 | --video_file_path ./resources/video/in/Darling.mp4 \
27 | --face_data_path ./resources/database/Darling
28 |
29 | python generate_face.py \
30 | --gt_file_path ./resources/gt/Westlife_gt.txt \
31 | --video_file_path ./resources/video/in/Westlife.mp4 \
32 | --face_data_path ./resources/database/Westlife
33 |
34 | python generate_face.py \
35 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \
36 | --video_file_path ./resources/video/in/BrunoMars.mp4 \
37 | --face_data_path ./resources/database/BrunoMars
38 |
39 | python generate_face.py \
40 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \
41 | --video_file_path ./resources/video/in/HelloBubble.mp4 \
42 | --face_data_path ./resources/database/HelloBubble
43 |
44 | python generate_face.py \
45 | --gt_file_path ./resources/gt/Apink_gt.txt \
46 | --video_file_path ./resources/video/in/Apink.mp4 \
47 | --face_data_path ./resources/database/Apink
48 |
49 | """
50 | def main(args):
51 | f = open(FLAGS.gt_file_path, "r")
52 | detections = []
53 | while True:
54 | line = f.readline()
55 | if not line: break
56 | a = list(map(int, line.split()))
57 | detections.append(a)
58 | detections = np.asarray(detections)
59 | f.close()
60 |
61 | if not os.path.isdir(FLAGS.face_data_path):
62 | os.mkdir(FLAGS.face_data_path)
63 |
64 | vid = cv2.VideoCapture(FLAGS.video_file_path)
65 | frame_index = -1
66 | count = 0
67 | frame_indices = detections[:, 0].astype(np.int)
68 |
69 | object_dict = dict()
70 |
71 | while True:
72 | frame_index += 1
73 | print(f'{frame_index} frame is working on...')
74 | _, img = vid.read()
75 |
76 | if img is None:
77 | logging.warning("Empty Frame")
78 | count+=1
79 | if count < 3:
80 | continue
81 | else:
82 | break
83 |
84 | mask = frame_indices == frame_index
85 |
86 |
87 |
88 | for row in detections[mask]:
89 | frame, id, bbox = row[0], row[1], row[2:]
90 |
91 | if object_dict.get(id):
92 | file_name = object_dict[id]
93 | object_dict[id] += 1
94 | else:
95 | object_dict[id] = 1
96 | file_name = object_dict[id]
97 |
98 | if object_dict[id] % 10 != 0:
99 | continue
100 |
101 | # target_aspect = float(img.shape[1]) / img.shape[0]
102 | # new_width = target_aspect * bbox[3]
103 | # bbox[0] -= (new_width - bbox[2]) / 2
104 | # bbox[2] = new_width
105 | bbox[2:] += bbox[:2]
106 | bbox = bbox.astype(np.int)
107 |
108 | bbox[:2] = np.maximum(0, bbox[:2])
109 | bbox[2:] = np.minimum(np.asarray(img.shape[:2][::-1]) - 1, bbox[2:])
110 |
111 | sx, sy, ex, ey = bbox
112 | # print(bbox)
113 | # print(img.shape)
114 | image = img[sy:ey, sx:ex]
115 |
116 | output_path = os.path.join(FLAGS.face_data_path, str(id))
117 | if not os.path.isdir(output_path):
118 | os.mkdir(output_path)
119 |
120 | cv2.imwrite(os.path.join(FLAGS.face_data_path, str(id), str(object_dict[id])+".jpg"), image)
121 |
122 |
123 |
124 | # frame_indices = detection_mat[:, 0].astype(np.int)
125 | # mask = frame_indices == frame_idx
126 |
127 | # detection_list = []
128 | # for row in detection_mat[mask]:
129 | # bbox, confidence, feature = row[2:6], row[6], row[10:]
130 | # if bbox[3] < min_height:
131 | # continue
132 | # detection_list.append(Detection(bbox, confidence, feature))
133 | # return detection_list
134 |
135 |
136 |
137 | if __name__ == '__main__':
138 | try:
139 | app.run(main)
140 | except SystemExit:
141 | pass
142 |
143 |
144 |
--------------------------------------------------------------------------------
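
generate_face.py walks the ground-truth boxes, counts how often each identity appears, and writes every 10th crop to <face_data_path>/<id>/<count>.jpg. For a track id 3 that appears in at least 30 annotated frames, the resulting layout would look roughly like this (illustrative):

    ./resources/database/T-ara/
        3/
            10.jpg
            20.jpg
            30.jpg

These per-identity folders are what object_tracker.py later loads through its --database flag.
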
/object_tracker copy.py:
--------------------------------------------------------------------------------
1 | import time
2 | from absl import app, flags, logging
3 | from absl.flags import FLAGS
4 |
5 | import tensorflow as tf
6 | import numpy as np
7 | import cv2
8 | import tensorflow as tf
9 | import matplotlib.pyplot as plt
10 | from yolov3_tf2.models import (
11 | YoloV3, YoloV3Tiny
12 | )
13 | from yolov3_tf2.dataset import transform_images
14 | from yolov3_tf2.utils import draw_outputs, convert_boxes
15 |
16 | from deep_sort import preprocessing
17 | from deep_sort import nn_matching
18 | from deep_sort.detection import Detection
19 | from deep_sort.tracker import Tracker
20 | from tools import generate_detections as gdet
21 | from PIL import Image
22 |
23 |
24 | flags.DEFINE_string('classes', './model_data/labels/coco.names', 'path to classes file')
25 | flags.DEFINE_string('weights', './weights/yolov3.tf',
26 | 'path to weights file')
27 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
28 | flags.DEFINE_integer('size', 416, 'resize images to')
29 | flags.DEFINE_string('video', './resources/video/in/test.mp4',
30 | 'path to video file or number for webcam')
31 | flags.DEFINE_string('output', None, 'path to output video')
32 | flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
33 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model')
34 |
35 |
36 | def main(_argv):
37 | # Definition of the parameters
38 | max_cosine_distance = 0.5
39 | nn_budget = None
40 | nms_max_overlap = 1.0
41 |
42 | #initialize deep sort
43 | model_filename = 'weights/mars-small128.pb'
44 | encoder = gdet.create_box_encoder(model_filename, batch_size=128)
45 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
46 | tracker = Tracker(metric)
47 |
48 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
49 | for physical_device in physical_devices:
50 | tf.config.experimental.set_memory_growth(physical_device, True)
51 |
52 | if FLAGS.tiny:
53 | yolo = YoloV3Tiny(classes=FLAGS.num_classes)
54 | else:
55 | yolo = YoloV3(classes=FLAGS.num_classes)
56 |
57 | yolo.load_weights(FLAGS.weights)
58 | logging.info('weights loaded')
59 |
60 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
61 | logging.info('classes loaded')
62 |
63 | times = []
64 |
65 | try:
66 | vid = cv2.VideoCapture(int(FLAGS.video))
67 | except:
68 | vid = cv2.VideoCapture(FLAGS.video)
69 |
70 | out = None
71 |
72 | if FLAGS.output:
73 | # by default VideoCapture returns float instead of int
74 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
75 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
76 | fps = int(vid.get(cv2.CAP_PROP_FPS))
77 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
78 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
79 | list_file = open('detection.txt', 'w')
80 | frame_index = -1
81 |
82 | fps = 0.0
83 | count = 0
84 |
85 | while True:
86 | _, img = vid.read()
87 |
88 | if img is None:
89 | logging.warning("Empty Frame")
90 | time.sleep(0.1)
91 | count+=1
92 | if count < 3:
93 | continue
94 | else:
95 | break
96 |
97 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
98 | img_in = tf.expand_dims(img_in, 0)
99 | img_in = transform_images(img_in, FLAGS.size)
100 |
101 | t1 = time.time()
102 | boxes, scores, classes, nums = yolo.predict(img_in)
103 | # print(boxes, scores, classes, nums)
104 | t2 = time.time()
105 | times.append(t2-t1)
106 | print(f'yolo predict time : {t2-t1}')
107 | times = times[-20:]
108 |
109 | t3 = time.time()
110 | #############
111 | classes = classes[0]
112 | names = []
113 | for i in range(len(classes)):
114 | names.append(class_names[int(classes[i])])
115 | names = np.array(names)
116 | converted_boxes = convert_boxes(img, boxes[0])
117 | features = encoder(img, converted_boxes)
118 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]
119 |
120 | t4 = time.time()
121 | print(f'feature generation time : {t4-t3}')
122 |
123 | #initialize color map
124 | cmap = plt.get_cmap('tab20b')
125 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
126 |
127 | # run non-maxima suppression
128 | boxs = np.array([d.tlwh for d in detections])
129 | scores = np.array([d.confidence for d in detections])
130 | classes = np.array([d.class_name for d in detections])
131 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
132 | detections = [detections[i] for i in indices]
133 |
134 | t5 = time.time()
135 | # Call the tracker
136 | tracker.predict()
137 | tracker.update(detections)
138 | t6 = time.time()
139 | print(f'tracking time : {t6-t5}')
140 |
141 | for track in tracker.tracks:
142 | if not track.is_confirmed() or track.time_since_update > 1:
143 | continue
144 | bbox = track.to_tlbr()
145 | class_name = track.get_class()
146 | color = colors[int(track.track_id) % len(colors)]
147 | color = [i * 255 for i in color]
148 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
149 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1)
150 | cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
151 |
152 | #######
153 | fps = ( fps + (1./(time.time()-t1)) ) / 2
154 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
155 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
156 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
157 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
158 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
159 | if FLAGS.output:
160 | out.write(img)
161 | frame_index = frame_index + 1
162 | list_file.write(str(frame_index)+' ')
163 | if len(converted_boxes) != 0:
164 | for i in range(0,len(converted_boxes)):
165 | list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
166 | list_file.write('\n')
167 | cv2.imshow('output', img)
168 | if cv2.waitKey(1) == ord('q'):
169 | break
170 |
171 | cv2.destroyAllWindows()
172 |
173 |
174 | if __name__ == '__main__':
175 | try:
176 | app.run(main)
177 | except SystemExit:
178 | pass
179 |
--------------------------------------------------------------------------------
/object_tracker.py:
--------------------------------------------------------------------------------
1 | import time
2 | import os
3 | from absl import app, flags, logging
4 | from absl.flags import FLAGS
5 |
6 | from mtcnn import MTCNN
7 | import tensorflow as tf
8 | import numpy as np
9 | import cv2
10 | import matplotlib.pyplot as plt
11 | from yolov3_tf2.models import (
12 | YoloV3, YoloV3Tiny
13 | )
14 | from yolov3_tf2.dataset import transform_images
15 | from yolov3_tf2.utils import draw_outputs, convert_boxes
16 |
17 | from deep_sort import preprocessing
18 | from deep_sort import nn_matching
19 | from deep_sort.detection import Detection
20 | from deep_sort.tracker import Tracker
21 | from tools import generate_detections as gdet
22 | from PIL import Image
23 |
24 | gpus = tf.config.experimental.list_physical_devices('GPU')
25 | if gpus:
26 | try:
27 | # Currently, memory growth needs to be the same across GPUs
28 | for gpu in gpus:
29 | tf.config.experimental.set_memory_growth(gpu, True)
30 | logical_gpus = tf.config.experimental.list_logical_devices('GPU')
31 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
32 | except RuntimeError as e:
33 | # Memory growth must be set before GPUs have been initialized
34 | print(e)
35 |
36 | """
37 | python object_tracker.py \
38 | --classes ./model_data/labels/widerface.names \
39 | --video 0 \
40 | --weights ./weights/yolov3-wider_16000.tf \
41 | --num_classes 1 \
42 | --output_format MP4V \
43 | --output ./resources/video/out/myface.mp4 \
44 |
45 | python object_tracker.py \
46 | --classes ./model_data/labels/widerface.names \
47 | --video ./resources/video/in/2.mp4 \
48 | --weights ./weights/yolov3-wider_16000.tf \
49 | --output_format MP4V \
50 | --database ./resources/database/2 \
51 | --output ./resources/video/out/2.mp4 \
52 | --num_classes 1 \
53 | --max_face_threshold 0.6871912959056619
54 |
55 | python object_tracker.py \
56 | --classes ./model_data/labels/widerface.names \
57 | --video ./resources/video/in/T-ara.mov \
58 | --weights ./weights/yolov3-wider_16000.tf \
59 | --output_format MP4V \
60 | --database ./resources/database/T-ara \
61 | --output ./resources/video/out/T-ara.mp4 \
62 | --num_classes 1 \
63 | --max_face_threshold 0.6871912959056619 \
64 | --eval ./resources/gt/T-ara_pred.txt
65 |
66 | python object_tracker.py \
67 | --classes ./model_data/labels/widerface.names \
68 | --video ./resources/video/in/BrunoMars.mp4 \
69 | --weights ./weights/yolov3-wider_16000.tf \
70 | --output_format MP4V \
71 | --database ./resources/database/BrunoMars \
72 | --output ./resources/video/out/BrunoMars.mp4 \
73 | --num_classes 1 \
74 | --max_face_threshold 0.6871912959056619 \
75 | --eval ./resources/gt/BrunoMars_pred.txt
76 |
77 | python object_tracker.py \
78 | --classes ./model_data/labels/widerface.names \
79 | --video ./resources/video/in/Darling.mp4 \
80 | --weights ./weights/yolov3-wider_16000.tf \
81 | --output_format MP4V \
82 | --database ./resources/database/Darling \
83 | --output ./resources/video/out/Darling.mp4 \
84 | --num_classes 1 \
85 | --max_face_threshold 0.6871912959056619 \
86 | --eval ./resources/gt/Darling_pred.txt
87 |
88 | python object_tracker.py \
89 | --classes ./model_data/labels/widerface.names \
90 | --video ./resources/video/in/GirlsAloud.mp4 \
91 | --weights ./weights/yolov3-wider_16000.tf \
92 | --output_format MP4V \
93 | --database ./resources/database/GirlsAloud \
94 | --output ./resources/video/out/GirlsAloud.mp4 \
95 | --num_classes 1 \
96 | --max_face_threshold 0.6871912959056619 \
97 | --eval ./resources/gt/GirlsAloud_pred.txt
98 |
99 | python object_tracker.py \
100 | --classes ./model_data/labels/widerface.names \
101 | --video ./resources/video/in/HelloBubble.mp4 \
102 | --weights ./weights/yolov3-wider_16000.tf \
103 | --output_format MP4V \
104 | --database ./resources/database/HelloBubble \
105 | --output ./resources/video/out/HelloBubble.mp4 \
106 | --num_classes 1 \
107 | --max_face_threshold 0.6871912959056619 \
108 | --eval ./resources/gt/HelloBubble_pred.txt
109 |
110 | python object_tracker.py \
111 | --classes ./model_data/labels/widerface.names \
112 | --video ./resources/video/in/Westlife.mp4 \
113 | --weights ./weights/yolov3-wider_16000.tf \
114 | --output_format MP4V \
115 | --database ./resources/database/Westlife \
116 | --output ./resources/video/out/Westlife.mp4 \
117 | --num_classes 1 \
118 | --max_face_threshold 0.6871912959056619 \
119 | --eval ./resources/gt/Westlife_pred.txt
120 |
121 | python object_tracker.py \
122 | --classes ./model_data/labels/widerface.names \
123 | --video ./resources/video/in/Apink.mp4 \
124 | --weights ./weights/yolov3-wider_16000.tf \
125 | --output_format MP4V \
126 | --database ./resources/database/Apink \
127 | --output ./resources/video/out/Apink.mp4 \
128 | --num_classes 1 \
129 | --max_face_threshold 0.6871912959056619 \
130 | --eval ./resources/gt/Apink_pred.txt
131 | """
132 |
133 |
134 | flags.DEFINE_string('classes', './model_data/labels/widerface.names', 'path to classes file')
135 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.tf',
136 | 'path to weights file')
137 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny')
138 | flags.DEFINE_integer('size', 416, 'resize images to')
139 | flags.DEFINE_string('video', './resources/video/in/1.mp4',
140 | 'path to video file or number for webcam')
141 | flags.DEFINE_string('database', './resources/database/1',
142 | 'path to face database directory for identification')
143 | flags.DEFINE_string('output', './resources/video/out/1.mp4', 'path to output video')
144 | flags.DEFINE_string('output_format', 'MP4V', 'codec used in VideoWriter when saving video to file')
145 | flags.DEFINE_integer('num_classes', 1, 'number of classes in the model')
146 | flags.DEFINE_float('max_face_threshold', 0.6871912959056619, 'face threshold')
147 | flags.DEFINE_string('eval', "./resources/gt/1_pred.txt", 'txt file path for evaluation')
148 |
149 |
150 | def main(_argv):
151 | # set present path
152 | home = os.getcwd()
153 |
154 | # Definition of the parameters
155 | max_cosine_distance = 0.5
156 | nn_budget = None
157 | nms_max_overlap = 1.0
158 |
159 | #initialize deep sort
160 | # model_filename = 'weights/mars-small128.pb'
161 | model_filename = os.path.join(home, "weights", "arcface_weights.h5")
162 | encoder = gdet.create_box_encoder(model_filename, batch_size=128)
163 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
164 | tracker = Tracker(metric)
165 |
166 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
167 | for physical_device in physical_devices:
168 | tf.config.experimental.set_memory_growth(physical_device, True)
169 |
170 | if FLAGS.tiny:
171 | yolo = YoloV3Tiny(classes=FLAGS.num_classes)
172 | else:
173 | yolo = YoloV3(classes=FLAGS.num_classes)
174 |
175 | yolo.load_weights(FLAGS.weights)
176 | logging.info('weights loaded')
177 |
178 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()]
179 | logging.info('classes loaded')
180 |
181 | times = []
182 |
183 |
184 | # Build the face identification database
185 | face_db = dict()
186 |
187 | db_path = FLAGS.database
188 | for name in os.listdir(db_path):
189 | name_path = os.path.join(db_path, name)
190 | name_db = []
191 | for i in os.listdir(name_path):
192 | if not i.endswith(".jpg"): continue
193 | id_path = os.path.join(name_path, i)
194 | img = cv2.imread(id_path)
195 | # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
196 | # img_in = tf.expand_dims(img_in, 0)
197 | # img_in = transform_images(img_in, FLAGS.size)
198 | # boxes, scores, classes, nums = yolo.predict(img_in)
199 | boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]])
200 | scores = np.asarray([[1]])
201 | converted_boxes = convert_boxes(img, boxes, scores)
202 | features = encoder(img, converted_boxes)
203 |
204 | if features.shape[0] == 0: continue
205 |
206 | for f in range(features.shape[0]):
207 | name_db.append(features[f,:])
208 | name_db = np.asarray(name_db)
209 | face_db[name] = dict({"used": False, "db": name_db})
210 |
211 |
212 | try:
213 | vid = cv2.VideoCapture(int(FLAGS.video))
214 | except:
215 | vid = cv2.VideoCapture(FLAGS.video)
216 |
217 | out = None
218 |
219 | if FLAGS.output:
220 | # by default VideoCapture returns float instead of int
221 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
222 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
223 | fps = int(vid.get(cv2.CAP_PROP_FPS))
224 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
225 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
226 | list_file = open('detection.txt', 'w')
227 | frame_index = -1
228 |
229 | fps = 0.0
230 | count = 0
231 |
232 | detection_list = []
233 |
234 | while True:
235 | _, img = vid.read()
236 |
237 | if img is None:
238 | logging.warning("Empty Frame")
239 | time.sleep(0.1)
240 | count+=1
241 | if count < 3:
242 | continue
243 | else:
244 | break
245 |
246 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
247 | img_in = tf.expand_dims(img_in, 0)
248 | img_in = transform_images(img_in, FLAGS.size)
249 |
250 | t1 = time.time()
251 | boxes, scores, classes, nums = yolo.predict(img_in)
252 |
253 | # print(boxes, scores, classes, nums)
254 | # time.sleep(5)
255 | t2 = time.time()
256 | times.append(t2-t1)
257 | print(f'yolo predict time : {t2-t1}')
258 | times = times[-20:]
259 |
260 | t3 = time.time()
261 | #############
262 | classes = classes[0]
263 | names = []
264 | for i in range(len(classes)):
265 | names.append(class_names[int(classes[i])])
266 | names = np.array(names)
267 | converted_boxes = convert_boxes(img, boxes[0], scores[0])
268 | features = encoder(img, converted_boxes)
269 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)]
270 |
271 | t4 = time.time()
272 | print(f'feature generation time : {t4-t3}')
273 |
274 | #initialize color map
275 | cmap = plt.get_cmap('tab20b')
276 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]
277 |
278 | # run non-maxima suppression
279 | boxs = np.array([d.tlwh for d in detections])
280 | scores = np.array([d.confidence for d in detections])
281 | classes = np.array([d.class_name for d in detections])
282 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
283 | detections = [detections[i] for i in indices]
284 |
285 | t5 = time.time()
286 | # Call the tracker
287 | tracker.predict()
288 | # tracker.update(detections)
289 | tracker.update(detections, face_db, FLAGS.max_face_threshold)
290 | t6 = time.time()
291 | print(f'tracking time : {t6-t5}')
292 |
293 | frame_index = frame_index + 1
294 | for track in tracker.tracks:
295 | if not track.is_confirmed() or track.time_since_update > 1:
296 | continue
297 | bbox = track.to_tlbr()
298 | class_name = track.get_class()
299 | face_name = track.get_face_name()
300 | color = colors[int(track.track_id) % len(colors)]
301 | color = [i * 255 for i in color]
302 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
303 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id))+len(str(face_name)))*23, int(bbox[1])), color, -1)
304 | # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
305 | cv2.putText(img, class_name + "-" + str(track.track_id) + "-" + face_name, (int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
306 | # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2)
307 | # print(class_name + "-" + str(track.track_id))
308 |
309 | # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))}))
310 | if face_name != "":
311 | detection_list.append(dict({"frame_no": str(frame_index), "id": str(face_name), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))}))
312 | #######
313 | fps = ( fps + (1./(time.time()-t1)) ) / 2
314 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names)
315 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30),
316 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
317 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30),
318 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2)
319 | if FLAGS.output:
320 | out.write(img)
321 | # frame_index = frame_index + 1
322 | # list_file.write(str(frame_index)+' ')
323 | # if len(converted_boxes) != 0:
324 | # for i in range(0,len(converted_boxes)):
325 | # list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ')
326 | # list_file.write('\n')
327 | cv2.imshow('output', img)
328 | if cv2.waitKey(1) == ord('q'):
329 | break
330 |
331 | cv2.destroyAllWindows()
332 |
333 |
334 | frame_list = sorted(detection_list, key= lambda x: (int(x["frame_no"]), int(x["id"])))
335 | # pprint.pprint(frame_list)
336 |
337 | f = open(FLAGS.eval, "w")
338 | for a in frame_list:
339 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n")
340 | # close the file
341 | f.close()
342 |
343 |
344 | if __name__ == '__main__':
345 | try:
346 | app.run(main)
347 | except SystemExit:
348 | pass
349 |
--------------------------------------------------------------------------------
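
Unlike the copy above, this version passes the face database and max_face_threshold into tracker.update(), so identification happens inside the modified deep_sort tracker. The database is a dict mapping a person name to {"used": bool, "db": embedding matrix}. A minimal sketch of how such a database can be matched against a track feature with a cosine distance, under the assumption that embeddings are compared the same way as nn_matching's cosine metric (the helper name and the 128-d size are illustrative, not the repository's code):

    import numpy as np

    def cosine_distance(a, b):
        """1 - cosine similarity between one embedding `a` and each row of matrix `b`."""
        a = a / np.linalg.norm(a)
        b = b / np.linalg.norm(b, axis=1, keepdims=True)
        return 1.0 - b @ a

    def match_face(track_feature, face_db, max_face_threshold):
        """Hypothetical helper: return the closest database name, or "" if nothing is close enough."""
        best_name, best_dist = "", max_face_threshold
        for name, entry in face_db.items():
            if entry["used"]:
                continue
            dist = cosine_distance(track_feature, entry["db"]).min()
            if dist < best_dist:
                best_name, best_dist = name, dist
        return best_name

    # toy usage: three stored embeddings for "ironman", plus a query close to them
    rng = np.random.default_rng(0)
    query = rng.normal(size=128)
    face_db = {"ironman": {"used": False,
                           "db": np.stack([query + 0.05 * rng.normal(size=128) for _ in range(3)])}}
    print(match_face(query, face_db, max_face_threshold=0.6871912959056619))   # -> ironman
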
/requirements.txt:
--------------------------------------------------------------------------------
1 | absl-py==0.12.0
2 | argon2-cffi @ file:///C:/ci/argon2-cffi_1613038019788/work
3 | astor==0.8.1
4 | astunparse==1.6.3
5 | async-generator==1.10
6 | atomicwrites==1.4.0
7 | attrs @ file:///tmp/build/80754af9/attrs_1604765588209/work
8 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work
9 | bleach @ file:///tmp/build/80754af9/bleach_1612211392645/work
10 | cached-property==1.5.2
11 | cachetools==4.2.1
12 | certifi==2020.12.5
13 | cffi @ file:///C:/ci/cffi_1613247308275/work
14 | chardet==4.0.0
15 | colorama @ file:///tmp/build/80754af9/colorama_1607707115595/work
16 | cycler==0.10.0
17 | decorator @ file:///tmp/build/80754af9/decorator_1617916966915/work
18 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work
19 | entrypoints==0.3
20 | filelock==3.0.12
21 | flake8==3.9.2
22 | flake8-import-order==0.18.1
23 | flatbuffers==1.12
24 | gast==0.3.3
25 | gdown==3.13.0
26 | google-auth==1.28.0
27 | google-auth-oauthlib==0.4.4
28 | google-pasta==0.2.0
29 | grpcio==1.32.0
30 | h5py==2.10.0
31 | idna==2.10
32 | imageio==2.9.0
33 | importlib-metadata @ file:///C:/ci/importlib-metadata_1617877486026/work
34 | iniconfig==1.1.1
35 | ipykernel @ file:///C:/ci/ipykernel_1596208728219/work/dist/ipykernel-5.3.4-py3-none-any.whl
36 | ipython @ file:///C:/ci/ipython_1617121109687/work
37 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work
38 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1610481889018/work
39 | jedi==0.17.0
40 | Jinja2 @ file:///tmp/build/80754af9/jinja2_1612213139570/work
41 | joblib==1.0.1
42 | jsonschema @ file:///tmp/build/80754af9/jsonschema_1602607155483/work
43 | jupyter==1.0.0
44 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work
45 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1616615302928/work
46 | jupyter-core @ file:///C:/ci/jupyter_core_1612213516947/work
47 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
48 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work
49 | Keras==2.4.3
50 | Keras-Applications==1.0.8
51 | Keras-Preprocessing==1.1.2
52 | kiwisolver==1.3.1
53 | lxml==4.6.3
54 | Markdown==3.3.4
55 | MarkupSafe @ file:///C:/ci/markupsafe_1594405949945/work
56 | matplotlib==3.4.1
57 | mccabe==0.6.1
58 | mistune @ file:///C:/ci/mistune_1594373272338/work
59 | motmetrics==1.2.0
60 | mtcnn==0.1.0
61 | nbclient @ file:///tmp/build/80754af9/nbclient_1614364831625/work
62 | nbconvert @ file:///C:/ci/nbconvert_1601914921407/work
63 | nbformat @ file:///tmp/build/80754af9/nbformat_1617383369282/work
64 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1613680548246/work
65 | networkx==2.5.1
66 | notebook @ file:///C:/ci/notebook_1616443616158/work
67 | numpy==1.19.5
68 | oauthlib==3.1.0
69 | opencv-python==4.5.1.48
70 | opt-einsum==3.3.0
71 | packaging @ file:///tmp/build/80754af9/packaging_1611952188834/work
72 | pandas==1.2.4
73 | pandocfilters @ file:///C:/ci/pandocfilters_1605102427207/work
74 | parso @ file:///tmp/build/80754af9/parso_1617223946239/work
75 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work
76 | Pillow==8.2.0
77 | pluggy==0.13.1
78 | prometheus-client @ file:///tmp/build/80754af9/prometheus_client_1618088486455/work
79 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1616415428029/work
80 | protobuf==3.15.7
81 | py==1.10.0
82 | py-cpuinfo==8.0.0
83 | pyasn1==0.4.8
84 | pyasn1-modules==0.2.8
85 | pycodestyle==2.7.0
86 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
87 | pyflakes==2.3.1
88 | Pygments @ file:///tmp/build/80754af9/pygments_1615143339740/work
89 | pyparsing @ file:///home/linux1/recipes/ci/pyparsing_1610983426697/work
90 | pyrsistent @ file:///C:/ci/pyrsistent_1600123688363/work
91 | PySocks==1.7.1
92 | pytest==6.2.4
93 | pytest-benchmark==3.4.1
94 | python-dateutil @ file:///home/ktietz/src/ci/python-dateutil_1611928101742/work
95 | pytz==2021.1
96 | PyWavelets==1.1.1
97 | pywin32==227
98 | pywinpty==0.5.7
99 | PyYAML==5.4.1
100 | pyzmq==20.0.0
101 | qtconsole @ file:///tmp/build/80754af9/qtconsole_1616775094278/work
102 | QtPy==1.9.0
103 | requests==2.25.1
104 | requests-oauthlib==1.3.0
105 | retina-face==0.0.4
106 | rsa==4.7.2
107 | scikit-image==0.18.1
108 | scikit-learn==0.22.2
109 | scipy==1.6.2
110 | seaborn==0.10.0
111 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1607525499227/work
112 | six @ file:///C:/ci/six_1605205426665/work
113 | sklearn==0.0
114 | tensorboard==2.4.1
115 | tensorboard-plugin-wit==1.8.0
116 | tensorflow==2.4.1
117 | tensorflow-estimator==2.4.0
118 | termcolor==1.1.0
119 | terminado==0.9.4
120 | testpath @ file:///home/ktietz/src/ci/testpath_1611930608132/work
121 | threadpoolctl==2.1.0
122 | tifffile==2021.4.8
123 | toml==0.10.2
124 | tornado @ file:///C:/ci/tornado_1606935947090/work
125 | tqdm==4.60.0
126 | traitlets @ file:///home/ktietz/src/ci/traitlets_1611929699868/work
127 | typing-extensions @ file:///home/ktietz/src/ci_mi/typing_extensions_1612808209620/work
128 | urllib3==1.26.4
129 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work
130 | webencodings==0.5.1
131 | Werkzeug==1.0.1
132 | widgetsnbextension==3.5.1
133 | wincertstore==0.2
134 | wrapt==1.12.1
135 | xmltodict==0.12.0
136 | zipp @ file:///tmp/build/80754af9/zipp_1615904174917/work
137 |
--------------------------------------------------------------------------------
/resources/database/1/ironman/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/1.jpg
--------------------------------------------------------------------------------
/resources/database/1/ironman/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/2.jpg
--------------------------------------------------------------------------------
/resources/database/1/ironman/3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/3.jpg
--------------------------------------------------------------------------------
/resources/database/2/chimchakman/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/1.jpg
--------------------------------------------------------------------------------
/resources/database/2/chimchakman/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/2.jpg
--------------------------------------------------------------------------------
/resources/database/2/juhomin/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/1.jpg
--------------------------------------------------------------------------------
/resources/database/2/juhomin/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/2.jpg
--------------------------------------------------------------------------------
/resources/database/2/kimpoong/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/1.jpg
--------------------------------------------------------------------------------
/resources/database/2/kimpoong/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/2.jpg
--------------------------------------------------------------------------------
/resources/fonts/futur.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/fonts/futur.ttf
--------------------------------------------------------------------------------
/resources/gt/README.txt:
--------------------------------------------------------------------------------
1 | --------------------------------------------------------------------
2 | Music video dataset
3 | --------------------------------------------------------------------
4 |
5 | This file describes the music video dataset as introduced in
6 |
7 | [1] Shun Zhang, Yihong Gong, Jia-Bin Huang, Jongwoo Lim, Jinjun Wang,
8 | Narendra Ahuja and Ming-Hsuan Yang. Tracking Persons-of-Interest via
9 | Adaptive Discriminative Features[C]. The 14th European Conference on
10 | Computer Vision (ECCV), 2016.
11 | [2] The project website: http://shunzhang.me.pn/papers/eccv2016/
12 |
13 | The dataset contains manually annotated face trajectories from 8 music
14 | videos from YouTube: T-ara, Westlife, Pussycat Dolls, Apink, Darling,
15 | Bruno Mars, Hello Bubble and Girls Aloud (as detailed in [1,2]).
16 |
17 | Kindly cite [1] when using the dataset, where appropriate.
18 |
19 | --------------------------------------------------------------------
20 | Description of the files
21 | --------------------------------------------------------------------
22 |
23 | The annotations for each video are stored in an XML file.
24 | We give an XML example below and introduce the XML format.
25 |
26 |
27 | Example:
28 | 1.
29 | 2.
35 |
36 |
37 | The 1st line at the top of our example is the XML declaration
38 | that indicates the version of XML.
39 | The 2nd line indicates the video information, including video name,
40 | start frame and end frame.
41 | The 3rd line indicates the trajectory information, including trajectory
42 | identity, start frame and end frame.
43 | The 4th line contains 5 values per bounding box:
44 | frame, bb_left, bb_top, bb_width, bb_height
45 | (x-bb_left,y-bb_top) is the left-top point of the bounding box.
46 | (bb_width,bb_height) is the width and height of the bounding box.
47 |
48 | -- EOF
49 |
--------------------------------------------------------------------------------
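
The XML example above lost its angle-bracket markup in this listing. Based on the line-by-line description that follows it, a ground-truth file is structured roughly as below; the tag and attribute names and the numbers are illustrative, not copied from the dataset:

    <?xml version="1.0" encoding="utf-8"?>
    <video name="SomeVideo" start_frame="1" end_frame="5000">
      <trajectory id="1" start_frame="1" end_frame="120">
        <box frame="1" bb_left="316" bb_top="88" bb_width="64" bb_height="64"/>
        <!-- one box entry per annotated frame of the trajectory -->
      </trajectory>
    </video>
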
/test.md:
--------------------------------------------------------------------------------
1 | # Initial results
2 |
3 | ### T-ara
4 |
5 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
6 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
7 | |53.6%|59.0%|49.5%|76.6%|90.4%|1176|3406|3752|517|42.6%|71%|0.241|4710|
8 |
9 | ### GirlsAloud
10 |
11 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
12 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
13 | |39.0%|42.6%|36.4%|73.9%|85.3%|2087|4275|4687|1122|32.6%|64.6%|0.314|6630|
14 |
15 | ### Darling
16 |
17 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
18 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
19 | |3.2%|44.2%|42.6%|79.7%|82.1%|1654|1935|3048|743|30.4%|65.7%|0.267|6180|
20 |
21 | ### Westlife
22 |
23 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
24 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
25 | |64.3%|61.3%|68.4%|87.8%|77.9%|2828|1389|1809|562|47.0%|64.7%|0.411|6870|
26 |
27 | ### BrunoMars
28 |
29 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
30 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
31 | |40.5%|40.7%|40.8%|74.1%|73.1%|4560|4330|5128|1010|16.1%|78.9%|0.539|8460|
32 |
33 | ### HelloBubble
34 |
35 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
36 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
37 | |41.9%|45.3%|39.1%|73.9%|85.2%| 673|1363|1381|301|34.6%|69.7%|0.256|4920|
38 |
39 | ### Apink
40 |
41 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn|
42 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|
43 | |56.2% | 58.9% | 53.8% |79.5% |86.8% | 883| 1491| 1234 | 337 |50.4% | 66.8% | 0.15 |4650|
44 |
45 |
46 | # Face cropping based on n = 20
47 | ## T-ara
48 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
49 | 0 61.3% 67.4% 56.6% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3
50 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3
51 |
52 | ## GirlsAloud
53 |
54 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
55 | 0 49.8% 53.6% 46.9% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2
56 | 1 106.1% 100.0% 100.0% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2
57 |
58 | ## Darling
59 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
60 | 0 53.6% 54.7% 53.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6
61 | 1 101.2% 100.0% 100.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6
62 |
63 | ## Westlife
64 |
65 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
66 | 0 71.0% 67.2% 75.9% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2
67 | 1 93.4% 100.0% 100.0% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2
68 |
69 | ## BrunoMars
70 |
71 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
72 | 0 52.1% 51.9% 52.8% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8
73 | 1 98.7% 100.0% 100.0% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8
74 |
75 | ## HelloBubble
76 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
77 | 0 58.3% 62.2% 55.1% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2
78 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2
79 |
80 | ## Apink
81 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
82 | 0 69.5% 72.4% 66.9% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4
83 | 1 103.8% 100.0% 100.0% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4
84 |
85 |
86 | # n = 10
87 |
88 | ## T-ara
89 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
90 | 0 67.4% 74.1% 62.2% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3
91 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3
92 |
93 | ## GirlsAloud
94 |
95 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
96 | 0 55.0% 59.1% 51.8% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3
97 | 1 106.0% 100.0% 100.0% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3
98 |
99 | ## Darling
100 |
101 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
102 | 0 59.2% 60.2% 58.6% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6
103 | 1 101.0% 100.0% 100.0% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6
104 | ## Westlife
105 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
106 | 0 73.6% 69.7% 78.8% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1
107 | 1 93.4% 100.0% 100.0% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1
108 |
109 | ## BrunoMars
110 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
111 | 0 57.8% 57.5% 58.6% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8
112 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8
113 |
114 | ## HelloBubble
115 |
116 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
117 | 0 64.0% 68.3% 60.5% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2
118 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2
119 | ## Apink
120 |
121 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
122 | 0 71.9% 74.9% 69.3% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4
123 | 1 103.8% 100.0% 100.0% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4
124 |
125 | # Only using the track id, i.e., ids extracted exactly as in plain Deep SORT
126 |
127 | ## T-ara
128 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
129 | 0 5.7% 6.2% 5.3% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0
130 | 1 107.4% 100.0% 100.0% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0
131 |
132 | ## GirlsAloud
133 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
134 | 0 7.3% 7.5% 7.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0
135 | 1 103.5% 100.0% 100.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0
136 |
137 |
138 | ## Darling
139 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
140 | 0 5.4% 5.3% 5.5% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0
141 | 1 98.9% 100.0% 100.0% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0
142 |
143 | ## Westlife
144 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
145 | 0 7.2% 6.4% 8.4% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0
146 | 1 86.4% 100.0% 100.0% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0
147 |
148 | ## BrunoMars
149 |
150 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
151 | 0 6.8% 6.6% 7.1% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1
152 | 1 96.2% 100.0% 100.0% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1
153 | (face_tracker)
154 | ## HelloBubble
155 |
156 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
157 | 0 9.8% 10.3% 9.3% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2
158 | 1 104.9% 100.0% 100.0% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2
159 | ## Apink
160 |
161 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
162 | 0 6.4% 6.6% 6.2% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0
163 | 1 102.7% 100.0% 100.0% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0
164 |
165 |
166 |
167 | # Results extracted with the face id instead of the track id, assigned only when a track is first matched
168 |
169 |
170 | ## T-ara
171 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
172 | 0 53.2% 66.5% 49.1% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1
173 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1
174 |
175 | ## GirlsAloud
176 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
177 | 0 51.4% 60.9% 48.5% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1
178 | 1 105.9% 100.0% 100.0% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1
179 |
180 | ## Darling
181 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
182 | 0 53.2% 57.4% 53.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2
183 | 1 100.3% 100.0% 100.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2
184 |
185 | ## Westlife
186 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
187 | 0 49.7% 59.5% 56.8% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0
188 | 1 87.5% 100.0% 100.0% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0
189 |
190 | ## BrunoMars
191 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
192 | 0 47.1% 51.8% 47.8% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7
193 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7
194 |
195 | ## HelloBubble
196 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
197 | 0 51.8% 58.7% 48.9% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3
198 | 1 105.8% 100.0% 100.0% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3
199 |
200 | ## Apink
201 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm
202 | 0 61.8% 65.9% 59.6% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1
203 | 1 103.7% 100.0% 100.0% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1
204 |
--------------------------------------------------------------------------------
/tools/freeze_model.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import argparse
3 | import tensorflow as tf
4 | import tensorflow.contrib.slim as slim
5 |
6 |
7 | def _batch_norm_fn(x, scope=None):
8 | if scope is None:
9 | scope = tf.get_variable_scope().name + "/bn"
10 | return slim.batch_norm(x, scope=scope)
11 |
12 |
13 | def create_link(
14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu,
15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3),
16 | regularizer=None, is_first=False, summarize_activations=True):
17 | if is_first:
18 | network = incoming
19 | else:
20 | network = _batch_norm_fn(incoming, scope=scope + "/bn")
21 | network = nonlinearity(network)
22 | if summarize_activations:
23 | tf.summary.histogram(scope+"/activations", network)
24 |
25 | pre_block_network = network
26 | post_block_network = network_builder(pre_block_network, scope)
27 |
28 | incoming_dim = pre_block_network.get_shape().as_list()[-1]
29 | outgoing_dim = post_block_network.get_shape().as_list()[-1]
30 | if incoming_dim != outgoing_dim:
31 | assert outgoing_dim == 2 * incoming_dim, \
32 |             "%d != %d" % (outgoing_dim, 2 * incoming_dim)
33 | projection = slim.conv2d(
34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None,
35 | scope=scope+"/projection", weights_initializer=weights_initializer,
36 | biases_initializer=None, weights_regularizer=regularizer)
37 | network = projection + post_block_network
38 | else:
39 | network = incoming + post_block_network
40 | return network
41 |
42 |
43 | def create_inner_block(
44 | incoming, scope, nonlinearity=tf.nn.elu,
45 | weights_initializer=tf.truncated_normal_initializer(1e-3),
46 | bias_initializer=tf.zeros_initializer(), regularizer=None,
47 | increase_dim=False, summarize_activations=True):
48 | n = incoming.get_shape().as_list()[-1]
49 | stride = 1
50 | if increase_dim:
51 | n *= 2
52 | stride = 2
53 |
54 | incoming = slim.conv2d(
55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME",
56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer,
57 | biases_initializer=bias_initializer, weights_regularizer=regularizer,
58 | scope=scope + "/1")
59 | if summarize_activations:
60 | tf.summary.histogram(incoming.name + "/activations", incoming)
61 |
62 | incoming = slim.dropout(incoming, keep_prob=0.6)
63 |
64 | incoming = slim.conv2d(
65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME",
66 | normalizer_fn=None, weights_initializer=weights_initializer,
67 | biases_initializer=bias_initializer, weights_regularizer=regularizer,
68 | scope=scope + "/2")
69 | return incoming
70 |
71 |
72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu,
73 |                    weights_initializer=tf.truncated_normal_initializer(1e-3),
74 | bias_initializer=tf.zeros_initializer(), regularizer=None,
75 | increase_dim=False, is_first=False,
76 | summarize_activations=True):
77 |
78 | def network_builder(x, s):
79 | return create_inner_block(
80 | x, s, nonlinearity, weights_initializer, bias_initializer,
81 | regularizer, increase_dim, summarize_activations)
82 |
83 | return create_link(
84 | incoming, network_builder, scope, nonlinearity, weights_initializer,
85 | regularizer, is_first, summarize_activations)
86 |
87 |
88 | def _create_network(incoming, reuse=None, weight_decay=1e-8):
89 | nonlinearity = tf.nn.elu
90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
91 | conv_bias_init = tf.zeros_initializer()
92 | conv_regularizer = slim.l2_regularizer(weight_decay)
93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
94 | fc_bias_init = tf.zeros_initializer()
95 | fc_regularizer = slim.l2_regularizer(weight_decay)
96 |
97 | def batch_norm_fn(x):
98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")
99 |
100 | network = incoming
101 | network = slim.conv2d(
102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1",
104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
105 | weights_regularizer=conv_regularizer)
106 | network = slim.conv2d(
107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2",
109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
110 | weights_regularizer=conv_regularizer)
111 |
112 | # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
113 | # architecture in Table 1 of the paper. Information on how this affects
114 | # performance on MOT 16 training sequences can be found in
115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10
116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")
117 |
118 | network = residual_block(
119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
120 | conv_regularizer, increase_dim=False, is_first=True)
121 | network = residual_block(
122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
123 | conv_regularizer, increase_dim=False)
124 |
125 | network = residual_block(
126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
127 | conv_regularizer, increase_dim=True)
128 | network = residual_block(
129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
130 | conv_regularizer, increase_dim=False)
131 |
132 | network = residual_block(
133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
134 | conv_regularizer, increase_dim=True)
135 | network = residual_block(
136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
137 | conv_regularizer, increase_dim=False)
138 |
139 | feature_dim = network.get_shape().as_list()[-1]
140 | network = slim.flatten(network)
141 |
142 | network = slim.dropout(network, keep_prob=0.6)
143 | network = slim.fully_connected(
144 | network, feature_dim, activation_fn=nonlinearity,
145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
146 | scope="fc1", weights_initializer=fc_weight_init,
147 | biases_initializer=fc_bias_init)
148 |
149 | features = network
150 |
151 | # Features in rows, normalize axis 1.
152 | features = slim.batch_norm(features, scope="ball", reuse=reuse)
153 | feature_norm = tf.sqrt(
154 | tf.constant(1e-8, tf.float32) +
155 | tf.reduce_sum(tf.square(features), [1], keepdims=True))
156 | features = features / feature_norm
157 | return features, None
158 |
159 |
160 | def _network_factory(weight_decay=1e-8):
161 |
162 | def factory_fn(image, reuse):
163 | with slim.arg_scope([slim.batch_norm, slim.dropout],
164 | is_training=False):
165 | with slim.arg_scope([slim.conv2d, slim.fully_connected,
166 | slim.batch_norm, slim.layer_norm],
167 | reuse=reuse):
168 | features, logits = _create_network(
169 | image, reuse=reuse, weight_decay=weight_decay)
170 | return features, logits
171 |
172 | return factory_fn
173 |
174 |
175 | def _preprocess(image):
176 | image = image[:, :, ::-1] # BGR to RGB
177 | return image
178 |
179 |
180 | def parse_args():
181 | """Parse command line arguments.
182 | """
183 | parser = argparse.ArgumentParser(description="Freeze old model")
184 | parser.add_argument(
185 | "--checkpoint_in",
186 | default="resources/networks/mars-small128.ckpt-68577",
187 | help="Path to checkpoint file")
188 | parser.add_argument(
189 | "--graphdef_out",
190 | default="resources/networks/mars-small128.pb")
191 | return parser.parse_args()
192 |
193 |
194 | def main():
195 | args = parse_args()
196 |
197 | with tf.Session(graph=tf.Graph()) as session:
198 | input_var = tf.placeholder(
199 | tf.uint8, (None, 128, 64, 3), name="images")
200 | image_var = tf.map_fn(
201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
202 | back_prop=False)
203 |
204 | factory_fn = _network_factory()
205 | features, _ = factory_fn(image_var, reuse=None)
206 | features = tf.identity(features, name="features")
207 |
208 | saver = tf.train.Saver(slim.get_variables_to_restore())
209 | saver.restore(session, args.checkpoint_in)
210 |
211 | output_graph_def = tf.graph_util.convert_variables_to_constants(
212 | session, tf.get_default_graph().as_graph_def(),
213 | [features.name.split(":")[0]])
214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle:
215 | file_handle.write(output_graph_def.SerializeToString())
216 |
217 |
218 | if __name__ == "__main__":
219 | main()
220 |
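221 | # Editor's note: a minimal sketch (defined but not called) of how the frozen
222 | # graph written by main() can be loaded again for inference. It assumes the
223 | # default --graphdef_out path and the "images"/"features" tensor names above.
224 | def _example_load_frozen_graph(path="resources/networks/mars-small128.pb"):
225 |     session = tf.Session()
226 |     with tf.gfile.GFile(path, "rb") as file_handle:
227 |         graph_def = tf.GraphDef()
228 |         graph_def.ParseFromString(file_handle.read())
229 |     tf.import_graph_def(graph_def, name="net")
230 |     input_var = tf.get_default_graph().get_tensor_by_name("net/images:0")
231 |     output_var = tf.get_default_graph().get_tensor_by_name("net/features:0")
232 |     return session, input_var, output_var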
--------------------------------------------------------------------------------
/tools/generate_detections.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import os
3 | import errno
4 | import argparse
5 | import numpy as np
6 | import cv2
7 | import tensorflow.compat.v1 as tf
8 | from tensorflow.keras.preprocessing import image as keras_image
9 |
10 | # Added: make the project root importable so the deepface ArcFace model can be loaded
11 | import sys
12 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))
13 | from deepface.basemodels import ArcFace
14 | import tensorflow as tf2
15 |
16 | gpus = tf.config.experimental.list_physical_devices('GPU')
17 | if gpus:
18 | try:
19 | # Currently, memory growth needs to be the same across GPUs
20 | for gpu in gpus:
21 | tf.config.experimental.set_memory_growth(gpu, True)
22 | logical_gpus = tf.config.experimental.list_logical_devices('GPU')
23 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
24 | except RuntimeError as e:
25 | # Memory growth must be set before GPUs have been initialized
26 | print(e)
27 |
28 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
29 | if len(physical_devices) > 0:
30 | tf.config.experimental.set_memory_growth(physical_devices[0], True)
31 |
32 | def _run_in_batches(f, data_dict, out, batch_size):
33 | data_len = len(out)
34 | num_batches = int(data_len / batch_size)
35 |
36 | s, e = 0, 0
37 | for i in range(num_batches):
38 | s, e = i * batch_size, (i + 1) * batch_size
39 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
40 | out[s:e] = f(batch_data_dict)
41 | if e < len(out):
42 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
43 | out[e:] = f(batch_data_dict)
44 |
45 |
46 | def extract_image_patch(image, bbox, patch_shape):
47 | """Extract image patch from bounding box.
48 |
49 | Parameters
50 | ----------
51 | image : ndarray
52 | The full image.
53 | bbox : array_like
54 | The bounding box in format (x, y, width, height).
55 | patch_shape : Optional[array_like]
56 | This parameter can be used to enforce a desired patch shape
57 | (height, width). First, the `bbox` is adapted to the aspect ratio
58 | of the patch shape, then it is clipped at the image boundaries.
59 | If None, the shape is computed from :arg:`bbox`.
60 |
61 | Returns
62 | -------
63 | ndarray | NoneType
64 | An image patch showing the :arg:`bbox`, optionally reshaped to
65 | :arg:`patch_shape`.
66 | Returns None if the bounding box is empty or fully outside of the image
67 | boundaries.
68 |
69 | """
70 | bbox = np.array(bbox)
71 | if patch_shape is not None:
72 | # correct aspect ratio to patch shape
73 | target_aspect = float(patch_shape[1]) / patch_shape[0]
74 | new_width = target_aspect * bbox[3]
75 | bbox[0] -= (new_width - bbox[2]) / 2
76 | bbox[2] = new_width
77 |
78 | # convert to top left, bottom right
79 | bbox[2:] += bbox[:2]
80 | bbox = bbox.astype(np.int)
81 |
82 | # clip at image boundaries
83 | bbox[:2] = np.maximum(0, bbox[:2])
84 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
85 | if np.any(bbox[:2] >= bbox[2:]):
86 | return None
87 | sx, sy, ex, ey = bbox
88 | image = image[sy:ey, sx:ex]
89 | image = cv2.resize(image, tuple(patch_shape[::-1]))
90 |
91 |     # Added: convert the patch to a float array and normalize to [0, 1]
92 | image = keras_image.img_to_array(image)
93 | # image = np.expand_dims(image, axis = 0)
94 | image /= 255 #normalize input in [0, 1]
95 |
96 | return image
97 |
98 |
99 | class ImageEncoder(object):
100 |
101 | def __init__(self, checkpoint_filename, input_name="images",
102 | output_name="features"):
103 |         self.session = ArcFace.loadModel(checkpoint_filename) # loads the ArcFace model successfully
104 | self.feature_dim = self.session.layers[-1].output_shape[1]
105 | self.image_shape = list(self.session.input_shape[1:])
106 |
107 | # self.session = tf.Session()
108 | # with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
109 | # graph_def = tf.GraphDef()
110 | # graph_def.ParseFromString(file_handle.read())
111 | # tf.import_graph_def(graph_def, name="net")
112 | # self.input_var = tf.get_default_graph().get_tensor_by_name(
113 | # "%s:0" % input_name)
114 | # self.output_var = tf.get_default_graph().get_tensor_by_name(
115 | # "%s:0" % output_name)
116 |
117 | # assert len(self.output_var.get_shape()) == 2
118 | # assert len(self.input_var.get_shape()) == 4
119 | # self.feature_dim = self. output_var.get_shape().as_list()[-1]
120 | # self.image_shape = self.input_var.get_shape().as_list()[1:]
121 |
122 | def __call__(self, data_x, batch_size=32):
123 | out = np.zeros((len(data_x), self.feature_dim), np.float32)
124 |
125 | if data_x.shape[0] == 0:
126 | return out
127 |
128 | import time
129 | t1 = time.time()
130 |
131 |         # Added: run the ArcFace model directly on the batch of patches
132 | out = self.session.predict(data_x)
133 |
134 | # _run_in_batches(
135 | # lambda x: self.session.run(self.output_var, feed_dict=x),
136 | # {self.input_var: data_x}, out, batch_size)
137 | # t2 = time.time()
138 | # print("patch inference time : ", t2-t1)
139 | return out
140 |
141 |
142 | def create_box_encoder(model_filename, input_name="images",
143 | output_name="features", batch_size=32):
144 | image_encoder = ImageEncoder(model_filename, input_name, output_name)
145 | image_shape = image_encoder.image_shape
146 |
147 | def encoder(image, boxes):
148 | image_patches = []
149 | for box in boxes:
150 |             patch = extract_image_patch(image, box, image_shape[:2]) # the patch is automatically resized to the model's input shape
151 |
152 | if patch is None:
153 | print("WARNING: Failed to extract image patch: %s." % str(box))
154 | # patch = np.random.uniform(
155 | # 0., 255., image_shape).astype(np.uint8)
156 | patch = np.random.uniform(
157 | 0., 1., image_shape).astype(np.float32)
158 | image_patches.append(patch)
159 | image_patches = np.asarray(image_patches)
160 | return image_encoder(image_patches, batch_size)
161 |
162 | return encoder
163 |
164 |
165 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
166 | """Generate detections with features.
167 |
168 | Parameters
169 | ----------
170 | encoder : Callable[image, ndarray] -> ndarray
171 | The encoder function takes as input a BGR color image and a matrix of
172 | bounding boxes in format `(x, y, w, h)` and returns a matrix of
173 | corresponding feature vectors.
174 | mot_dir : str
175 | Path to the MOTChallenge directory (can be either train or test).
176 | output_dir
177 | Path to the output directory. Will be created if it does not exist.
178 | detection_dir
179 | Path to custom detections. The directory structure should be the default
180 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
181 | standard MOTChallenge detections.
182 |
183 | """
184 | if detection_dir is None:
185 | detection_dir = mot_dir
186 | try:
187 | os.makedirs(output_dir)
188 | except OSError as exception:
189 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
190 | pass
191 | else:
192 | raise ValueError(
193 | "Failed to created output directory '%s'" % output_dir)
194 |
195 | for sequence in os.listdir(mot_dir):
196 | print("Processing %s" % sequence)
197 | sequence_dir = os.path.join(mot_dir, sequence)
198 |
199 | image_dir = os.path.join(sequence_dir, "img1")
200 | image_filenames = {
201 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
202 | for f in os.listdir(image_dir)}
203 |
204 | detection_file = os.path.join(
205 | detection_dir, sequence, "det/det.txt")
206 | detections_in = np.loadtxt(detection_file, delimiter=',')
207 | detections_out = []
208 |
209 | frame_indices = detections_in[:, 0].astype(np.int)
210 | min_frame_idx = frame_indices.astype(np.int).min()
211 | max_frame_idx = frame_indices.astype(np.int).max()
212 | for frame_idx in range(min_frame_idx, max_frame_idx + 1):
213 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
214 | mask = frame_indices == frame_idx
215 | rows = detections_in[mask]
216 |
217 | if frame_idx not in image_filenames:
218 | print("WARNING could not find image for frame %d" % frame_idx)
219 | continue
220 | bgr_image = cv2.imread(
221 | image_filenames[frame_idx], cv2.IMREAD_COLOR)
222 | features = encoder(bgr_image, rows[:, 2:6].copy())
223 | detections_out += [np.r_[(row, feature)] for row, feature
224 | in zip(rows, features)]
225 |
226 | output_filename = os.path.join(output_dir, "%s.npy" % sequence)
227 | np.save(
228 | output_filename, np.asarray(detections_out), allow_pickle=False)
229 |
230 |
231 | def parse_args():
232 | """Parse command line arguments.
233 | """
234 | parser = argparse.ArgumentParser(description="Re-ID feature extractor")
235 | parser.add_argument(
236 | "--model",
237 | default="resources/networks/mars-small128.pb",
238 |         help="Path to frozen inference graph protobuf.")
239 | parser.add_argument(
240 | "--mot_dir", help="Path to MOTChallenge directory (train or test)",
241 | required=True)
242 | parser.add_argument(
243 | "--detection_dir", help="Path to custom detections. Defaults to "
244 |         "standard MOT detections. Directory structure should be the default "
245 | "MOTChallenge structure: [sequence]/det/det.txt", default=None)
246 | parser.add_argument(
247 | "--output_dir", help="Output directory. Will be created if it does not"
248 | " exist.", default="detections")
249 | return parser.parse_args()
250 |
251 |
252 | def main():
253 | args = parse_args()
254 | encoder = create_box_encoder(args.model, batch_size=32)
255 | generate_detections(encoder, args.mot_dir, args.output_dir,
256 | args.detection_dir)
257 |
258 |
259 | if __name__ == "__main__":
260 | main()
261 |
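262 | # Editor's note: an illustrative sketch (defined but not called) of how the box
263 | # encoder is meant to be used on a single frame. The image/model paths and the
264 | # face box are placeholder assumptions; boxes are given as (x, y, w, h).
265 | def _example_encode_single_frame(image_path, model_path):
266 |     encoder = create_box_encoder(model_path, batch_size=32)
267 |     bgr_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
268 |     boxes = np.array([[50., 40., 96., 96.]])  # one (x, y, w, h) face box
269 |     features = encoder(bgr_image, boxes)      # shape: (num_boxes, feature_dim)
270 |     return features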
--------------------------------------------------------------------------------
/xml2txt.py:
--------------------------------------------------------------------------------
1 | import xml.etree.ElementTree as elemTree
2 | import os
3 | import pprint
4 | from absl import app, flags, logging
5 | from absl.flags import FLAGS
6 |
7 | """
8 | python xml2txt.py \
9 | --gt_path ./resources/gt/T-ara_gt.xml \
10 | --gt_file_path ./resources/gt/T-ara_gt.txt
11 |
12 | python xml2txt.py \
13 | --gt_path ./resources/gt/GirlsAloud_gt.xml \
14 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt
15 |
16 | python xml2txt.py \
17 | --gt_path ./resources/gt/Darling_gt.xml \
18 | --gt_file_path ./resources/gt/Darling_gt.txt
19 |
20 | python xml2txt.py \
21 | --gt_path ./resources/gt/Westlife_gt.xml \
22 | --gt_file_path ./resources/gt/Westlife_gt.txt
23 |
24 | python xml2txt.py \
25 | --gt_path ./resources/gt/BrunoMars_gt.xml \
26 | --gt_file_path ./resources/gt/BrunoMars_gt.txt
27 |
28 | python xml2txt.py \
29 | --gt_path ./resources/gt/HelloBubble_gt.xml \
30 | --gt_file_path ./resources/gt/HelloBubble_gt.txt
31 |
32 | python xml2txt.py \
33 | --gt_path ./resources/gt/Apink_gt.xml \
34 | --gt_file_path ./resources/gt/Apink_gt.txt
35 | """
36 |
37 | flags.DEFINE_string('gt_path', './resources/gt/T-ara_gt.xml', 'path to gt')
38 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to save converted file')
39 |
40 |
41 | def main(args):
42 | tree = elemTree.parse(FLAGS.gt_path)
43 |
44 | root=tree.getroot()
45 |
46 | print(root.tag, root.attrib)
47 | print(root.find("Trajectory"))
48 |
49 | frame_list = []
50 |
51 | for traj in root:
52 | for f in traj:
53 | a = f.attrib
54 | a["frame_no"] = str(int(a["frame_no"])-1)
55 | a["id"] = traj.attrib["obj_id"]
56 | frame_list.append(a)
57 |
58 |
59 | frame_list = sorted(frame_list, key= lambda x: (int(x["frame_no"]), int(x["id"])))
60 | # pprint.pprint(frame_list)
61 |
62 |
63 | f = open(FLAGS.gt_file_path, 'w')
64 |
65 | for a in frame_list:
66 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n")
67 |     # close the file
68 | f.close()
69 |
70 |
71 |
72 | if __name__ == '__main__':
73 | try:
74 | app.run(main)
75 | except SystemExit:
76 | pass
77 |
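78 | # Editor's note: an illustrative example of the conversion (the child tag name
79 | # below is an assumption; the script only relies on the attributes). Given
80 | #
81 | #   <Trajectory obj_id="3">
82 | #     <Frame frame_no="1" x="120" y="45" width="64" height="80"/>
83 | #   </Trajectory>
84 | #
85 | # the script writes one space-separated line per box, with frame numbers
86 | # shifted to be 0-based:
87 | #
88 | #   0 3 120 45 64 80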
--------------------------------------------------------------------------------
/yolov3_tf2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/yolov3_tf2/__init__.py
--------------------------------------------------------------------------------
/yolov3_tf2/dataset.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from absl.flags import FLAGS
3 |
4 | @tf.function
5 | def transform_targets_for_output(y_true, grid_size, anchor_idxs):
6 | # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor))
7 | N = tf.shape(y_true)[0]
8 |
9 | # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class])
10 | y_true_out = tf.zeros(
11 | (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6))
12 |
13 | anchor_idxs = tf.cast(anchor_idxs, tf.int32)
14 |
15 | indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True)
16 | updates = tf.TensorArray(tf.float32, 1, dynamic_size=True)
17 | idx = 0
18 | for i in tf.range(N):
19 | for j in tf.range(tf.shape(y_true)[1]):
20 | if tf.equal(y_true[i][j][2], 0):
21 | continue
22 | anchor_eq = tf.equal(
23 | anchor_idxs, tf.cast(y_true[i][j][5], tf.int32))
24 |
25 | if tf.reduce_any(anchor_eq):
26 | box = y_true[i][j][0:4]
27 | box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2
28 |
29 | anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32)
30 | grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32)
31 |
32 | # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class)
33 | indexes = indexes.write(
34 | idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]])
35 | updates = updates.write(
36 | idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]])
37 | idx += 1
38 |
39 | # tf.print(indexes.stack())
40 | # tf.print(updates.stack())
41 |
42 | return tf.tensor_scatter_nd_update(
43 | y_true_out, indexes.stack(), updates.stack())
44 |
45 |
46 | def transform_targets(y_train, anchors, anchor_masks, size):
47 | y_outs = []
48 | grid_size = size // 32
49 |
50 | # calculate anchor index for true boxes
51 | anchors = tf.cast(anchors, tf.float32)
52 | anchor_area = anchors[..., 0] * anchors[..., 1]
53 | box_wh = y_train[..., 2:4] - y_train[..., 0:2]
54 | box_wh = tf.tile(tf.expand_dims(box_wh, -2),
55 | (1, 1, tf.shape(anchors)[0], 1))
56 | box_area = box_wh[..., 0] * box_wh[..., 1]
57 | intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \
58 | tf.minimum(box_wh[..., 1], anchors[..., 1])
59 | iou = intersection / (box_area + anchor_area - intersection)
60 | anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32)
61 | anchor_idx = tf.expand_dims(anchor_idx, axis=-1)
62 |
63 | y_train = tf.concat([y_train, anchor_idx], axis=-1)
64 |
65 | for anchor_idxs in anchor_masks:
66 | y_outs.append(transform_targets_for_output(
67 | y_train, grid_size, anchor_idxs))
68 | grid_size *= 2
69 |
70 | return tuple(y_outs)
71 |
72 |
73 | def transform_images(x_train, size):
74 | x_train = tf.image.resize(x_train, (size, size))
75 | x_train = x_train / 255
76 | return x_train
77 |
78 |
79 | # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline
80 | # Commented out fields are not required in our project
81 | IMAGE_FEATURE_MAP = {
82 | # 'image/width': tf.io.FixedLenFeature([], tf.int64),
83 | # 'image/height': tf.io.FixedLenFeature([], tf.int64),
84 | # 'image/filename': tf.io.FixedLenFeature([], tf.string),
85 | # 'image/source_id': tf.io.FixedLenFeature([], tf.string),
86 | # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string),
87 | 'image/encoded': tf.io.FixedLenFeature([], tf.string),
88 | # 'image/format': tf.io.FixedLenFeature([], tf.string),
89 | 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
90 | 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
91 | 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
92 | 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
93 | 'image/object/class/text': tf.io.VarLenFeature(tf.string),
94 | # 'image/object/class/label': tf.io.VarLenFeature(tf.int64),
95 | # 'image/object/difficult': tf.io.VarLenFeature(tf.int64),
96 | # 'image/object/truncated': tf.io.VarLenFeature(tf.int64),
97 | # 'image/object/view': tf.io.VarLenFeature(tf.string),
98 | }
99 |
100 |
101 | def parse_tfrecord(tfrecord, class_table, size):
102 | x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP)
103 | x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3)
104 | x_train = tf.image.resize(x_train, (size, size))
105 |
106 | class_text = tf.sparse.to_dense(
107 | x['image/object/class/text'], default_value='')
108 | labels = tf.cast(class_table.lookup(class_text), tf.float32)
109 | y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']),
110 | tf.sparse.to_dense(x['image/object/bbox/ymin']),
111 | tf.sparse.to_dense(x['image/object/bbox/xmax']),
112 | tf.sparse.to_dense(x['image/object/bbox/ymax']),
113 | labels], axis=1)
114 |
115 | paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]]
116 | y_train = tf.pad(y_train, paddings)
117 |
118 | return x_train, y_train
119 |
120 |
121 | def load_tfrecord_dataset(file_pattern, class_file, size=416):
122 | LINE_NUMBER = -1 # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER
123 | class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer(
124 | class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1)
125 |
126 | files = tf.data.Dataset.list_files(file_pattern)
127 | dataset = files.flat_map(tf.data.TFRecordDataset)
128 | return dataset.map(lambda x: parse_tfrecord(x, class_table, size))
129 |
130 |
131 | def load_fake_dataset():
132 | x_train = tf.image.decode_jpeg(
133 | open('./data/girl.png', 'rb').read(), channels=3)
134 | x_train = tf.expand_dims(x_train, axis=0)
135 |
136 | labels = [
137 | [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0],
138 | [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56],
139 | [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67]
140 | ] + [[0, 0, 0, 0, 0]] * 5
141 | y_train = tf.convert_to_tensor(labels, tf.float32)
142 | y_train = tf.expand_dims(y_train, axis=0)
143 |
144 | return tf.data.Dataset.from_tensor_slices((x_train, y_train))
145 |
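146 | # Editor's note: a minimal sketch (defined but not called) of how the helpers in
147 | # this module are typically combined into a training pipeline. The tfrecord
148 | # pattern and class file are placeholder assumptions; anchors and masks are
149 | # taken from yolov3_tf2.models.
150 | def _example_training_pipeline(tfrecord_pattern, class_file, size=416, batch=8):
151 |     from yolov3_tf2.models import yolo_anchors, yolo_anchor_masks
152 |     ds = load_tfrecord_dataset(tfrecord_pattern, class_file, size)
153 |     ds = ds.shuffle(buffer_size=512).batch(batch)
154 |     return ds.map(lambda x, y: (transform_images(x, size),
155 |                                 transform_targets(y, yolo_anchors,
156 |                                                   yolo_anchor_masks, size)))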
--------------------------------------------------------------------------------
/yolov3_tf2/models.py:
--------------------------------------------------------------------------------
1 | from absl import flags
2 | from absl.flags import FLAGS
3 | import numpy as np
4 | import tensorflow as tf
5 | from tensorflow.keras import Model
6 | from tensorflow.keras.layers import (
7 | Add,
8 | Concatenate,
9 | Conv2D,
10 | Input,
11 | Lambda,
12 | LeakyReLU,
13 | MaxPool2D,
14 | UpSampling2D,
15 | ZeroPadding2D,
16 | BatchNormalization,
17 | )
18 | from tensorflow.keras.regularizers import l2
19 | from tensorflow.keras.losses import (
20 | binary_crossentropy,
21 | sparse_categorical_crossentropy
22 | )
23 | from .utils import broadcast_iou
24 |
25 | flags.DEFINE_integer('yolo_max_boxes', 100,
26 | 'maximum number of boxes per image')
27 | flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold')
28 | flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold')
29 |
30 | yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
31 | (59, 119), (116, 90), (156, 198), (373, 326)],
32 | np.float32) / 416
33 | yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]])
34 |
35 | yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58),
36 | (81, 82), (135, 169), (344, 319)],
37 | np.float32) / 416
38 | yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]])
39 |
40 |
41 | def DarknetConv(x, filters, size, strides=1, batch_norm=True):
42 | if strides == 1:
43 | padding = 'same'
44 | else:
45 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) # top left half-padding
46 | padding = 'valid'
47 | x = Conv2D(filters=filters, kernel_size=size,
48 | strides=strides, padding=padding,
49 | use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x)
50 | if batch_norm:
51 | x = BatchNormalization()(x)
52 | x = LeakyReLU(alpha=0.1)(x)
53 | return x
54 |
55 |
56 | def DarknetResidual(x, filters):
57 | prev = x
58 | x = DarknetConv(x, filters // 2, 1)
59 | x = DarknetConv(x, filters, 3)
60 | x = Add()([prev, x])
61 | return x
62 |
63 |
64 | def DarknetBlock(x, filters, blocks):
65 | x = DarknetConv(x, filters, 3, strides=2)
66 | for _ in range(blocks):
67 | x = DarknetResidual(x, filters)
68 | return x
69 |
70 |
71 | def Darknet(name=None):
72 | x = inputs = Input([None, None, 3])
73 | x = DarknetConv(x, 32, 3)
74 | x = DarknetBlock(x, 64, 1)
75 | x = DarknetBlock(x, 128, 2) # skip connection
76 | x = x_36 = DarknetBlock(x, 256, 8) # skip connection
77 | x = x_61 = DarknetBlock(x, 512, 8)
78 | x = DarknetBlock(x, 1024, 4)
79 | return tf.keras.Model(inputs, (x_36, x_61, x), name=name)
80 |
81 |
82 | def DarknetTiny(name=None):
83 | x = inputs = Input([None, None, 3])
84 | x = DarknetConv(x, 16, 3)
85 | x = MaxPool2D(2, 2, 'same')(x)
86 | x = DarknetConv(x, 32, 3)
87 | x = MaxPool2D(2, 2, 'same')(x)
88 | x = DarknetConv(x, 64, 3)
89 | x = MaxPool2D(2, 2, 'same')(x)
90 | x = DarknetConv(x, 128, 3)
91 | x = MaxPool2D(2, 2, 'same')(x)
92 | x = x_8 = DarknetConv(x, 256, 3) # skip connection
93 | x = MaxPool2D(2, 2, 'same')(x)
94 | x = DarknetConv(x, 512, 3)
95 | x = MaxPool2D(2, 1, 'same')(x)
96 | x = DarknetConv(x, 1024, 3)
97 | return tf.keras.Model(inputs, (x_8, x), name=name)
98 |
99 |
100 | def YoloConv(filters, name=None):
101 | def yolo_conv(x_in):
102 | if isinstance(x_in, tuple):
103 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
104 | x, x_skip = inputs
105 |
106 | # concat with skip connection
107 | x = DarknetConv(x, filters, 1)
108 | x = UpSampling2D(2)(x)
109 | x = Concatenate()([x, x_skip])
110 | else:
111 | x = inputs = Input(x_in.shape[1:])
112 |
113 | x = DarknetConv(x, filters, 1)
114 | x = DarknetConv(x, filters * 2, 3)
115 | x = DarknetConv(x, filters, 1)
116 | x = DarknetConv(x, filters * 2, 3)
117 | x = DarknetConv(x, filters, 1)
118 | return Model(inputs, x, name=name)(x_in)
119 | return yolo_conv
120 |
121 |
122 | def YoloConvTiny(filters, name=None):
123 | def yolo_conv(x_in):
124 | if isinstance(x_in, tuple):
125 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:])
126 | x, x_skip = inputs
127 |
128 | # concat with skip connection
129 | x = DarknetConv(x, filters, 1)
130 | x = UpSampling2D(2)(x)
131 | x = Concatenate()([x, x_skip])
132 | else:
133 | x = inputs = Input(x_in.shape[1:])
134 | x = DarknetConv(x, filters, 1)
135 |
136 | return Model(inputs, x, name=name)(x_in)
137 | return yolo_conv
138 |
139 |
140 | def YoloOutput(filters, anchors, classes, name=None):
141 | def yolo_output(x_in):
142 | x = inputs = Input(x_in.shape[1:])
143 | x = DarknetConv(x, filters * 2, 3)
144 | x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False)
145 | x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2],
146 | anchors, classes + 5)))(x)
147 | return tf.keras.Model(inputs, x, name=name)(x_in)
148 | return yolo_output
149 |
150 |
151 | # As tensorflow lite doesn't support tf.size used in tf.meshgrid,
152 | # we reimplemented a simple meshgrid function that uses basic tf functions.
153 | def _meshgrid(n_a, n_b):
154 |
155 | return [
156 | tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)),
157 | tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a))
158 | ]
159 |
160 |
161 | def yolo_boxes(pred, anchors, classes):
162 | # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes))
163 | grid_size = tf.shape(pred)[1:3]
164 | box_xy, box_wh, objectness, class_probs = tf.split(
165 | pred, (2, 2, 1, classes), axis=-1)
166 |
167 | box_xy = tf.sigmoid(box_xy)
168 | objectness = tf.sigmoid(objectness)
169 | class_probs = tf.sigmoid(class_probs)
170 | pred_box = tf.concat((box_xy, box_wh), axis=-1) # original xywh for loss
171 |
172 | # !!! grid[x][y] == (y, x)
173 | grid = _meshgrid(grid_size[1],grid_size[0])
174 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) # [gx, gy, 1, 2]
175 |
176 | box_xy = (box_xy + tf.cast(grid, tf.float32)) / \
177 | tf.cast(grid_size, tf.float32)
178 | box_wh = tf.exp(box_wh) * anchors
179 |
180 | box_x1y1 = box_xy - box_wh / 2
181 | box_x2y2 = box_xy + box_wh / 2
182 | bbox = tf.concat([box_x1y1, box_x2y2], axis=-1)
183 |
184 | return bbox, objectness, class_probs, pred_box
185 |
186 |
187 | def yolo_nms(outputs, anchors, masks, classes):
188 | # boxes, conf, type
189 | b, c, t = [], [], []
190 |
191 | for o in outputs:
192 | b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1])))
193 | c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1])))
194 | t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1])))
195 |
196 | bbox = tf.concat(b, axis=1)
197 | confidence = tf.concat(c, axis=1)
198 | class_probs = tf.concat(t, axis=1)
199 |
200 | scores = confidence * class_probs
201 |
202 | dscores = tf.squeeze(scores, axis=0)
203 | scores = tf.reduce_max(dscores,[1])
204 | bbox = tf.reshape(bbox,(-1,4))
205 | classes = tf.argmax(dscores,1)
206 | selected_indices, selected_scores = tf.image.non_max_suppression_with_scores(
207 | boxes=bbox,
208 | scores=scores,
209 | max_output_size=FLAGS.yolo_max_boxes,
210 | iou_threshold=FLAGS.yolo_iou_threshold,
211 | score_threshold=FLAGS.yolo_score_threshold,
212 | soft_nms_sigma=0.5
213 | )
214 |
215 | num_valid_nms_boxes = tf.shape(selected_indices)[0]
216 |
217 | selected_indices = tf.concat([selected_indices,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes, tf.int32)], 0)
218 | selected_scores = tf.concat([selected_scores,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes,tf.float32)], -1)
219 |
220 | boxes=tf.gather(bbox, selected_indices)
221 | boxes = tf.expand_dims(boxes, axis=0)
222 | scores=selected_scores
223 | scores = tf.expand_dims(scores, axis=0)
224 | classes = tf.gather(classes,selected_indices)
225 | classes = tf.expand_dims(classes, axis=0)
226 | valid_detections=num_valid_nms_boxes
227 | valid_detections = tf.expand_dims(valid_detections, axis=0)
228 |
229 | return boxes, scores, classes, valid_detections
230 |
231 |
232 | def YoloV3(size=None, channels=3, anchors=yolo_anchors,
233 | masks=yolo_anchor_masks, classes=80, training=False):
234 | x = inputs = Input([size, size, channels], name='input')
235 |
236 | x_36, x_61, x = Darknet(name='yolo_darknet')(x)
237 |
238 | x = YoloConv(512, name='yolo_conv_0')(x)
239 | output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x)
240 |
241 | x = YoloConv(256, name='yolo_conv_1')((x, x_61))
242 | output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x)
243 |
244 | x = YoloConv(128, name='yolo_conv_2')((x, x_36))
245 | output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x)
246 |
247 | if training:
248 | return Model(inputs, (output_0, output_1, output_2), name='yolov3')
249 |
250 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
251 | name='yolo_boxes_0')(output_0)
252 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
253 | name='yolo_boxes_1')(output_1)
254 | boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes),
255 | name='yolo_boxes_2')(output_2)
256 |
257 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
258 | name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3]))
259 |
260 | return Model(inputs, outputs, name='yolov3')
261 |
262 |
263 | def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors,
264 | masks=yolo_tiny_anchor_masks, classes=80, training=False):
265 | x = inputs = Input([size, size, channels], name='input')
266 |
267 | x_8, x = DarknetTiny(name='yolo_darknet')(x)
268 |
269 | x = YoloConvTiny(256, name='yolo_conv_0')(x)
270 | output_0 = YoloOutput(256, len(masks[0]), classes, name='yolo_output_0')(x)
271 |
272 | x = YoloConvTiny(128, name='yolo_conv_1')((x, x_8))
273 | output_1 = YoloOutput(128, len(masks[1]), classes, name='yolo_output_1')(x)
274 |
275 | if training:
276 | return Model(inputs, (output_0, output_1), name='yolov3')
277 |
278 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes),
279 | name='yolo_boxes_0')(output_0)
280 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes),
281 | name='yolo_boxes_1')(output_1)
282 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes),
283 | name='yolo_nms')((boxes_0[:3], boxes_1[:3]))
284 | return Model(inputs, outputs, name='yolov3_tiny')
285 |
286 |
287 | def YoloLoss(anchors, classes=80, ignore_thresh=0.5):
288 | def yolo_loss(y_true, y_pred):
289 | # 1. transform all pred outputs
290 | # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls))
291 | pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes(
292 | y_pred, anchors, classes)
293 | pred_xy = pred_xywh[..., 0:2]
294 | pred_wh = pred_xywh[..., 2:4]
295 |
296 | # 2. transform all true outputs
297 | # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls))
298 | true_box, true_obj, true_class_idx = tf.split(
299 | y_true, (4, 1, 1), axis=-1)
300 | true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2
301 | true_wh = true_box[..., 2:4] - true_box[..., 0:2]
302 |
303 | # give higher weights to small boxes
304 | box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1]
305 |
306 | # 3. inverting the pred box equations
307 | grid_size = tf.shape(y_true)[1]
308 | grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size))
309 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2)
310 | true_xy = true_xy * tf.cast(grid_size, tf.float32) - \
311 | tf.cast(grid, tf.float32)
312 | true_wh = tf.math.log(true_wh / anchors)
313 | true_wh = tf.where(tf.math.is_inf(true_wh),
314 | tf.zeros_like(true_wh), true_wh)
315 |
316 | # 4. calculate all masks
317 | obj_mask = tf.squeeze(true_obj, -1)
318 | # ignore false positive when iou is over threshold
319 | best_iou = tf.map_fn(
320 | lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask(
321 | x[1], tf.cast(x[2], tf.bool))), axis=-1),
322 | (pred_box, true_box, obj_mask),
323 | tf.float32)
324 | ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)
325 |
326 | # 5. calculate all losses
327 | xy_loss = obj_mask * box_loss_scale * \
328 | tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1)
329 | wh_loss = obj_mask * box_loss_scale * \
330 | tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1)
331 | obj_loss = binary_crossentropy(true_obj, pred_obj)
332 | obj_loss = obj_mask * obj_loss + \
333 | (1 - obj_mask) * ignore_mask * obj_loss
334 | # TODO: use binary_crossentropy instead
335 | class_loss = obj_mask * sparse_categorical_crossentropy(
336 | true_class_idx, pred_class)
337 |
338 | # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1)
339 | xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3))
340 | wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3))
341 | obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3))
342 | class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3))
343 |
344 | return xy_loss + wh_loss + obj_loss + class_loss
345 | return yolo_loss
346 |
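347 | # Editor's note: a minimal sketch (defined but not called) of how this module is
348 | # typically used for inference. The weights path is a placeholder assumption;
349 | # transform_images comes from yolov3_tf2.dataset and load_darknet_weights from
350 | # yolov3_tf2.utils.
351 | def _example_detect(image, weights_path, size=416, num_classes=80):
352 |     from yolov3_tf2.dataset import transform_images
353 |     from yolov3_tf2.utils import load_darknet_weights
354 |     yolo = YoloV3(size=size, classes=num_classes)
355 |     load_darknet_weights(yolo, weights_path)  # raw darknet .weights file
356 |     img = tf.expand_dims(image, 0)            # add a batch dimension
357 |     img = transform_images(img, size)         # resize and scale to [0, 1]
358 |     return yolo(img)                          # boxes, scores, classes, valid_detections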
--------------------------------------------------------------------------------
/yolov3_tf2/utils.py:
--------------------------------------------------------------------------------
1 | from absl import logging
2 | import numpy as np
3 | import tensorflow as tf
4 | import cv2
5 |
6 | YOLOV3_LAYER_LIST = [
7 | 'yolo_darknet',
8 | 'yolo_conv_0',
9 | 'yolo_output_0',
10 | 'yolo_conv_1',
11 | 'yolo_output_1',
12 | 'yolo_conv_2',
13 | 'yolo_output_2',
14 | ]
15 |
16 | YOLOV3_TINY_LAYER_LIST = [
17 | 'yolo_darknet',
18 | 'yolo_conv_0',
19 | 'yolo_output_0',
20 | 'yolo_conv_1',
21 | 'yolo_output_1',
22 | ]
23 |
24 |
25 | def load_darknet_weights(model, weights_file, tiny=False):
26 | wf = open(weights_file, 'rb')
27 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5)
28 |
29 | if tiny:
30 | layers = YOLOV3_TINY_LAYER_LIST
31 | else:
32 | layers = YOLOV3_LAYER_LIST
33 |
34 | for layer_name in layers:
35 | sub_model = model.get_layer(layer_name)
36 | for i, layer in enumerate(sub_model.layers):
37 | if not layer.name.startswith('conv2d'):
38 | continue
39 | batch_norm = None
40 | if i + 1 < len(sub_model.layers) and \
41 | sub_model.layers[i + 1].name.startswith('batch_norm'):
42 | batch_norm = sub_model.layers[i + 1]
43 |
44 | logging.info("{}/{} {}".format(
45 | sub_model.name, layer.name, 'bn' if batch_norm else 'bias'))
46 |
47 | filters = layer.filters
48 | size = layer.kernel_size[0]
49 | in_dim = layer.get_input_shape_at(0)[-1]
50 |
51 | if batch_norm is None:
52 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters)
53 | else:
54 | # darknet [beta, gamma, mean, variance]
55 | bn_weights = np.fromfile(
56 | wf, dtype=np.float32, count=4 * filters)
57 | # tf [gamma, beta, mean, variance]
58 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]]
59 |
60 | # darknet shape (out_dim, in_dim, height, width)
61 | conv_shape = (filters, in_dim, size, size)
62 | conv_weights = np.fromfile(
63 | wf, dtype=np.float32, count=np.product(conv_shape))
64 | # tf shape (height, width, in_dim, out_dim)
65 | conv_weights = conv_weights.reshape(
66 | conv_shape).transpose([2, 3, 1, 0])
67 |
68 | if batch_norm is None:
69 | layer.set_weights([conv_weights, conv_bias])
70 | else:
71 | layer.set_weights([conv_weights])
72 | batch_norm.set_weights(bn_weights)
73 |
74 | assert len(wf.read()) == 0, 'failed to read all data'
75 | wf.close()
76 |
77 |
78 | def broadcast_iou(box_1, box_2):
79 | # box_1: (..., (x1, y1, x2, y2))
80 | # box_2: (N, (x1, y1, x2, y2))
81 |
82 | # broadcast boxes
83 | box_1 = tf.expand_dims(box_1, -2)
84 | box_2 = tf.expand_dims(box_2, 0)
85 | # new_shape: (..., N, (x1, y1, x2, y2))
86 | new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2))
87 | box_1 = tf.broadcast_to(box_1, new_shape)
88 | box_2 = tf.broadcast_to(box_2, new_shape)
89 |
90 | int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) -
91 | tf.maximum(box_1[..., 0], box_2[..., 0]), 0)
92 | int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) -
93 | tf.maximum(box_1[..., 1], box_2[..., 1]), 0)
94 | int_area = int_w * int_h
95 | box_1_area = (box_1[..., 2] - box_1[..., 0]) * \
96 | (box_1[..., 3] - box_1[..., 1])
97 | box_2_area = (box_2[..., 2] - box_2[..., 0]) * \
98 | (box_2[..., 3] - box_2[..., 1])
99 | return int_area / (box_1_area + box_2_area - int_area)
100 |
101 |
102 | def draw_outputs(img, outputs, class_names):
103 | boxes, objectness, classes, nums = outputs
104 | boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0]
105 | wh = np.flip(img.shape[0:2])
106 | for i in range(nums):
107 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
108 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
109 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
110 | img = cv2.putText(img, '{} {:.4f}'.format(
111 | class_names[int(classes[i])], objectness[i]),
112 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2)
113 | return img
114 |
115 |
116 | def draw_labels(x, y, class_names):
117 | img = x.numpy()
118 | boxes, classes = tf.split(y, (4, 1), axis=-1)
119 | classes = classes[..., 0]
120 | wh = np.flip(img.shape[0:2])
121 | for i in range(len(boxes)):
122 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32))
123 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32))
124 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2)
125 | img = cv2.putText(img, class_names[classes[i]],
126 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL,
127 | 1, (0, 0, 255), 2)
128 | return img
129 |
130 |
131 | def freeze_all(model, frozen=True):
132 | model.trainable = not frozen
133 | if isinstance(model, tf.keras.Model):
134 | for l in model.layers:
135 | freeze_all(l, frozen)
136 |
137 |
138 | def convert_boxes(image, boxes, scores):
139 | returned_boxes = []
140 | for box, score in zip(boxes, scores):
141 | if score == 0.0: continue
142 | box[0] = (box[0] * image.shape[1]).astype(int)
143 | box[1] = (box[1] * image.shape[0]).astype(int)
144 | box[2] = (box[2] * image.shape[1]).astype(int)
145 | box[3] = (box[3] * image.shape[0]).astype(int)
146 | box[2] = int(box[2]-box[0])
147 | box[3] = int(box[3]-box[1])
148 | box = box.astype(int)
149 | box = box.tolist()
150 | if box != [0,0,0,0]:
151 | returned_boxes.append(box)
152 | return returned_boxes
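153 | 
154 | 
155 | # Editor's note: convert_boxes turns the normalized (x1, y1, x2, y2) boxes that
156 | # YoloV3 outputs into pixel-space (x, y, w, h) lists, the box format the deep
157 | # SORT detections expect. A tiny illustrative call (values are made up):
158 | #
159 | #   frame = np.zeros((416, 416, 3), dtype=np.uint8)
160 | #   boxes = np.array([[0.25, 0.25, 0.50, 0.75]])  # normalized corners
161 | #   scores = np.array([0.9])
162 | #   convert_boxes(frame, boxes.copy(), scores)    # -> [[104, 104, 104, 208]]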
--------------------------------------------------------------------------------