├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── README.md ├── conda-cpu.yml ├── conda-gpu.yml ├── convert.py ├── deep_sort ├── __init__.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── track.py └── tracker.py ├── deepface ├── DeepFace.py ├── __init__.py ├── basemodels │ ├── ArcFace.py │ ├── Boosting.py │ ├── DeepID.py │ ├── DlibResNet.py │ ├── DlibWrapper.py │ ├── Facenet.py │ ├── FbDeepFace.py │ ├── OpenFace.py │ ├── VGGFace.py │ └── __init__.py ├── commons │ ├── __init__.py │ ├── distance.py │ ├── functions.py │ └── realtime.py ├── detectors │ ├── DlibWrapper.py │ ├── FaceDetector.py │ ├── MtcnnWrapper.py │ ├── OpenCvWrapper.py │ ├── RetinaFaceWrapper.py │ ├── SsdWrapper.py │ └── __init__.py ├── extendedmodels │ ├── Age.py │ ├── Emotion.py │ ├── Gender.py │ ├── Race.py │ └── __init__.py └── models │ ├── __init__.py │ └── face-recognition-ensemble-model.txt ├── detection.txt ├── evaluation.py ├── generate_face.py ├── object_tracker copy.py ├── object_tracker.py ├── requirements.txt ├── resources ├── database │ ├── 1 │ │ └── ironman │ │ │ ├── 1.jpg │ │ │ ├── 2.jpg │ │ │ └── 3.jpg │ └── 2 │ │ ├── chimchakman │ │ ├── 1.jpg │ │ └── 2.jpg │ │ ├── juhomin │ │ ├── 1.jpg │ │ └── 2.jpg │ │ └── kimpoong │ │ ├── 1.jpg │ │ └── 2.jpg ├── fonts │ └── futur.ttf └── gt │ ├── Apink_gt.xml │ ├── BrunoMars_gt.xml │ ├── Darling_gt.xml │ ├── GirlsAloud_gt.xml │ ├── HelloBubble_gt.xml │ ├── README.txt │ └── Westlife_gt.xml ├── test.md ├── tools ├── freeze_model.py └── generate_detections.py ├── xml2txt.py └── yolov3_tf2 ├── __init__.py ├── dataset.py ├── models.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.weights 3 | *.tar 4 | *.tfrecord 5 | /checkpoints/* 6 | /serving/* 7 | /logs/ 8 | /Untitled.ipynb 9 | /output.jpg 10 | /data/voc2012_raw/ 11 | 12 | # Created by https://www.gitignore.io/api/python 13 | # Edit at https://www.gitignore.io/?templates=python 14 | 15 | ### Python ### 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | pip-wheel-metadata/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | # End of https://www.gitignore.io/api/python 141 | 142 | # video 143 | resources/video 144 | *.mp4 145 | *.avi 146 | *.mov 147 | 148 | # weight 149 | model_data/ 150 | weights/ 151 | resources/ -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
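    // The "configurations" entry below launches the current file with the
    // project's face-tracking arguments. A hypothetical shell equivalent,
    // assuming object_tracker.py is the entry point, would be:
    //   python object_tracker.py \
    //     --classes ./model_data/labels/widerface.names \
    //     --video ./resources/video/in/T-ara.mov \
    //     --weights ./weights/yolov3-wider_16000.tf \
    //     --output_format MP4V \
    //     --database ./resources/database/T-ara \
    //     --output ./resources/video/out/T-ara.mp4 \
    //     --num_classes 1 \
    //     --max_face_threshold 0.68 \
    //     --eval ./resources/gt/T-ara_pred.txt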
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "args": [ 14 | "--classes", "./model_data/labels/widerface.names", 15 | "--video", "./resources/video/in/T-ara.mov", 16 | "--weights", "./weights/yolov3-wider_16000.tf", 17 | "--output_format", "MP4V", 18 | "--database", "./resources/database/T-ara", 19 | "--output", "./resources/video/out/T-ara.mp4", 20 | "--num_classes", "1", 21 | "--max_face_threshold", "0.68", 22 | "--eval", "./resources/gt/T-ara_pred.txt", 23 | ] 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\choiwansik\\anaconda3\\envs\\face_tracker\\python.exe" 3 | } -------------------------------------------------------------------------------- /conda-cpu.yml: -------------------------------------------------------------------------------- 1 | name: tracker-cpu 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - matplotlib 7 | - opencv 8 | - pip: 9 | - tensorflow==2.4.1 10 | - lxml 11 | - tqdm 12 | - seaborn 13 | - pillow 14 | -------------------------------------------------------------------------------- /conda-gpu.yml: -------------------------------------------------------------------------------- 1 | name: yolov3-tf2-gpu 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - matplotlib 7 | - opencv 8 | - cudnn 9 | - cudatoolkit==10.1.243 10 | - pip: 11 | - tensorflow==2.4.1 12 | - lxml 13 | - tqdm 14 | - -e . 
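# A usage sketch, assuming conda is installed: this environment can be
# created and activated with
#   conda env create -f conda-gpu.yml && conda activate yolov3-tf2-gpu
# or, for the CPU-only setup defined in conda-cpu.yml,
#   conda env create -f conda-cpu.yml && conda activate tracker-cpu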
15 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | from absl import app, flags, logging 2 | from absl.flags import FLAGS 3 | import numpy as np 4 | from yolov3_tf2.models import YoloV3, YoloV3Tiny 5 | from yolov3_tf2.utils import load_darknet_weights 6 | import tensorflow as tf 7 | 8 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.weights', 'path to weights file') 9 | flags.DEFINE_string('output', './weights/yolov3-wider_16000.tf', 'path to output') 10 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 11 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model') 12 | 13 | 14 | def main(_argv): 15 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 16 | if len(physical_devices) > 0: 17 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 18 | 19 | if FLAGS.tiny: 20 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 21 | else: 22 | yolo = YoloV3(classes=FLAGS.num_classes) 23 | yolo.summary() 24 | logging.info('model created') 25 | 26 | load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny) 27 | logging.info('weights loaded') 28 | 29 | img = np.random.random((1, 320, 320, 3)).astype(np.float32) 30 | output = yolo(img) 31 | logging.info('sanity check passed') 32 | 33 | yolo.save_weights(FLAGS.output) 34 | logging.info('weights saved') 35 | 36 | 37 | if __name__ == '__main__': 38 | try: 39 | app.run(main) 40 | except SystemExit: 41 | pass 42 | 43 | """ 44 | python convert.py --weights ./weights/yolov3-wider_16000.weights \ 45 | --output ./weights/yolov3-wider_16000.tf \ 46 | --num_classes 1 47 | """ -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | class_name : ndarray 25 | Detector class. 26 | feature : ndarray | NoneType 27 | A feature vector that describes the object contained in this image. 28 | 29 | """ 30 | 31 | def __init__(self, tlwh, confidence, class_name, feature): 32 | self.tlwh = np.asarray(tlwh, dtype=np.float) 33 | self.confidence = float(confidence) 34 | self.class_name = class_name 35 | self.feature = np.asarray(feature, dtype=np.float32) 36 | 37 | def get_class(self): 38 | return self.class_name 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 
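        For example, a box `(x, y, w, h) = (10, 20, 30, 40)` converts to
        `(10, 20, 40, 60)`.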
43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. 
- iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 
105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 
206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 
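    # The metric builds the full cost matrix; costs above the gating threshold
    # are clamped to max_distance + 1e-5 so the Hungarian solver
    # (scipy.optimize.linear_sum_assignment) never prefers them, and any
    # assignment whose cost still exceeds max_distance is discarded afterwards.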
54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | indices = linear_sum_assignment(cost_matrix) 59 | indices = np.asarray(indices) 60 | indices = np.transpose(indices) 61 | matches, unmatched_tracks, unmatched_detections = [], [], [] 62 | for col, detection_idx in enumerate(detection_indices): 63 | if col not in indices[:, 1]: 64 | unmatched_detections.append(detection_idx) 65 | for row, track_idx in enumerate(track_indices): 66 | if row not in indices[:, 0]: 67 | unmatched_tracks.append(track_idx) 68 | for row, col in indices: 69 | track_idx = track_indices[row] 70 | detection_idx = detection_indices[col] 71 | if cost_matrix[row, col] > max_distance: 72 | unmatched_tracks.append(track_idx) 73 | unmatched_detections.append(detection_idx) 74 | else: 75 | matches.append((track_idx, detection_idx)) 76 | return matches, unmatched_tracks, unmatched_detections 77 | 78 | 79 | def matching_cascade( 80 | distance_metric, max_distance, cascade_depth, tracks, detections, 81 | track_indices=None, detection_indices=None): 82 | """Run matching cascade. 83 | 84 | Parameters 85 | ---------- 86 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 87 | The distance metric is given a list of tracks and detections as well as 88 | a list of N track indices and M detection indices. The metric should 89 | return the NxM dimensional cost matrix, where element (i, j) is the 90 | association cost between the i-th track in the given track indices and 91 | the j-th detection in the given detection indices. 92 | max_distance : float 93 | Gating threshold. Associations with cost larger than this value are 94 | disregarded. 95 | cascade_depth: int 96 | The cascade depth, should be se to the maximum track age. 97 | tracks : List[track.Track] 98 | A list of predicted tracks at the current time step. 99 | detections : List[detection.Detection] 100 | A list of detections at the current time step. 101 | track_indices : Optional[List[int]] 102 | List of track indices that maps rows in `cost_matrix` to tracks in 103 | `tracks` (see description above). Defaults to all tracks. 104 | detection_indices : Optional[List[int]] 105 | List of detection indices that maps columns in `cost_matrix` to 106 | detections in `detections` (see description above). Defaults to all 107 | detections. 108 | 109 | Returns 110 | ------- 111 | (List[(int, int)], List[int], List[int]) 112 | Returns a tuple with the following three entries: 113 | * A list of matched track and detection indices. 114 | * A list of unmatched track indices. 115 | * A list of unmatched detection indices. 
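
    Tracks are matched in order of increasing `time_since_update`, so tracks
    that were updated most recently are associated first and longer-occluded
    tracks only compete for the detections that remain.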
116 | 117 | """ 118 | if track_indices is None: 119 | track_indices = list(range(len(tracks))) 120 | if detection_indices is None: 121 | detection_indices = list(range(len(detections))) 122 | 123 | unmatched_detections = detection_indices 124 | matches = [] 125 | for level in range(cascade_depth): 126 | if len(unmatched_detections) == 0: # No detections left 127 | break 128 | 129 | track_indices_l = [ 130 | k for k in track_indices 131 | if tracks[k].time_since_update == 1 + level 132 | ] 133 | if len(track_indices_l) == 0: # Nothing to match at this level 134 | continue 135 | 136 | matches_l, _, unmatched_detections = \ 137 | min_cost_matching( 138 | distance_metric, max_distance, tracks, detections, 139 | track_indices_l, unmatched_detections) 140 | matches += matches_l 141 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 142 | return matches, unmatched_tracks, unmatched_detections 143 | 144 | 145 | def gate_cost_matrix( 146 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 147 | gated_cost=INFTY_COST, only_position=False): 148 | """Invalidate infeasible entries in cost matrix based on the state 149 | distributions obtained by Kalman filtering. 150 | 151 | Parameters 152 | ---------- 153 | kf : The Kalman filter. 154 | cost_matrix : ndarray 155 | The NxM dimensional cost matrix, where N is the number of track indices 156 | and M is the number of detection indices, such that entry (i, j) is the 157 | association cost between `tracks[track_indices[i]]` and 158 | `detections[detection_indices[j]]`. 159 | tracks : List[track.Track] 160 | A list of predicted tracks at the current time step. 161 | detections : List[detection.Detection] 162 | A list of detections at the current time step. 163 | track_indices : List[int] 164 | List of track indices that maps rows in `cost_matrix` to tracks in 165 | `tracks` (see description above). 166 | detection_indices : List[int] 167 | List of detection indices that maps columns in `cost_matrix` to 168 | detections in `detections` (see description above). 169 | gated_cost : Optional[float] 170 | Entries in the cost matrix corresponding to infeasible associations are 171 | set this value. Defaults to a very large value. 172 | only_position : Optional[bool] 173 | If True, only the x, y position of the state distribution is considered 174 | during gating. Defaults to False. 175 | 176 | Returns 177 | ------- 178 | ndarray 179 | Returns the modified cost matrix. 180 | 181 | """ 182 | gating_dim = 2 if only_position else 4 183 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 184 | measurements = np.asarray( 185 | [detections[i].to_xyah() for i in detection_indices]) 186 | for row, track_idx in enumerate(track_indices): 187 | track = tracks[track_idx] 188 | gating_distance = kf.gating_distance( 189 | track.mean, track.covariance, measurements, only_position) 190 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 191 | return cost_matrix 192 | -------------------------------------------------------------------------------- /deep_sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 
14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | # distances = _cosine_distance(x, y) # 변화 준부분 96 | distances = custom_cosine_similarity(x, y) 97 | return distances.min(axis=0) 98 | 99 | 100 | class NearestNeighborDistanceMetric(object): 101 | """ 102 | A nearest neighbor distance metric that, for each target, returns 103 | the closest distance to any sample that has been observed so far. 104 | 105 | Parameters 106 | ---------- 107 | metric : str 108 | Either "euclidean" or "cosine". 109 | matching_threshold: float 110 | The matching threshold. Samples with larger distance are considered an 111 | invalid match. 112 | budget : Optional[int] 113 | If not None, fix samples per class to at most this number. Removes 114 | the oldest samples when the budget is reached. 115 | 116 | Attributes 117 | ---------- 118 | samples : Dict[int -> List[ndarray]] 119 | A dictionary that maps from target identities to the list of samples 120 | that have been observed so far. 
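
    Examples
    --------
    A minimal usage sketch; the matching threshold and budget values here are
    illustrative rather than project defaults:

    >>> metric = NearestNeighborDistanceMetric("cosine", matching_threshold=0.3, budget=100)
    >>> metric.partial_fit(features, targets, active_targets)
    >>> cost_matrix = metric.distance(features, targets)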
121 | 122 | """ 123 | 124 | def __init__(self, metric, matching_threshold, budget=None): 125 | 126 | 127 | if metric == "euclidean": 128 | self._metric = _nn_euclidean_distance 129 | elif metric == "cosine": 130 | self._metric = _nn_cosine_distance 131 | else: 132 | raise ValueError( 133 | "Invalid metric; must be either 'euclidean' or 'cosine'") 134 | self.matching_threshold = matching_threshold 135 | self.budget = budget 136 | self.samples = {} 137 | 138 | def partial_fit(self, features, targets, active_targets): 139 | """Update the distance metric with new data. 140 | 141 | Parameters 142 | ---------- 143 | features : ndarray 144 | An NxM matrix of N features of dimensionality M. 145 | targets : ndarray 146 | An integer array of associated target identities. 147 | active_targets : List[int] 148 | A list of targets that are currently present in the scene. 149 | 150 | """ 151 | for feature, target in zip(features, targets): 152 | self.samples.setdefault(target, []).append(feature) 153 | if self.budget is not None: 154 | self.samples[target] = self.samples[target][-self.budget:] 155 | self.samples = {k: self.samples[k] for k in active_targets} 156 | 157 | def distance(self, features, targets): 158 | """Compute distance between features and targets. 159 | 160 | Parameters 161 | ---------- 162 | features : ndarray 163 | An NxM matrix of N features of dimensionality M. 164 | targets : List[int] 165 | A list of targets to match the given `features` against. 166 | 167 | Returns 168 | ------- 169 | ndarray 170 | Returns a cost matrix of shape len(targets), len(features), where 171 | element (i, j) contains the closest squared distance between 172 | `targets[i]` and `features[j]`. 173 | 174 | """ 175 | cost_matrix = np.zeros((len(targets), len(features))) 176 | for i, target in enumerate(targets): 177 | cost_matrix[i, :] = self._metric(self.samples[target], features) 178 | return cost_matrix 179 | 180 | def custom_cosine_similarity(a, b): 181 | 182 | # if not data_is_normalized: 183 | # a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 184 | # b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 185 | # return 1. - np.dot(a, b.T) 186 | # [[]] [[]] 187 | 188 | aa = np.linalg.norm(a, axis=1, keepdims=True) 189 | bb = np.linalg.norm(b, axis=1, keepdims=True) 190 | norm_mat = np.dot(aa, bb.T) 191 | return 1. - (np.dot(a, b.T) / norm_mat) 192 | 193 | 194 | # a = np.matmul(np.transpose(source_representation), test_representation) 195 | # b = np.sum(np.multiply(source_representation, source_representation)) 196 | # c = np.sum(np.multiply(test_representation, test_representation)) 197 | # return 1 - (a / (np.sqrt(b) * np.sqrt(c))) -------------------------------------------------------------------------------- /deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. 
[1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> classes = [d.classes for d in detections] 19 | >>> scores = [d.confidence for d in detections] 20 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 21 | >>> detections = [detections[i] for i in indices] 22 | 23 | Parameters 24 | ---------- 25 | boxes : ndarray 26 | Array of ROIs (x, y, width, height). 27 | max_bbox_overlap : float 28 | ROIs that overlap more than this values are suppressed. 29 | scores : Optional[array_like] 30 | Detector confidence score. 31 | 32 | Returns 33 | ------- 34 | List[int] 35 | Returns indices of detections that have survived non-maxima suppression. 36 | 37 | """ 38 | if len(boxes) == 0: 39 | return [] 40 | 41 | boxes = boxes.astype(np.float) 42 | pick = [] 43 | 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] + boxes[:, 0] 47 | y2 = boxes[:, 3] + boxes[:, 1] 48 | 49 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 50 | if scores is not None: 51 | idxs = np.argsort(scores) 52 | else: 53 | idxs = np.argsort(y2) 54 | 55 | while len(idxs) > 0: 56 | last = len(idxs) - 1 57 | i = idxs[last] 58 | pick.append(i) 59 | 60 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 61 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 62 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 63 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 64 | 65 | w = np.maximum(0, xx2 - xx1 + 1) 66 | h = np.maximum(0, yy2 - yy1 + 1) 67 | 68 | overlap = (w * h) / area[idxs[:last]] 69 | 70 | idxs = np.delete( 71 | idxs, np.concatenate( 72 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deep_sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from .nn_matching import _cosine_distance, custom_cosine_similarity 3 | import statistics as st 4 | import numpy as np 5 | import time 6 | import sys 7 | import os 8 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))) 9 | from deepface.commons import functions, distance as dst 10 | 11 | def find_face(features, face_db, max_face_threshold): 12 | face_score = dict() 13 | for face in face_db: 14 | if face_db[face]["used"]: 15 | face_score[face] = 1 16 | continue 17 | cos_harmonic = [] 18 | # cos_mat = _cosine_distance(features, face_db[face]["db"], data_is_normalized=True) 19 | cos_mat = custom_cosine_similarity(features, face_db[face]["db"]) 20 | # print(cos_mat) 21 | 22 | # time.sleep(7) 23 | 24 | # for f in face_db[face]: 25 | # cos_harmonic.append(_nn_cosine_distance(feature, f)) 26 | # cos_harmonic = st.harmonic_mean(list(cos_mat.flatten())) 27 | # cos_harmonic = np.mean(cos_mat.flatten()) 28 | # face_score[face] = cos_harmonic 29 | 30 | # distance = dst.findCosineDistance(features[0], face_db[face]["db"][0]) 31 | # print(distance) 32 | 33 | face_score[face] = cos_mat.min(axis=1) 34 | # print(face_score) 35 | print(face_score) 36 | ans_face = min(face_score,key=face_score.get) 37 | 38 | # print(ans_face, face_score) 39 | 40 | # time.sleep(5) 41 | 42 | if face_score[ans_face] < max_face_threshold: 43 | face_db[ans_face]["used"] = True 44 | return ans_face 45 | else: 46 | return "" 47 | 48 | class TrackState: 49 | """ 50 | Enumeration type for the single target track state. 
Newly created tracks are 51 | classified as `tentative` until enough evidence has been collected. Then, 52 | the track state is changed to `confirmed`. Tracks that are no longer alive 53 | are classified as `deleted` to mark them for removal from the set of active 54 | tracks. 55 | 56 | """ 57 | 58 | Tentative = 1 59 | Confirmed = 2 60 | Deleted = 3 61 | 62 | 63 | class Track: 64 | """ 65 | A single target track with state space `(x, y, a, h)` and associated 66 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 67 | aspect ratio and `h` is the height. 68 | 69 | Parameters 70 | ---------- 71 | mean : ndarray 72 | Mean vector of the initial state distribution. 73 | covariance : ndarray 74 | Covariance matrix of the initial state distribution. 75 | track_id : int 76 | A unique track identifier. 77 | n_init : int 78 | Number of consecutive detections before the track is confirmed. The 79 | track state is set to `Deleted` if a miss occurs within the first 80 | `n_init` frames. 81 | max_age : int 82 | The maximum number of consecutive misses before the track state is 83 | set to `Deleted`. 84 | feature : Optional[ndarray] 85 | Feature vector of the detection this track originates from. If not None, 86 | this feature is added to the `features` cache. 87 | 88 | Attributes 89 | ---------- 90 | mean : ndarray 91 | Mean vector of the initial state distribution. 92 | covariance : ndarray 93 | Covariance matrix of the initial state distribution. 94 | track_id : int 95 | A unique track identifier. 96 | hits : int 97 | Total number of measurement updates. 98 | age : int 99 | Total number of frames since first occurance. 100 | time_since_update : int 101 | Total number of frames since last measurement update. 102 | state : TrackState 103 | The current track state. 104 | features : List[ndarray] 105 | A cache of features. On each measurement update, the associated feature 106 | vector is added to this list. 107 | face_name : string 108 | 기존 데이터 베이스에 존재하는 이름 찾기 109 | """ 110 | 111 | def __init__(self, mean, covariance, track_id, n_init, max_age, face_db, max_face_threshold, 112 | feature=None, class_name=None): 113 | self.mean = mean 114 | self.covariance = covariance 115 | self.track_id = track_id 116 | self.hits = 1 117 | self.age = 1 118 | self.time_since_update = 0 119 | 120 | 121 | self.state = TrackState.Tentative 122 | self.features = [] 123 | self.face_name = "" 124 | if feature is not None: 125 | self.features.append(feature) 126 | self.face_name = find_face(self.features, face_db, max_face_threshold) 127 | 128 | self._n_init = n_init 129 | self._max_age = max_age 130 | self.class_name = class_name 131 | 132 | # def __init__(self, mean, covariance, track_id, n_init, max_age, 133 | # feature=None, class_name=None): 134 | # self.mean = mean 135 | # self.covariance = covariance 136 | # self.track_id = track_id 137 | # self.hits = 1 138 | # self.age = 1 139 | # self.time_since_update = 0 140 | 141 | 142 | # self.state = TrackState.Tentative 143 | # self.features = [] 144 | # # self.face_name = "" 145 | # if feature is not None: 146 | # self.features.append(feature) 147 | # # self.face_name = find_face(self.features, face_db, max_face_threshold) 148 | 149 | # self._n_init = n_init 150 | # self._max_age = max_age 151 | # self.class_name = class_name 152 | 153 | 154 | 155 | 156 | def to_tlwh(self): 157 | """Get current position in bounding box format `(top left x, top left y, 158 | width, height)`. 159 | 160 | Returns 161 | ------- 162 | ndarray 163 | The bounding box. 
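        For example, a state `(x, y, a, h) = (50, 60, 0.5, 40)` yields the box
        `(40, 40, 20, 40)`.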
164 | 165 | """ 166 | ret = self.mean[:4].copy() 167 | ret[2] *= ret[3] 168 | ret[:2] -= ret[2:] / 2 169 | return ret 170 | 171 | def to_tlbr(self): 172 | """Get current position in bounding box format `(min x, miny, max x, 173 | max y)`. 174 | 175 | Returns 176 | ------- 177 | ndarray 178 | The bounding box. 179 | 180 | """ 181 | ret = self.to_tlwh() 182 | ret[2:] = ret[:2] + ret[2:] 183 | return ret 184 | 185 | def get_class(self): 186 | return self.class_name 187 | 188 | def get_face_name(self): 189 | return self.face_name 190 | 191 | def predict(self, kf): 192 | """Propagate the state distribution to the current time step using a 193 | Kalman filter prediction step. 194 | 195 | Parameters 196 | ---------- 197 | kf : kalman_filter.KalmanFilter 198 | The Kalman filter. 199 | 200 | """ 201 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 202 | self.age += 1 203 | self.time_since_update += 1 204 | 205 | def update(self, kf, detection): 206 | """Perform Kalman filter measurement update step and update the feature 207 | cache. 208 | 209 | Parameters 210 | ---------- 211 | kf : kalman_filter.KalmanFilter 212 | The Kalman filter. 213 | detection : Detection 214 | The associated detection. 215 | 216 | """ 217 | self.mean, self.covariance = kf.update( 218 | self.mean, self.covariance, detection.to_xyah()) 219 | self.features.append(detection.feature) 220 | 221 | self.hits += 1 222 | self.time_since_update = 0 223 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 224 | self.state = TrackState.Confirmed 225 | 226 | def mark_missed(self, face_db): 227 | """Mark this track as missed (no association at the current time step). 228 | """ 229 | if self.state == TrackState.Tentative: 230 | if self.face_name != "": 231 | face_db[self.face_name]["used"] = False 232 | self.state = TrackState.Deleted 233 | elif self.time_since_update > self._max_age: 234 | if self.face_name != "": 235 | face_db[self.face_name]["used"] = False 236 | self.state = TrackState.Deleted 237 | 238 | def is_tentative(self): 239 | """Returns True if this track is tentative (unconfirmed). 240 | """ 241 | return self.state == TrackState.Tentative 242 | 243 | def is_confirmed(self): 244 | """Returns True if this track is confirmed.""" 245 | return self.state == TrackState.Confirmed 246 | 247 | def is_deleted(self): 248 | """Returns True if this track is dead and should be deleted.""" 249 | return self.state == TrackState.Deleted 250 | 251 | def find_face_name(self, face_db, max_face_threshold): 252 | face_score = dict() 253 | for face in face_db: 254 | if face_db[face]["used"]: 255 | face_score[face] = 1 256 | continue 257 | cos_mat = custom_cosine_similarity(self.features, face_db[face]["db"]) 258 | # print(cos_mat) 259 | 260 | # time.sleep(7) 261 | 262 | face_score[face] = cos_mat.min(axis=1).min(axis=0) 263 | # print(face_score) 264 | ans_face = min(face_score,key=face_score.get) 265 | 266 | # print(ans_face, face_score) 267 | 268 | # time.sleep(5) 269 | 270 | if face_score[ans_face] < max_face_threshold: 271 | face_db[ans_face]["used"] = True 272 | self.face_name = ans_face 273 | else: 274 | self.face_name = "" -------------------------------------------------------------------------------- /deep_sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . 
import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=3, n_init=3): # 빠르게 객체를 지워주기 위해 max_age를 30에서 3으로 변경 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | 59 | # # 시작할 때 db에서 찾는 코드 60 | def update(self, detections, face_db, max_face_threshold): 61 | """Perform measurement update and track management. 62 | 63 | Parameters 64 | ---------- 65 | detections : List[deep_sort.detection.Detection] 66 | A list of detections at the current time step. 67 | 68 | """ 69 | # Run matching cascade. 70 | matches, unmatched_tracks, unmatched_detections = \ 71 | self._match(detections) 72 | 73 | # Update track set. 74 | for i in face_db: 75 | face_db[i]["used"] = False # 다 탐지 안된걸로 변경 76 | 77 | for track_idx, detection_idx in matches: 78 | self.tracks[track_idx].update( 79 | self.kf, detections[detection_idx]) 80 | 81 | if self.tracks[track_idx].get_face_name() == "": 82 | self.tracks[track_idx].find_face_name(face_db, max_face_threshold) 83 | 84 | for track_idx in unmatched_tracks: 85 | self.tracks[track_idx].mark_missed(face_db) # 못찾으면 face_db에서 지워준다 86 | for detection_idx in unmatched_detections: 87 | self._initiate_track(detections[detection_idx], face_db, max_face_threshold) 88 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 89 | 90 | # Update distance metric. 91 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 92 | features, targets = [], [] 93 | for track in self.tracks: 94 | if not track.is_confirmed(): 95 | continue 96 | features += track.features 97 | targets += [track.track_id for _ in track.features] 98 | track.features = [] 99 | self.metric.partial_fit( 100 | np.asarray(features), np.asarray(targets), active_targets) 101 | 102 | # def update(self, detections): 103 | # """Perform measurement update and track management. 104 | 105 | # Parameters 106 | # ---------- 107 | # detections : List[deep_sort.detection.Detection] 108 | # A list of detections at the current time step. 109 | 110 | # """ 111 | # # Run matching cascade. 
112 | # matches, unmatched_tracks, unmatched_detections = \ 113 | # self._match(detections) 114 | 115 | # # Update track set. 116 | # for track_idx, detection_idx in matches: 117 | # self.tracks[track_idx].update( 118 | # self.kf, detections[detection_idx]) 119 | # for track_idx in unmatched_tracks: 120 | # self.tracks[track_idx].mark_missed() 121 | # for detection_idx in unmatched_detections: 122 | # self._initiate_track(detections[detection_idx]) 123 | # self.tracks = [t for t in self.tracks if not t.is_deleted()] 124 | 125 | # # Update distance metric. 126 | # active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 127 | # features, targets = [], [] 128 | # for track in self.tracks: 129 | # if not track.is_confirmed(): 130 | # continue 131 | # features += track.features 132 | # targets += [track.track_id for _ in track.features] 133 | # track.features = [] 134 | # self.metric.partial_fit( 135 | # np.asarray(features), np.asarray(targets), active_targets) 136 | 137 | 138 | def _match(self, detections): 139 | 140 | def gated_metric(tracks, dets, track_indices, detection_indices): 141 | features = np.array([dets[i].feature for i in detection_indices]) 142 | targets = np.array([tracks[i].track_id for i in track_indices]) 143 | cost_matrix = self.metric.distance(features, targets) 144 | cost_matrix = linear_assignment.gate_cost_matrix( 145 | self.kf, cost_matrix, tracks, dets, track_indices, 146 | detection_indices) 147 | 148 | return cost_matrix 149 | 150 | # Split track set into confirmed and unconfirmed tracks. 151 | confirmed_tracks = [ 152 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 153 | unconfirmed_tracks = [ 154 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 155 | 156 | # Associate confirmed tracks using appearance features. 157 | matches_a, unmatched_tracks_a, unmatched_detections = \ 158 | linear_assignment.matching_cascade( 159 | gated_metric, self.metric.matching_threshold, self.max_age, 160 | self.tracks, detections, confirmed_tracks) 161 | 162 | # Associate remaining tracks together with unconfirmed tracks using IOU. 
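        # Only tracks that missed exactly one frame (time_since_update == 1)
        # get this IoU-based second chance alongside unconfirmed tracks;
        # tracks that have been unmatched for longer stay unmatched at this stage.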
163 | iou_track_candidates = unconfirmed_tracks + [ 164 | k for k in unmatched_tracks_a if 165 | self.tracks[k].time_since_update == 1] 166 | unmatched_tracks_a = [ 167 | k for k in unmatched_tracks_a if 168 | self.tracks[k].time_since_update != 1] 169 | matches_b, unmatched_tracks_b, unmatched_detections = \ 170 | linear_assignment.min_cost_matching( 171 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 172 | detections, iou_track_candidates, unmatched_detections) 173 | 174 | matches = matches_a + matches_b 175 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 176 | return matches, unmatched_tracks, unmatched_detections 177 | 178 | 179 | # 시작할 때 클래스 초기화 코드 180 | def _initiate_track(self, detection, face_db, max_face_threshold): 181 | mean, covariance = self.kf.initiate(detection.to_xyah()) 182 | class_name = detection.get_class() 183 | self.tracks.append(Track( 184 | mean, covariance, self._next_id, self.n_init, self.max_age, 185 | face_db, max_face_threshold, detection.feature, class_name)) 186 | self._next_id += 1 187 | 188 | # def _initiate_track(self, detection): 189 | # mean, covariance = self.kf.initiate(detection.to_xyah()) 190 | # class_name = detection.get_class() 191 | # self.tracks.append(Track( 192 | # mean, covariance, self._next_id, self.n_init, self.max_age, 193 | # detection.feature, class_name)) 194 | # self._next_id += 1 195 | -------------------------------------------------------------------------------- /deepface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/__init__.py -------------------------------------------------------------------------------- /deepface/basemodels/ArcFace.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend 2 | from tensorflow.python.keras.engine import training 3 | from tensorflow.python.keras.utils import data_utils 4 | from tensorflow.python.keras.utils import layer_utils 5 | from tensorflow.python.lib.io import file_io 6 | import tensorflow 7 | from tensorflow import keras 8 | 9 | import os 10 | from pathlib import Path 11 | import gdown 12 | 13 | def loadModel(model_path): 14 | base_model = ResNet34() 15 | inputs = base_model.inputs[0] 16 | arcface_model = base_model.outputs[0] 17 | arcface_model = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5)(arcface_model) 18 | arcface_model = keras.layers.Dropout(0.4)(arcface_model) 19 | arcface_model = keras.layers.Flatten()(arcface_model) 20 | arcface_model = keras.layers.Dense(512, activation=None, use_bias=True, kernel_initializer="glorot_normal")(arcface_model) 21 | embedding = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5, name="embedding", scale=True)(arcface_model) 22 | # embedding = tensorflow.reshape(embedding, [-1, 512, 1]) 23 | # embedding = keras.layers.MaxPooling1D(pool_size=4, strides=4, padding="valid")(embedding) 24 | # embedding = keras.layers.Flatten()(embedding) 25 | model = keras.models.Model(inputs, embedding, name=base_model.name) 26 | 27 | #--------------------------------------- 28 | #check the availability of pre-trained weights 29 | 30 | # home = str(Path.home()) 31 | url = "https://drive.google.com/uc?id=1LVB3CdVejpmGHM28BpqqkbZP5hDEcdZY" 32 | # file_name = "arcface_weights.h5" 33 | # output = home+'/deepface/weights/'+file_name 34 | # print(output) 35 | 36 | 37 | # if 
os.path.isfile(model_path) != True: 38 | 39 | # print(file_name," will be downloaded to ",model_path) 40 | # gdown.download(url, model_path, quiet=False) 41 | 42 | #--------------------------------------- 43 | 44 | try: 45 | model.load_weights(model_path) 46 | except: 47 | print("pre-trained weights could not be loaded.") 48 | # print("You might try to download it from the url ", url," and copy to ",output," manually") 49 | 50 | return model 51 | 52 | def ResNet34(): 53 | 54 | img_input = tensorflow.keras.layers.Input(shape=(112, 112, 3)) 55 | 56 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name='conv1_pad')(img_input) 57 | x = tensorflow.keras.layers.Conv2D(64, 3, strides=1, use_bias=False, kernel_initializer='glorot_normal', name='conv1_conv')(x) 58 | x = tensorflow.keras.layers.BatchNormalization(axis=3, epsilon=2e-5, momentum=0.9, name='conv1_bn')(x) 59 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name='conv1_prelu')(x) 60 | x = stack_fn(x) 61 | 62 | model = training.Model(img_input, x, name='ResNet34') 63 | 64 | return model 65 | 66 | def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): 67 | bn_axis = 3 68 | 69 | if conv_shortcut: 70 | shortcut = tensorflow.keras.layers.Conv2D(filters, 1, strides=stride, use_bias=False, kernel_initializer='glorot_normal', name=name + '_0_conv')(x) 71 | shortcut = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_0_bn')(shortcut) 72 | else: 73 | shortcut = x 74 | 75 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_1_bn')(x) 76 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_1_pad')(x) 77 | x = tensorflow.keras.layers.Conv2D(filters, 3, strides=1, kernel_initializer='glorot_normal', use_bias=False, name=name + '_1_conv')(x) 78 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_2_bn')(x) 79 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name=name + '_1_prelu')(x) 80 | 81 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_2_pad')(x) 82 | x = tensorflow.keras.layers.Conv2D(filters, kernel_size, strides=stride, kernel_initializer='glorot_normal', use_bias=False, name=name + '_2_conv')(x) 83 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_3_bn')(x) 84 | 85 | x = tensorflow.keras.layers.Add(name=name + '_add')([shortcut, x]) 86 | return x 87 | 88 | def stack1(x, filters, blocks, stride1=2, name=None): 89 | x = block1(x, filters, stride=stride1, name=name + '_block1') 90 | for i in range(2, blocks + 1): 91 | x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i)) 92 | return x 93 | 94 | def stack_fn(x): 95 | x = stack1(x, 64, 3, name='conv2') 96 | x = stack1(x, 128, 4, name='conv3') 97 | x = stack1(x, 256, 6, name='conv4') 98 | return stack1(x, 512, 3, name='conv5') -------------------------------------------------------------------------------- /deepface/basemodels/Boosting.py: -------------------------------------------------------------------------------- 1 | from deepface import DeepFace 2 | from tqdm import tqdm 3 | import os 4 | from os import path 5 | from pathlib import Path 6 | import numpy as np 7 | import gdown 8 | from deepface.commons import functions, distance as dst 9 | 10 | def loadModel(): 11 | 12 | model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace'] 13 | 14 | model = {} 15 | 16 | model_pbar = tqdm(range(0, 
4), desc='Face recognition models') 17 | 18 | for index in model_pbar: 19 | 20 | model_name = model_names[index] 21 | 22 | model_pbar.set_description("Loading %s" % (model_name)) 23 | model[model_name] = DeepFace.build_model(model_name) 24 | 25 | return model 26 | 27 | def validate_model(model): 28 | #validate model dictionary because it might be passed from input as pre-trained 29 | found_models = [] 30 | for key, value in model.items(): 31 | found_models.append(key) 32 | 33 | if ('VGG-Face' in found_models) and ('Facenet' in found_models) and ('OpenFace' in found_models) and ('DeepFace' in found_models): 34 | #print("Ensemble learning will be applied for ", found_models," models") 35 | valid = True 36 | else: 37 | 38 | missing_ones = set(['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']) - set(found_models) 39 | 40 | raise ValueError("You'd like to apply ensemble method and pass pre-built models but models must contain [VGG-Face, Facenet, OpenFace, DeepFace] but you passed "+str(found_models)+". So, you need to pass "+str(missing_ones)+" models as well.") 41 | 42 | def build_gbm(): 43 | 44 | #this is not a must dependency 45 | import lightgbm as lgb #lightgbm==2.3.1 46 | 47 | home = str(Path.home()) 48 | 49 | if os.path.isfile(home+'/.deepface/weights/face-recognition-ensemble-model.txt') != True: 50 | print("face-recognition-ensemble-model.txt will be downloaded...") 51 | url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt' 52 | output = home+'/.deepface/weights/face-recognition-ensemble-model.txt' 53 | gdown.download(url, output, quiet=False) 54 | 55 | ensemble_model_path = home+'/.deepface/weights/face-recognition-ensemble-model.txt' 56 | 57 | deepface_ensemble = lgb.Booster(model_file = ensemble_model_path) 58 | 59 | return deepface_ensemble 60 | -------------------------------------------------------------------------------- /deepface/basemodels/DeepID.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | import zipfile 5 | 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.layers import Conv2D, Activation, Input, Add, MaxPooling2D, Flatten, Dense, Dropout 9 | 10 | #------------------------------------- 11 | 12 | def loadModel(url = 'https://drive.google.com/uc?id=1uRLtBCTQQAvHJ_KVrdbRJiCKxU8m5q2J'): 13 | 14 | myInput = Input(shape=(55, 47, 3)) 15 | 16 | x = Conv2D(20, (4, 4), name='Conv1', activation='relu', input_shape=(55, 47, 3))(myInput) 17 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool1')(x) 18 | x = Dropout(rate=0.99, name='D1')(x) 19 | 20 | x = Conv2D(40, (3, 3), name='Conv2', activation='relu')(x) 21 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool2')(x) 22 | x = Dropout(rate=0.99, name='D2')(x) 23 | 24 | x = Conv2D(60, (3, 3), name='Conv3', activation='relu')(x) 25 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool3')(x) 26 | x = Dropout(rate=0.99, name='D3')(x) 27 | 28 | x1 = Flatten()(x) 29 | fc11 = Dense(160, name = 'fc11')(x1) 30 | 31 | x2 = Conv2D(80, (2, 2), name='Conv4', activation='relu')(x) 32 | x2 = Flatten()(x2) 33 | fc12 = Dense(160, name = 'fc12')(x2) 34 | 35 | y = Add()([fc11, fc12]) 36 | y = Activation('relu', name = 'deepid')(y) 37 | 38 | model = Model(inputs=[myInput], outputs=y) 39 | 40 | #--------------------------------- 41 | 42 | home = str(Path.home()) 43 | 44 | if 
os.path.isfile(home+'/.deepface/weights/deepid_keras_weights.h5') != True: 45 | print("deepid_keras_weights.h5 will be downloaded...") 46 | 47 | output = home+'/.deepface/weights/deepid_keras_weights.h5' 48 | gdown.download(url, output, quiet=False) 49 | 50 | model.load_weights(home+'/.deepface/weights/deepid_keras_weights.h5') 51 | 52 | return model -------------------------------------------------------------------------------- /deepface/basemodels/DlibResNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | import bz2 4 | import gdown 5 | import numpy as np 6 | from pathlib import Path 7 | 8 | class DlibResNet: 9 | 10 | def __init__(self): 11 | 12 | #this is not a must dependency 13 | import dlib #19.20.0 14 | 15 | self.layers = [DlibMetaData()] 16 | 17 | #--------------------- 18 | 19 | home = str(Path.home()) 20 | weight_file = home+'/.deepface/weights/dlib_face_recognition_resnet_model_v1.dat' 21 | 22 | #--------------------- 23 | 24 | #download pre-trained model if it does not exist 25 | if os.path.isfile(weight_file) != True: 26 | print("dlib_face_recognition_resnet_model_v1.dat is going to be downloaded") 27 | 28 | url = "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" 29 | output = home+'/.deepface/weights/'+url.split("/")[-1] 30 | gdown.download(url, output, quiet=False) 31 | 32 | zipfile = bz2.BZ2File(output) 33 | data = zipfile.read() 34 | newfilepath = output[:-4] #discard .bz2 extension 35 | open(newfilepath, 'wb').write(data) 36 | 37 | #--------------------- 38 | 39 | model = dlib.face_recognition_model_v1(weight_file) 40 | self.__model = model 41 | 42 | #--------------------- 43 | 44 | return None #classes must return None 45 | 46 | def predict(self, img_aligned): 47 | 48 | #functions.detectFace returns 4 dimensional images 49 | if len(img_aligned.shape) == 4: 50 | img_aligned = img_aligned[0] 51 | 52 | #functions.detectFace returns bgr images 53 | img_aligned = img_aligned[:,:,::-1] #bgr to rgb 54 | 55 | #deepface.detectFace returns an array in scale of [0, 1] but dlib expects in scale of [0, 255] 56 | if img_aligned.max() <= 1: 57 | img_aligned = img_aligned * 255 58 | 59 | img_aligned = img_aligned.astype(np.uint8) 60 | 61 | model = self.__model 62 | 63 | img_representation = model.compute_face_descriptor(img_aligned) 64 | 65 | img_representation = np.array(img_representation) 66 | img_representation = np.expand_dims(img_representation, axis = 0) 67 | 68 | return img_representation 69 | 70 | class DlibMetaData: 71 | def __init__(self): 72 | self.input_shape = [[1, 150, 150, 3]] -------------------------------------------------------------------------------- /deepface/basemodels/DlibWrapper.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels.DlibResNet import DlibResNet 2 | 3 | def loadModel(): 4 | return DlibResNet() -------------------------------------------------------------------------------- /deepface/basemodels/FbDeepFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | import zipfile 5 | 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model, Sequential 8 | from tensorflow.keras.layers import Convolution2D, LocallyConnected2D, MaxPooling2D, Flatten, Dense, Dropout 9 | 10 | #------------------------------------- 11 | 12 | def loadModel(url = 
'https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip'): 13 | base_model = Sequential() 14 | base_model.add(Convolution2D(32, (11, 11), activation='relu', name='C1', input_shape=(152, 152, 3))) 15 | base_model.add(MaxPooling2D(pool_size=3, strides=2, padding='same', name='M2')) 16 | base_model.add(Convolution2D(16, (9, 9), activation='relu', name='C3')) 17 | base_model.add(LocallyConnected2D(16, (9, 9), activation='relu', name='L4')) 18 | base_model.add(LocallyConnected2D(16, (7, 7), strides=2, activation='relu', name='L5') ) 19 | base_model.add(LocallyConnected2D(16, (5, 5), activation='relu', name='L6')) 20 | base_model.add(Flatten(name='F0')) 21 | base_model.add(Dense(4096, activation='relu', name='F7')) 22 | base_model.add(Dropout(rate=0.5, name='D0')) 23 | base_model.add(Dense(8631, activation='softmax', name='F8')) 24 | 25 | #--------------------------------- 26 | 27 | home = str(Path.home()) 28 | 29 | if os.path.isfile(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5') != True: 30 | print("VGGFace2_DeepFace_weights_val-0.9034.h5 will be downloaded...") 31 | 32 | output = home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5.zip' 33 | 34 | gdown.download(url, output, quiet=False) 35 | 36 | #unzip VGGFace2_DeepFace_weights_val-0.9034.h5.zip 37 | with zipfile.ZipFile(output, 'r') as zip_ref: 38 | zip_ref.extractall(home+'/.deepface/weights/') 39 | 40 | base_model.load_weights(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5') 41 | 42 | #drop F8 and D0. F7 is the representation layer. 43 | deepface_model = Model(inputs=base_model.layers[0].input, outputs=base_model.layers[-3].output) 44 | 45 | return deepface_model -------------------------------------------------------------------------------- /deepface/basemodels/OpenFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | 5 | import tensorflow as tf 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model, Sequential 8 | from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate 9 | from tensorflow.keras.layers import Dense, Activation, Lambda, Flatten, BatchNormalization 10 | from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D 11 | from tensorflow.keras.models import load_model 12 | from tensorflow.keras import backend as K 13 | 14 | #--------------------------------------- 15 | 16 | def loadModel(url = 'https://drive.google.com/uc?id=1LSe1YCV1x-BfNnfb7DFZTNpv_Q9jITxn'): 17 | myInput = Input(shape=(96, 96, 3)) 18 | 19 | x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput) 20 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) 21 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x) 22 | x = Activation('relu')(x) 23 | x = ZeroPadding2D(padding=(1, 1))(x) 24 | x = MaxPooling2D(pool_size=3, strides=2)(x) 25 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_1')(x) 26 | x = Conv2D(64, (1, 1), name='conv2')(x) 27 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x) 28 | x = Activation('relu')(x) 29 | x = ZeroPadding2D(padding=(1, 1))(x) 30 | x = Conv2D(192, (3, 3), name='conv3')(x) 31 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x) 32 | x = Activation('relu')(x) 33 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_2')(x) #x is equal added 34 | x = 
ZeroPadding2D(padding=(1, 1))(x) 35 | x = MaxPooling2D(pool_size=3, strides=2)(x) 36 | 37 | # Inception3a 38 | inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x) 39 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3) 40 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3) 41 | inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3) 42 | inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3) 43 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3) 44 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3) 45 | 46 | inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x) 47 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5) 48 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5) 49 | inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5) 50 | inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5) 51 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5) 52 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5) 53 | 54 | inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x) 55 | inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool) 56 | inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool) 57 | inception_3a_pool = Activation('relu')(inception_3a_pool) 58 | inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool) 59 | 60 | inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x) 61 | inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1) 62 | inception_3a_1x1 = Activation('relu')(inception_3a_1x1) 63 | 64 | inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3) 65 | 66 | # Inception3b 67 | inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a) 68 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3) 69 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3) 70 | inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3) 71 | inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3) 72 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3) 73 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3) 74 | 75 | inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a) 76 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5) 77 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5) 78 | inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5) 79 | inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5) 80 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5) 81 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5) 82 | 83 | inception_3b_pool = Lambda(lambda x: x**2, name='power2_3b')(inception_3a) 84 | inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3b_pool) 85 | inception_3b_pool = 
Lambda(lambda x: x*9, name='mult9_3b')(inception_3b_pool) 86 | inception_3b_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_3b')(inception_3b_pool) 87 | inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool) 88 | inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool) 89 | inception_3b_pool = Activation('relu')(inception_3b_pool) 90 | inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool) 91 | 92 | inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a) 93 | inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1) 94 | inception_3b_1x1 = Activation('relu')(inception_3b_1x1) 95 | 96 | inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3) 97 | 98 | # Inception3c 99 | inception_3c_3x3 = Conv2D(128, (1, 1), strides=(1, 1), name='inception_3c_3x3_conv1')(inception_3b) 100 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn1')(inception_3c_3x3) 101 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3) 102 | inception_3c_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3c_3x3) 103 | inception_3c_3x3 = Conv2D(256, (3, 3), strides=(2, 2), name='inception_3c_3x3_conv'+'2')(inception_3c_3x3) 104 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn'+'2')(inception_3c_3x3) 105 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3) 106 | 107 | inception_3c_5x5 = Conv2D(32, (1, 1), strides=(1, 1), name='inception_3c_5x5_conv1')(inception_3b) 108 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn1')(inception_3c_5x5) 109 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5) 110 | inception_3c_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3c_5x5) 111 | inception_3c_5x5 = Conv2D(64, (5, 5), strides=(2, 2), name='inception_3c_5x5_conv'+'2')(inception_3c_5x5) 112 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn'+'2')(inception_3c_5x5) 113 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5) 114 | 115 | inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b) 116 | inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool) 117 | 118 | inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3) 119 | 120 | #inception 4a 121 | inception_4a_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name='inception_4a_3x3_conv'+'1')(inception_3c) 122 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'1')(inception_4a_3x3) 123 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3) 124 | inception_4a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4a_3x3) 125 | inception_4a_3x3 = Conv2D(192, (3, 3), strides=(1, 1), name='inception_4a_3x3_conv'+'2')(inception_4a_3x3) 126 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'2')(inception_4a_3x3) 127 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3) 128 | 129 | inception_4a_5x5 = Conv2D(32, (1,1), strides=(1,1), name='inception_4a_5x5_conv1')(inception_3c) 130 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn1')(inception_4a_5x5) 131 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5) 132 | inception_4a_5x5 = ZeroPadding2D(padding=(2,2))(inception_4a_5x5) 133 | inception_4a_5x5 = 
Conv2D(64, (5,5), strides=(1,1), name='inception_4a_5x5_conv'+'2')(inception_4a_5x5) 134 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn'+'2')(inception_4a_5x5) 135 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5) 136 | 137 | inception_4a_pool = Lambda(lambda x: x**2, name='power2_4a')(inception_3c) 138 | inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4a_pool) 139 | inception_4a_pool = Lambda(lambda x: x*9, name='mult9_4a')(inception_4a_pool) 140 | inception_4a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_4a')(inception_4a_pool) 141 | 142 | inception_4a_pool = Conv2D(128, (1,1), strides=(1,1), name='inception_4a_pool_conv'+'')(inception_4a_pool) 143 | inception_4a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_pool_bn'+'')(inception_4a_pool) 144 | inception_4a_pool = Activation('relu')(inception_4a_pool) 145 | inception_4a_pool = ZeroPadding2D(padding=(2, 2))(inception_4a_pool) 146 | 147 | inception_4a_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name='inception_4a_1x1_conv'+'')(inception_3c) 148 | inception_4a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_1x1_bn'+'')(inception_4a_1x1) 149 | inception_4a_1x1 = Activation('relu')(inception_4a_1x1) 150 | 151 | inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3) 152 | 153 | #inception4e 154 | inception_4e_3x3 = Conv2D(160, (1,1), strides=(1,1), name='inception_4e_3x3_conv'+'1')(inception_4a) 155 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'1')(inception_4e_3x3) 156 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3) 157 | inception_4e_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4e_3x3) 158 | inception_4e_3x3 = Conv2D(256, (3,3), strides=(2,2), name='inception_4e_3x3_conv'+'2')(inception_4e_3x3) 159 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'2')(inception_4e_3x3) 160 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3) 161 | 162 | inception_4e_5x5 = Conv2D(64, (1,1), strides=(1,1), name='inception_4e_5x5_conv'+'1')(inception_4a) 163 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'1')(inception_4e_5x5) 164 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5) 165 | inception_4e_5x5 = ZeroPadding2D(padding=(2, 2))(inception_4e_5x5) 166 | inception_4e_5x5 = Conv2D(128, (5,5), strides=(2,2), name='inception_4e_5x5_conv'+'2')(inception_4e_5x5) 167 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'2')(inception_4e_5x5) 168 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5) 169 | 170 | inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a) 171 | inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool) 172 | 173 | inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3) 174 | 175 | #inception5a 176 | inception_5a_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_3x3_conv'+'1')(inception_4e) 177 | inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'1')(inception_5a_3x3) 178 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3) 179 | inception_5a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_5a_3x3) 180 | inception_5a_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5a_3x3_conv'+'2')(inception_5a_3x3) 181 | inception_5a_3x3 = 
BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'2')(inception_5a_3x3) 182 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3) 183 | 184 | inception_5a_pool = Lambda(lambda x: x**2, name='power2_5a')(inception_4e) 185 | inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_5a_pool) 186 | inception_5a_pool = Lambda(lambda x: x*9, name='mult9_5a')(inception_5a_pool) 187 | inception_5a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_5a')(inception_5a_pool) 188 | 189 | inception_5a_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_pool_conv'+'')(inception_5a_pool) 190 | inception_5a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_pool_bn'+'')(inception_5a_pool) 191 | inception_5a_pool = Activation('relu')(inception_5a_pool) 192 | inception_5a_pool = ZeroPadding2D(padding=(1,1))(inception_5a_pool) 193 | 194 | inception_5a_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5a_1x1_conv'+'')(inception_4e) 195 | inception_5a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_1x1_bn'+'')(inception_5a_1x1) 196 | inception_5a_1x1 = Activation('relu')(inception_5a_1x1) 197 | 198 | inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3) 199 | 200 | #inception_5b 201 | inception_5b_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_3x3_conv'+'1')(inception_5a) 202 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'1')(inception_5b_3x3) 203 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3) 204 | inception_5b_3x3 = ZeroPadding2D(padding=(1,1))(inception_5b_3x3) 205 | inception_5b_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5b_3x3_conv'+'2')(inception_5b_3x3) 206 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'2')(inception_5b_3x3) 207 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3) 208 | 209 | inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a) 210 | 211 | inception_5b_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_pool_conv'+'')(inception_5b_pool) 212 | inception_5b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_pool_bn'+'')(inception_5b_pool) 213 | inception_5b_pool = Activation('relu')(inception_5b_pool) 214 | 215 | inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool) 216 | 217 | inception_5b_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5b_1x1_conv'+'')(inception_5a) 218 | inception_5b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_1x1_bn'+'')(inception_5b_1x1) 219 | inception_5b_1x1 = Activation('relu')(inception_5b_1x1) 220 | 221 | inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3) 222 | 223 | av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b) 224 | reshape_layer = Flatten()(av_pool) 225 | dense_layer = Dense(128, name='dense_layer')(reshape_layer) 226 | norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer) 227 | 228 | # Final Model 229 | model = Model(inputs=[myInput], outputs=norm_layer) 230 | 231 | #----------------------------------- 232 | 233 | home = str(Path.home()) 234 | 235 | if os.path.isfile(home+'/.deepface/weights/openface_weights.h5') != True: 236 | print("openface_weights.h5 will be downloaded...") 237 | 238 | output = home+'/.deepface/weights/openface_weights.h5' 239 | gdown.download(url, output, quiet=False) 240 | 241 | 
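# ----------------------------------------------------------------------
# Illustrative sketch (not part of the deepface API): every loader in
# deepface/basemodels repeats the pattern seen just above -- check
# ~/.deepface/weights for the file, download it with gdown if it is
# missing, then call load_weights. Below is a minimal generic helper
# capturing that pattern; ensure_weights() and its arguments are
# hypothetical names, not functions provided by deepface.

import os
from pathlib import Path

import gdown


def ensure_weights(file_name, url):
    """Return the local path of file_name under ~/.deepface/weights, downloading it first if absent."""
    weights_dir = os.path.join(str(Path.home()), ".deepface", "weights")
    os.makedirs(weights_dir, exist_ok=True)

    output = os.path.join(weights_dir, file_name)
    if not os.path.isfile(output):
        print(file_name, "will be downloaded...")
        gdown.download(url, output, quiet=False)
    return output

# Hypothetical usage mirroring the surrounding OpenFace loader:
#   model.load_weights(ensure_weights("openface_weights.h5", url))
# ----------------------------------------------------------------------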
#----------------------------------- 242 | 243 | model.load_weights(home+'/.deepface/weights/openface_weights.h5') 244 | 245 | #----------------------------------- 246 | 247 | return model -------------------------------------------------------------------------------- /deepface/basemodels/VGGFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | 5 | import tensorflow as tf 6 | tf_version = int(tf.__version__.split(".")[0]) 7 | 8 | if tf_version == 1: 9 | from keras.models import Model, Sequential 10 | from keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation 11 | else: 12 | from tensorflow import keras 13 | from tensorflow.keras.models import Model, Sequential 14 | from tensorflow.keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation 15 | 16 | #--------------------------------------- 17 | 18 | def baseModel(): 19 | model = Sequential() 20 | model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3))) 21 | model.add(Convolution2D(64, (3, 3), activation='relu')) 22 | model.add(ZeroPadding2D((1,1))) 23 | model.add(Convolution2D(64, (3, 3), activation='relu')) 24 | model.add(MaxPooling2D((2,2), strides=(2,2))) 25 | 26 | model.add(ZeroPadding2D((1,1))) 27 | model.add(Convolution2D(128, (3, 3), activation='relu')) 28 | model.add(ZeroPadding2D((1,1))) 29 | model.add(Convolution2D(128, (3, 3), activation='relu')) 30 | model.add(MaxPooling2D((2,2), strides=(2,2))) 31 | 32 | model.add(ZeroPadding2D((1,1))) 33 | model.add(Convolution2D(256, (3, 3), activation='relu')) 34 | model.add(ZeroPadding2D((1,1))) 35 | model.add(Convolution2D(256, (3, 3), activation='relu')) 36 | model.add(ZeroPadding2D((1,1))) 37 | model.add(Convolution2D(256, (3, 3), activation='relu')) 38 | model.add(MaxPooling2D((2,2), strides=(2,2))) 39 | 40 | model.add(ZeroPadding2D((1,1))) 41 | model.add(Convolution2D(512, (3, 3), activation='relu')) 42 | model.add(ZeroPadding2D((1,1))) 43 | model.add(Convolution2D(512, (3, 3), activation='relu')) 44 | model.add(ZeroPadding2D((1,1))) 45 | model.add(Convolution2D(512, (3, 3), activation='relu')) 46 | model.add(MaxPooling2D((2,2), strides=(2,2))) 47 | 48 | model.add(ZeroPadding2D((1,1))) 49 | model.add(Convolution2D(512, (3, 3), activation='relu')) 50 | model.add(ZeroPadding2D((1,1))) 51 | model.add(Convolution2D(512, (3, 3), activation='relu')) 52 | model.add(ZeroPadding2D((1,1))) 53 | model.add(Convolution2D(512, (3, 3), activation='relu')) 54 | model.add(MaxPooling2D((2,2), strides=(2,2))) 55 | 56 | model.add(Convolution2D(4096, (7, 7), activation='relu')) 57 | model.add(Dropout(0.5)) 58 | model.add(Convolution2D(4096, (1, 1), activation='relu')) 59 | model.add(Dropout(0.5)) 60 | model.add(Convolution2D(2622, (1, 1))) 61 | model.add(Flatten()) 62 | model.add(Activation('softmax')) 63 | 64 | return model 65 | 66 | def loadModel(url = 'https://drive.google.com/uc?id=1CPSeum3HpopfomUEK1gybeuIVoeJT_Eo'): 67 | 68 | model = baseModel() 69 | 70 | #----------------------------------- 71 | 72 | home = str(Path.home()) 73 | output = home+'/.deepface/weights/vgg_face_weights.h5' 74 | 75 | if os.path.isfile(output) != True: 76 | print("vgg_face_weights.h5 will be downloaded...") 77 | gdown.download(url, output, quiet=False) 78 | 79 | #----------------------------------- 80 | 81 | try: 82 | model.load_weights(output) 83 | except Exception as err: 84 | print(str(err)) 85 | print("Pre-trained weight 
could not be loaded.") 86 | print("You might try to download the pre-trained weights from the url ", url, " and copy it to the ", output) 87 | 88 | #----------------------------------- 89 | 90 | #TO-DO: why? 91 | vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output) 92 | 93 | return vgg_face_descriptor -------------------------------------------------------------------------------- /deepface/basemodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/basemodels/__init__.py -------------------------------------------------------------------------------- /deepface/commons/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/commons/__init__.py -------------------------------------------------------------------------------- /deepface/commons/distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def findCosineDistance(source_representation, test_representation): 4 | a = np.matmul(np.transpose(source_representation), test_representation) 5 | b = np.sum(np.multiply(source_representation, source_representation)) 6 | c = np.sum(np.multiply(test_representation, test_representation)) 7 | return 1 - (a / (np.sqrt(b) * np.sqrt(c))) 8 | 9 | def findEuclideanDistance(source_representation, test_representation): 10 | if type(source_representation) == list: 11 | source_representation = np.array(source_representation) 12 | 13 | if type(test_representation) == list: 14 | test_representation = np.array(test_representation) 15 | 16 | euclidean_distance = source_representation - test_representation 17 | euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance)) 18 | euclidean_distance = np.sqrt(euclidean_distance) 19 | return euclidean_distance 20 | 21 | def l2_normalize(x): 22 | return x / np.sqrt(np.sum(np.multiply(x, x))) 23 | 24 | def findThreshold(model_name, distance_metric): 25 | 26 | base_threshold = {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75} 27 | 28 | thresholds = { 29 | 'VGG-Face': {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75}, 30 | 'OpenFace': {'cosine': 0.10, 'euclidean': 0.55, 'euclidean_l2': 0.55}, 31 | 'Facenet': {'cosine': 0.40, 'euclidean': 10, 'euclidean_l2': 0.80}, 32 | 'DeepFace': {'cosine': 0.23, 'euclidean': 64, 'euclidean_l2': 0.64}, 33 | 'DeepID': {'cosine': 0.015, 'euclidean': 45, 'euclidean_l2': 0.17}, 34 | 'Dlib': {'cosine': 0.07, 'euclidean': 0.6, 'euclidean_l2': 0.6}, 35 | 'ArcFace': {'cosine': 0.6871912959056619, 'euclidean': 4.1591468986978075, 'euclidean_l2': 1.1315718048269017} 36 | } 37 | 38 | threshold = thresholds.get(model_name, base_threshold).get(distance_metric, 0.4) 39 | 40 | return threshold 41 | -------------------------------------------------------------------------------- /deepface/commons/functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import cv2 5 | import base64 6 | from pathlib import Path 7 | 8 | from deepface.detectors import FaceDetector 9 | 10 | import tensorflow as tf 11 | tf_version = int(tf.__version__.split(".")[0]) 12 | 13 | if tf_version == 1: 14 | import 
keras 15 | from keras.preprocessing.image import load_img, save_img, img_to_array 16 | from keras.applications.imagenet_utils import preprocess_input 17 | from keras.preprocessing import image 18 | elif tf_version == 2: 19 | from tensorflow import keras 20 | from tensorflow.keras.preprocessing.image import load_img, save_img, img_to_array 21 | from tensorflow.keras.applications.imagenet_utils import preprocess_input 22 | from tensorflow.keras.preprocessing import image 23 | 24 | #-------------------------------------------------- 25 | 26 | def initialize_input(img1_path, img2_path = None): 27 | 28 | if type(img1_path) == list: 29 | bulkProcess = True 30 | img_list = img1_path.copy() 31 | else: 32 | bulkProcess = False 33 | 34 | if ( 35 | (type(img2_path) == str and img2_path != None) #exact image path, base64 image 36 | or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array 37 | ): 38 | img_list = [[img1_path, img2_path]] 39 | else: #analyze function passes just img1_path 40 | img_list = [img1_path] 41 | 42 | return img_list, bulkProcess 43 | 44 | def initialize_detector(detector_backend): 45 | 46 | global face_detector 47 | face_detector = FaceDetector.build_model(detector_backend) 48 | 49 | def initializeFolder(): 50 | 51 | home = str(Path.home()) 52 | 53 | if not os.path.exists(home+"/.deepface"): 54 | os.mkdir(home+"/.deepface") 55 | print("Directory ",home,"/.deepface created") 56 | 57 | if not os.path.exists(home+"/.deepface/weights"): 58 | os.mkdir(home+"/.deepface/weights") 59 | print("Directory ",home,"/.deepface/weights created") 60 | 61 | def loadBase64Img(uri): 62 | encoded_data = uri.split(',')[1] 63 | nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8) 64 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 65 | return img 66 | 67 | def load_image(img): 68 | 69 | exact_image = False 70 | if type(img).__module__ == np.__name__: 71 | exact_image = True 72 | 73 | base64_img = False 74 | if len(img) > 11 and img[0:11] == "data:image/": 75 | base64_img = True 76 | 77 | #--------------------------- 78 | 79 | if base64_img == True: 80 | img = loadBase64Img(img) 81 | 82 | elif exact_image != True: #image path passed as input 83 | if os.path.isfile(img) != True: 84 | raise ValueError("Confirm that ",img," exists") 85 | 86 | img = cv2.imread(img) 87 | 88 | return img 89 | 90 | def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True): 91 | 92 | img_region = [0, 0, img.shape[0], img.shape[1]] 93 | 94 | #if functions.preproces_face is called directly, then face_detector global variable might not been initialized. 95 | if not "face_detector" in globals(): 96 | initialize_detector(detector_backend = detector_backend) 97 | 98 | detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align) 99 | 100 | if (isinstance(detected_face, np.ndarray)): 101 | return detected_face, img_region 102 | else: 103 | if detected_face == None: 104 | if enforce_detection != True: 105 | return img, img_region 106 | else: 107 | raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.") 108 | 109 | def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True): 110 | 111 | #img might be path, base64 or numpy array. Convert it to numpy whatever it is. 
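# ----------------------------------------------------------------------
# Illustrative sketch (not part of the original file): in this repository
# the detect_face() call below is commented out, so preprocess_face()
# simply resizes the whole input and rescales it to [0, 1]. A minimal
# usage example under that assumption -- the dummy array stands in for a
# real video frame, and 112x112 matches the ArcFace input size used
# elsewhere in this repo.

import numpy as np
from deepface.commons import functions

frame = np.full((224, 224, 3), 128, dtype=np.uint8)            # placeholder BGR frame
face_pixels = functions.preprocess_face(frame,
                                        target_size=(112, 112),
                                        enforce_detection=False)
print(face_pixels.shape)                                        # -> (1, 112, 112, 3)
# ----------------------------------------------------------------------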
112 | img = load_image(img) 113 | base_img = img.copy() 114 | 115 | # img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align) 116 | 117 | #-------------------------- 118 | 119 | if img.shape[0] == 0 or img.shape[1] == 0: 120 | if enforce_detection == True: 121 | raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.") 122 | else: #restore base image 123 | img = base_img.copy() 124 | 125 | #-------------------------- 126 | 127 | #post-processing 128 | if grayscale == True: 129 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 130 | 131 | img = cv2.resize(img, target_size) 132 | #TODO: resize causes transformation on base image, you should add black pixels to rezie it to target_size 133 | 134 | img_pixels = image.img_to_array(img) 135 | img_pixels = np.expand_dims(img_pixels, axis = 0) 136 | img_pixels /= 255 #normalize input in [0, 1] 137 | 138 | if return_region == True: 139 | return img_pixels, region 140 | else: 141 | return img_pixels 142 | 143 | def find_input_shape(model): 144 | 145 | #face recognition models have different size of inputs 146 | #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue. 147 | 148 | input_shape = model.layers[0].input_shape 149 | 150 | if type(input_shape) == list: 151 | input_shape = input_shape[0][1:3] 152 | else: 153 | input_shape = input_shape[1:3] 154 | 155 | if type(input_shape) == list: #issue 197: some people got array here instead of tuple 156 | input_shape = tuple(input_shape) 157 | 158 | return input_shape 159 | -------------------------------------------------------------------------------- /deepface/detectors/DlibWrapper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import gdown 3 | import bz2 4 | import os 5 | 6 | def build_model(): 7 | 8 | home = str(Path.home()) 9 | 10 | import dlib #this requirement is not a must that's why imported here 11 | 12 | #check required file exists in the home/.deepface/weights folder 13 | if os.path.isfile(home+'/.deepface/weights/shape_predictor_5_face_landmarks.dat') != True: 14 | 15 | print("shape_predictor_5_face_landmarks.dat.bz2 is going to be downloaded") 16 | 17 | url = "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2" 18 | output = home+'/.deepface/weights/'+url.split("/")[-1] 19 | 20 | gdown.download(url, output, quiet=False) 21 | 22 | zipfile = bz2.BZ2File(output) 23 | data = zipfile.read() 24 | newfilepath = output[:-4] #discard .bz2 extension 25 | open(newfilepath, 'wb').write(data) 26 | 27 | face_detector = dlib.get_frontal_face_detector() 28 | sp = dlib.shape_predictor(home+"/.deepface/weights/shape_predictor_5_face_landmarks.dat") 29 | 30 | detector = {} 31 | detector["face_detector"] = face_detector 32 | detector["sp"] = sp 33 | return detector 34 | 35 | def detect_face(detector, img, align = True): 36 | 37 | import dlib #this requirement is not a must that's why imported here 38 | 39 | home = str(Path.home()) 40 | 41 | sp = detector["sp"] 42 | 43 | detected_face = None 44 | img_region = [0, 0, img.shape[0], img.shape[1]] 45 | 46 | face_detector = detector["face_detector"] 47 | detections = face_detector(img, 1) 48 | 49 | if len(detections) > 0: 50 | 51 | for idx, d in enumerate(detections): 52 | left = d.left(); right = d.right() 53 | top = d.top(); bottom = d.bottom() 54 | 
detected_face = img[top:bottom, left:right] 55 | img_region = [left, top, right - left, bottom - top] 56 | break #get the first one 57 | 58 | if align: 59 | img_shape = sp(img, detections[0]) 60 | detected_face = dlib.get_face_chip(img, img_shape, size = detected_face.shape[0]) 61 | 62 | return detected_face, img_region 63 | -------------------------------------------------------------------------------- /deepface/detectors/FaceDetector.py: -------------------------------------------------------------------------------- 1 | from deepface.detectors import OpenCvWrapper, SsdWrapper, DlibWrapper, MtcnnWrapper, RetinaFaceWrapper 2 | from PIL import Image 3 | import math 4 | import numpy as np 5 | from deepface.commons import distance 6 | 7 | def build_model(detector_backend): 8 | 9 | backends = { 10 | 'opencv': OpenCvWrapper.build_model, 11 | 'ssd': SsdWrapper.build_model, 12 | 'dlib': DlibWrapper.build_model, 13 | 'mtcnn': MtcnnWrapper.build_model, 14 | 'retinaface': RetinaFaceWrapper.build_model 15 | } 16 | 17 | face_detector = backends.get(detector_backend) 18 | 19 | if face_detector: 20 | face_detector = face_detector() 21 | else: 22 | raise ValueError("invalid detector_backend passed - " + detector_backend) 23 | 24 | return face_detector 25 | 26 | def detect_face(face_detector, detector_backend, img, align = True): 27 | 28 | backends = { 29 | 'opencv': OpenCvWrapper.detect_face, 30 | 'ssd': SsdWrapper.detect_face, 31 | 'dlib': DlibWrapper.detect_face, 32 | 'mtcnn': MtcnnWrapper.detect_face, 33 | 'retinaface': RetinaFaceWrapper.detect_face 34 | } 35 | 36 | detect_face = backends.get(detector_backend) 37 | 38 | if detect_face: 39 | face, region = detect_face(face_detector, img, align) 40 | else: 41 | raise ValueError("invalid detector_backend passed - " + detector_backend) 42 | 43 | return face, region 44 | 45 | def alignment_procedure(img, left_eye, right_eye): 46 | 47 | #this function aligns given face in img based on left and right eye coordinates 48 | 49 | left_eye_x, left_eye_y = left_eye 50 | right_eye_x, right_eye_y = right_eye 51 | 52 | #----------------------- 53 | #find rotation direction 54 | 55 | if left_eye_y > right_eye_y: 56 | point_3rd = (right_eye_x, left_eye_y) 57 | direction = -1 #rotate same direction to clock 58 | else: 59 | point_3rd = (left_eye_x, right_eye_y) 60 | direction = 1 #rotate inverse direction of clock 61 | 62 | #----------------------- 63 | #find length of triangle edges 64 | 65 | a = distance.findEuclideanDistance(np.array(left_eye), np.array(point_3rd)) 66 | b = distance.findEuclideanDistance(np.array(right_eye), np.array(point_3rd)) 67 | c = distance.findEuclideanDistance(np.array(right_eye), np.array(left_eye)) 68 | 69 | #----------------------- 70 | 71 | #apply cosine rule 72 | 73 | if b != 0 and c != 0: #this multiplication causes division by zero in cos_a calculation 74 | 75 | cos_a = (b*b + c*c - a*a)/(2*b*c) 76 | angle = np.arccos(cos_a) #angle in radian 77 | angle = (angle * 180) / math.pi #radian to degree 78 | 79 | #----------------------- 80 | #rotate base image 81 | 82 | if direction == -1: 83 | angle = 90 - angle 84 | 85 | img = Image.fromarray(img) 86 | img = np.array(img.rotate(direction * angle)) 87 | 88 | #----------------------- 89 | 90 | return img #return img anyway 91 | -------------------------------------------------------------------------------- /deepface/detectors/MtcnnWrapper.py: -------------------------------------------------------------------------------- 1 | from mtcnn import MTCNN 2 | import cv2 3 | from 
deepface.detectors import FaceDetector 4 | 5 | def build_model(): 6 | face_detector = MTCNN() 7 | return face_detector 8 | 9 | def detect_face(face_detector, img, align = True): 10 | 11 | detected_face = None 12 | img_region = [0, 0, img.shape[0], img.shape[1]] 13 | 14 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #mtcnn expects RGB but OpenCV read BGR 15 | detections = face_detector.detect_faces(img_rgb) 16 | 17 | if len(detections) > 0: 18 | detection = detections[0] 19 | x, y, w, h = detection["box"] 20 | detected_face = img[int(y):int(y+h), int(x):int(x+w)] 21 | img_region = [x, y, w, h] 22 | 23 | keypoints = detection["keypoints"] 24 | left_eye = keypoints["left_eye"] 25 | right_eye = keypoints["right_eye"] 26 | 27 | if align: 28 | detected_face = FaceDetector.alignment_procedure(detected_face, left_eye, right_eye) 29 | 30 | return detected_face, img_region 31 | -------------------------------------------------------------------------------- /deepface/detectors/OpenCvWrapper.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import pandas as pd 4 | from deepface.detectors import FaceDetector 5 | 6 | def build_model(): 7 | 8 | detector ={} 9 | 10 | detector["face_detector"] = build_cascade('haarcascade') 11 | detector["eye_detector"] = build_cascade('haarcascade_eye') 12 | 13 | return detector 14 | 15 | def build_cascade(model_name = 'haarcascade'): 16 | opencv_path = get_opencv_path() 17 | 18 | if model_name == 'haarcascade': 19 | 20 | face_detector_path = opencv_path+"haarcascade_frontalface_default.xml" 21 | 22 | if os.path.isfile(face_detector_path) != True: 23 | raise ValueError("Confirm that opencv is installed on your environment! Expected path ",face_detector_path," violated.") 24 | 25 | 26 | face_detector = cv2.CascadeClassifier(face_detector_path) 27 | return face_detector 28 | 29 | elif model_name == 'haarcascade_eye': 30 | eye_detector_path = opencv_path+"haarcascade_eye.xml" 31 | 32 | if os.path.isfile(eye_detector_path) != True: 33 | raise ValueError("Confirm that opencv is installed on your environment! 
Expected path ",eye_detector_path," violated.") 34 | 35 | eye_detector = cv2.CascadeClassifier(eye_detector_path) 36 | return eye_detector 37 | 38 | def detect_face(detector, img, align = True): 39 | 40 | detected_face = None 41 | img_region = [0, 0, img.shape[0], img.shape[1]] 42 | 43 | faces = [] 44 | try: 45 | faces = detector["face_detector"].detectMultiScale(img, 1.3, 5) 46 | except: 47 | pass 48 | 49 | if len(faces) > 0: 50 | x,y,w,h = faces[0] #focus on the 1st face found in the image 51 | detected_face = img[int(y):int(y+h), int(x):int(x+w)] 52 | 53 | if align: 54 | detected_face = align_face(detector["eye_detector"], detected_face) 55 | img_region = [x, y, w, h] 56 | 57 | return detected_face, img_region 58 | 59 | def align_face(eye_detector, img): 60 | 61 | detected_face_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #eye detector expects gray scale image 62 | 63 | eyes = eye_detector.detectMultiScale(detected_face_gray) 64 | 65 | if len(eyes) >= 2: 66 | 67 | #find the largest 2 eye 68 | 69 | base_eyes = eyes[:, 2] 70 | 71 | items = [] 72 | for i in range(0, len(base_eyes)): 73 | item = (base_eyes[i], i) 74 | items.append(item) 75 | 76 | df = pd.DataFrame(items, columns = ["length", "idx"]).sort_values(by=['length'], ascending=False) 77 | 78 | eyes = eyes[df.idx.values[0:2]] #eyes variable stores the largest 2 eye 79 | 80 | #----------------------- 81 | #decide left and right eye 82 | 83 | eye_1 = eyes[0]; eye_2 = eyes[1] 84 | 85 | if eye_1[0] < eye_2[0]: 86 | left_eye = eye_1; right_eye = eye_2 87 | else: 88 | left_eye = eye_2; right_eye = eye_1 89 | 90 | #----------------------- 91 | #find center of eyes 92 | left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2))) 93 | right_eye = (int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2))) 94 | img = FaceDetector.alignment_procedure(img, left_eye, right_eye) 95 | return img #return img anyway 96 | 97 | def get_opencv_path(): 98 | opencv_home = cv2.__file__ 99 | folders = opencv_home.split(os.path.sep)[0:-1] 100 | 101 | path = folders[0] 102 | for folder in folders[1:]: 103 | path = path + "/" + folder 104 | 105 | return path+"/data/" 106 | -------------------------------------------------------------------------------- /deepface/detectors/RetinaFaceWrapper.py: -------------------------------------------------------------------------------- 1 | from retinaface import RetinaFace 2 | import cv2 3 | 4 | def build_model(): 5 | face_detector = RetinaFace.build_model() 6 | return face_detector 7 | 8 | def detect_face(face_detector, img, align = True): 9 | 10 | face = None 11 | img_region = [0, 0, img.shape[0], img.shape[1]] 12 | 13 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #retinaface expects RGB but OpenCV read BGR 14 | 15 | faces = RetinaFace.extract_faces(img_rgb, model = face_detector, align = align) 16 | 17 | if len(faces) > 0: 18 | face = faces[0][:, :, ::-1] 19 | 20 | return face, img_region 21 | -------------------------------------------------------------------------------- /deepface/detectors/SsdWrapper.py: -------------------------------------------------------------------------------- 1 | import gdown 2 | from pathlib import Path 3 | import os 4 | import cv2 5 | import pandas as pd 6 | 7 | from deepface.detectors import OpenCvWrapper 8 | 9 | def build_model(): 10 | 11 | home = str(Path.home()) 12 | 13 | #model structure 14 | if os.path.isfile(home+'/.deepface/weights/deploy.prototxt') != True: 15 | 16 | print("deploy.prototxt will be downloaded...") 17 | 18 | url = 
"https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt" 19 | 20 | output = home+'/.deepface/weights/deploy.prototxt' 21 | 22 | gdown.download(url, output, quiet=False) 23 | 24 | #pre-trained weights 25 | if os.path.isfile(home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel') != True: 26 | 27 | print("res10_300x300_ssd_iter_140000.caffemodel will be downloaded...") 28 | 29 | url = "https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel" 30 | 31 | output = home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel' 32 | 33 | gdown.download(url, output, quiet=False) 34 | 35 | face_detector = cv2.dnn.readNetFromCaffe( 36 | home+"/.deepface/weights/deploy.prototxt", 37 | home+"/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel" 38 | ) 39 | 40 | eye_detector = OpenCvWrapper.build_cascade("haarcascade_eye") 41 | 42 | detector = {} 43 | detector["face_detector"] = face_detector 44 | detector["eye_detector"] = eye_detector 45 | 46 | return detector 47 | 48 | def detect_face(detector, img, align = True): 49 | 50 | detected_face = None 51 | img_region = [0, 0, img.shape[0], img.shape[1]] 52 | 53 | ssd_labels = ["img_id", "is_face", "confidence", "left", "top", "right", "bottom"] 54 | 55 | target_size = (300, 300) 56 | 57 | base_img = img.copy() #we will restore base_img to img later 58 | 59 | original_size = img.shape 60 | 61 | img = cv2.resize(img, target_size) 62 | 63 | aspect_ratio_x = (original_size[1] / target_size[1]) 64 | aspect_ratio_y = (original_size[0] / target_size[0]) 65 | 66 | imageBlob = cv2.dnn.blobFromImage(image = img) 67 | 68 | face_detector = detector["face_detector"] 69 | face_detector.setInput(imageBlob) 70 | detections = face_detector.forward() 71 | 72 | detections_df = pd.DataFrame(detections[0][0], columns = ssd_labels) 73 | 74 | detections_df = detections_df[detections_df['is_face'] == 1] #0: background, 1: face 75 | detections_df = detections_df[detections_df['confidence'] >= 0.90] 76 | 77 | detections_df['left'] = (detections_df['left'] * 300).astype(int) 78 | detections_df['bottom'] = (detections_df['bottom'] * 300).astype(int) 79 | detections_df['right'] = (detections_df['right'] * 300).astype(int) 80 | detections_df['top'] = (detections_df['top'] * 300).astype(int) 81 | 82 | if detections_df.shape[0] > 0: 83 | 84 | #TODO: sort detections_df 85 | 86 | #get the first face in the image 87 | instance = detections_df.iloc[0] 88 | 89 | left = instance["left"] 90 | right = instance["right"] 91 | bottom = instance["bottom"] 92 | top = instance["top"] 93 | 94 | detected_face = base_img[int(top*aspect_ratio_y):int(bottom*aspect_ratio_y), int(left*aspect_ratio_x):int(right*aspect_ratio_x)] 95 | img_region = [int(left*aspect_ratio_x), int(top*aspect_ratio_y), int(right*aspect_ratio_x) - int(left*aspect_ratio_x), int(bottom*aspect_ratio_y) - int(top*aspect_ratio_y)] 96 | 97 | if align: 98 | detected_face = OpenCvWrapper.align_face(detector["eye_detector"], detected_face) 99 | 100 | return detected_face, img_region 101 | -------------------------------------------------------------------------------- /deepface/detectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/detectors/__init__.py -------------------------------------------------------------------------------- 
/deepface/extendedmodels/Age.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | import os 3 | from pathlib import Path 4 | import gdown 5 | import numpy as np 6 | 7 | import tensorflow as tf 8 | tf_version = int(tf.__version__.split(".")[0]) 9 | 10 | if tf_version == 1: 11 | import keras 12 | from keras.models import Model, Sequential 13 | from keras.layers import Convolution2D, Flatten, Activation 14 | elif tf_version == 2: 15 | from tensorflow import keras 16 | from tensorflow.keras.models import Model, Sequential 17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 18 | 19 | def loadModel(): 20 | 21 | model = VGGFace.baseModel() 22 | 23 | #-------------------------- 24 | 25 | classes = 101 26 | base_model_output = Sequential() 27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 28 | base_model_output = Flatten()(base_model_output) 29 | base_model_output = Activation('softmax')(base_model_output) 30 | 31 | #-------------------------- 32 | 33 | age_model = Model(inputs=model.input, outputs=base_model_output) 34 | 35 | #-------------------------- 36 | 37 | #load weights 38 | 39 | home = str(Path.home()) 40 | 41 | if os.path.isfile(home+'/.deepface/weights/age_model_weights.h5') != True: 42 | print("age_model_weights.h5 will be downloaded...") 43 | 44 | url = 'https://drive.google.com/uc?id=1YCox_4kJ-BYeXq27uUbasu--yz28zUMV' 45 | output = home+'/.deepface/weights/age_model_weights.h5' 46 | gdown.download(url, output, quiet=False) 47 | 48 | age_model.load_weights(home+'/.deepface/weights/age_model_weights.h5') 49 | 50 | return age_model 51 | 52 | #-------------------------- 53 | 54 | def findApparentAge(age_predictions): 55 | output_indexes = np.array([i for i in range(0, 101)]) 56 | apparent_age = np.sum(age_predictions * output_indexes) 57 | return apparent_age -------------------------------------------------------------------------------- /deepface/extendedmodels/Emotion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gdown 3 | from pathlib import Path 4 | import zipfile 5 | 6 | import tensorflow as tf 7 | tf_version = int(tf.__version__.split(".")[0]) 8 | 9 | if tf_version == 1: 10 | import keras 11 | from keras.models import Model, Sequential 12 | from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout 13 | elif tf_version == 2: 14 | from tensorflow import keras 15 | from tensorflow.keras.models import Model, Sequential 16 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout 17 | 18 | def loadModel(): 19 | 20 | num_classes = 7 21 | 22 | model = Sequential() 23 | 24 | #1st convolution layer 25 | model.add(Conv2D(64, (5, 5), activation='relu', input_shape=(48,48,1))) 26 | model.add(MaxPooling2D(pool_size=(5,5), strides=(2, 2))) 27 | 28 | #2nd convolution layer 29 | model.add(Conv2D(64, (3, 3), activation='relu')) 30 | model.add(Conv2D(64, (3, 3), activation='relu')) 31 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2))) 32 | 33 | #3rd convolution layer 34 | model.add(Conv2D(128, (3, 3), activation='relu')) 35 | model.add(Conv2D(128, (3, 3), activation='relu')) 36 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2))) 37 | 38 | model.add(Flatten()) 39 | 40 | #fully connected neural networks 41 | model.add(Dense(1024, activation='relu')) 42 | model.add(Dropout(0.2)) 43 | 
model.add(Dense(1024, activation='relu')) 44 | model.add(Dropout(0.2)) 45 | 46 | model.add(Dense(num_classes, activation='softmax')) 47 | 48 | #---------------------------- 49 | 50 | home = str(Path.home()) 51 | 52 | if os.path.isfile(home+'/.deepface/weights/facial_expression_model_weights.h5') != True: 53 | print("facial_expression_model_weights.h5 will be downloaded...") 54 | 55 | #TO-DO: upload weights to google drive 56 | 57 | #zip 58 | url = 'https://drive.google.com/uc?id=13iUHHP3SlNg53qSuQZDdHDSDNdBP9nwy' 59 | output = home+'/.deepface/weights/facial_expression_model_weights.zip' 60 | gdown.download(url, output, quiet=False) 61 | 62 | #unzip facial_expression_model_weights.zip 63 | with zipfile.ZipFile(output, 'r') as zip_ref: 64 | zip_ref.extractall(home+'/.deepface/weights/') 65 | 66 | model.load_weights(home+'/.deepface/weights/facial_expression_model_weights.h5') 67 | 68 | return model 69 | 70 | #---------------------------- 71 | 72 | return 0 -------------------------------------------------------------------------------- /deepface/extendedmodels/Gender.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | import os 3 | from pathlib import Path 4 | import gdown 5 | import numpy as np 6 | 7 | import tensorflow as tf 8 | tf_version = int(tf.__version__.split(".")[0]) 9 | 10 | if tf_version == 1: 11 | from keras.models import Model, Sequential 12 | from keras.layers import Convolution2D, Flatten, Activation 13 | elif tf_version == 2: 14 | from tensorflow.keras.models import Model, Sequential 15 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 16 | 17 | def loadModel(): 18 | 19 | model = VGGFace.baseModel() 20 | 21 | #-------------------------- 22 | 23 | classes = 2 24 | base_model_output = Sequential() 25 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 26 | base_model_output = Flatten()(base_model_output) 27 | base_model_output = Activation('softmax')(base_model_output) 28 | 29 | #-------------------------- 30 | 31 | gender_model = Model(inputs=model.input, outputs=base_model_output) 32 | 33 | #-------------------------- 34 | 35 | #load weights 36 | 37 | home = str(Path.home()) 38 | 39 | if os.path.isfile(home+'/.deepface/weights/gender_model_weights.h5') != True: 40 | print("gender_model_weights.h5 will be downloaded...") 41 | 42 | url = 'https://drive.google.com/uc?id=1wUXRVlbsni2FN9-jkS_f4UTUrm1bRLyk' 43 | output = home+'/.deepface/weights/gender_model_weights.h5' 44 | gdown.download(url, output, quiet=False) 45 | 46 | gender_model.load_weights(home+'/.deepface/weights/gender_model_weights.h5') 47 | 48 | return gender_model 49 | 50 | #-------------------------- -------------------------------------------------------------------------------- /deepface/extendedmodels/Race.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | 3 | import os 4 | from pathlib import Path 5 | import gdown 6 | import numpy as np 7 | import zipfile 8 | 9 | import tensorflow as tf 10 | tf_version = int(tf.__version__.split(".")[0]) 11 | 12 | if tf_version == 1: 13 | from keras.models import Model, Sequential 14 | from keras.layers import Convolution2D, Flatten, Activation 15 | elif tf_version == 2: 16 | from tensorflow.keras.models import Model, Sequential 17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 18 | 19 | def loadModel(): 20 | 21 | model = 
VGGFace.baseModel() 22 | 23 | #-------------------------- 24 | 25 | classes = 6 26 | base_model_output = Sequential() 27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 28 | base_model_output = Flatten()(base_model_output) 29 | base_model_output = Activation('softmax')(base_model_output) 30 | 31 | #-------------------------- 32 | 33 | race_model = Model(inputs=model.input, outputs=base_model_output) 34 | 35 | #-------------------------- 36 | 37 | #load weights 38 | 39 | home = str(Path.home()) 40 | 41 | if os.path.isfile(home+'/.deepface/weights/race_model_single_batch.h5') != True: 42 | print("race_model_single_batch.h5 will be downloaded...") 43 | 44 | #zip 45 | url = 'https://drive.google.com/uc?id=1nz-WDhghGQBC4biwShQ9kYjvQMpO6smj' 46 | output = home+'/.deepface/weights/race_model_single_batch.zip' 47 | gdown.download(url, output, quiet=False) 48 | 49 | #unzip race_model_single_batch.zip 50 | with zipfile.ZipFile(output, 'r') as zip_ref: 51 | zip_ref.extractall(home+'/.deepface/weights/') 52 | 53 | race_model.load_weights(home+'/.deepface/weights/race_model_single_batch.h5') 54 | 55 | return race_model 56 | 57 | #-------------------------- 58 | -------------------------------------------------------------------------------- /deepface/extendedmodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/extendedmodels/__init__.py -------------------------------------------------------------------------------- /deepface/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/models/__init__.py -------------------------------------------------------------------------------- /detection.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/detection.txt -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import motmetrics as mm 2 | import numpy as np 3 | import os 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | """ 8 | python evaluation.py \ 9 | --gt_file_path ./resources/gt/T-ara_gt.txt \ 10 | --pred_file_path ./resources/gt/T-ara_pred.txt 11 | 12 | python evaluation.py \ 13 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \ 14 | --pred_file_path ./resources/gt/GirlsAloud_pred.txt 15 | 16 | python evaluation.py \ 17 | --gt_file_path ./resources/gt/Darling_gt.txt \ 18 | --pred_file_path ./resources/gt/Darling_pred.txt 19 | 20 | python evaluation.py \ 21 | --gt_file_path ./resources/gt/Westlife_gt.txt \ 22 | --pred_file_path ./resources/gt/Westlife_pred.txt 23 | 24 | python evaluation.py \ 25 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \ 26 | --pred_file_path ./resources/gt/BrunoMars_pred.txt 27 | 28 | python evaluation.py \ 29 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \ 30 | --pred_file_path ./resources/gt/HelloBubble_pred.txt 31 | 32 | python evaluation.py \ 33 | --gt_file_path ./resources/gt/Apink_gt.txt \ 34 | --pred_file_path ./resources/gt/Apink_pred.txt 35 | """ 36 | 37 | 
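# ----------------------------------------------------------------------
# Illustrative sketch (never called anywhere): how the motmetrics
# accumulation in main() below behaves, assuming each line of the gt/pred
# txt files holds one box as whitespace-separated integers in the order
# frame_no, id, x, y, width, height — which is what main() parses.
# Relies on the module-level `mm` and `np` imports above.
def _toy_motmetrics_example():
    acc = mm.MOTAccumulator(auto_id=True)

    # One ground-truth box and one predicted box in a single frame (x, y, w, h).
    gt_ids, pred_ids = [1], [1]
    gt_boxes = np.array([[10, 10, 50, 80]])
    pred_boxes = np.array([[12, 11, 50, 80]])

    # Pairwise 1 - IoU distances; pairs that overlap too little are set to
    # NaN, which tells the accumulator they cannot be matched.
    dists = mm.distances.iou_matrix(gt_boxes, pred_boxes, max_iou=0.5)
    acc.update(gt_ids, pred_ids, dists)

    mh = mm.metrics.create()
    summary = mh.compute(acc, metrics=["num_frames", "mota", "motp"], name="toy")
    print(mm.io.render_summary(summary, formatters=mh.formatters,
                               namemap=mm.io.motchallenge_metric_names))
# ----------------------------------------------------------------------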
flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to gt txt') 38 | flags.DEFINE_string('pred_file_path', './resources/gt/T-ara_pred.txt', 'path to predicted txt') 39 | 40 | def main(args): 41 | 42 | # home = os.getcwd() 43 | # gt_path = os.path.join(home, "resources", "gt") 44 | 45 | # gt_file_path = os.path.join(gt_path, "T-ara_gt.txt") 46 | # pred_file_path = os.path.join(gt_path, "T-ara_pred.txt") 47 | 48 | f = open(FLAGS.gt_file_path, "r") 49 | gt = [] 50 | while True: 51 | line = f.readline() 52 | if not line: break 53 | a = list(map(int, line.split())) 54 | gt.append(a) 55 | gt = np.asarray(gt) 56 | f.close() 57 | 58 | f = open(FLAGS.pred_file_path, "r") 59 | pred = [] 60 | while True: 61 | line = f.readline() 62 | if not line: break 63 | a = list(map(int, line.split())) 64 | pred.append(a) 65 | pred = np.asarray(pred) 66 | f.close() 67 | 68 | acc = mm.MOTAccumulator(auto_id=True) 69 | frame_idx = 0 70 | count = 0 71 | max_index = max(max(gt[:, 0]), max(pred[:, 0])) 72 | 73 | while frame_idx <= max_index: 74 | frame_idx += 1 75 | 76 | gt_indexs = gt[:, 0] 77 | pred_indexs = pred[:, 0] 78 | 79 | mask1 = frame_idx == gt_indexs 80 | mask2 = frame_idx == pred_indexs 81 | 82 | # if not gt[mask1].shape[0] and not pred[mask2].shape[0]: 83 | # break 84 | 85 | # gt_ids = sorted(list(set(gt[mask1][:, 1]))) 86 | # pred_ids = sorted(list(set(pred[mask2][:, 1]))) 87 | 88 | gt_ids = gt[mask1][:, 1] 89 | pred_ids = pred[mask2][:, 1] 90 | # print(gt_ids) 91 | # print(pred_ids) 92 | 93 | a = gt[mask1][:, 2:] 94 | b = pred[mask2][:, 2:] 95 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5)) 96 | 97 | f = acc.update( 98 | gt_ids, 99 | pred_ids, 100 | mm.distances.iou_matrix(a, b, max_iou=0.5) 101 | ) 102 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5)) 103 | # print(acc.mot_events.loc[f]) 104 | 105 | 106 | mh = mm.metrics.create() 107 | custom_metric = [ 108 | "num_frames", 109 | "obj_frequencies", 110 | "pred_frequencies", 111 | "num_matches", 112 | "num_switches", 113 | "num_transfer", 114 | "num_ascend", 115 | "num_migrate", 116 | "num_false_positives", 117 | "num_misses", 118 | "num_detections", 119 | "num_objects", 120 | "num_predictions", 121 | "num_unique_objects", 122 | "track_ratios", 123 | "mostly_tracked", 124 | "partially_tracked", 125 | "mostly_lost", 126 | "num_fragmentations", 127 | "motp", 128 | "mota", 129 | "precision", 130 | "recall", 131 | ] 132 | summary = mh.compute_many( 133 | [acc, acc.mot_events], 134 | metrics=mm.metrics.motchallenge_metrics, 135 | ) 136 | 137 | strsummary = mm.io.render_summary( 138 | summary, 139 | formatters=mh.formatters, 140 | namemap=mm.io.motchallenge_metric_names 141 | ) 142 | 143 | print(strsummary) 144 | 145 | 146 | if __name__ == '__main__': 147 | try: 148 | app.run(main) 149 | except SystemExit: 150 | pass 151 | 152 | -------------------------------------------------------------------------------- /generate_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | 8 | 9 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to crop gt file') 10 | flags.DEFINE_string('video_file_path', './resources/video/in/T-ara.mov', 'path to video file') 11 | flags.DEFINE_string('face_data_path', './resources/database/T-ara', 'path to video file') 12 | 13 | """ 14 | python generate_face.py \ 15 | --gt_file_path ./resources/gt/T-ara_gt.txt \ 16 | 
--video_file_path ./resources/video/in/T-ara.mov \ 17 | --face_data_path ./resources/database/T-ara 18 | 19 | python generate_face.py \ 20 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \ 21 | --video_file_path ./resources/video/in/GirlsAloud.mp4 \ 22 | --face_data_path ./resources/database/GirlsAloud 23 | 24 | python generate_face.py \ 25 | --gt_file_path ./resources/gt/Darling_gt.txt \ 26 | --video_file_path ./resources/video/in/Darling.mp4 \ 27 | --face_data_path ./resources/database/Darling 28 | 29 | python generate_face.py \ 30 | --gt_file_path ./resources/gt/Westlife_gt.txt \ 31 | --video_file_path ./resources/video/in/Westlife.mp4 \ 32 | --face_data_path ./resources/database/Westlife 33 | 34 | python generate_face.py \ 35 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \ 36 | --video_file_path ./resources/video/in/BrunoMars.mp4 \ 37 | --face_data_path ./resources/database/BrunoMars 38 | 39 | python generate_face.py \ 40 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \ 41 | --video_file_path ./resources/video/in/HelloBubble.mp4 \ 42 | --face_data_path ./resources/database/HelloBubble 43 | 44 | python generate_face.py \ 45 | --gt_file_path ./resources/gt/Apink_gt.txt \ 46 | --video_file_path ./resources/video/in/Apink.mp4 \ 47 | --face_data_path ./resources/database/Apink 48 | 49 | """ 50 | def main(args): 51 | f = open(FLAGS.gt_file_path, "r") 52 | detections = [] 53 | while True: 54 | line = f.readline() 55 | if not line: break 56 | a = list(map(int, line.split())) 57 | detections.append(a) 58 | detections = np.asarray(detections) 59 | f.close() 60 | 61 | if not os.path.isdir(FLAGS.face_data_path): 62 | os.mkdir(FLAGS.face_data_path) 63 | 64 | vid = cv2.VideoCapture(FLAGS.video_file_path) 65 | frame_index = -1 66 | count = 0 67 | frame_indices = detections[:, 0].astype(np.int) 68 | 69 | object_dict = dict() 70 | 71 | while True: 72 | frame_index += 1 73 | print(f'{frame_index} frame is working on...') 74 | _, img = vid.read() 75 | 76 | if img is None: 77 | logging.warning("Empty Frame") 78 | count+=1 79 | if count < 3: 80 | continue 81 | else: 82 | break 83 | 84 | mask = frame_indices == frame_index 85 | 86 | 87 | 88 | for row in detections[mask]: 89 | frame, id, bbox = row[0], row[1], row[2:] 90 | 91 | if object_dict.get(id): 92 | file_name = object_dict[id] 93 | object_dict[id] += 1 94 | else: 95 | object_dict[id] = 1 96 | file_name = object_dict[id] 97 | 98 | if object_dict[id] % 10 != 0: 99 | continue 100 | 101 | # target_aspect = float(img.shape[1]) / img.shape[0] 102 | # new_width = target_aspect * bbox[3] 103 | # bbox[0] -= (new_width - bbox[2]) / 2 104 | # bbox[2] = new_width 105 | bbox[2:] += bbox[:2] 106 | bbox = bbox.astype(np.int) 107 | 108 | bbox[:2] = np.maximum(0, bbox[:2]) 109 | bbox[2:] = np.minimum(np.asarray(img.shape[:2][::-1]) - 1, bbox[2:]) 110 | 111 | sx, sy, ex, ey = bbox 112 | # print(bbox) 113 | # print(img.shape) 114 | image = img[sy:ey, sx:ex] 115 | 116 | output_path = os.path.join(FLAGS.face_data_path, str(id)) 117 | if not os.path.isdir(output_path): 118 | os.mkdir(output_path) 119 | 120 | cv2.imwrite(os.path.join(FLAGS.face_data_path, str(id), str(object_dict[id])+".jpg"), image) 121 | 122 | 123 | 124 | # frame_indices = detection_mat[:, 0].astype(np.int) 125 | # mask = frame_indices == frame_idx 126 | 127 | # detection_list = [] 128 | # for row in detection_mat[mask]: 129 | # bbox, confidence, feature = row[2:6], row[6], row[10:] 130 | # if bbox[3] < min_height: 131 | # continue 132 | # detection_list.append(Detection(bbox, confidence, 
feature)) 133 | # return detection_list 134 | 135 | 136 | 137 | if __name__ == '__main__': 138 | try: 139 | app.run(main) 140 | except SystemExit: 141 | pass 142 | 143 | 144 | -------------------------------------------------------------------------------- /object_tracker copy.py: -------------------------------------------------------------------------------- 1 | import time 2 | from absl import app, flags, logging 3 | from absl.flags import FLAGS 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import cv2 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | from yolov3_tf2.models import ( 11 | YoloV3, YoloV3Tiny 12 | ) 13 | from yolov3_tf2.dataset import transform_images 14 | from yolov3_tf2.utils import draw_outputs, convert_boxes 15 | 16 | from deep_sort import preprocessing 17 | from deep_sort import nn_matching 18 | from deep_sort.detection import Detection 19 | from deep_sort.tracker import Tracker 20 | from tools import generate_detections as gdet 21 | from PIL import Image 22 | 23 | 24 | flags.DEFINE_string('classes', './model_data/labels/coco.names', 'path to classes file') 25 | flags.DEFINE_string('weights', './weights/yolov3.tf', 26 | 'path to weights file') 27 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 28 | flags.DEFINE_integer('size', 416, 'resize images to') 29 | flags.DEFINE_string('video', './resources/video/in/test.mp4', 30 | 'path to video file or number for webcam)') 31 | flags.DEFINE_string('output', None, 'path to output video') 32 | flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file') 33 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model') 34 | 35 | 36 | def main(_argv): 37 | # Definition of the parameters 38 | max_cosine_distance = 0.5 39 | nn_budget = None 40 | nms_max_overlap = 1.0 41 | 42 | #initialize deep sort 43 | model_filename = 'weights/mars-small128.pb' 44 | encoder = gdet.create_box_encoder(model_filename, batch_size=128) 45 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 46 | tracker = Tracker(metric) 47 | 48 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 49 | for physical_device in physical_devices: 50 | tf.config.experimental.set_memory_growth(physical_device, True) 51 | 52 | if FLAGS.tiny: 53 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 54 | else: 55 | yolo = YoloV3(classes=FLAGS.num_classes) 56 | 57 | yolo.load_weights(FLAGS.weights) 58 | logging.info('weights loaded') 59 | 60 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()] 61 | logging.info('classes loaded') 62 | 63 | times = [] 64 | 65 | try: 66 | vid = cv2.VideoCapture(int(FLAGS.video)) 67 | except: 68 | vid = cv2.VideoCapture(FLAGS.video) 69 | 70 | out = None 71 | 72 | if FLAGS.output: 73 | # by default VideoCapture returns float instead of int 74 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 75 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 76 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 77 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) 78 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) 79 | list_file = open('detection.txt', 'w') 80 | frame_index = -1 81 | 82 | fps = 0.0 83 | count = 0 84 | 85 | while True: 86 | _, img = vid.read() 87 | 88 | if img is None: 89 | logging.warning("Empty Frame") 90 | time.sleep(0.1) 91 | count+=1 92 | if count < 3: 93 | continue 94 | else: 95 | break 96 | 97 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 98 | img_in = 
tf.expand_dims(img_in, 0) 99 | img_in = transform_images(img_in, FLAGS.size) 100 | 101 | t1 = time.time() 102 | boxes, scores, classes, nums = yolo.predict(img_in) 103 | # print(boxes, scores, classes, nums) 104 | t2 = time.time() 105 | times.append(t2-t1) 106 | print(f'yolo predict time : {t2-t1}') 107 | times = times[-20:] 108 | 109 | t3 = time.time() 110 | ############# 111 | classes = classes[0] 112 | names = [] 113 | for i in range(len(classes)): 114 | names.append(class_names[int(classes[i])]) 115 | names = np.array(names) 116 | converted_boxes = convert_boxes(img, boxes[0]) 117 | features = encoder(img, converted_boxes) 118 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] 119 | 120 | t4 = time.time() 121 | print(f'feature generation time : {t4-t3}') 122 | 123 | #initialize color map 124 | cmap = plt.get_cmap('tab20b') 125 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] 126 | 127 | # run non-maxima suppresion 128 | boxs = np.array([d.tlwh for d in detections]) 129 | scores = np.array([d.confidence for d in detections]) 130 | classes = np.array([d.class_name for d in detections]) 131 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) 132 | detections = [detections[i] for i in indices] 133 | 134 | t5 = time.time() 135 | # Call the tracker 136 | tracker.predict() 137 | tracker.update(detections) 138 | t6 = time.time() 139 | print(f'tracking time : {t6-t5}') 140 | 141 | for track in tracker.tracks: 142 | if not track.is_confirmed() or track.time_since_update > 1: 143 | continue 144 | bbox = track.to_tlbr() 145 | class_name = track.get_class() 146 | color = colors[int(track.track_id) % len(colors)] 147 | color = [i * 255 for i in color] 148 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) 149 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) 150 | cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 151 | 152 | ####### 153 | fps = ( fps + (1./(time.time()-t1)) ) / 2 154 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names) 155 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30), 156 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 157 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 158 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2) 159 | if FLAGS.output: 160 | out.write(img) 161 | frame_index = frame_index + 1 162 | list_file.write(str(frame_index)+' ') 163 | if len(converted_boxes) != 0: 164 | for i in range(0,len(converted_boxes)): 165 | list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ') 166 | list_file.write('\n') 167 | cv2.imshow('output', img) 168 | if cv2.waitKey(1) == ord('q'): 169 | break 170 | 171 | cv2.destroyAllWindows() 172 | 173 | 174 | if __name__ == '__main__': 175 | try: 176 | app.run(main) 177 | except SystemExit: 178 | pass 179 | -------------------------------------------------------------------------------- /object_tracker.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from absl import app, flags, logging 4 | from absl.flags import FLAGS 5 | 6 | from mtcnn import MTCNN 7 | import tensorflow 
as tf 8 | import numpy as np 9 | import cv2 10 | import matplotlib.pyplot as plt 11 | from yolov3_tf2.models import ( 12 | YoloV3, YoloV3Tiny 13 | ) 14 | from yolov3_tf2.dataset import transform_images 15 | from yolov3_tf2.utils import draw_outputs, convert_boxes 16 | 17 | from deep_sort import preprocessing 18 | from deep_sort import nn_matching 19 | from deep_sort.detection import Detection 20 | from deep_sort.tracker import Tracker 21 | from tools import generate_detections as gdet 22 | from PIL import Image 23 | 24 | gpus = tf.config.experimental.list_physical_devices('GPU') 25 | if gpus: 26 | try: 27 | # Currently, memory growth needs to be the same across GPUs 28 | for gpu in gpus: 29 | tf.config.experimental.set_memory_growth(gpu, True) 30 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 31 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 32 | except RuntimeError as e: 33 | # Memory growth must be set before GPUs have been initialized 34 | print(e) 35 | 36 | """ 37 | python object_tracker.py \ 38 | --classes ./model_data/labels/widerface.names \ 39 | --video 0 \ 40 | --weights ./weights/yolov3-wider_16000.tf \ 41 | --num_classes 1 \ 42 | --output_format MP4V \ 43 | --output ./resources/video/out/myface.mp4 \ 44 | 45 | python object_tracker.py \ 46 | --classes ./model_data/labels/widerface.names \ 47 | --video ./resources/video/in/2.mp4 \ 48 | --weights ./weights/yolov3-wider_16000.tf \ 49 | --output_format MP4V \ 50 | --database ./resources/database/2 \ 51 | --output ./resources/video/out/2.mp4 \ 52 | --num_classes 1 \ 53 | --max_face_threshold 0.6871912959056619 54 | 55 | python object_tracker.py \ 56 | --classes ./model_data/labels/widerface.names \ 57 | --video ./resources/video/in/T-ara.mov \ 58 | --weights ./weights/yolov3-wider_16000.tf \ 59 | --output_format MP4V \ 60 | --database ./resources/database/T-ara \ 61 | --output ./resources/video/out/T-ara.mp4 \ 62 | --num_classes 1 \ 63 | --max_face_threshold 0.6871912959056619 \ 64 | --eval ./resources/gt/T-ara_pred.txt 65 | 66 | python object_tracker.py \ 67 | --classes ./model_data/labels/widerface.names \ 68 | --video ./resources/video/in/BrunoMars.mp4 \ 69 | --weights ./weights/yolov3-wider_16000.tf \ 70 | --output_format MP4V \ 71 | --database ./resources/database/BrunoMars \ 72 | --output ./resources/video/out/BrunoMars.mp4 \ 73 | --num_classes 1 \ 74 | --max_face_threshold 0.6871912959056619 \ 75 | --eval ./resources/gt/BrunoMars_pred.txt 76 | 77 | python object_tracker.py \ 78 | --classes ./model_data/labels/widerface.names \ 79 | --video ./resources/video/in/Darling.mp4 \ 80 | --weights ./weights/yolov3-wider_16000.tf \ 81 | --output_format MP4V \ 82 | --database ./resources/database/Darling \ 83 | --output ./resources/video/out/Darling.mp4 \ 84 | --num_classes 1 \ 85 | --max_face_threshold 0.6871912959056619 \ 86 | --eval ./resources/gt/Darling_pred.txt 87 | 88 | python object_tracker.py \ 89 | --classes ./model_data/labels/widerface.names \ 90 | --video ./resources/video/in/GirlsAloud.mp4 \ 91 | --weights ./weights/yolov3-wider_16000.tf \ 92 | --output_format MP4V \ 93 | --database ./resources/database/GirlsAloud \ 94 | --output ./resources/video/out/GirlsAloud.mp4 \ 95 | --num_classes 1 \ 96 | --max_face_threshold 0.6871912959056619 \ 97 | --eval ./resources/gt/GirlsAloud_pred.txt 98 | 99 | python object_tracker.py \ 100 | --classes ./model_data/labels/widerface.names \ 101 | --video ./resources/video/in/HelloBubble.mp4 \ 102 | --weights ./weights/yolov3-wider_16000.tf \ 103 | 
--output_format MP4V \ 104 | --database ./resources/database/HelloBubble \ 105 | --output ./resources/video/out/HelloBubble.mp4 \ 106 | --num_classes 1 \ 107 | --max_face_threshold 0.6871912959056619 \ 108 | --eval ./resources/gt/HelloBubble_pred.txt 109 | 110 | python object_tracker.py \ 111 | --classes ./model_data/labels/widerface.names \ 112 | --video ./resources/video/in/Westlife.mp4 \ 113 | --weights ./weights/yolov3-wider_16000.tf \ 114 | --output_format MP4V \ 115 | --database ./resources/database/Westlife \ 116 | --output ./resources/video/out/Westlife.mp4 \ 117 | --num_classes 1 \ 118 | --max_face_threshold 0.6871912959056619 \ 119 | --eval ./resources/gt/Westlife_pred.txt 120 | 121 | python object_tracker.py \ 122 | --classes ./model_data/labels/widerface.names \ 123 | --video ./resources/video/in/Apink.mp4 \ 124 | --weights ./weights/yolov3-wider_16000.tf \ 125 | --output_format MP4V \ 126 | --database ./resources/database/Apink \ 127 | --output ./resources/video/out/Apink.mp4 \ 128 | --num_classes 1 \ 129 | --max_face_threshold 0.6871912959056619 \ 130 | --eval ./resources/gt/Apink_pred.txt 131 | """ 132 | 133 | 134 | flags.DEFINE_string('classes', './model_data/labels/widerface.names', 'path to classes file') 135 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.tf', 136 | 'path to weights file') 137 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 138 | flags.DEFINE_integer('size', 416, 'resize images to') 139 | flags.DEFINE_string('video', './resources/video/in/1.mp4', 140 | 'path to video file or number for webcam)') 141 | flags.DEFINE_string('database', './resources/database/1', 142 | 'path to database file for identification)') 143 | flags.DEFINE_string('output', './resources/video/out/1.mp4', 'path to output video') 144 | flags.DEFINE_string('output_format', 'MP4V', 'codec used in VideoWriter when saving video to file') 145 | flags.DEFINE_integer('num_classes', 1, 'number of classes in the model') 146 | flags.DEFINE_float('max_face_threshold', 0.6871912959056619, 'face threshold') 147 | flags.DEFINE_string('eval', "./resources/gt/1_pred.txt", 'txt file path for evaluation') 148 | 149 | 150 | def main(_argv): 151 | # set present path 152 | home = os.getcwd() 153 | 154 | # Definition of the parameters 155 | max_cosine_distance = 0.5 156 | nn_budget = None 157 | nms_max_overlap = 1.0 158 | 159 | #initialize deep sort 160 | # model_filename = 'weights/mars-small128.pb' 161 | model_filename = os.path.join(home, "weights", "arcface_weights.h5") 162 | encoder = gdet.create_box_encoder(model_filename, batch_size=128) 163 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 164 | tracker = Tracker(metric) 165 | 166 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 167 | for physical_device in physical_devices: 168 | tf.config.experimental.set_memory_growth(physical_device, True) 169 | 170 | if FLAGS.tiny: 171 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 172 | else: 173 | yolo = YoloV3(classes=FLAGS.num_classes) 174 | 175 | yolo.load_weights(FLAGS.weights) 176 | logging.info('weights loaded') 177 | 178 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()] 179 | logging.info('classes loaded') 180 | 181 | times = [] 182 | 183 | 184 | # Database 생성 185 | face_db = dict() 186 | 187 | db_path = FLAGS.database 188 | for name in os.listdir(db_path): 189 | name_path = os.path.join(db_path, name) 190 | name_db = [] 191 | for i in os.listdir(name_path): 192 | if i.split(".")[1] != 
"jpg": continue 193 | id_path = os.path.join(name_path, i) 194 | img = cv2.imread(id_path) 195 | # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 196 | # img_in = tf.expand_dims(img_in, 0) 197 | # img_in = transform_images(img_in, FLAGS.size) 198 | # boxes, scores, classes, nums = yolo.predict(img_in) 199 | boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]]) 200 | scores = np.asarray([[1]]) 201 | converted_boxes = convert_boxes(img, boxes, scores) 202 | features = encoder(img, converted_boxes) 203 | 204 | if features.shape[0] == 0: continue 205 | 206 | for f in range(features.shape[0]): 207 | name_db.append(features[f,:]) 208 | name_db = np.asarray(name_db) 209 | face_db[name] = dict({"used": False, "db": name_db}) 210 | 211 | 212 | try: 213 | vid = cv2.VideoCapture(int(FLAGS.video)) 214 | except: 215 | vid = cv2.VideoCapture(FLAGS.video) 216 | 217 | out = None 218 | 219 | if FLAGS.output: 220 | # by default VideoCapture returns float instead of int 221 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 222 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 223 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 224 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) 225 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) 226 | list_file = open('detection.txt', 'w') 227 | frame_index = -1 228 | 229 | fps = 0.0 230 | count = 0 231 | 232 | detection_list = [] 233 | 234 | while True: 235 | _, img = vid.read() 236 | 237 | if img is None: 238 | logging.warning("Empty Frame") 239 | time.sleep(0.1) 240 | count+=1 241 | if count < 3: 242 | continue 243 | else: 244 | break 245 | 246 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 247 | img_in = tf.expand_dims(img_in, 0) 248 | img_in = transform_images(img_in, FLAGS.size) 249 | 250 | t1 = time.time() 251 | boxes, scores, classes, nums = yolo.predict(img_in) 252 | 253 | # print(boxes, scores, classes, nums) 254 | # time.sleep(5) 255 | t2 = time.time() 256 | times.append(t2-t1) 257 | print(f'yolo predict time : {t2-t1}') 258 | times = times[-20:] 259 | 260 | t3 = time.time() 261 | ############# 262 | classes = classes[0] 263 | names = [] 264 | for i in range(len(classes)): 265 | names.append(class_names[int(classes[i])]) 266 | names = np.array(names) 267 | converted_boxes = convert_boxes(img, boxes[0], scores[0]) 268 | features = encoder(img, converted_boxes) 269 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] 270 | 271 | t4 = time.time() 272 | print(f'feature generation time : {t4-t3}') 273 | 274 | #initialize color map 275 | cmap = plt.get_cmap('tab20b') 276 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] 277 | 278 | # run non-maxima suppresion 279 | boxs = np.array([d.tlwh for d in detections]) 280 | scores = np.array([d.confidence for d in detections]) 281 | classes = np.array([d.class_name for d in detections]) 282 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) 283 | detections = [detections[i] for i in indices] 284 | 285 | t5 = time.time() 286 | # Call the tracker 287 | tracker.predict() 288 | # tracker.update(detections) 289 | tracker.update(detections, face_db, FLAGS.max_face_threshold) 290 | t6 = time.time() 291 | print(f'tracking time : {t6-t5}') 292 | 293 | frame_index = frame_index + 1 294 | for track in tracker.tracks: 295 | if not track.is_confirmed() or track.time_since_update > 1: 296 | continue 297 | bbox = track.to_tlbr() 298 | class_name = track.get_class() 299 | face_name = 
track.get_face_name() 300 | color = colors[int(track.track_id) % len(colors)] 301 | color = [i * 255 for i in color] 302 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) 303 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id))+len(str(face_name)))*23, int(bbox[1])), color, -1) 304 | # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 305 | cv2.putText(img, class_name + "-" + str(track.track_id) + "-" + face_name, (int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 306 | # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 307 | # print(class_name + "-" + str(track.track_id)) 308 | 309 | # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))})) 310 | if face_name != "": 311 | detection_list.append(dict({"frame_no": str(frame_index), "id": str(face_name), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))})) 312 | ####### 313 | fps = ( fps + (1./(time.time()-t1)) ) / 2 314 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names) 315 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30), 316 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 317 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 318 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2) 319 | if FLAGS.output: 320 | out.write(img) 321 | # frame_index = frame_index + 1 322 | # list_file.write(str(frame_index)+' ') 323 | # if len(converted_boxes) != 0: 324 | # for i in range(0,len(converted_boxes)): 325 | # list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ') 326 | # list_file.write('\n') 327 | cv2.imshow('output', img) 328 | if cv2.waitKey(1) == ord('q'): 329 | break 330 | 331 | cv2.destroyAllWindows() 332 | 333 | 334 | frame_list = sorted(detection_list, key= lambda x: (int(x["frame_no"]), int(x["id"]))) 335 | # pprint.pprint(frame_list) 336 | 337 | f = open(FLAGS.eval, "w") 338 | for a in frame_list: 339 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n") 340 | # 파일 닫기 341 | f.close() 342 | 343 | 344 | if __name__ == '__main__': 345 | try: 346 | app.run(main) 347 | except SystemExit: 348 | pass 349 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.12.0 2 | argon2-cffi @ file:///C:/ci/argon2-cffi_1613038019788/work 3 | astor==0.8.1 4 | astunparse==1.6.3 5 | async-generator==1.10 6 | atomicwrites==1.4.0 7 | attrs @ file:///tmp/build/80754af9/attrs_1604765588209/work 8 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work 9 | bleach @ file:///tmp/build/80754af9/bleach_1612211392645/work 10 | cached-property==1.5.2 11 | cachetools==4.2.1 12 | certifi==2020.12.5 13 | cffi @ file:///C:/ci/cffi_1613247308275/work 14 | chardet==4.0.0 15 | colorama @ file:///tmp/build/80754af9/colorama_1607707115595/work 16 | cycler==0.10.0 17 | decorator @ 
file:///tmp/build/80754af9/decorator_1617916966915/work 18 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work 19 | entrypoints==0.3 20 | filelock==3.0.12 21 | flake8==3.9.2 22 | flake8-import-order==0.18.1 23 | flatbuffers==1.12 24 | gast==0.3.3 25 | gdown==3.13.0 26 | google-auth==1.28.0 27 | google-auth-oauthlib==0.4.4 28 | google-pasta==0.2.0 29 | grpcio==1.32.0 30 | h5py==2.10.0 31 | idna==2.10 32 | imageio==2.9.0 33 | importlib-metadata @ file:///C:/ci/importlib-metadata_1617877486026/work 34 | iniconfig==1.1.1 35 | ipykernel @ file:///C:/ci/ipykernel_1596208728219/work/dist/ipykernel-5.3.4-py3-none-any.whl 36 | ipython @ file:///C:/ci/ipython_1617121109687/work 37 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work 38 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1610481889018/work 39 | jedi==0.17.0 40 | Jinja2 @ file:///tmp/build/80754af9/jinja2_1612213139570/work 41 | joblib==1.0.1 42 | jsonschema @ file:///tmp/build/80754af9/jsonschema_1602607155483/work 43 | jupyter==1.0.0 44 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work 45 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1616615302928/work 46 | jupyter-core @ file:///C:/ci/jupyter_core_1612213516947/work 47 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work 48 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work 49 | Keras==2.4.3 50 | Keras-Applications==1.0.8 51 | Keras-Preprocessing==1.1.2 52 | kiwisolver==1.3.1 53 | lxml==4.6.3 54 | Markdown==3.3.4 55 | MarkupSafe @ file:///C:/ci/markupsafe_1594405949945/work 56 | matplotlib==3.4.1 57 | mccabe==0.6.1 58 | mistune @ file:///C:/ci/mistune_1594373272338/work 59 | motmetrics==1.2.0 60 | mtcnn==0.1.0 61 | nbclient @ file:///tmp/build/80754af9/nbclient_1614364831625/work 62 | nbconvert @ file:///C:/ci/nbconvert_1601914921407/work 63 | nbformat @ file:///tmp/build/80754af9/nbformat_1617383369282/work 64 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1613680548246/work 65 | networkx==2.5.1 66 | notebook @ file:///C:/ci/notebook_1616443616158/work 67 | numpy==1.19.5 68 | oauthlib==3.1.0 69 | opencv-python==4.5.1.48 70 | opt-einsum==3.3.0 71 | packaging @ file:///tmp/build/80754af9/packaging_1611952188834/work 72 | pandas==1.2.4 73 | pandocfilters @ file:///C:/ci/pandocfilters_1605102427207/work 74 | parso @ file:///tmp/build/80754af9/parso_1617223946239/work 75 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work 76 | Pillow==8.2.0 77 | pluggy==0.13.1 78 | prometheus-client @ file:///tmp/build/80754af9/prometheus_client_1618088486455/work 79 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1616415428029/work 80 | protobuf==3.15.7 81 | py==1.10.0 82 | py-cpuinfo==8.0.0 83 | pyasn1==0.4.8 84 | pyasn1-modules==0.2.8 85 | pycodestyle==2.7.0 86 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work 87 | pyflakes==2.3.1 88 | Pygments @ file:///tmp/build/80754af9/pygments_1615143339740/work 89 | pyparsing @ file:///home/linux1/recipes/ci/pyparsing_1610983426697/work 90 | pyrsistent @ file:///C:/ci/pyrsistent_1600123688363/work 91 | PySocks==1.7.1 92 | pytest==6.2.4 93 | pytest-benchmark==3.4.1 94 | python-dateutil @ file:///home/ktietz/src/ci/python-dateutil_1611928101742/work 95 | pytz==2021.1 96 | PyWavelets==1.1.1 97 | pywin32==227 98 | pywinpty==0.5.7 99 | PyYAML==5.4.1 100 | pyzmq==20.0.0 101 | qtconsole @ 
file:///tmp/build/80754af9/qtconsole_1616775094278/work 102 | QtPy==1.9.0 103 | requests==2.25.1 104 | requests-oauthlib==1.3.0 105 | retina-face==0.0.4 106 | rsa==4.7.2 107 | scikit-image==0.18.1 108 | scikit-learn==0.22.2 109 | scipy==1.6.2 110 | seaborn==0.10.0 111 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1607525499227/work 112 | six @ file:///C:/ci/six_1605205426665/work 113 | sklearn==0.0 114 | tensorboard==2.4.1 115 | tensorboard-plugin-wit==1.8.0 116 | tensorflow==2.4.1 117 | tensorflow-estimator==2.4.0 118 | termcolor==1.1.0 119 | terminado==0.9.4 120 | testpath @ file:///home/ktietz/src/ci/testpath_1611930608132/work 121 | threadpoolctl==2.1.0 122 | tifffile==2021.4.8 123 | toml==0.10.2 124 | tornado @ file:///C:/ci/tornado_1606935947090/work 125 | tqdm==4.60.0 126 | traitlets @ file:///home/ktietz/src/ci/traitlets_1611929699868/work 127 | typing-extensions @ file:///home/ktietz/src/ci_mi/typing_extensions_1612808209620/work 128 | urllib3==1.26.4 129 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work 130 | webencodings==0.5.1 131 | Werkzeug==1.0.1 132 | widgetsnbextension==3.5.1 133 | wincertstore==0.2 134 | wrapt==1.12.1 135 | xmltodict==0.12.0 136 | zipp @ file:///tmp/build/80754af9/zipp_1615904174917/work 137 | -------------------------------------------------------------------------------- /resources/database/1/ironman/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/1.jpg -------------------------------------------------------------------------------- /resources/database/1/ironman/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/2.jpg -------------------------------------------------------------------------------- /resources/database/1/ironman/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/3.jpg -------------------------------------------------------------------------------- /resources/database/2/chimchakman/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/1.jpg -------------------------------------------------------------------------------- /resources/database/2/chimchakman/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/2.jpg -------------------------------------------------------------------------------- /resources/database/2/juhomin/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/1.jpg -------------------------------------------------------------------------------- /resources/database/2/juhomin/2.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/2.jpg -------------------------------------------------------------------------------- /resources/database/2/kimpoong/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/1.jpg -------------------------------------------------------------------------------- /resources/database/2/kimpoong/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/2.jpg -------------------------------------------------------------------------------- /resources/fonts/futur.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/fonts/futur.ttf -------------------------------------------------------------------------------- /resources/gt/README.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | Music video dataset 3 | -------------------------------------------------------------------- 4 | 5 | This file describes the music video dataset as introduced in 6 | 7 | [1] Shun Zhang, Yihong Gong, Jia-Bin Huang, Jongwoo Lim, Jinjun Wang, 8 | Narendra Ahuja and Ming-Hsuan Yang. Tracking Persons-of-Interest via 9 | Adaptive Discriminative Features[C]. The 14th European Conference on 10 | Computer Vision (ECCV), 2016. 11 | [2] The project website: http://shunzhang.me.pn/papers/eccv2016/ 12 | 13 | The dataset contains manually annotated face trajectories from 8 music 14 | videos from YouTube: T-ara, Westlife, Pussycat Dolls, Apink, Darling, 15 | Bruno Mars, Hello Bubble and Girls Aloud (as detailed in [1,2]). 16 | 17 | Kindly cite [1] when using the dataset, where appropriate. 18 | 19 | -------------------------------------------------------------------- 20 | Description of the files 21 | -------------------------------------------------------------------- 22 | 23 | The annotations for each video are stored in an XML file. 24 | We give an XML example below and introduce the XML format. 25 | 26 | 27 | Example: 28 | 1. 29 | 2. 35 | 36 | 37 | The 1st line at the top of our example is the XML declaration 38 | that indicates the version of XML. 39 | The 2nd line indicates the video information, including video name, 40 | start frame and end frame. 41 | The 3rd line indicates the trajectory information, including trajectory 42 | identity, start frame and end frame. 43 | The 4th line contains 5 values of per bounding box: 44 | ,,,, 45 | (x-bb_left,y-bb_top) is the left-top point of the bounding box. 46 | is the width and height of the bounding box. 
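As a rough sketch only (the tag and attribute names below are placeholders,
not necessarily the schema used in the *_gt.xml files), an annotation
structured as described above can be flattened into the one-line-per-box
text format that evaluation.py consumes, i.e. frame_no, trajectory id,
x (bb_left), y (bb_top), width, height:

    import xmltodict  # listed in requirements.txt

    def xml_to_rows(xml_path):
        # Placeholder tag/attribute names; adjust to the real annotation schema.
        with open(xml_path) as f:
            doc = xmltodict.parse(f.read())
        trajectories = doc["video"]["trajectory"]
        if not isinstance(trajectories, list):  # xmltodict yields a dict for a single child
            trajectories = [trajectories]
        rows = []
        for traj in trajectories:
            boxes = traj["bndbox"]
            if not isinstance(boxes, list):
                boxes = [boxes]
            for b in boxes:
                rows.append((int(b["@frame_no"]), int(traj["@id"]),
                             int(b["@x"]), int(b["@y"]),
                             int(b["@width"]), int(b["@height"])))
        return sorted(rows)

    # e.g. write a T-ara_gt.txt from a hypothetical T-ara_gt.xml
    with open("T-ara_gt.txt", "w") as out:
        for row in xml_to_rows("T-ara_gt.xml"):
            out.write(" ".join(map(str, row)) + "\n")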
47 | 48 | -- EOF 49 | -------------------------------------------------------------------------------- /test.md: -------------------------------------------------------------------------------- 1 | # 맨처음 2 | 3 | ### T-ara 4 | 5 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 6 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 7 | |53.6%|59.0%|49.5%|76.6%|90.4%|1176|3406|3752|517|42.6%|71%|0.241|4710| 8 | 9 | ### GirlsAloud 10 | 11 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 12 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 13 | |39.0%|42.6%|36.4%|73.9%|85.3%|2087|4275|4687|1122|32.6%|64.6%|0.314|6630| 14 | 15 | ### Darling 16 | 17 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 18 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 19 | |3.2%|44.2%|42.6%|79.7%|82.1%|1654|1935|3048|743|30.4%|65.7%|0.267|6180| 20 | 21 | ### Westlife 22 | 23 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 24 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 25 | |64.3%|61.3%|68.4%|87.8%|77.9%|2828|1389|1809|562|47.0%|64.7%|0.411|6870| 26 | 27 | ### BrunoMars 28 | 29 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 30 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 31 | |40.5%|40.7%|40.8%|74.1%|73.1%|4560|4330|5128|1010|16.1%|78.9%|0.539|8460| 32 | 33 | ### HelloBubble 34 | 35 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 36 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 37 | |41.9%|45.3%|39.1%|73.9%|85.2%| 673|1363|1381|301|34.6%|69.7%|0.256|4920| 38 | 39 | ### Apink 40 | 41 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 42 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 43 | |56.2% | 58.9% | 53.8% |79.5% |86.8% | 883| 1491| 1234 | 337 |50.4% | 66.8% | 0.15 |4650| 44 | 45 | 46 | # n = 20을 기반으로 얼굴 크로핑 47 | ## T ara 48 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 49 | 0 61.3% 67.4% 56.6% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3 50 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3 51 | 52 | ## GirlsAloud 53 | 54 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 55 | 0 49.8% 53.6% 46.9% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2 56 | 1 106.1% 100.0% 100.0% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2 57 | 58 | ## Darling 59 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 60 | 0 53.6% 54.7% 53.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6 61 | 1 101.2% 100.0% 100.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6 62 | 63 | ## Westlife 64 | 65 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 66 | 0 71.0% 67.2% 75.9% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2 67 | 1 93.4% 100.0% 100.0% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2 68 | 69 | ## brunomars 70 | 71 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 72 | 0 52.1% 51.9% 52.8% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8 73 | 1 98.7% 100.0% 100.0% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8 74 | 75 | ## Hellobubble 76 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 77 | 0 58.3% 62.2% 55.1% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2 78 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2 79 | 80 | ## Apink 81 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 
82 | 0 69.5% 72.4% 66.9% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4 83 | 1 103.8% 100.0% 100.0% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4 84 | 85 | 86 | # n = 10 87 | 88 | ## T ara 89 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 90 | 0 67.4% 74.1% 62.2% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3 91 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3 92 | 93 | ## GirlsAloud 94 | 95 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 96 | 0 55.0% 59.1% 51.8% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3 97 | 1 106.0% 100.0% 100.0% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3 98 | 99 | ## Darling 100 | 101 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 102 | 0 59.2% 60.2% 58.6% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6 103 | 1 101.0% 100.0% 100.0% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6 104 | ## Westlife 105 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 106 | 0 73.6% 69.7% 78.8% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1 107 | 1 93.4% 100.0% 100.0% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1 108 | 109 | ## brunomars 110 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 111 | 0 57.8% 57.5% 58.6% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8 112 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8 113 | 114 | ## Hellobubble 115 | 116 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 117 | 0 64.0% 68.3% 60.5% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2 118 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2 119 | ## Apink 120 | 121 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 122 | 0 71.9% 74.9% 69.3% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4 123 | 1 103.8% 100.0% 100.0% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4 124 | 125 | # only using id, 그냥 deep sort처럼 id로 뺀 경우 126 | 127 | ## T ara 128 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 129 | 0 5.7% 6.2% 5.3% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0 130 | 1 107.4% 100.0% 100.0% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0 131 | 132 | ## GirlsAloud 133 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 134 | 0 7.3% 7.5% 7.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0 135 | 1 103.5% 100.0% 100.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0 136 | 137 | 138 | ## Darling 139 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 140 | 0 5.4% 5.3% 5.5% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0 141 | 1 98.9% 100.0% 100.0% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0 142 | 143 | ## Westlife 144 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 145 | 0 7.2% 6.4% 8.4% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0 146 | 1 86.4% 100.0% 100.0% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0 147 | 148 | ## brunomars 149 | 150 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 151 | 0 6.8% 6.6% 7.1% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1 152 | 1 96.2% 100.0% 100.0% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1 153 | (face_tracker) 154 | ## 
Hellobubble 155 | 156 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 157 | 0 9.8% 10.3% 9.3% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2 158 | 1 104.9% 100.0% 100.0% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2 159 | ## Apink 160 | 161 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 162 | 0 6.4% 6.6% 6.2% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0 163 | 1 102.7% 100.0% 100.0% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0 164 | 165 | 166 | 167 | # face id, track id말고 face id로 처음에 매칭될 때만 배정하여 추출한 결과 168 | 169 | 170 | ## T ara 171 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 172 | 0 53.2% 66.5% 49.1% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1 173 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1 174 | 175 | ## GirlsAloud 176 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 177 | 0 51.4% 60.9% 48.5% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1 178 | 1 105.9% 100.0% 100.0% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1 179 | 180 | ## Darling 181 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 182 | 0 53.2% 57.4% 53.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2 183 | 1 100.3% 100.0% 100.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2 184 | 185 | ## Westlife 186 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 187 | 0 49.7% 59.5% 56.8% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0 188 | 1 87.5% 100.0% 100.0% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0 189 | 190 | ## brunomars 191 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 192 | 0 47.1% 51.8% 47.8% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7 193 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7 194 | 195 | ## Hellobubble 196 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 197 | 0 51.8% 58.7% 48.9% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3 198 | 1 105.8% 100.0% 100.0% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3 199 | 200 | ## Apink 201 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 202 | 0 61.8% 65.9% 59.6% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1 203 | 1 103.7% 100.0% 100.0% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1 204 | -------------------------------------------------------------------------------- /tools/freeze_model.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import argparse 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def _batch_norm_fn(x, scope=None): 8 | if scope is None: 9 | scope = tf.get_variable_scope().name + "/bn" 10 | return slim.batch_norm(x, scope=scope) 11 | 12 | 13 | def create_link( 14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu, 15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3), 16 | regularizer=None, is_first=False, summarize_activations=True): 17 | if is_first: 18 | network = incoming 19 | else: 20 | network = _batch_norm_fn(incoming, scope=scope + "/bn") 21 | network = nonlinearity(network) 22 | if summarize_activations: 23 | tf.summary.histogram(scope+"/activations", network) 24 | 25 | pre_block_network = network 26 | post_block_network = network_builder(pre_block_network, scope) 27 
| 28 | incoming_dim = pre_block_network.get_shape().as_list()[-1] 29 | outgoing_dim = post_block_network.get_shape().as_list()[-1] 30 | if incoming_dim != outgoing_dim: 31 | assert outgoing_dim == 2 * incoming_dim, \ 32 | "%d != %d" % (outgoing_dim, 2 * incoming) 33 | projection = slim.conv2d( 34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None, 35 | scope=scope+"/projection", weights_initializer=weights_initializer, 36 | biases_initializer=None, weights_regularizer=regularizer) 37 | network = projection + post_block_network 38 | else: 39 | network = incoming + post_block_network 40 | return network 41 | 42 | 43 | def create_inner_block( 44 | incoming, scope, nonlinearity=tf.nn.elu, 45 | weights_initializer=tf.truncated_normal_initializer(1e-3), 46 | bias_initializer=tf.zeros_initializer(), regularizer=None, 47 | increase_dim=False, summarize_activations=True): 48 | n = incoming.get_shape().as_list()[-1] 49 | stride = 1 50 | if increase_dim: 51 | n *= 2 52 | stride = 2 53 | 54 | incoming = slim.conv2d( 55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME", 56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer, 57 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 58 | scope=scope + "/1") 59 | if summarize_activations: 60 | tf.summary.histogram(incoming.name + "/activations", incoming) 61 | 62 | incoming = slim.dropout(incoming, keep_prob=0.6) 63 | 64 | incoming = slim.conv2d( 65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME", 66 | normalizer_fn=None, weights_initializer=weights_initializer, 67 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 68 | scope=scope + "/2") 69 | return incoming 70 | 71 | 72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu, 73 | weights_initializer=tf.truncated_normal_initializer(1e3), 74 | bias_initializer=tf.zeros_initializer(), regularizer=None, 75 | increase_dim=False, is_first=False, 76 | summarize_activations=True): 77 | 78 | def network_builder(x, s): 79 | return create_inner_block( 80 | x, s, nonlinearity, weights_initializer, bias_initializer, 81 | regularizer, increase_dim, summarize_activations) 82 | 83 | return create_link( 84 | incoming, network_builder, scope, nonlinearity, weights_initializer, 85 | regularizer, is_first, summarize_activations) 86 | 87 | 88 | def _create_network(incoming, reuse=None, weight_decay=1e-8): 89 | nonlinearity = tf.nn.elu 90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 91 | conv_bias_init = tf.zeros_initializer() 92 | conv_regularizer = slim.l2_regularizer(weight_decay) 93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 94 | fc_bias_init = tf.zeros_initializer() 95 | fc_regularizer = slim.l2_regularizer(weight_decay) 96 | 97 | def batch_norm_fn(x): 98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") 99 | 100 | network = incoming 101 | network = slim.conv2d( 102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", 104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 105 | weights_regularizer=conv_regularizer) 106 | network = slim.conv2d( 107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", 109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 110 | weights_regularizer=conv_regularizer) 111 | 112 | # NOTE(nwojke): This is missing a 
padding="SAME" to match the CNN 113 | # architecture in Table 1 of the paper. Information on how this affects 114 | # performance on MOT 16 training sequences can be found in 115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10 116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") 117 | 118 | network = residual_block( 119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, 120 | conv_regularizer, increase_dim=False, is_first=True) 121 | network = residual_block( 122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, 123 | conv_regularizer, increase_dim=False) 124 | 125 | network = residual_block( 126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, 127 | conv_regularizer, increase_dim=True) 128 | network = residual_block( 129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, 130 | conv_regularizer, increase_dim=False) 131 | 132 | network = residual_block( 133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, 134 | conv_regularizer, increase_dim=True) 135 | network = residual_block( 136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, 137 | conv_regularizer, increase_dim=False) 138 | 139 | feature_dim = network.get_shape().as_list()[-1] 140 | network = slim.flatten(network) 141 | 142 | network = slim.dropout(network, keep_prob=0.6) 143 | network = slim.fully_connected( 144 | network, feature_dim, activation_fn=nonlinearity, 145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, 146 | scope="fc1", weights_initializer=fc_weight_init, 147 | biases_initializer=fc_bias_init) 148 | 149 | features = network 150 | 151 | # Features in rows, normalize axis 1. 152 | features = slim.batch_norm(features, scope="ball", reuse=reuse) 153 | feature_norm = tf.sqrt( 154 | tf.constant(1e-8, tf.float32) + 155 | tf.reduce_sum(tf.square(features), [1], keepdims=True)) 156 | features = features / feature_norm 157 | return features, None 158 | 159 | 160 | def _network_factory(weight_decay=1e-8): 161 | 162 | def factory_fn(image, reuse): 163 | with slim.arg_scope([slim.batch_norm, slim.dropout], 164 | is_training=False): 165 | with slim.arg_scope([slim.conv2d, slim.fully_connected, 166 | slim.batch_norm, slim.layer_norm], 167 | reuse=reuse): 168 | features, logits = _create_network( 169 | image, reuse=reuse, weight_decay=weight_decay) 170 | return features, logits 171 | 172 | return factory_fn 173 | 174 | 175 | def _preprocess(image): 176 | image = image[:, :, ::-1] # BGR to RGB 177 | return image 178 | 179 | 180 | def parse_args(): 181 | """Parse command line arguments. 
182 | """ 183 | parser = argparse.ArgumentParser(description="Freeze old model") 184 | parser.add_argument( 185 | "--checkpoint_in", 186 | default="resources/networks/mars-small128.ckpt-68577", 187 | help="Path to checkpoint file") 188 | parser.add_argument( 189 | "--graphdef_out", 190 | default="resources/networks/mars-small128.pb") 191 | return parser.parse_args() 192 | 193 | 194 | def main(): 195 | args = parse_args() 196 | 197 | with tf.Session(graph=tf.Graph()) as session: 198 | input_var = tf.placeholder( 199 | tf.uint8, (None, 128, 64, 3), name="images") 200 | image_var = tf.map_fn( 201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32), 202 | back_prop=False) 203 | 204 | factory_fn = _network_factory() 205 | features, _ = factory_fn(image_var, reuse=None) 206 | features = tf.identity(features, name="features") 207 | 208 | saver = tf.train.Saver(slim.get_variables_to_restore()) 209 | saver.restore(session, args.checkpoint_in) 210 | 211 | output_graph_def = tf.graph_util.convert_variables_to_constants( 212 | session, tf.get_default_graph().as_graph_def(), 213 | [features.name.split(":")[0]]) 214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle: 215 | file_handle.write(output_graph_def.SerializeToString()) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /tools/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | from tensorflow.keras.preprocessing import image as keras_image 9 | 10 | # 추가 11 | import sys 12 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))) 13 | from deepface.basemodels import ArcFace 14 | import tensorflow as tf2 15 | 16 | gpus = tf.config.experimental.list_physical_devices('GPU') 17 | if gpus: 18 | try: 19 | # Currently, memory growth needs to be the same across GPUs 20 | for gpu in gpus: 21 | tf.config.experimental.set_memory_growth(gpu, True) 22 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 23 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 24 | except RuntimeError as e: 25 | # Memory growth must be set before GPUs have been initialized 26 | print(e) 27 | 28 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 29 | if len(physical_devices) > 0: 30 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 31 | 32 | def _run_in_batches(f, data_dict, out, batch_size): 33 | data_len = len(out) 34 | num_batches = int(data_len / batch_size) 35 | 36 | s, e = 0, 0 37 | for i in range(num_batches): 38 | s, e = i * batch_size, (i + 1) * batch_size 39 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 40 | out[s:e] = f(batch_data_dict) 41 | if e < len(out): 42 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 43 | out[e:] = f(batch_data_dict) 44 | 45 | 46 | def extract_image_patch(image, bbox, patch_shape): 47 | """Extract image patch from bounding box. 48 | 49 | Parameters 50 | ---------- 51 | image : ndarray 52 | The full image. 53 | bbox : array_like 54 | The bounding box in format (x, y, width, height). 55 | patch_shape : Optional[array_like] 56 | This parameter can be used to enforce a desired patch shape 57 | (height, width). 
First, the `bbox` is adapted to the aspect ratio 58 | of the patch shape, then it is clipped at the image boundaries. 59 | If None, the shape is computed from :arg:`bbox`. 60 | 61 | Returns 62 | ------- 63 | ndarray | NoneType 64 | An image patch showing the :arg:`bbox`, optionally reshaped to 65 | :arg:`patch_shape`. 66 | Returns None if the bounding box is empty or fully outside of the image 67 | boundaries. 68 | 69 | """ 70 | bbox = np.array(bbox) 71 | if patch_shape is not None: 72 | # correct aspect ratio to patch shape 73 | target_aspect = float(patch_shape[1]) / patch_shape[0] 74 | new_width = target_aspect * bbox[3] 75 | bbox[0] -= (new_width - bbox[2]) / 2 76 | bbox[2] = new_width 77 | 78 | # convert to top left, bottom right 79 | bbox[2:] += bbox[:2] 80 | bbox = bbox.astype(np.int) 81 | 82 | # clip at image boundaries 83 | bbox[:2] = np.maximum(0, bbox[:2]) 84 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 85 | if np.any(bbox[:2] >= bbox[2:]): 86 | return None 87 | sx, sy, ex, ey = bbox 88 | image = image[sy:ey, sx:ex] 89 | image = cv2.resize(image, tuple(patch_shape[::-1])) 90 | 91 | # Added 92 | image = keras_image.img_to_array(image) 93 | # image = np.expand_dims(image, axis = 0) 94 | image /= 255 # normalize input in [0, 1] 95 | 96 | return image 97 | 98 | 99 | class ImageEncoder(object): 100 | 101 | def __init__(self, checkpoint_filename, input_name="images", 102 | output_name="features"): 103 | self.session = ArcFace.loadModel(checkpoint_filename) # success 104 | self.feature_dim = self.session.layers[-1].output_shape[1] 105 | self.image_shape = list(self.session.input_shape[1:]) 106 | 107 | # self.session = tf.Session() 108 | # with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 109 | # graph_def = tf.GraphDef() 110 | # graph_def.ParseFromString(file_handle.read()) 111 | # tf.import_graph_def(graph_def, name="net") 112 | # self.input_var = tf.get_default_graph().get_tensor_by_name( 113 | # "%s:0" % input_name) 114 | # self.output_var = tf.get_default_graph().get_tensor_by_name( 115 | # "%s:0" % output_name) 116 | 117 | # assert len(self.output_var.get_shape()) == 2 118 | # assert len(self.input_var.get_shape()) == 4 119 | # self.feature_dim = self. output_var.get_shape().as_list()[-1] 120 | # self.image_shape = self.input_var.get_shape().as_list()[1:] 121 | 122 | def __call__(self, data_x, batch_size=32): 123 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 124 | 125 | if data_x.shape[0] == 0: 126 | return out 127 | 128 | import time 129 | t1 = time.time() 130 | 131 | # Added 132 | out = self.session.predict(data_x) 133 | 134 | # _run_in_batches( 135 | # lambda x: self.session.run(self.output_var, feed_dict=x), 136 | # {self.input_var: data_x}, out, batch_size) 137 | # t2 = time.time() 138 | # print("patch inference time : ", t2-t1) 139 | return out 140 | 141 | 142 | def create_box_encoder(model_filename, input_name="images", 143 | output_name="features", batch_size=32): 144 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 145 | image_shape = image_encoder.image_shape 146 | 147 | def encoder(image, boxes): 148 | image_patches = [] 149 | for box in boxes: 150 | patch = extract_image_patch(image, box, image_shape[:2]) # shape is resized automatically 151 | 152 | if patch is None: 153 | print("WARNING: Failed to extract image patch: %s."
% str(box)) 154 | # patch = np.random.uniform( 155 | # 0., 255., image_shape).astype(np.uint8) 156 | patch = np.random.uniform( 157 | 0., 1., image_shape).astype(np.float32) 158 | image_patches.append(patch) 159 | image_patches = np.asarray(image_patches) 160 | return image_encoder(image_patches, batch_size) 161 | 162 | return encoder 163 | 164 | 165 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 166 | """Generate detections with features. 167 | 168 | Parameters 169 | ---------- 170 | encoder : Callable[image, ndarray] -> ndarray 171 | The encoder function takes as input a BGR color image and a matrix of 172 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 173 | corresponding feature vectors. 174 | mot_dir : str 175 | Path to the MOTChallenge directory (can be either train or test). 176 | output_dir 177 | Path to the output directory. Will be created if it does not exist. 178 | detection_dir 179 | Path to custom detections. The directory structure should be the default 180 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 181 | standard MOTChallenge detections. 182 | 183 | """ 184 | if detection_dir is None: 185 | detection_dir = mot_dir 186 | try: 187 | os.makedirs(output_dir) 188 | except OSError as exception: 189 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 190 | pass 191 | else: 192 | raise ValueError( 193 | "Failed to create output directory '%s'" % output_dir) 194 | 195 | for sequence in os.listdir(mot_dir): 196 | print("Processing %s" % sequence) 197 | sequence_dir = os.path.join(mot_dir, sequence) 198 | 199 | image_dir = os.path.join(sequence_dir, "img1") 200 | image_filenames = { 201 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 202 | for f in os.listdir(image_dir)} 203 | 204 | detection_file = os.path.join( 205 | detection_dir, sequence, "det/det.txt") 206 | detections_in = np.loadtxt(detection_file, delimiter=',') 207 | detections_out = [] 208 | 209 | frame_indices = detections_in[:, 0].astype(np.int) 210 | min_frame_idx = frame_indices.astype(np.int).min() 211 | max_frame_idx = frame_indices.astype(np.int).max() 212 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 213 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 214 | mask = frame_indices == frame_idx 215 | rows = detections_in[mask] 216 | 217 | if frame_idx not in image_filenames: 218 | print("WARNING: could not find image for frame %d" % frame_idx) 219 | continue 220 | bgr_image = cv2.imread( 221 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 222 | features = encoder(bgr_image, rows[:, 2:6].copy()) 223 | detections_out += [np.r_[(row, feature)] for row, feature 224 | in zip(rows, features)] 225 | 226 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 227 | np.save( 228 | output_filename, np.asarray(detections_out), allow_pickle=False) 229 | 230 | 231 | def parse_args(): 232 | """Parse command line arguments. 233 | """ 234 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 235 | parser.add_argument( 236 | "--model", 237 | default="resources/networks/mars-small128.pb", 238 | help="Path to frozen inference graph protobuf.") 239 | parser.add_argument( 240 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 241 | required=True) 242 | parser.add_argument( 243 | "--detection_dir", help="Path to custom detections.
Defaults to " 244 | "standard MOT detections Directory structure should be the default " 245 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 246 | parser.add_argument( 247 | "--output_dir", help="Output directory. Will be created if it does not" 248 | " exist.", default="detections") 249 | return parser.parse_args() 250 | 251 | 252 | def main(): 253 | args = parse_args() 254 | encoder = create_box_encoder(args.model, batch_size=32) 255 | generate_detections(encoder, args.mot_dir, args.output_dir, 256 | args.detection_dir) 257 | 258 | 259 | if __name__ == "__main__": 260 | main() 261 | -------------------------------------------------------------------------------- /xml2txt.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as elemTree 2 | import os 3 | import pprint 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | """ 8 | python xml2txt.py \ 9 | --gt_path ./resources/gt/T-ara_gt.xml \ 10 | --gt_file_path ./resources/gt/T-ara_gt.txt 11 | 12 | python xml2txt.py \ 13 | --gt_path ./resources/gt/GirlsAloud_gt.xml \ 14 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt 15 | 16 | python xml2txt.py \ 17 | --gt_path ./resources/gt/Darling_gt.xml \ 18 | --gt_file_path ./resources/gt/Darling_gt.txt 19 | 20 | python xml2txt.py \ 21 | --gt_path ./resources/gt/Westlife_gt.xml \ 22 | --gt_file_path ./resources/gt/Westlife_gt.txt 23 | 24 | python xml2txt.py \ 25 | --gt_path ./resources/gt/BrunoMars_gt.xml \ 26 | --gt_file_path ./resources/gt/BrunoMars_gt.txt 27 | 28 | python xml2txt.py \ 29 | --gt_path ./resources/gt/HelloBubble_gt.xml \ 30 | --gt_file_path ./resources/gt/HelloBubble_gt.txt 31 | 32 | python xml2txt.py \ 33 | --gt_path ./resources/gt/Apink_gt.xml \ 34 | --gt_file_path ./resources/gt/Apink_gt.txt 35 | """ 36 | 37 | flags.DEFINE_string('gt_path', './resources/gt/T-ara_gt.xml', 'path to gt') 38 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to save converted file') 39 | 40 | 41 | def main(args): 42 | tree = elemTree.parse(FLAGS.gt_path) 43 | 44 | root=tree.getroot() 45 | 46 | print(root.tag, root.attrib) 47 | print(root.find("Trajectory")) 48 | 49 | frame_list = [] 50 | 51 | for traj in root: 52 | for f in traj: 53 | a = f.attrib 54 | a["frame_no"] = str(int(a["frame_no"])-1) 55 | a["id"] = traj.attrib["obj_id"] 56 | frame_list.append(a) 57 | 58 | 59 | frame_list = sorted(frame_list, key= lambda x: (int(x["frame_no"]), int(x["id"]))) 60 | # pprint.pprint(frame_list) 61 | 62 | 63 | f = open(FLAGS.gt_file_path, 'w') 64 | 65 | for a in frame_list: 66 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n") 67 | # 파일 닫기 68 | f.close() 69 | 70 | 71 | 72 | if __name__ == '__main__': 73 | try: 74 | app.run(main) 75 | except SystemExit: 76 | pass 77 | -------------------------------------------------------------------------------- /yolov3_tf2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/yolov3_tf2/__init__.py -------------------------------------------------------------------------------- /yolov3_tf2/dataset.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from absl.flags import FLAGS 3 | 4 | @tf.function 5 | def transform_targets_for_output(y_true, grid_size, 
anchor_idxs): 6 | # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor)) 7 | N = tf.shape(y_true)[0] 8 | 9 | # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class]) 10 | y_true_out = tf.zeros( 11 | (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) 12 | 13 | anchor_idxs = tf.cast(anchor_idxs, tf.int32) 14 | 15 | indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True) 16 | updates = tf.TensorArray(tf.float32, 1, dynamic_size=True) 17 | idx = 0 18 | for i in tf.range(N): 19 | for j in tf.range(tf.shape(y_true)[1]): 20 | if tf.equal(y_true[i][j][2], 0): 21 | continue 22 | anchor_eq = tf.equal( 23 | anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) 24 | 25 | if tf.reduce_any(anchor_eq): 26 | box = y_true[i][j][0:4] 27 | box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 28 | 29 | anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) 30 | grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32) 31 | 32 | # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) 33 | indexes = indexes.write( 34 | idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]]) 35 | updates = updates.write( 36 | idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]]) 37 | idx += 1 38 | 39 | # tf.print(indexes.stack()) 40 | # tf.print(updates.stack()) 41 | 42 | return tf.tensor_scatter_nd_update( 43 | y_true_out, indexes.stack(), updates.stack()) 44 | 45 | 46 | def transform_targets(y_train, anchors, anchor_masks, size): 47 | y_outs = [] 48 | grid_size = size // 32 49 | 50 | # calculate anchor index for true boxes 51 | anchors = tf.cast(anchors, tf.float32) 52 | anchor_area = anchors[..., 0] * anchors[..., 1] 53 | box_wh = y_train[..., 2:4] - y_train[..., 0:2] 54 | box_wh = tf.tile(tf.expand_dims(box_wh, -2), 55 | (1, 1, tf.shape(anchors)[0], 1)) 56 | box_area = box_wh[..., 0] * box_wh[..., 1] 57 | intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \ 58 | tf.minimum(box_wh[..., 1], anchors[..., 1]) 59 | iou = intersection / (box_area + anchor_area - intersection) 60 | anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) 61 | anchor_idx = tf.expand_dims(anchor_idx, axis=-1) 62 | 63 | y_train = tf.concat([y_train, anchor_idx], axis=-1) 64 | 65 | for anchor_idxs in anchor_masks: 66 | y_outs.append(transform_targets_for_output( 67 | y_train, grid_size, anchor_idxs)) 68 | grid_size *= 2 69 | 70 | return tuple(y_outs) 71 | 72 | 73 | def transform_images(x_train, size): 74 | x_train = tf.image.resize(x_train, (size, size)) 75 | x_train = x_train / 255 76 | return x_train 77 | 78 | 79 | # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline 80 | # Commented out fields are not required in our project 81 | IMAGE_FEATURE_MAP = { 82 | # 'image/width': tf.io.FixedLenFeature([], tf.int64), 83 | # 'image/height': tf.io.FixedLenFeature([], tf.int64), 84 | # 'image/filename': tf.io.FixedLenFeature([], tf.string), 85 | # 'image/source_id': tf.io.FixedLenFeature([], tf.string), 86 | # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string), 87 | 'image/encoded': tf.io.FixedLenFeature([], tf.string), 88 | # 'image/format': tf.io.FixedLenFeature([], tf.string), 89 | 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), 90 | 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), 91 | 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), 92 | 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), 93 | 'image/object/class/text': tf.io.VarLenFeature(tf.string), 94 | # 'image/object/class/label': 
tf.io.VarLenFeature(tf.int64), 95 | # 'image/object/difficult': tf.io.VarLenFeature(tf.int64), 96 | # 'image/object/truncated': tf.io.VarLenFeature(tf.int64), 97 | # 'image/object/view': tf.io.VarLenFeature(tf.string), 98 | } 99 | 100 | 101 | def parse_tfrecord(tfrecord, class_table, size): 102 | x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP) 103 | x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3) 104 | x_train = tf.image.resize(x_train, (size, size)) 105 | 106 | class_text = tf.sparse.to_dense( 107 | x['image/object/class/text'], default_value='') 108 | labels = tf.cast(class_table.lookup(class_text), tf.float32) 109 | y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']), 110 | tf.sparse.to_dense(x['image/object/bbox/ymin']), 111 | tf.sparse.to_dense(x['image/object/bbox/xmax']), 112 | tf.sparse.to_dense(x['image/object/bbox/ymax']), 113 | labels], axis=1) 114 | 115 | paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]] 116 | y_train = tf.pad(y_train, paddings) 117 | 118 | return x_train, y_train 119 | 120 | 121 | def load_tfrecord_dataset(file_pattern, class_file, size=416): 122 | LINE_NUMBER = -1 # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER 123 | class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer( 124 | class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1) 125 | 126 | files = tf.data.Dataset.list_files(file_pattern) 127 | dataset = files.flat_map(tf.data.TFRecordDataset) 128 | return dataset.map(lambda x: parse_tfrecord(x, class_table, size)) 129 | 130 | 131 | def load_fake_dataset(): 132 | x_train = tf.image.decode_jpeg( 133 | open('./data/girl.png', 'rb').read(), channels=3) 134 | x_train = tf.expand_dims(x_train, axis=0) 135 | 136 | labels = [ 137 | [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0], 138 | [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56], 139 | [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67] 140 | ] + [[0, 0, 0, 0, 0]] * 5 141 | y_train = tf.convert_to_tensor(labels, tf.float32) 142 | y_train = tf.expand_dims(y_train, axis=0) 143 | 144 | return tf.data.Dataset.from_tensor_slices((x_train, y_train)) 145 | -------------------------------------------------------------------------------- /yolov3_tf2/models.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | from absl.flags import FLAGS 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.keras import Model 6 | from tensorflow.keras.layers import ( 7 | Add, 8 | Concatenate, 9 | Conv2D, 10 | Input, 11 | Lambda, 12 | LeakyReLU, 13 | MaxPool2D, 14 | UpSampling2D, 15 | ZeroPadding2D, 16 | BatchNormalization, 17 | ) 18 | from tensorflow.keras.regularizers import l2 19 | from tensorflow.keras.losses import ( 20 | binary_crossentropy, 21 | sparse_categorical_crossentropy 22 | ) 23 | from .utils import broadcast_iou 24 | 25 | flags.DEFINE_integer('yolo_max_boxes', 100, 26 | 'maximum number of boxes per image') 27 | flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold') 28 | flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold') 29 | 30 | yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), 31 | (59, 119), (116, 90), (156, 198), (373, 326)], 32 | np.float32) / 416 33 | yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]]) 34 | 35 | yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58), 36 | (81, 82), (135, 169), (344, 319)], 37 | np.float32) / 416 38 | yolo_tiny_anchor_masks = np.array([[3, 4, 
5], [0, 1, 2]]) 39 | 40 | 41 | def DarknetConv(x, filters, size, strides=1, batch_norm=True): 42 | if strides == 1: 43 | padding = 'same' 44 | else: 45 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) # top left half-padding 46 | padding = 'valid' 47 | x = Conv2D(filters=filters, kernel_size=size, 48 | strides=strides, padding=padding, 49 | use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x) 50 | if batch_norm: 51 | x = BatchNormalization()(x) 52 | x = LeakyReLU(alpha=0.1)(x) 53 | return x 54 | 55 | 56 | def DarknetResidual(x, filters): 57 | prev = x 58 | x = DarknetConv(x, filters // 2, 1) 59 | x = DarknetConv(x, filters, 3) 60 | x = Add()([prev, x]) 61 | return x 62 | 63 | 64 | def DarknetBlock(x, filters, blocks): 65 | x = DarknetConv(x, filters, 3, strides=2) 66 | for _ in range(blocks): 67 | x = DarknetResidual(x, filters) 68 | return x 69 | 70 | 71 | def Darknet(name=None): 72 | x = inputs = Input([None, None, 3]) 73 | x = DarknetConv(x, 32, 3) 74 | x = DarknetBlock(x, 64, 1) 75 | x = DarknetBlock(x, 128, 2) # skip connection 76 | x = x_36 = DarknetBlock(x, 256, 8) # skip connection 77 | x = x_61 = DarknetBlock(x, 512, 8) 78 | x = DarknetBlock(x, 1024, 4) 79 | return tf.keras.Model(inputs, (x_36, x_61, x), name=name) 80 | 81 | 82 | def DarknetTiny(name=None): 83 | x = inputs = Input([None, None, 3]) 84 | x = DarknetConv(x, 16, 3) 85 | x = MaxPool2D(2, 2, 'same')(x) 86 | x = DarknetConv(x, 32, 3) 87 | x = MaxPool2D(2, 2, 'same')(x) 88 | x = DarknetConv(x, 64, 3) 89 | x = MaxPool2D(2, 2, 'same')(x) 90 | x = DarknetConv(x, 128, 3) 91 | x = MaxPool2D(2, 2, 'same')(x) 92 | x = x_8 = DarknetConv(x, 256, 3) # skip connection 93 | x = MaxPool2D(2, 2, 'same')(x) 94 | x = DarknetConv(x, 512, 3) 95 | x = MaxPool2D(2, 1, 'same')(x) 96 | x = DarknetConv(x, 1024, 3) 97 | return tf.keras.Model(inputs, (x_8, x), name=name) 98 | 99 | 100 | def YoloConv(filters, name=None): 101 | def yolo_conv(x_in): 102 | if isinstance(x_in, tuple): 103 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:]) 104 | x, x_skip = inputs 105 | 106 | # concat with skip connection 107 | x = DarknetConv(x, filters, 1) 108 | x = UpSampling2D(2)(x) 109 | x = Concatenate()([x, x_skip]) 110 | else: 111 | x = inputs = Input(x_in.shape[1:]) 112 | 113 | x = DarknetConv(x, filters, 1) 114 | x = DarknetConv(x, filters * 2, 3) 115 | x = DarknetConv(x, filters, 1) 116 | x = DarknetConv(x, filters * 2, 3) 117 | x = DarknetConv(x, filters, 1) 118 | return Model(inputs, x, name=name)(x_in) 119 | return yolo_conv 120 | 121 | 122 | def YoloConvTiny(filters, name=None): 123 | def yolo_conv(x_in): 124 | if isinstance(x_in, tuple): 125 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:]) 126 | x, x_skip = inputs 127 | 128 | # concat with skip connection 129 | x = DarknetConv(x, filters, 1) 130 | x = UpSampling2D(2)(x) 131 | x = Concatenate()([x, x_skip]) 132 | else: 133 | x = inputs = Input(x_in.shape[1:]) 134 | x = DarknetConv(x, filters, 1) 135 | 136 | return Model(inputs, x, name=name)(x_in) 137 | return yolo_conv 138 | 139 | 140 | def YoloOutput(filters, anchors, classes, name=None): 141 | def yolo_output(x_in): 142 | x = inputs = Input(x_in.shape[1:]) 143 | x = DarknetConv(x, filters * 2, 3) 144 | x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False) 145 | x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2], 146 | anchors, classes + 5)))(x) 147 | return tf.keras.Model(inputs, x, name=name)(x_in) 148 | return yolo_output 149 | 150 | 151 | # As tensorflow lite doesn't support tf.size used in 
tf.meshgrid, 152 | # we reimplemented a simple meshgrid function that use basic tf function. 153 | def _meshgrid(n_a, n_b): 154 | 155 | return [ 156 | tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)), 157 | tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a)) 158 | ] 159 | 160 | 161 | def yolo_boxes(pred, anchors, classes): 162 | # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes)) 163 | grid_size = tf.shape(pred)[1:3] 164 | box_xy, box_wh, objectness, class_probs = tf.split( 165 | pred, (2, 2, 1, classes), axis=-1) 166 | 167 | box_xy = tf.sigmoid(box_xy) 168 | objectness = tf.sigmoid(objectness) 169 | class_probs = tf.sigmoid(class_probs) 170 | pred_box = tf.concat((box_xy, box_wh), axis=-1) # original xywh for loss 171 | 172 | # !!! grid[x][y] == (y, x) 173 | grid = _meshgrid(grid_size[1],grid_size[0]) 174 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) # [gx, gy, 1, 2] 175 | 176 | box_xy = (box_xy + tf.cast(grid, tf.float32)) / \ 177 | tf.cast(grid_size, tf.float32) 178 | box_wh = tf.exp(box_wh) * anchors 179 | 180 | box_x1y1 = box_xy - box_wh / 2 181 | box_x2y2 = box_xy + box_wh / 2 182 | bbox = tf.concat([box_x1y1, box_x2y2], axis=-1) 183 | 184 | return bbox, objectness, class_probs, pred_box 185 | 186 | 187 | def yolo_nms(outputs, anchors, masks, classes): 188 | # boxes, conf, type 189 | b, c, t = [], [], [] 190 | 191 | for o in outputs: 192 | b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1]))) 193 | c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1]))) 194 | t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1]))) 195 | 196 | bbox = tf.concat(b, axis=1) 197 | confidence = tf.concat(c, axis=1) 198 | class_probs = tf.concat(t, axis=1) 199 | 200 | scores = confidence * class_probs 201 | 202 | dscores = tf.squeeze(scores, axis=0) 203 | scores = tf.reduce_max(dscores,[1]) 204 | bbox = tf.reshape(bbox,(-1,4)) 205 | classes = tf.argmax(dscores,1) 206 | selected_indices, selected_scores = tf.image.non_max_suppression_with_scores( 207 | boxes=bbox, 208 | scores=scores, 209 | max_output_size=FLAGS.yolo_max_boxes, 210 | iou_threshold=FLAGS.yolo_iou_threshold, 211 | score_threshold=FLAGS.yolo_score_threshold, 212 | soft_nms_sigma=0.5 213 | ) 214 | 215 | num_valid_nms_boxes = tf.shape(selected_indices)[0] 216 | 217 | selected_indices = tf.concat([selected_indices,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes, tf.int32)], 0) 218 | selected_scores = tf.concat([selected_scores,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes,tf.float32)], -1) 219 | 220 | boxes=tf.gather(bbox, selected_indices) 221 | boxes = tf.expand_dims(boxes, axis=0) 222 | scores=selected_scores 223 | scores = tf.expand_dims(scores, axis=0) 224 | classes = tf.gather(classes,selected_indices) 225 | classes = tf.expand_dims(classes, axis=0) 226 | valid_detections=num_valid_nms_boxes 227 | valid_detections = tf.expand_dims(valid_detections, axis=0) 228 | 229 | return boxes, scores, classes, valid_detections 230 | 231 | 232 | def YoloV3(size=None, channels=3, anchors=yolo_anchors, 233 | masks=yolo_anchor_masks, classes=80, training=False): 234 | x = inputs = Input([size, size, channels], name='input') 235 | 236 | x_36, x_61, x = Darknet(name='yolo_darknet')(x) 237 | 238 | x = YoloConv(512, name='yolo_conv_0')(x) 239 | output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x) 240 | 241 | x = YoloConv(256, name='yolo_conv_1')((x, x_61)) 242 | output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x) 243 | 
244 | x = YoloConv(128, name='yolo_conv_2')((x, x_36)) 245 | output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x) 246 | 247 | if training: 248 | return Model(inputs, (output_0, output_1, output_2), name='yolov3') 249 | 250 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes), 251 | name='yolo_boxes_0')(output_0) 252 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes), 253 | name='yolo_boxes_1')(output_1) 254 | boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes), 255 | name='yolo_boxes_2')(output_2) 256 | 257 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes), 258 | name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3])) 259 | 260 | return Model(inputs, outputs, name='yolov3') 261 | 262 | 263 | def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors, 264 | masks=yolo_tiny_anchor_masks, classes=80, training=False): 265 | x = inputs = Input([size, size, channels], name='input') 266 | 267 | x_8, x = DarknetTiny(name='yolo_darknet')(x) 268 | 269 | x = YoloConvTiny(256, name='yolo_conv_0')(x) 270 | output_0 = YoloOutput(256, len(masks[0]), classes, name='yolo_output_0')(x) 271 | 272 | x = YoloConvTiny(128, name='yolo_conv_1')((x, x_8)) 273 | output_1 = YoloOutput(128, len(masks[1]), classes, name='yolo_output_1')(x) 274 | 275 | if training: 276 | return Model(inputs, (output_0, output_1), name='yolov3') 277 | 278 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes), 279 | name='yolo_boxes_0')(output_0) 280 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes), 281 | name='yolo_boxes_1')(output_1) 282 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes), 283 | name='yolo_nms')((boxes_0[:3], boxes_1[:3])) 284 | return Model(inputs, outputs, name='yolov3_tiny') 285 | 286 | 287 | def YoloLoss(anchors, classes=80, ignore_thresh=0.5): 288 | def yolo_loss(y_true, y_pred): 289 | # 1. transform all pred outputs 290 | # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) 291 | pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes( 292 | y_pred, anchors, classes) 293 | pred_xy = pred_xywh[..., 0:2] 294 | pred_wh = pred_xywh[..., 2:4] 295 | 296 | # 2. transform all true outputs 297 | # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) 298 | true_box, true_obj, true_class_idx = tf.split( 299 | y_true, (4, 1, 1), axis=-1) 300 | true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 301 | true_wh = true_box[..., 2:4] - true_box[..., 0:2] 302 | 303 | # give higher weights to small boxes 304 | box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1] 305 | 306 | # 3. inverting the pred box equations 307 | grid_size = tf.shape(y_true)[1] 308 | grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size)) 309 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) 310 | true_xy = true_xy * tf.cast(grid_size, tf.float32) - \ 311 | tf.cast(grid, tf.float32) 312 | true_wh = tf.math.log(true_wh / anchors) 313 | true_wh = tf.where(tf.math.is_inf(true_wh), 314 | tf.zeros_like(true_wh), true_wh) 315 | 316 | # 4. calculate all masks 317 | obj_mask = tf.squeeze(true_obj, -1) 318 | # ignore false positive when iou is over threshold 319 | best_iou = tf.map_fn( 320 | lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask( 321 | x[1], tf.cast(x[2], tf.bool))), axis=-1), 322 | (pred_box, true_box, obj_mask), 323 | tf.float32) 324 | ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) 325 | 326 | # 5. 
calculate all losses 327 | xy_loss = obj_mask * box_loss_scale * \ 328 | tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) 329 | wh_loss = obj_mask * box_loss_scale * \ 330 | tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) 331 | obj_loss = binary_crossentropy(true_obj, pred_obj) 332 | obj_loss = obj_mask * obj_loss + \ 333 | (1 - obj_mask) * ignore_mask * obj_loss 334 | # TODO: use binary_crossentropy instead 335 | class_loss = obj_mask * sparse_categorical_crossentropy( 336 | true_class_idx, pred_class) 337 | 338 | # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1) 339 | xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) 340 | wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3)) 341 | obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3)) 342 | class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) 343 | 344 | return xy_loss + wh_loss + obj_loss + class_loss 345 | return yolo_loss 346 | -------------------------------------------------------------------------------- /yolov3_tf2/utils.py: -------------------------------------------------------------------------------- 1 | from absl import logging 2 | import numpy as np 3 | import tensorflow as tf 4 | import cv2 5 | 6 | YOLOV3_LAYER_LIST = [ 7 | 'yolo_darknet', 8 | 'yolo_conv_0', 9 | 'yolo_output_0', 10 | 'yolo_conv_1', 11 | 'yolo_output_1', 12 | 'yolo_conv_2', 13 | 'yolo_output_2', 14 | ] 15 | 16 | YOLOV3_TINY_LAYER_LIST = [ 17 | 'yolo_darknet', 18 | 'yolo_conv_0', 19 | 'yolo_output_0', 20 | 'yolo_conv_1', 21 | 'yolo_output_1', 22 | ] 23 | 24 | 25 | def load_darknet_weights(model, weights_file, tiny=False): 26 | wf = open(weights_file, 'rb') 27 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5) 28 | 29 | if tiny: 30 | layers = YOLOV3_TINY_LAYER_LIST 31 | else: 32 | layers = YOLOV3_LAYER_LIST 33 | 34 | for layer_name in layers: 35 | sub_model = model.get_layer(layer_name) 36 | for i, layer in enumerate(sub_model.layers): 37 | if not layer.name.startswith('conv2d'): 38 | continue 39 | batch_norm = None 40 | if i + 1 < len(sub_model.layers) and \ 41 | sub_model.layers[i + 1].name.startswith('batch_norm'): 42 | batch_norm = sub_model.layers[i + 1] 43 | 44 | logging.info("{}/{} {}".format( 45 | sub_model.name, layer.name, 'bn' if batch_norm else 'bias')) 46 | 47 | filters = layer.filters 48 | size = layer.kernel_size[0] 49 | in_dim = layer.get_input_shape_at(0)[-1] 50 | 51 | if batch_norm is None: 52 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters) 53 | else: 54 | # darknet [beta, gamma, mean, variance] 55 | bn_weights = np.fromfile( 56 | wf, dtype=np.float32, count=4 * filters) 57 | # tf [gamma, beta, mean, variance] 58 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 59 | 60 | # darknet shape (out_dim, in_dim, height, width) 61 | conv_shape = (filters, in_dim, size, size) 62 | conv_weights = np.fromfile( 63 | wf, dtype=np.float32, count=np.product(conv_shape)) 64 | # tf shape (height, width, in_dim, out_dim) 65 | conv_weights = conv_weights.reshape( 66 | conv_shape).transpose([2, 3, 1, 0]) 67 | 68 | if batch_norm is None: 69 | layer.set_weights([conv_weights, conv_bias]) 70 | else: 71 | layer.set_weights([conv_weights]) 72 | batch_norm.set_weights(bn_weights) 73 | 74 | assert len(wf.read()) == 0, 'failed to read all data' 75 | wf.close() 76 | 77 | 78 | def broadcast_iou(box_1, box_2): 79 | # box_1: (..., (x1, y1, x2, y2)) 80 | # box_2: (N, (x1, y1, x2, y2)) 81 | 82 | # broadcast boxes 83 | box_1 = tf.expand_dims(box_1, -2) 84 | box_2 = tf.expand_dims(box_2, 0) 85 | # new_shape: (..., 
N, (x1, y1, x2, y2)) 86 | new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2)) 87 | box_1 = tf.broadcast_to(box_1, new_shape) 88 | box_2 = tf.broadcast_to(box_2, new_shape) 89 | 90 | int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) - 91 | tf.maximum(box_1[..., 0], box_2[..., 0]), 0) 92 | int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) - 93 | tf.maximum(box_1[..., 1], box_2[..., 1]), 0) 94 | int_area = int_w * int_h 95 | box_1_area = (box_1[..., 2] - box_1[..., 0]) * \ 96 | (box_1[..., 3] - box_1[..., 1]) 97 | box_2_area = (box_2[..., 2] - box_2[..., 0]) * \ 98 | (box_2[..., 3] - box_2[..., 1]) 99 | return int_area / (box_1_area + box_2_area - int_area) 100 | 101 | 102 | def draw_outputs(img, outputs, class_names): 103 | boxes, objectness, classes, nums = outputs 104 | boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0] 105 | wh = np.flip(img.shape[0:2]) 106 | for i in range(nums): 107 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32)) 108 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32)) 109 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2) 110 | img = cv2.putText(img, '{} {:.4f}'.format( 111 | class_names[int(classes[i])], objectness[i]), 112 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 113 | return img 114 | 115 | 116 | def draw_labels(x, y, class_names): 117 | img = x.numpy() 118 | boxes, classes = tf.split(y, (4, 1), axis=-1) 119 | classes = classes[..., 0] 120 | wh = np.flip(img.shape[0:2]) 121 | for i in range(len(boxes)): 122 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32)) 123 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32)) 124 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2) 125 | img = cv2.putText(img, class_names[classes[i]], 126 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 127 | 1, (0, 0, 255), 2) 128 | return img 129 | 130 | 131 | def freeze_all(model, frozen=True): 132 | model.trainable = not frozen 133 | if isinstance(model, tf.keras.Model): 134 | for l in model.layers: 135 | freeze_all(l, frozen) 136 | 137 | 138 | def convert_boxes(image, boxes, scores): 139 | returned_boxes = [] 140 | for box, score in zip(boxes, scores): 141 | if score == 0.0: continue 142 | box[0] = (box[0] * image.shape[1]).astype(int) 143 | box[1] = (box[1] * image.shape[0]).astype(int) 144 | box[2] = (box[2] * image.shape[1]).astype(int) 145 | box[3] = (box[3] * image.shape[0]).astype(int) 146 | box[2] = int(box[2]-box[0]) 147 | box[3] = int(box[3]-box[1]) 148 | box = box.astype(int) 149 | box = box.tolist() 150 | if box != [0,0,0,0]: 151 | returned_boxes.append(box) 152 | return returned_boxes --------------------------------------------------------------------------------
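
Usage note (not part of the repository sources above): the appearance-feature path of this tracker reduces to create_box_encoder in tools/generate_detections.py, which crops every (x, y, w, h) box out of a BGR frame, resizes each crop to the ArcFace input shape, and runs deepface's ArcFace model on the resulting batch. The sketch below shows one plausible way to drive it on a single frame when run from the repository root; the weight path and image path are illustrative assumptions, and ArcFace.loadModel must accept whatever checkpoint argument your deepface version expects.

import cv2
import numpy as np

from tools.generate_detections import create_box_encoder

# Assumed, hypothetical paths -- adjust to your environment.
ARCFACE_WEIGHTS = "model_data/arcface_weights.h5"
FRAME_PATH = "resources/database/1/ironman/1.jpg"

# Build the encoder; internally this calls ArcFace.loadModel(ARCFACE_WEIGHTS).
encoder = create_box_encoder(ARCFACE_WEIGHTS, batch_size=32)

frame = cv2.imread(FRAME_PATH, cv2.IMREAD_COLOR)    # BGR image, as the encoder expects
boxes = np.array([[50.0, 40.0, 120.0, 160.0]])      # one box in (x, y, w, h) format

features = encoder(frame, boxes)                    # shape: (num_boxes, feature_dim)

# deep_sort's nn_matching compares embeddings with a cosine metric, so it is
# safe to unit-normalize them before wrapping them in Detection objects.
features = features / np.linalg.norm(features, axis=1, keepdims=True)
print(features.shape)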