├── .gitignore ├── .vscode ├── launch.json └── settings.json ├── README.md ├── conda-cpu.yml ├── conda-gpu.yml ├── convert.py ├── deep_sort ├── __init__.py ├── detection.py ├── iou_matching.py ├── kalman_filter.py ├── linear_assignment.py ├── nn_matching.py ├── preprocessing.py ├── track.py └── tracker.py ├── deepface ├── DeepFace.py ├── __init__.py ├── basemodels │ ├── ArcFace.py │ ├── Boosting.py │ ├── DeepID.py │ ├── DlibResNet.py │ ├── DlibWrapper.py │ ├── Facenet.py │ ├── FbDeepFace.py │ ├── OpenFace.py │ ├── VGGFace.py │ └── __init__.py ├── commons │ ├── __init__.py │ ├── distance.py │ ├── functions.py │ └── realtime.py ├── detectors │ ├── DlibWrapper.py │ ├── FaceDetector.py │ ├── MtcnnWrapper.py │ ├── OpenCvWrapper.py │ ├── RetinaFaceWrapper.py │ ├── SsdWrapper.py │ └── __init__.py ├── extendedmodels │ ├── Age.py │ ├── Emotion.py │ ├── Gender.py │ ├── Race.py │ └── __init__.py └── models │ ├── __init__.py │ └── face-recognition-ensemble-model.txt ├── detection.txt ├── evaluation.py ├── generate_face.py ├── object_tracker copy.py ├── object_tracker.py ├── requirements.txt ├── resources ├── database │ ├── 1 │ │ └── ironman │ │ │ ├── 1.jpg │ │ │ ├── 2.jpg │ │ │ └── 3.jpg │ └── 2 │ │ ├── chimchakman │ │ ├── 1.jpg │ │ └── 2.jpg │ │ ├── juhomin │ │ ├── 1.jpg │ │ └── 2.jpg │ │ └── kimpoong │ │ ├── 1.jpg │ │ └── 2.jpg ├── fonts │ └── futur.ttf └── gt │ ├── Apink_gt.xml │ ├── BrunoMars_gt.xml │ ├── Darling_gt.xml │ ├── GirlsAloud_gt.xml │ ├── HelloBubble_gt.xml │ ├── README.txt │ └── Westlife_gt.xml ├── test.md ├── tools ├── freeze_model.py └── generate_detections.py ├── xml2txt.py └── yolov3_tf2 ├── __init__.py ├── dataset.py ├── models.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.h5 2 | *.weights 3 | *.tar 4 | *.tfrecord 5 | /checkpoints/* 6 | /serving/* 7 | /logs/ 8 | /Untitled.ipynb 9 | /output.jpg 10 | /data/voc2012_raw/ 11 | 12 | # Created by https://www.gitignore.io/api/python 13 | # Edit at https://www.gitignore.io/?templates=python 14 | 15 | ### Python ### 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | build/ 27 | develop-eggs/ 28 | dist/ 29 | downloads/ 30 | eggs/ 31 | .eggs/ 32 | lib/ 33 | lib64/ 34 | parts/ 35 | sdist/ 36 | var/ 37 | wheels/ 38 | pip-wheel-metadata/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | .pytest_cache/ 67 | 68 | # Translations 69 | *.mo 70 | *.pot 71 | 72 | # Django stuff: 73 | *.log 74 | local_settings.py 75 | db.sqlite3 76 | 77 | # Flask stuff: 78 | instance/ 79 | .webassets-cache 80 | 81 | # Scrapy stuff: 82 | .scrapy 83 | 84 | # Sphinx documentation 85 | docs/_build/ 86 | 87 | # PyBuilder 88 | target/ 89 | 90 | # Jupyter Notebook 91 | .ipynb_checkpoints 92 | 93 | # IPython 94 | profile_default/ 95 | ipython_config.py 96 | 97 | # pyenv 98 | .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don’t work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # celery beat schedule file 108 | celerybeat-schedule 109 | 110 | # SageMath parsed files 111 | *.sage.py 112 | 113 | # Environments 114 | .env 115 | .venv 116 | env/ 117 | venv/ 118 | ENV/ 119 | env.bak/ 120 | venv.bak/ 121 | 122 | # Spyder project settings 123 | .spyderproject 124 | .spyproject 125 | 126 | # Rope project settings 127 | .ropeproject 128 | 129 | # mkdocs documentation 130 | /site 131 | 132 | # mypy 133 | .mypy_cache/ 134 | .dmypy.json 135 | dmypy.json 136 | 137 | # Pyre type checker 138 | .pyre/ 139 | 140 | # End of https://www.gitignore.io/api/python 141 | 142 | # video 143 | resources/video 144 | *.mp4 145 | *.avi 146 | *.mov 147 | 148 | # weight 149 | model_data/ 150 | weights/ 151 | resources/ -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "name": "Python: Current File", 9 | "type": "python", 10 | "request": "launch", 11 | "program": "${file}", 12 | "console": "integratedTerminal", 13 | "args": [ 14 | "--classes", "./model_data/labels/widerface.names", 15 | "--video", "./resources/video/in/T-ara.mov", 16 | "--weights", "./weights/yolov3-wider_16000.tf", 17 | "--output_format", "MP4V", 18 | "--database", "./resources/database/T-ara", 19 | "--output", "./resources/video/out/T-ara.mp4", 20 | "--num_classes", "1", 21 | "--max_face_threshold", "0.68", 22 | "--eval", "./resources/gt/T-ara_pred.txt", 23 | ] 24 | } 25 | ] 26 | } -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "C:\\Users\\choiwansik\\anaconda3\\envs\\face_tracker\\python.exe" 3 | } -------------------------------------------------------------------------------- /conda-cpu.yml: -------------------------------------------------------------------------------- 1 | name: tracker-cpu 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - matplotlib 7 | - opencv 8 | - pip: 9 | - tensorflow==2.4.1 10 | - lxml 11 | - tqdm 12 | - seaborn 13 | - pillow 14 | -------------------------------------------------------------------------------- /conda-gpu.yml: -------------------------------------------------------------------------------- 1 | name: yolov3-tf2-gpu 2 | 3 | dependencies: 4 | - python==3.7 5 | - pip 6 | - matplotlib 7 | - opencv 8 | - cudnn 9 | - cudatoolkit==10.1.243 10 | - pip: 11 | - tensorflow==2.4.1 12 | - lxml 13 | - tqdm 14 | - -e . 15 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | from absl import app, flags, logging 2 | from absl.flags import FLAGS 3 | import numpy as np 4 | from yolov3_tf2.models import YoloV3, YoloV3Tiny 5 | from yolov3_tf2.utils import load_darknet_weights 6 | import tensorflow as tf 7 | 8 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.weights', 'path to weights file') 9 | flags.DEFINE_string('output', './weights/yolov3-wider_16000.tf', 'path to output') 10 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 11 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model') 12 | 13 | 14 | def main(_argv): 15 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 16 | if len(physical_devices) > 0: 17 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 18 | 19 | if FLAGS.tiny: 20 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 21 | else: 22 | yolo = YoloV3(classes=FLAGS.num_classes) 23 | yolo.summary() 24 | logging.info('model created') 25 | 26 | load_darknet_weights(yolo, FLAGS.weights, FLAGS.tiny) 27 | logging.info('weights loaded') 28 | 29 | img = np.random.random((1, 320, 320, 3)).astype(np.float32) 30 | output = yolo(img) 31 | logging.info('sanity check passed') 32 | 33 | yolo.save_weights(FLAGS.output) 34 | logging.info('weights saved') 35 | 36 | 37 | if __name__ == '__main__': 38 | try: 39 | app.run(main) 40 | except SystemExit: 41 | pass 42 | 43 | """ 44 | python convert.py --weights ./weights/yolov3-wider_16000.weights \ 45 | --output ./weights/yolov3-wider_16000.tf \ 46 | --num_classes 1 47 | """ -------------------------------------------------------------------------------- /deep_sort/__init__.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | -------------------------------------------------------------------------------- /deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | class Detection(object): 6 | """ 7 | This class represents a bounding box detection in a single image. 8 | 9 | Parameters 10 | ---------- 11 | tlwh : array_like 12 | Bounding box in format `(x, y, w, h)`. 13 | confidence : float 14 | Detector confidence score. 15 | feature : array_like 16 | A feature vector that describes the object contained in this image. 17 | 18 | Attributes 19 | ---------- 20 | tlwh : ndarray 21 | Bounding box in format `(top left x, top left y, width, height)`. 22 | confidence : ndarray 23 | Detector confidence score. 24 | class_name : ndarray 25 | Detector class. 26 | feature : ndarray | NoneType 27 | A feature vector that describes the object contained in this image. 28 | 29 | """ 30 | 31 | def __init__(self, tlwh, confidence, class_name, feature): 32 | self.tlwh = np.asarray(tlwh, dtype=np.float) 33 | self.confidence = float(confidence) 34 | self.class_name = class_name 35 | self.feature = np.asarray(feature, dtype=np.float32) 36 | 37 | def get_class(self): 38 | return self.class_name 39 | 40 | def to_tlbr(self): 41 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 42 | `(top left, bottom right)`. 43 | """ 44 | ret = self.tlwh.copy() 45 | ret[2:] += ret[:2] 46 | return ret 47 | 48 | def to_xyah(self): 49 | """Convert bounding box to format `(center x, center y, aspect ratio, 50 | height)`, where the aspect ratio is `width / height`. 51 | """ 52 | ret = self.tlwh.copy() 53 | ret[:2] += ret[2:] / 2 54 | ret[2] /= ret[3] 55 | return ret 56 | -------------------------------------------------------------------------------- /deep_sort/iou_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import linear_assignment 5 | 6 | 7 | def iou(bbox, candidates): 8 | """Computer intersection over union. 9 | 10 | Parameters 11 | ---------- 12 | bbox : ndarray 13 | A bounding box in format `(top left x, top left y, width, height)`. 14 | candidates : ndarray 15 | A matrix of candidate bounding boxes (one per row) in the same format 16 | as `bbox`. 17 | 18 | Returns 19 | ------- 20 | ndarray 21 | The intersection over union in [0, 1] between the `bbox` and each 22 | candidate. A higher score means a larger fraction of the `bbox` is 23 | occluded by the candidate. 24 | 25 | """ 26 | bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:] 27 | candidates_tl = candidates[:, :2] 28 | candidates_br = candidates[:, :2] + candidates[:, 2:] 29 | 30 | tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], 31 | np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] 32 | br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], 33 | np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] 34 | wh = np.maximum(0., br - tl) 35 | 36 | area_intersection = wh.prod(axis=1) 37 | area_bbox = bbox[2:].prod() 38 | area_candidates = candidates[:, 2:].prod(axis=1) 39 | return area_intersection / (area_bbox + area_candidates - area_intersection) 40 | 41 | 42 | def iou_cost(tracks, detections, track_indices=None, 43 | detection_indices=None): 44 | """An intersection over union distance metric. 45 | 46 | Parameters 47 | ---------- 48 | tracks : List[deep_sort.track.Track] 49 | A list of tracks. 50 | detections : List[deep_sort.detection.Detection] 51 | A list of detections. 52 | track_indices : Optional[List[int]] 53 | A list of indices to tracks that should be matched. Defaults to 54 | all `tracks`. 55 | detection_indices : Optional[List[int]] 56 | A list of indices to detections that should be matched. Defaults 57 | to all `detections`. 58 | 59 | Returns 60 | ------- 61 | ndarray 62 | Returns a cost matrix of shape 63 | len(track_indices), len(detection_indices) where entry (i, j) is 64 | `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 65 | 66 | """ 67 | if track_indices is None: 68 | track_indices = np.arange(len(tracks)) 69 | if detection_indices is None: 70 | detection_indices = np.arange(len(detections)) 71 | 72 | cost_matrix = np.zeros((len(track_indices), len(detection_indices))) 73 | for row, track_idx in enumerate(track_indices): 74 | if tracks[track_idx].time_since_update > 1: 75 | cost_matrix[row, :] = linear_assignment.INFTY_COST 76 | continue 77 | 78 | bbox = tracks[track_idx].to_tlwh() 79 | candidates = np.asarray([detections[i].tlwh for i in detection_indices]) 80 | cost_matrix[row, :] = 1. - iou(bbox, candidates) 81 | return cost_matrix 82 | -------------------------------------------------------------------------------- /deep_sort/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | 6 | """ 7 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 8 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 9 | function and used as Mahalanobis gating threshold. 10 | """ 11 | chi2inv95 = { 12 | 1: 3.8415, 13 | 2: 5.9915, 14 | 3: 7.8147, 15 | 4: 9.4877, 16 | 5: 11.070, 17 | 6: 12.592, 18 | 7: 14.067, 19 | 8: 15.507, 20 | 9: 16.919} 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1. 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1. / 20 53 | self._std_weight_velocity = 1. / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3]] 85 | covariance = np.diag(np.square(std)) 86 | return mean, covariance 87 | 88 | def predict(self, mean, covariance): 89 | """Run Kalman filter prediction step. 90 | 91 | Parameters 92 | ---------- 93 | mean : ndarray 94 | The 8 dimensional mean vector of the object state at the previous 95 | time step. 96 | covariance : ndarray 97 | The 8x8 dimensional covariance matrix of the object state at the 98 | previous time step. 99 | 100 | Returns 101 | ------- 102 | (ndarray, ndarray) 103 | Returns the mean vector and covariance matrix of the predicted 104 | state. Unobserved velocities are initialized to 0 mean. 105 | 106 | """ 107 | std_pos = [ 108 | self._std_weight_position * mean[3], 109 | self._std_weight_position * mean[3], 110 | 1e-2, 111 | self._std_weight_position * mean[3]] 112 | std_vel = [ 113 | self._std_weight_velocity * mean[3], 114 | self._std_weight_velocity * mean[3], 115 | 1e-5, 116 | self._std_weight_velocity * mean[3]] 117 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 118 | 119 | mean = np.dot(self._motion_mat, mean) 120 | covariance = np.linalg.multi_dot(( 121 | self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 122 | 123 | return mean, covariance 124 | 125 | def project(self, mean, covariance): 126 | """Project state distribution to measurement space. 127 | 128 | Parameters 129 | ---------- 130 | mean : ndarray 131 | The state's mean vector (8 dimensional array). 132 | covariance : ndarray 133 | The state's covariance matrix (8x8 dimensional). 134 | 135 | Returns 136 | ------- 137 | (ndarray, ndarray) 138 | Returns the projected mean and covariance matrix of the given state 139 | estimate. 140 | 141 | """ 142 | std = [ 143 | self._std_weight_position * mean[3], 144 | self._std_weight_position * mean[3], 145 | 1e-1, 146 | self._std_weight_position * mean[3]] 147 | innovation_cov = np.diag(np.square(std)) 148 | 149 | mean = np.dot(self._update_mat, mean) 150 | covariance = np.linalg.multi_dot(( 151 | self._update_mat, covariance, self._update_mat.T)) 152 | return mean, covariance + innovation_cov 153 | 154 | def update(self, mean, covariance, measurement): 155 | """Run Kalman filter correction step. 156 | 157 | Parameters 158 | ---------- 159 | mean : ndarray 160 | The predicted state's mean vector (8 dimensional). 161 | covariance : ndarray 162 | The state's covariance matrix (8x8 dimensional). 163 | measurement : ndarray 164 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 165 | is the center position, a the aspect ratio, and h the height of the 166 | bounding box. 167 | 168 | Returns 169 | ------- 170 | (ndarray, ndarray) 171 | Returns the measurement-corrected state distribution. 172 | 173 | """ 174 | projected_mean, projected_cov = self.project(mean, covariance) 175 | 176 | chol_factor, lower = scipy.linalg.cho_factor( 177 | projected_cov, lower=True, check_finite=False) 178 | kalman_gain = scipy.linalg.cho_solve( 179 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 180 | check_finite=False).T 181 | innovation = measurement - projected_mean 182 | 183 | new_mean = mean + np.dot(innovation, kalman_gain.T) 184 | new_covariance = covariance - np.linalg.multi_dot(( 185 | kalman_gain, projected_cov, kalman_gain.T)) 186 | return new_mean, new_covariance 187 | 188 | def gating_distance(self, mean, covariance, measurements, 189 | only_position=False): 190 | """Compute gating distance between state distribution and measurements. 191 | 192 | A suitable distance threshold can be obtained from `chi2inv95`. If 193 | `only_position` is False, the chi-square distribution has 4 degrees of 194 | freedom, otherwise 2. 195 | 196 | Parameters 197 | ---------- 198 | mean : ndarray 199 | Mean vector over the state distribution (8 dimensional). 200 | covariance : ndarray 201 | Covariance of the state distribution (8x8 dimensional). 202 | measurements : ndarray 203 | An Nx4 dimensional matrix of N measurements, each in 204 | format (x, y, a, h) where (x, y) is the bounding box center 205 | position, a the aspect ratio, and h the height. 206 | only_position : Optional[bool] 207 | If True, distance computation is done with respect to the bounding 208 | box center position only. 209 | 210 | Returns 211 | ------- 212 | ndarray 213 | Returns an array of length N, where the i-th element contains the 214 | squared Mahalanobis distance between (mean, covariance) and 215 | `measurements[i]`. 216 | 217 | """ 218 | mean, covariance = self.project(mean, covariance) 219 | if only_position: 220 | mean, covariance = mean[:2], covariance[:2, :2] 221 | measurements = measurements[:, :2] 222 | 223 | cholesky_factor = np.linalg.cholesky(covariance) 224 | d = measurements - mean 225 | z = scipy.linalg.solve_triangular( 226 | cholesky_factor, d.T, lower=True, check_finite=False, 227 | overwrite_b=True) 228 | squared_maha = np.sum(z * z, axis=0) 229 | return squared_maha 230 | -------------------------------------------------------------------------------- /deep_sort/linear_assignment.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from scipy.optimize import linear_sum_assignment 5 | from . import kalman_filter 6 | 7 | 8 | INFTY_COST = 1e+5 9 | 10 | 11 | def min_cost_matching( 12 | distance_metric, max_distance, tracks, detections, track_indices=None, 13 | detection_indices=None): 14 | """Solve linear assignment problem. 15 | 16 | Parameters 17 | ---------- 18 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 19 | The distance metric is given a list of tracks and detections as well as 20 | a list of N track indices and M detection indices. The metric should 21 | return the NxM dimensional cost matrix, where element (i, j) is the 22 | association cost between the i-th track in the given track indices and 23 | the j-th detection in the given detection_indices. 24 | max_distance : float 25 | Gating threshold. Associations with cost larger than this value are 26 | disregarded. 27 | tracks : List[track.Track] 28 | A list of predicted tracks at the current time step. 29 | detections : List[detection.Detection] 30 | A list of detections at the current time step. 31 | track_indices : List[int] 32 | List of track indices that maps rows in `cost_matrix` to tracks in 33 | `tracks` (see description above). 34 | detection_indices : List[int] 35 | List of detection indices that maps columns in `cost_matrix` to 36 | detections in `detections` (see description above). 37 | 38 | Returns 39 | ------- 40 | (List[(int, int)], List[int], List[int]) 41 | Returns a tuple with the following three entries: 42 | * A list of matched track and detection indices. 43 | * A list of unmatched track indices. 44 | * A list of unmatched detection indices. 45 | 46 | """ 47 | if track_indices is None: 48 | track_indices = np.arange(len(tracks)) 49 | if detection_indices is None: 50 | detection_indices = np.arange(len(detections)) 51 | 52 | if len(detection_indices) == 0 or len(track_indices) == 0: 53 | return [], track_indices, detection_indices # Nothing to match. 54 | 55 | cost_matrix = distance_metric( 56 | tracks, detections, track_indices, detection_indices) 57 | cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 58 | indices = linear_sum_assignment(cost_matrix) 59 | indices = np.asarray(indices) 60 | indices = np.transpose(indices) 61 | matches, unmatched_tracks, unmatched_detections = [], [], [] 62 | for col, detection_idx in enumerate(detection_indices): 63 | if col not in indices[:, 1]: 64 | unmatched_detections.append(detection_idx) 65 | for row, track_idx in enumerate(track_indices): 66 | if row not in indices[:, 0]: 67 | unmatched_tracks.append(track_idx) 68 | for row, col in indices: 69 | track_idx = track_indices[row] 70 | detection_idx = detection_indices[col] 71 | if cost_matrix[row, col] > max_distance: 72 | unmatched_tracks.append(track_idx) 73 | unmatched_detections.append(detection_idx) 74 | else: 75 | matches.append((track_idx, detection_idx)) 76 | return matches, unmatched_tracks, unmatched_detections 77 | 78 | 79 | def matching_cascade( 80 | distance_metric, max_distance, cascade_depth, tracks, detections, 81 | track_indices=None, detection_indices=None): 82 | """Run matching cascade. 83 | 84 | Parameters 85 | ---------- 86 | distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray 87 | The distance metric is given a list of tracks and detections as well as 88 | a list of N track indices and M detection indices. The metric should 89 | return the NxM dimensional cost matrix, where element (i, j) is the 90 | association cost between the i-th track in the given track indices and 91 | the j-th detection in the given detection indices. 92 | max_distance : float 93 | Gating threshold. Associations with cost larger than this value are 94 | disregarded. 95 | cascade_depth: int 96 | The cascade depth, should be se to the maximum track age. 97 | tracks : List[track.Track] 98 | A list of predicted tracks at the current time step. 99 | detections : List[detection.Detection] 100 | A list of detections at the current time step. 101 | track_indices : Optional[List[int]] 102 | List of track indices that maps rows in `cost_matrix` to tracks in 103 | `tracks` (see description above). Defaults to all tracks. 104 | detection_indices : Optional[List[int]] 105 | List of detection indices that maps columns in `cost_matrix` to 106 | detections in `detections` (see description above). Defaults to all 107 | detections. 108 | 109 | Returns 110 | ------- 111 | (List[(int, int)], List[int], List[int]) 112 | Returns a tuple with the following three entries: 113 | * A list of matched track and detection indices. 114 | * A list of unmatched track indices. 115 | * A list of unmatched detection indices. 116 | 117 | """ 118 | if track_indices is None: 119 | track_indices = list(range(len(tracks))) 120 | if detection_indices is None: 121 | detection_indices = list(range(len(detections))) 122 | 123 | unmatched_detections = detection_indices 124 | matches = [] 125 | for level in range(cascade_depth): 126 | if len(unmatched_detections) == 0: # No detections left 127 | break 128 | 129 | track_indices_l = [ 130 | k for k in track_indices 131 | if tracks[k].time_since_update == 1 + level 132 | ] 133 | if len(track_indices_l) == 0: # Nothing to match at this level 134 | continue 135 | 136 | matches_l, _, unmatched_detections = \ 137 | min_cost_matching( 138 | distance_metric, max_distance, tracks, detections, 139 | track_indices_l, unmatched_detections) 140 | matches += matches_l 141 | unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) 142 | return matches, unmatched_tracks, unmatched_detections 143 | 144 | 145 | def gate_cost_matrix( 146 | kf, cost_matrix, tracks, detections, track_indices, detection_indices, 147 | gated_cost=INFTY_COST, only_position=False): 148 | """Invalidate infeasible entries in cost matrix based on the state 149 | distributions obtained by Kalman filtering. 150 | 151 | Parameters 152 | ---------- 153 | kf : The Kalman filter. 154 | cost_matrix : ndarray 155 | The NxM dimensional cost matrix, where N is the number of track indices 156 | and M is the number of detection indices, such that entry (i, j) is the 157 | association cost between `tracks[track_indices[i]]` and 158 | `detections[detection_indices[j]]`. 159 | tracks : List[track.Track] 160 | A list of predicted tracks at the current time step. 161 | detections : List[detection.Detection] 162 | A list of detections at the current time step. 163 | track_indices : List[int] 164 | List of track indices that maps rows in `cost_matrix` to tracks in 165 | `tracks` (see description above). 166 | detection_indices : List[int] 167 | List of detection indices that maps columns in `cost_matrix` to 168 | detections in `detections` (see description above). 169 | gated_cost : Optional[float] 170 | Entries in the cost matrix corresponding to infeasible associations are 171 | set this value. Defaults to a very large value. 172 | only_position : Optional[bool] 173 | If True, only the x, y position of the state distribution is considered 174 | during gating. Defaults to False. 175 | 176 | Returns 177 | ------- 178 | ndarray 179 | Returns the modified cost matrix. 180 | 181 | """ 182 | gating_dim = 2 if only_position else 4 183 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 184 | measurements = np.asarray( 185 | [detections[i].to_xyah() for i in detection_indices]) 186 | for row, track_idx in enumerate(track_indices): 187 | track = tracks[track_idx] 188 | gating_distance = kf.gating_distance( 189 | track.mean, track.covariance, measurements, only_position) 190 | cost_matrix[row, gating_distance > gating_threshold] = gated_cost 191 | return cost_matrix 192 | -------------------------------------------------------------------------------- /deep_sort/nn_matching.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | 4 | 5 | def _pdist(a, b): 6 | """Compute pair-wise squared distance between points in `a` and `b`. 7 | 8 | Parameters 9 | ---------- 10 | a : array_like 11 | An NxM matrix of N samples of dimensionality M. 12 | b : array_like 13 | An LxM matrix of L samples of dimensionality M. 14 | 15 | Returns 16 | ------- 17 | ndarray 18 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 19 | contains the squared distance between `a[i]` and `b[j]`. 20 | 21 | """ 22 | a, b = np.asarray(a), np.asarray(b) 23 | if len(a) == 0 or len(b) == 0: 24 | return np.zeros((len(a), len(b))) 25 | a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1) 26 | r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :] 27 | r2 = np.clip(r2, 0., float(np.inf)) 28 | return r2 29 | 30 | 31 | def _cosine_distance(a, b, data_is_normalized=False): 32 | """Compute pair-wise cosine distance between points in `a` and `b`. 33 | 34 | Parameters 35 | ---------- 36 | a : array_like 37 | An NxM matrix of N samples of dimensionality M. 38 | b : array_like 39 | An LxM matrix of L samples of dimensionality M. 40 | data_is_normalized : Optional[bool] 41 | If True, assumes rows in a and b are unit length vectors. 42 | Otherwise, a and b are explicitly normalized to lenght 1. 43 | 44 | Returns 45 | ------- 46 | ndarray 47 | Returns a matrix of size len(a), len(b) such that eleement (i, j) 48 | contains the squared distance between `a[i]` and `b[j]`. 49 | 50 | """ 51 | if not data_is_normalized: 52 | a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 53 | b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 54 | return 1. - np.dot(a, b.T) 55 | 56 | 57 | def _nn_euclidean_distance(x, y): 58 | """ Helper function for nearest neighbor distance metric (Euclidean). 59 | 60 | Parameters 61 | ---------- 62 | x : ndarray 63 | A matrix of N row-vectors (sample points). 64 | y : ndarray 65 | A matrix of M row-vectors (query points). 66 | 67 | Returns 68 | ------- 69 | ndarray 70 | A vector of length M that contains for each entry in `y` the 71 | smallest Euclidean distance to a sample in `x`. 72 | 73 | """ 74 | distances = _pdist(x, y) 75 | return np.maximum(0.0, distances.min(axis=0)) 76 | 77 | 78 | def _nn_cosine_distance(x, y): 79 | """ Helper function for nearest neighbor distance metric (cosine). 80 | 81 | Parameters 82 | ---------- 83 | x : ndarray 84 | A matrix of N row-vectors (sample points). 85 | y : ndarray 86 | A matrix of M row-vectors (query points). 87 | 88 | Returns 89 | ------- 90 | ndarray 91 | A vector of length M that contains for each entry in `y` the 92 | smallest cosine distance to a sample in `x`. 93 | 94 | """ 95 | # distances = _cosine_distance(x, y) # 변화 준부분 96 | distances = custom_cosine_similarity(x, y) 97 | return distances.min(axis=0) 98 | 99 | 100 | class NearestNeighborDistanceMetric(object): 101 | """ 102 | A nearest neighbor distance metric that, for each target, returns 103 | the closest distance to any sample that has been observed so far. 104 | 105 | Parameters 106 | ---------- 107 | metric : str 108 | Either "euclidean" or "cosine". 109 | matching_threshold: float 110 | The matching threshold. Samples with larger distance are considered an 111 | invalid match. 112 | budget : Optional[int] 113 | If not None, fix samples per class to at most this number. Removes 114 | the oldest samples when the budget is reached. 115 | 116 | Attributes 117 | ---------- 118 | samples : Dict[int -> List[ndarray]] 119 | A dictionary that maps from target identities to the list of samples 120 | that have been observed so far. 121 | 122 | """ 123 | 124 | def __init__(self, metric, matching_threshold, budget=None): 125 | 126 | 127 | if metric == "euclidean": 128 | self._metric = _nn_euclidean_distance 129 | elif metric == "cosine": 130 | self._metric = _nn_cosine_distance 131 | else: 132 | raise ValueError( 133 | "Invalid metric; must be either 'euclidean' or 'cosine'") 134 | self.matching_threshold = matching_threshold 135 | self.budget = budget 136 | self.samples = {} 137 | 138 | def partial_fit(self, features, targets, active_targets): 139 | """Update the distance metric with new data. 140 | 141 | Parameters 142 | ---------- 143 | features : ndarray 144 | An NxM matrix of N features of dimensionality M. 145 | targets : ndarray 146 | An integer array of associated target identities. 147 | active_targets : List[int] 148 | A list of targets that are currently present in the scene. 149 | 150 | """ 151 | for feature, target in zip(features, targets): 152 | self.samples.setdefault(target, []).append(feature) 153 | if self.budget is not None: 154 | self.samples[target] = self.samples[target][-self.budget:] 155 | self.samples = {k: self.samples[k] for k in active_targets} 156 | 157 | def distance(self, features, targets): 158 | """Compute distance between features and targets. 159 | 160 | Parameters 161 | ---------- 162 | features : ndarray 163 | An NxM matrix of N features of dimensionality M. 164 | targets : List[int] 165 | A list of targets to match the given `features` against. 166 | 167 | Returns 168 | ------- 169 | ndarray 170 | Returns a cost matrix of shape len(targets), len(features), where 171 | element (i, j) contains the closest squared distance between 172 | `targets[i]` and `features[j]`. 173 | 174 | """ 175 | cost_matrix = np.zeros((len(targets), len(features))) 176 | for i, target in enumerate(targets): 177 | cost_matrix[i, :] = self._metric(self.samples[target], features) 178 | return cost_matrix 179 | 180 | def custom_cosine_similarity(a, b): 181 | 182 | # if not data_is_normalized: 183 | # a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True) 184 | # b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True) 185 | # return 1. - np.dot(a, b.T) 186 | # [[]] [[]] 187 | 188 | aa = np.linalg.norm(a, axis=1, keepdims=True) 189 | bb = np.linalg.norm(b, axis=1, keepdims=True) 190 | norm_mat = np.dot(aa, bb.T) 191 | return 1. - (np.dot(a, b.T) / norm_mat) 192 | 193 | 194 | # a = np.matmul(np.transpose(source_representation), test_representation) 195 | # b = np.sum(np.multiply(source_representation, source_representation)) 196 | # c = np.sum(np.multiply(test_representation, test_representation)) 197 | # return 1 - (a / (np.sqrt(b) * np.sqrt(c))) -------------------------------------------------------------------------------- /deep_sort/preprocessing.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def non_max_suppression(boxes, classes, max_bbox_overlap, scores=None): 7 | """Suppress overlapping detections. 8 | 9 | Original code from [1]_ has been adapted to include confidence score. 10 | 11 | .. [1] http://www.pyimagesearch.com/2015/02/16/ 12 | faster-non-maximum-suppression-python/ 13 | 14 | Examples 15 | -------- 16 | 17 | >>> boxes = [d.roi for d in detections] 18 | >>> classes = [d.classes for d in detections] 19 | >>> scores = [d.confidence for d in detections] 20 | >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores) 21 | >>> detections = [detections[i] for i in indices] 22 | 23 | Parameters 24 | ---------- 25 | boxes : ndarray 26 | Array of ROIs (x, y, width, height). 27 | max_bbox_overlap : float 28 | ROIs that overlap more than this values are suppressed. 29 | scores : Optional[array_like] 30 | Detector confidence score. 31 | 32 | Returns 33 | ------- 34 | List[int] 35 | Returns indices of detections that have survived non-maxima suppression. 36 | 37 | """ 38 | if len(boxes) == 0: 39 | return [] 40 | 41 | boxes = boxes.astype(np.float) 42 | pick = [] 43 | 44 | x1 = boxes[:, 0] 45 | y1 = boxes[:, 1] 46 | x2 = boxes[:, 2] + boxes[:, 0] 47 | y2 = boxes[:, 3] + boxes[:, 1] 48 | 49 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 50 | if scores is not None: 51 | idxs = np.argsort(scores) 52 | else: 53 | idxs = np.argsort(y2) 54 | 55 | while len(idxs) > 0: 56 | last = len(idxs) - 1 57 | i = idxs[last] 58 | pick.append(i) 59 | 60 | xx1 = np.maximum(x1[i], x1[idxs[:last]]) 61 | yy1 = np.maximum(y1[i], y1[idxs[:last]]) 62 | xx2 = np.minimum(x2[i], x2[idxs[:last]]) 63 | yy2 = np.minimum(y2[i], y2[idxs[:last]]) 64 | 65 | w = np.maximum(0, xx2 - xx1 + 1) 66 | h = np.maximum(0, yy2 - yy1 + 1) 67 | 68 | overlap = (w * h) / area[idxs[:last]] 69 | 70 | idxs = np.delete( 71 | idxs, np.concatenate( 72 | ([last], np.where(overlap > max_bbox_overlap)[0]))) 73 | 74 | return pick 75 | -------------------------------------------------------------------------------- /deep_sort/track.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from .nn_matching import _cosine_distance, custom_cosine_similarity 3 | import statistics as st 4 | import numpy as np 5 | import time 6 | import sys 7 | import os 8 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))) 9 | from deepface.commons import functions, distance as dst 10 | 11 | def find_face(features, face_db, max_face_threshold): 12 | face_score = dict() 13 | for face in face_db: 14 | if face_db[face]["used"]: 15 | face_score[face] = 1 16 | continue 17 | cos_harmonic = [] 18 | # cos_mat = _cosine_distance(features, face_db[face]["db"], data_is_normalized=True) 19 | cos_mat = custom_cosine_similarity(features, face_db[face]["db"]) 20 | # print(cos_mat) 21 | 22 | # time.sleep(7) 23 | 24 | # for f in face_db[face]: 25 | # cos_harmonic.append(_nn_cosine_distance(feature, f)) 26 | # cos_harmonic = st.harmonic_mean(list(cos_mat.flatten())) 27 | # cos_harmonic = np.mean(cos_mat.flatten()) 28 | # face_score[face] = cos_harmonic 29 | 30 | # distance = dst.findCosineDistance(features[0], face_db[face]["db"][0]) 31 | # print(distance) 32 | 33 | face_score[face] = cos_mat.min(axis=1) 34 | # print(face_score) 35 | print(face_score) 36 | ans_face = min(face_score,key=face_score.get) 37 | 38 | # print(ans_face, face_score) 39 | 40 | # time.sleep(5) 41 | 42 | if face_score[ans_face] < max_face_threshold: 43 | face_db[ans_face]["used"] = True 44 | return ans_face 45 | else: 46 | return "" 47 | 48 | class TrackState: 49 | """ 50 | Enumeration type for the single target track state. Newly created tracks are 51 | classified as `tentative` until enough evidence has been collected. Then, 52 | the track state is changed to `confirmed`. Tracks that are no longer alive 53 | are classified as `deleted` to mark them for removal from the set of active 54 | tracks. 55 | 56 | """ 57 | 58 | Tentative = 1 59 | Confirmed = 2 60 | Deleted = 3 61 | 62 | 63 | class Track: 64 | """ 65 | A single target track with state space `(x, y, a, h)` and associated 66 | velocities, where `(x, y)` is the center of the bounding box, `a` is the 67 | aspect ratio and `h` is the height. 68 | 69 | Parameters 70 | ---------- 71 | mean : ndarray 72 | Mean vector of the initial state distribution. 73 | covariance : ndarray 74 | Covariance matrix of the initial state distribution. 75 | track_id : int 76 | A unique track identifier. 77 | n_init : int 78 | Number of consecutive detections before the track is confirmed. The 79 | track state is set to `Deleted` if a miss occurs within the first 80 | `n_init` frames. 81 | max_age : int 82 | The maximum number of consecutive misses before the track state is 83 | set to `Deleted`. 84 | feature : Optional[ndarray] 85 | Feature vector of the detection this track originates from. If not None, 86 | this feature is added to the `features` cache. 87 | 88 | Attributes 89 | ---------- 90 | mean : ndarray 91 | Mean vector of the initial state distribution. 92 | covariance : ndarray 93 | Covariance matrix of the initial state distribution. 94 | track_id : int 95 | A unique track identifier. 96 | hits : int 97 | Total number of measurement updates. 98 | age : int 99 | Total number of frames since first occurance. 100 | time_since_update : int 101 | Total number of frames since last measurement update. 102 | state : TrackState 103 | The current track state. 104 | features : List[ndarray] 105 | A cache of features. On each measurement update, the associated feature 106 | vector is added to this list. 107 | face_name : string 108 | 기존 데이터 베이스에 존재하는 이름 찾기 109 | """ 110 | 111 | def __init__(self, mean, covariance, track_id, n_init, max_age, face_db, max_face_threshold, 112 | feature=None, class_name=None): 113 | self.mean = mean 114 | self.covariance = covariance 115 | self.track_id = track_id 116 | self.hits = 1 117 | self.age = 1 118 | self.time_since_update = 0 119 | 120 | 121 | self.state = TrackState.Tentative 122 | self.features = [] 123 | self.face_name = "" 124 | if feature is not None: 125 | self.features.append(feature) 126 | self.face_name = find_face(self.features, face_db, max_face_threshold) 127 | 128 | self._n_init = n_init 129 | self._max_age = max_age 130 | self.class_name = class_name 131 | 132 | # def __init__(self, mean, covariance, track_id, n_init, max_age, 133 | # feature=None, class_name=None): 134 | # self.mean = mean 135 | # self.covariance = covariance 136 | # self.track_id = track_id 137 | # self.hits = 1 138 | # self.age = 1 139 | # self.time_since_update = 0 140 | 141 | 142 | # self.state = TrackState.Tentative 143 | # self.features = [] 144 | # # self.face_name = "" 145 | # if feature is not None: 146 | # self.features.append(feature) 147 | # # self.face_name = find_face(self.features, face_db, max_face_threshold) 148 | 149 | # self._n_init = n_init 150 | # self._max_age = max_age 151 | # self.class_name = class_name 152 | 153 | 154 | 155 | 156 | def to_tlwh(self): 157 | """Get current position in bounding box format `(top left x, top left y, 158 | width, height)`. 159 | 160 | Returns 161 | ------- 162 | ndarray 163 | The bounding box. 164 | 165 | """ 166 | ret = self.mean[:4].copy() 167 | ret[2] *= ret[3] 168 | ret[:2] -= ret[2:] / 2 169 | return ret 170 | 171 | def to_tlbr(self): 172 | """Get current position in bounding box format `(min x, miny, max x, 173 | max y)`. 174 | 175 | Returns 176 | ------- 177 | ndarray 178 | The bounding box. 179 | 180 | """ 181 | ret = self.to_tlwh() 182 | ret[2:] = ret[:2] + ret[2:] 183 | return ret 184 | 185 | def get_class(self): 186 | return self.class_name 187 | 188 | def get_face_name(self): 189 | return self.face_name 190 | 191 | def predict(self, kf): 192 | """Propagate the state distribution to the current time step using a 193 | Kalman filter prediction step. 194 | 195 | Parameters 196 | ---------- 197 | kf : kalman_filter.KalmanFilter 198 | The Kalman filter. 199 | 200 | """ 201 | self.mean, self.covariance = kf.predict(self.mean, self.covariance) 202 | self.age += 1 203 | self.time_since_update += 1 204 | 205 | def update(self, kf, detection): 206 | """Perform Kalman filter measurement update step and update the feature 207 | cache. 208 | 209 | Parameters 210 | ---------- 211 | kf : kalman_filter.KalmanFilter 212 | The Kalman filter. 213 | detection : Detection 214 | The associated detection. 215 | 216 | """ 217 | self.mean, self.covariance = kf.update( 218 | self.mean, self.covariance, detection.to_xyah()) 219 | self.features.append(detection.feature) 220 | 221 | self.hits += 1 222 | self.time_since_update = 0 223 | if self.state == TrackState.Tentative and self.hits >= self._n_init: 224 | self.state = TrackState.Confirmed 225 | 226 | def mark_missed(self, face_db): 227 | """Mark this track as missed (no association at the current time step). 228 | """ 229 | if self.state == TrackState.Tentative: 230 | if self.face_name != "": 231 | face_db[self.face_name]["used"] = False 232 | self.state = TrackState.Deleted 233 | elif self.time_since_update > self._max_age: 234 | if self.face_name != "": 235 | face_db[self.face_name]["used"] = False 236 | self.state = TrackState.Deleted 237 | 238 | def is_tentative(self): 239 | """Returns True if this track is tentative (unconfirmed). 240 | """ 241 | return self.state == TrackState.Tentative 242 | 243 | def is_confirmed(self): 244 | """Returns True if this track is confirmed.""" 245 | return self.state == TrackState.Confirmed 246 | 247 | def is_deleted(self): 248 | """Returns True if this track is dead and should be deleted.""" 249 | return self.state == TrackState.Deleted 250 | 251 | def find_face_name(self, face_db, max_face_threshold): 252 | face_score = dict() 253 | for face in face_db: 254 | if face_db[face]["used"]: 255 | face_score[face] = 1 256 | continue 257 | cos_mat = custom_cosine_similarity(self.features, face_db[face]["db"]) 258 | # print(cos_mat) 259 | 260 | # time.sleep(7) 261 | 262 | face_score[face] = cos_mat.min(axis=1).min(axis=0) 263 | # print(face_score) 264 | ans_face = min(face_score,key=face_score.get) 265 | 266 | # print(ans_face, face_score) 267 | 268 | # time.sleep(5) 269 | 270 | if face_score[ans_face] < max_face_threshold: 271 | face_db[ans_face]["used"] = True 272 | self.face_name = ans_face 273 | else: 274 | self.face_name = "" -------------------------------------------------------------------------------- /deep_sort/tracker.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | from __future__ import absolute_import 3 | import numpy as np 4 | from . import kalman_filter 5 | from . import linear_assignment 6 | from . import iou_matching 7 | from .track import Track 8 | 9 | 10 | class Tracker: 11 | """ 12 | This is the multi-target tracker. 13 | 14 | Parameters 15 | ---------- 16 | metric : nn_matching.NearestNeighborDistanceMetric 17 | A distance metric for measurement-to-track association. 18 | max_age : int 19 | Maximum number of missed misses before a track is deleted. 20 | n_init : int 21 | Number of consecutive detections before the track is confirmed. The 22 | track state is set to `Deleted` if a miss occurs within the first 23 | `n_init` frames. 24 | 25 | Attributes 26 | ---------- 27 | metric : nn_matching.NearestNeighborDistanceMetric 28 | The distance metric used for measurement to track association. 29 | max_age : int 30 | Maximum number of missed misses before a track is deleted. 31 | n_init : int 32 | Number of frames that a track remains in initialization phase. 33 | kf : kalman_filter.KalmanFilter 34 | A Kalman filter to filter target trajectories in image space. 35 | tracks : List[Track] 36 | The list of active tracks at the current time step. 37 | 38 | """ 39 | 40 | def __init__(self, metric, max_iou_distance=0.7, max_age=3, n_init=3): # 빠르게 객체를 지워주기 위해 max_age를 30에서 3으로 변경 41 | self.metric = metric 42 | self.max_iou_distance = max_iou_distance 43 | self.max_age = max_age 44 | self.n_init = n_init 45 | 46 | self.kf = kalman_filter.KalmanFilter() 47 | self.tracks = [] 48 | self._next_id = 1 49 | 50 | def predict(self): 51 | """Propagate track state distributions one time step forward. 52 | 53 | This function should be called once every time step, before `update`. 54 | """ 55 | for track in self.tracks: 56 | track.predict(self.kf) 57 | 58 | 59 | # # 시작할 때 db에서 찾는 코드 60 | def update(self, detections, face_db, max_face_threshold): 61 | """Perform measurement update and track management. 62 | 63 | Parameters 64 | ---------- 65 | detections : List[deep_sort.detection.Detection] 66 | A list of detections at the current time step. 67 | 68 | """ 69 | # Run matching cascade. 70 | matches, unmatched_tracks, unmatched_detections = \ 71 | self._match(detections) 72 | 73 | # Update track set. 74 | for i in face_db: 75 | face_db[i]["used"] = False # 다 탐지 안된걸로 변경 76 | 77 | for track_idx, detection_idx in matches: 78 | self.tracks[track_idx].update( 79 | self.kf, detections[detection_idx]) 80 | 81 | if self.tracks[track_idx].get_face_name() == "": 82 | self.tracks[track_idx].find_face_name(face_db, max_face_threshold) 83 | 84 | for track_idx in unmatched_tracks: 85 | self.tracks[track_idx].mark_missed(face_db) # 못찾으면 face_db에서 지워준다 86 | for detection_idx in unmatched_detections: 87 | self._initiate_track(detections[detection_idx], face_db, max_face_threshold) 88 | self.tracks = [t for t in self.tracks if not t.is_deleted()] 89 | 90 | # Update distance metric. 91 | active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 92 | features, targets = [], [] 93 | for track in self.tracks: 94 | if not track.is_confirmed(): 95 | continue 96 | features += track.features 97 | targets += [track.track_id for _ in track.features] 98 | track.features = [] 99 | self.metric.partial_fit( 100 | np.asarray(features), np.asarray(targets), active_targets) 101 | 102 | # def update(self, detections): 103 | # """Perform measurement update and track management. 104 | 105 | # Parameters 106 | # ---------- 107 | # detections : List[deep_sort.detection.Detection] 108 | # A list of detections at the current time step. 109 | 110 | # """ 111 | # # Run matching cascade. 112 | # matches, unmatched_tracks, unmatched_detections = \ 113 | # self._match(detections) 114 | 115 | # # Update track set. 116 | # for track_idx, detection_idx in matches: 117 | # self.tracks[track_idx].update( 118 | # self.kf, detections[detection_idx]) 119 | # for track_idx in unmatched_tracks: 120 | # self.tracks[track_idx].mark_missed() 121 | # for detection_idx in unmatched_detections: 122 | # self._initiate_track(detections[detection_idx]) 123 | # self.tracks = [t for t in self.tracks if not t.is_deleted()] 124 | 125 | # # Update distance metric. 126 | # active_targets = [t.track_id for t in self.tracks if t.is_confirmed()] 127 | # features, targets = [], [] 128 | # for track in self.tracks: 129 | # if not track.is_confirmed(): 130 | # continue 131 | # features += track.features 132 | # targets += [track.track_id for _ in track.features] 133 | # track.features = [] 134 | # self.metric.partial_fit( 135 | # np.asarray(features), np.asarray(targets), active_targets) 136 | 137 | 138 | def _match(self, detections): 139 | 140 | def gated_metric(tracks, dets, track_indices, detection_indices): 141 | features = np.array([dets[i].feature for i in detection_indices]) 142 | targets = np.array([tracks[i].track_id for i in track_indices]) 143 | cost_matrix = self.metric.distance(features, targets) 144 | cost_matrix = linear_assignment.gate_cost_matrix( 145 | self.kf, cost_matrix, tracks, dets, track_indices, 146 | detection_indices) 147 | 148 | return cost_matrix 149 | 150 | # Split track set into confirmed and unconfirmed tracks. 151 | confirmed_tracks = [ 152 | i for i, t in enumerate(self.tracks) if t.is_confirmed()] 153 | unconfirmed_tracks = [ 154 | i for i, t in enumerate(self.tracks) if not t.is_confirmed()] 155 | 156 | # Associate confirmed tracks using appearance features. 157 | matches_a, unmatched_tracks_a, unmatched_detections = \ 158 | linear_assignment.matching_cascade( 159 | gated_metric, self.metric.matching_threshold, self.max_age, 160 | self.tracks, detections, confirmed_tracks) 161 | 162 | # Associate remaining tracks together with unconfirmed tracks using IOU. 163 | iou_track_candidates = unconfirmed_tracks + [ 164 | k for k in unmatched_tracks_a if 165 | self.tracks[k].time_since_update == 1] 166 | unmatched_tracks_a = [ 167 | k for k in unmatched_tracks_a if 168 | self.tracks[k].time_since_update != 1] 169 | matches_b, unmatched_tracks_b, unmatched_detections = \ 170 | linear_assignment.min_cost_matching( 171 | iou_matching.iou_cost, self.max_iou_distance, self.tracks, 172 | detections, iou_track_candidates, unmatched_detections) 173 | 174 | matches = matches_a + matches_b 175 | unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b)) 176 | return matches, unmatched_tracks, unmatched_detections 177 | 178 | 179 | # 시작할 때 클래스 초기화 코드 180 | def _initiate_track(self, detection, face_db, max_face_threshold): 181 | mean, covariance = self.kf.initiate(detection.to_xyah()) 182 | class_name = detection.get_class() 183 | self.tracks.append(Track( 184 | mean, covariance, self._next_id, self.n_init, self.max_age, 185 | face_db, max_face_threshold, detection.feature, class_name)) 186 | self._next_id += 1 187 | 188 | # def _initiate_track(self, detection): 189 | # mean, covariance = self.kf.initiate(detection.to_xyah()) 190 | # class_name = detection.get_class() 191 | # self.tracks.append(Track( 192 | # mean, covariance, self._next_id, self.n_init, self.max_age, 193 | # detection.feature, class_name)) 194 | # self._next_id += 1 195 | -------------------------------------------------------------------------------- /deepface/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/__init__.py -------------------------------------------------------------------------------- /deepface/basemodels/ArcFace.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python.keras import backend 2 | from tensorflow.python.keras.engine import training 3 | from tensorflow.python.keras.utils import data_utils 4 | from tensorflow.python.keras.utils import layer_utils 5 | from tensorflow.python.lib.io import file_io 6 | import tensorflow 7 | from tensorflow import keras 8 | 9 | import os 10 | from pathlib import Path 11 | import gdown 12 | 13 | def loadModel(model_path): 14 | base_model = ResNet34() 15 | inputs = base_model.inputs[0] 16 | arcface_model = base_model.outputs[0] 17 | arcface_model = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5)(arcface_model) 18 | arcface_model = keras.layers.Dropout(0.4)(arcface_model) 19 | arcface_model = keras.layers.Flatten()(arcface_model) 20 | arcface_model = keras.layers.Dense(512, activation=None, use_bias=True, kernel_initializer="glorot_normal")(arcface_model) 21 | embedding = keras.layers.BatchNormalization(momentum=0.9, epsilon=2e-5, name="embedding", scale=True)(arcface_model) 22 | # embedding = tensorflow.reshape(embedding, [-1, 512, 1]) 23 | # embedding = keras.layers.MaxPooling1D(pool_size=4, strides=4, padding="valid")(embedding) 24 | # embedding = keras.layers.Flatten()(embedding) 25 | model = keras.models.Model(inputs, embedding, name=base_model.name) 26 | 27 | #--------------------------------------- 28 | #check the availability of pre-trained weights 29 | 30 | # home = str(Path.home()) 31 | url = "https://drive.google.com/uc?id=1LVB3CdVejpmGHM28BpqqkbZP5hDEcdZY" 32 | # file_name = "arcface_weights.h5" 33 | # output = home+'/deepface/weights/'+file_name 34 | # print(output) 35 | 36 | 37 | # if os.path.isfile(model_path) != True: 38 | 39 | # print(file_name," will be downloaded to ",model_path) 40 | # gdown.download(url, model_path, quiet=False) 41 | 42 | #--------------------------------------- 43 | 44 | try: 45 | model.load_weights(model_path) 46 | except: 47 | print("pre-trained weights could not be loaded.") 48 | # print("You might try to download it from the url ", url," and copy to ",output," manually") 49 | 50 | return model 51 | 52 | def ResNet34(): 53 | 54 | img_input = tensorflow.keras.layers.Input(shape=(112, 112, 3)) 55 | 56 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name='conv1_pad')(img_input) 57 | x = tensorflow.keras.layers.Conv2D(64, 3, strides=1, use_bias=False, kernel_initializer='glorot_normal', name='conv1_conv')(x) 58 | x = tensorflow.keras.layers.BatchNormalization(axis=3, epsilon=2e-5, momentum=0.9, name='conv1_bn')(x) 59 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name='conv1_prelu')(x) 60 | x = stack_fn(x) 61 | 62 | model = training.Model(img_input, x, name='ResNet34') 63 | 64 | return model 65 | 66 | def block1(x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): 67 | bn_axis = 3 68 | 69 | if conv_shortcut: 70 | shortcut = tensorflow.keras.layers.Conv2D(filters, 1, strides=stride, use_bias=False, kernel_initializer='glorot_normal', name=name + '_0_conv')(x) 71 | shortcut = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_0_bn')(shortcut) 72 | else: 73 | shortcut = x 74 | 75 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_1_bn')(x) 76 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_1_pad')(x) 77 | x = tensorflow.keras.layers.Conv2D(filters, 3, strides=1, kernel_initializer='glorot_normal', use_bias=False, name=name + '_1_conv')(x) 78 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_2_bn')(x) 79 | x = tensorflow.keras.layers.PReLU(shared_axes=[1, 2], name=name + '_1_prelu')(x) 80 | 81 | x = tensorflow.keras.layers.ZeroPadding2D(padding=1, name=name + '_2_pad')(x) 82 | x = tensorflow.keras.layers.Conv2D(filters, kernel_size, strides=stride, kernel_initializer='glorot_normal', use_bias=False, name=name + '_2_conv')(x) 83 | x = tensorflow.keras.layers.BatchNormalization(axis=bn_axis, epsilon=2e-5, momentum=0.9, name=name + '_3_bn')(x) 84 | 85 | x = tensorflow.keras.layers.Add(name=name + '_add')([shortcut, x]) 86 | return x 87 | 88 | def stack1(x, filters, blocks, stride1=2, name=None): 89 | x = block1(x, filters, stride=stride1, name=name + '_block1') 90 | for i in range(2, blocks + 1): 91 | x = block1(x, filters, conv_shortcut=False, name=name + '_block' + str(i)) 92 | return x 93 | 94 | def stack_fn(x): 95 | x = stack1(x, 64, 3, name='conv2') 96 | x = stack1(x, 128, 4, name='conv3') 97 | x = stack1(x, 256, 6, name='conv4') 98 | return stack1(x, 512, 3, name='conv5') -------------------------------------------------------------------------------- /deepface/basemodels/Boosting.py: -------------------------------------------------------------------------------- 1 | from deepface import DeepFace 2 | from tqdm import tqdm 3 | import os 4 | from os import path 5 | from pathlib import Path 6 | import numpy as np 7 | import gdown 8 | from deepface.commons import functions, distance as dst 9 | 10 | def loadModel(): 11 | 12 | model_names = ['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace'] 13 | 14 | model = {} 15 | 16 | model_pbar = tqdm(range(0, 4), desc='Face recognition models') 17 | 18 | for index in model_pbar: 19 | 20 | model_name = model_names[index] 21 | 22 | model_pbar.set_description("Loading %s" % (model_name)) 23 | model[model_name] = DeepFace.build_model(model_name) 24 | 25 | return model 26 | 27 | def validate_model(model): 28 | #validate model dictionary because it might be passed from input as pre-trained 29 | found_models = [] 30 | for key, value in model.items(): 31 | found_models.append(key) 32 | 33 | if ('VGG-Face' in found_models) and ('Facenet' in found_models) and ('OpenFace' in found_models) and ('DeepFace' in found_models): 34 | #print("Ensemble learning will be applied for ", found_models," models") 35 | valid = True 36 | else: 37 | 38 | missing_ones = set(['VGG-Face', 'Facenet', 'OpenFace', 'DeepFace']) - set(found_models) 39 | 40 | raise ValueError("You'd like to apply ensemble method and pass pre-built models but models must contain [VGG-Face, Facenet, OpenFace, DeepFace] but you passed "+str(found_models)+". So, you need to pass "+str(missing_ones)+" models as well.") 41 | 42 | def build_gbm(): 43 | 44 | #this is not a must dependency 45 | import lightgbm as lgb #lightgbm==2.3.1 46 | 47 | home = str(Path.home()) 48 | 49 | if os.path.isfile(home+'/.deepface/weights/face-recognition-ensemble-model.txt') != True: 50 | print("face-recognition-ensemble-model.txt will be downloaded...") 51 | url = 'https://raw.githubusercontent.com/serengil/deepface/master/deepface/models/face-recognition-ensemble-model.txt' 52 | output = home+'/.deepface/weights/face-recognition-ensemble-model.txt' 53 | gdown.download(url, output, quiet=False) 54 | 55 | ensemble_model_path = home+'/.deepface/weights/face-recognition-ensemble-model.txt' 56 | 57 | deepface_ensemble = lgb.Booster(model_file = ensemble_model_path) 58 | 59 | return deepface_ensemble 60 | -------------------------------------------------------------------------------- /deepface/basemodels/DeepID.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | import zipfile 5 | 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model 8 | from tensorflow.keras.layers import Conv2D, Activation, Input, Add, MaxPooling2D, Flatten, Dense, Dropout 9 | 10 | #------------------------------------- 11 | 12 | def loadModel(url = 'https://drive.google.com/uc?id=1uRLtBCTQQAvHJ_KVrdbRJiCKxU8m5q2J'): 13 | 14 | myInput = Input(shape=(55, 47, 3)) 15 | 16 | x = Conv2D(20, (4, 4), name='Conv1', activation='relu', input_shape=(55, 47, 3))(myInput) 17 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool1')(x) 18 | x = Dropout(rate=0.99, name='D1')(x) 19 | 20 | x = Conv2D(40, (3, 3), name='Conv2', activation='relu')(x) 21 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool2')(x) 22 | x = Dropout(rate=0.99, name='D2')(x) 23 | 24 | x = Conv2D(60, (3, 3), name='Conv3', activation='relu')(x) 25 | x = MaxPooling2D(pool_size=2, strides=2, name='Pool3')(x) 26 | x = Dropout(rate=0.99, name='D3')(x) 27 | 28 | x1 = Flatten()(x) 29 | fc11 = Dense(160, name = 'fc11')(x1) 30 | 31 | x2 = Conv2D(80, (2, 2), name='Conv4', activation='relu')(x) 32 | x2 = Flatten()(x2) 33 | fc12 = Dense(160, name = 'fc12')(x2) 34 | 35 | y = Add()([fc11, fc12]) 36 | y = Activation('relu', name = 'deepid')(y) 37 | 38 | model = Model(inputs=[myInput], outputs=y) 39 | 40 | #--------------------------------- 41 | 42 | home = str(Path.home()) 43 | 44 | if os.path.isfile(home+'/.deepface/weights/deepid_keras_weights.h5') != True: 45 | print("deepid_keras_weights.h5 will be downloaded...") 46 | 47 | output = home+'/.deepface/weights/deepid_keras_weights.h5' 48 | gdown.download(url, output, quiet=False) 49 | 50 | model.load_weights(home+'/.deepface/weights/deepid_keras_weights.h5') 51 | 52 | return model -------------------------------------------------------------------------------- /deepface/basemodels/DlibResNet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import zipfile 3 | import bz2 4 | import gdown 5 | import numpy as np 6 | from pathlib import Path 7 | 8 | class DlibResNet: 9 | 10 | def __init__(self): 11 | 12 | #this is not a must dependency 13 | import dlib #19.20.0 14 | 15 | self.layers = [DlibMetaData()] 16 | 17 | #--------------------- 18 | 19 | home = str(Path.home()) 20 | weight_file = home+'/.deepface/weights/dlib_face_recognition_resnet_model_v1.dat' 21 | 22 | #--------------------- 23 | 24 | #download pre-trained model if it does not exist 25 | if os.path.isfile(weight_file) != True: 26 | print("dlib_face_recognition_resnet_model_v1.dat is going to be downloaded") 27 | 28 | url = "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" 29 | output = home+'/.deepface/weights/'+url.split("/")[-1] 30 | gdown.download(url, output, quiet=False) 31 | 32 | zipfile = bz2.BZ2File(output) 33 | data = zipfile.read() 34 | newfilepath = output[:-4] #discard .bz2 extension 35 | open(newfilepath, 'wb').write(data) 36 | 37 | #--------------------- 38 | 39 | model = dlib.face_recognition_model_v1(weight_file) 40 | self.__model = model 41 | 42 | #--------------------- 43 | 44 | return None #classes must return None 45 | 46 | def predict(self, img_aligned): 47 | 48 | #functions.detectFace returns 4 dimensional images 49 | if len(img_aligned.shape) == 4: 50 | img_aligned = img_aligned[0] 51 | 52 | #functions.detectFace returns bgr images 53 | img_aligned = img_aligned[:,:,::-1] #bgr to rgb 54 | 55 | #deepface.detectFace returns an array in scale of [0, 1] but dlib expects in scale of [0, 255] 56 | if img_aligned.max() <= 1: 57 | img_aligned = img_aligned * 255 58 | 59 | img_aligned = img_aligned.astype(np.uint8) 60 | 61 | model = self.__model 62 | 63 | img_representation = model.compute_face_descriptor(img_aligned) 64 | 65 | img_representation = np.array(img_representation) 66 | img_representation = np.expand_dims(img_representation, axis = 0) 67 | 68 | return img_representation 69 | 70 | class DlibMetaData: 71 | def __init__(self): 72 | self.input_shape = [[1, 150, 150, 3]] -------------------------------------------------------------------------------- /deepface/basemodels/DlibWrapper.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels.DlibResNet import DlibResNet 2 | 3 | def loadModel(): 4 | return DlibResNet() -------------------------------------------------------------------------------- /deepface/basemodels/FbDeepFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | import zipfile 5 | 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model, Sequential 8 | from tensorflow.keras.layers import Convolution2D, LocallyConnected2D, MaxPooling2D, Flatten, Dense, Dropout 9 | 10 | #------------------------------------- 11 | 12 | def loadModel(url = 'https://github.com/swghosh/DeepFace/releases/download/weights-vggface2-2d-aligned/VGGFace2_DeepFace_weights_val-0.9034.h5.zip'): 13 | base_model = Sequential() 14 | base_model.add(Convolution2D(32, (11, 11), activation='relu', name='C1', input_shape=(152, 152, 3))) 15 | base_model.add(MaxPooling2D(pool_size=3, strides=2, padding='same', name='M2')) 16 | base_model.add(Convolution2D(16, (9, 9), activation='relu', name='C3')) 17 | base_model.add(LocallyConnected2D(16, (9, 9), activation='relu', name='L4')) 18 | base_model.add(LocallyConnected2D(16, (7, 7), strides=2, activation='relu', name='L5') ) 19 | base_model.add(LocallyConnected2D(16, (5, 5), activation='relu', name='L6')) 20 | base_model.add(Flatten(name='F0')) 21 | base_model.add(Dense(4096, activation='relu', name='F7')) 22 | base_model.add(Dropout(rate=0.5, name='D0')) 23 | base_model.add(Dense(8631, activation='softmax', name='F8')) 24 | 25 | #--------------------------------- 26 | 27 | home = str(Path.home()) 28 | 29 | if os.path.isfile(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5') != True: 30 | print("VGGFace2_DeepFace_weights_val-0.9034.h5 will be downloaded...") 31 | 32 | output = home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5.zip' 33 | 34 | gdown.download(url, output, quiet=False) 35 | 36 | #unzip VGGFace2_DeepFace_weights_val-0.9034.h5.zip 37 | with zipfile.ZipFile(output, 'r') as zip_ref: 38 | zip_ref.extractall(home+'/.deepface/weights/') 39 | 40 | base_model.load_weights(home+'/.deepface/weights/VGGFace2_DeepFace_weights_val-0.9034.h5') 41 | 42 | #drop F8 and D0. F7 is the representation layer. 43 | deepface_model = Model(inputs=base_model.layers[0].input, outputs=base_model.layers[-3].output) 44 | 45 | return deepface_model -------------------------------------------------------------------------------- /deepface/basemodels/OpenFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | 5 | import tensorflow as tf 6 | from tensorflow import keras 7 | from tensorflow.keras.models import Model, Sequential 8 | from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate 9 | from tensorflow.keras.layers import Dense, Activation, Lambda, Flatten, BatchNormalization 10 | from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D 11 | from tensorflow.keras.models import load_model 12 | from tensorflow.keras import backend as K 13 | 14 | #--------------------------------------- 15 | 16 | def loadModel(url = 'https://drive.google.com/uc?id=1LSe1YCV1x-BfNnfb7DFZTNpv_Q9jITxn'): 17 | myInput = Input(shape=(96, 96, 3)) 18 | 19 | x = ZeroPadding2D(padding=(3, 3), input_shape=(96, 96, 3))(myInput) 20 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) 21 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn1')(x) 22 | x = Activation('relu')(x) 23 | x = ZeroPadding2D(padding=(1, 1))(x) 24 | x = MaxPooling2D(pool_size=3, strides=2)(x) 25 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_1')(x) 26 | x = Conv2D(64, (1, 1), name='conv2')(x) 27 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn2')(x) 28 | x = Activation('relu')(x) 29 | x = ZeroPadding2D(padding=(1, 1))(x) 30 | x = Conv2D(192, (3, 3), name='conv3')(x) 31 | x = BatchNormalization(axis=3, epsilon=0.00001, name='bn3')(x) 32 | x = Activation('relu')(x) 33 | x = Lambda(lambda x: tf.nn.lrn(x, alpha=1e-4, beta=0.75), name='lrn_2')(x) #x is equal added 34 | x = ZeroPadding2D(padding=(1, 1))(x) 35 | x = MaxPooling2D(pool_size=3, strides=2)(x) 36 | 37 | # Inception3a 38 | inception_3a_3x3 = Conv2D(96, (1, 1), name='inception_3a_3x3_conv1')(x) 39 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn1')(inception_3a_3x3) 40 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3) 41 | inception_3a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3a_3x3) 42 | inception_3a_3x3 = Conv2D(128, (3, 3), name='inception_3a_3x3_conv2')(inception_3a_3x3) 43 | inception_3a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_3x3_bn2')(inception_3a_3x3) 44 | inception_3a_3x3 = Activation('relu')(inception_3a_3x3) 45 | 46 | inception_3a_5x5 = Conv2D(16, (1, 1), name='inception_3a_5x5_conv1')(x) 47 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn1')(inception_3a_5x5) 48 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5) 49 | inception_3a_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3a_5x5) 50 | inception_3a_5x5 = Conv2D(32, (5, 5), name='inception_3a_5x5_conv2')(inception_3a_5x5) 51 | inception_3a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_5x5_bn2')(inception_3a_5x5) 52 | inception_3a_5x5 = Activation('relu')(inception_3a_5x5) 53 | 54 | inception_3a_pool = MaxPooling2D(pool_size=3, strides=2)(x) 55 | inception_3a_pool = Conv2D(32, (1, 1), name='inception_3a_pool_conv')(inception_3a_pool) 56 | inception_3a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_pool_bn')(inception_3a_pool) 57 | inception_3a_pool = Activation('relu')(inception_3a_pool) 58 | inception_3a_pool = ZeroPadding2D(padding=((3, 4), (3, 4)))(inception_3a_pool) 59 | 60 | inception_3a_1x1 = Conv2D(64, (1, 1), name='inception_3a_1x1_conv')(x) 61 | inception_3a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3a_1x1_bn')(inception_3a_1x1) 62 | inception_3a_1x1 = Activation('relu')(inception_3a_1x1) 63 | 64 | inception_3a = concatenate([inception_3a_3x3, inception_3a_5x5, inception_3a_pool, inception_3a_1x1], axis=3) 65 | 66 | # Inception3b 67 | inception_3b_3x3 = Conv2D(96, (1, 1), name='inception_3b_3x3_conv1')(inception_3a) 68 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn1')(inception_3b_3x3) 69 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3) 70 | inception_3b_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3b_3x3) 71 | inception_3b_3x3 = Conv2D(128, (3, 3), name='inception_3b_3x3_conv2')(inception_3b_3x3) 72 | inception_3b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_3x3_bn2')(inception_3b_3x3) 73 | inception_3b_3x3 = Activation('relu')(inception_3b_3x3) 74 | 75 | inception_3b_5x5 = Conv2D(32, (1, 1), name='inception_3b_5x5_conv1')(inception_3a) 76 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn1')(inception_3b_5x5) 77 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5) 78 | inception_3b_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3b_5x5) 79 | inception_3b_5x5 = Conv2D(64, (5, 5), name='inception_3b_5x5_conv2')(inception_3b_5x5) 80 | inception_3b_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_5x5_bn2')(inception_3b_5x5) 81 | inception_3b_5x5 = Activation('relu')(inception_3b_5x5) 82 | 83 | inception_3b_pool = Lambda(lambda x: x**2, name='power2_3b')(inception_3a) 84 | inception_3b_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_3b_pool) 85 | inception_3b_pool = Lambda(lambda x: x*9, name='mult9_3b')(inception_3b_pool) 86 | inception_3b_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_3b')(inception_3b_pool) 87 | inception_3b_pool = Conv2D(64, (1, 1), name='inception_3b_pool_conv')(inception_3b_pool) 88 | inception_3b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_pool_bn')(inception_3b_pool) 89 | inception_3b_pool = Activation('relu')(inception_3b_pool) 90 | inception_3b_pool = ZeroPadding2D(padding=(4, 4))(inception_3b_pool) 91 | 92 | inception_3b_1x1 = Conv2D(64, (1, 1), name='inception_3b_1x1_conv')(inception_3a) 93 | inception_3b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3b_1x1_bn')(inception_3b_1x1) 94 | inception_3b_1x1 = Activation('relu')(inception_3b_1x1) 95 | 96 | inception_3b = concatenate([inception_3b_3x3, inception_3b_5x5, inception_3b_pool, inception_3b_1x1], axis=3) 97 | 98 | # Inception3c 99 | inception_3c_3x3 = Conv2D(128, (1, 1), strides=(1, 1), name='inception_3c_3x3_conv1')(inception_3b) 100 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn1')(inception_3c_3x3) 101 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3) 102 | inception_3c_3x3 = ZeroPadding2D(padding=(1, 1))(inception_3c_3x3) 103 | inception_3c_3x3 = Conv2D(256, (3, 3), strides=(2, 2), name='inception_3c_3x3_conv'+'2')(inception_3c_3x3) 104 | inception_3c_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_3x3_bn'+'2')(inception_3c_3x3) 105 | inception_3c_3x3 = Activation('relu')(inception_3c_3x3) 106 | 107 | inception_3c_5x5 = Conv2D(32, (1, 1), strides=(1, 1), name='inception_3c_5x5_conv1')(inception_3b) 108 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn1')(inception_3c_5x5) 109 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5) 110 | inception_3c_5x5 = ZeroPadding2D(padding=(2, 2))(inception_3c_5x5) 111 | inception_3c_5x5 = Conv2D(64, (5, 5), strides=(2, 2), name='inception_3c_5x5_conv'+'2')(inception_3c_5x5) 112 | inception_3c_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_3c_5x5_bn'+'2')(inception_3c_5x5) 113 | inception_3c_5x5 = Activation('relu')(inception_3c_5x5) 114 | 115 | inception_3c_pool = MaxPooling2D(pool_size=3, strides=2)(inception_3b) 116 | inception_3c_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_3c_pool) 117 | 118 | inception_3c = concatenate([inception_3c_3x3, inception_3c_5x5, inception_3c_pool], axis=3) 119 | 120 | #inception 4a 121 | inception_4a_3x3 = Conv2D(96, (1, 1), strides=(1, 1), name='inception_4a_3x3_conv'+'1')(inception_3c) 122 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'1')(inception_4a_3x3) 123 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3) 124 | inception_4a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4a_3x3) 125 | inception_4a_3x3 = Conv2D(192, (3, 3), strides=(1, 1), name='inception_4a_3x3_conv'+'2')(inception_4a_3x3) 126 | inception_4a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_3x3_bn'+'2')(inception_4a_3x3) 127 | inception_4a_3x3 = Activation('relu')(inception_4a_3x3) 128 | 129 | inception_4a_5x5 = Conv2D(32, (1,1), strides=(1,1), name='inception_4a_5x5_conv1')(inception_3c) 130 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn1')(inception_4a_5x5) 131 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5) 132 | inception_4a_5x5 = ZeroPadding2D(padding=(2,2))(inception_4a_5x5) 133 | inception_4a_5x5 = Conv2D(64, (5,5), strides=(1,1), name='inception_4a_5x5_conv'+'2')(inception_4a_5x5) 134 | inception_4a_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_5x5_bn'+'2')(inception_4a_5x5) 135 | inception_4a_5x5 = Activation('relu')(inception_4a_5x5) 136 | 137 | inception_4a_pool = Lambda(lambda x: x**2, name='power2_4a')(inception_3c) 138 | inception_4a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_4a_pool) 139 | inception_4a_pool = Lambda(lambda x: x*9, name='mult9_4a')(inception_4a_pool) 140 | inception_4a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_4a')(inception_4a_pool) 141 | 142 | inception_4a_pool = Conv2D(128, (1,1), strides=(1,1), name='inception_4a_pool_conv'+'')(inception_4a_pool) 143 | inception_4a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_pool_bn'+'')(inception_4a_pool) 144 | inception_4a_pool = Activation('relu')(inception_4a_pool) 145 | inception_4a_pool = ZeroPadding2D(padding=(2, 2))(inception_4a_pool) 146 | 147 | inception_4a_1x1 = Conv2D(256, (1, 1), strides=(1, 1), name='inception_4a_1x1_conv'+'')(inception_3c) 148 | inception_4a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4a_1x1_bn'+'')(inception_4a_1x1) 149 | inception_4a_1x1 = Activation('relu')(inception_4a_1x1) 150 | 151 | inception_4a = concatenate([inception_4a_3x3, inception_4a_5x5, inception_4a_pool, inception_4a_1x1], axis=3) 152 | 153 | #inception4e 154 | inception_4e_3x3 = Conv2D(160, (1,1), strides=(1,1), name='inception_4e_3x3_conv'+'1')(inception_4a) 155 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'1')(inception_4e_3x3) 156 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3) 157 | inception_4e_3x3 = ZeroPadding2D(padding=(1, 1))(inception_4e_3x3) 158 | inception_4e_3x3 = Conv2D(256, (3,3), strides=(2,2), name='inception_4e_3x3_conv'+'2')(inception_4e_3x3) 159 | inception_4e_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_3x3_bn'+'2')(inception_4e_3x3) 160 | inception_4e_3x3 = Activation('relu')(inception_4e_3x3) 161 | 162 | inception_4e_5x5 = Conv2D(64, (1,1), strides=(1,1), name='inception_4e_5x5_conv'+'1')(inception_4a) 163 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'1')(inception_4e_5x5) 164 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5) 165 | inception_4e_5x5 = ZeroPadding2D(padding=(2, 2))(inception_4e_5x5) 166 | inception_4e_5x5 = Conv2D(128, (5,5), strides=(2,2), name='inception_4e_5x5_conv'+'2')(inception_4e_5x5) 167 | inception_4e_5x5 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_4e_5x5_bn'+'2')(inception_4e_5x5) 168 | inception_4e_5x5 = Activation('relu')(inception_4e_5x5) 169 | 170 | inception_4e_pool = MaxPooling2D(pool_size=3, strides=2)(inception_4a) 171 | inception_4e_pool = ZeroPadding2D(padding=((0, 1), (0, 1)))(inception_4e_pool) 172 | 173 | inception_4e = concatenate([inception_4e_3x3, inception_4e_5x5, inception_4e_pool], axis=3) 174 | 175 | #inception5a 176 | inception_5a_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_3x3_conv'+'1')(inception_4e) 177 | inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'1')(inception_5a_3x3) 178 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3) 179 | inception_5a_3x3 = ZeroPadding2D(padding=(1, 1))(inception_5a_3x3) 180 | inception_5a_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5a_3x3_conv'+'2')(inception_5a_3x3) 181 | inception_5a_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_3x3_bn'+'2')(inception_5a_3x3) 182 | inception_5a_3x3 = Activation('relu')(inception_5a_3x3) 183 | 184 | inception_5a_pool = Lambda(lambda x: x**2, name='power2_5a')(inception_4e) 185 | inception_5a_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3))(inception_5a_pool) 186 | inception_5a_pool = Lambda(lambda x: x*9, name='mult9_5a')(inception_5a_pool) 187 | inception_5a_pool = Lambda(lambda x: K.sqrt(x), name='sqrt_5a')(inception_5a_pool) 188 | 189 | inception_5a_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5a_pool_conv'+'')(inception_5a_pool) 190 | inception_5a_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_pool_bn'+'')(inception_5a_pool) 191 | inception_5a_pool = Activation('relu')(inception_5a_pool) 192 | inception_5a_pool = ZeroPadding2D(padding=(1,1))(inception_5a_pool) 193 | 194 | inception_5a_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5a_1x1_conv'+'')(inception_4e) 195 | inception_5a_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5a_1x1_bn'+'')(inception_5a_1x1) 196 | inception_5a_1x1 = Activation('relu')(inception_5a_1x1) 197 | 198 | inception_5a = concatenate([inception_5a_3x3, inception_5a_pool, inception_5a_1x1], axis=3) 199 | 200 | #inception_5b 201 | inception_5b_3x3 = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_3x3_conv'+'1')(inception_5a) 202 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'1')(inception_5b_3x3) 203 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3) 204 | inception_5b_3x3 = ZeroPadding2D(padding=(1,1))(inception_5b_3x3) 205 | inception_5b_3x3 = Conv2D(384, (3,3), strides=(1,1), name='inception_5b_3x3_conv'+'2')(inception_5b_3x3) 206 | inception_5b_3x3 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_3x3_bn'+'2')(inception_5b_3x3) 207 | inception_5b_3x3 = Activation('relu')(inception_5b_3x3) 208 | 209 | inception_5b_pool = MaxPooling2D(pool_size=3, strides=2)(inception_5a) 210 | 211 | inception_5b_pool = Conv2D(96, (1,1), strides=(1,1), name='inception_5b_pool_conv'+'')(inception_5b_pool) 212 | inception_5b_pool = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_pool_bn'+'')(inception_5b_pool) 213 | inception_5b_pool = Activation('relu')(inception_5b_pool) 214 | 215 | inception_5b_pool = ZeroPadding2D(padding=(1, 1))(inception_5b_pool) 216 | 217 | inception_5b_1x1 = Conv2D(256, (1,1), strides=(1,1), name='inception_5b_1x1_conv'+'')(inception_5a) 218 | inception_5b_1x1 = BatchNormalization(axis=3, epsilon=0.00001, name='inception_5b_1x1_bn'+'')(inception_5b_1x1) 219 | inception_5b_1x1 = Activation('relu')(inception_5b_1x1) 220 | 221 | inception_5b = concatenate([inception_5b_3x3, inception_5b_pool, inception_5b_1x1], axis=3) 222 | 223 | av_pool = AveragePooling2D(pool_size=(3, 3), strides=(1, 1))(inception_5b) 224 | reshape_layer = Flatten()(av_pool) 225 | dense_layer = Dense(128, name='dense_layer')(reshape_layer) 226 | norm_layer = Lambda(lambda x: K.l2_normalize(x, axis=1), name='norm_layer')(dense_layer) 227 | 228 | # Final Model 229 | model = Model(inputs=[myInput], outputs=norm_layer) 230 | 231 | #----------------------------------- 232 | 233 | home = str(Path.home()) 234 | 235 | if os.path.isfile(home+'/.deepface/weights/openface_weights.h5') != True: 236 | print("openface_weights.h5 will be downloaded...") 237 | 238 | output = home+'/.deepface/weights/openface_weights.h5' 239 | gdown.download(url, output, quiet=False) 240 | 241 | #----------------------------------- 242 | 243 | model.load_weights(home+'/.deepface/weights/openface_weights.h5') 244 | 245 | #----------------------------------- 246 | 247 | return model -------------------------------------------------------------------------------- /deepface/basemodels/VGGFace.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | import gdown 4 | 5 | import tensorflow as tf 6 | tf_version = int(tf.__version__.split(".")[0]) 7 | 8 | if tf_version == 1: 9 | from keras.models import Model, Sequential 10 | from keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation 11 | else: 12 | from tensorflow import keras 13 | from tensorflow.keras.models import Model, Sequential 14 | from tensorflow.keras.layers import Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Flatten, Dense, Dropout, Activation 15 | 16 | #--------------------------------------- 17 | 18 | def baseModel(): 19 | model = Sequential() 20 | model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3))) 21 | model.add(Convolution2D(64, (3, 3), activation='relu')) 22 | model.add(ZeroPadding2D((1,1))) 23 | model.add(Convolution2D(64, (3, 3), activation='relu')) 24 | model.add(MaxPooling2D((2,2), strides=(2,2))) 25 | 26 | model.add(ZeroPadding2D((1,1))) 27 | model.add(Convolution2D(128, (3, 3), activation='relu')) 28 | model.add(ZeroPadding2D((1,1))) 29 | model.add(Convolution2D(128, (3, 3), activation='relu')) 30 | model.add(MaxPooling2D((2,2), strides=(2,2))) 31 | 32 | model.add(ZeroPadding2D((1,1))) 33 | model.add(Convolution2D(256, (3, 3), activation='relu')) 34 | model.add(ZeroPadding2D((1,1))) 35 | model.add(Convolution2D(256, (3, 3), activation='relu')) 36 | model.add(ZeroPadding2D((1,1))) 37 | model.add(Convolution2D(256, (3, 3), activation='relu')) 38 | model.add(MaxPooling2D((2,2), strides=(2,2))) 39 | 40 | model.add(ZeroPadding2D((1,1))) 41 | model.add(Convolution2D(512, (3, 3), activation='relu')) 42 | model.add(ZeroPadding2D((1,1))) 43 | model.add(Convolution2D(512, (3, 3), activation='relu')) 44 | model.add(ZeroPadding2D((1,1))) 45 | model.add(Convolution2D(512, (3, 3), activation='relu')) 46 | model.add(MaxPooling2D((2,2), strides=(2,2))) 47 | 48 | model.add(ZeroPadding2D((1,1))) 49 | model.add(Convolution2D(512, (3, 3), activation='relu')) 50 | model.add(ZeroPadding2D((1,1))) 51 | model.add(Convolution2D(512, (3, 3), activation='relu')) 52 | model.add(ZeroPadding2D((1,1))) 53 | model.add(Convolution2D(512, (3, 3), activation='relu')) 54 | model.add(MaxPooling2D((2,2), strides=(2,2))) 55 | 56 | model.add(Convolution2D(4096, (7, 7), activation='relu')) 57 | model.add(Dropout(0.5)) 58 | model.add(Convolution2D(4096, (1, 1), activation='relu')) 59 | model.add(Dropout(0.5)) 60 | model.add(Convolution2D(2622, (1, 1))) 61 | model.add(Flatten()) 62 | model.add(Activation('softmax')) 63 | 64 | return model 65 | 66 | def loadModel(url = 'https://drive.google.com/uc?id=1CPSeum3HpopfomUEK1gybeuIVoeJT_Eo'): 67 | 68 | model = baseModel() 69 | 70 | #----------------------------------- 71 | 72 | home = str(Path.home()) 73 | output = home+'/.deepface/weights/vgg_face_weights.h5' 74 | 75 | if os.path.isfile(output) != True: 76 | print("vgg_face_weights.h5 will be downloaded...") 77 | gdown.download(url, output, quiet=False) 78 | 79 | #----------------------------------- 80 | 81 | try: 82 | model.load_weights(output) 83 | except Exception as err: 84 | print(str(err)) 85 | print("Pre-trained weight could not be loaded.") 86 | print("You might try to download the pre-trained weights from the url ", url, " and copy it to the ", output) 87 | 88 | #----------------------------------- 89 | 90 | #TO-DO: why? 91 | vgg_face_descriptor = Model(inputs=model.layers[0].input, outputs=model.layers[-2].output) 92 | 93 | return vgg_face_descriptor -------------------------------------------------------------------------------- /deepface/basemodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/basemodels/__init__.py -------------------------------------------------------------------------------- /deepface/commons/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/commons/__init__.py -------------------------------------------------------------------------------- /deepface/commons/distance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def findCosineDistance(source_representation, test_representation): 4 | a = np.matmul(np.transpose(source_representation), test_representation) 5 | b = np.sum(np.multiply(source_representation, source_representation)) 6 | c = np.sum(np.multiply(test_representation, test_representation)) 7 | return 1 - (a / (np.sqrt(b) * np.sqrt(c))) 8 | 9 | def findEuclideanDistance(source_representation, test_representation): 10 | if type(source_representation) == list: 11 | source_representation = np.array(source_representation) 12 | 13 | if type(test_representation) == list: 14 | test_representation = np.array(test_representation) 15 | 16 | euclidean_distance = source_representation - test_representation 17 | euclidean_distance = np.sum(np.multiply(euclidean_distance, euclidean_distance)) 18 | euclidean_distance = np.sqrt(euclidean_distance) 19 | return euclidean_distance 20 | 21 | def l2_normalize(x): 22 | return x / np.sqrt(np.sum(np.multiply(x, x))) 23 | 24 | def findThreshold(model_name, distance_metric): 25 | 26 | base_threshold = {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75} 27 | 28 | thresholds = { 29 | 'VGG-Face': {'cosine': 0.40, 'euclidean': 0.55, 'euclidean_l2': 0.75}, 30 | 'OpenFace': {'cosine': 0.10, 'euclidean': 0.55, 'euclidean_l2': 0.55}, 31 | 'Facenet': {'cosine': 0.40, 'euclidean': 10, 'euclidean_l2': 0.80}, 32 | 'DeepFace': {'cosine': 0.23, 'euclidean': 64, 'euclidean_l2': 0.64}, 33 | 'DeepID': {'cosine': 0.015, 'euclidean': 45, 'euclidean_l2': 0.17}, 34 | 'Dlib': {'cosine': 0.07, 'euclidean': 0.6, 'euclidean_l2': 0.6}, 35 | 'ArcFace': {'cosine': 0.6871912959056619, 'euclidean': 4.1591468986978075, 'euclidean_l2': 1.1315718048269017} 36 | } 37 | 38 | threshold = thresholds.get(model_name, base_threshold).get(distance_metric, 0.4) 39 | 40 | return threshold 41 | -------------------------------------------------------------------------------- /deepface/commons/functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import cv2 5 | import base64 6 | from pathlib import Path 7 | 8 | from deepface.detectors import FaceDetector 9 | 10 | import tensorflow as tf 11 | tf_version = int(tf.__version__.split(".")[0]) 12 | 13 | if tf_version == 1: 14 | import keras 15 | from keras.preprocessing.image import load_img, save_img, img_to_array 16 | from keras.applications.imagenet_utils import preprocess_input 17 | from keras.preprocessing import image 18 | elif tf_version == 2: 19 | from tensorflow import keras 20 | from tensorflow.keras.preprocessing.image import load_img, save_img, img_to_array 21 | from tensorflow.keras.applications.imagenet_utils import preprocess_input 22 | from tensorflow.keras.preprocessing import image 23 | 24 | #-------------------------------------------------- 25 | 26 | def initialize_input(img1_path, img2_path = None): 27 | 28 | if type(img1_path) == list: 29 | bulkProcess = True 30 | img_list = img1_path.copy() 31 | else: 32 | bulkProcess = False 33 | 34 | if ( 35 | (type(img2_path) == str and img2_path != None) #exact image path, base64 image 36 | or (isinstance(img2_path, np.ndarray) and img2_path.any()) #numpy array 37 | ): 38 | img_list = [[img1_path, img2_path]] 39 | else: #analyze function passes just img1_path 40 | img_list = [img1_path] 41 | 42 | return img_list, bulkProcess 43 | 44 | def initialize_detector(detector_backend): 45 | 46 | global face_detector 47 | face_detector = FaceDetector.build_model(detector_backend) 48 | 49 | def initializeFolder(): 50 | 51 | home = str(Path.home()) 52 | 53 | if not os.path.exists(home+"/.deepface"): 54 | os.mkdir(home+"/.deepface") 55 | print("Directory ",home,"/.deepface created") 56 | 57 | if not os.path.exists(home+"/.deepface/weights"): 58 | os.mkdir(home+"/.deepface/weights") 59 | print("Directory ",home,"/.deepface/weights created") 60 | 61 | def loadBase64Img(uri): 62 | encoded_data = uri.split(',')[1] 63 | nparr = np.fromstring(base64.b64decode(encoded_data), np.uint8) 64 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 65 | return img 66 | 67 | def load_image(img): 68 | 69 | exact_image = False 70 | if type(img).__module__ == np.__name__: 71 | exact_image = True 72 | 73 | base64_img = False 74 | if len(img) > 11 and img[0:11] == "data:image/": 75 | base64_img = True 76 | 77 | #--------------------------- 78 | 79 | if base64_img == True: 80 | img = loadBase64Img(img) 81 | 82 | elif exact_image != True: #image path passed as input 83 | if os.path.isfile(img) != True: 84 | raise ValueError("Confirm that ",img," exists") 85 | 86 | img = cv2.imread(img) 87 | 88 | return img 89 | 90 | def detect_face(img, detector_backend = 'opencv', grayscale = False, enforce_detection = True, align = True): 91 | 92 | img_region = [0, 0, img.shape[0], img.shape[1]] 93 | 94 | #if functions.preproces_face is called directly, then face_detector global variable might not been initialized. 95 | if not "face_detector" in globals(): 96 | initialize_detector(detector_backend = detector_backend) 97 | 98 | detected_face, img_region = FaceDetector.detect_face(face_detector, detector_backend, img, align) 99 | 100 | if (isinstance(detected_face, np.ndarray)): 101 | return detected_face, img_region 102 | else: 103 | if detected_face == None: 104 | if enforce_detection != True: 105 | return img, img_region 106 | else: 107 | raise ValueError("Face could not be detected. Please confirm that the picture is a face photo or consider to set enforce_detection param to False.") 108 | 109 | def preprocess_face(img, target_size=(224, 224), grayscale = False, enforce_detection = True, detector_backend = 'opencv', return_region = False, align = True): 110 | 111 | #img might be path, base64 or numpy array. Convert it to numpy whatever it is. 112 | img = load_image(img) 113 | base_img = img.copy() 114 | 115 | # img, region = detect_face(img = img, detector_backend = detector_backend, grayscale = grayscale, enforce_detection = enforce_detection, align = align) 116 | 117 | #-------------------------- 118 | 119 | if img.shape[0] == 0 or img.shape[1] == 0: 120 | if enforce_detection == True: 121 | raise ValueError("Detected face shape is ", img.shape,". Consider to set enforce_detection argument to False.") 122 | else: #restore base image 123 | img = base_img.copy() 124 | 125 | #-------------------------- 126 | 127 | #post-processing 128 | if grayscale == True: 129 | img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 130 | 131 | img = cv2.resize(img, target_size) 132 | #TODO: resize causes transformation on base image, you should add black pixels to rezie it to target_size 133 | 134 | img_pixels = image.img_to_array(img) 135 | img_pixels = np.expand_dims(img_pixels, axis = 0) 136 | img_pixels /= 255 #normalize input in [0, 1] 137 | 138 | if return_region == True: 139 | return img_pixels, region 140 | else: 141 | return img_pixels 142 | 143 | def find_input_shape(model): 144 | 145 | #face recognition models have different size of inputs 146 | #my environment returns (None, 224, 224, 3) but some people mentioned that they got [(None, 224, 224, 3)]. I think this is because of version issue. 147 | 148 | input_shape = model.layers[0].input_shape 149 | 150 | if type(input_shape) == list: 151 | input_shape = input_shape[0][1:3] 152 | else: 153 | input_shape = input_shape[1:3] 154 | 155 | if type(input_shape) == list: #issue 197: some people got array here instead of tuple 156 | input_shape = tuple(input_shape) 157 | 158 | return input_shape 159 | -------------------------------------------------------------------------------- /deepface/detectors/DlibWrapper.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import gdown 3 | import bz2 4 | import os 5 | 6 | def build_model(): 7 | 8 | home = str(Path.home()) 9 | 10 | import dlib #this requirement is not a must that's why imported here 11 | 12 | #check required file exists in the home/.deepface/weights folder 13 | if os.path.isfile(home+'/.deepface/weights/shape_predictor_5_face_landmarks.dat') != True: 14 | 15 | print("shape_predictor_5_face_landmarks.dat.bz2 is going to be downloaded") 16 | 17 | url = "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2" 18 | output = home+'/.deepface/weights/'+url.split("/")[-1] 19 | 20 | gdown.download(url, output, quiet=False) 21 | 22 | zipfile = bz2.BZ2File(output) 23 | data = zipfile.read() 24 | newfilepath = output[:-4] #discard .bz2 extension 25 | open(newfilepath, 'wb').write(data) 26 | 27 | face_detector = dlib.get_frontal_face_detector() 28 | sp = dlib.shape_predictor(home+"/.deepface/weights/shape_predictor_5_face_landmarks.dat") 29 | 30 | detector = {} 31 | detector["face_detector"] = face_detector 32 | detector["sp"] = sp 33 | return detector 34 | 35 | def detect_face(detector, img, align = True): 36 | 37 | import dlib #this requirement is not a must that's why imported here 38 | 39 | home = str(Path.home()) 40 | 41 | sp = detector["sp"] 42 | 43 | detected_face = None 44 | img_region = [0, 0, img.shape[0], img.shape[1]] 45 | 46 | face_detector = detector["face_detector"] 47 | detections = face_detector(img, 1) 48 | 49 | if len(detections) > 0: 50 | 51 | for idx, d in enumerate(detections): 52 | left = d.left(); right = d.right() 53 | top = d.top(); bottom = d.bottom() 54 | detected_face = img[top:bottom, left:right] 55 | img_region = [left, top, right - left, bottom - top] 56 | break #get the first one 57 | 58 | if align: 59 | img_shape = sp(img, detections[0]) 60 | detected_face = dlib.get_face_chip(img, img_shape, size = detected_face.shape[0]) 61 | 62 | return detected_face, img_region 63 | -------------------------------------------------------------------------------- /deepface/detectors/FaceDetector.py: -------------------------------------------------------------------------------- 1 | from deepface.detectors import OpenCvWrapper, SsdWrapper, DlibWrapper, MtcnnWrapper, RetinaFaceWrapper 2 | from PIL import Image 3 | import math 4 | import numpy as np 5 | from deepface.commons import distance 6 | 7 | def build_model(detector_backend): 8 | 9 | backends = { 10 | 'opencv': OpenCvWrapper.build_model, 11 | 'ssd': SsdWrapper.build_model, 12 | 'dlib': DlibWrapper.build_model, 13 | 'mtcnn': MtcnnWrapper.build_model, 14 | 'retinaface': RetinaFaceWrapper.build_model 15 | } 16 | 17 | face_detector = backends.get(detector_backend) 18 | 19 | if face_detector: 20 | face_detector = face_detector() 21 | else: 22 | raise ValueError("invalid detector_backend passed - " + detector_backend) 23 | 24 | return face_detector 25 | 26 | def detect_face(face_detector, detector_backend, img, align = True): 27 | 28 | backends = { 29 | 'opencv': OpenCvWrapper.detect_face, 30 | 'ssd': SsdWrapper.detect_face, 31 | 'dlib': DlibWrapper.detect_face, 32 | 'mtcnn': MtcnnWrapper.detect_face, 33 | 'retinaface': RetinaFaceWrapper.detect_face 34 | } 35 | 36 | detect_face = backends.get(detector_backend) 37 | 38 | if detect_face: 39 | face, region = detect_face(face_detector, img, align) 40 | else: 41 | raise ValueError("invalid detector_backend passed - " + detector_backend) 42 | 43 | return face, region 44 | 45 | def alignment_procedure(img, left_eye, right_eye): 46 | 47 | #this function aligns given face in img based on left and right eye coordinates 48 | 49 | left_eye_x, left_eye_y = left_eye 50 | right_eye_x, right_eye_y = right_eye 51 | 52 | #----------------------- 53 | #find rotation direction 54 | 55 | if left_eye_y > right_eye_y: 56 | point_3rd = (right_eye_x, left_eye_y) 57 | direction = -1 #rotate same direction to clock 58 | else: 59 | point_3rd = (left_eye_x, right_eye_y) 60 | direction = 1 #rotate inverse direction of clock 61 | 62 | #----------------------- 63 | #find length of triangle edges 64 | 65 | a = distance.findEuclideanDistance(np.array(left_eye), np.array(point_3rd)) 66 | b = distance.findEuclideanDistance(np.array(right_eye), np.array(point_3rd)) 67 | c = distance.findEuclideanDistance(np.array(right_eye), np.array(left_eye)) 68 | 69 | #----------------------- 70 | 71 | #apply cosine rule 72 | 73 | if b != 0 and c != 0: #this multiplication causes division by zero in cos_a calculation 74 | 75 | cos_a = (b*b + c*c - a*a)/(2*b*c) 76 | angle = np.arccos(cos_a) #angle in radian 77 | angle = (angle * 180) / math.pi #radian to degree 78 | 79 | #----------------------- 80 | #rotate base image 81 | 82 | if direction == -1: 83 | angle = 90 - angle 84 | 85 | img = Image.fromarray(img) 86 | img = np.array(img.rotate(direction * angle)) 87 | 88 | #----------------------- 89 | 90 | return img #return img anyway 91 | -------------------------------------------------------------------------------- /deepface/detectors/MtcnnWrapper.py: -------------------------------------------------------------------------------- 1 | from mtcnn import MTCNN 2 | import cv2 3 | from deepface.detectors import FaceDetector 4 | 5 | def build_model(): 6 | face_detector = MTCNN() 7 | return face_detector 8 | 9 | def detect_face(face_detector, img, align = True): 10 | 11 | detected_face = None 12 | img_region = [0, 0, img.shape[0], img.shape[1]] 13 | 14 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #mtcnn expects RGB but OpenCV read BGR 15 | detections = face_detector.detect_faces(img_rgb) 16 | 17 | if len(detections) > 0: 18 | detection = detections[0] 19 | x, y, w, h = detection["box"] 20 | detected_face = img[int(y):int(y+h), int(x):int(x+w)] 21 | img_region = [x, y, w, h] 22 | 23 | keypoints = detection["keypoints"] 24 | left_eye = keypoints["left_eye"] 25 | right_eye = keypoints["right_eye"] 26 | 27 | if align: 28 | detected_face = FaceDetector.alignment_procedure(detected_face, left_eye, right_eye) 29 | 30 | return detected_face, img_region 31 | -------------------------------------------------------------------------------- /deepface/detectors/OpenCvWrapper.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import pandas as pd 4 | from deepface.detectors import FaceDetector 5 | 6 | def build_model(): 7 | 8 | detector ={} 9 | 10 | detector["face_detector"] = build_cascade('haarcascade') 11 | detector["eye_detector"] = build_cascade('haarcascade_eye') 12 | 13 | return detector 14 | 15 | def build_cascade(model_name = 'haarcascade'): 16 | opencv_path = get_opencv_path() 17 | 18 | if model_name == 'haarcascade': 19 | 20 | face_detector_path = opencv_path+"haarcascade_frontalface_default.xml" 21 | 22 | if os.path.isfile(face_detector_path) != True: 23 | raise ValueError("Confirm that opencv is installed on your environment! Expected path ",face_detector_path," violated.") 24 | 25 | 26 | face_detector = cv2.CascadeClassifier(face_detector_path) 27 | return face_detector 28 | 29 | elif model_name == 'haarcascade_eye': 30 | eye_detector_path = opencv_path+"haarcascade_eye.xml" 31 | 32 | if os.path.isfile(eye_detector_path) != True: 33 | raise ValueError("Confirm that opencv is installed on your environment! Expected path ",eye_detector_path," violated.") 34 | 35 | eye_detector = cv2.CascadeClassifier(eye_detector_path) 36 | return eye_detector 37 | 38 | def detect_face(detector, img, align = True): 39 | 40 | detected_face = None 41 | img_region = [0, 0, img.shape[0], img.shape[1]] 42 | 43 | faces = [] 44 | try: 45 | faces = detector["face_detector"].detectMultiScale(img, 1.3, 5) 46 | except: 47 | pass 48 | 49 | if len(faces) > 0: 50 | x,y,w,h = faces[0] #focus on the 1st face found in the image 51 | detected_face = img[int(y):int(y+h), int(x):int(x+w)] 52 | 53 | if align: 54 | detected_face = align_face(detector["eye_detector"], detected_face) 55 | img_region = [x, y, w, h] 56 | 57 | return detected_face, img_region 58 | 59 | def align_face(eye_detector, img): 60 | 61 | detected_face_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #eye detector expects gray scale image 62 | 63 | eyes = eye_detector.detectMultiScale(detected_face_gray) 64 | 65 | if len(eyes) >= 2: 66 | 67 | #find the largest 2 eye 68 | 69 | base_eyes = eyes[:, 2] 70 | 71 | items = [] 72 | for i in range(0, len(base_eyes)): 73 | item = (base_eyes[i], i) 74 | items.append(item) 75 | 76 | df = pd.DataFrame(items, columns = ["length", "idx"]).sort_values(by=['length'], ascending=False) 77 | 78 | eyes = eyes[df.idx.values[0:2]] #eyes variable stores the largest 2 eye 79 | 80 | #----------------------- 81 | #decide left and right eye 82 | 83 | eye_1 = eyes[0]; eye_2 = eyes[1] 84 | 85 | if eye_1[0] < eye_2[0]: 86 | left_eye = eye_1; right_eye = eye_2 87 | else: 88 | left_eye = eye_2; right_eye = eye_1 89 | 90 | #----------------------- 91 | #find center of eyes 92 | left_eye = (int(left_eye[0] + (left_eye[2] / 2)), int(left_eye[1] + (left_eye[3] / 2))) 93 | right_eye = (int(right_eye[0] + (right_eye[2]/2)), int(right_eye[1] + (right_eye[3]/2))) 94 | img = FaceDetector.alignment_procedure(img, left_eye, right_eye) 95 | return img #return img anyway 96 | 97 | def get_opencv_path(): 98 | opencv_home = cv2.__file__ 99 | folders = opencv_home.split(os.path.sep)[0:-1] 100 | 101 | path = folders[0] 102 | for folder in folders[1:]: 103 | path = path + "/" + folder 104 | 105 | return path+"/data/" 106 | -------------------------------------------------------------------------------- /deepface/detectors/RetinaFaceWrapper.py: -------------------------------------------------------------------------------- 1 | from retinaface import RetinaFace 2 | import cv2 3 | 4 | def build_model(): 5 | face_detector = RetinaFace.build_model() 6 | return face_detector 7 | 8 | def detect_face(face_detector, img, align = True): 9 | 10 | face = None 11 | img_region = [0, 0, img.shape[0], img.shape[1]] 12 | 13 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) #retinaface expects RGB but OpenCV read BGR 14 | 15 | faces = RetinaFace.extract_faces(img_rgb, model = face_detector, align = align) 16 | 17 | if len(faces) > 0: 18 | face = faces[0][:, :, ::-1] 19 | 20 | return face, img_region 21 | -------------------------------------------------------------------------------- /deepface/detectors/SsdWrapper.py: -------------------------------------------------------------------------------- 1 | import gdown 2 | from pathlib import Path 3 | import os 4 | import cv2 5 | import pandas as pd 6 | 7 | from deepface.detectors import OpenCvWrapper 8 | 9 | def build_model(): 10 | 11 | home = str(Path.home()) 12 | 13 | #model structure 14 | if os.path.isfile(home+'/.deepface/weights/deploy.prototxt') != True: 15 | 16 | print("deploy.prototxt will be downloaded...") 17 | 18 | url = "https://github.com/opencv/opencv/raw/3.4.0/samples/dnn/face_detector/deploy.prototxt" 19 | 20 | output = home+'/.deepface/weights/deploy.prototxt' 21 | 22 | gdown.download(url, output, quiet=False) 23 | 24 | #pre-trained weights 25 | if os.path.isfile(home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel') != True: 26 | 27 | print("res10_300x300_ssd_iter_140000.caffemodel will be downloaded...") 28 | 29 | url = "https://github.com/opencv/opencv_3rdparty/raw/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel" 30 | 31 | output = home+'/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel' 32 | 33 | gdown.download(url, output, quiet=False) 34 | 35 | face_detector = cv2.dnn.readNetFromCaffe( 36 | home+"/.deepface/weights/deploy.prototxt", 37 | home+"/.deepface/weights/res10_300x300_ssd_iter_140000.caffemodel" 38 | ) 39 | 40 | eye_detector = OpenCvWrapper.build_cascade("haarcascade_eye") 41 | 42 | detector = {} 43 | detector["face_detector"] = face_detector 44 | detector["eye_detector"] = eye_detector 45 | 46 | return detector 47 | 48 | def detect_face(detector, img, align = True): 49 | 50 | detected_face = None 51 | img_region = [0, 0, img.shape[0], img.shape[1]] 52 | 53 | ssd_labels = ["img_id", "is_face", "confidence", "left", "top", "right", "bottom"] 54 | 55 | target_size = (300, 300) 56 | 57 | base_img = img.copy() #we will restore base_img to img later 58 | 59 | original_size = img.shape 60 | 61 | img = cv2.resize(img, target_size) 62 | 63 | aspect_ratio_x = (original_size[1] / target_size[1]) 64 | aspect_ratio_y = (original_size[0] / target_size[0]) 65 | 66 | imageBlob = cv2.dnn.blobFromImage(image = img) 67 | 68 | face_detector = detector["face_detector"] 69 | face_detector.setInput(imageBlob) 70 | detections = face_detector.forward() 71 | 72 | detections_df = pd.DataFrame(detections[0][0], columns = ssd_labels) 73 | 74 | detections_df = detections_df[detections_df['is_face'] == 1] #0: background, 1: face 75 | detections_df = detections_df[detections_df['confidence'] >= 0.90] 76 | 77 | detections_df['left'] = (detections_df['left'] * 300).astype(int) 78 | detections_df['bottom'] = (detections_df['bottom'] * 300).astype(int) 79 | detections_df['right'] = (detections_df['right'] * 300).astype(int) 80 | detections_df['top'] = (detections_df['top'] * 300).astype(int) 81 | 82 | if detections_df.shape[0] > 0: 83 | 84 | #TODO: sort detections_df 85 | 86 | #get the first face in the image 87 | instance = detections_df.iloc[0] 88 | 89 | left = instance["left"] 90 | right = instance["right"] 91 | bottom = instance["bottom"] 92 | top = instance["top"] 93 | 94 | detected_face = base_img[int(top*aspect_ratio_y):int(bottom*aspect_ratio_y), int(left*aspect_ratio_x):int(right*aspect_ratio_x)] 95 | img_region = [int(left*aspect_ratio_x), int(top*aspect_ratio_y), int(right*aspect_ratio_x) - int(left*aspect_ratio_x), int(bottom*aspect_ratio_y) - int(top*aspect_ratio_y)] 96 | 97 | if align: 98 | detected_face = OpenCvWrapper.align_face(detector["eye_detector"], detected_face) 99 | 100 | return detected_face, img_region 101 | -------------------------------------------------------------------------------- /deepface/detectors/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/detectors/__init__.py -------------------------------------------------------------------------------- /deepface/extendedmodels/Age.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | import os 3 | from pathlib import Path 4 | import gdown 5 | import numpy as np 6 | 7 | import tensorflow as tf 8 | tf_version = int(tf.__version__.split(".")[0]) 9 | 10 | if tf_version == 1: 11 | import keras 12 | from keras.models import Model, Sequential 13 | from keras.layers import Convolution2D, Flatten, Activation 14 | elif tf_version == 2: 15 | from tensorflow import keras 16 | from tensorflow.keras.models import Model, Sequential 17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 18 | 19 | def loadModel(): 20 | 21 | model = VGGFace.baseModel() 22 | 23 | #-------------------------- 24 | 25 | classes = 101 26 | base_model_output = Sequential() 27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 28 | base_model_output = Flatten()(base_model_output) 29 | base_model_output = Activation('softmax')(base_model_output) 30 | 31 | #-------------------------- 32 | 33 | age_model = Model(inputs=model.input, outputs=base_model_output) 34 | 35 | #-------------------------- 36 | 37 | #load weights 38 | 39 | home = str(Path.home()) 40 | 41 | if os.path.isfile(home+'/.deepface/weights/age_model_weights.h5') != True: 42 | print("age_model_weights.h5 will be downloaded...") 43 | 44 | url = 'https://drive.google.com/uc?id=1YCox_4kJ-BYeXq27uUbasu--yz28zUMV' 45 | output = home+'/.deepface/weights/age_model_weights.h5' 46 | gdown.download(url, output, quiet=False) 47 | 48 | age_model.load_weights(home+'/.deepface/weights/age_model_weights.h5') 49 | 50 | return age_model 51 | 52 | #-------------------------- 53 | 54 | def findApparentAge(age_predictions): 55 | output_indexes = np.array([i for i in range(0, 101)]) 56 | apparent_age = np.sum(age_predictions * output_indexes) 57 | return apparent_age -------------------------------------------------------------------------------- /deepface/extendedmodels/Emotion.py: -------------------------------------------------------------------------------- 1 | import os 2 | import gdown 3 | from pathlib import Path 4 | import zipfile 5 | 6 | import tensorflow as tf 7 | tf_version = int(tf.__version__.split(".")[0]) 8 | 9 | if tf_version == 1: 10 | import keras 11 | from keras.models import Model, Sequential 12 | from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout 13 | elif tf_version == 2: 14 | from tensorflow import keras 15 | from tensorflow.keras.models import Model, Sequential 16 | from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout 17 | 18 | def loadModel(): 19 | 20 | num_classes = 7 21 | 22 | model = Sequential() 23 | 24 | #1st convolution layer 25 | model.add(Conv2D(64, (5, 5), activation='relu', input_shape=(48,48,1))) 26 | model.add(MaxPooling2D(pool_size=(5,5), strides=(2, 2))) 27 | 28 | #2nd convolution layer 29 | model.add(Conv2D(64, (3, 3), activation='relu')) 30 | model.add(Conv2D(64, (3, 3), activation='relu')) 31 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2))) 32 | 33 | #3rd convolution layer 34 | model.add(Conv2D(128, (3, 3), activation='relu')) 35 | model.add(Conv2D(128, (3, 3), activation='relu')) 36 | model.add(AveragePooling2D(pool_size=(3,3), strides=(2, 2))) 37 | 38 | model.add(Flatten()) 39 | 40 | #fully connected neural networks 41 | model.add(Dense(1024, activation='relu')) 42 | model.add(Dropout(0.2)) 43 | model.add(Dense(1024, activation='relu')) 44 | model.add(Dropout(0.2)) 45 | 46 | model.add(Dense(num_classes, activation='softmax')) 47 | 48 | #---------------------------- 49 | 50 | home = str(Path.home()) 51 | 52 | if os.path.isfile(home+'/.deepface/weights/facial_expression_model_weights.h5') != True: 53 | print("facial_expression_model_weights.h5 will be downloaded...") 54 | 55 | #TO-DO: upload weights to google drive 56 | 57 | #zip 58 | url = 'https://drive.google.com/uc?id=13iUHHP3SlNg53qSuQZDdHDSDNdBP9nwy' 59 | output = home+'/.deepface/weights/facial_expression_model_weights.zip' 60 | gdown.download(url, output, quiet=False) 61 | 62 | #unzip facial_expression_model_weights.zip 63 | with zipfile.ZipFile(output, 'r') as zip_ref: 64 | zip_ref.extractall(home+'/.deepface/weights/') 65 | 66 | model.load_weights(home+'/.deepface/weights/facial_expression_model_weights.h5') 67 | 68 | return model 69 | 70 | #---------------------------- 71 | 72 | return 0 -------------------------------------------------------------------------------- /deepface/extendedmodels/Gender.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | import os 3 | from pathlib import Path 4 | import gdown 5 | import numpy as np 6 | 7 | import tensorflow as tf 8 | tf_version = int(tf.__version__.split(".")[0]) 9 | 10 | if tf_version == 1: 11 | from keras.models import Model, Sequential 12 | from keras.layers import Convolution2D, Flatten, Activation 13 | elif tf_version == 2: 14 | from tensorflow.keras.models import Model, Sequential 15 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 16 | 17 | def loadModel(): 18 | 19 | model = VGGFace.baseModel() 20 | 21 | #-------------------------- 22 | 23 | classes = 2 24 | base_model_output = Sequential() 25 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 26 | base_model_output = Flatten()(base_model_output) 27 | base_model_output = Activation('softmax')(base_model_output) 28 | 29 | #-------------------------- 30 | 31 | gender_model = Model(inputs=model.input, outputs=base_model_output) 32 | 33 | #-------------------------- 34 | 35 | #load weights 36 | 37 | home = str(Path.home()) 38 | 39 | if os.path.isfile(home+'/.deepface/weights/gender_model_weights.h5') != True: 40 | print("gender_model_weights.h5 will be downloaded...") 41 | 42 | url = 'https://drive.google.com/uc?id=1wUXRVlbsni2FN9-jkS_f4UTUrm1bRLyk' 43 | output = home+'/.deepface/weights/gender_model_weights.h5' 44 | gdown.download(url, output, quiet=False) 45 | 46 | gender_model.load_weights(home+'/.deepface/weights/gender_model_weights.h5') 47 | 48 | return gender_model 49 | 50 | #-------------------------- -------------------------------------------------------------------------------- /deepface/extendedmodels/Race.py: -------------------------------------------------------------------------------- 1 | from deepface.basemodels import VGGFace 2 | 3 | import os 4 | from pathlib import Path 5 | import gdown 6 | import numpy as np 7 | import zipfile 8 | 9 | import tensorflow as tf 10 | tf_version = int(tf.__version__.split(".")[0]) 11 | 12 | if tf_version == 1: 13 | from keras.models import Model, Sequential 14 | from keras.layers import Convolution2D, Flatten, Activation 15 | elif tf_version == 2: 16 | from tensorflow.keras.models import Model, Sequential 17 | from tensorflow.keras.layers import Convolution2D, Flatten, Activation 18 | 19 | def loadModel(): 20 | 21 | model = VGGFace.baseModel() 22 | 23 | #-------------------------- 24 | 25 | classes = 6 26 | base_model_output = Sequential() 27 | base_model_output = Convolution2D(classes, (1, 1), name='predictions')(model.layers[-4].output) 28 | base_model_output = Flatten()(base_model_output) 29 | base_model_output = Activation('softmax')(base_model_output) 30 | 31 | #-------------------------- 32 | 33 | race_model = Model(inputs=model.input, outputs=base_model_output) 34 | 35 | #-------------------------- 36 | 37 | #load weights 38 | 39 | home = str(Path.home()) 40 | 41 | if os.path.isfile(home+'/.deepface/weights/race_model_single_batch.h5') != True: 42 | print("race_model_single_batch.h5 will be downloaded...") 43 | 44 | #zip 45 | url = 'https://drive.google.com/uc?id=1nz-WDhghGQBC4biwShQ9kYjvQMpO6smj' 46 | output = home+'/.deepface/weights/race_model_single_batch.zip' 47 | gdown.download(url, output, quiet=False) 48 | 49 | #unzip race_model_single_batch.zip 50 | with zipfile.ZipFile(output, 'r') as zip_ref: 51 | zip_ref.extractall(home+'/.deepface/weights/') 52 | 53 | race_model.load_weights(home+'/.deepface/weights/race_model_single_batch.h5') 54 | 55 | return race_model 56 | 57 | #-------------------------- 58 | -------------------------------------------------------------------------------- /deepface/extendedmodels/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/extendedmodels/__init__.py -------------------------------------------------------------------------------- /deepface/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/deepface/models/__init__.py -------------------------------------------------------------------------------- /detection.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/detection.txt -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import motmetrics as mm 2 | import numpy as np 3 | import os 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | """ 8 | python evaluation.py \ 9 | --gt_file_path ./resources/gt/T-ara_gt.txt \ 10 | --pred_file_path ./resources/gt/T-ara_pred.txt 11 | 12 | python evaluation.py \ 13 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \ 14 | --pred_file_path ./resources/gt/GirlsAloud_pred.txt 15 | 16 | python evaluation.py \ 17 | --gt_file_path ./resources/gt/Darling_gt.txt \ 18 | --pred_file_path ./resources/gt/Darling_pred.txt 19 | 20 | python evaluation.py \ 21 | --gt_file_path ./resources/gt/Westlife_gt.txt \ 22 | --pred_file_path ./resources/gt/Westlife_pred.txt 23 | 24 | python evaluation.py \ 25 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \ 26 | --pred_file_path ./resources/gt/BrunoMars_pred.txt 27 | 28 | python evaluation.py \ 29 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \ 30 | --pred_file_path ./resources/gt/HelloBubble_pred.txt 31 | 32 | python evaluation.py \ 33 | --gt_file_path ./resources/gt/Apink_gt.txt \ 34 | --pred_file_path ./resources/gt/Apink_pred.txt 35 | """ 36 | 37 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to gt txt') 38 | flags.DEFINE_string('pred_file_path', './resources/gt/T-ara_pred.txt', 'path to predicted txt') 39 | 40 | def main(args): 41 | 42 | # home = os.getcwd() 43 | # gt_path = os.path.join(home, "resources", "gt") 44 | 45 | # gt_file_path = os.path.join(gt_path, "T-ara_gt.txt") 46 | # pred_file_path = os.path.join(gt_path, "T-ara_pred.txt") 47 | 48 | f = open(FLAGS.gt_file_path, "r") 49 | gt = [] 50 | while True: 51 | line = f.readline() 52 | if not line: break 53 | a = list(map(int, line.split())) 54 | gt.append(a) 55 | gt = np.asarray(gt) 56 | f.close() 57 | 58 | f = open(FLAGS.pred_file_path, "r") 59 | pred = [] 60 | while True: 61 | line = f.readline() 62 | if not line: break 63 | a = list(map(int, line.split())) 64 | pred.append(a) 65 | pred = np.asarray(pred) 66 | f.close() 67 | 68 | acc = mm.MOTAccumulator(auto_id=True) 69 | frame_idx = 0 70 | count = 0 71 | max_index = max(max(gt[:, 0]), max(pred[:, 0])) 72 | 73 | while frame_idx <= max_index: 74 | frame_idx += 1 75 | 76 | gt_indexs = gt[:, 0] 77 | pred_indexs = pred[:, 0] 78 | 79 | mask1 = frame_idx == gt_indexs 80 | mask2 = frame_idx == pred_indexs 81 | 82 | # if not gt[mask1].shape[0] and not pred[mask2].shape[0]: 83 | # break 84 | 85 | # gt_ids = sorted(list(set(gt[mask1][:, 1]))) 86 | # pred_ids = sorted(list(set(pred[mask2][:, 1]))) 87 | 88 | gt_ids = gt[mask1][:, 1] 89 | pred_ids = pred[mask2][:, 1] 90 | # print(gt_ids) 91 | # print(pred_ids) 92 | 93 | a = gt[mask1][:, 2:] 94 | b = pred[mask2][:, 2:] 95 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5)) 96 | 97 | f = acc.update( 98 | gt_ids, 99 | pred_ids, 100 | mm.distances.iou_matrix(a, b, max_iou=0.5) 101 | ) 102 | # print(mm.distances.iou_matrix(a, b, max_iou=0.5)) 103 | # print(acc.mot_events.loc[f]) 104 | 105 | 106 | mh = mm.metrics.create() 107 | custom_metric = [ 108 | "num_frames", 109 | "obj_frequencies", 110 | "pred_frequencies", 111 | "num_matches", 112 | "num_switches", 113 | "num_transfer", 114 | "num_ascend", 115 | "num_migrate", 116 | "num_false_positives", 117 | "num_misses", 118 | "num_detections", 119 | "num_objects", 120 | "num_predictions", 121 | "num_unique_objects", 122 | "track_ratios", 123 | "mostly_tracked", 124 | "partially_tracked", 125 | "mostly_lost", 126 | "num_fragmentations", 127 | "motp", 128 | "mota", 129 | "precision", 130 | "recall", 131 | ] 132 | summary = mh.compute_many( 133 | [acc, acc.mot_events], 134 | metrics=mm.metrics.motchallenge_metrics, 135 | ) 136 | 137 | strsummary = mm.io.render_summary( 138 | summary, 139 | formatters=mh.formatters, 140 | namemap=mm.io.motchallenge_metric_names 141 | ) 142 | 143 | print(strsummary) 144 | 145 | 146 | if __name__ == '__main__': 147 | try: 148 | app.run(main) 149 | except SystemExit: 150 | pass 151 | 152 | -------------------------------------------------------------------------------- /generate_face.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import numpy as np 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | 8 | 9 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to crop gt file') 10 | flags.DEFINE_string('video_file_path', './resources/video/in/T-ara.mov', 'path to video file') 11 | flags.DEFINE_string('face_data_path', './resources/database/T-ara', 'path to video file') 12 | 13 | """ 14 | python generate_face.py \ 15 | --gt_file_path ./resources/gt/T-ara_gt.txt \ 16 | --video_file_path ./resources/video/in/T-ara.mov \ 17 | --face_data_path ./resources/database/T-ara 18 | 19 | python generate_face.py \ 20 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt \ 21 | --video_file_path ./resources/video/in/GirlsAloud.mp4 \ 22 | --face_data_path ./resources/database/GirlsAloud 23 | 24 | python generate_face.py \ 25 | --gt_file_path ./resources/gt/Darling_gt.txt \ 26 | --video_file_path ./resources/video/in/Darling.mp4 \ 27 | --face_data_path ./resources/database/Darling 28 | 29 | python generate_face.py \ 30 | --gt_file_path ./resources/gt/Westlife_gt.txt \ 31 | --video_file_path ./resources/video/in/Westlife.mp4 \ 32 | --face_data_path ./resources/database/Westlife 33 | 34 | python generate_face.py \ 35 | --gt_file_path ./resources/gt/BrunoMars_gt.txt \ 36 | --video_file_path ./resources/video/in/BrunoMars.mp4 \ 37 | --face_data_path ./resources/database/BrunoMars 38 | 39 | python generate_face.py \ 40 | --gt_file_path ./resources/gt/HelloBubble_gt.txt \ 41 | --video_file_path ./resources/video/in/HelloBubble.mp4 \ 42 | --face_data_path ./resources/database/HelloBubble 43 | 44 | python generate_face.py \ 45 | --gt_file_path ./resources/gt/Apink_gt.txt \ 46 | --video_file_path ./resources/video/in/Apink.mp4 \ 47 | --face_data_path ./resources/database/Apink 48 | 49 | """ 50 | def main(args): 51 | f = open(FLAGS.gt_file_path, "r") 52 | detections = [] 53 | while True: 54 | line = f.readline() 55 | if not line: break 56 | a = list(map(int, line.split())) 57 | detections.append(a) 58 | detections = np.asarray(detections) 59 | f.close() 60 | 61 | if not os.path.isdir(FLAGS.face_data_path): 62 | os.mkdir(FLAGS.face_data_path) 63 | 64 | vid = cv2.VideoCapture(FLAGS.video_file_path) 65 | frame_index = -1 66 | count = 0 67 | frame_indices = detections[:, 0].astype(np.int) 68 | 69 | object_dict = dict() 70 | 71 | while True: 72 | frame_index += 1 73 | print(f'{frame_index} frame is working on...') 74 | _, img = vid.read() 75 | 76 | if img is None: 77 | logging.warning("Empty Frame") 78 | count+=1 79 | if count < 3: 80 | continue 81 | else: 82 | break 83 | 84 | mask = frame_indices == frame_index 85 | 86 | 87 | 88 | for row in detections[mask]: 89 | frame, id, bbox = row[0], row[1], row[2:] 90 | 91 | if object_dict.get(id): 92 | file_name = object_dict[id] 93 | object_dict[id] += 1 94 | else: 95 | object_dict[id] = 1 96 | file_name = object_dict[id] 97 | 98 | if object_dict[id] % 10 != 0: 99 | continue 100 | 101 | # target_aspect = float(img.shape[1]) / img.shape[0] 102 | # new_width = target_aspect * bbox[3] 103 | # bbox[0] -= (new_width - bbox[2]) / 2 104 | # bbox[2] = new_width 105 | bbox[2:] += bbox[:2] 106 | bbox = bbox.astype(np.int) 107 | 108 | bbox[:2] = np.maximum(0, bbox[:2]) 109 | bbox[2:] = np.minimum(np.asarray(img.shape[:2][::-1]) - 1, bbox[2:]) 110 | 111 | sx, sy, ex, ey = bbox 112 | # print(bbox) 113 | # print(img.shape) 114 | image = img[sy:ey, sx:ex] 115 | 116 | output_path = os.path.join(FLAGS.face_data_path, str(id)) 117 | if not os.path.isdir(output_path): 118 | os.mkdir(output_path) 119 | 120 | cv2.imwrite(os.path.join(FLAGS.face_data_path, str(id), str(object_dict[id])+".jpg"), image) 121 | 122 | 123 | 124 | # frame_indices = detection_mat[:, 0].astype(np.int) 125 | # mask = frame_indices == frame_idx 126 | 127 | # detection_list = [] 128 | # for row in detection_mat[mask]: 129 | # bbox, confidence, feature = row[2:6], row[6], row[10:] 130 | # if bbox[3] < min_height: 131 | # continue 132 | # detection_list.append(Detection(bbox, confidence, feature)) 133 | # return detection_list 134 | 135 | 136 | 137 | if __name__ == '__main__': 138 | try: 139 | app.run(main) 140 | except SystemExit: 141 | pass 142 | 143 | 144 | -------------------------------------------------------------------------------- /object_tracker copy.py: -------------------------------------------------------------------------------- 1 | import time 2 | from absl import app, flags, logging 3 | from absl.flags import FLAGS 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import cv2 8 | import tensorflow as tf 9 | import matplotlib.pyplot as plt 10 | from yolov3_tf2.models import ( 11 | YoloV3, YoloV3Tiny 12 | ) 13 | from yolov3_tf2.dataset import transform_images 14 | from yolov3_tf2.utils import draw_outputs, convert_boxes 15 | 16 | from deep_sort import preprocessing 17 | from deep_sort import nn_matching 18 | from deep_sort.detection import Detection 19 | from deep_sort.tracker import Tracker 20 | from tools import generate_detections as gdet 21 | from PIL import Image 22 | 23 | 24 | flags.DEFINE_string('classes', './model_data/labels/coco.names', 'path to classes file') 25 | flags.DEFINE_string('weights', './weights/yolov3.tf', 26 | 'path to weights file') 27 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 28 | flags.DEFINE_integer('size', 416, 'resize images to') 29 | flags.DEFINE_string('video', './resources/video/in/test.mp4', 30 | 'path to video file or number for webcam)') 31 | flags.DEFINE_string('output', None, 'path to output video') 32 | flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file') 33 | flags.DEFINE_integer('num_classes', 80, 'number of classes in the model') 34 | 35 | 36 | def main(_argv): 37 | # Definition of the parameters 38 | max_cosine_distance = 0.5 39 | nn_budget = None 40 | nms_max_overlap = 1.0 41 | 42 | #initialize deep sort 43 | model_filename = 'weights/mars-small128.pb' 44 | encoder = gdet.create_box_encoder(model_filename, batch_size=128) 45 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 46 | tracker = Tracker(metric) 47 | 48 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 49 | for physical_device in physical_devices: 50 | tf.config.experimental.set_memory_growth(physical_device, True) 51 | 52 | if FLAGS.tiny: 53 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 54 | else: 55 | yolo = YoloV3(classes=FLAGS.num_classes) 56 | 57 | yolo.load_weights(FLAGS.weights) 58 | logging.info('weights loaded') 59 | 60 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()] 61 | logging.info('classes loaded') 62 | 63 | times = [] 64 | 65 | try: 66 | vid = cv2.VideoCapture(int(FLAGS.video)) 67 | except: 68 | vid = cv2.VideoCapture(FLAGS.video) 69 | 70 | out = None 71 | 72 | if FLAGS.output: 73 | # by default VideoCapture returns float instead of int 74 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 75 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 76 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 77 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) 78 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) 79 | list_file = open('detection.txt', 'w') 80 | frame_index = -1 81 | 82 | fps = 0.0 83 | count = 0 84 | 85 | while True: 86 | _, img = vid.read() 87 | 88 | if img is None: 89 | logging.warning("Empty Frame") 90 | time.sleep(0.1) 91 | count+=1 92 | if count < 3: 93 | continue 94 | else: 95 | break 96 | 97 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 98 | img_in = tf.expand_dims(img_in, 0) 99 | img_in = transform_images(img_in, FLAGS.size) 100 | 101 | t1 = time.time() 102 | boxes, scores, classes, nums = yolo.predict(img_in) 103 | # print(boxes, scores, classes, nums) 104 | t2 = time.time() 105 | times.append(t2-t1) 106 | print(f'yolo predict time : {t2-t1}') 107 | times = times[-20:] 108 | 109 | t3 = time.time() 110 | ############# 111 | classes = classes[0] 112 | names = [] 113 | for i in range(len(classes)): 114 | names.append(class_names[int(classes[i])]) 115 | names = np.array(names) 116 | converted_boxes = convert_boxes(img, boxes[0]) 117 | features = encoder(img, converted_boxes) 118 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] 119 | 120 | t4 = time.time() 121 | print(f'feature generation time : {t4-t3}') 122 | 123 | #initialize color map 124 | cmap = plt.get_cmap('tab20b') 125 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] 126 | 127 | # run non-maxima suppresion 128 | boxs = np.array([d.tlwh for d in detections]) 129 | scores = np.array([d.confidence for d in detections]) 130 | classes = np.array([d.class_name for d in detections]) 131 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) 132 | detections = [detections[i] for i in indices] 133 | 134 | t5 = time.time() 135 | # Call the tracker 136 | tracker.predict() 137 | tracker.update(detections) 138 | t6 = time.time() 139 | print(f'tracking time : {t6-t5}') 140 | 141 | for track in tracker.tracks: 142 | if not track.is_confirmed() or track.time_since_update > 1: 143 | continue 144 | bbox = track.to_tlbr() 145 | class_name = track.get_class() 146 | color = colors[int(track.track_id) % len(colors)] 147 | color = [i * 255 for i in color] 148 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) 149 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])), color, -1) 150 | cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 151 | 152 | ####### 153 | fps = ( fps + (1./(time.time()-t1)) ) / 2 154 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names) 155 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30), 156 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 157 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 158 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2) 159 | if FLAGS.output: 160 | out.write(img) 161 | frame_index = frame_index + 1 162 | list_file.write(str(frame_index)+' ') 163 | if len(converted_boxes) != 0: 164 | for i in range(0,len(converted_boxes)): 165 | list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ') 166 | list_file.write('\n') 167 | cv2.imshow('output', img) 168 | if cv2.waitKey(1) == ord('q'): 169 | break 170 | 171 | cv2.destroyAllWindows() 172 | 173 | 174 | if __name__ == '__main__': 175 | try: 176 | app.run(main) 177 | except SystemExit: 178 | pass 179 | -------------------------------------------------------------------------------- /object_tracker.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from absl import app, flags, logging 4 | from absl.flags import FLAGS 5 | 6 | from mtcnn import MTCNN 7 | import tensorflow as tf 8 | import numpy as np 9 | import cv2 10 | import matplotlib.pyplot as plt 11 | from yolov3_tf2.models import ( 12 | YoloV3, YoloV3Tiny 13 | ) 14 | from yolov3_tf2.dataset import transform_images 15 | from yolov3_tf2.utils import draw_outputs, convert_boxes 16 | 17 | from deep_sort import preprocessing 18 | from deep_sort import nn_matching 19 | from deep_sort.detection import Detection 20 | from deep_sort.tracker import Tracker 21 | from tools import generate_detections as gdet 22 | from PIL import Image 23 | 24 | gpus = tf.config.experimental.list_physical_devices('GPU') 25 | if gpus: 26 | try: 27 | # Currently, memory growth needs to be the same across GPUs 28 | for gpu in gpus: 29 | tf.config.experimental.set_memory_growth(gpu, True) 30 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 31 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 32 | except RuntimeError as e: 33 | # Memory growth must be set before GPUs have been initialized 34 | print(e) 35 | 36 | """ 37 | python object_tracker.py \ 38 | --classes ./model_data/labels/widerface.names \ 39 | --video 0 \ 40 | --weights ./weights/yolov3-wider_16000.tf \ 41 | --num_classes 1 \ 42 | --output_format MP4V \ 43 | --output ./resources/video/out/myface.mp4 \ 44 | 45 | python object_tracker.py \ 46 | --classes ./model_data/labels/widerface.names \ 47 | --video ./resources/video/in/2.mp4 \ 48 | --weights ./weights/yolov3-wider_16000.tf \ 49 | --output_format MP4V \ 50 | --database ./resources/database/2 \ 51 | --output ./resources/video/out/2.mp4 \ 52 | --num_classes 1 \ 53 | --max_face_threshold 0.6871912959056619 54 | 55 | python object_tracker.py \ 56 | --classes ./model_data/labels/widerface.names \ 57 | --video ./resources/video/in/T-ara.mov \ 58 | --weights ./weights/yolov3-wider_16000.tf \ 59 | --output_format MP4V \ 60 | --database ./resources/database/T-ara \ 61 | --output ./resources/video/out/T-ara.mp4 \ 62 | --num_classes 1 \ 63 | --max_face_threshold 0.6871912959056619 \ 64 | --eval ./resources/gt/T-ara_pred.txt 65 | 66 | python object_tracker.py \ 67 | --classes ./model_data/labels/widerface.names \ 68 | --video ./resources/video/in/BrunoMars.mp4 \ 69 | --weights ./weights/yolov3-wider_16000.tf \ 70 | --output_format MP4V \ 71 | --database ./resources/database/BrunoMars \ 72 | --output ./resources/video/out/BrunoMars.mp4 \ 73 | --num_classes 1 \ 74 | --max_face_threshold 0.6871912959056619 \ 75 | --eval ./resources/gt/BrunoMars_pred.txt 76 | 77 | python object_tracker.py \ 78 | --classes ./model_data/labels/widerface.names \ 79 | --video ./resources/video/in/Darling.mp4 \ 80 | --weights ./weights/yolov3-wider_16000.tf \ 81 | --output_format MP4V \ 82 | --database ./resources/database/Darling \ 83 | --output ./resources/video/out/Darling.mp4 \ 84 | --num_classes 1 \ 85 | --max_face_threshold 0.6871912959056619 \ 86 | --eval ./resources/gt/Darling_pred.txt 87 | 88 | python object_tracker.py \ 89 | --classes ./model_data/labels/widerface.names \ 90 | --video ./resources/video/in/GirlsAloud.mp4 \ 91 | --weights ./weights/yolov3-wider_16000.tf \ 92 | --output_format MP4V \ 93 | --database ./resources/database/GirlsAloud \ 94 | --output ./resources/video/out/GirlsAloud.mp4 \ 95 | --num_classes 1 \ 96 | --max_face_threshold 0.6871912959056619 \ 97 | --eval ./resources/gt/GirlsAloud_pred.txt 98 | 99 | python object_tracker.py \ 100 | --classes ./model_data/labels/widerface.names \ 101 | --video ./resources/video/in/HelloBubble.mp4 \ 102 | --weights ./weights/yolov3-wider_16000.tf \ 103 | --output_format MP4V \ 104 | --database ./resources/database/HelloBubble \ 105 | --output ./resources/video/out/HelloBubble.mp4 \ 106 | --num_classes 1 \ 107 | --max_face_threshold 0.6871912959056619 \ 108 | --eval ./resources/gt/HelloBubble_pred.txt 109 | 110 | python object_tracker.py \ 111 | --classes ./model_data/labels/widerface.names \ 112 | --video ./resources/video/in/Westlife.mp4 \ 113 | --weights ./weights/yolov3-wider_16000.tf \ 114 | --output_format MP4V \ 115 | --database ./resources/database/Westlife \ 116 | --output ./resources/video/out/Westlife.mp4 \ 117 | --num_classes 1 \ 118 | --max_face_threshold 0.6871912959056619 \ 119 | --eval ./resources/gt/Westlife_pred.txt 120 | 121 | python object_tracker.py \ 122 | --classes ./model_data/labels/widerface.names \ 123 | --video ./resources/video/in/Apink.mp4 \ 124 | --weights ./weights/yolov3-wider_16000.tf \ 125 | --output_format MP4V \ 126 | --database ./resources/database/Apink \ 127 | --output ./resources/video/out/Apink.mp4 \ 128 | --num_classes 1 \ 129 | --max_face_threshold 0.6871912959056619 \ 130 | --eval ./resources/gt/Apink_pred.txt 131 | """ 132 | 133 | 134 | flags.DEFINE_string('classes', './model_data/labels/widerface.names', 'path to classes file') 135 | flags.DEFINE_string('weights', './weights/yolov3-wider_16000.tf', 136 | 'path to weights file') 137 | flags.DEFINE_boolean('tiny', False, 'yolov3 or yolov3-tiny') 138 | flags.DEFINE_integer('size', 416, 'resize images to') 139 | flags.DEFINE_string('video', './resources/video/in/1.mp4', 140 | 'path to video file or number for webcam)') 141 | flags.DEFINE_string('database', './resources/database/1', 142 | 'path to database file for identification)') 143 | flags.DEFINE_string('output', './resources/video/out/1.mp4', 'path to output video') 144 | flags.DEFINE_string('output_format', 'MP4V', 'codec used in VideoWriter when saving video to file') 145 | flags.DEFINE_integer('num_classes', 1, 'number of classes in the model') 146 | flags.DEFINE_float('max_face_threshold', 0.6871912959056619, 'face threshold') 147 | flags.DEFINE_string('eval', "./resources/gt/1_pred.txt", 'txt file path for evaluation') 148 | 149 | 150 | def main(_argv): 151 | # set present path 152 | home = os.getcwd() 153 | 154 | # Definition of the parameters 155 | max_cosine_distance = 0.5 156 | nn_budget = None 157 | nms_max_overlap = 1.0 158 | 159 | #initialize deep sort 160 | # model_filename = 'weights/mars-small128.pb' 161 | model_filename = os.path.join(home, "weights", "arcface_weights.h5") 162 | encoder = gdet.create_box_encoder(model_filename, batch_size=128) 163 | metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget) 164 | tracker = Tracker(metric) 165 | 166 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 167 | for physical_device in physical_devices: 168 | tf.config.experimental.set_memory_growth(physical_device, True) 169 | 170 | if FLAGS.tiny: 171 | yolo = YoloV3Tiny(classes=FLAGS.num_classes) 172 | else: 173 | yolo = YoloV3(classes=FLAGS.num_classes) 174 | 175 | yolo.load_weights(FLAGS.weights) 176 | logging.info('weights loaded') 177 | 178 | class_names = [c.strip() for c in open(FLAGS.classes).readlines()] 179 | logging.info('classes loaded') 180 | 181 | times = [] 182 | 183 | 184 | # Database 생성 185 | face_db = dict() 186 | 187 | db_path = FLAGS.database 188 | for name in os.listdir(db_path): 189 | name_path = os.path.join(db_path, name) 190 | name_db = [] 191 | for i in os.listdir(name_path): 192 | if i.split(".")[1] != "jpg": continue 193 | id_path = os.path.join(name_path, i) 194 | img = cv2.imread(id_path) 195 | # img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 196 | # img_in = tf.expand_dims(img_in, 0) 197 | # img_in = transform_images(img_in, FLAGS.size) 198 | # boxes, scores, classes, nums = yolo.predict(img_in) 199 | boxes = np.asarray([[0, 0, img.shape[0], img.shape[1]]]) 200 | scores = np.asarray([[1]]) 201 | converted_boxes = convert_boxes(img, boxes, scores) 202 | features = encoder(img, converted_boxes) 203 | 204 | if features.shape[0] == 0: continue 205 | 206 | for f in range(features.shape[0]): 207 | name_db.append(features[f,:]) 208 | name_db = np.asarray(name_db) 209 | face_db[name] = dict({"used": False, "db": name_db}) 210 | 211 | 212 | try: 213 | vid = cv2.VideoCapture(int(FLAGS.video)) 214 | except: 215 | vid = cv2.VideoCapture(FLAGS.video) 216 | 217 | out = None 218 | 219 | if FLAGS.output: 220 | # by default VideoCapture returns float instead of int 221 | width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)) 222 | height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)) 223 | fps = int(vid.get(cv2.CAP_PROP_FPS)) 224 | codec = cv2.VideoWriter_fourcc(*FLAGS.output_format) 225 | out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height)) 226 | list_file = open('detection.txt', 'w') 227 | frame_index = -1 228 | 229 | fps = 0.0 230 | count = 0 231 | 232 | detection_list = [] 233 | 234 | while True: 235 | _, img = vid.read() 236 | 237 | if img is None: 238 | logging.warning("Empty Frame") 239 | time.sleep(0.1) 240 | count+=1 241 | if count < 3: 242 | continue 243 | else: 244 | break 245 | 246 | img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 247 | img_in = tf.expand_dims(img_in, 0) 248 | img_in = transform_images(img_in, FLAGS.size) 249 | 250 | t1 = time.time() 251 | boxes, scores, classes, nums = yolo.predict(img_in) 252 | 253 | # print(boxes, scores, classes, nums) 254 | # time.sleep(5) 255 | t2 = time.time() 256 | times.append(t2-t1) 257 | print(f'yolo predict time : {t2-t1}') 258 | times = times[-20:] 259 | 260 | t3 = time.time() 261 | ############# 262 | classes = classes[0] 263 | names = [] 264 | for i in range(len(classes)): 265 | names.append(class_names[int(classes[i])]) 266 | names = np.array(names) 267 | converted_boxes = convert_boxes(img, boxes[0], scores[0]) 268 | features = encoder(img, converted_boxes) 269 | detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in zip(converted_boxes, scores[0], names, features)] 270 | 271 | t4 = time.time() 272 | print(f'feature generation time : {t4-t3}') 273 | 274 | #initialize color map 275 | cmap = plt.get_cmap('tab20b') 276 | colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)] 277 | 278 | # run non-maxima suppresion 279 | boxs = np.array([d.tlwh for d in detections]) 280 | scores = np.array([d.confidence for d in detections]) 281 | classes = np.array([d.class_name for d in detections]) 282 | indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores) 283 | detections = [detections[i] for i in indices] 284 | 285 | t5 = time.time() 286 | # Call the tracker 287 | tracker.predict() 288 | # tracker.update(detections) 289 | tracker.update(detections, face_db, FLAGS.max_face_threshold) 290 | t6 = time.time() 291 | print(f'tracking time : {t6-t5}') 292 | 293 | frame_index = frame_index + 1 294 | for track in tracker.tracks: 295 | if not track.is_confirmed() or track.time_since_update > 1: 296 | continue 297 | bbox = track.to_tlbr() 298 | class_name = track.get_class() 299 | face_name = track.get_face_name() 300 | color = colors[int(track.track_id) % len(colors)] 301 | color = [i * 255 for i in color] 302 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2) 303 | cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)+len(str(track.track_id))+len(str(face_name)))*23, int(bbox[1])), color, -1) 304 | # cv2.putText(img, class_name + face_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 305 | cv2.putText(img, class_name + "-" + str(track.track_id) + "-" + face_name, (int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 306 | # cv2.putText(img, class_name + "-" + str(track.track_id),(int(bbox[0]), int(bbox[1]-10)),0, 0.75, (255,255,255),2) 307 | # print(class_name + "-" + str(track.track_id)) 308 | 309 | # detection_list.append(dict({"frame_no": str(frame_index), "id": str(track.track_id), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))})) 310 | if face_name != "": 311 | detection_list.append(dict({"frame_no": str(frame_index), "id": str(face_name), "x": str(int(bbox[0])), "y": str(int(bbox[1])), "width": str(int(bbox[2])-int(bbox[0])), "height": str(int(bbox[3])-int(bbox[1]))})) 312 | ####### 313 | fps = ( fps + (1./(time.time()-t1)) ) / 2 314 | # img = draw_outputs(img, (boxes, scores, classes, nums), class_names) 315 | # img = cv2.putText(img, "Time: {:.2f}ms".format(sum(times)/len(times)*1000), (0, 30), 316 | # cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 317 | img = cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 318 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (20, 20, 255), 2) 319 | if FLAGS.output: 320 | out.write(img) 321 | # frame_index = frame_index + 1 322 | # list_file.write(str(frame_index)+' ') 323 | # if len(converted_boxes) != 0: 324 | # for i in range(0,len(converted_boxes)): 325 | # list_file.write(str(converted_boxes[i][0]) + ' '+str(converted_boxes[i][1]) + ' '+str(converted_boxes[i][2]) + ' '+str(converted_boxes[i][3]) + ' ') 326 | # list_file.write('\n') 327 | cv2.imshow('output', img) 328 | if cv2.waitKey(1) == ord('q'): 329 | break 330 | 331 | cv2.destroyAllWindows() 332 | 333 | 334 | frame_list = sorted(detection_list, key= lambda x: (int(x["frame_no"]), int(x["id"]))) 335 | # pprint.pprint(frame_list) 336 | 337 | f = open(FLAGS.eval, "w") 338 | for a in frame_list: 339 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n") 340 | # 파일 닫기 341 | f.close() 342 | 343 | 344 | if __name__ == '__main__': 345 | try: 346 | app.run(main) 347 | except SystemExit: 348 | pass 349 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.12.0 2 | argon2-cffi @ file:///C:/ci/argon2-cffi_1613038019788/work 3 | astor==0.8.1 4 | astunparse==1.6.3 5 | async-generator==1.10 6 | atomicwrites==1.4.0 7 | attrs @ file:///tmp/build/80754af9/attrs_1604765588209/work 8 | backcall @ file:///home/ktietz/src/ci/backcall_1611930011877/work 9 | bleach @ file:///tmp/build/80754af9/bleach_1612211392645/work 10 | cached-property==1.5.2 11 | cachetools==4.2.1 12 | certifi==2020.12.5 13 | cffi @ file:///C:/ci/cffi_1613247308275/work 14 | chardet==4.0.0 15 | colorama @ file:///tmp/build/80754af9/colorama_1607707115595/work 16 | cycler==0.10.0 17 | decorator @ file:///tmp/build/80754af9/decorator_1617916966915/work 18 | defusedxml @ file:///tmp/build/80754af9/defusedxml_1615228127516/work 19 | entrypoints==0.3 20 | filelock==3.0.12 21 | flake8==3.9.2 22 | flake8-import-order==0.18.1 23 | flatbuffers==1.12 24 | gast==0.3.3 25 | gdown==3.13.0 26 | google-auth==1.28.0 27 | google-auth-oauthlib==0.4.4 28 | google-pasta==0.2.0 29 | grpcio==1.32.0 30 | h5py==2.10.0 31 | idna==2.10 32 | imageio==2.9.0 33 | importlib-metadata @ file:///C:/ci/importlib-metadata_1617877486026/work 34 | iniconfig==1.1.1 35 | ipykernel @ file:///C:/ci/ipykernel_1596208728219/work/dist/ipykernel-5.3.4-py3-none-any.whl 36 | ipython @ file:///C:/ci/ipython_1617121109687/work 37 | ipython-genutils @ file:///tmp/build/80754af9/ipython_genutils_1606773439826/work 38 | ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1610481889018/work 39 | jedi==0.17.0 40 | Jinja2 @ file:///tmp/build/80754af9/jinja2_1612213139570/work 41 | joblib==1.0.1 42 | jsonschema @ file:///tmp/build/80754af9/jsonschema_1602607155483/work 43 | jupyter==1.0.0 44 | jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1616770841739/work 45 | jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1616615302928/work 46 | jupyter-core @ file:///C:/ci/jupyter_core_1612213516947/work 47 | jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work 48 | jupyterlab-widgets @ file:///tmp/build/80754af9/jupyterlab_widgets_1609884341231/work 49 | Keras==2.4.3 50 | Keras-Applications==1.0.8 51 | Keras-Preprocessing==1.1.2 52 | kiwisolver==1.3.1 53 | lxml==4.6.3 54 | Markdown==3.3.4 55 | MarkupSafe @ file:///C:/ci/markupsafe_1594405949945/work 56 | matplotlib==3.4.1 57 | mccabe==0.6.1 58 | mistune @ file:///C:/ci/mistune_1594373272338/work 59 | motmetrics==1.2.0 60 | mtcnn==0.1.0 61 | nbclient @ file:///tmp/build/80754af9/nbclient_1614364831625/work 62 | nbconvert @ file:///C:/ci/nbconvert_1601914921407/work 63 | nbformat @ file:///tmp/build/80754af9/nbformat_1617383369282/work 64 | nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1613680548246/work 65 | networkx==2.5.1 66 | notebook @ file:///C:/ci/notebook_1616443616158/work 67 | numpy==1.19.5 68 | oauthlib==3.1.0 69 | opencv-python==4.5.1.48 70 | opt-einsum==3.3.0 71 | packaging @ file:///tmp/build/80754af9/packaging_1611952188834/work 72 | pandas==1.2.4 73 | pandocfilters @ file:///C:/ci/pandocfilters_1605102427207/work 74 | parso @ file:///tmp/build/80754af9/parso_1617223946239/work 75 | pickleshare @ file:///tmp/build/80754af9/pickleshare_1606932040724/work 76 | Pillow==8.2.0 77 | pluggy==0.13.1 78 | prometheus-client @ file:///tmp/build/80754af9/prometheus_client_1618088486455/work 79 | prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1616415428029/work 80 | protobuf==3.15.7 81 | py==1.10.0 82 | py-cpuinfo==8.0.0 83 | pyasn1==0.4.8 84 | pyasn1-modules==0.2.8 85 | pycodestyle==2.7.0 86 | pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work 87 | pyflakes==2.3.1 88 | Pygments @ file:///tmp/build/80754af9/pygments_1615143339740/work 89 | pyparsing @ file:///home/linux1/recipes/ci/pyparsing_1610983426697/work 90 | pyrsistent @ file:///C:/ci/pyrsistent_1600123688363/work 91 | PySocks==1.7.1 92 | pytest==6.2.4 93 | pytest-benchmark==3.4.1 94 | python-dateutil @ file:///home/ktietz/src/ci/python-dateutil_1611928101742/work 95 | pytz==2021.1 96 | PyWavelets==1.1.1 97 | pywin32==227 98 | pywinpty==0.5.7 99 | PyYAML==5.4.1 100 | pyzmq==20.0.0 101 | qtconsole @ file:///tmp/build/80754af9/qtconsole_1616775094278/work 102 | QtPy==1.9.0 103 | requests==2.25.1 104 | requests-oauthlib==1.3.0 105 | retina-face==0.0.4 106 | rsa==4.7.2 107 | scikit-image==0.18.1 108 | scikit-learn==0.22.2 109 | scipy==1.6.2 110 | seaborn==0.10.0 111 | Send2Trash @ file:///tmp/build/80754af9/send2trash_1607525499227/work 112 | six @ file:///C:/ci/six_1605205426665/work 113 | sklearn==0.0 114 | tensorboard==2.4.1 115 | tensorboard-plugin-wit==1.8.0 116 | tensorflow==2.4.1 117 | tensorflow-estimator==2.4.0 118 | termcolor==1.1.0 119 | terminado==0.9.4 120 | testpath @ file:///home/ktietz/src/ci/testpath_1611930608132/work 121 | threadpoolctl==2.1.0 122 | tifffile==2021.4.8 123 | toml==0.10.2 124 | tornado @ file:///C:/ci/tornado_1606935947090/work 125 | tqdm==4.60.0 126 | traitlets @ file:///home/ktietz/src/ci/traitlets_1611929699868/work 127 | typing-extensions @ file:///home/ktietz/src/ci_mi/typing_extensions_1612808209620/work 128 | urllib3==1.26.4 129 | wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work 130 | webencodings==0.5.1 131 | Werkzeug==1.0.1 132 | widgetsnbextension==3.5.1 133 | wincertstore==0.2 134 | wrapt==1.12.1 135 | xmltodict==0.12.0 136 | zipp @ file:///tmp/build/80754af9/zipp_1615904174917/work 137 | -------------------------------------------------------------------------------- /resources/database/1/ironman/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/1.jpg -------------------------------------------------------------------------------- /resources/database/1/ironman/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/2.jpg -------------------------------------------------------------------------------- /resources/database/1/ironman/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/1/ironman/3.jpg -------------------------------------------------------------------------------- /resources/database/2/chimchakman/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/1.jpg -------------------------------------------------------------------------------- /resources/database/2/chimchakman/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/chimchakman/2.jpg -------------------------------------------------------------------------------- /resources/database/2/juhomin/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/1.jpg -------------------------------------------------------------------------------- /resources/database/2/juhomin/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/juhomin/2.jpg -------------------------------------------------------------------------------- /resources/database/2/kimpoong/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/1.jpg -------------------------------------------------------------------------------- /resources/database/2/kimpoong/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/database/2/kimpoong/2.jpg -------------------------------------------------------------------------------- /resources/fonts/futur.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/resources/fonts/futur.ttf -------------------------------------------------------------------------------- /resources/gt/README.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------- 2 | Music video dataset 3 | -------------------------------------------------------------------- 4 | 5 | This file describes the music video dataset as introduced in 6 | 7 | [1] Shun Zhang, Yihong Gong, Jia-Bin Huang, Jongwoo Lim, Jinjun Wang, 8 | Narendra Ahuja and Ming-Hsuan Yang. Tracking Persons-of-Interest via 9 | Adaptive Discriminative Features[C]. The 14th European Conference on 10 | Computer Vision (ECCV), 2016. 11 | [2] The project website: http://shunzhang.me.pn/papers/eccv2016/ 12 | 13 | The dataset contains manually annotated face trajectories from 8 music 14 | videos from YouTube: T-ara, Westlife, Pussycat Dolls, Apink, Darling, 15 | Bruno Mars, Hello Bubble and Girls Aloud (as detailed in [1,2]). 16 | 17 | Kindly cite [1] when using the dataset, where appropriate. 18 | 19 | -------------------------------------------------------------------- 20 | Description of the files 21 | -------------------------------------------------------------------- 22 | 23 | The annotations for each video are stored in an XML file. 24 | We give an XML example below and introduce the XML format. 25 | 26 | 27 | Example: 28 | 1. 29 | 2. 35 | 36 | 37 | The 1st line at the top of our example is the XML declaration 38 | that indicates the version of XML. 39 | The 2nd line indicates the video information, including video name, 40 | start frame and end frame. 41 | The 3rd line indicates the trajectory information, including trajectory 42 | identity, start frame and end frame. 43 | The 4th line contains 5 values of per bounding box: 44 | ,,,, 45 | (x-bb_left,y-bb_top) is the left-top point of the bounding box. 46 | is the width and height of the bounding box. 47 | 48 | -- EOF 49 | -------------------------------------------------------------------------------- /test.md: -------------------------------------------------------------------------------- 1 | # 맨처음 2 | 3 | ### T-ara 4 | 5 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 6 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 7 | |53.6%|59.0%|49.5%|76.6%|90.4%|1176|3406|3752|517|42.6%|71%|0.241|4710| 8 | 9 | ### GirlsAloud 10 | 11 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 12 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 13 | |39.0%|42.6%|36.4%|73.9%|85.3%|2087|4275|4687|1122|32.6%|64.6%|0.314|6630| 14 | 15 | ### Darling 16 | 17 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 18 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 19 | |3.2%|44.2%|42.6%|79.7%|82.1%|1654|1935|3048|743|30.4%|65.7%|0.267|6180| 20 | 21 | ### Westlife 22 | 23 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 24 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 25 | |64.3%|61.3%|68.4%|87.8%|77.9%|2828|1389|1809|562|47.0%|64.7%|0.411|6870| 26 | 27 | ### BrunoMars 28 | 29 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 30 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 31 | |40.5%|40.7%|40.8%|74.1%|73.1%|4560|4330|5128|1010|16.1%|78.9%|0.539|8460| 32 | 33 | ### HelloBubble 34 | 35 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 36 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 37 | |41.9%|45.3%|39.1%|73.9%|85.2%| 673|1363|1381|301|34.6%|69.7%|0.256|4920| 38 | 39 | ### Apink 40 | 41 | |IDF1|IDP|IDR|Rcll|Prcn|FP|FN|IDs|FM|MOTA|MOTP|FAR|Fn| 42 | |:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:|:-:| 43 | |56.2% | 58.9% | 53.8% |79.5% |86.8% | 883| 1491| 1234 | 337 |50.4% | 66.8% | 0.15 |4650| 44 | 45 | 46 | # n = 20을 기반으로 얼굴 크로핑 47 | ## T ara 48 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 49 | 0 61.3% 67.4% 56.6% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3 50 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 3131 516 46.9% 0.285 2563 3 3 51 | 52 | ## GirlsAloud 53 | 54 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 55 | 0 49.8% 53.6% 46.9% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2 56 | 1 106.1% 100.0% 100.0% 75.6% 85.5% 5 1 4 0 2106 3990 4384 1048 36.0% 0.353 3633 2 2 57 | 58 | ## Darling 59 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 60 | 0 53.6% 54.7% 53.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6 61 | 1 101.2% 100.0% 100.0% 80.1% 82.0% 8 4 4 0 1678 1896 2717 726 34.0% 0.343 2126 6 6 62 | 63 | ## Westlife 64 | 65 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 66 | 0 71.0% 67.2% 75.9% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2 67 | 1 93.4% 100.0% 100.0% 89.4% 78.4% 4 4 0 0 2808 1208 1482 477 51.7% 0.353 1180 2 2 68 | 69 | ## brunomars 70 | 71 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 72 | 0 52.1% 51.9% 52.8% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8 73 | 1 98.7% 100.0% 100.0% 75.3% 73.3% 11 5 6 0 4575 4126 4105 978 23.3% 0.319 2815 8 8 74 | 75 | ## Hellobubble 76 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 77 | 0 58.3% 62.2% 55.1% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2 78 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 1113 273 41.1% 0.314 810 2 2 79 | 80 | ## Apink 81 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 82 | 0 69.5% 72.4% 66.9% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4 83 | 1 103.8% 100.0% 100.0% 80.4% 86.8% 6 3 3 0 889 1427 855 304 56.4% 0.332 483 4 4 84 | 85 | 86 | # n = 10 87 | 88 | ## T ara 89 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 90 | 0 67.4% 74.1% 62.2% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3 91 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1176 3405 2479 516 51.4% 0.285 2033 3 3 92 | 93 | ## GirlsAloud 94 | 95 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 96 | 0 55.0% 59.1% 51.8% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3 97 | 1 106.0% 100.0% 100.0% 75.7% 85.5% 5 1 4 0 2108 3975 3906 1037 39.0% 0.353 3263 3 3 98 | 99 | ## Darling 100 | 101 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 102 | 0 59.2% 60.2% 58.6% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6 103 | 1 101.0% 100.0% 100.0% 80.3% 81.9% 8 4 4 0 1692 1876 2367 716 37.7% 0.343 1894 6 6 104 | ## Westlife 105 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 106 | 0 73.6% 69.7% 78.8% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1 107 | 1 93.4% 100.0% 100.0% 89.5% 78.4% 4 4 0 0 2810 1195 1347 466 53.0% 0.353 1079 1 1 108 | 109 | ## brunomars 110 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 111 | 0 57.8% 57.5% 58.6% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8 112 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4579 4115 3311 973 28.1% 0.319 2215 8 8 113 | 114 | ## Hellobubble 115 | 116 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 117 | 0 64.0% 68.3% 60.5% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2 118 | 1 105.9% 100.0% 100.0% 75.6% 85.1% 4 0 4 0 693 1272 902 273 45.1% 0.314 676 2 2 119 | ## Apink 120 | 121 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 122 | 0 71.9% 74.9% 69.3% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4 123 | 1 103.8% 100.0% 100.0% 80.5% 86.8% 6 3 3 0 889 1420 691 299 58.8% 0.333 377 4 4 124 | 125 | # only using id, 그냥 deep sort처럼 id로 뺀 경우 126 | 127 | ## T ara 128 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 129 | 0 5.7% 6.2% 5.3% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0 130 | 1 107.4% 100.0% 100.0% 77.3% 89.7% 6 3 3 0 1283 3296 317 531 66.3% 0.286 27 278 0 131 | 132 | ## GirlsAloud 133 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 134 | 0 7.3% 7.5% 7.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0 135 | 1 103.5% 100.0% 100.0% 78.3% 84.1% 5 1 4 0 2427 3548 543 989 60.2% 0.353 85 383 0 136 | 137 | 138 | ## Darling 139 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 140 | 0 5.4% 5.3% 5.5% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0 141 | 1 98.9% 100.0% 100.0% 82.2% 80.3% 8 4 4 0 1916 1698 527 668 56.6% 0.342 59 407 0 142 | 143 | ## Westlife 144 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 145 | 0 7.2% 6.4% 8.4% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0 146 | 1 86.4% 100.0% 100.0% 90.8% 69.0% 4 4 0 0 4637 1044 117 445 49.1% 0.353 6 108 0 147 | 148 | ## brunomars 149 | 150 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 151 | 0 6.8% 6.6% 7.1% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1 152 | 1 96.2% 100.0% 100.0% 77.8% 72.1% 11 5 6 0 5018 3710 535 935 44.6% 0.319 59 384 1 153 | (face_tracker) 154 | ## Hellobubble 155 | 156 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 157 | 0 9.8% 10.3% 9.3% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2 158 | 1 104.9% 100.0% 100.0% 76.6% 84.4% 4 0 4 0 738 1223 175 277 59.1% 0.313 21 140 2 159 | ## Apink 160 | 161 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 162 | 0 6.4% 6.6% 6.2% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0 163 | 1 102.7% 100.0% 100.0% 81.9% 86.5% 6 5 1 0 934 1314 223 278 66.0% 0.332 20 185 0 164 | 165 | 166 | 167 | # face id, track id말고 face id로 처음에 매칭될 때만 배정하여 추출한 결과 168 | 169 | 170 | ## T ara 171 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 172 | 0 53.2% 66.5% 49.1% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1 173 | 1 108.3% 100.0% 100.0% 76.6% 90.4% 6 3 3 0 1178 3402 209 515 67.0% 0.285 241 1 1 174 | 175 | ## GirlsAloud 176 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 177 | 0 51.4% 60.9% 48.5% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1 178 | 1 105.9% 100.0% 100.0% 75.9% 85.5% 5 1 4 0 2116 3951 341 1021 60.9% 0.353 466 1 1 179 | 180 | ## Darling 181 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 182 | 0 53.2% 57.4% 53.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2 183 | 1 100.3% 100.0% 100.0% 80.7% 81.3% 8 4 4 0 1772 1835 292 704 59.1% 0.343 278 2 2 184 | 185 | ## Westlife 186 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 187 | 0 49.7% 59.5% 56.8% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0 188 | 1 87.5% 100.0% 100.0% 90.6% 70.5% 4 4 0 0 4309 1065 70 446 52.2% 0.353 118 0 0 189 | 190 | ## brunomars 191 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 192 | 0 47.1% 51.8% 47.8% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7 193 | 1 98.6% 100.0% 100.0% 75.4% 73.3% 11 5 6 0 4584 4110 320 970 46.0% 0.319 254 7 7 194 | 195 | ## Hellobubble 196 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 197 | 0 51.8% 58.7% 48.9% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3 198 | 1 105.8% 100.0% 100.0% 75.7% 85.1% 4 0 4 0 694 1270 91 273 60.7% 0.314 77 3 3 199 | 200 | ## Apink 201 | IDF1 IDP IDR Rcll Prcn GT MT PT ML FP FN IDs FM MOTA MOTP IDt IDa IDm 202 | 0 61.8% 65.9% 59.6% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1 203 | 1 103.7% 100.0% 100.0% 80.6% 86.8% 6 3 3 0 889 1409 87 292 67.2% 0.333 69 1 1 204 | -------------------------------------------------------------------------------- /tools/freeze_model.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import argparse 3 | import tensorflow as tf 4 | import tensorflow.contrib.slim as slim 5 | 6 | 7 | def _batch_norm_fn(x, scope=None): 8 | if scope is None: 9 | scope = tf.get_variable_scope().name + "/bn" 10 | return slim.batch_norm(x, scope=scope) 11 | 12 | 13 | def create_link( 14 | incoming, network_builder, scope, nonlinearity=tf.nn.elu, 15 | weights_initializer=tf.truncated_normal_initializer(stddev=1e-3), 16 | regularizer=None, is_first=False, summarize_activations=True): 17 | if is_first: 18 | network = incoming 19 | else: 20 | network = _batch_norm_fn(incoming, scope=scope + "/bn") 21 | network = nonlinearity(network) 22 | if summarize_activations: 23 | tf.summary.histogram(scope+"/activations", network) 24 | 25 | pre_block_network = network 26 | post_block_network = network_builder(pre_block_network, scope) 27 | 28 | incoming_dim = pre_block_network.get_shape().as_list()[-1] 29 | outgoing_dim = post_block_network.get_shape().as_list()[-1] 30 | if incoming_dim != outgoing_dim: 31 | assert outgoing_dim == 2 * incoming_dim, \ 32 | "%d != %d" % (outgoing_dim, 2 * incoming) 33 | projection = slim.conv2d( 34 | incoming, outgoing_dim, 1, 2, padding="SAME", activation_fn=None, 35 | scope=scope+"/projection", weights_initializer=weights_initializer, 36 | biases_initializer=None, weights_regularizer=regularizer) 37 | network = projection + post_block_network 38 | else: 39 | network = incoming + post_block_network 40 | return network 41 | 42 | 43 | def create_inner_block( 44 | incoming, scope, nonlinearity=tf.nn.elu, 45 | weights_initializer=tf.truncated_normal_initializer(1e-3), 46 | bias_initializer=tf.zeros_initializer(), regularizer=None, 47 | increase_dim=False, summarize_activations=True): 48 | n = incoming.get_shape().as_list()[-1] 49 | stride = 1 50 | if increase_dim: 51 | n *= 2 52 | stride = 2 53 | 54 | incoming = slim.conv2d( 55 | incoming, n, [3, 3], stride, activation_fn=nonlinearity, padding="SAME", 56 | normalizer_fn=_batch_norm_fn, weights_initializer=weights_initializer, 57 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 58 | scope=scope + "/1") 59 | if summarize_activations: 60 | tf.summary.histogram(incoming.name + "/activations", incoming) 61 | 62 | incoming = slim.dropout(incoming, keep_prob=0.6) 63 | 64 | incoming = slim.conv2d( 65 | incoming, n, [3, 3], 1, activation_fn=None, padding="SAME", 66 | normalizer_fn=None, weights_initializer=weights_initializer, 67 | biases_initializer=bias_initializer, weights_regularizer=regularizer, 68 | scope=scope + "/2") 69 | return incoming 70 | 71 | 72 | def residual_block(incoming, scope, nonlinearity=tf.nn.elu, 73 | weights_initializer=tf.truncated_normal_initializer(1e3), 74 | bias_initializer=tf.zeros_initializer(), regularizer=None, 75 | increase_dim=False, is_first=False, 76 | summarize_activations=True): 77 | 78 | def network_builder(x, s): 79 | return create_inner_block( 80 | x, s, nonlinearity, weights_initializer, bias_initializer, 81 | regularizer, increase_dim, summarize_activations) 82 | 83 | return create_link( 84 | incoming, network_builder, scope, nonlinearity, weights_initializer, 85 | regularizer, is_first, summarize_activations) 86 | 87 | 88 | def _create_network(incoming, reuse=None, weight_decay=1e-8): 89 | nonlinearity = tf.nn.elu 90 | conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 91 | conv_bias_init = tf.zeros_initializer() 92 | conv_regularizer = slim.l2_regularizer(weight_decay) 93 | fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) 94 | fc_bias_init = tf.zeros_initializer() 95 | fc_regularizer = slim.l2_regularizer(weight_decay) 96 | 97 | def batch_norm_fn(x): 98 | return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") 99 | 100 | network = incoming 101 | network = slim.conv2d( 102 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 103 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", 104 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 105 | weights_regularizer=conv_regularizer) 106 | network = slim.conv2d( 107 | network, 32, [3, 3], stride=1, activation_fn=nonlinearity, 108 | padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", 109 | weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, 110 | weights_regularizer=conv_regularizer) 111 | 112 | # NOTE(nwojke): This is missing a padding="SAME" to match the CNN 113 | # architecture in Table 1 of the paper. Information on how this affects 114 | # performance on MOT 16 training sequences can be found in 115 | # issue 10 https://github.com/nwojke/deep_sort/issues/10 116 | network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") 117 | 118 | network = residual_block( 119 | network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, 120 | conv_regularizer, increase_dim=False, is_first=True) 121 | network = residual_block( 122 | network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, 123 | conv_regularizer, increase_dim=False) 124 | 125 | network = residual_block( 126 | network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, 127 | conv_regularizer, increase_dim=True) 128 | network = residual_block( 129 | network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, 130 | conv_regularizer, increase_dim=False) 131 | 132 | network = residual_block( 133 | network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, 134 | conv_regularizer, increase_dim=True) 135 | network = residual_block( 136 | network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, 137 | conv_regularizer, increase_dim=False) 138 | 139 | feature_dim = network.get_shape().as_list()[-1] 140 | network = slim.flatten(network) 141 | 142 | network = slim.dropout(network, keep_prob=0.6) 143 | network = slim.fully_connected( 144 | network, feature_dim, activation_fn=nonlinearity, 145 | normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, 146 | scope="fc1", weights_initializer=fc_weight_init, 147 | biases_initializer=fc_bias_init) 148 | 149 | features = network 150 | 151 | # Features in rows, normalize axis 1. 152 | features = slim.batch_norm(features, scope="ball", reuse=reuse) 153 | feature_norm = tf.sqrt( 154 | tf.constant(1e-8, tf.float32) + 155 | tf.reduce_sum(tf.square(features), [1], keepdims=True)) 156 | features = features / feature_norm 157 | return features, None 158 | 159 | 160 | def _network_factory(weight_decay=1e-8): 161 | 162 | def factory_fn(image, reuse): 163 | with slim.arg_scope([slim.batch_norm, slim.dropout], 164 | is_training=False): 165 | with slim.arg_scope([slim.conv2d, slim.fully_connected, 166 | slim.batch_norm, slim.layer_norm], 167 | reuse=reuse): 168 | features, logits = _create_network( 169 | image, reuse=reuse, weight_decay=weight_decay) 170 | return features, logits 171 | 172 | return factory_fn 173 | 174 | 175 | def _preprocess(image): 176 | image = image[:, :, ::-1] # BGR to RGB 177 | return image 178 | 179 | 180 | def parse_args(): 181 | """Parse command line arguments. 182 | """ 183 | parser = argparse.ArgumentParser(description="Freeze old model") 184 | parser.add_argument( 185 | "--checkpoint_in", 186 | default="resources/networks/mars-small128.ckpt-68577", 187 | help="Path to checkpoint file") 188 | parser.add_argument( 189 | "--graphdef_out", 190 | default="resources/networks/mars-small128.pb") 191 | return parser.parse_args() 192 | 193 | 194 | def main(): 195 | args = parse_args() 196 | 197 | with tf.Session(graph=tf.Graph()) as session: 198 | input_var = tf.placeholder( 199 | tf.uint8, (None, 128, 64, 3), name="images") 200 | image_var = tf.map_fn( 201 | lambda x: _preprocess(x), tf.cast(input_var, tf.float32), 202 | back_prop=False) 203 | 204 | factory_fn = _network_factory() 205 | features, _ = factory_fn(image_var, reuse=None) 206 | features = tf.identity(features, name="features") 207 | 208 | saver = tf.train.Saver(slim.get_variables_to_restore()) 209 | saver.restore(session, args.checkpoint_in) 210 | 211 | output_graph_def = tf.graph_util.convert_variables_to_constants( 212 | session, tf.get_default_graph().as_graph_def(), 213 | [features.name.split(":")[0]]) 214 | with tf.gfile.GFile(args.graphdef_out, "wb") as file_handle: 215 | file_handle.write(output_graph_def.SerializeToString()) 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /tools/generate_detections.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import os 3 | import errno 4 | import argparse 5 | import numpy as np 6 | import cv2 7 | import tensorflow.compat.v1 as tf 8 | from tensorflow.keras.preprocessing import image as keras_image 9 | 10 | # 추가 11 | import sys 12 | sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))) 13 | from deepface.basemodels import ArcFace 14 | import tensorflow as tf2 15 | 16 | gpus = tf.config.experimental.list_physical_devices('GPU') 17 | if gpus: 18 | try: 19 | # Currently, memory growth needs to be the same across GPUs 20 | for gpu in gpus: 21 | tf.config.experimental.set_memory_growth(gpu, True) 22 | logical_gpus = tf.config.experimental.list_logical_devices('GPU') 23 | print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs") 24 | except RuntimeError as e: 25 | # Memory growth must be set before GPUs have been initialized 26 | print(e) 27 | 28 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 29 | if len(physical_devices) > 0: 30 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 31 | 32 | def _run_in_batches(f, data_dict, out, batch_size): 33 | data_len = len(out) 34 | num_batches = int(data_len / batch_size) 35 | 36 | s, e = 0, 0 37 | for i in range(num_batches): 38 | s, e = i * batch_size, (i + 1) * batch_size 39 | batch_data_dict = {k: v[s:e] for k, v in data_dict.items()} 40 | out[s:e] = f(batch_data_dict) 41 | if e < len(out): 42 | batch_data_dict = {k: v[e:] for k, v in data_dict.items()} 43 | out[e:] = f(batch_data_dict) 44 | 45 | 46 | def extract_image_patch(image, bbox, patch_shape): 47 | """Extract image patch from bounding box. 48 | 49 | Parameters 50 | ---------- 51 | image : ndarray 52 | The full image. 53 | bbox : array_like 54 | The bounding box in format (x, y, width, height). 55 | patch_shape : Optional[array_like] 56 | This parameter can be used to enforce a desired patch shape 57 | (height, width). First, the `bbox` is adapted to the aspect ratio 58 | of the patch shape, then it is clipped at the image boundaries. 59 | If None, the shape is computed from :arg:`bbox`. 60 | 61 | Returns 62 | ------- 63 | ndarray | NoneType 64 | An image patch showing the :arg:`bbox`, optionally reshaped to 65 | :arg:`patch_shape`. 66 | Returns None if the bounding box is empty or fully outside of the image 67 | boundaries. 68 | 69 | """ 70 | bbox = np.array(bbox) 71 | if patch_shape is not None: 72 | # correct aspect ratio to patch shape 73 | target_aspect = float(patch_shape[1]) / patch_shape[0] 74 | new_width = target_aspect * bbox[3] 75 | bbox[0] -= (new_width - bbox[2]) / 2 76 | bbox[2] = new_width 77 | 78 | # convert to top left, bottom right 79 | bbox[2:] += bbox[:2] 80 | bbox = bbox.astype(np.int) 81 | 82 | # clip at image boundaries 83 | bbox[:2] = np.maximum(0, bbox[:2]) 84 | bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:]) 85 | if np.any(bbox[:2] >= bbox[2:]): 86 | return None 87 | sx, sy, ex, ey = bbox 88 | image = image[sy:ey, sx:ex] 89 | image = cv2.resize(image, tuple(patch_shape[::-1])) 90 | 91 | # 추가 92 | image = keras_image.img_to_array(image) 93 | # image = np.expand_dims(image, axis = 0) 94 | image /= 255 #normalize input in [0, 1] 95 | 96 | return image 97 | 98 | 99 | class ImageEncoder(object): 100 | 101 | def __init__(self, checkpoint_filename, input_name="images", 102 | output_name="features"): 103 | self.session = ArcFace.loadModel(checkpoint_filename) # 성공 104 | self.feature_dim = self.session.layers[-1].output_shape[1] 105 | self.image_shape = list(self.session.input_shape[1:]) 106 | 107 | # self.session = tf.Session() 108 | # with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle: 109 | # graph_def = tf.GraphDef() 110 | # graph_def.ParseFromString(file_handle.read()) 111 | # tf.import_graph_def(graph_def, name="net") 112 | # self.input_var = tf.get_default_graph().get_tensor_by_name( 113 | # "%s:0" % input_name) 114 | # self.output_var = tf.get_default_graph().get_tensor_by_name( 115 | # "%s:0" % output_name) 116 | 117 | # assert len(self.output_var.get_shape()) == 2 118 | # assert len(self.input_var.get_shape()) == 4 119 | # self.feature_dim = self. output_var.get_shape().as_list()[-1] 120 | # self.image_shape = self.input_var.get_shape().as_list()[1:] 121 | 122 | def __call__(self, data_x, batch_size=32): 123 | out = np.zeros((len(data_x), self.feature_dim), np.float32) 124 | 125 | if data_x.shape[0] == 0: 126 | return out 127 | 128 | import time 129 | t1 = time.time() 130 | 131 | # 추가 132 | out = self.session.predict(data_x) 133 | 134 | # _run_in_batches( 135 | # lambda x: self.session.run(self.output_var, feed_dict=x), 136 | # {self.input_var: data_x}, out, batch_size) 137 | # t2 = time.time() 138 | # print("patch inference time : ", t2-t1) 139 | return out 140 | 141 | 142 | def create_box_encoder(model_filename, input_name="images", 143 | output_name="features", batch_size=32): 144 | image_encoder = ImageEncoder(model_filename, input_name, output_name) 145 | image_shape = image_encoder.image_shape 146 | 147 | def encoder(image, boxes): 148 | image_patches = [] 149 | for box in boxes: 150 | patch = extract_image_patch(image, box, image_shape[:2]) # 자동으로 모양 리사이즈 됨 151 | 152 | if patch is None: 153 | print("WARNING: Failed to extract image patch: %s." % str(box)) 154 | # patch = np.random.uniform( 155 | # 0., 255., image_shape).astype(np.uint8) 156 | patch = np.random.uniform( 157 | 0., 1., image_shape).astype(np.float32) 158 | image_patches.append(patch) 159 | image_patches = np.asarray(image_patches) 160 | return image_encoder(image_patches, batch_size) 161 | 162 | return encoder 163 | 164 | 165 | def generate_detections(encoder, mot_dir, output_dir, detection_dir=None): 166 | """Generate detections with features. 167 | 168 | Parameters 169 | ---------- 170 | encoder : Callable[image, ndarray] -> ndarray 171 | The encoder function takes as input a BGR color image and a matrix of 172 | bounding boxes in format `(x, y, w, h)` and returns a matrix of 173 | corresponding feature vectors. 174 | mot_dir : str 175 | Path to the MOTChallenge directory (can be either train or test). 176 | output_dir 177 | Path to the output directory. Will be created if it does not exist. 178 | detection_dir 179 | Path to custom detections. The directory structure should be the default 180 | MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the 181 | standard MOTChallenge detections. 182 | 183 | """ 184 | if detection_dir is None: 185 | detection_dir = mot_dir 186 | try: 187 | os.makedirs(output_dir) 188 | except OSError as exception: 189 | if exception.errno == errno.EEXIST and os.path.isdir(output_dir): 190 | pass 191 | else: 192 | raise ValueError( 193 | "Failed to created output directory '%s'" % output_dir) 194 | 195 | for sequence in os.listdir(mot_dir): 196 | print("Processing %s" % sequence) 197 | sequence_dir = os.path.join(mot_dir, sequence) 198 | 199 | image_dir = os.path.join(sequence_dir, "img1") 200 | image_filenames = { 201 | int(os.path.splitext(f)[0]): os.path.join(image_dir, f) 202 | for f in os.listdir(image_dir)} 203 | 204 | detection_file = os.path.join( 205 | detection_dir, sequence, "det/det.txt") 206 | detections_in = np.loadtxt(detection_file, delimiter=',') 207 | detections_out = [] 208 | 209 | frame_indices = detections_in[:, 0].astype(np.int) 210 | min_frame_idx = frame_indices.astype(np.int).min() 211 | max_frame_idx = frame_indices.astype(np.int).max() 212 | for frame_idx in range(min_frame_idx, max_frame_idx + 1): 213 | print("Frame %05d/%05d" % (frame_idx, max_frame_idx)) 214 | mask = frame_indices == frame_idx 215 | rows = detections_in[mask] 216 | 217 | if frame_idx not in image_filenames: 218 | print("WARNING could not find image for frame %d" % frame_idx) 219 | continue 220 | bgr_image = cv2.imread( 221 | image_filenames[frame_idx], cv2.IMREAD_COLOR) 222 | features = encoder(bgr_image, rows[:, 2:6].copy()) 223 | detections_out += [np.r_[(row, feature)] for row, feature 224 | in zip(rows, features)] 225 | 226 | output_filename = os.path.join(output_dir, "%s.npy" % sequence) 227 | np.save( 228 | output_filename, np.asarray(detections_out), allow_pickle=False) 229 | 230 | 231 | def parse_args(): 232 | """Parse command line arguments. 233 | """ 234 | parser = argparse.ArgumentParser(description="Re-ID feature extractor") 235 | parser.add_argument( 236 | "--model", 237 | default="resources/networks/mars-small128.pb", 238 | help="Path to freezed inference graph protobuf.") 239 | parser.add_argument( 240 | "--mot_dir", help="Path to MOTChallenge directory (train or test)", 241 | required=True) 242 | parser.add_argument( 243 | "--detection_dir", help="Path to custom detections. Defaults to " 244 | "standard MOT detections Directory structure should be the default " 245 | "MOTChallenge structure: [sequence]/det/det.txt", default=None) 246 | parser.add_argument( 247 | "--output_dir", help="Output directory. Will be created if it does not" 248 | " exist.", default="detections") 249 | return parser.parse_args() 250 | 251 | 252 | def main(): 253 | args = parse_args() 254 | encoder = create_box_encoder(args.model, batch_size=32) 255 | generate_detections(encoder, args.mot_dir, args.output_dir, 256 | args.detection_dir) 257 | 258 | 259 | if __name__ == "__main__": 260 | main() 261 | -------------------------------------------------------------------------------- /xml2txt.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as elemTree 2 | import os 3 | import pprint 4 | from absl import app, flags, logging 5 | from absl.flags import FLAGS 6 | 7 | """ 8 | python xml2txt.py \ 9 | --gt_path ./resources/gt/T-ara_gt.xml \ 10 | --gt_file_path ./resources/gt/T-ara_gt.txt 11 | 12 | python xml2txt.py \ 13 | --gt_path ./resources/gt/GirlsAloud_gt.xml \ 14 | --gt_file_path ./resources/gt/GirlsAloud_gt.txt 15 | 16 | python xml2txt.py \ 17 | --gt_path ./resources/gt/Darling_gt.xml \ 18 | --gt_file_path ./resources/gt/Darling_gt.txt 19 | 20 | python xml2txt.py \ 21 | --gt_path ./resources/gt/Westlife_gt.xml \ 22 | --gt_file_path ./resources/gt/Westlife_gt.txt 23 | 24 | python xml2txt.py \ 25 | --gt_path ./resources/gt/BrunoMars_gt.xml \ 26 | --gt_file_path ./resources/gt/BrunoMars_gt.txt 27 | 28 | python xml2txt.py \ 29 | --gt_path ./resources/gt/HelloBubble_gt.xml \ 30 | --gt_file_path ./resources/gt/HelloBubble_gt.txt 31 | 32 | python xml2txt.py \ 33 | --gt_path ./resources/gt/Apink_gt.xml \ 34 | --gt_file_path ./resources/gt/Apink_gt.txt 35 | """ 36 | 37 | flags.DEFINE_string('gt_path', './resources/gt/T-ara_gt.xml', 'path to gt') 38 | flags.DEFINE_string('gt_file_path', './resources/gt/T-ara_gt.txt', 'path to save converted file') 39 | 40 | 41 | def main(args): 42 | tree = elemTree.parse(FLAGS.gt_path) 43 | 44 | root=tree.getroot() 45 | 46 | print(root.tag, root.attrib) 47 | print(root.find("Trajectory")) 48 | 49 | frame_list = [] 50 | 51 | for traj in root: 52 | for f in traj: 53 | a = f.attrib 54 | a["frame_no"] = str(int(a["frame_no"])-1) 55 | a["id"] = traj.attrib["obj_id"] 56 | frame_list.append(a) 57 | 58 | 59 | frame_list = sorted(frame_list, key= lambda x: (int(x["frame_no"]), int(x["id"]))) 60 | # pprint.pprint(frame_list) 61 | 62 | 63 | f = open(FLAGS.gt_file_path, 'w') 64 | 65 | for a in frame_list: 66 | f.write(a["frame_no"] + " " + a["id"] + " " + a["x"] + " " + a["y"] + " " + a["width"] + " " + a["height"] + "\n") 67 | # 파일 닫기 68 | f.close() 69 | 70 | 71 | 72 | if __name__ == '__main__': 73 | try: 74 | app.run(main) 75 | except SystemExit: 76 | pass 77 | -------------------------------------------------------------------------------- /yolov3_tf2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wansook0316/MultiFaceTrackerUsingDeepsort/8abb3b94be7858233e13ed42e65b16ec65554dcb/yolov3_tf2/__init__.py -------------------------------------------------------------------------------- /yolov3_tf2/dataset.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from absl.flags import FLAGS 3 | 4 | @tf.function 5 | def transform_targets_for_output(y_true, grid_size, anchor_idxs): 6 | # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor)) 7 | N = tf.shape(y_true)[0] 8 | 9 | # y_true_out: (N, grid, grid, anchors, [x1, y1, x2, y2, obj, class]) 10 | y_true_out = tf.zeros( 11 | (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) 12 | 13 | anchor_idxs = tf.cast(anchor_idxs, tf.int32) 14 | 15 | indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True) 16 | updates = tf.TensorArray(tf.float32, 1, dynamic_size=True) 17 | idx = 0 18 | for i in tf.range(N): 19 | for j in tf.range(tf.shape(y_true)[1]): 20 | if tf.equal(y_true[i][j][2], 0): 21 | continue 22 | anchor_eq = tf.equal( 23 | anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) 24 | 25 | if tf.reduce_any(anchor_eq): 26 | box = y_true[i][j][0:4] 27 | box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 28 | 29 | anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) 30 | grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32) 31 | 32 | # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) 33 | indexes = indexes.write( 34 | idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]]) 35 | updates = updates.write( 36 | idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]]) 37 | idx += 1 38 | 39 | # tf.print(indexes.stack()) 40 | # tf.print(updates.stack()) 41 | 42 | return tf.tensor_scatter_nd_update( 43 | y_true_out, indexes.stack(), updates.stack()) 44 | 45 | 46 | def transform_targets(y_train, anchors, anchor_masks, size): 47 | y_outs = [] 48 | grid_size = size // 32 49 | 50 | # calculate anchor index for true boxes 51 | anchors = tf.cast(anchors, tf.float32) 52 | anchor_area = anchors[..., 0] * anchors[..., 1] 53 | box_wh = y_train[..., 2:4] - y_train[..., 0:2] 54 | box_wh = tf.tile(tf.expand_dims(box_wh, -2), 55 | (1, 1, tf.shape(anchors)[0], 1)) 56 | box_area = box_wh[..., 0] * box_wh[..., 1] 57 | intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * \ 58 | tf.minimum(box_wh[..., 1], anchors[..., 1]) 59 | iou = intersection / (box_area + anchor_area - intersection) 60 | anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) 61 | anchor_idx = tf.expand_dims(anchor_idx, axis=-1) 62 | 63 | y_train = tf.concat([y_train, anchor_idx], axis=-1) 64 | 65 | for anchor_idxs in anchor_masks: 66 | y_outs.append(transform_targets_for_output( 67 | y_train, grid_size, anchor_idxs)) 68 | grid_size *= 2 69 | 70 | return tuple(y_outs) 71 | 72 | 73 | def transform_images(x_train, size): 74 | x_train = tf.image.resize(x_train, (size, size)) 75 | x_train = x_train / 255 76 | return x_train 77 | 78 | 79 | # https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/using_your_own_dataset.md#conversion-script-outline-conversion-script-outline 80 | # Commented out fields are not required in our project 81 | IMAGE_FEATURE_MAP = { 82 | # 'image/width': tf.io.FixedLenFeature([], tf.int64), 83 | # 'image/height': tf.io.FixedLenFeature([], tf.int64), 84 | # 'image/filename': tf.io.FixedLenFeature([], tf.string), 85 | # 'image/source_id': tf.io.FixedLenFeature([], tf.string), 86 | # 'image/key/sha256': tf.io.FixedLenFeature([], tf.string), 87 | 'image/encoded': tf.io.FixedLenFeature([], tf.string), 88 | # 'image/format': tf.io.FixedLenFeature([], tf.string), 89 | 'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32), 90 | 'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32), 91 | 'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32), 92 | 'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32), 93 | 'image/object/class/text': tf.io.VarLenFeature(tf.string), 94 | # 'image/object/class/label': tf.io.VarLenFeature(tf.int64), 95 | # 'image/object/difficult': tf.io.VarLenFeature(tf.int64), 96 | # 'image/object/truncated': tf.io.VarLenFeature(tf.int64), 97 | # 'image/object/view': tf.io.VarLenFeature(tf.string), 98 | } 99 | 100 | 101 | def parse_tfrecord(tfrecord, class_table, size): 102 | x = tf.io.parse_single_example(tfrecord, IMAGE_FEATURE_MAP) 103 | x_train = tf.image.decode_jpeg(x['image/encoded'], channels=3) 104 | x_train = tf.image.resize(x_train, (size, size)) 105 | 106 | class_text = tf.sparse.to_dense( 107 | x['image/object/class/text'], default_value='') 108 | labels = tf.cast(class_table.lookup(class_text), tf.float32) 109 | y_train = tf.stack([tf.sparse.to_dense(x['image/object/bbox/xmin']), 110 | tf.sparse.to_dense(x['image/object/bbox/ymin']), 111 | tf.sparse.to_dense(x['image/object/bbox/xmax']), 112 | tf.sparse.to_dense(x['image/object/bbox/ymax']), 113 | labels], axis=1) 114 | 115 | paddings = [[0, FLAGS.yolo_max_boxes - tf.shape(y_train)[0]], [0, 0]] 116 | y_train = tf.pad(y_train, paddings) 117 | 118 | return x_train, y_train 119 | 120 | 121 | def load_tfrecord_dataset(file_pattern, class_file, size=416): 122 | LINE_NUMBER = -1 # TODO: use tf.lookup.TextFileIndex.LINE_NUMBER 123 | class_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer( 124 | class_file, tf.string, 0, tf.int64, LINE_NUMBER, delimiter="\n"), -1) 125 | 126 | files = tf.data.Dataset.list_files(file_pattern) 127 | dataset = files.flat_map(tf.data.TFRecordDataset) 128 | return dataset.map(lambda x: parse_tfrecord(x, class_table, size)) 129 | 130 | 131 | def load_fake_dataset(): 132 | x_train = tf.image.decode_jpeg( 133 | open('./data/girl.png', 'rb').read(), channels=3) 134 | x_train = tf.expand_dims(x_train, axis=0) 135 | 136 | labels = [ 137 | [0.18494931, 0.03049111, 0.9435849, 0.96302897, 0], 138 | [0.01586703, 0.35938117, 0.17582396, 0.6069674, 56], 139 | [0.09158827, 0.48252046, 0.26967454, 0.6403017, 67] 140 | ] + [[0, 0, 0, 0, 0]] * 5 141 | y_train = tf.convert_to_tensor(labels, tf.float32) 142 | y_train = tf.expand_dims(y_train, axis=0) 143 | 144 | return tf.data.Dataset.from_tensor_slices((x_train, y_train)) 145 | -------------------------------------------------------------------------------- /yolov3_tf2/models.py: -------------------------------------------------------------------------------- 1 | from absl import flags 2 | from absl.flags import FLAGS 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.keras import Model 6 | from tensorflow.keras.layers import ( 7 | Add, 8 | Concatenate, 9 | Conv2D, 10 | Input, 11 | Lambda, 12 | LeakyReLU, 13 | MaxPool2D, 14 | UpSampling2D, 15 | ZeroPadding2D, 16 | BatchNormalization, 17 | ) 18 | from tensorflow.keras.regularizers import l2 19 | from tensorflow.keras.losses import ( 20 | binary_crossentropy, 21 | sparse_categorical_crossentropy 22 | ) 23 | from .utils import broadcast_iou 24 | 25 | flags.DEFINE_integer('yolo_max_boxes', 100, 26 | 'maximum number of boxes per image') 27 | flags.DEFINE_float('yolo_iou_threshold', 0.5, 'iou threshold') 28 | flags.DEFINE_float('yolo_score_threshold', 0.5, 'score threshold') 29 | 30 | yolo_anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45), 31 | (59, 119), (116, 90), (156, 198), (373, 326)], 32 | np.float32) / 416 33 | yolo_anchor_masks = np.array([[6, 7, 8], [3, 4, 5], [0, 1, 2]]) 34 | 35 | yolo_tiny_anchors = np.array([(10, 14), (23, 27), (37, 58), 36 | (81, 82), (135, 169), (344, 319)], 37 | np.float32) / 416 38 | yolo_tiny_anchor_masks = np.array([[3, 4, 5], [0, 1, 2]]) 39 | 40 | 41 | def DarknetConv(x, filters, size, strides=1, batch_norm=True): 42 | if strides == 1: 43 | padding = 'same' 44 | else: 45 | x = ZeroPadding2D(((1, 0), (1, 0)))(x) # top left half-padding 46 | padding = 'valid' 47 | x = Conv2D(filters=filters, kernel_size=size, 48 | strides=strides, padding=padding, 49 | use_bias=not batch_norm, kernel_regularizer=l2(0.0005))(x) 50 | if batch_norm: 51 | x = BatchNormalization()(x) 52 | x = LeakyReLU(alpha=0.1)(x) 53 | return x 54 | 55 | 56 | def DarknetResidual(x, filters): 57 | prev = x 58 | x = DarknetConv(x, filters // 2, 1) 59 | x = DarknetConv(x, filters, 3) 60 | x = Add()([prev, x]) 61 | return x 62 | 63 | 64 | def DarknetBlock(x, filters, blocks): 65 | x = DarknetConv(x, filters, 3, strides=2) 66 | for _ in range(blocks): 67 | x = DarknetResidual(x, filters) 68 | return x 69 | 70 | 71 | def Darknet(name=None): 72 | x = inputs = Input([None, None, 3]) 73 | x = DarknetConv(x, 32, 3) 74 | x = DarknetBlock(x, 64, 1) 75 | x = DarknetBlock(x, 128, 2) # skip connection 76 | x = x_36 = DarknetBlock(x, 256, 8) # skip connection 77 | x = x_61 = DarknetBlock(x, 512, 8) 78 | x = DarknetBlock(x, 1024, 4) 79 | return tf.keras.Model(inputs, (x_36, x_61, x), name=name) 80 | 81 | 82 | def DarknetTiny(name=None): 83 | x = inputs = Input([None, None, 3]) 84 | x = DarknetConv(x, 16, 3) 85 | x = MaxPool2D(2, 2, 'same')(x) 86 | x = DarknetConv(x, 32, 3) 87 | x = MaxPool2D(2, 2, 'same')(x) 88 | x = DarknetConv(x, 64, 3) 89 | x = MaxPool2D(2, 2, 'same')(x) 90 | x = DarknetConv(x, 128, 3) 91 | x = MaxPool2D(2, 2, 'same')(x) 92 | x = x_8 = DarknetConv(x, 256, 3) # skip connection 93 | x = MaxPool2D(2, 2, 'same')(x) 94 | x = DarknetConv(x, 512, 3) 95 | x = MaxPool2D(2, 1, 'same')(x) 96 | x = DarknetConv(x, 1024, 3) 97 | return tf.keras.Model(inputs, (x_8, x), name=name) 98 | 99 | 100 | def YoloConv(filters, name=None): 101 | def yolo_conv(x_in): 102 | if isinstance(x_in, tuple): 103 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:]) 104 | x, x_skip = inputs 105 | 106 | # concat with skip connection 107 | x = DarknetConv(x, filters, 1) 108 | x = UpSampling2D(2)(x) 109 | x = Concatenate()([x, x_skip]) 110 | else: 111 | x = inputs = Input(x_in.shape[1:]) 112 | 113 | x = DarknetConv(x, filters, 1) 114 | x = DarknetConv(x, filters * 2, 3) 115 | x = DarknetConv(x, filters, 1) 116 | x = DarknetConv(x, filters * 2, 3) 117 | x = DarknetConv(x, filters, 1) 118 | return Model(inputs, x, name=name)(x_in) 119 | return yolo_conv 120 | 121 | 122 | def YoloConvTiny(filters, name=None): 123 | def yolo_conv(x_in): 124 | if isinstance(x_in, tuple): 125 | inputs = Input(x_in[0].shape[1:]), Input(x_in[1].shape[1:]) 126 | x, x_skip = inputs 127 | 128 | # concat with skip connection 129 | x = DarknetConv(x, filters, 1) 130 | x = UpSampling2D(2)(x) 131 | x = Concatenate()([x, x_skip]) 132 | else: 133 | x = inputs = Input(x_in.shape[1:]) 134 | x = DarknetConv(x, filters, 1) 135 | 136 | return Model(inputs, x, name=name)(x_in) 137 | return yolo_conv 138 | 139 | 140 | def YoloOutput(filters, anchors, classes, name=None): 141 | def yolo_output(x_in): 142 | x = inputs = Input(x_in.shape[1:]) 143 | x = DarknetConv(x, filters * 2, 3) 144 | x = DarknetConv(x, anchors * (classes + 5), 1, batch_norm=False) 145 | x = Lambda(lambda x: tf.reshape(x, (-1, tf.shape(x)[1], tf.shape(x)[2], 146 | anchors, classes + 5)))(x) 147 | return tf.keras.Model(inputs, x, name=name)(x_in) 148 | return yolo_output 149 | 150 | 151 | # As tensorflow lite doesn't support tf.size used in tf.meshgrid, 152 | # we reimplemented a simple meshgrid function that use basic tf function. 153 | def _meshgrid(n_a, n_b): 154 | 155 | return [ 156 | tf.reshape(tf.tile(tf.range(n_a), [n_b]), (n_b, n_a)), 157 | tf.reshape(tf.repeat(tf.range(n_b), n_a), (n_b, n_a)) 158 | ] 159 | 160 | 161 | def yolo_boxes(pred, anchors, classes): 162 | # pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...classes)) 163 | grid_size = tf.shape(pred)[1:3] 164 | box_xy, box_wh, objectness, class_probs = tf.split( 165 | pred, (2, 2, 1, classes), axis=-1) 166 | 167 | box_xy = tf.sigmoid(box_xy) 168 | objectness = tf.sigmoid(objectness) 169 | class_probs = tf.sigmoid(class_probs) 170 | pred_box = tf.concat((box_xy, box_wh), axis=-1) # original xywh for loss 171 | 172 | # !!! grid[x][y] == (y, x) 173 | grid = _meshgrid(grid_size[1],grid_size[0]) 174 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) # [gx, gy, 1, 2] 175 | 176 | box_xy = (box_xy + tf.cast(grid, tf.float32)) / \ 177 | tf.cast(grid_size, tf.float32) 178 | box_wh = tf.exp(box_wh) * anchors 179 | 180 | box_x1y1 = box_xy - box_wh / 2 181 | box_x2y2 = box_xy + box_wh / 2 182 | bbox = tf.concat([box_x1y1, box_x2y2], axis=-1) 183 | 184 | return bbox, objectness, class_probs, pred_box 185 | 186 | 187 | def yolo_nms(outputs, anchors, masks, classes): 188 | # boxes, conf, type 189 | b, c, t = [], [], [] 190 | 191 | for o in outputs: 192 | b.append(tf.reshape(o[0], (tf.shape(o[0])[0], -1, tf.shape(o[0])[-1]))) 193 | c.append(tf.reshape(o[1], (tf.shape(o[1])[0], -1, tf.shape(o[1])[-1]))) 194 | t.append(tf.reshape(o[2], (tf.shape(o[2])[0], -1, tf.shape(o[2])[-1]))) 195 | 196 | bbox = tf.concat(b, axis=1) 197 | confidence = tf.concat(c, axis=1) 198 | class_probs = tf.concat(t, axis=1) 199 | 200 | scores = confidence * class_probs 201 | 202 | dscores = tf.squeeze(scores, axis=0) 203 | scores = tf.reduce_max(dscores,[1]) 204 | bbox = tf.reshape(bbox,(-1,4)) 205 | classes = tf.argmax(dscores,1) 206 | selected_indices, selected_scores = tf.image.non_max_suppression_with_scores( 207 | boxes=bbox, 208 | scores=scores, 209 | max_output_size=FLAGS.yolo_max_boxes, 210 | iou_threshold=FLAGS.yolo_iou_threshold, 211 | score_threshold=FLAGS.yolo_score_threshold, 212 | soft_nms_sigma=0.5 213 | ) 214 | 215 | num_valid_nms_boxes = tf.shape(selected_indices)[0] 216 | 217 | selected_indices = tf.concat([selected_indices,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes, tf.int32)], 0) 218 | selected_scores = tf.concat([selected_scores,tf.zeros(FLAGS.yolo_max_boxes-num_valid_nms_boxes,tf.float32)], -1) 219 | 220 | boxes=tf.gather(bbox, selected_indices) 221 | boxes = tf.expand_dims(boxes, axis=0) 222 | scores=selected_scores 223 | scores = tf.expand_dims(scores, axis=0) 224 | classes = tf.gather(classes,selected_indices) 225 | classes = tf.expand_dims(classes, axis=0) 226 | valid_detections=num_valid_nms_boxes 227 | valid_detections = tf.expand_dims(valid_detections, axis=0) 228 | 229 | return boxes, scores, classes, valid_detections 230 | 231 | 232 | def YoloV3(size=None, channels=3, anchors=yolo_anchors, 233 | masks=yolo_anchor_masks, classes=80, training=False): 234 | x = inputs = Input([size, size, channels], name='input') 235 | 236 | x_36, x_61, x = Darknet(name='yolo_darknet')(x) 237 | 238 | x = YoloConv(512, name='yolo_conv_0')(x) 239 | output_0 = YoloOutput(512, len(masks[0]), classes, name='yolo_output_0')(x) 240 | 241 | x = YoloConv(256, name='yolo_conv_1')((x, x_61)) 242 | output_1 = YoloOutput(256, len(masks[1]), classes, name='yolo_output_1')(x) 243 | 244 | x = YoloConv(128, name='yolo_conv_2')((x, x_36)) 245 | output_2 = YoloOutput(128, len(masks[2]), classes, name='yolo_output_2')(x) 246 | 247 | if training: 248 | return Model(inputs, (output_0, output_1, output_2), name='yolov3') 249 | 250 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes), 251 | name='yolo_boxes_0')(output_0) 252 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes), 253 | name='yolo_boxes_1')(output_1) 254 | boxes_2 = Lambda(lambda x: yolo_boxes(x, anchors[masks[2]], classes), 255 | name='yolo_boxes_2')(output_2) 256 | 257 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes), 258 | name='yolo_nms')((boxes_0[:3], boxes_1[:3], boxes_2[:3])) 259 | 260 | return Model(inputs, outputs, name='yolov3') 261 | 262 | 263 | def YoloV3Tiny(size=None, channels=3, anchors=yolo_tiny_anchors, 264 | masks=yolo_tiny_anchor_masks, classes=80, training=False): 265 | x = inputs = Input([size, size, channels], name='input') 266 | 267 | x_8, x = DarknetTiny(name='yolo_darknet')(x) 268 | 269 | x = YoloConvTiny(256, name='yolo_conv_0')(x) 270 | output_0 = YoloOutput(256, len(masks[0]), classes, name='yolo_output_0')(x) 271 | 272 | x = YoloConvTiny(128, name='yolo_conv_1')((x, x_8)) 273 | output_1 = YoloOutput(128, len(masks[1]), classes, name='yolo_output_1')(x) 274 | 275 | if training: 276 | return Model(inputs, (output_0, output_1), name='yolov3') 277 | 278 | boxes_0 = Lambda(lambda x: yolo_boxes(x, anchors[masks[0]], classes), 279 | name='yolo_boxes_0')(output_0) 280 | boxes_1 = Lambda(lambda x: yolo_boxes(x, anchors[masks[1]], classes), 281 | name='yolo_boxes_1')(output_1) 282 | outputs = Lambda(lambda x: yolo_nms(x, anchors, masks, classes), 283 | name='yolo_nms')((boxes_0[:3], boxes_1[:3])) 284 | return Model(inputs, outputs, name='yolov3_tiny') 285 | 286 | 287 | def YoloLoss(anchors, classes=80, ignore_thresh=0.5): 288 | def yolo_loss(y_true, y_pred): 289 | # 1. transform all pred outputs 290 | # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) 291 | pred_box, pred_obj, pred_class, pred_xywh = yolo_boxes( 292 | y_pred, anchors, classes) 293 | pred_xy = pred_xywh[..., 0:2] 294 | pred_wh = pred_xywh[..., 2:4] 295 | 296 | # 2. transform all true outputs 297 | # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) 298 | true_box, true_obj, true_class_idx = tf.split( 299 | y_true, (4, 1, 1), axis=-1) 300 | true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 301 | true_wh = true_box[..., 2:4] - true_box[..., 0:2] 302 | 303 | # give higher weights to small boxes 304 | box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1] 305 | 306 | # 3. inverting the pred box equations 307 | grid_size = tf.shape(y_true)[1] 308 | grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size)) 309 | grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) 310 | true_xy = true_xy * tf.cast(grid_size, tf.float32) - \ 311 | tf.cast(grid, tf.float32) 312 | true_wh = tf.math.log(true_wh / anchors) 313 | true_wh = tf.where(tf.math.is_inf(true_wh), 314 | tf.zeros_like(true_wh), true_wh) 315 | 316 | # 4. calculate all masks 317 | obj_mask = tf.squeeze(true_obj, -1) 318 | # ignore false positive when iou is over threshold 319 | best_iou = tf.map_fn( 320 | lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask( 321 | x[1], tf.cast(x[2], tf.bool))), axis=-1), 322 | (pred_box, true_box, obj_mask), 323 | tf.float32) 324 | ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) 325 | 326 | # 5. calculate all losses 327 | xy_loss = obj_mask * box_loss_scale * \ 328 | tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) 329 | wh_loss = obj_mask * box_loss_scale * \ 330 | tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) 331 | obj_loss = binary_crossentropy(true_obj, pred_obj) 332 | obj_loss = obj_mask * obj_loss + \ 333 | (1 - obj_mask) * ignore_mask * obj_loss 334 | # TODO: use binary_crossentropy instead 335 | class_loss = obj_mask * sparse_categorical_crossentropy( 336 | true_class_idx, pred_class) 337 | 338 | # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1) 339 | xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) 340 | wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3)) 341 | obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3)) 342 | class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) 343 | 344 | return xy_loss + wh_loss + obj_loss + class_loss 345 | return yolo_loss 346 | -------------------------------------------------------------------------------- /yolov3_tf2/utils.py: -------------------------------------------------------------------------------- 1 | from absl import logging 2 | import numpy as np 3 | import tensorflow as tf 4 | import cv2 5 | 6 | YOLOV3_LAYER_LIST = [ 7 | 'yolo_darknet', 8 | 'yolo_conv_0', 9 | 'yolo_output_0', 10 | 'yolo_conv_1', 11 | 'yolo_output_1', 12 | 'yolo_conv_2', 13 | 'yolo_output_2', 14 | ] 15 | 16 | YOLOV3_TINY_LAYER_LIST = [ 17 | 'yolo_darknet', 18 | 'yolo_conv_0', 19 | 'yolo_output_0', 20 | 'yolo_conv_1', 21 | 'yolo_output_1', 22 | ] 23 | 24 | 25 | def load_darknet_weights(model, weights_file, tiny=False): 26 | wf = open(weights_file, 'rb') 27 | major, minor, revision, seen, _ = np.fromfile(wf, dtype=np.int32, count=5) 28 | 29 | if tiny: 30 | layers = YOLOV3_TINY_LAYER_LIST 31 | else: 32 | layers = YOLOV3_LAYER_LIST 33 | 34 | for layer_name in layers: 35 | sub_model = model.get_layer(layer_name) 36 | for i, layer in enumerate(sub_model.layers): 37 | if not layer.name.startswith('conv2d'): 38 | continue 39 | batch_norm = None 40 | if i + 1 < len(sub_model.layers) and \ 41 | sub_model.layers[i + 1].name.startswith('batch_norm'): 42 | batch_norm = sub_model.layers[i + 1] 43 | 44 | logging.info("{}/{} {}".format( 45 | sub_model.name, layer.name, 'bn' if batch_norm else 'bias')) 46 | 47 | filters = layer.filters 48 | size = layer.kernel_size[0] 49 | in_dim = layer.get_input_shape_at(0)[-1] 50 | 51 | if batch_norm is None: 52 | conv_bias = np.fromfile(wf, dtype=np.float32, count=filters) 53 | else: 54 | # darknet [beta, gamma, mean, variance] 55 | bn_weights = np.fromfile( 56 | wf, dtype=np.float32, count=4 * filters) 57 | # tf [gamma, beta, mean, variance] 58 | bn_weights = bn_weights.reshape((4, filters))[[1, 0, 2, 3]] 59 | 60 | # darknet shape (out_dim, in_dim, height, width) 61 | conv_shape = (filters, in_dim, size, size) 62 | conv_weights = np.fromfile( 63 | wf, dtype=np.float32, count=np.product(conv_shape)) 64 | # tf shape (height, width, in_dim, out_dim) 65 | conv_weights = conv_weights.reshape( 66 | conv_shape).transpose([2, 3, 1, 0]) 67 | 68 | if batch_norm is None: 69 | layer.set_weights([conv_weights, conv_bias]) 70 | else: 71 | layer.set_weights([conv_weights]) 72 | batch_norm.set_weights(bn_weights) 73 | 74 | assert len(wf.read()) == 0, 'failed to read all data' 75 | wf.close() 76 | 77 | 78 | def broadcast_iou(box_1, box_2): 79 | # box_1: (..., (x1, y1, x2, y2)) 80 | # box_2: (N, (x1, y1, x2, y2)) 81 | 82 | # broadcast boxes 83 | box_1 = tf.expand_dims(box_1, -2) 84 | box_2 = tf.expand_dims(box_2, 0) 85 | # new_shape: (..., N, (x1, y1, x2, y2)) 86 | new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2)) 87 | box_1 = tf.broadcast_to(box_1, new_shape) 88 | box_2 = tf.broadcast_to(box_2, new_shape) 89 | 90 | int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) - 91 | tf.maximum(box_1[..., 0], box_2[..., 0]), 0) 92 | int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) - 93 | tf.maximum(box_1[..., 1], box_2[..., 1]), 0) 94 | int_area = int_w * int_h 95 | box_1_area = (box_1[..., 2] - box_1[..., 0]) * \ 96 | (box_1[..., 3] - box_1[..., 1]) 97 | box_2_area = (box_2[..., 2] - box_2[..., 0]) * \ 98 | (box_2[..., 3] - box_2[..., 1]) 99 | return int_area / (box_1_area + box_2_area - int_area) 100 | 101 | 102 | def draw_outputs(img, outputs, class_names): 103 | boxes, objectness, classes, nums = outputs 104 | boxes, objectness, classes, nums = boxes[0], objectness[0], classes[0], nums[0] 105 | wh = np.flip(img.shape[0:2]) 106 | for i in range(nums): 107 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32)) 108 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32)) 109 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2) 110 | img = cv2.putText(img, '{} {:.4f}'.format( 111 | class_names[int(classes[i])], objectness[i]), 112 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 255), 2) 113 | return img 114 | 115 | 116 | def draw_labels(x, y, class_names): 117 | img = x.numpy() 118 | boxes, classes = tf.split(y, (4, 1), axis=-1) 119 | classes = classes[..., 0] 120 | wh = np.flip(img.shape[0:2]) 121 | for i in range(len(boxes)): 122 | x1y1 = tuple((np.array(boxes[i][0:2]) * wh).astype(np.int32)) 123 | x2y2 = tuple((np.array(boxes[i][2:4]) * wh).astype(np.int32)) 124 | img = cv2.rectangle(img, x1y1, x2y2, (255, 0, 0), 2) 125 | img = cv2.putText(img, class_names[classes[i]], 126 | x1y1, cv2.FONT_HERSHEY_COMPLEX_SMALL, 127 | 1, (0, 0, 255), 2) 128 | return img 129 | 130 | 131 | def freeze_all(model, frozen=True): 132 | model.trainable = not frozen 133 | if isinstance(model, tf.keras.Model): 134 | for l in model.layers: 135 | freeze_all(l, frozen) 136 | 137 | 138 | def convert_boxes(image, boxes, scores): 139 | returned_boxes = [] 140 | for box, score in zip(boxes, scores): 141 | if score == 0.0: continue 142 | box[0] = (box[0] * image.shape[1]).astype(int) 143 | box[1] = (box[1] * image.shape[0]).astype(int) 144 | box[2] = (box[2] * image.shape[1]).astype(int) 145 | box[3] = (box[3] * image.shape[0]).astype(int) 146 | box[2] = int(box[2]-box[0]) 147 | box[3] = int(box[3]-box[1]) 148 | box = box.astype(int) 149 | box = box.tolist() 150 | if box != [0,0,0,0]: 151 | returned_boxes.append(box) 152 | return returned_boxes --------------------------------------------------------------------------------