├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE.md
├── README.md
├── add_persons.py
├── assets
│   ├── bytetrack.png
│   ├── face-detection.gif
│   ├── face-detection2.gif
│   ├── face-recognition.gif
│   ├── result.jpg
│   ├── sequence-diagram.png
│   ├── train_image.jpg
│   └── workflow.png
├── datasets
│   ├── backup
│   │   ├── lam
│   │   │   └── lam.jpg
│   │   └── phuoc
│   │       ├── avatar2.png
│   │       ├── phuoc.jpg
│   │       └── quare.jpg
│   ├── data
│   │   ├── lam
│   │   │   └── 0.jpg
│   │   └── phuoc
│   │       ├── 0.jpg
│   │       ├── 1.jpg
│   │       └── 2.jpg
│   └── face_features
│       └── feature.npz
├── detect.py
├── face_align.py
├── face_alignment
│   └── alignment.py
├── face_detection
│   ├── retinaface
│   │   ├── LICENSE.MIT
│   │   ├── README.md
│   │   ├── camera_test.py
│   │   ├── convert_to_onnx.py
│   │   ├── data
│   │   │   ├── FDDB
│   │   │   │   └── img_list.txt
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   ├── data_augment.py
│   │   │   └── wider_face.py
│   │   ├── detect.py
│   │   ├── layers
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   └── prior_box.py
│   │   │   └── modules
│   │   │       ├── __init__.py
│   │   │       └── multibox_loss.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── net.py
│   │   │   └── retinaface.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── box_utils.py
│   │       ├── nms
│   │       │   ├── __init__.py
│   │       │   └── py_cpu_nms.py
│   │       └── timer.py
│   ├── scrfd
│   │   ├── detector.py
│   │   └── weights
│   │       └── README.md
│   └── yolov5_face
│       ├── README.md
│       ├── detector.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── blazeface.yaml
│       │   ├── blazeface_fpn.yaml
│       │   ├── common.py
│       │   ├── experimental.py
│       │   ├── yolo.py
│       │   ├── yolov5l.yaml
│       │   ├── yolov5l6.yaml
│       │   ├── yolov5m.yaml
│       │   ├── yolov5m6.yaml
│       │   ├── yolov5n-0.5.yaml
│       │   ├── yolov5n.yaml
│       │   ├── yolov5n6.yaml
│       │   ├── yolov5s.yaml
│       │   └── yolov5s6.yaml
│       ├── utils
│       │   ├── __init__.py
│       │   ├── activations.py
│       │   ├── autoanchor.py
│       │   ├── datasets.py
│       │   ├── face_datasets.py
│       │   ├── general.py
│       │   ├── google_utils.py
│       │   ├── infer_utils.py
│       │   ├── loss.py
│       │   ├── metrics.py
│       │   ├── plots.py
│       │   ├── torch_utils.py
│       │   └── wandb_logging
│       │       ├── __init__.py
│       │       ├── log_dataset.py
│       │       └── wandb_utils.py
│       └── weights
│           └── README.md
├── face_recognition
│   └── arcface
│       ├── model.py
│       ├── utils.py
│       └── weights
│           └── README.md
├── face_tracking
│   ├── config
│   │   └── config_tracking.yaml
│   ├── pretrained
│   │   └── README.md
│   └── tracker
│       ├── basetrack.py
│       ├── byte_tracker.py
│       ├── kalman_filter.py
│       ├── matching.py
│       └── visualize.py
├── recognize.py
├── requirements.txt
└── tracking.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.mp4
2 |
3 | .env
4 | # *.npz
5 | index.faiss
6 | test*
7 | # *.ipynb
8 | NOTE.md
9 | data-elastic-search
10 | qdrant-vector-database
11 | architectures
12 | *.csv
13 | data.csv
14 |
15 | image-search-engine/assets/uploaded_images/*
16 | !image-search-engine/assets/uploaded_images/.gitkeep
17 |
18 | # Model
19 | *.pth
20 | *.pt
21 | *.onnx
22 |
23 | # Byte-compiled / optimized / DLL files
24 | __pycache__/
25 | *.py[cod]
26 | *$py.class
27 |
28 | # C extensions
29 | *.so
30 |
31 | # Distribution / packaging
32 | .Python
33 | build/
34 | develop-eggs/
35 | dist/
36 | downloads/
37 | eggs/
38 | .eggs/
39 | lib/
40 | lib64/
41 | parts/
42 | sdist/
43 | var/
44 | wheels/
45 | pip-wheel-metadata/
46 | share/python-wheels/
47 | *.egg-info/
48 | .installed.cfg
49 | MANIFEST
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .nox/
65 | .coverage
66 | .coverage.*
67 | .cache
68 | nosetests.xml
69 | coverage.xml
70 | *.cover
71 | *.py,cover
72 | .hypothesis/
73 | .pytest_cache/
74 |
75 | # Translations
76 | *.mo
77 | *.pot
78 |
79 | # Django stuff:
80 | *.log
81 | local_settings.py
82 | db.sqlite3
83 | db.sqlite3-journal
84 |
85 | # Flask stuff:
86 | instance/
87 | .webassets-cache
88 |
89 | # Scrapy stuff:
90 | .scrapy
91 |
92 | # Sphinx documentation
93 | docs/_build/
94 |
95 | # PyBuilder
96 | target/
97 |
98 | # Jupyter Notebook
99 | .ipynb_checkpoints
100 |
101 | # IPython
102 | profile_default/
103 | ipython_config.py
104 |
105 | # pyenv
106 | .python-version
107 |
108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
109 | __pypackages__/
110 |
111 | # Celery stuff
112 | celerybeat-schedule
113 | celerybeat.pid
114 |
115 | # SageMath parsed files
116 | *.sage.py
117 |
118 | # Environment variable
119 | # .env
120 | # .env*
121 |
122 | # Environments
123 | .venv/
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # It's better to unpack these files and commit the raw source because
150 | # git has its own built in compression methods.
151 | *.7z
152 | *.jar
153 | *.rar
154 | *.zip
155 | *.gz
156 | *.gzip
157 | *.tgz
158 | *.bzip
159 | *.bzip2
160 | *.bz2
161 | *.xz
162 | *.lzma
163 | *.cab
164 | *.xar
165 |
166 | # Packing-only formats
167 | *.iso
168 | *.tar
169 |
170 | # Package management formats
171 | *.dmg
172 | *.xpi
173 | *.gem
174 | *.egg
175 | *.deb
176 | *.rpm
177 | *.msi
178 | *.msm
179 | *.msp
180 | *.txz
181 |
182 | # Backup
183 | *.bak
184 | *.gho
185 | *.ori
186 | *.orig
187 | *.tmp
188 |
189 | # GPG
190 | secring.*
191 |
192 | # OpenSSL-related files best not committed
193 | ## Certificate Authority
194 | *.ca
195 |
196 | ## Certificate
197 | *.crt
198 |
199 | ## Certificate Sign Request
200 | *.csr
201 |
202 | ## Certificate
203 | *.der
204 |
205 | ## Key database file
206 | *.kdb
207 |
208 | ## OCSP request data
209 | *.org
210 |
211 | ## PKCS #12
212 | *.p12
213 |
214 | ## PEM-encoded certificate data
215 | *.pem
216 |
217 | ## Random number seed
218 | *.rnd
219 |
220 | ## SSLeay data
221 | *.ssleay
222 |
223 | ## S/MIME message
224 | *.smime
225 |
226 | # ide
227 | .idea/
228 |
229 | # others
230 | migrations/
231 |
232 | # Databases
233 | *.db
234 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: "^\
2 | (third-party/.*)\
3 | "
4 |
5 | repos:
6 | - repo: https://github.com/pre-commit/pre-commit-hooks
7 | rev: v4.1.0
8 | hooks:
9 | - id: check-merge-conflict # checks for some markers such as "<<<<<<<", "=======", and ">>>>>>>".
10 | - id: detect-private-key # detects the presence of private keys.
11 | - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline.
12 | - id: requirements-txt-fixer # sorts entries in requirements.txt.
13 | - id: trailing-whitespace # trims trailing whitespace at the end of lines.
14 |
15 | # Format YAML and other files
16 | - repo: https://github.com/pre-commit/mirrors-prettier
17 | rev: v2.5.1
18 | hooks:
19 | - id: prettier
20 | files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$
21 |
22 | # Sort the order of importing libs
23 | - repo: https://github.com/PyCQA/isort
24 | rev: 5.12.0
25 | hooks:
26 | - id: isort
27 | args: [--profile=black, --line-length=100]
28 |
29 | # Format Python files
30 | - repo: https://github.com/psf/black
31 | rev: 23.7.0
32 | hooks:
33 | - id: black
34 | args: [--line-length=100]
35 |
36 | # - repo: https://github.com/PyCQA/flake8
37 | # rev: 6.1.0
38 | # hooks:
39 | # - id: flake8
40 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Vector Nguyễn
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Real-Time Face Recognition
2 |
3 |
4 |
5 |
6 | Face Recognition
7 |
8 |
9 | ## Table of Contents
10 |
11 | - [Architecture](#architecture)
12 | - [How to use](#how-to-use)
13 | - [Create Environment and Install Packages](#create-environment-and-install-packages)
14 | - [Add new persons to datasets](#add-new-persons-to-datasets)
15 | - [Technology](#technology)
16 | - [Face Detection](#face-detection)
17 | - [Face Recognition](#face-recognition)
18 | - [Face Tracking](#face-tracking)
19 | - [Matching Algorithm](#matching-algorithm)
20 | - [Reference](#reference)
21 |
22 | ## Architecture
23 |
24 |
25 |
26 |
27 | Sequence Diagram
28 |
29 |
30 | ## How to use
31 |
32 | ### Create Environment and Install Packages
33 |
34 | ```shell
35 | conda create -n face-dev python=3.9
36 | ```
37 |
38 | ```shell
39 | conda activate face-dev
40 | ```
41 |
42 | ```shell
43 | pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
44 | pip install -r requirements.txt
45 | ```
46 |
47 | ### Add new persons to datasets
48 |
49 | 1. **Create a folder named after the person**
50 |
51 | ```
52 | datasets/
53 | ├── backup
54 | ├── data
55 | ├── face_features
56 | └── new_persons
57 |     ├── name-person1
58 |     └── name-person2
59 | ```
60 |
61 | 2. **Add the person's photos to that folder**
62 |
63 | ```
64 | datasets/
65 | ├── backup
66 | ├── data
67 | ├── face_features
68 | └── new_persons
69 |     ├── name-person1
70 |     │   ├── image1.jpg
71 |     │   └── image2.jpg
72 |     └── name-person2
73 |         ├── image1.jpg
74 |         └── image2.jpg
75 | ```
76 |
77 | 3. **Run the script to add the new persons** (the stored embeddings can be inspected as shown after step 4)
78 |
79 | ```shell
80 | python add_persons.py
81 | ```
82 |
83 | 4. **Run the recognition script**
84 |
85 | ```shell
86 | python recognize.py
87 | ```
88 |
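
After step 3, `add_persons.py` saves the extracted embeddings with `np.savez_compressed` to the `--features-path` (default `./datasets/face_features/feature.npz`). A quick way to inspect the archive, using the same key names as `add_persons.py`:

```python
import numpy as np

# Load the feature archive written by add_persons.py (default --features-path).
data = np.load("./datasets/face_features/feature.npz")

names = data["images_name"]  # one label (person name) per stored face crop
embs = data["images_emb"]    # one L2-normalized ArcFace embedding per stored face crop

print(names.shape, embs.shape)
```
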
89 | ## Technology
90 |
91 | ### Face Detection
92 |
93 | 1. **Retinaface**
94 |
95 | - Retinaface is a powerful face detection algorithm known for its accuracy and speed. It utilizes a single deep convolutional network to detect faces in an image with high precision.
96 |
97 | 2. **Yolov5-face**
98 |
99 | - Yolov5-face is based on the YOLO (You Only Look Once) architecture, specializing in face detection. It provides real-time face detection with a focus on efficiency and accuracy.
100 |
101 | 3. **SCRFD**
102 | - SCRFD (Sample and Computation Redistribution for Efficient Face Detection) is designed for real-time face detection across various scales. It is particularly effective at detecting faces at different resolutions within the same image. A minimal usage sketch of the detector interface is shown below.
103 |
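The SCRFD and YOLOv5-face detectors are wrapped behind the same small interface used in `detect.py` and `add_persons.py`. A minimal sketch with the SCRFD wrapper, assuming the ONNX weights from `face_detection/scrfd/weights/README.md` have already been downloaded:

```python
import cv2

from face_detection.scrfd.detector import SCRFD

# Assumes scrfd_2.5g_bnkps.onnx has already been downloaded to this path.
detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")

image = cv2.imread("datasets/backup/phuoc/phuoc.jpg")
bboxes, landmarks = detector.detect(image=image)

for (x1, y1, x2, y2, score), points in zip(bboxes, landmarks):
    # Each detection: a bounding box, a confidence score, and five facial landmarks.
    print(score, (x1, y1, x2, y2), points)
```
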
104 | ### Face Recognition
105 |
106 | 1. **ArcFace**
107 |
108 | - ArcFace is a state-of-the-art face recognition algorithm that focuses on learning highly discriminative features for face verification and identification. It is known for its robustness to variations in lighting, pose, and facial expressions. A condensed sketch of the embedding step used in this repository is shown below.
109 |
110 |
111 |
112 |
113 | ArcFace
114 |
115 |
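A condensed sketch of the embedding step, mirroring `get_feature` in `add_persons.py` (it assumes the ArcFace weights referenced there have been downloaded):

```python
import cv2
import numpy as np
import torch
from torchvision import transforms

from face_recognition.arcface.model import iresnet_inference

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assumes arcface_r100.pth has been downloaded as described in the weights README.
recognizer = iresnet_inference(
    model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
)

preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((112, 112)),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


@torch.no_grad()
def embed(face_bgr):
    """Return a unit-length embedding for one BGR face crop."""
    face_rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)
    emb = recognizer(preprocess(face_rgb).unsqueeze(0).to(device))[0].cpu().numpy()
    return emb / np.linalg.norm(emb)  # L2-normalize so cosine similarity becomes a dot product
```
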
116 | ### Face Tracking
117 |
118 | 1. **ByteTrack**
119 |
120 |
121 |
122 |
123 | ByteTrack is a simple, fast and strong multi-object tracker.
124 |
125 |
126 | ### Matching Algorithm
127 |
128 | 1. **Cosine Similarity Algorithm**
129 |
130 | - The cosine similarity algorithm is employed for matching faces based on the cosine of the angle between their feature vectors. It measures the similarity between two faces' feature representations, providing an effective approach for face recognition. A minimal matching sketch is shown below.
131 |
132 |
133 |
134 |
135 | Cosine Similarity Algorithm
136 |
137 |
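A minimal NumPy sketch of this matching step (illustrative, not the repository's exact implementation). Because the stored embeddings are L2-normalized, cosine similarity reduces to a dot product; the threshold below is an assumption and should be tuned:

```python
import numpy as np


def match(query_emb, gallery_embs, gallery_names, threshold=0.5):
    """Return (name, score) for the best match, or (None, score) below the threshold.

    Assumes query_emb and each row of gallery_embs are L2-normalized, as in
    add_persons.py, so the dot product equals the cosine of the angle.
    """
    scores = gallery_embs @ query_emb
    best = int(np.argmax(scores))
    name = gallery_names[best] if scores[best] >= threshold else None
    return name, float(scores[best])
```
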
138 | ## Reference
139 |
140 | - [ByteTrack](https://github.com/ifzhang/ByteTrack)
141 | - [Yolov5-face](https://github.com/deepcam-cn/yolov5-face)
142 | - [InsightFace - ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)
143 | - [InsightFace-REST](https://github.com/SthPhoenix/InsightFace-REST)
144 |
--------------------------------------------------------------------------------
/add_persons.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import shutil
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from torchvision import transforms
9 |
10 | from face_detection.scrfd.detector import SCRFD
11 | from face_detection.yolov5_face.detector import Yolov5Face
12 | from face_recognition.arcface.model import iresnet_inference
13 | from face_recognition.arcface.utils import read_features
14 |
15 | # Check if CUDA is available and set the device accordingly
16 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17 |
18 | # Initialize the face detector (Choose one of the detectors)
19 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")
20 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
21 |
22 | # Initialize the face recognizer
23 | recognizer = iresnet_inference(
24 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
25 | )
26 |
27 |
28 | @torch.no_grad()
29 | def get_feature(face_image):
30 | """
31 | Extract facial features from an image using the face recognition model.
32 |
33 | Args:
34 | face_image (numpy.ndarray): Input facial image.
35 |
36 | Returns:
37 | numpy.ndarray: Extracted facial features.
38 | """
39 | # Define a series of image preprocessing steps
40 | face_preprocess = transforms.Compose(
41 | [
42 | transforms.ToTensor(),
43 | transforms.Resize((112, 112)),
44 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
45 | ]
46 | )
47 |
48 | # Convert the image to RGB format
49 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
50 |
51 | # Apply the defined preprocessing to the image
52 | face_image = face_preprocess(face_image).unsqueeze(0).to(device)
53 |
54 | # Use the model to obtain facial features
55 | emb_img_face = recognizer(face_image)[0].cpu().numpy()
56 |
57 | # Normalize the features
58 | images_emb = emb_img_face / np.linalg.norm(emb_img_face)
59 | return images_emb
60 |
61 |
62 | def add_persons(backup_dir, add_persons_dir, faces_save_dir, features_path):
63 | """
64 | Add a new person to the face recognition database.
65 |
66 | Args:
67 | backup_dir (str): Directory to save backup data.
68 | add_persons_dir (str): Directory containing images of the new person.
69 | faces_save_dir (str): Directory to save the extracted faces.
70 | features_path (str): Path to save face features.
71 | """
72 | # Initialize lists to store names and features of added images
73 | images_name = []
74 | images_emb = []
75 |
76 | # Read the folder with images of the new person, extract faces, and save them
77 | for name_person in os.listdir(add_persons_dir):
78 | person_image_path = os.path.join(add_persons_dir, name_person)
79 |
80 | # Create a directory to save the faces of the person
81 | person_face_path = os.path.join(faces_save_dir, name_person)
82 | os.makedirs(person_face_path, exist_ok=True)
83 |
84 | for image_name in os.listdir(person_image_path):
85 | if image_name.endswith(("png", "jpg", "jpeg")):
86 | input_image = cv2.imread(os.path.join(person_image_path, image_name))
87 |
88 | # Detect faces and landmarks using the face detector
89 | bboxes, landmarks = detector.detect(image=input_image)
90 |
91 | # Extract faces
92 | for i in range(len(bboxes)):
93 | # Get the number of files in the person's path
94 | number_files = len(os.listdir(person_face_path))
95 |
96 | # Get the location of the face
97 | x1, y1, x2, y2, score = bboxes[i]
98 |
99 | # Extract the face from the image
100 | face_image = input_image[y1:y2, x1:x2]
101 |
102 | # Path to save the face
103 | path_save_face = os.path.join(person_face_path, f"{number_files}.jpg")
104 |
105 | # Save the face to the database
106 | cv2.imwrite(path_save_face, face_image)
107 |
108 | # Extract features from the face
109 | images_emb.append(get_feature(face_image=face_image))
110 | images_name.append(name_person)
111 |
112 | # Check if no new person is found
113 | if images_emb == [] and images_name == []:
114 | print("No new person found!")
115 | return None
116 |
117 | # Convert lists to arrays
118 | images_emb = np.array(images_emb)
119 | images_name = np.array(images_name)
120 |
121 | # Read existing features if available
122 | features = read_features(features_path)
123 |
124 | if features is not None:
125 | # Unpack existing features
126 | old_images_name, old_images_emb = features
127 |
128 | # Combine new features with existing features
129 | images_name = np.hstack((old_images_name, images_name))
130 | images_emb = np.vstack((old_images_emb, images_emb))
131 |
132 | print("Update features!")
133 |
134 | # Save the combined features
135 | np.savez_compressed(features_path, images_name=images_name, images_emb=images_emb)
136 |
137 | # Move the data of the new person to the backup data directory
138 | for sub_dir in os.listdir(add_persons_dir):
139 | dir_to_move = os.path.join(add_persons_dir, sub_dir)
140 | shutil.move(dir_to_move, backup_dir, copy_function=shutil.copytree)
141 |
142 | print("Successfully added new person!")
143 |
144 |
145 | if __name__ == "__main__":
146 | # Parse command line arguments
147 | parser = argparse.ArgumentParser()
148 | parser.add_argument(
149 | "--backup-dir",
150 | type=str,
151 | default="./datasets/backup",
152 | help="Directory to save person data.",
153 | )
154 | parser.add_argument(
155 | "--add-persons-dir",
156 | type=str,
157 | default="./datasets/new_persons",
158 | help="Directory to add new persons.",
159 | )
160 | parser.add_argument(
161 | "--faces-save-dir",
162 | type=str,
163 | default="./datasets/data/",
164 | help="Directory to save faces.",
165 | )
166 | parser.add_argument(
167 | "--features-path",
168 | type=str,
169 | default="./datasets/face_features/feature",
170 | help="Path to save face features.",
171 | )
172 | opt = parser.parse_args()
173 |
174 | # Run the main function
175 | add_persons(**vars(opt))
176 |
--------------------------------------------------------------------------------
/assets/bytetrack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/bytetrack.png
--------------------------------------------------------------------------------
/assets/face-detection.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection.gif
--------------------------------------------------------------------------------
/assets/face-detection2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection2.gif
--------------------------------------------------------------------------------
/assets/face-recognition.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-recognition.gif
--------------------------------------------------------------------------------
/assets/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/result.jpg
--------------------------------------------------------------------------------
/assets/sequence-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/sequence-diagram.png
--------------------------------------------------------------------------------
/assets/train_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/train_image.jpg
--------------------------------------------------------------------------------
/assets/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/workflow.png
--------------------------------------------------------------------------------
/datasets/backup/lam/lam.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/lam/lam.jpg
--------------------------------------------------------------------------------
/datasets/backup/phuoc/avatar2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/avatar2.png
--------------------------------------------------------------------------------
/datasets/backup/phuoc/phuoc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/phuoc.jpg
--------------------------------------------------------------------------------
/datasets/backup/phuoc/quare.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/quare.jpg
--------------------------------------------------------------------------------
/datasets/data/lam/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/lam/0.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/0.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/1.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/2.jpg
--------------------------------------------------------------------------------
/datasets/face_features/feature.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/face_features/feature.npz
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 |
5 | from face_detection.scrfd.detector import SCRFD
6 | from face_detection.yolov5_face.detector import Yolov5Face
7 |
8 | # Initialize the face detector
9 | detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5m-face.pt")
10 | # detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
11 |
12 |
13 | def main():
14 | # Open the camera
15 | cap = cv2.VideoCapture(0)
16 |
17 | # Initialize variables for measuring frame rate
18 | start = time.time_ns()
19 | frame_count = 0
20 | fps = -1
21 |
22 | # Save video
23 | frame_width = int(cap.get(3))
24 | frame_height = int(cap.get(4))
25 | size = (frame_width, frame_height)
26 | video = cv2.VideoWriter(
27 | "results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size
28 | )
29 |
30 | # Read frames from the camera
31 | while True:
32 | # Capture a frame from the camera
33 | _, frame = cap.read()
34 |
35 | # Get faces and landmarks using the face detector
36 | bboxes, landmarks = detector.detect(image=frame)
37 | h, w, c = frame.shape
38 |
39 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness
40 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]
41 |
42 | # Draw bounding boxes and landmarks on the frame
43 | for i in range(len(bboxes)):
44 | # Get location of the face
45 | x1, y1, x2, y2, score = bboxes[i]
46 | cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)
47 |
48 | # Draw facial landmarks
49 | for id, key_point in enumerate(landmarks[i]):
50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1)
51 |
52 | # Calculate and display the frame rate
53 | frame_count += 1
54 | if frame_count >= 30:
55 | end = time.time_ns()
56 | fps = 1e9 * frame_count / (end - start)
57 | frame_count = 0
58 | start = time.time_ns()
59 |
60 | if fps > 0:
61 | fps_label = "FPS: %.2f" % fps
62 | cv2.putText(
63 | frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2
64 | )
65 |
66 | # Save the frame to the video
67 | video.write(frame)
68 |
69 | # Show the result in a window
70 | cv2.imshow("Face Detection", frame)
71 |
72 | # Press 'Q' on the keyboard to exit
73 | if cv2.waitKey(25) & 0xFF == ord("q"):
74 | break
75 |
76 | # Release video and camera, and close all OpenCV windows
77 | video.release()
78 | cap.release()
79 | cv2.destroyAllWindows()
80 | cv2.waitKey(0)
81 |
82 |
83 | if __name__ == "__main__":
84 | main()
85 |
--------------------------------------------------------------------------------
/face_align.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 |
5 | from face_alignment.alignment import norm_crop
6 | from face_detection.scrfd.detector import SCRFD
7 | from face_detection.yolov5_face.detector import Yolov5Face
8 |
9 | # Initialize the face detector
10 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-0.5.pt")
11 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
12 |
13 |
14 | def main():
15 | # Open the camera
16 | cap = cv2.VideoCapture(0)
17 |
18 | # Initialize variables for measuring frame rate
19 | start = time.time_ns()
20 | frame_count = 0
21 | fps = -1
22 |
23 | # Save video
24 | frame_width = int(cap.get(3))
25 | frame_height = int(cap.get(4))
26 | size = (frame_width, frame_height)
27 | video = cv2.VideoWriter("results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size)
28 |
29 | # Read frames from the camera
30 | while True:
31 | # Capture a frame from the camera
32 | _, frame = cap.read()
33 |
34 | # Get faces and landmarks using the face detector
35 | bboxes, landmarks = detector.detect(image=frame)
36 | h, w, c = frame.shape
37 |
38 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness
39 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]
40 |
41 | # Draw bounding boxes and landmarks on the frame
42 | for i in range(len(bboxes)):
43 | # Get location of the face
44 | x1, y1, x2, y2, score = bboxes[i]
45 | face = frame[y1:y2, x1:x2]
46 | cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)
47 |
48 | # Draw facial landmarks
49 | for id, key_point in enumerate(landmarks[i]):
50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1)
51 |
52 | align = norm_crop(frame, landmarks[i])
53 |
54 | # Calculate and display the frame rate
55 | frame_count += 1
56 | if frame_count >= 30:
57 | end = time.time_ns()
58 | fps = 1e9 * frame_count / (end - start)
59 | frame_count = 0
60 | start = time.time_ns()
61 |
62 | if fps > 0:
63 | fps_label = "FPS: %.2f" % fps
64 | cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
65 |
66 | # Save the frame to the video
67 | video.write(frame)
68 |
69 | # Show the result in a window
70 | cv2.imshow("Face Detection", frame)
71 | cv2.imshow("Face align", align)
72 |
73 | # Press 'Q' on the keyboard to exit
74 | if cv2.waitKey(25) & 0xFF == ord("q"):
75 | break
76 |
77 | # Release video and camera, and close all OpenCV windows
78 | video.release()
79 | cap.release()
80 | cv2.destroyAllWindows()
81 | cv2.waitKey(0)
82 |
83 |
84 | if __name__ == "__main__":
85 | main()
86 |
--------------------------------------------------------------------------------
/face_alignment/alignment.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from skimage import transform as trans
4 |
5 | # Define a standard set of destination landmarks for ArcFace alignment
6 | arcface_dst = np.array(
7 | [
8 | [38.2946, 51.6963],
9 | [73.5318, 51.5014],
10 | [56.0252, 71.7366],
11 | [41.5493, 92.3655],
12 | [70.7299, 92.2041],
13 | ],
14 | dtype=np.float32,
15 | )
16 |
17 |
18 | def estimate_norm(lmk, image_size=112, mode="arcface"):
19 | """
20 | Estimate the transformation matrix for aligning facial landmarks.
21 |
22 | Args:
23 | lmk (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks.
24 | image_size (int): Desired output image size.
25 | mode (str): Alignment mode, currently only "arcface" is supported.
26 |
27 | Returns:
28 | numpy.ndarray: Transformation matrix (2x3) for aligning facial landmarks.
29 | """
30 | # Check input conditions
31 | assert lmk.shape == (5, 2)
32 | assert image_size % 112 == 0 or image_size % 128 == 0
33 |
34 | # Adjust ratio and x-coordinate difference based on image size
35 | if image_size % 112 == 0:
36 | ratio = float(image_size) / 112.0
37 | diff_x = 0
38 | else:
39 | ratio = float(image_size) / 128.0
40 | diff_x = 8.0 * ratio
41 |
42 | # Scale and shift the destination landmarks
43 | dst = arcface_dst * ratio
44 | dst[:, 0] += diff_x
45 |
46 | # Estimate the similarity transformation
47 | tform = trans.SimilarityTransform()
48 | tform.estimate(lmk, dst)
49 | M = tform.params[0:2, :]
50 |
51 | return M
52 |
53 |
54 | def norm_crop(img, landmark, image_size=112, mode="arcface"):
55 | """
56 | Normalize and crop a facial image based on provided landmarks.
57 |
58 | Args:
59 | img (numpy.ndarray): Input facial image.
60 | landmark (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks.
61 | image_size (int): Desired output image size.
62 | mode (str): Alignment mode, currently only "arcface" is supported.
63 |
64 | Returns:
65 | numpy.ndarray: Normalized and cropped facial image.
66 | """
67 | # Estimate the transformation matrix
68 | M = estimate_norm(landmark, image_size, mode)
69 |
70 | # Apply the affine transformation to the image
71 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
72 |
73 | return warped
74 |
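# Illustrative usage (not part of the original module), assuming a detector from
# this repository that returns five (x, y) landmarks per face, as in face_align.py:
#
#   bboxes, landmarks = detector.detect(image=frame)
#   aligned = norm_crop(frame, landmarks[0])  # 112x112 crop ready for ArcFace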
--------------------------------------------------------------------------------
/face_detection/retinaface/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/face_detection/retinaface/README.md:
--------------------------------------------------------------------------------
1 | Get weights:
2 | [Google drive](https://drive.google.com/drive/folders/1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1?usp=drive_link)
3 |
4 | Run
5 |
6 | Using Camera
7 | backbone: resnet50
8 |
9 | ```
10 | python camera_test.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu
11 | ```
12 |
13 | backbone: mobilenet0.25
14 |
15 | ```
16 | python camera_test.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu
17 | ```
18 |
19 | Using Image
20 | Change the test image in ./curve and update the image path in detect.py (line 87).
21 |
22 | backbone: resnet50
23 |
24 | ```
25 | python detect.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu
26 | ```
27 |
28 | backbone: mobilenet0.25
29 |
30 | ```
31 | python detect.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu
32 | ```
33 |
--------------------------------------------------------------------------------
/face_detection/retinaface/camera_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 | import time
6 |
7 | import cv2
8 | import numpy as np
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from data import cfg_mnet, cfg_re50
12 | from layers.functions.prior_box import PriorBox
13 | from models.retinaface import RetinaFace
14 | from utils.box_utils import decode, decode_landm
15 | from utils.nms.py_cpu_nms import py_cpu_nms
16 |
17 | parser = argparse.ArgumentParser(description="Retinaface")
18 |
19 | parser.add_argument(
20 | "-m",
21 | "--trained_model",
22 | default="./weights/Resnet50_Final.pth",
23 | type=str,
24 | help="Trained state_dict file path to open",
25 | )
26 | parser.add_argument(
27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50"
28 | )
29 | parser.add_argument(
30 | "--cpu", action="store_true", default=False, help="Use cpu inference"
31 | )
32 | parser.add_argument(
33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold"
34 | )
35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k")
36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold")
37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k")
38 | parser.add_argument(
39 | "-s",
40 | "--save_image",
41 | action="store_true",
42 | default=True,
43 | help="show detection results",
44 | )
45 | parser.add_argument(
46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold"
47 | )
48 | args = parser.parse_args()
49 |
50 |
51 | def check_keys(model, pretrained_state_dict):
52 | ckpt_keys = set(pretrained_state_dict.keys())
53 | model_keys = set(model.state_dict().keys())
54 | used_pretrained_keys = model_keys & ckpt_keys
55 | unused_pretrained_keys = ckpt_keys - model_keys
56 | missing_keys = model_keys - ckpt_keys
57 | print("Missing keys:{}".format(len(missing_keys)))
58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
59 | print("Used keys:{}".format(len(used_pretrained_keys)))
60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
61 | return True
62 |
63 |
64 | def remove_prefix(state_dict, prefix):
65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
66 | print("remove prefix '{}'".format(prefix))
67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
68 | return {f(key): value for key, value in state_dict.items()}
69 |
70 |
71 | def load_model(model, pretrained_path, load_to_cpu):
72 | print("Loading pretrained model from {}".format(pretrained_path))
73 | if load_to_cpu:
74 | pretrained_dict = torch.load(
75 | pretrained_path, map_location=lambda storage, loc: storage
76 | )
77 | else:
78 | device = torch.cuda.current_device()
79 | pretrained_dict = torch.load(
80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
81 | )
82 | if "state_dict" in pretrained_dict.keys():
83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
84 | else:
85 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
86 | check_keys(model, pretrained_dict)
87 | model.load_state_dict(pretrained_dict, strict=False)
88 | return model
89 |
90 |
91 | if __name__ == "__main__":
92 | torch.set_grad_enabled(False)
93 | cfg = None
94 | if args.network == "mobile0.25":
95 | cfg = cfg_mnet
96 | elif args.network == "resnet50":
97 | cfg = cfg_re50
98 | # net and model
99 | net = RetinaFace(cfg=cfg, phase="test")
100 | net = load_model(net, args.trained_model, args.cpu)
101 | net.eval()
102 | print("Finished loading model!")
103 | print(net)
104 | cudnn.benchmark = True
105 | device = torch.device("cpu" if args.cpu else "cuda")
106 | net = net.to(device)
107 |
108 | resize = 1
109 |
110 | cam = cv2.VideoCapture(0)
111 | fps = cam.get(cv2.CAP_PROP_FPS)
112 | print(fps)
113 |
114 | # testing begin
115 | # for i in range(10):
116 | # image_path = "./curve/test.jpg"
117 | # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
118 |
119 | while True:
120 | _, img_raw = cam.read()
121 |
122 | img = np.float32(img_raw)
123 |
124 | im_height, im_width, _ = img.shape
125 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
126 | img -= (104, 117, 123)
127 | img = img.transpose(2, 0, 1)
128 | img = torch.from_numpy(img).unsqueeze(0)
129 | img = img.to(device)
130 | scale = scale.to(device)
131 |
132 | tic = time.time()
133 | loc, conf, landms = net(img) # forward pass
134 | print("net forward time: {:.4f}".format(time.time() - tic))
135 |
136 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
137 | priors = priorbox.forward()
138 | priors = priors.to(device)
139 | prior_data = priors.data
140 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
141 | boxes = boxes * scale / resize
142 | boxes = boxes.cpu().numpy()
143 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
144 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
145 | scale1 = torch.Tensor(
146 | [
147 | img.shape[3],
148 | img.shape[2],
149 | img.shape[3],
150 | img.shape[2],
151 | img.shape[3],
152 | img.shape[2],
153 | img.shape[3],
154 | img.shape[2],
155 | img.shape[3],
156 | img.shape[2],
157 | ]
158 | )
159 | scale1 = scale1.to(device)
160 | landms = landms * scale1 / resize
161 | landms = landms.cpu().numpy()
162 |
163 | # ignore low scores
164 | inds = np.where(scores > args.confidence_threshold)[0]
165 | boxes = boxes[inds]
166 | landms = landms[inds]
167 | scores = scores[inds]
168 |
169 | # keep top-K before NMS
170 | order = scores.argsort()[::-1][: args.top_k]
171 | boxes = boxes[order]
172 | landms = landms[order]
173 | scores = scores[order]
174 |
175 | # do NMS
176 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
177 | keep = py_cpu_nms(dets, args.nms_threshold)
178 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
179 | dets = dets[keep, :]
180 | landms = landms[keep]
181 |
182 | # keep top-K faster NMS
183 | dets = dets[: args.keep_top_k, :]
184 | landms = landms[: args.keep_top_k, :]
185 |
186 | dets = np.concatenate((dets, landms), axis=1)
187 |
188 | # show image
189 | if args.save_image:
190 | for b in dets:
191 | if b[4] < args.vis_thres:
192 | continue
193 | text = "{:.4f}".format(b[4])
194 | b = list(map(int, b))
195 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
196 | cx = b[0]
197 | cy = b[1] + 12
198 | cv2.putText(
199 | img_raw,
200 | text,
201 | (cx, cy),
202 | cv2.FONT_HERSHEY_DUPLEX,
203 | 0.5,
204 | (255, 255, 255),
205 | )
206 |
207 | # landms
208 | # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
209 | # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
210 | # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
211 | # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
212 | # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
213 |
214 | # save image
215 | name = "test.jpg"
216 | cv2.imwrite(name, img_raw)
217 | cv2.imshow("camera", img_raw)
218 |
--------------------------------------------------------------------------------
/face_detection/retinaface/convert_to_onnx.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 |
6 | import cv2
7 | import numpy as np
8 | import torch
9 | import torch.backends.cudnn as cudnn
10 | from data import cfg_mnet, cfg_re50
11 | from layers.functions.prior_box import PriorBox
12 | from models.retinaface import RetinaFace
13 | from utils.box_utils import decode, decode_landm
14 | from utils.nms.py_cpu_nms import py_cpu_nms
15 | from utils.timer import Timer
16 |
17 | parser = argparse.ArgumentParser(description="Test")
18 | parser.add_argument(
19 | "-m",
20 | "--trained_model",
21 | default="./weights/mobilenet0.25_Final.pth",
22 | type=str,
23 | help="Trained state_dict file path to open",
24 | )
25 | parser.add_argument(
26 | "--network", default="mobile0.25", help="Backbone network mobile0.25 or resnet50"
27 | )
28 | parser.add_argument(
29 | "--long_side",
30 | default=640,
31 | help="when origin_size is false, long_side is scaled size(320 or 640 for long side)",
32 | )
33 | parser.add_argument(
34 | "--cpu", action="store_true", default=True, help="Use cpu inference"
35 | )
36 |
37 | args = parser.parse_args()
38 |
39 |
40 | def check_keys(model, pretrained_state_dict):
41 | ckpt_keys = set(pretrained_state_dict.keys())
42 | model_keys = set(model.state_dict().keys())
43 | used_pretrained_keys = model_keys & ckpt_keys
44 | unused_pretrained_keys = ckpt_keys - model_keys
45 | missing_keys = model_keys - ckpt_keys
46 | print("Missing keys:{}".format(len(missing_keys)))
47 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
48 | print("Used keys:{}".format(len(used_pretrained_keys)))
49 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
50 | return True
51 |
52 |
53 | def remove_prefix(state_dict, prefix):
54 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
55 | print("remove prefix '{}'".format(prefix))
56 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
57 | return {f(key): value for key, value in state_dict.items()}
58 |
59 |
60 | def load_model(model, pretrained_path, load_to_cpu):
61 | print("Loading pretrained model from {}".format(pretrained_path))
62 | if load_to_cpu:
63 | pretrained_dict = torch.load(
64 | pretrained_path, map_location=lambda storage, loc: storage
65 | )
66 | else:
67 | device = torch.cuda.current_device()
68 | pretrained_dict = torch.load(
69 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
70 | )
71 | if "state_dict" in pretrained_dict.keys():
72 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
73 | else:
74 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
75 | check_keys(model, pretrained_dict)
76 | model.load_state_dict(pretrained_dict, strict=False)
77 | return model
78 |
79 |
80 | if __name__ == "__main__":
81 | torch.set_grad_enabled(False)
82 | cfg = None
83 | if args.network == "mobile0.25":
84 | cfg = cfg_mnet
85 | elif args.network == "resnet50":
86 | cfg = cfg_re50
87 | # net and model
88 | net = RetinaFace(cfg=cfg, phase="test")
89 | net = load_model(net, args.trained_model, args.cpu)
90 | net.eval()
91 | print("Finished loading model!")
92 | print(net)
93 | device = torch.device("cpu" if args.cpu else "cuda")
94 | net = net.to(device)
95 |
96 | # ------------------------ export -----------------------------
97 | output_onnx = "FaceDetector.onnx"
98 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx))
99 | input_names = ["input0"]
100 | output_names = ["output0"]
101 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device)
102 |
103 | torch_out = torch.onnx._export(
104 | net,
105 | inputs,
106 | output_onnx,
107 | export_params=True,
108 | verbose=False,
109 | input_names=input_names,
110 | output_names=output_names,
111 | )
112 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import *
2 | from .data_augment import *
3 | from .wider_face import WiderFaceDetection, detection_collate
4 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 |
3 | cfg_mnet = {
4 | "name": "mobilenet0.25",
5 | "min_sizes": [[16, 32], [64, 128], [256, 512]],
6 | "steps": [8, 16, 32],
7 | "variance": [0.1, 0.2],
8 | "clip": False,
9 | "loc_weight": 2.0,
10 | "gpu_train": True,
11 | "batch_size": 32,
12 | "ngpu": 1,
13 | "epoch": 250,
14 | "decay1": 190,
15 | "decay2": 220,
16 | "image_size": 640,
17 | "pretrain": True,
18 | "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},
19 | "in_channel": 32,
20 | "out_channel": 64,
21 | }
22 |
23 | cfg_re50 = {
24 | "name": "Resnet50",
25 | "min_sizes": [[16, 32], [64, 128], [256, 512]],
26 | "steps": [8, 16, 32],
27 | "variance": [0.1, 0.2],
28 | "clip": False,
29 | "loc_weight": 2.0,
30 | "gpu_train": True,
31 | "batch_size": 24,
32 | "ngpu": 4,
33 | "epoch": 100,
34 | "decay1": 70,
35 | "decay2": 90,
36 | "image_size": 840,
37 | "pretrain": True,
38 | "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},
39 | "in_channel": 256,
40 | "out_channel": 256,
41 | }
42 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/data_augment.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import cv2
4 | import numpy as np
5 | from utils.box_utils import matrix_iof
6 |
7 |
8 | def _crop(image, boxes, labels, landm, img_dim):
9 | height, width, _ = image.shape
10 | pad_image_flag = True
11 |
12 | for _ in range(250):
13 | """
14 | if random.uniform(0, 1) <= 0.2:
15 | scale = 1.0
16 | else:
17 | scale = random.uniform(0.3, 1.0)
18 | """
19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
20 | scale = random.choice(PRE_SCALES)
21 | short_side = min(width, height)
22 | w = int(scale * short_side)
23 | h = w
24 |
25 | if width == w:
26 | l = 0
27 | else:
28 | l = random.randrange(width - w)
29 | if height == h:
30 | t = 0
31 | else:
32 | t = random.randrange(height - h)
33 | roi = np.array((l, t, l + w, t + h))
34 |
35 | value = matrix_iof(boxes, roi[np.newaxis])
36 | flag = value >= 1
37 | if not flag.any():
38 | continue
39 |
40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2
41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
42 | boxes_t = boxes[mask_a].copy()
43 | labels_t = labels[mask_a].copy()
44 | landms_t = landm[mask_a].copy()
45 | landms_t = landms_t.reshape([-1, 5, 2])
46 |
47 | if boxes_t.shape[0] == 0:
48 | continue
49 |
50 | image_t = image[roi[1] : roi[3], roi[0] : roi[2]]
51 |
52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
53 | boxes_t[:, :2] -= roi[:2]
54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
55 | boxes_t[:, 2:] -= roi[:2]
56 |
57 | # landm
58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
61 | landms_t = landms_t.reshape([-1, 10])
62 |
63 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale
64 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
65 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
66 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0
67 | boxes_t = boxes_t[mask_b]
68 | labels_t = labels_t[mask_b]
69 | landms_t = landms_t[mask_b]
70 |
71 | if boxes_t.shape[0] == 0:
72 | continue
73 |
74 | pad_image_flag = False
75 |
76 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag
77 | return image, boxes, labels, landm, pad_image_flag
78 |
79 |
80 | def _distort(image):
81 | def _convert(image, alpha=1, beta=0):
82 | tmp = image.astype(float) * alpha + beta
83 | tmp[tmp < 0] = 0
84 | tmp[tmp > 255] = 255
85 | image[:] = tmp
86 |
87 | image = image.copy()
88 |
89 | if random.randrange(2):
90 | # brightness distortion
91 | if random.randrange(2):
92 | _convert(image, beta=random.uniform(-32, 32))
93 |
94 | # contrast distortion
95 | if random.randrange(2):
96 | _convert(image, alpha=random.uniform(0.5, 1.5))
97 |
98 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
99 |
100 | # saturation distortion
101 | if random.randrange(2):
102 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
103 |
104 | # hue distortion
105 | if random.randrange(2):
106 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
107 | tmp %= 180
108 | image[:, :, 0] = tmp
109 |
110 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
111 |
112 | else:
113 | # brightness distortion
114 | if random.randrange(2):
115 | _convert(image, beta=random.uniform(-32, 32))
116 |
117 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
118 |
119 | # saturation distortion
120 | if random.randrange(2):
121 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
122 |
123 | # hue distortion
124 | if random.randrange(2):
125 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
126 | tmp %= 180
127 | image[:, :, 0] = tmp
128 |
129 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
130 |
131 | # contrast distortion
132 | if random.randrange(2):
133 | _convert(image, alpha=random.uniform(0.5, 1.5))
134 |
135 | return image
136 |
137 |
138 | def _expand(image, boxes, fill, p):
139 | if random.randrange(2):
140 | return image, boxes
141 |
142 | height, width, depth = image.shape
143 |
144 | scale = random.uniform(1, p)
145 | w = int(scale * width)
146 | h = int(scale * height)
147 |
148 | left = random.randint(0, w - width)
149 | top = random.randint(0, h - height)
150 |
151 | boxes_t = boxes.copy()
152 | boxes_t[:, :2] += (left, top)
153 | boxes_t[:, 2:] += (left, top)
154 | expand_image = np.empty((h, w, depth), dtype=image.dtype)
155 | expand_image[:, :] = fill
156 | expand_image[top : top + height, left : left + width] = image
157 | image = expand_image
158 |
159 | return image, boxes_t
160 |
161 |
162 | def _mirror(image, boxes, landms):
163 | _, width, _ = image.shape
164 | if random.randrange(2):
165 | image = image[:, ::-1]
166 | boxes = boxes.copy()
167 | boxes[:, 0::2] = width - boxes[:, 2::-2]
168 |
169 | # landm
170 | landms = landms.copy()
171 | landms = landms.reshape([-1, 5, 2])
172 | landms[:, :, 0] = width - landms[:, :, 0]
173 | tmp = landms[:, 1, :].copy()
174 | landms[:, 1, :] = landms[:, 0, :]
175 | landms[:, 0, :] = tmp
176 | tmp1 = landms[:, 4, :].copy()
177 | landms[:, 4, :] = landms[:, 3, :]
178 | landms[:, 3, :] = tmp1
179 | landms = landms.reshape([-1, 10])
180 |
181 | return image, boxes, landms
182 |
183 |
184 | def _pad_to_square(image, rgb_mean, pad_image_flag):
185 | if not pad_image_flag:
186 | return image
187 | height, width, _ = image.shape
188 | long_side = max(width, height)
189 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
190 | image_t[:, :] = rgb_mean
191 | image_t[0 : 0 + height, 0 : 0 + width] = image
192 | return image_t
193 |
194 |
195 | def _resize_subtract_mean(image, insize, rgb_mean):
196 | interp_methods = [
197 | cv2.INTER_LINEAR,
198 | cv2.INTER_CUBIC,
199 | cv2.INTER_AREA,
200 | cv2.INTER_NEAREST,
201 | cv2.INTER_LANCZOS4,
202 | ]
203 | interp_method = interp_methods[random.randrange(5)]
204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method)
205 | image = image.astype(np.float32)
206 | image -= rgb_mean
207 | return image.transpose(2, 0, 1)
208 |
209 |
210 | class preproc(object):
211 | def __init__(self, img_dim, rgb_means):
212 | self.img_dim = img_dim
213 | self.rgb_means = rgb_means
214 |
215 | def __call__(self, image, targets):
216 | assert targets.shape[0] > 0, "this image does not have gt"
217 |
218 | boxes = targets[:, :4].copy()
219 | labels = targets[:, -1].copy()
220 | landm = targets[:, 4:-1].copy()
221 |
222 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(
223 | image, boxes, labels, landm, self.img_dim
224 | )
225 | image_t = _distort(image_t)
226 | image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
227 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
228 | height, width, _ = image_t.shape
229 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
230 | boxes_t[:, 0::2] /= width
231 | boxes_t[:, 1::2] /= height
232 |
233 | landm_t[:, 0::2] /= width
234 | landm_t[:, 1::2] /= height
235 |
236 | labels_t = np.expand_dims(labels_t, 1)
237 | targets_t = np.hstack((boxes_t, landm_t, labels_t))
238 |
239 | return image_t, targets_t
240 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/wider_face.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | import sys
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import torch.utils.data as data
9 |
10 |
11 | class WiderFaceDetection(data.Dataset):
12 | def __init__(self, txt_path, preproc=None):
13 | self.preproc = preproc
14 | self.imgs_path = []
15 | self.words = []
16 | f = open(txt_path, "r")
17 | lines = f.readlines()
18 | isFirst = True
19 | labels = []
20 | for line in lines:
21 | line = line.rstrip()
22 | if line.startswith("#"):
23 | if isFirst is True:
24 | isFirst = False
25 | else:
26 | labels_copy = labels.copy()
27 | self.words.append(labels_copy)
28 | labels.clear()
29 | path = line[2:]
30 | path = txt_path.replace("label.txt", "images/") + path
31 | self.imgs_path.append(path)
32 | else:
33 | line = line.split(" ")
34 | label = [float(x) for x in line]
35 | labels.append(label)
36 |
37 | self.words.append(labels)
38 |
39 | def __len__(self):
40 | return len(self.imgs_path)
41 |
42 | def __getitem__(self, index):
43 | img = cv2.imread(self.imgs_path[index])
44 | height, width, _ = img.shape
45 |
46 | labels = self.words[index]
47 | annotations = np.zeros((0, 15))
48 | if len(labels) == 0:
49 | return annotations
50 | for idx, label in enumerate(labels):
51 | annotation = np.zeros((1, 15))
52 | # bbox
53 | annotation[0, 0] = label[0] # x1
54 | annotation[0, 1] = label[1] # y1
55 | annotation[0, 2] = label[0] + label[2] # x2
56 | annotation[0, 3] = label[1] + label[3] # y2
57 |
58 | # landmarks
59 | annotation[0, 4] = label[4] # l0_x
60 | annotation[0, 5] = label[5] # l0_y
61 | annotation[0, 6] = label[7] # l1_x
62 | annotation[0, 7] = label[8] # l1_y
63 | annotation[0, 8] = label[10] # l2_x
64 | annotation[0, 9] = label[11] # l2_y
65 | annotation[0, 10] = label[13] # l3_x
66 | annotation[0, 11] = label[14] # l3_y
67 | annotation[0, 12] = label[16] # l4_x
68 | annotation[0, 13] = label[17] # l4_y
69 | if annotation[0, 4] < 0:
70 | annotation[0, 14] = -1
71 | else:
72 | annotation[0, 14] = 1
73 |
74 | annotations = np.append(annotations, annotation, axis=0)
75 | target = np.array(annotations)
76 | if self.preproc is not None:
77 | img, target = self.preproc(img, target)
78 |
79 | return torch.from_numpy(img), target
80 |
81 |
82 | def detection_collate(batch):
83 | """Custom collate fn for dealing with batches of images that have a different
84 | number of associated object annotations (bounding boxes).
85 |
86 | Arguments:
87 | batch: (tuple) A tuple of tensor images and lists of annotations
88 |
89 | Return:
90 | A tuple containing:
91 | 1) (tensor) batch of images stacked on their 0 dim
92 | 2) (list of tensors) annotations for a given image are stacked on 0 dim
93 | """
94 | targets = []
95 | imgs = []
96 | for _, sample in enumerate(batch):
97 | for _, tup in enumerate(sample):
98 | if torch.is_tensor(tup):
99 | imgs.append(tup)
100 | elif isinstance(tup, type(np.empty(0))):
101 | annos = torch.from_numpy(tup).float()
102 | targets.append(annos)
103 |
104 | return (torch.stack(imgs, 0), targets)
105 |
--------------------------------------------------------------------------------
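`WiderFaceDetection` parses the WIDER FACE `label.txt` format (a `# <relative image path>` line followed by one annotation row per face), and because images carry different numbers of faces, `detection_collate` stacks the images but leaves the targets as a list. A hedged wiring sketch with placeholder paths, assuming it runs from `face_detection/retinaface`:

```python
# Sketch: combine the dataset, preproc, and detection_collate in a DataLoader.
# The label.txt path is a placeholder and must follow the WIDER FACE layout
# that WiderFaceDetection.__init__ parses.
import torch.utils.data as data

from data.data_augment import preproc
from data.wider_face import WiderFaceDetection, detection_collate

training_dataset = "./data/widerface/train/label.txt"  # placeholder path

dataset = WiderFaceDetection(training_dataset, preproc(640, (104, 117, 123)))
loader = data.DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    collate_fn=detection_collate,  # stacks images, keeps per-image targets as a list
)

images, targets = next(iter(loader))
print(images.shape)  # torch.Size([8, 3, 640, 640])
print(len(targets))  # 8 tensors, each of shape (num_faces_i, 15)
```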
/face_detection/retinaface/detect.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 | import time
6 |
7 | import cv2
8 | import numpy as np
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from data import cfg_mnet, cfg_re50
12 | from layers.functions.prior_box import PriorBox
13 | from models.retinaface import RetinaFace
14 | from utils.box_utils import decode, decode_landm
15 | from utils.nms.py_cpu_nms import py_cpu_nms
16 |
17 | parser = argparse.ArgumentParser(description="Retinaface")
18 |
19 | parser.add_argument(
20 | "-m",
21 | "--trained_model",
22 | default="./weights/Resnet50_Final.pth",
23 | type=str,
24 | help="Trained state_dict file path to open",
25 | )
26 | parser.add_argument(
27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50"
28 | )
29 | parser.add_argument(
30 | "--cpu", action="store_true", default=False, help="Use cpu inference"
31 | )
32 | parser.add_argument(
33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold"
34 | )
35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k")
36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold")
37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k")
38 | parser.add_argument(
39 | "-s",
40 | "--save_image",
41 | action="store_true",
42 | default=True,
43 | help="show detection results",
44 | )
45 | parser.add_argument(
46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold"
47 | )
48 | args = parser.parse_args()
49 |
50 |
51 | def check_keys(model, pretrained_state_dict):
52 | ckpt_keys = set(pretrained_state_dict.keys())
53 | model_keys = set(model.state_dict().keys())
54 | used_pretrained_keys = model_keys & ckpt_keys
55 | unused_pretrained_keys = ckpt_keys - model_keys
56 | missing_keys = model_keys - ckpt_keys
57 | print("Missing keys:{}".format(len(missing_keys)))
58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
59 | print("Used keys:{}".format(len(used_pretrained_keys)))
60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
61 | return True
62 |
63 |
64 | def remove_prefix(state_dict, prefix):
65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
66 | print("remove prefix '{}'".format(prefix))
67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
68 | return {f(key): value for key, value in state_dict.items()}
69 |
70 |
71 | def load_model(model, pretrained_path, load_to_cpu):
72 | print("Loading pretrained model from {}".format(pretrained_path))
73 | if load_to_cpu:
74 | pretrained_dict = torch.load(
75 | pretrained_path, map_location=lambda storage, loc: storage
76 | )
77 | else:
78 | device = torch.cuda.current_device()
79 | pretrained_dict = torch.load(
80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
81 | )
82 | if "state_dict" in pretrained_dict.keys():
83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
84 | else:
85 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
86 | check_keys(model, pretrained_dict)
87 | model.load_state_dict(pretrained_dict, strict=False)
88 | return model
89 |
90 |
91 | if __name__ == "__main__":
92 | torch.set_grad_enabled(False)
93 | cfg = None
94 | if args.network == "mobile0.25":
95 | cfg = cfg_mnet
96 | elif args.network == "resnet50":
97 | cfg = cfg_re50
98 | # net and model
99 | net = RetinaFace(cfg=cfg, phase="test")
100 | net = load_model(net, args.trained_model, args.cpu)
101 | net.eval()
102 | print("Finished loading model!")
103 | print(net)
104 | cudnn.benchmark = True
105 | device = torch.device("cpu" if args.cpu else "cuda")
106 | net = net.to(device)
107 |
108 | resize = 1
109 |
110 | # testing begin
111 | for i in range(10):
112 | image_path = "./curve/test.jpg"
113 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
114 |
115 | img = np.float32(img_raw)
116 |
117 | im_height, im_width, _ = img.shape
118 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
119 | img -= (104, 117, 123)
120 | img = img.transpose(2, 0, 1)
121 | img = torch.from_numpy(img).unsqueeze(0)
122 | img = img.to(device)
123 | scale = scale.to(device)
124 |
125 | tic = time.time()
126 | loc, conf, landms = net(img) # forward pass
127 | print("net forward time: {:.4f}".format(time.time() - tic))
128 |
129 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
130 | priors = priorbox.forward()
131 | priors = priors.to(device)
132 | prior_data = priors.data
133 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
134 | boxes = boxes * scale / resize
135 | boxes = boxes.cpu().numpy()
136 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
137 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
138 | scale1 = torch.Tensor(
139 | [
140 | img.shape[3],
141 | img.shape[2],
142 | img.shape[3],
143 | img.shape[2],
144 | img.shape[3],
145 | img.shape[2],
146 | img.shape[3],
147 | img.shape[2],
148 | img.shape[3],
149 | img.shape[2],
150 | ]
151 | )
152 | scale1 = scale1.to(device)
153 | landms = landms * scale1 / resize
154 | landms = landms.cpu().numpy()
155 |
156 | # ignore low scores
157 | inds = np.where(scores > args.confidence_threshold)[0]
158 | boxes = boxes[inds]
159 | landms = landms[inds]
160 | scores = scores[inds]
161 |
162 | # keep top-K before NMS
163 | order = scores.argsort()[::-1][: args.top_k]
164 | boxes = boxes[order]
165 | landms = landms[order]
166 | scores = scores[order]
167 |
168 | # do NMS
169 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
170 | keep = py_cpu_nms(dets, args.nms_threshold)
171 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
172 | dets = dets[keep, :]
173 | landms = landms[keep]
174 |
175 | # keep top-K after NMS
176 | dets = dets[: args.keep_top_k, :]
177 | landms = landms[: args.keep_top_k, :]
178 |
179 | dets = np.concatenate((dets, landms), axis=1)
180 |
181 | # show image
182 | if args.save_image:
183 | for b in dets:
184 | if b[4] < args.vis_thres:
185 | continue
186 | text = "{:.4f}".format(b[4])
187 | b = list(map(int, b))
188 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
189 | cx = b[0]
190 | cy = b[1] + 12
191 | cv2.putText(
192 | img_raw,
193 | text,
194 | (cx, cy),
195 | cv2.FONT_HERSHEY_DUPLEX,
196 | 0.5,
197 | (255, 255, 255),
198 | )
199 |
200 | # landms
201 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
202 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
203 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
204 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
205 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
206 | # save image
207 |
208 | name = "test.jpg"
209 | cv2.imwrite(name, img_raw)
210 |
--------------------------------------------------------------------------------
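The loop above decodes prior-relative regression offsets back into box corners and landmark points before thresholding and NMS. The sketch below isolates that decode step with zero offsets so the tensor shapes are explicit; it assumes the working directory is `face_detection/retinaface`:

```python
# Sketch: shapes of the decode step used in detect.py, with zero offsets
# (so the output is simply the prior grid in corner form).
import torch

from data import cfg_mnet
from layers.functions.prior_box import PriorBox
from utils.box_utils import decode, decode_landm

im_height, im_width = 480, 640
priors = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward()

loc = torch.zeros(priors.shape[0], 4)          # stand-in for loc.squeeze(0)
landms_raw = torch.zeros(priors.shape[0], 10)  # stand-in for landms.squeeze(0)

boxes = decode(loc, priors, cfg_mnet["variance"])                # (num_priors, 4), normalized
landms = decode_landm(landms_raw, priors, cfg_mnet["variance"])  # (num_priors, 10)
print(priors.shape, boxes.shape, landms.shape)
```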
/face_detection/retinaface/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/face_detection/retinaface/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from itertools import product as product
2 | from math import ceil
3 |
4 | import numpy as np
5 | import torch
6 |
7 |
8 | class PriorBox(object):
9 | def __init__(self, cfg, image_size=None, phase="train"):
10 | super(PriorBox, self).__init__()
11 | self.min_sizes = cfg["min_sizes"]
12 | self.steps = cfg["steps"]
13 | self.clip = cfg["clip"]
14 | self.image_size = image_size
15 | self.feature_maps = [
16 | [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
17 | for step in self.steps
18 | ]
19 | self.name = "s"
20 |
21 | def forward(self):
22 | anchors = []
23 | for k, f in enumerate(self.feature_maps):
24 | min_sizes = self.min_sizes[k]
25 | for i, j in product(range(f[0]), range(f[1])):
26 | for min_size in min_sizes:
27 | s_kx = min_size / self.image_size[1]
28 | s_ky = min_size / self.image_size[0]
29 | dense_cx = [
30 | x * self.steps[k] / self.image_size[1] for x in [j + 0.5]
31 | ]
32 | dense_cy = [
33 | y * self.steps[k] / self.image_size[0] for y in [i + 0.5]
34 | ]
35 | for cy, cx in product(dense_cy, dense_cx):
36 | anchors += [cx, cy, s_kx, s_ky]
37 |
38 | # back to torch land
39 | output = torch.Tensor(anchors).view(-1, 4)
40 | if self.clip:
41 | output.clamp_(max=1, min=0)
42 | return output
43 |
--------------------------------------------------------------------------------
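Each feature-map cell gets one anchor per entry of `min_sizes[k]`, so the total prior count is the sum over levels of `ceil(H / step) * ceil(W / step) * len(min_sizes[k])`. A sketch verifying that count; the cfg values below are illustrative stand-ins for the real ones in `data/config.py`:

```python
# Sketch: count the priors produced for a 640x640 input.
from math import ceil

from layers.functions.prior_box import PriorBox

cfg = {  # illustrative values; see data/config.py for the real configs
    "min_sizes": [[16, 32], [64, 128], [256, 512]],
    "steps": [8, 16, 32],
    "clip": False,
}
image_size = (640, 640)

priors = PriorBox(cfg, image_size=image_size).forward()

expected = sum(
    ceil(image_size[0] / s) * ceil(image_size[1] / s) * len(m)
    for s, m in zip(cfg["steps"], cfg["min_sizes"])
)
print(priors.shape)                 # (expected, 4) in normalized (cx, cy, w, h)
print(priors.shape[0] == expected)  # True
```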
/face_detection/retinaface/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .multibox_loss import MultiBoxLoss
2 |
3 | __all__ = ["MultiBoxLoss"]
4 |
--------------------------------------------------------------------------------
/face_detection/retinaface/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from data import cfg_mnet
5 | from torch.autograd import Variable
6 | from utils.box_utils import log_sum_exp, match
7 |
8 | GPU = cfg_mnet["gpu_train"]
9 |
10 |
11 | class MultiBoxLoss(nn.Module):
12 | """SSD Weighted Loss Function
13 | Compute Targets:
14 | 1) Produce Confidence Target Indices by matching ground truth boxes
15 | with (default) 'priorboxes' that have jaccard index > threshold parameter
16 | (default threshold: 0.5).
17 | 2) Produce localization target by 'encoding' variance into offsets of ground
18 | truth boxes and their matched 'priorboxes'.
19 | 3) Hard negative mining to filter the excessive number of negative examples
20 | that comes with using a large number of default bounding boxes.
21 | (default negative:positive ratio 3:1)
22 | Objective Loss:
23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
25 | weighted by α which is set to 1 by cross val.
26 | Args:
27 | c: class confidences,
28 | l: predicted boxes,
29 | g: ground truth boxes
30 | N: number of matched default boxes
31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
32 | """
33 |
34 | def __init__(
35 | self,
36 | num_classes,
37 | overlap_thresh,
38 | prior_for_matching,
39 | bkg_label,
40 | neg_mining,
41 | neg_pos,
42 | neg_overlap,
43 | encode_target,
44 | ):
45 | super(MultiBoxLoss, self).__init__()
46 | self.num_classes = num_classes
47 | self.threshold = overlap_thresh
48 | self.background_label = bkg_label
49 | self.encode_target = encode_target
50 | self.use_prior_for_matching = prior_for_matching
51 | self.do_neg_mining = neg_mining
52 | self.negpos_ratio = neg_pos
53 | self.neg_overlap = neg_overlap
54 | self.variance = [0.1, 0.2]
55 |
56 | def forward(self, predictions, priors, targets):
57 | """Multibox Loss
58 | Args:
59 | predictions (tuple): A tuple containing loc preds, conf preds,
60 | and prior boxes from SSD net.
61 | conf shape: torch.size(batch_size,num_priors,num_classes)
62 | loc shape: torch.size(batch_size,num_priors,4)
63 | priors shape: torch.size(num_priors,4)
64 |
65 | ground_truth (tensor): Ground truth boxes and labels for a batch,
66 | shape: [batch_size,num_objs,5] (last idx is the label).
67 | """
68 |
69 | loc_data, conf_data, landm_data = predictions
70 | priors = priors
71 | num = loc_data.size(0)
72 | num_priors = priors.size(0)
73 |
74 | # match priors (default boxes) and ground truth boxes
75 | loc_t = torch.Tensor(num, num_priors, 4)
76 | landm_t = torch.Tensor(num, num_priors, 10)
77 | conf_t = torch.LongTensor(num, num_priors)
78 | for idx in range(num):
79 | truths = targets[idx][:, :4].data
80 | labels = targets[idx][:, -1].data
81 | landms = targets[idx][:, 4:14].data
82 | defaults = priors.data
83 | match(
84 | self.threshold,
85 | truths,
86 | defaults,
87 | self.variance,
88 | labels,
89 | landms,
90 | loc_t,
91 | conf_t,
92 | landm_t,
93 | idx,
94 | )
95 | if GPU:
96 | loc_t = loc_t.cuda()
97 | conf_t = conf_t.cuda()
98 | landm_t = landm_t.cuda()
99 |
100 | zeros = torch.tensor(0).cuda()
101 | # landm Loss (Smooth L1)
102 | # Shape: [batch,num_priors,10]
103 | pos1 = conf_t > zeros
104 | num_pos_landm = pos1.long().sum(1, keepdim=True)
105 | N1 = max(num_pos_landm.data.sum().float(), 1)
106 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
107 | landm_p = landm_data[pos_idx1].view(-1, 10)
108 | landm_t = landm_t[pos_idx1].view(-1, 10)
109 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction="sum")
110 |
111 | pos = conf_t != zeros
112 | conf_t[pos] = 1
113 |
114 | # Localization Loss (Smooth L1)
115 | # Shape: [batch,num_priors,4]
116 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
117 | loc_p = loc_data[pos_idx].view(-1, 4)
118 | loc_t = loc_t[pos_idx].view(-1, 4)
119 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction="sum")
120 |
121 | # Compute max conf across batch for hard negative mining
122 | batch_conf = conf_data.view(-1, self.num_classes)
123 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
124 |
125 | # Hard Negative Mining
126 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
127 | loss_c = loss_c.view(num, -1)
128 | _, loss_idx = loss_c.sort(1, descending=True)
129 | _, idx_rank = loss_idx.sort(1)
130 | num_pos = pos.long().sum(1, keepdim=True)
131 | num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
132 | neg = idx_rank < num_neg.expand_as(idx_rank)
133 |
134 | # Confidence Loss Including Positive and Negative Examples
135 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
136 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
137 | conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
138 | targets_weighted = conf_t[(pos + neg).gt(0)]
139 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction="sum")
140 |
141 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
142 | N = max(num_pos.data.sum().float(), 1)
143 | loss_l /= N
144 | loss_c /= N
145 | loss_landm /= N1
146 |
147 | return loss_l, loss_c, loss_landm
148 |
--------------------------------------------------------------------------------
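A hedged construction sketch for the loss: the argument values are illustrative stand-ins (the training script is not part of this listing), the import assumes the working directory is `face_detection/retinaface`, and `forward` as written requires a CUDA device because it moves its target buffers with `.cuda()`:

```python
# Sketch: constructing MultiBoxLoss and the inputs forward() expects.
# Argument values are illustrative, not taken from this repository's
# training configuration.
from layers.modules import MultiBoxLoss

criterion = MultiBoxLoss(
    num_classes=2,          # face vs. background
    overlap_thresh=0.35,
    prior_for_matching=True,
    bkg_label=0,
    neg_mining=True,
    neg_pos=7,              # 7:1 negative:positive mining ratio
    neg_overlap=0.35,
    encode_target=False,
)

# predictions: (loc, conf, landm) from RetinaFace in "train" phase,
# priors:      output of PriorBox.forward(),
# targets:     list of (num_faces, 15) tensors, one per image.
# loss_l, loss_c, loss_landm = criterion((loc, conf, landm), priors, targets)
```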
/face_detection/retinaface/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/models/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/models/net.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torchvision.models as models
7 | import torchvision.models._utils as _utils
8 | from torch.autograd import Variable
9 |
10 |
11 | def conv_bn(inp, oup, stride=1, leaky=0):
12 | return nn.Sequential(
13 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
14 | nn.BatchNorm2d(oup),
15 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
16 | )
17 |
18 |
19 | def conv_bn_no_relu(inp, oup, stride):
20 | return nn.Sequential(
21 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
22 | nn.BatchNorm2d(oup),
23 | )
24 |
25 |
26 | def conv_bn1X1(inp, oup, stride, leaky=0):
27 | return nn.Sequential(
28 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
29 | nn.BatchNorm2d(oup),
30 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
31 | )
32 |
33 |
34 | def conv_dw(inp, oup, stride, leaky=0.1):
35 | return nn.Sequential(
36 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
37 | nn.BatchNorm2d(inp),
38 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
39 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
40 | nn.BatchNorm2d(oup),
41 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
42 | )
43 |
44 |
45 | class SSH(nn.Module):
46 | def __init__(self, in_channel, out_channel):
47 | super(SSH, self).__init__()
48 | assert out_channel % 4 == 0
49 | leaky = 0
50 | if out_channel <= 64:
51 | leaky = 0.1
52 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)
53 |
54 | self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
55 | self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
56 |
57 | self.conv7X7_2 = conv_bn(
58 | out_channel // 4, out_channel // 4, stride=1, leaky=leaky
59 | )
60 | self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
61 |
62 | def forward(self, input):
63 | conv3X3 = self.conv3X3(input)
64 |
65 | conv5X5_1 = self.conv5X5_1(input)
66 | conv5X5 = self.conv5X5_2(conv5X5_1)
67 |
68 | conv7X7_2 = self.conv7X7_2(conv5X5_1)
69 | conv7X7 = self.conv7x7_3(conv7X7_2)
70 |
71 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
72 | out = F.relu(out)
73 | return out
74 |
75 |
76 | class FPN(nn.Module):
77 | def __init__(self, in_channels_list, out_channels):
78 | super(FPN, self).__init__()
79 | leaky = 0
80 | if out_channels <= 64:
81 | leaky = 0.1
82 | self.output1 = conv_bn1X1(
83 | in_channels_list[0], out_channels, stride=1, leaky=leaky
84 | )
85 | self.output2 = conv_bn1X1(
86 | in_channels_list[1], out_channels, stride=1, leaky=leaky
87 | )
88 | self.output3 = conv_bn1X1(
89 | in_channels_list[2], out_channels, stride=1, leaky=leaky
90 | )
91 |
92 | self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
93 | self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)
94 |
95 | def forward(self, input):
96 | # names = list(input.keys())
97 | input = list(input.values())
98 |
99 | output1 = self.output1(input[0])
100 | output2 = self.output2(input[1])
101 | output3 = self.output3(input[2])
102 |
103 | up3 = F.interpolate(
104 | output3, size=[output2.size(2), output2.size(3)], mode="nearest"
105 | )
106 | output2 = output2 + up3
107 | output2 = self.merge2(output2)
108 |
109 | up2 = F.interpolate(
110 | output2, size=[output1.size(2), output1.size(3)], mode="nearest"
111 | )
112 | output1 = output1 + up2
113 | output1 = self.merge1(output1)
114 |
115 | out = [output1, output2, output3]
116 | return out
117 |
118 |
119 | class MobileNetV1(nn.Module):
120 | def __init__(self):
121 | super(MobileNetV1, self).__init__()
122 | self.stage1 = nn.Sequential(
123 | conv_bn(3, 8, 2, leaky=0.1), # 3
124 | conv_dw(8, 16, 1), # 7
125 | conv_dw(16, 32, 2), # 11
126 | conv_dw(32, 32, 1), # 19
127 | conv_dw(32, 64, 2), # 27
128 | conv_dw(64, 64, 1), # 43
129 | )
130 | self.stage2 = nn.Sequential(
131 | conv_dw(64, 128, 2), # 43 + 16 = 59
132 | conv_dw(128, 128, 1), # 59 + 32 = 91
133 | conv_dw(128, 128, 1), # 91 + 32 = 123
134 | conv_dw(128, 128, 1), # 123 + 32 = 155
135 | conv_dw(128, 128, 1), # 155 + 32 = 187
136 | conv_dw(128, 128, 1), # 187 + 32 = 219
137 | )
138 | self.stage3 = nn.Sequential(
139 | conv_dw(128, 256, 2), # 219 +3 2 = 241
140 | conv_dw(256, 256, 1), # 241 + 64 = 301
141 | )
142 | self.avg = nn.AdaptiveAvgPool2d((1, 1))
143 | self.fc = nn.Linear(256, 1000)
144 |
145 | def forward(self, x):
146 | x = self.stage1(x)
147 | x = self.stage2(x)
148 | x = self.stage3(x)
149 | x = self.avg(x)
150 | # x = self.model(x)
151 | x = x.view(-1, 256)
152 | x = self.fc(x)
153 | return x
154 |
--------------------------------------------------------------------------------
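`MobileNetV1`'s three stages emit 64-, 128-, and 256-channel feature maps at strides 8, 16, and 32; these are the intermediate features RetinaFace's FPN consumes. A quick shape check on a dummy input:

```python
# Sketch: stage-by-stage output shapes of MobileNetV1 on a dummy 640x640 input.
import torch

from models.net import MobileNetV1

net = MobileNetV1().eval()
x = torch.randn(1, 3, 640, 640)

with torch.no_grad():
    c3 = net.stage1(x)   # (1,  64, 80, 80)  stride 8
    c4 = net.stage2(c3)  # (1, 128, 40, 40)  stride 16
    c5 = net.stage3(c4)  # (1, 256, 20, 20)  stride 32

print(c3.shape, c4.shape, c5.shape)
```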
/face_detection/retinaface/models/retinaface.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torchvision.models._utils as _utils
7 | import torchvision.models.detection.backbone_utils as backbone_utils
8 | from models.net import FPN as FPN
9 | from models.net import SSH as SSH
10 | from models.net import MobileNetV1 as MobileNetV1
11 |
12 |
13 | class ClassHead(nn.Module):
14 | def __init__(self, inchannels=512, num_anchors=3):
15 | super(ClassHead, self).__init__()
16 | self.num_anchors = num_anchors
17 | self.conv1x1 = nn.Conv2d(
18 | inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0
19 | )
20 |
21 | def forward(self, x):
22 | out = self.conv1x1(x)
23 | out = out.permute(0, 2, 3, 1).contiguous()
24 |
25 | return out.view(out.shape[0], -1, 2)
26 |
27 |
28 | class BboxHead(nn.Module):
29 | def __init__(self, inchannels=512, num_anchors=3):
30 | super(BboxHead, self).__init__()
31 | self.conv1x1 = nn.Conv2d(
32 | inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0
33 | )
34 |
35 | def forward(self, x):
36 | out = self.conv1x1(x)
37 | out = out.permute(0, 2, 3, 1).contiguous()
38 |
39 | return out.view(out.shape[0], -1, 4)
40 |
41 |
42 | class LandmarkHead(nn.Module):
43 | def __init__(self, inchannels=512, num_anchors=3):
44 | super(LandmarkHead, self).__init__()
45 | self.conv1x1 = nn.Conv2d(
46 | inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0
47 | )
48 |
49 | def forward(self, x):
50 | out = self.conv1x1(x)
51 | out = out.permute(0, 2, 3, 1).contiguous()
52 |
53 | return out.view(out.shape[0], -1, 10)
54 |
55 |
56 | class RetinaFace(nn.Module):
57 | def __init__(self, cfg=None, phase="train"):
58 | """
59 | :param cfg: Network related settings.
60 | :param phase: train or test.
61 | """
62 | super(RetinaFace, self).__init__()
63 | self.phase = phase
64 | backbone = None
65 | if cfg["name"] == "mobilenet0.25":
66 | backbone = MobileNetV1()
67 | if cfg["pretrain"]:
68 | checkpoint = torch.load(
69 | "./weights/mobilenetV1X0.25_pretrain.tar",
70 | map_location=torch.device("cpu"),
71 | )
72 | from collections import OrderedDict
73 |
74 | new_state_dict = OrderedDict()
75 | for k, v in checkpoint["state_dict"].items():
76 | name = k[7:] # remove module.
77 | new_state_dict[name] = v
78 | # load params
79 | backbone.load_state_dict(new_state_dict)
80 | elif cfg["name"] == "Resnet50":
81 | import torchvision.models as models
82 |
83 | backbone = models.resnet50(pretrained=cfg["pretrain"])
84 |
85 | self.body = _utils.IntermediateLayerGetter(backbone, cfg["return_layers"])
86 | in_channels_stage2 = cfg["in_channel"]
87 | in_channels_list = [
88 | in_channels_stage2 * 2,
89 | in_channels_stage2 * 4,
90 | in_channels_stage2 * 8,
91 | ]
92 | out_channels = cfg["out_channel"]
93 | self.fpn = FPN(in_channels_list, out_channels)
94 | self.ssh1 = SSH(out_channels, out_channels)
95 | self.ssh2 = SSH(out_channels, out_channels)
96 | self.ssh3 = SSH(out_channels, out_channels)
97 |
98 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg["out_channel"])
99 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"])
100 | self.LandmarkHead = self._make_landmark_head(
101 | fpn_num=3, inchannels=cfg["out_channel"]
102 | )
103 |
104 | def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
105 | classhead = nn.ModuleList()
106 | for i in range(fpn_num):
107 | classhead.append(ClassHead(inchannels, anchor_num))
108 | return classhead
109 |
110 | def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
111 | bboxhead = nn.ModuleList()
112 | for i in range(fpn_num):
113 | bboxhead.append(BboxHead(inchannels, anchor_num))
114 | return bboxhead
115 |
116 | def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
117 | landmarkhead = nn.ModuleList()
118 | for i in range(fpn_num):
119 | landmarkhead.append(LandmarkHead(inchannels, anchor_num))
120 | return landmarkhead
121 |
122 | def forward(self, inputs):
123 | out = self.body(inputs)
124 |
125 | # FPN
126 | fpn = self.fpn(out)
127 |
128 | # SSH
129 | feature1 = self.ssh1(fpn[0])
130 | feature2 = self.ssh2(fpn[1])
131 | feature3 = self.ssh3(fpn[2])
132 | features = [feature1, feature2, feature3]
133 |
134 | bbox_regressions = torch.cat(
135 | [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1
136 | )
137 | classifications = torch.cat(
138 | [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1
139 | )
140 | ldm_regressions = torch.cat(
141 | [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1
142 | )
143 |
144 | if self.phase == "train":
145 | output = (bbox_regressions, classifications, ldm_regressions)
146 | else:
147 | output = (
148 | bbox_regressions,
149 | F.softmax(classifications, dim=-1),
150 | ldm_regressions,
151 | )
152 | return output
153 |
--------------------------------------------------------------------------------
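In `test` phase the network returns `(boxes, softmaxed scores, landmarks)` with one row per anchor, lining up index-for-index with the `PriorBox` grid. The sketch below runs an untrained mobilenet0.25 backbone; the cfg dict mirrors the relevant `cfg_mnet` fields from `data/config.py` (stated here as assumptions), with `pretrain` disabled so no checkpoint is needed:

```python
# Sketch: forward pass of an untrained RetinaFace with a mobilenet0.25 backbone.
# The cfg values are assumptions mirroring cfg_mnet in data/config.py.
import torch

from models.retinaface import RetinaFace

cfg = {
    "name": "mobilenet0.25",
    "pretrain": False,  # skip loading mobilenetV1X0.25_pretrain.tar
    "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},
    "in_channel": 32,   # -> FPN inputs [64, 128, 256]
    "out_channel": 64,
}

net = RetinaFace(cfg=cfg, phase="test").eval()

with torch.no_grad():
    loc, conf, landms = net(torch.randn(1, 3, 640, 640))

print(loc.shape)     # (1, 16800, 4)  -- 2 anchors per cell over strides 8/16/32
print(conf.shape)    # (1, 16800, 2)  -- softmaxed in "test" phase
print(landms.shape)  # (1, 16800, 10)
```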
/face_detection/retinaface/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/nms/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 |
11 | def py_cpu_nms(dets, thresh):
12 | """Pure Python NMS baseline."""
13 | x1 = dets[:, 0]
14 | y1 = dets[:, 1]
15 | x2 = dets[:, 2]
16 | y2 = dets[:, 3]
17 | scores = dets[:, 4]
18 |
19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
20 | order = scores.argsort()[::-1]
21 |
22 | keep = []
23 | while order.size > 0:
24 | i = order[0]
25 | keep.append(i)
26 | xx1 = np.maximum(x1[i], x1[order[1:]])
27 | yy1 = np.maximum(y1[i], y1[order[1:]])
28 | xx2 = np.minimum(x2[i], x2[order[1:]])
29 | yy2 = np.minimum(y2[i], y2[order[1:]])
30 |
31 | w = np.maximum(0.0, xx2 - xx1 + 1)
32 | h = np.maximum(0.0, yy2 - yy1 + 1)
33 | inter = w * h
34 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
35 |
36 | inds = np.where(ovr <= thresh)[0]
37 | order = order[inds + 1]
38 |
39 | return keep
40 |
--------------------------------------------------------------------------------
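A tiny worked example: the second box overlaps the first with IoU ≈ 0.86 and is suppressed at `thresh=0.4`; the third box is disjoint and kept:

```python
# Example: NMS keeps the highest-scoring box of an overlapping pair.
import numpy as np

from utils.nms.py_cpu_nms import py_cpu_nms

dets = np.array(
    [
        [10, 10, 60, 60, 0.9],      # kept (highest score)
        [12, 12, 62, 62, 0.8],      # suppressed, IoU with box 0 ~ 0.86
        [100, 100, 150, 150, 0.7],  # kept, no overlap
    ],
    dtype=np.float32,
)

keep = py_cpu_nms(dets, thresh=0.4)
print(keep)  # indices of kept boxes: 0 and 2
```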
/face_detection/retinaface/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 |
14 | def __init__(self):
15 | self.total_time = 0.0
16 | self.calls = 0
17 | self.start_time = 0.0
18 | self.diff = 0.0
19 | self.average_time = 0.0
20 |
21 | def tic(self):
22 | # using time.time instead of time.clock because time.clock
23 | # does not normalize for multithreading
24 | self.start_time = time.time()
25 |
26 | def toc(self, average=True):
27 | self.diff = time.time() - self.start_time
28 | self.total_time += self.diff
29 | self.calls += 1
30 | self.average_time = self.total_time / self.calls
31 | if average:
32 | return self.average_time
33 | else:
34 | return self.diff
35 |
36 | def clear(self):
37 | self.total_time = 0.0
38 | self.calls = 0
39 | self.start_time = 0.0
40 | self.diff = 0.0
41 | self.average_time = 0.0
42 |
--------------------------------------------------------------------------------
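Usage sketch: each `tic`/`toc` pair adds to `total_time`, and `toc(average=True)` (the default) returns the running mean rather than the last interval:

```python
# Sketch: time a repeated operation and report per-call and average durations.
import time

from utils.timer import Timer

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)                               # stand-in for a forward pass
    print("last:", round(t.toc(average=False), 3))

print("mean:", round(t.average_time, 3), "over", t.calls, "calls")
```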
/face_detection/scrfd/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1C9RzReAihJQRl8EJOX6vQj7qbHBPmzME?usp=sharing
4 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/README.md:
--------------------------------------------------------------------------------
1 | # Reference
2 |
3 | https://github.com/deepcam-cn/yolov5-face
4 |
5 | Pretrained weights (Google Drive): https://drive.google.com/drive/folders/1UMG4hBor8CFipYm7y71_iTigHjZ4AkaH?usp=sharing
6 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/detector.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import cv2
5 | import numpy as np
6 | import torch
7 |
8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9 | sys.path.append(BASE_DIR)
10 |
11 | from models.experimental import attempt_load
12 | from utils.datasets import letterbox
13 | from utils.general import check_img_size, non_max_suppression_face, scale_coords
14 |
15 |
16 | class Yolov5Face(object):
17 | def __init__(self, model_file=None):
18 | """
19 | Initialize the Detector class.
20 |
21 | :param model_file: Path to the YOLOv5 face model weights file (e.g. yolov5n-0.5.pt).
22 | """
23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 | self.device = device
26 | self.model = attempt_load(model_file, map_location=device)
27 |
28 | # Parameters
29 | self.size_convert = 128 # Size for image conversion
30 | self.conf_thres = 0.4 # Confidence threshold
31 | self.iou_thres = 0.5 # Intersection over Union threshold
32 |
33 | def resize_image(self, img0, img_size):
34 | """
35 | Resize the input image.
36 |
37 | :param img0: The input image to be resized.
38 | :param img_size: The desired size for the image.
39 |
40 | :return: The resized and preprocessed image.
41 | """
42 | h0, w0 = img0.shape[:2] # Original height and width
43 | r = img_size / max(h0, w0) # Resize image to img_size
44 |
45 | if r != 1:
46 | interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
47 | img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)
48 |
49 | imgsz = check_img_size(img_size, s=self.model.stride.max()) # Check img_size
50 | img = letterbox(img0, new_shape=imgsz)[0]
51 |
52 | img = img[:, :, ::-1].transpose(2, 0, 1).copy() # BGR to RGB, to 3x416x416
53 | img = torch.from_numpy(img).to(self.device)
54 | img = img.float() # uint8 to fp16/32
55 | img /= 255.0 # 0 - 255 to 0.0 - 1.0
56 |
57 | return img
58 |
59 | def scale_coords_landmarks(self, img1_shape, coords, img0_shape, ratio_pad=None):
60 | """
61 | Rescale coordinates from img1_shape to img0_shape.
62 |
63 | :param img1_shape: Shape of the source image.
64 | :param coords: Coordinates to be rescaled.
65 | :param img0_shape: Shape of the target image.
66 | :param ratio_pad: Padding ratio.
67 |
68 | :return: Rescaled coordinates.
69 | """
70 | if ratio_pad is None: # Calculate from img0_shape
71 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
72 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
73 | img1_shape[0] - img0_shape[0] * gain
74 | ) / 2
75 | else:
76 | gain = ratio_pad[0][0]
77 | pad = ratio_pad[1]
78 |
79 | coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding
80 | coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding
81 | coords[:, :10] /= gain
82 | coords[:, :10] = coords[:, :10].clamp(
83 | 0, img0_shape[1]
84 | ) # Clamp x and y coordinates
85 |
86 | # Reshape the coordinates into the desired format
87 | coords = coords.reshape(-1, 5, 2)
88 | return coords
89 |
90 | def detect(self, image):
91 | """
92 | Perform face detection on the input image.
93 |
94 | :param image: The input image for face detection.
95 |
96 | :return: Detected bounding boxes and landmarks.
97 | """
98 | # Resize image
99 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert)
100 |
101 | # Via yolov5-face
102 | with torch.no_grad():
103 | pred = self.model(img[None, :])[0]
104 |
105 | # Apply NMS
106 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0]
107 | bboxes = np.int32(
108 | scale_coords(img.shape[1:], det[:, :5], image.shape).round().cpu().numpy()
109 | )
110 |
111 | landmarks = np.int32(
112 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape)
113 | .round()
114 | .cpu()
115 | .numpy()
116 | )
117 |
118 | return bboxes, landmarks
119 |
120 | def detect_tracking(self, image):
121 | """
122 | Perform face detection on the input image and format the results for the tracker.
123 |
124 | :param image: The input image.
125 |
126 | :return: Tracking results and image information.
127 | """
128 | height, width = image.shape[:2]
129 | img_info = {"id": 0}
130 | img_info["height"] = height
131 | img_info["width"] = width
132 | img_info["raw_img"] = image
133 |
134 | # Resize image
135 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert)
136 |
137 | # Via yolov5-face
138 | with torch.no_grad():
139 | pred = self.model(img[None, :])[0]
140 |
141 | scale = min(
142 | img.shape[1] / float(image.shape[0]), img.shape[2] / float(image.shape[1])
143 | )
144 |
145 | # Apply NMS
146 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0]
147 |
148 | bboxes = scale_coords(img.shape[1:], det[:, :4], image.shape)
149 | scores = det[:, 4:5]
150 | outputs = torch.cat((bboxes, scores), dim=1)
151 | outputs[:, :4] *= scale
152 |
153 | bboxes = np.int32(bboxes.round().cpu().numpy())
154 |
155 | landmarks = np.int32(
156 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape)
157 | .round()
158 | .cpu()
159 | .numpy()
160 | )
161 |
162 | return outputs, img_info, bboxes, landmarks
163 |
--------------------------------------------------------------------------------
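A hedged usage sketch for `Yolov5Face.detect`: the weights filename below is a placeholder (see `face_detection/yolov5_face/weights/README.md` for the download link), the input image path is arbitrary, and the import assumes the repository root is on `PYTHONPATH`:

```python
# Sketch: run YOLOv5-face detection on one image and draw the results.
import cv2

from face_detection.yolov5_face.detector import Yolov5Face

# Placeholder weights path; download from the link in weights/README.md.
detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")

image = cv2.imread("datasets/data/phuoc/0.jpg")  # any test image
bboxes, landmarks = detector.detect(image)

# bboxes:    (N, 5) int32 -- x1, y1, x2, y2 plus a rounded score column
# landmarks: (N, 5, 2) int32 -- five (x, y) facial keypoints per face
for box, pts in zip(bboxes, landmarks):
    x1, y1, x2, y2 = map(int, box[:4])
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for x, y in pts:
        cv2.circle(image, (int(x), int(y)), 2, (0, 0, 255), -1)

cv2.imwrite("output.jpg", image)
```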
/face_detection/yolov5_face/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/models/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/blazeface.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [5, 6, 10, 13, 21, 26] # P3/8
9 | - [55, 72, 225, 304, 438, 553] # P4/16
10 |
11 | # YOLOv5 backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [
15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2
16 | [-1, 2, BlazeBlock, [24]], # 1
17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4
18 | [-1, 2, BlazeBlock, [48]], # 3
19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8
20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5
21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16
22 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 7
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [64, 1, 1]], # 8 (P4/32-large)
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3
30 | [-1, 1, Conv, [64, 1, 1]], # 11 (P3/8-medium)
31 |
32 | [[11, 8], 1, Detect, [nc, anchors]], # Detect(P3, P4)
33 | ]
34 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/blazeface_fpn.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [5, 6, 10, 13, 21, 26] # P3/8
9 | - [55, 72, 225, 304, 438, 553] # P4/16
10 |
11 | # YOLOv5 backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [
15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2
16 | [-1, 2, BlazeBlock, [24]], # 1
17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4
18 | [-1, 2, BlazeBlock, [48]], # 3
19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8
20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5
21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16
22 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 7
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [48, 1, 1]], # 8
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3
30 | [-1, 1, Conv, [48, 1, 1]], # 11 (P3/8-medium)
31 |
32 | [-1, 1, nn.MaxPool2d, [3, 2, 1]], # 12
33 | [[-1, 7], 1, Concat, [1]], # cat backbone P3
34 | [-1, 1, Conv, [48, 1, 1]], # 14 (P4/16-large)
35 |
36 | [[11, 14], 1, Detect, [nc, anchors]], # Detect(P3, P4)
37 | ]
38 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/experimental.py:
--------------------------------------------------------------------------------
1 | # This file contains experimental modules
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | from models.common import Conv, DWConv
7 | from utils.google_utils import attempt_download
8 |
9 |
10 | class CrossConv(nn.Module):
11 | # Cross Convolution Downsample
12 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
13 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
14 | super(CrossConv, self).__init__()
15 | c_ = int(c2 * e) # hidden channels
16 | self.cv1 = Conv(c1, c_, (1, k), (1, s))
17 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
18 | self.add = shortcut and c1 == c2
19 |
20 | def forward(self, x):
21 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
22 |
23 |
24 | class Sum(nn.Module):
25 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
26 | def __init__(self, n, weight=False): # n: number of inputs
27 | super(Sum, self).__init__()
28 | self.weight = weight # apply weights boolean
29 | self.iter = range(n - 1) # iter object
30 | if weight:
31 | self.w = nn.Parameter(
32 | -torch.arange(1.0, n) / 2, requires_grad=True
33 | ) # layer weights
34 |
35 | def forward(self, x):
36 | y = x[0] # no weight
37 | if self.weight:
38 | w = torch.sigmoid(self.w) * 2
39 | for i in self.iter:
40 | y = y + x[i + 1] * w[i]
41 | else:
42 | for i in self.iter:
43 | y = y + x[i + 1]
44 | return y
45 |
46 |
47 | class GhostConv(nn.Module):
48 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
49 | def __init__(
50 | self, c1, c2, k=1, s=1, g=1, act=True
51 | ): # ch_in, ch_out, kernel, stride, groups
52 | super(GhostConv, self).__init__()
53 | c_ = c2 // 2 # hidden channels
54 | self.cv1 = Conv(c1, c_, k, s, None, g, act)
55 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
56 |
57 | def forward(self, x):
58 | y = self.cv1(x)
59 | return torch.cat([y, self.cv2(y)], 1)
60 |
61 |
62 | class GhostBottleneck(nn.Module):
63 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
64 | def __init__(self, c1, c2, k, s):
65 | super(GhostBottleneck, self).__init__()
66 | c_ = c2 // 2
67 | self.conv = nn.Sequential(
68 | GhostConv(c1, c_, 1, 1), # pw
69 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
70 | GhostConv(c_, c2, 1, 1, act=False),
71 | ) # pw-linear
72 | self.shortcut = (
73 | nn.Sequential(
74 | DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)
75 | )
76 | if s == 2
77 | else nn.Identity()
78 | )
79 |
80 | def forward(self, x):
81 | return self.conv(x) + self.shortcut(x)
82 |
83 |
84 | class MixConv2d(nn.Module):
85 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
86 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
87 | super(MixConv2d, self).__init__()
88 | groups = len(k)
89 | if equal_ch: # equal c_ per group
90 | i = torch.linspace(0, groups - 1e-6, c2).floor() # c2 indices
91 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
92 | else: # equal weight.numel() per group
93 | b = [c2] + [0] * groups
94 | a = np.eye(groups + 1, groups, k=-1)
95 | a -= np.roll(a, 1, axis=1)
96 | a *= np.array(k) ** 2
97 | a[0] = 1
98 | c_ = np.linalg.lstsq(a, b, rcond=None)[
99 | 0
100 | ].round() # solve for equal weight indices, ax = b
101 |
102 | self.m = nn.ModuleList(
103 | [
104 | nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False)
105 | for g in range(groups)
106 | ]
107 | )
108 | self.bn = nn.BatchNorm2d(c2)
109 | self.act = nn.LeakyReLU(0.1, inplace=True)
110 |
111 | def forward(self, x):
112 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
113 |
114 |
115 | class Ensemble(nn.ModuleList):
116 | # Ensemble of models
117 | def __init__(self):
118 | super(Ensemble, self).__init__()
119 |
120 | def forward(self, x, augment=False):
121 | y = []
122 | for module in self:
123 | y.append(module(x, augment)[0])
124 | # y = torch.stack(y).max(0)[0] # max ensemble
125 | # y = torch.stack(y).mean(0) # mean ensemble
126 | y = torch.cat(y, 1) # nms ensemble
127 | return y, None # inference, train output
128 |
129 |
130 | def attempt_load(weights, map_location=None):
131 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
132 | model = Ensemble()
133 | for w in weights if isinstance(weights, list) else [weights]:
134 | attempt_download(w)
135 | model.append(
136 | torch.load(w, map_location=map_location)["model"].float().fuse().eval()
137 | ) # load FP32 model
138 |
139 | # # Compatibility updates
140 | # for m in model.modules():
141 | # if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
142 | # m.inplace = True # pytorch 1.7.0 compatibility
143 | # elif type(m) is Conv:
144 | # m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
145 |
146 | if len(model) == 1:
147 | return model[-1] # return model
148 | # else:
149 | # print("Ensemble created with %s\n" % weights)
150 | # for k in ["names", "stride"]:
151 | # setattr(model, k, getattr(model[-1], k))
152 | # return model # return ensemble
153 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5l.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5l6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5m.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5m6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n-0.5.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 0.5 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32
22 | [-1, 3, ShuffleV2Block, [512, 1]], # 6
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [128, 1, 1]],
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
30 | [-1, 1, C3, [128, False]], # 10
31 |
32 | [-1, 1, Conv, [128, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small)
36 |
37 | [-1, 1, Conv, [128, 3, 2]],
38 | [[-1, 11], 1, Concat, [1]], # cat head P4
39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium)
40 |
41 | [-1, 1, Conv, [128, 3, 2]],
42 | [[-1, 7], 1, Concat, [1]], # cat head P5
43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large)
44 |
45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
46 | ]
47 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32
22 | [-1, 3, ShuffleV2Block, [512, 1]], # 6
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [128, 1, 1]],
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
30 | [-1, 1, C3, [128, False]], # 10
31 |
32 | [-1, 1, Conv, [128, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small)
36 |
37 | [-1, 1, Conv, [128, 3, 2]],
38 | [[-1, 11], 1, Concat, [1]], # cat head P4
39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium)
40 |
41 | [-1, 1, Conv, [128, 3, 2]],
42 | [[-1, 7], 1, Concat, [1]], # cat head P5
43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large)
44 |
45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
46 | ]
47 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
18 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
19 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
20 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
21 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
22 | [-1, 1, ShuffleV2Block, [384, 2]], # 5-P5/32
23 | [-1, 3, ShuffleV2Block, [384, 1]], # 6
24 | [-1, 1, ShuffleV2Block, [512, 2]], # 7-P6/64
25 | [-1, 3, ShuffleV2Block, [512, 1]], # 8
26 | ]
27 |
28 | # YOLOv5 head
29 | head: [
30 | [-1, 1, Conv, [128, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P5
33 | [-1, 1, C3, [128, False]], # 12
34 |
35 | [-1, 1, Conv, [128, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
38 | [-1, 1, C3, [128, False]], # 16 (P4/16-medium)
39 |
40 | [-1, 1, Conv, [128, 1, 1]],
41 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
42 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
43 | [-1, 1, C3, [128, False]], # 20 (P3/8-small)
44 |
45 | [-1, 1, Conv, [128, 3, 2]],
46 | [[-1, 17], 1, Concat, [1]], # cat head P4
47 | [-1, 1, C3, [128, False]], # 23 (P4/16-medium)
48 |
49 | [-1, 1, Conv, [128, 3, 2]],
50 | [[-1, 13], 1, Concat, [1]], # cat head P5
51 | [-1, 1, C3, [128, False]], # 26 (P5/32-large)
52 |
53 | [-1, 1, Conv, [128, 3, 2]],
54 | [[-1, 9], 1, Concat, [1]], # cat head P6
55 | [-1, 1, C3, [128, False]], # 29 (P6/64-large)
56 |
57 | [[20, 23, 26, 29], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
58 | ]
59 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.5 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
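50 | # Note: following YOLOv5's parse_model convention, depth_multiple scales each layer's
51 | # "number" (repeat count) and width_multiple scales its channel argument (rounded to a
52 | # multiple of 8). With the values above, [-1, 9, C3, [256]] is built as 3 repeats of C3
53 | # with 128 output channels (round(9 * 0.33) = 3, 256 * 0.5 = 128).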
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5s6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/activations.py:
--------------------------------------------------------------------------------
1 | # Activation functions
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU()
10 | @staticmethod
11 | def forward(x):
12 | return x * torch.sigmoid(x)
13 |
14 |
15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16 | @staticmethod
17 | def forward(x):
18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML
19 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for torchscript, CoreML and ONNX
20 |
21 |
22 | class MemoryEfficientSwish(nn.Module):
23 | class F(torch.autograd.Function):
24 | @staticmethod
25 | def forward(ctx, x):
26 | ctx.save_for_backward(x)
27 | return x * torch.sigmoid(x)
28 |
29 | @staticmethod
30 | def backward(ctx, grad_output):
31 | x = ctx.saved_tensors[0]
32 | sx = torch.sigmoid(x)
33 | return grad_output * (sx * (1 + x * (1 - sx)))
34 |
35 | def forward(self, x):
36 | return self.F.apply(x)
37 |
38 |
39 | # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40 | class Mish(nn.Module):
41 | @staticmethod
42 | def forward(x):
43 | return x * F.softplus(x).tanh()
44 |
45 |
46 | class MemoryEfficientMish(nn.Module):
47 | class F(torch.autograd.Function):
48 | @staticmethod
49 | def forward(ctx, x):
50 | ctx.save_for_backward(x)
51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52 |
53 | @staticmethod
54 | def backward(ctx, grad_output):
55 | x = ctx.saved_tensors[0]
56 | sx = torch.sigmoid(x)
57 | fx = F.softplus(x).tanh()
58 | return grad_output * (fx + x * sx * (1 - fx * fx))
59 |
60 | def forward(self, x):
61 | return self.F.apply(x)
62 |
63 |
64 | # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65 | class FReLU(nn.Module):
66 | def __init__(self, c1, k=3): # ch_in, kernel
67 | super().__init__()
68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
69 | self.bn = nn.BatchNorm2d(c1)
70 |
71 | def forward(self, x):
72 | return torch.max(x, self.bn(self.conv(x)))
73 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/autoanchor.py:
--------------------------------------------------------------------------------
1 | # Auto-anchor utils
2 |
3 | import numpy as np
4 | import torch
5 | import yaml
6 | from scipy.cluster.vq import kmeans
7 | from tqdm import tqdm
8 | from utils.general import colorstr
9 |
10 |
11 | def check_anchor_order(m):
12 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
13 | a = m.anchor_grid.prod(-1).view(-1) # anchor area
14 | da = a[-1] - a[0] # delta a
15 | ds = m.stride[-1] - m.stride[0] # delta s
16 | if da.sign() != ds.sign(): # anchor order does not match stride order
17 | print("Reversing anchor order")
18 | m.anchors[:] = m.anchors.flip(0)
19 | m.anchor_grid[:] = m.anchor_grid.flip(0)
20 |
21 |
22 | def check_anchors(dataset, model, thr=4.0, imgsz=640):
23 | # Check anchor fit to data, recompute if necessary
24 | prefix = colorstr("autoanchor: ")
25 | print(f"\n{prefix}Analyzing anchors... ", end="")
26 | m = (
27 | model.module.model[-1] if hasattr(model, "module") else model.model[-1]
28 | ) # Detect()
29 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
30 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
31 | wh = torch.tensor(
32 | np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])
33 | ).float() # wh
34 |
35 | def metric(k): # compute metric
36 | r = wh[:, None] / k[None]
37 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric
38 | best = x.max(1)[0] # best_x
39 | aat = (x > 1.0 / thr).float().sum(1).mean() # anchors above threshold
40 | bpr = (best > 1.0 / thr).float().mean() # best possible recall
41 | return bpr, aat
42 |
43 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
44 | print(f"anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}", end="")
45 | if bpr < 0.98: # threshold to recompute
46 | print(". Attempting to improve anchors, please wait...")
47 | na = m.anchor_grid.numel() // 2 # number of anchors
48 | new_anchors = kmean_anchors(
49 | dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False
50 | )
51 | new_bpr = metric(new_anchors.reshape(-1, 2))[0]
52 | if new_bpr > bpr: # replace anchors
53 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(
54 | m.anchors
55 | )
56 | m.anchor_grid[:] = new_anchors.clone().view_as(
57 | m.anchor_grid
58 | ) # for inference
59 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(
60 | m.anchors.device
61 | ).view(
62 | -1, 1, 1
63 | ) # loss
64 | check_anchor_order(m)
65 | print(
66 | f"{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future."
67 | )
68 | else:
69 | print(
70 | f"{prefix}Original anchors better than new anchors. Proceeding with original anchors."
71 | )
72 | print("") # newline
73 |
74 |
75 | def kmean_anchors(
76 | path="./data/coco128.yaml", n=9, img_size=640, thr=4.0, gen=1000, verbose=True
77 | ):
78 | """Creates kmeans-evolved anchors from training dataset
79 |
80 | Arguments:
81 | path: path to dataset *.yaml, or a loaded dataset
82 | n: number of anchors
83 | img_size: image size used for training
84 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
85 | gen: generations to evolve anchors using genetic algorithm
86 | verbose: print all results
87 |
88 | Return:
89 | k: kmeans evolved anchors
90 |
91 | Usage:
92 | from utils.autoanchor import *; _ = kmean_anchors()
93 | """
94 | thr = 1.0 / thr
95 | prefix = colorstr("autoanchor: ")
96 |
97 | def metric(k, wh): # compute metrics
98 | r = wh[:, None] / k[None]
99 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric
100 | # x = wh_iou(wh, torch.tensor(k)) # iou metric
101 | return x, x.max(1)[0] # x, best_x
102 |
103 | def anchor_fitness(k): # mutation fitness
104 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
105 | return (best * (best > thr).float()).mean() # fitness
106 |
107 | def print_results(k):
108 | k = k[np.argsort(k.prod(1))] # sort small to large
109 | x, best = metric(k, wh0)
110 | bpr, aat = (best > thr).float().mean(), (
111 | x > thr
112 | ).float().mean() * n # best possible recall, anch > thr
113 | print(
114 | f"{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr"
115 | )
116 | print(
117 | f"{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, "
118 | f"past_thr={x[x > thr].mean():.3f}-mean: ",
119 | end="",
120 | )
121 | for i, x in enumerate(k):
122 | print(
123 | "%i,%i" % (round(x[0]), round(x[1])),
124 | end=", " if i < len(k) - 1 else "\n",
125 | ) # use in *.cfg
126 | return k
127 |
128 | if isinstance(path, str): # *.yaml file
129 | with open(path) as f:
130 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict
131 | from utils.datasets import LoadImagesAndLabels
132 |
133 | dataset = LoadImagesAndLabels(data_dict["train"], augment=True, rect=True)
134 | else:
135 | dataset = path # dataset
136 |
137 | # Get label wh
138 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
139 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
140 |
141 | # Filter
142 | i = (wh0 < 3.0).any(1).sum()
143 | if i:
144 | print(
145 | f"{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size."
146 | )
147 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
148 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
149 |
150 | # Kmeans calculation
151 | print(f"{prefix}Running kmeans for {n} anchors on {len(wh)} points...")
152 | s = wh.std(0) # sigmas for whitening
153 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
154 | k *= s
155 | wh = torch.tensor(wh, dtype=torch.float32) # filtered
156 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
157 | k = print_results(k)
158 |
159 | # Plot
160 | # k, d = [None] * 20, [None] * 20
161 | # for i in tqdm(range(1, 21)):
162 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
163 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
164 | # ax = ax.ravel()
165 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
166 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
167 | # ax[0].hist(wh[wh[:, 0]<100, 0],400)
168 | # ax[1].hist(wh[wh[:, 1]<100, 1],400)
169 | # fig.savefig('wh.png', dpi=200)
170 |
171 | # Evolve
172 | npr = np.random
173 | f, sh, mp, s = (
174 | anchor_fitness(k),
175 | k.shape,
176 | 0.9,
177 | 0.1,
178 | ) # fitness, generations, mutation prob, sigma
179 | pbar = tqdm(
180 | range(gen), desc=f"{prefix}Evolving anchors with Genetic Algorithm:"
181 | ) # progress bar
182 | for _ in pbar:
183 | v = np.ones(sh)
184 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
185 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(
186 | 0.3, 3.0
187 | )
188 | kg = (k.copy() * v).clip(min=2.0)
189 | fg = anchor_fitness(kg)
190 | if fg > f:
191 | f, k = fg, kg.copy()
192 | pbar.desc = (
193 | f"{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}"
194 | )
195 | if verbose:
196 | print_results(k)
197 |
198 | return print_results(k)
199 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries
2 |
3 | import os
4 | import platform
5 | import subprocess
6 | import time
7 | from pathlib import Path
8 |
9 | import requests
10 | import torch
11 |
12 |
13 | def gsutil_getsize(url=""):
14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
15 | s = subprocess.check_output(f"gsutil du {url}", shell=True).decode("utf-8")
16 | return eval(s.split(" ")[0]) if len(s) else 0 # bytes
17 |
18 |
19 | def attempt_download(file, repo="ultralytics/yolov5"):
20 | # Attempt file download if does not exist
21 | file = Path(str(file).strip().replace("'", "").lower())
22 |
23 | if not file.exists():
24 | try:
25 | response = requests.get(
26 | f"https://api.github.com/repos/{repo}/releases/latest"
27 | ).json() # github api
28 | assets = [
29 | x["name"] for x in response["assets"]
30 | ] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
31 | tag = response["tag_name"] # i.e. 'v1.0'
32 | except: # fallback plan
33 | assets = ["yolov5s.pt", "yolov5m.pt", "yolov5l.pt", "yolov5x.pt"]
34 | tag = (
35 | subprocess.check_output("git tag", shell=True)
36 | .decode("utf-8")
37 | .split("\n")[-2]
38 | )
39 |
40 | name = file.name
41 | if name in assets:
42 | msg = f"{file} missing, try downloading from https://github.com/{repo}/releases/"
43 | redundant = False # second download option
44 | try: # GitHub
45 | url = f"https://github.com/{repo}/releases/download/{tag}/{name}"
46 | print(f"Downloading {url} to {file}...")
47 | torch.hub.download_url_to_file(url, file)
48 | assert file.exists() and file.stat().st_size > 1e6 # check
49 | except Exception as e: # GCP
50 | print(f"Download error: {e}")
51 | assert redundant, "No secondary mirror"
52 | url = f"https://storage.googleapis.com/{repo}/ckpt/{name}"
53 | print(f"Downloading {url} to {file}...")
54 | os.system(
55 | f"curl -L {url} -o {file}"
56 | ) # torch.hub.download_url_to_file(url, weights)
57 | finally:
58 | if not file.exists() or file.stat().st_size < 1e6: # check
59 | file.unlink(missing_ok=True) # remove partial downloads
60 | print(f"ERROR: Download failure: {msg}")
61 | print("")
62 | return
63 |
64 |
65 | def gdrive_download(id="16TiPfZj7htmTyhntwcZyEEAejOUxuT6m", file="tmp.zip"):
66 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download()
67 | t = time.time()
68 | file = Path(file)
69 | cookie = Path("cookie") # gdrive cookie
70 | print(
71 | f"Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ",
72 | end="",
73 | )
74 | file.unlink(missing_ok=True) # remove existing file
75 | cookie.unlink(missing_ok=True) # remove existing cookie
76 |
77 | # Attempt file download
78 | out = "NUL" if platform.system() == "Windows" else "/dev/null"
79 | os.system(
80 | f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}'
81 | )
82 | if os.path.exists("cookie"): # large file
83 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
84 | else: # small file
85 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
86 | r = os.system(s) # execute, capture return
87 | cookie.unlink(missing_ok=True) # remove existing cookie
88 |
89 | # Error check
90 | if r != 0:
91 | file.unlink(missing_ok=True) # remove partial
92 | print("Download error ") # raise Exception('Download error')
93 | return r
94 |
95 | # Unzip if archive
96 | if file.suffix == ".zip":
97 | print("unzipping... ", end="")
98 | os.system(f"unzip -q {file}") # unzip
99 | file.unlink() # remove zip to free space
100 |
101 | print(f"Done ({time.time() - t:.1f}s)")
102 | return r
103 |
104 |
105 | def get_token(cookie="./cookie"):
106 | with open(cookie) as f:
107 | for line in f:
108 | if "download" in line:
109 | return line.split()[-1]
110 | return ""
111 |
112 |
113 | # def upload_blob(bucket_name, source_file_name, destination_blob_name):
114 | # # Uploads a file to a bucket
115 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
116 | #
117 | # storage_client = storage.Client()
118 | # bucket = storage_client.get_bucket(bucket_name)
119 | # blob = bucket.blob(destination_blob_name)
120 | #
121 | # blob.upload_from_filename(source_file_name)
122 | #
123 | # print('File {} uploaded to {}.'.format(
124 | # source_file_name,
125 | # destination_blob_name))
126 | #
127 | #
128 | # def download_blob(bucket_name, source_blob_name, destination_file_name):
129 | # # Uploads a blob from a bucket
130 | # storage_client = storage.Client()
131 | # bucket = storage_client.get_bucket(bucket_name)
132 | # blob = bucket.blob(source_blob_name)
133 | #
134 | # blob.download_to_filename(destination_file_name)
135 | #
136 | # print('Blob {} downloaded to {}.'.format(
137 | # source_blob_name,
138 | # destination_file_name))
139 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/infer_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def decode_infer(output, stride, gt_per_grid, numclass): # gt_per_grid and numclass must be passed in explicitly (there is no self here)
5 | # logging.info(torch.tensor(output.shape[0]))
6 | # logging.info(output.shape)
7 | # # bz is batch-size
8 | # bz = tuple(torch.tensor(output.shape[0]))
9 | # gridsize = tuple(torch.tensor(output.shape[-1]))
10 | # logging.info(gridsize)
11 | sh = torch.tensor(output.shape)
12 | bz = sh[0]
13 | gridsize = sh[-1]
14 |
15 | output = output.permute(0, 2, 3, 1)
16 | output = output.view(bz, gridsize, gridsize, gt_per_grid, 5 + numclass)
17 | x1y1, x2y2, conf, prob = torch.split(output, [2, 2, 1, numclass], dim=4)
18 |
19 | shiftx = torch.arange(0, gridsize, dtype=torch.float32)
20 | shifty = torch.arange(0, gridsize, dtype=torch.float32)
21 | shifty, shiftx = torch.meshgrid([shiftx, shifty])
22 | shiftx = shiftx.unsqueeze(-1).repeat(bz, 1, 1, gt_per_grid)
23 | shifty = shifty.unsqueeze(-1).repeat(bz, 1, 1, gt_per_grid)
24 |
25 | xy_grid = torch.stack([shiftx, shifty], dim=4).cuda()
26 | x1y1 = (xy_grid + 0.5 - torch.exp(x1y1)) * stride
27 | x2y2 = (xy_grid + 0.5 + torch.exp(x2y2)) * stride
28 |
29 | xyxy = torch.cat((x1y1, x2y2), dim=4)
30 | conf = torch.sigmoid(conf)
31 | prob = torch.sigmoid(prob)
32 | output = torch.cat((xyxy, conf, prob), 4)
33 | output = output.view(bz, -1, 5 + numclass)
34 | return output
35 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/metrics.py:
--------------------------------------------------------------------------------
1 | # Model validation metrics
2 |
3 | from pathlib import Path
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import torch
8 |
9 | from . import general
10 |
11 |
12 | def fitness(x):
13 | # Model fitness as a weighted combination of metrics
14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
15 | return (x[:, :4] * w).sum(1)
16 |
17 |
18 | def ap_per_class(
19 | tp,
20 | conf,
21 | pred_cls,
22 | target_cls,
23 | plot=False,
24 | save_dir="precision-recall_curve.png",
25 | names=[],
26 | ):
27 | """Compute the average precision, given the recall and precision curves.
28 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
29 | # Arguments
30 | tp: True positives (nparray, nx1 or nx10).
31 | conf: Objectness value from 0-1 (nparray).
32 | pred_cls: Predicted object classes (nparray).
33 | target_cls: True object classes (nparray).
34 | plot: Plot precision-recall curve at mAP@0.5
35 | save_dir: Plot save directory
36 | # Returns
37 | The average precision as computed in py-faster-rcnn.
38 | """
39 |
40 | # Sort by objectness
41 | i = np.argsort(-conf)
42 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
43 |
44 | # Find unique classes
45 | unique_classes = np.unique(target_cls)
46 |
47 | # Create Precision-Recall curve and compute AP for each class
48 | px, py = np.linspace(0, 1, 1000), [] # for plotting
49 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
50 | s = [
51 | unique_classes.shape[0],
52 | tp.shape[1],
53 | ] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
54 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
55 | for ci, c in enumerate(unique_classes):
56 | i = pred_cls == c
57 | n_l = (target_cls == c).sum() # number of labels
58 | n_p = i.sum() # number of predictions
59 |
60 | if n_p == 0 or n_l == 0:
61 | continue
62 | else:
63 | # Accumulate FPs and TPs
64 | fpc = (1 - tp[i]).cumsum(0)
65 | tpc = tp[i].cumsum(0)
66 |
67 | # Recall
68 | recall = tpc / (n_l + 1e-16) # recall curve
69 | r[ci] = np.interp(
70 | -pr_score, -conf[i], recall[:, 0]
71 | ) # r at pr_score, negative x, xp because xp decreases
72 |
73 | # Precision
74 | precision = tpc / (tpc + fpc) # precision curve
75 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score
76 |
77 | # AP from recall-precision curve
78 | for j in range(tp.shape[1]):
79 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
80 | if plot and (j == 0):
81 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
82 |
83 | # Compute F1 score (harmonic mean of precision and recall)
84 | f1 = 2 * p * r / (p + r + 1e-16)
85 |
86 | if plot:
87 | plot_pr_curve(px, py, ap, save_dir, names)
88 |
89 | return p, r, ap, f1, unique_classes.astype("int32")
90 |
91 |
92 | def compute_ap(recall, precision):
93 | """Compute the average precision, given the recall and precision curves
94 | # Arguments
95 | recall: The recall curve (list)
96 | precision: The precision curve (list)
97 | # Returns
98 | Average precision, precision curve, recall curve
99 | """
100 |
101 | # Append sentinel values to beginning and end
102 | mrec = np.concatenate(([0.0], recall, [recall[-1] + 0.01]))
103 | mpre = np.concatenate(([1.0], precision, [0.0]))
104 |
105 | # Compute the precision envelope
106 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
107 |
108 | # Integrate area under curve
109 | method = "interp" # methods: 'continuous', 'interp'
110 | if method == "interp":
111 | x = np.linspace(0, 1, 101) # 101-point interp (COCO)
112 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
113 | else: # 'continuous'
114 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
115 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
116 |
117 | return ap, mpre, mrec
118 |
119 |
120 | class ConfusionMatrix:
121 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
122 | def __init__(self, nc, conf=0.25, iou_thres=0.45):
123 | self.matrix = np.zeros((nc + 1, nc + 1))
124 | self.nc = nc # number of classes
125 | self.conf = conf
126 | self.iou_thres = iou_thres
127 |
128 | def process_batch(self, detections, labels):
129 | """
130 | Update the confusion matrix with a batch of detections and ground-truth labels.
131 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
132 | Arguments:
133 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class
134 | labels (Array[M, 5]), class, x1, y1, x2, y2
135 | Returns:
136 | None, updates confusion matrix accordingly
137 | """
138 | detections = detections[detections[:, 4] > self.conf]
139 | gt_classes = labels[:, 0].int()
140 | detection_classes = detections[:, 5].int()
141 | iou = general.box_iou(labels[:, 1:], detections[:, :4])
142 |
143 | x = torch.where(iou > self.iou_thres)
144 | if x[0].shape[0]:
145 | matches = (
146 | torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
147 | .cpu()
148 | .numpy()
149 | )
150 | if x[0].shape[0] > 1:
151 | matches = matches[matches[:, 2].argsort()[::-1]]
152 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
153 | matches = matches[matches[:, 2].argsort()[::-1]]
154 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
155 | else:
156 | matches = np.zeros((0, 3))
157 |
158 | n = matches.shape[0] > 0
159 | m0, m1, _ = matches.transpose().astype(np.int16)
160 | for i, gc in enumerate(gt_classes):
161 | j = m0 == i
162 | if n and sum(j) == 1:
163 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
164 | else:
165 | self.matrix[gc, self.nc] += 1 # background FP
166 |
167 | if n:
168 | for i, dc in enumerate(detection_classes):
169 | if not any(m1 == i):
170 | self.matrix[self.nc, dc] += 1 # background FN
171 |
172 | def matrix(self):
173 | return self.matrix
174 |
175 | def plot(self, save_dir="", names=()):
176 | try:
177 | import seaborn as sn
178 |
179 | array = self.matrix / (
180 | self.matrix.sum(0).reshape(1, self.nc + 1) + 1e-6
181 | ) # normalize
182 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
183 |
184 | fig = plt.figure(figsize=(12, 9), tight_layout=True)
185 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
186 | labels = (0 < len(names) < 99) and len(
187 | names
188 | ) == self.nc # apply names to ticklabels
189 | sn.heatmap(
190 | array,
191 | annot=self.nc < 30,
192 | annot_kws={"size": 8},
193 | cmap="Blues",
194 | fmt=".2f",
195 | square=True,
196 | xticklabels=names + ["background FN"] if labels else "auto",
197 | yticklabels=names + ["background FP"] if labels else "auto",
198 | ).set_facecolor((1, 1, 1))
199 | fig.axes[0].set_xlabel("True")
200 | fig.axes[0].set_ylabel("Predicted")
201 | fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250)
202 | except Exception as e:
203 | pass
204 |
205 | def print(self):
206 | for i in range(self.nc + 1):
207 | print(" ".join(map(str, self.matrix[i])))
208 |
209 |
210 | # Plots ----------------------------------------------------------------------------------------------------------------
211 |
212 |
213 | def plot_pr_curve(px, py, ap, save_dir=".", names=()):
214 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
215 | py = np.stack(py, axis=1)
216 |
217 | if 0 < len(names) < 21: # show mAP in legend if < 21 classes
218 | for i, y in enumerate(py.T):
219 | ax.plot(
220 | px, y, linewidth=1, label=f"{names[i]} %.3f" % ap[i, 0]
221 | ) # plot(recall, precision)
222 | else:
223 | ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision)
224 |
225 | ax.plot(
226 | px,
227 | py.mean(1),
228 | linewidth=3,
229 | color="blue",
230 | label="all classes %.3f mAP@0.5" % ap[:, 0].mean(),
231 | )
232 | ax.set_xlabel("Recall")
233 | ax.set_ylabel("Precision")
234 | ax.set_xlim(0, 1)
235 | ax.set_ylim(0, 1)
236 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
237 | fig.savefig(Path(save_dir) / "precision_recall_curve.png", dpi=250)
238 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/wandb_logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/wandb_logging/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/wandb_logging/log_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import yaml
4 | from wandb_utils import WandbLogger
5 |
6 | WANDB_ARTIFACT_PREFIX = "wandb-artifact://"
7 |
8 |
9 | def create_dataset_artifact(opt):
10 | with open(opt.data) as f:
11 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict
12 | logger = WandbLogger(opt, "", None, data, job_type="Dataset Creation")
13 |
14 |
15 | if __name__ == "__main__":
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument(
18 | "--data", type=str, default="data/coco128.yaml", help="data.yaml path"
19 | )
20 | parser.add_argument(
21 | "--single-cls", action="store_true", help="train as single-class dataset"
22 | )
23 | parser.add_argument(
24 | "--project", type=str, default="YOLOv5", help="name of W&B Project"
25 | )
26 | opt = parser.parse_args()
27 | opt.resume = False # Explicitly disallow resume check for dataset upload job
28 |
29 | create_dataset_artifact(opt)
30 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1CGq-2AfcSyWGwZWs9sIzQ1BXhRkPGgxF?usp=sharing
4 |
--------------------------------------------------------------------------------
/face_recognition/arcface/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch import nn
4 |
5 |
6 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(
9 | in_planes,
10 | out_planes,
11 | kernel_size=3,
12 | stride=stride,
13 | padding=dilation,
14 | groups=groups,
15 | bias=False,
16 | dilation=dilation,
17 | )
18 |
19 |
20 | def conv1x1(in_planes, out_planes, stride=1):
21 | """1x1 convolution"""
22 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
23 |
24 |
25 | class IBasicBlock(nn.Module):
26 | expansion = 1
27 |
28 | def __init__(
29 | self,
30 | inplanes,
31 | planes,
32 | stride=1,
33 | downsample=None,
34 | groups=1,
35 | base_width=64,
36 | dilation=1,
37 | ):
38 | super(IBasicBlock, self).__init__()
39 | if groups != 1 or base_width != 64:
40 | raise ValueError("BasicBlock only supports groups=1 and base_width=64")
41 | if dilation > 1:
42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
43 | self.bn1 = nn.BatchNorm2d(
44 | inplanes,
45 | eps=1e-05,
46 | )
47 | self.conv1 = conv3x3(inplanes, planes)
48 | self.bn2 = nn.BatchNorm2d(
49 | planes,
50 | eps=1e-05,
51 | )
52 | self.prelu = nn.PReLU(planes)
53 | self.conv2 = conv3x3(planes, planes, stride)
54 | self.bn3 = nn.BatchNorm2d(
55 | planes,
56 | eps=1e-05,
57 | )
58 | self.downsample = downsample
59 | self.stride = stride
60 |
61 | def forward(self, x):
62 | identity = x
63 | out = self.bn1(x)
64 | out = self.conv1(out)
65 | out = self.bn2(out)
66 | out = self.prelu(out)
67 | out = self.conv2(out)
68 | out = self.bn3(out)
69 | if self.downsample is not None:
70 | identity = self.downsample(x)
71 | out += identity
72 | return out
73 |
74 |
75 | class IResNet(nn.Module):
76 | fc_scale = 7 * 7
77 |
78 | def __init__(
79 | self,
80 | block,
81 | layers,
82 | dropout=0,
83 | num_features=512,
84 | zero_init_residual=False,
85 | groups=1,
86 | width_per_group=64,
87 | replace_stride_with_dilation=None,
88 | fp16=False,
89 | ):
90 | super(IResNet, self).__init__()
91 | self.fp16 = fp16
92 | self.inplanes = 64
93 | self.dilation = 1
94 | if replace_stride_with_dilation is None:
95 | replace_stride_with_dilation = [False, False, False]
96 | if len(replace_stride_with_dilation) != 3:
97 | raise ValueError(
98 | "replace_stride_with_dilation should be None "
99 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
100 | )
101 | self.groups = groups
102 | self.base_width = width_per_group
103 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
104 | self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
105 | self.prelu = nn.PReLU(self.inplanes)
106 | self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
107 | self.layer2 = self._make_layer(
108 | block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
109 | )
110 | self.layer3 = self._make_layer(
111 | block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
112 | )
113 | self.layer4 = self._make_layer(
114 | block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
115 | )
116 | self.bn2 = nn.BatchNorm2d(
117 | 512 * block.expansion,
118 | eps=1e-05,
119 | )
120 | self.dropout = nn.Dropout(p=dropout, inplace=True)
121 | self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
122 | self.features = nn.BatchNorm1d(num_features, eps=1e-05)
123 | nn.init.constant_(self.features.weight, 1.0)
124 | self.features.weight.requires_grad = False
125 |
126 | for m in self.modules():
127 | if isinstance(m, nn.Conv2d):
128 | nn.init.normal_(m.weight, 0, 0.1)
129 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
130 | nn.init.constant_(m.weight, 1)
131 | nn.init.constant_(m.bias, 0)
132 |
133 | if zero_init_residual:
134 | for m in self.modules():
135 | if isinstance(m, IBasicBlock):
136 | nn.init.constant_(m.bn2.weight, 0)
137 |
138 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
139 | downsample = None
140 | previous_dilation = self.dilation
141 | if dilate:
142 | self.dilation *= stride
143 | stride = 1
144 | if stride != 1 or self.inplanes != planes * block.expansion:
145 | downsample = nn.Sequential(
146 | conv1x1(self.inplanes, planes * block.expansion, stride),
147 | nn.BatchNorm2d(
148 | planes * block.expansion,
149 | eps=1e-05,
150 | ),
151 | )
152 | layers = []
153 | layers.append(
154 | block(
155 | self.inplanes,
156 | planes,
157 | stride,
158 | downsample,
159 | self.groups,
160 | self.base_width,
161 | previous_dilation,
162 | )
163 | )
164 | self.inplanes = planes * block.expansion
165 | for _ in range(1, blocks):
166 | layers.append(
167 | block(
168 | self.inplanes,
169 | planes,
170 | groups=self.groups,
171 | base_width=self.base_width,
172 | dilation=self.dilation,
173 | )
174 | )
175 |
176 | return nn.Sequential(*layers)
177 |
178 | def forward(self, x):
179 | with torch.cuda.amp.autocast(self.fp16):
180 | x = self.conv1(x)
181 | x = self.bn1(x)
182 | x = self.prelu(x)
183 | x = self.layer1(x)
184 | x = self.layer2(x)
185 | x = self.layer3(x)
186 | x = self.layer4(x)
187 | x = self.bn2(x)
188 | x = torch.flatten(x, 1)
189 | x = self.dropout(x)
190 | x = self.fc(x.float() if self.fp16 else x)
191 | x = self.features(x)
192 | x = F.normalize(x, dim=1)
193 | return x
194 |
195 |
196 | def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
197 | model = IResNet(block, layers, **kwargs)
198 | if pretrained:
199 | raise ValueError()
200 | return model
201 |
202 |
203 | def iresnet18(pretrained=False, progress=True, **kwargs):
204 | return _iresnet("iresnet18", IBasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)
205 |
206 |
207 | def iresnet34(pretrained=False, progress=True, **kwargs):
208 | return _iresnet("iresnet34", IBasicBlock, [3, 4, 6, 3], pretrained, progress, **kwargs)
209 |
210 |
211 | def iresnet50(pretrained=False, progress=True, **kwargs):
212 | return _iresnet("iresnet50", IBasicBlock, [3, 4, 14, 3], pretrained, progress, **kwargs)
213 |
214 |
215 | def iresnet100(pretrained=False, progress=True, **kwargs):
216 | return _iresnet("iresnet100", IBasicBlock, [3, 13, 30, 3], pretrained, progress, **kwargs)
217 |
218 |
219 | def iresnet200(pretrained=False, progress=True, **kwargs):
220 | return _iresnet("iresnet200", IBasicBlock, [6, 26, 60, 6], pretrained, progress, **kwargs)
221 |
222 |
223 | def iresnet_inference(model_name, path, device="cuda"):
224 | if model_name == "r18":
225 | model = iresnet18()
226 | elif model_name == "r34":
227 | model = iresnet34()
228 | elif model_name == "r50":
229 | model = iresnet50()
230 | elif model_name == "r100":
231 | model = iresnet100()
232 | else:
233 | raise ValueError()
234 |
235 | weight = torch.load(path, map_location=device)
236 |
237 | model.load_state_dict(weight)
238 | model.to(device)
239 |
240 | return model.eval()
241 |
--------------------------------------------------------------------------------
/face_recognition/arcface/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def read_features(feature_path):
5 | try:
6 | data = np.load(feature_path + ".npz", allow_pickle=True)
7 | images_name = data["images_name"]
8 | images_emb = data["images_emb"]
9 |
10 | return images_name, images_emb
11 | except Exception: # feature file missing or unreadable
12 | return None
13 |
14 |
15 | def compare_encodings(encoding, encodings):
16 | sims = np.dot(encodings, encoding.T) # cosine similarities (the ArcFace embeddings are L2-normalized)
17 | pare_index = np.argmax(sims) # index of the best-matching known embedding
18 | score = sims[pare_index]
19 | return score, pare_index
20 |
--------------------------------------------------------------------------------
/face_recognition/arcface/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1CHHb_7wbvfjKPFNKVBb76lL5sVfBLcv5?usp=sharing
4 |
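5 | ## Usage
6 |
7 | A minimal sketch of loading a downloaded checkpoint with `iresnet_inference` from
8 | `face_recognition/arcface/model.py`, run from the repository root. The file name
9 | `arcface_r100.pth` below is only an example; use the actual name of the downloaded file.
10 |
11 | ```python
12 | import torch
13 |
14 | from face_recognition.arcface.model import iresnet_inference
15 |
16 | # Build an iresnet100 backbone, load the ArcFace weights and switch to eval mode.
17 | device = "cuda" if torch.cuda.is_available() else "cpu"
18 | model = iresnet_inference(
19 |     model_name="r100",
20 |     path="face_recognition/arcface/weights/arcface_r100.pth",
21 |     device=device,
22 | )
23 |
24 | # The model maps an aligned 112x112 RGB face tensor to a 512-D L2-normalized embedding.
25 | face = torch.randn(1, 3, 112, 112, device=device)
26 | with torch.no_grad():
27 |     embedding = model(face)
28 | print(embedding.shape)  # torch.Size([1, 512])
29 | ```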
--------------------------------------------------------------------------------
/face_tracking/config/config_tracking.yaml:
--------------------------------------------------------------------------------
1 | device: cpu # device used for tracking inference
2 | fps: 30 # frame rate of the input video stream
3 | match_thresh: 0.8 # IoU matching threshold for the first association step
4 | min_box_area: 10 # discard tracked boxes smaller than this area
5 | save_result: True # save the annotated output video
6 | track_buffer: 30 # number of frames to keep lost tracks before removing them
7 | track_thresh: 0.5 # detection score threshold for high-confidence boxes
8 | aspect_ratio_thresh: 1.6 # discard boxes whose aspect ratio exceeds this value
9 | ckpt: bytetrack_s_mot17.pth.tar # ByteTrack checkpoint (see face_tracking/pretrained)
10 | fp16: True # use half-precision inference
11 |
--------------------------------------------------------------------------------
/face_tracking/pretrained/README.md:
--------------------------------------------------------------------------------
1 | ## Model zoo
2 |
3 | | Model | MOTA | IDF1 | IDs | FPS |
4 | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ---- | --- | ---- |
5 | | bytetrack_x_mot17 [[google]](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view?usp=sharing), [[baidu(code:ic0i)]](https://pan.baidu.com/s/1OJKrcQa_JP9zofC6ZtGBpw) | 90.0 | 83.3 | 422 | 29.6 |
6 | | bytetrack_l_mot17 [[google]](https://drive.google.com/file/d/1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz/view?usp=sharing), [[baidu(code:1cml)]](https://pan.baidu.com/s/1242adimKM6TYdeLU2qnuRA) | 88.7 | 80.7 | 460 | 43.7 |
7 | | bytetrack_m_mot17 [[google]](https://drive.google.com/file/d/11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun/view?usp=sharing), [[baidu(code:u3m4)]](https://pan.baidu.com/s/1fKemO1uZfvNSLzJfURO4TQ) | 87.0 | 80.1 | 477 | 54.1 |
8 | | bytetrack_s_mot17 [[google]](https://drive.google.com/file/d/1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj/view?usp=sharing), [[baidu(code:qflm)]](https://pan.baidu.com/s/1PiP1kQfgxAIrnGUbFP6Wfg) | 79.2 | 74.3 | 533 | 64.5 |
9 |
10 | ## Reference
11 |
12 | - https://github.com/ifzhang/ByteTrack?tab=readme-ov-file#model-zoo
13 |
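14 | ## Usage
15 |
16 | The tracking configuration in `face_tracking/config/config_tracking.yaml` points at
17 | `bytetrack_s_mot17.pth.tar` from this zoo. Below is a minimal sketch (assuming the YAML file
18 | is simply loaded into a dict and the script is run from the repository root) of constructing
19 | the `BYTETracker` defined in `face_tracking/tracker/byte_tracker.py` with that configuration:
20 |
21 | ```python
22 | import yaml
23 |
24 | from face_tracking.tracker.byte_tracker import BYTETracker
25 |
26 | # Load the tracking settings as a plain dict; BYTETracker reads
27 | # "track_thresh", "track_buffer" and "match_thresh" from it.
28 | with open("face_tracking/config/config_tracking.yaml") as f:
29 |     config = yaml.safe_load(f)
30 |
31 | tracker = BYTETracker(args=config, frame_rate=config["fps"])
32 |
33 | # For every frame, pass the detector output (boxes and scores) to
34 | # tracker.update(output_results, img_info, img_size) to obtain the active tracks.
35 | ```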
--------------------------------------------------------------------------------
/face_tracking/tracker/basetrack.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import numpy as np
4 |
5 |
6 | class TrackState(object):
7 | New = 0
8 | Tracked = 1
9 | Lost = 2
10 | Removed = 3
11 |
12 |
13 | class BaseTrack(object):
14 | _count = 0
15 |
16 | track_id = 0
17 | is_activated = False
18 | state = TrackState.New
19 |
20 | history = OrderedDict()
21 | features = []
22 | curr_feature = None
23 | score = 0
24 | start_frame = 0
25 | frame_id = 0
26 | time_since_update = 0
27 |
28 | # multi-camera
29 | location = (np.inf, np.inf)
30 |
31 | @property
32 | def end_frame(self):
33 | return self.frame_id
34 |
35 | @staticmethod
36 | def next_id():
37 | BaseTrack._count += 1
38 | return BaseTrack._count
39 |
40 | def activate(self, *args):
41 | raise NotImplementedError
42 |
43 | def predict(self):
44 | raise NotImplementedError
45 |
46 | def update(self, *args, **kwargs):
47 | raise NotImplementedError
48 |
49 | def mark_lost(self):
50 | self.state = TrackState.Lost
51 |
52 | def mark_removed(self):
53 | self.state = TrackState.Removed
54 |
--------------------------------------------------------------------------------
/face_tracking/tracker/byte_tracker.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import torch
5 |
6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
7 | sys.path.append(BASE_DIR)
8 |
9 | import matching
10 | import numpy as np
11 |
12 | from .basetrack import BaseTrack, TrackState
13 | from .kalman_filter import KalmanFilter
14 |
15 |
16 | class STrack(BaseTrack):
17 | shared_kalman = KalmanFilter()
18 |
19 | def __init__(self, tlwh, score):
20 | # wait activate
21 | self._tlwh = np.asarray(tlwh, dtype=np.float64)
22 | self.kalman_filter = None
23 | self.mean, self.covariance = None, None
24 | self.is_activated = False
25 |
26 | self.score = score
27 | self.tracklet_len = 0
28 |
29 | def predict(self):
30 | mean_state = self.mean.copy()
31 | if self.state != TrackState.Tracked:
32 | mean_state[7] = 0
33 | self.mean, self.covariance = self.kalman_filter.predict(
34 | mean_state, self.covariance
35 | )
36 |
37 | @staticmethod
38 | def multi_predict(stracks):
39 | if len(stracks) > 0:
40 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
41 | multi_covariance = np.asarray([st.covariance for st in stracks])
42 | for i, st in enumerate(stracks):
43 | if st.state != TrackState.Tracked:
44 | multi_mean[i][7] = 0
45 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(
46 | multi_mean, multi_covariance
47 | )
48 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
49 | stracks[i].mean = mean
50 | stracks[i].covariance = cov
51 |
52 | def activate(self, kalman_filter, frame_id):
53 | """Start a new tracklet"""
54 | self.kalman_filter = kalman_filter
55 | self.track_id = self.next_id()
56 | self.mean, self.covariance = self.kalman_filter.initiate(
57 | self.tlwh_to_xyah(self._tlwh)
58 | )
59 |
60 | self.tracklet_len = 0
61 | self.state = TrackState.Tracked
62 | if frame_id == 1:
63 | self.is_activated = True
64 | # self.is_activated = True
65 | self.frame_id = frame_id
66 | self.start_frame = frame_id
67 |
68 | def re_activate(self, new_track, frame_id, new_id=False):
69 | self.mean, self.covariance = self.kalman_filter.update(
70 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
71 | )
72 | self.tracklet_len = 0
73 | self.state = TrackState.Tracked
74 | self.is_activated = True
75 | self.frame_id = frame_id
76 | if new_id:
77 | self.track_id = self.next_id()
78 | self.score = new_track.score
79 |
80 | def update(self, new_track, frame_id):
81 | """
82 | Update a matched track
83 | :type new_track: STrack
84 | :type frame_id: int
85 | :type update_feature: bool
86 | :return:
87 | """
88 | self.frame_id = frame_id
89 | self.tracklet_len += 1
90 |
91 | new_tlwh = new_track.tlwh
92 | self.mean, self.covariance = self.kalman_filter.update(
93 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)
94 | )
95 | self.state = TrackState.Tracked
96 | self.is_activated = True
97 |
98 | self.score = new_track.score
99 |
100 | @property
101 | # @jit(nopython=True)
102 | def tlwh(self):
103 | """Get current position in bounding box format `(top left x, top left y,
104 | width, height)`.
105 | """
106 | if self.mean is None:
107 | return self._tlwh.copy()
108 | ret = self.mean[:4].copy()
109 | ret[2] *= ret[3]
110 | ret[:2] -= ret[2:] / 2
111 | return ret
112 |
113 | @property
114 | # @jit(nopython=True)
115 | def tlbr(self):
116 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
117 | `(top left, bottom right)`.
118 | """
119 | ret = self.tlwh.copy()
120 | ret[2:] += ret[:2]
121 | return ret
122 |
123 | @staticmethod
124 | # @jit(nopython=True)
125 | def tlwh_to_xyah(tlwh):
126 | """Convert bounding box to format `(center x, center y, aspect ratio,
127 | height)`, where the aspect ratio is `width / height`.
128 | """
129 | ret = np.asarray(tlwh).copy()
130 | ret[:2] += ret[2:] / 2
131 | ret[2] /= ret[3]
132 | return ret
133 |
134 | def to_xyah(self):
135 | return self.tlwh_to_xyah(self.tlwh)
136 |
137 | @staticmethod
138 | # @jit(nopython=True)
139 | def tlbr_to_tlwh(tlbr):
140 | ret = np.asarray(tlbr).copy()
141 | ret[2:] -= ret[:2]
142 | return ret
143 |
144 | @staticmethod
145 | # @jit(nopython=True)
146 | def tlwh_to_tlbr(tlwh):
147 | ret = np.asarray(tlwh).copy()
148 | ret[2:] += ret[:2]
149 | return ret
150 |
151 | def __repr__(self):
152 | return "OT_{}_({}-{})".format(self.track_id, self.start_frame, self.end_frame)
153 |
154 |
155 | class BYTETracker(object):
156 | def __init__(self, args, frame_rate=30):
157 | self.tracked_stracks = [] # type: list[STrack]
158 | self.lost_stracks = [] # type: list[STrack]
159 | self.removed_stracks = [] # type: list[STrack]
160 |
161 | self.frame_id = 0
162 | self.args = args
163 | # self.det_thresh = args.track_thresh
164 | self.det_thresh = args["track_thresh"] + 0.1
165 | self.buffer_size = int(frame_rate / 30.0 * args["track_buffer"])
166 | self.max_time_lost = self.buffer_size
167 | self.kalman_filter = KalmanFilter()
168 |
169 | def update(self, output_results, img_info, img_size):
170 | self.frame_id += 1
171 | activated_starcks = []
172 | refind_stracks = []
173 | lost_stracks = []
174 | removed_stracks = []
175 |
176 | if output_results.shape[1] == 5:
177 | scores = output_results[:, 4]
178 | bboxes = output_results[:, :4]
179 | else:
180 | output_results = output_results.cpu().numpy()
181 | scores = output_results[:, 4] * output_results[:, 5]
182 | bboxes = output_results[:, :4] # x1y1x2y2
183 | img_h, img_w = img_info[0], img_info[1]
184 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w))
185 | bboxes /= scale
186 |
187 | remain_inds = scores > self.args["track_thresh"]
188 | inds_low = scores > 0.1
189 | inds_high = scores < self.args["track_thresh"]
190 |
191 | inds_second = np.logical_and(inds_low, inds_high)
192 | dets_second = bboxes[inds_second.to(torch.bool)]
193 | dets = bboxes[remain_inds]
194 | scores_keep = scores[remain_inds]
195 | scores_second = scores[inds_second.to(torch.bool)]
196 |
197 | if len(dets) > 0:
198 | """Detections"""
199 | detections = [
200 | STrack(STrack.tlbr_to_tlwh(tlbr), s)
201 | for (tlbr, s) in zip(dets, scores_keep)
202 | ]
203 | else:
204 | detections = []
205 |
206 | """ Add newly detected tracklets to tracked_stracks"""
207 | unconfirmed = []
208 | tracked_stracks = [] # type: list[STrack]
209 | for track in self.tracked_stracks:
210 | if not track.is_activated:
211 | unconfirmed.append(track)
212 | else:
213 | tracked_stracks.append(track)
214 |
215 | """ Step 2: First association, with high score detection boxes"""
216 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
217 | # Predict the current location with KF
218 | STrack.multi_predict(strack_pool)
219 | dists = matching.iou_distance(strack_pool, detections)
220 | # if not self.args.mot20:
221 | # dists = matching.fuse_score(dists, detections)
222 | matches, u_track, u_detection = matching.linear_assignment(
223 | dists, thresh=self.args["match_thresh"]
224 | )
225 |
226 | for itracked, idet in matches:
227 | track = strack_pool[itracked]
228 | det = detections[idet]
229 | if track.state == TrackState.Tracked:
230 | track.update(detections[idet], self.frame_id)
231 | activated_starcks.append(track)
232 | else:
233 | track.re_activate(det, self.frame_id, new_id=False)
234 | refind_stracks.append(track)
235 |
236 | """ Step 3: Second association, with low score detection boxes"""
237 | # associate the remaining unmatched tracks with the low score detections
238 | if len(dets_second) > 0:
239 | """Detections"""
240 | detections_second = [
241 | STrack(STrack.tlbr_to_tlwh(tlbr), s)
242 | for (tlbr, s) in zip(dets_second, scores_second)
243 | ]
244 | else:
245 | detections_second = []
246 | r_tracked_stracks = [
247 | strack_pool[i]
248 | for i in u_track
249 | if strack_pool[i].state == TrackState.Tracked
250 | ]
251 | dists = matching.iou_distance(r_tracked_stracks, detections_second)
252 | matches, u_track, u_detection_second = matching.linear_assignment(
253 | dists, thresh=0.5
254 | )
255 | for itracked, idet in matches:
256 | track = r_tracked_stracks[itracked]
257 | det = detections_second[idet]
258 | if track.state == TrackState.Tracked:
259 | track.update(det, self.frame_id)
260 | activated_starcks.append(track)
261 | else:
262 | track.re_activate(det, self.frame_id, new_id=False)
263 | refind_stracks.append(track)
264 |
265 | for it in u_track:
266 | track = r_tracked_stracks[it]
267 | if not track.state == TrackState.Lost:
268 | track.mark_lost()
269 | lost_stracks.append(track)
270 |
271 | """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
272 | detections = [detections[i] for i in u_detection]
273 | dists = matching.iou_distance(unconfirmed, detections)
274 | # if not self.args.mot20:
275 | # dists = matching.fuse_score(dists, detections)
276 | matches, u_unconfirmed, u_detection = matching.linear_assignment(
277 | dists, thresh=0.7
278 | )
279 | for itracked, idet in matches:
280 | unconfirmed[itracked].update(detections[idet], self.frame_id)
281 | activated_starcks.append(unconfirmed[itracked])
282 | for it in u_unconfirmed:
283 | track = unconfirmed[it]
284 | track.mark_removed()
285 | removed_stracks.append(track)
286 |
287 | """ Step 4: Init new stracks"""
288 | for inew in u_detection:
289 | track = detections[inew]
290 | if track.score < self.det_thresh:
291 | continue
292 | track.activate(self.kalman_filter, self.frame_id)
293 | activated_starcks.append(track)
294 | """ Step 5: Update state"""
295 | for track in self.lost_stracks:
296 | if self.frame_id - track.end_frame > self.max_time_lost:
297 | track.mark_removed()
298 | removed_stracks.append(track)
299 |
300 | # print('Ramained match {} s'.format(t4-t3))
301 |
302 | self.tracked_stracks = [
303 | t for t in self.tracked_stracks if t.state == TrackState.Tracked
304 | ]
305 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
306 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
307 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
308 | self.lost_stracks.extend(lost_stracks)
309 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
310 | self.removed_stracks.extend(removed_stracks)
311 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
312 | self.tracked_stracks, self.lost_stracks
313 | )
314 | # return only the currently activated tracks
315 | output_stracks = [track for track in self.tracked_stracks if track.is_activated]
316 |
317 | return output_stracks
318 |
319 |
320 | def joint_stracks(tlista, tlistb):
321 | exists = {}
322 | res = []
323 | for t in tlista:
324 | exists[t.track_id] = 1
325 | res.append(t)
326 | for t in tlistb:
327 | tid = t.track_id
328 | if not exists.get(tid, 0):
329 | exists[tid] = 1
330 | res.append(t)
331 | return res
332 |
333 |
334 | def sub_stracks(tlista, tlistb):
335 | stracks = {}
336 | for t in tlista:
337 | stracks[t.track_id] = t
338 | for t in tlistb:
339 | tid = t.track_id
340 | if stracks.get(tid, 0):
341 | del stracks[tid]
342 | return list(stracks.values())
343 |
344 |
345 | def remove_duplicate_stracks(stracksa, stracksb):
346 | pdist = matching.iou_distance(stracksa, stracksb)
347 | pairs = np.where(pdist < 0.15)
348 | dupa, dupb = list(), list()
349 | for p, q in zip(*pairs):
350 | timep = stracksa[p].frame_id - stracksa[p].start_frame
351 | timeq = stracksb[q].frame_id - stracksb[q].start_frame
352 | if timep > timeq:
353 | dupb.append(q)
354 | else:
355 | dupa.append(p)
356 | resa = [t for i, t in enumerate(stracksa) if not i in dupa]
357 | resb = [t for i, t in enumerate(stracksb) if not i in dupb]
358 | return resa, resb
359 |
--------------------------------------------------------------------------------
/face_tracking/tracker/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 | """
6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
8 | function and used as Mahalanobis gating threshold.
9 | """
10 | chi2inv95 = {
11 | 1: 3.8415,
12 | 2: 5.9915,
13 | 3: 7.8147,
14 | 4: 9.4877,
15 | 5: 11.070,
16 | 6: 12.592,
17 | 7: 14.067,
18 | 8: 15.507,
19 | 9: 16.919,
20 | }
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.0
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1.0 / 20
53 | self._std_weight_velocity = 1.0 / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3],
85 | ]
86 | covariance = np.diag(np.square(std))
87 | return mean, covariance
88 |
89 | def predict(self, mean, covariance):
90 | """Run Kalman filter prediction step.
91 |
92 | Parameters
93 | ----------
94 | mean : ndarray
95 | The 8 dimensional mean vector of the object state at the previous
96 | time step.
97 | covariance : ndarray
98 | The 8x8 dimensional covariance matrix of the object state at the
99 | previous time step.
100 |
101 | Returns
102 | -------
103 | (ndarray, ndarray)
104 | Returns the mean vector and covariance matrix of the predicted
105 | state. Unobserved velocities are initialized to 0 mean.
106 |
107 | """
108 | std_pos = [
109 | self._std_weight_position * mean[3],
110 | self._std_weight_position * mean[3],
111 | 1e-2,
112 | self._std_weight_position * mean[3],
113 | ]
114 | std_vel = [
115 | self._std_weight_velocity * mean[3],
116 | self._std_weight_velocity * mean[3],
117 | 1e-5,
118 | self._std_weight_velocity * mean[3],
119 | ]
120 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
121 |
122 | # mean = np.dot(self._motion_mat, mean)
123 | mean = np.dot(mean, self._motion_mat.T)
124 | covariance = (
125 | np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T))
126 | + motion_cov
127 | )
128 |
129 | return mean, covariance
130 |
131 | def project(self, mean, covariance):
132 | """Project state distribution to measurement space.
133 |
134 | Parameters
135 | ----------
136 | mean : ndarray
137 | The state's mean vector (8 dimensional array).
138 | covariance : ndarray
139 | The state's covariance matrix (8x8 dimensional).
140 |
141 | Returns
142 | -------
143 | (ndarray, ndarray)
144 | Returns the projected mean and covariance matrix of the given state
145 | estimate.
146 |
147 | """
148 | std = [
149 | self._std_weight_position * mean[3],
150 | self._std_weight_position * mean[3],
151 | 1e-1,
152 | self._std_weight_position * mean[3],
153 | ]
154 | innovation_cov = np.diag(np.square(std))
155 |
156 | mean = np.dot(self._update_mat, mean)
157 | covariance = np.linalg.multi_dot(
158 | (self._update_mat, covariance, self._update_mat.T)
159 | )
160 | return mean, covariance + innovation_cov
161 |
162 | def multi_predict(self, mean, covariance):
163 | """Run Kalman filter prediction step (Vectorized version).
164 | Parameters
165 | ----------
166 | mean : ndarray
167 | The Nx8 dimensional mean matrix of the object states at the previous
168 | time step.
169 | covariance : ndarray
170 |             The Nx8x8 dimensional covariance matrices of the object states at the
171 | previous time step.
172 | Returns
173 | -------
174 | (ndarray, ndarray)
175 | Returns the mean vector and covariance matrix of the predicted
176 | state. Unobserved velocities are initialized to 0 mean.
177 | """
178 | std_pos = [
179 | self._std_weight_position * mean[:, 3],
180 | self._std_weight_position * mean[:, 3],
181 | 1e-2 * np.ones_like(mean[:, 3]),
182 | self._std_weight_position * mean[:, 3],
183 | ]
184 | std_vel = [
185 | self._std_weight_velocity * mean[:, 3],
186 | self._std_weight_velocity * mean[:, 3],
187 | 1e-5 * np.ones_like(mean[:, 3]),
188 | self._std_weight_velocity * mean[:, 3],
189 | ]
190 | sqr = np.square(np.r_[std_pos, std_vel]).T
191 |
192 | motion_cov = []
193 | for i in range(len(mean)):
194 | motion_cov.append(np.diag(sqr[i]))
195 | motion_cov = np.asarray(motion_cov)
196 |
197 | mean = np.dot(mean, self._motion_mat.T)
198 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
199 | covariance = np.dot(left, self._motion_mat.T) + motion_cov
200 |
201 | return mean, covariance
202 |
203 | def update(self, mean, covariance, measurement):
204 | """Run Kalman filter correction step.
205 |
206 | Parameters
207 | ----------
208 | mean : ndarray
209 | The predicted state's mean vector (8 dimensional).
210 | covariance : ndarray
211 | The state's covariance matrix (8x8 dimensional).
212 | measurement : ndarray
213 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
214 | is the center position, a the aspect ratio, and h the height of the
215 | bounding box.
216 |
217 | Returns
218 | -------
219 | (ndarray, ndarray)
220 | Returns the measurement-corrected state distribution.
221 |
222 | """
223 | projected_mean, projected_cov = self.project(mean, covariance)
224 |
225 | chol_factor, lower = scipy.linalg.cho_factor(
226 | projected_cov, lower=True, check_finite=False
227 | )
228 | kalman_gain = scipy.linalg.cho_solve(
229 | (chol_factor, lower),
230 | np.dot(covariance, self._update_mat.T).T,
231 | check_finite=False,
232 | ).T
233 | innovation = measurement - projected_mean
234 |
235 | new_mean = mean + np.dot(innovation, kalman_gain.T)
236 | new_covariance = covariance - np.linalg.multi_dot(
237 | (kalman_gain, projected_cov, kalman_gain.T)
238 | )
239 | return new_mean, new_covariance
240 |
241 | def gating_distance(
242 | self, mean, covariance, measurements, only_position=False, metric="maha"
243 | ):
244 | """Compute gating distance between state distribution and measurements.
245 | A suitable distance threshold can be obtained from `chi2inv95`. If
246 | `only_position` is False, the chi-square distribution has 4 degrees of
247 | freedom, otherwise 2.
248 | Parameters
249 | ----------
250 | mean : ndarray
251 | Mean vector over the state distribution (8 dimensional).
252 | covariance : ndarray
253 | Covariance of the state distribution (8x8 dimensional).
254 | measurements : ndarray
255 | An Nx4 dimensional matrix of N measurements, each in
256 | format (x, y, a, h) where (x, y) is the bounding box center
257 | position, a the aspect ratio, and h the height.
258 | only_position : Optional[bool]
259 | If True, distance computation is done with respect to the bounding
260 | box center position only.
261 | Returns
262 | -------
263 | ndarray
264 | Returns an array of length N, where the i-th element contains the
265 | squared Mahalanobis distance between (mean, covariance) and
266 | `measurements[i]`.
267 | """
268 | mean, covariance = self.project(mean, covariance)
269 | if only_position:
270 | mean, covariance = mean[:2], covariance[:2, :2]
271 | measurements = measurements[:, :2]
272 |
273 | d = measurements - mean
274 | if metric == "gaussian":
275 | return np.sum(d * d, axis=1)
276 | elif metric == "maha":
277 | cholesky_factor = np.linalg.cholesky(covariance)
278 | z = scipy.linalg.solve_triangular(
279 | cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True
280 | )
281 | squared_maha = np.sum(z * z, axis=0)
282 | return squared_maha
283 | else:
284 | raise ValueError("invalid distance metric")
285 |
--------------------------------------------------------------------------------
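Usage sketch (illustrative, not part of the repository): initiate a track from a single (x, y, a, h) measurement, run one prediction step, gate candidate detections with the chi2inv95 table, and correct with the closest accepted one. The import path is an assumption and only works when this directory is on sys.path.

import numpy as np

from kalman_filter import KalmanFilter, chi2inv95  # assumed import path

kf = KalmanFilter()

# One bounding box observed as (center_x, center_y, aspect_ratio, height).
measurement = np.array([320.0, 240.0, 0.75, 120.0])
mean, cov = kf.initiate(measurement)      # 8-dim state, 8x8 covariance

mean, cov = kf.predict(mean, cov)         # constant-velocity prediction

# Gate candidate detections by squared Mahalanobis distance (4 degrees of freedom).
candidates = np.array([[322.0, 243.0, 0.74, 121.0],
                       [500.0, 100.0, 0.80, 90.0]])
d2 = kf.gating_distance(mean, cov, candidates, metric="maha")
keep = d2 <= chi2inv95[4]

# Correct the state with the closest accepted measurement, if any.
if keep.any():
    best = candidates[keep][np.argmin(d2[keep])]
    mean, cov = kf.update(mean, cov, best)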
/face_tracking/tracker/matching.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import numpy as np
5 | from scipy.optimize import linear_sum_assignment
6 | from scipy.spatial.distance import cdist
7 |
8 | # Put this directory on the path before importing the sibling kalman_filter module.
9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10 | sys.path.append(BASE_DIR)
11 |
12 | import kalman_filter
13 |
14 |
15 | def linear_assignment(cost_matrix, thresh):
16 | if cost_matrix.size == 0:
17 | return (
18 | np.empty((0, 2), dtype=int),
19 | tuple(range(cost_matrix.shape[0])),
20 | tuple(range(cost_matrix.shape[1])),
21 | )
22 |
23 | row_ind, col_ind = linear_sum_assignment(cost_matrix)
24 | matches = np.array(
25 | [[r, c] for r, c in zip(row_ind, col_ind) if cost_matrix[r, c] <= thresh]
26 | ).reshape(-1, 2)
27 | unmatched_a = np.array([i for i in range(cost_matrix.shape[0]) if i not in matches[:, 0]])
28 | unmatched_b = np.array([i for i in range(cost_matrix.shape[1]) if i not in matches[:, 1]])
29 |
30 | return matches, tuple(unmatched_a), tuple(unmatched_b)
31 |
32 |
33 | def bbox_iou(box1, box2):
34 | """
35 | Compute the IoU of two bounding boxes.
36 | """
37 | # Determine the coordinates of each of the boxes
38 | x1, y1, x2, y2 = box1
39 | x1_p, y1_p, x2_p, y2_p = box2
40 |
41 | # Calculate the area of intersection rectangle
42 | xi1 = max(x1, x1_p)
43 | yi1 = max(y1, y1_p)
44 | xi2 = min(x2, x2_p)
45 | yi2 = min(y2, y2_p)
46 | inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0)
47 |
48 | # Calculate each box area
49 | box1_area = (x2 - x1) * (y2 - y1)
50 | box2_area = (x2_p - x1_p) * (y2_p - y1_p)
51 |
52 | # Calculate union area
53 | union_area = box1_area + box2_area - inter_area
54 |
55 | # Calculate IoU
56 | iou = inter_area / union_area
57 |
58 | return iou
59 |
60 |
61 | def ious(atlbrs, btlbrs):
62 | """
63 | Compute cost based on IoU
64 | :type atlbrs: list[tlbr] | np.ndarray
65 | :type btlbrs: list[tlbr] | np.ndarray
66 |
67 | :rtype ious np.ndarray
68 | """
69 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
70 | for i, box1 in enumerate(atlbrs):
71 | for j, box2 in enumerate(btlbrs):
72 | ious[i, j] = bbox_iou(box1, box2)
73 | return ious
74 |
75 |
76 | def iou_distance(atracks, btracks):
77 | """
78 | Compute cost based on IoU
79 | :type atracks: list[STrack]
80 | :type btracks: list[STrack]
81 |
82 | :rtype cost_matrix np.ndarray
83 | """
84 |
85 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
86 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)
87 | ):
88 | atlbrs = atracks
89 | btlbrs = btracks
90 | else:
91 | atlbrs = [track.tlbr for track in atracks]
92 | btlbrs = [track.tlbr for track in btracks]
93 | _ious = ious(atlbrs, btlbrs)
94 | cost_matrix = 1 - _ious
95 |
96 | return cost_matrix
97 |
98 |
99 | def v_iou_distance(atracks, btracks):
100 | """
101 | Compute cost based on IoU
102 | :type atracks: list[STrack]
103 | :type btracks: list[STrack]
104 |
105 | :rtype cost_matrix np.ndarray
106 | """
107 |
108 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
109 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)
110 | ):
111 | atlbrs = atracks
112 | btlbrs = btracks
113 | else:
114 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
115 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
116 | _ious = ious(atlbrs, btlbrs)
117 | cost_matrix = 1 - _ious
118 |
119 | return cost_matrix
120 |
121 |
122 | def embedding_distance(tracks, detections, metric="cosine"):
123 | """
124 | :param tracks: list[STrack]
125 | :param detections: list[BaseTrack]
126 | :param metric:
127 | :return: cost_matrix np.ndarray
128 | """
129 |
130 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
131 | if cost_matrix.size == 0:
132 | return cost_matrix
133 | det_features = np.asarray(
134 | [track.curr_feat for track in detections], dtype=np.float64
135 | )
136 | # for i, track in enumerate(tracks):
137 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
138 | track_features = np.asarray(
139 | [track.smooth_feat for track in tracks], dtype=np.float64
140 | )
141 | cost_matrix = np.maximum(
142 | 0.0, cdist(track_features, det_features, metric)
143 | )  # Normalized features
144 | return cost_matrix
145 |
146 |
147 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
148 | if cost_matrix.size == 0:
149 | return cost_matrix
150 | gating_dim = 2 if only_position else 4
151 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
152 | measurements = np.asarray([det.to_xyah() for det in detections])
153 | for row, track in enumerate(tracks):
154 | gating_distance = kf.gating_distance(
155 | track.mean, track.covariance, measurements, only_position
156 | )
157 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
158 | return cost_matrix
159 |
160 |
161 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
162 | if cost_matrix.size == 0:
163 | return cost_matrix
164 | gating_dim = 2 if only_position else 4
165 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
166 | measurements = np.asarray([det.to_xyah() for det in detections])
167 | for row, track in enumerate(tracks):
168 | gating_distance = kf.gating_distance(
169 | track.mean, track.covariance, measurements, only_position, metric="maha"
170 | )
171 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
172 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
173 | return cost_matrix
174 |
175 |
176 | def fuse_iou(cost_matrix, tracks, detections):
177 | if cost_matrix.size == 0:
178 | return cost_matrix
179 | reid_sim = 1 - cost_matrix
180 | iou_dist = iou_distance(tracks, detections)
181 | iou_sim = 1 - iou_dist
182 | fuse_sim = reid_sim * (1 + iou_sim) / 2
183 | det_scores = np.array([det.score for det in detections])
184 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
185 | # fuse_sim = fuse_sim * (1 + det_scores) / 2
186 | fuse_cost = 1 - fuse_sim
187 | return fuse_cost
188 |
189 |
190 | def fuse_score(cost_matrix, detections):
191 | if cost_matrix.size == 0:
192 | return cost_matrix
193 | iou_sim = 1 - cost_matrix
194 | det_scores = np.array([det.score for det in detections])
195 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
196 | fuse_sim = iou_sim * det_scores
197 | fuse_cost = 1 - fuse_sim
198 | return fuse_cost
199 |
--------------------------------------------------------------------------------
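Usage sketch (illustrative, not part of the repository): the typical way these helpers combine inside the tracker, computing an IoU cost matrix for raw (x1, y1, x2, y2) boxes and solving the assignment under a cost threshold. The import path is an assumption and only works when this directory is on sys.path.

import numpy as np

from matching import iou_distance, linear_assignment  # assumed import path

# Toy boxes in (x1, y1, x2, y2) format; iou_distance accepts raw ndarrays as well as tracks.
track_boxes = [np.array([100, 100, 200, 200]), np.array([400, 400, 480, 500])]
det_boxes = [np.array([105, 98, 198, 205]), np.array([50, 300, 120, 380])]

cost = iou_distance(track_boxes, det_boxes)      # cost = 1 - IoU, shape (2, 2)
matches, u_track, u_det = linear_assignment(cost, thresh=0.8)

for t_idx, d_idx in matches:
    print(f"track {t_idx} <-> detection {d_idx}, IoU = {1 - cost[t_idx, d_idx]:.2f}")
print("unmatched tracks:", u_track, "unmatched detections:", u_det)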
/face_tracking/tracker/visualize.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | __all__ = ["vis"]
5 |
6 |
7 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
8 | for i in range(len(boxes)):
9 | box = boxes[i]
10 | cls_id = int(cls_ids[i])
11 | score = scores[i]
12 | if score < conf:
13 | continue
14 | x0 = int(box[0])
15 | y0 = int(box[1])
16 | x1 = int(box[2])
17 | y1 = int(box[3])
18 |
19 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()
20 | text = "{}:{:.1f}%".format(class_names[cls_id], score * 100)
21 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)
22 | font = cv2.FONT_HERSHEY_SIMPLEX
23 |
24 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
25 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
26 |
27 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()
28 | cv2.rectangle(
29 | img,
30 | (x0, y0 + 1),
31 | (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
32 | txt_bk_color,
33 | -1,
34 | )
35 | cv2.putText(
36 | img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1
37 | )
38 |
39 | return img
40 |
41 |
42 | def get_color(idx):
43 | idx = idx * 3
44 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
45 |
46 | return color
47 |
48 |
49 | def plot_tracking(
50 | image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0.0, ids2=None, names=[]
51 | ):
52 | im = np.ascontiguousarray(np.copy(image))
53 | im_h, im_w = im.shape[:2]
54 |
55 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
56 |
57 | # text_scale = max(1, image.shape[1] / 1600.)
58 | # text_thickness = 2
59 | # line_thickness = max(1, int(image.shape[1] / 500.))
60 | text_scale = 2
61 | text_thickness = 2
62 | line_thickness = 3
63 |
64 | radius = max(5, int(im_w / 140.0))
65 | cv2.putText(
66 | im,
67 | "frame: %d fps: %.2f num: %d" % (frame_id, fps, len(tlwhs)),
68 | (0, int(15 * text_scale)),
69 | cv2.FONT_HERSHEY_PLAIN,
70 | 2,
71 | (0, 0, 255),
72 | thickness=2,
73 | )
74 |
75 | for i, tlwh in enumerate(tlwhs):
76 | x1, y1, w, h = tlwh
77 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
78 | obj_id = int(obj_ids[i])
79 | id_text = "{}".format(int(obj_id))
80 |         if obj_id in names:
81 | id_text = id_text + ": " + names[obj_id]
82 | if ids2 is not None:
83 | id_text = id_text + ", {}".format(int(ids2[i]))
84 | color = get_color(abs(obj_id))
85 | cv2.rectangle(
86 | im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness
87 | )
88 | cv2.putText(
89 | im,
90 | id_text,
91 | (intbox[0], intbox[1]),
92 | cv2.FONT_HERSHEY_PLAIN,
93 | text_scale,
94 | (0, 0, 255),
95 | thickness=text_thickness,
96 | )
97 | return im
98 |
99 |
100 | _COLORS = (
101 | np.array(
102 | [
103 | 0.000,
104 | 0.447,
105 | 0.741,
106 | 0.850,
107 | 0.325,
108 | 0.098,
109 | 0.929,
110 | 0.694,
111 | 0.125,
112 | 0.494,
113 | 0.184,
114 | 0.556,
115 | 0.466,
116 | 0.674,
117 | 0.188,
118 | 0.301,
119 | 0.745,
120 | 0.933,
121 | 0.635,
122 | 0.078,
123 | 0.184,
124 | 0.300,
125 | 0.300,
126 | 0.300,
127 | 0.600,
128 | 0.600,
129 | 0.600,
130 | 1.000,
131 | 0.000,
132 | 0.000,
133 | 1.000,
134 | 0.500,
135 | 0.000,
136 | 0.749,
137 | 0.749,
138 | 0.000,
139 | 0.000,
140 | 1.000,
141 | 0.000,
142 | 0.000,
143 | 0.000,
144 | 1.000,
145 | 0.667,
146 | 0.000,
147 | 1.000,
148 | 0.333,
149 | 0.333,
150 | 0.000,
151 | 0.333,
152 | 0.667,
153 | 0.000,
154 | 0.333,
155 | 1.000,
156 | 0.000,
157 | 0.667,
158 | 0.333,
159 | 0.000,
160 | 0.667,
161 | 0.667,
162 | 0.000,
163 | 0.667,
164 | 1.000,
165 | 0.000,
166 | 1.000,
167 | 0.333,
168 | 0.000,
169 | 1.000,
170 | 0.667,
171 | 0.000,
172 | 1.000,
173 | 1.000,
174 | 0.000,
175 | 0.000,
176 | 0.333,
177 | 0.500,
178 | 0.000,
179 | 0.667,
180 | 0.500,
181 | 0.000,
182 | 1.000,
183 | 0.500,
184 | 0.333,
185 | 0.000,
186 | 0.500,
187 | 0.333,
188 | 0.333,
189 | 0.500,
190 | 0.333,
191 | 0.667,
192 | 0.500,
193 | 0.333,
194 | 1.000,
195 | 0.500,
196 | 0.667,
197 | 0.000,
198 | 0.500,
199 | 0.667,
200 | 0.333,
201 | 0.500,
202 | 0.667,
203 | 0.667,
204 | 0.500,
205 | 0.667,
206 | 1.000,
207 | 0.500,
208 | 1.000,
209 | 0.000,
210 | 0.500,
211 | 1.000,
212 | 0.333,
213 | 0.500,
214 | 1.000,
215 | 0.667,
216 | 0.500,
217 | 1.000,
218 | 1.000,
219 | 0.500,
220 | 0.000,
221 | 0.333,
222 | 1.000,
223 | 0.000,
224 | 0.667,
225 | 1.000,
226 | 0.000,
227 | 1.000,
228 | 1.000,
229 | 0.333,
230 | 0.000,
231 | 1.000,
232 | 0.333,
233 | 0.333,
234 | 1.000,
235 | 0.333,
236 | 0.667,
237 | 1.000,
238 | 0.333,
239 | 1.000,
240 | 1.000,
241 | 0.667,
242 | 0.000,
243 | 1.000,
244 | 0.667,
245 | 0.333,
246 | 1.000,
247 | 0.667,
248 | 0.667,
249 | 1.000,
250 | 0.667,
251 | 1.000,
252 | 1.000,
253 | 1.000,
254 | 0.000,
255 | 1.000,
256 | 1.000,
257 | 0.333,
258 | 1.000,
259 | 1.000,
260 | 0.667,
261 | 1.000,
262 | 0.333,
263 | 0.000,
264 | 0.000,
265 | 0.500,
266 | 0.000,
267 | 0.000,
268 | 0.667,
269 | 0.000,
270 | 0.000,
271 | 0.833,
272 | 0.000,
273 | 0.000,
274 | 1.000,
275 | 0.000,
276 | 0.000,
277 | 0.000,
278 | 0.167,
279 | 0.000,
280 | 0.000,
281 | 0.333,
282 | 0.000,
283 | 0.000,
284 | 0.500,
285 | 0.000,
286 | 0.000,
287 | 0.667,
288 | 0.000,
289 | 0.000,
290 | 0.833,
291 | 0.000,
292 | 0.000,
293 | 1.000,
294 | 0.000,
295 | 0.000,
296 | 0.000,
297 | 0.167,
298 | 0.000,
299 | 0.000,
300 | 0.333,
301 | 0.000,
302 | 0.000,
303 | 0.500,
304 | 0.000,
305 | 0.000,
306 | 0.667,
307 | 0.000,
308 | 0.000,
309 | 0.833,
310 | 0.000,
311 | 0.000,
312 | 1.000,
313 | 0.000,
314 | 0.000,
315 | 0.000,
316 | 0.143,
317 | 0.143,
318 | 0.143,
319 | 0.286,
320 | 0.286,
321 | 0.286,
322 | 0.429,
323 | 0.429,
324 | 0.429,
325 | 0.571,
326 | 0.571,
327 | 0.571,
328 | 0.714,
329 | 0.714,
330 | 0.714,
331 | 0.857,
332 | 0.857,
333 | 0.857,
334 | 0.000,
335 | 0.447,
336 | 0.741,
337 | 0.314,
338 | 0.717,
339 | 0.741,
340 | 0.50,
341 | 0.5,
342 | 0,
343 | ]
344 | )
345 | .astype(np.float32)
346 | .reshape(-1, 3)
347 | )
348 |
--------------------------------------------------------------------------------
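Usage sketch (illustrative, not part of the repository): rendering a couple of toy tracks onto a blank frame with plot_tracking; the id-to-name mapping has the same shape that recognize.py passes in. The import path is an assumption and only works when this directory is on sys.path.

import cv2
import numpy as np

from visualize import plot_tracking  # assumed import path

frame = np.full((480, 640, 3), 255, dtype=np.uint8)  # blank white frame
tlwhs = [(100, 80, 120, 160), (350, 150, 90, 130)]   # (top-left x, top-left y, width, height)
track_ids = [1, 2]
names = {1: "phuoc"}                                 # optional track-id -> recognized-name mapping

annotated = plot_tracking(frame, tlwhs, track_ids, frame_id=1, fps=30.0, names=names)
cv2.imwrite("tracking_demo.jpg", annotated)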
/recognize.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 |
4 | import cv2
5 | import numpy as np
6 | import torch
7 | import yaml
8 | from torchvision import transforms
9 |
10 | from face_alignment.alignment import norm_crop
11 | from face_detection.scrfd.detector import SCRFD
12 | from face_detection.yolov5_face.detector import Yolov5Face
13 | from face_recognition.arcface.model import iresnet_inference
14 | from face_recognition.arcface.utils import compare_encodings, read_features
15 | from face_tracking.tracker.byte_tracker import BYTETracker
16 | from face_tracking.tracker.visualize import plot_tracking
17 |
18 | # Device configuration
19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20 |
21 | # Face detector (choose one)
22 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
23 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")
24 |
25 | # Face recognizer
26 | recognizer = iresnet_inference(
27 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
28 | )
29 |
30 | # Load precomputed face features and names
31 | images_names, images_embs = read_features(feature_path="./datasets/face_features/feature")
32 |
33 | # Mapping of face IDs to names
34 | id_face_mapping = {}
35 |
36 | # Data mapping for tracking information
37 | data_mapping = {
38 | "raw_image": [],
39 | "tracking_ids": [],
40 | "detection_bboxes": [],
41 | "detection_landmarks": [],
42 | "tracking_bboxes": [],
43 | }
44 |
45 |
46 | def load_config(file_name):
47 | """
48 | Load a YAML configuration file.
49 |
50 | Args:
51 | file_name (str): The path to the YAML configuration file.
52 |
53 | Returns:
54 | dict: The loaded configuration as a dictionary.
55 | """
56 | with open(file_name, "r") as stream:
57 | try:
58 | return yaml.safe_load(stream)
59 | except yaml.YAMLError as exc:
60 | print(exc)
61 |
62 |
63 | def process_tracking(frame, detector, tracker, args, frame_id, fps):
64 | """
65 | Process tracking for a frame.
66 |
67 | Args:
68 | frame: The input frame.
69 | detector: The face detector.
70 | tracker: The object tracker.
71 | args (dict): Tracking configuration parameters.
72 | frame_id (int): The frame ID.
73 | fps (float): Frames per second.
74 |
75 | Returns:
76 | numpy.ndarray: The processed tracking image.
77 | """
78 | # Face detection and tracking
79 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame)
80 |
81 | tracking_tlwhs = []
82 | tracking_ids = []
83 | tracking_scores = []
84 | tracking_bboxes = []
85 |
86 | if outputs is not None:
87 | online_targets = tracker.update(
88 | outputs, [img_info["height"], img_info["width"]], (128, 128)
89 | )
90 |
91 | for i in range(len(online_targets)):
92 | t = online_targets[i]
93 | tlwh = t.tlwh
94 | tid = t.track_id
95 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"]
96 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical:
97 | x1, y1, w, h = tlwh
98 | tracking_bboxes.append([x1, y1, x1 + w, y1 + h])
99 | tracking_tlwhs.append(tlwh)
100 | tracking_ids.append(tid)
101 | tracking_scores.append(t.score)
102 |
103 | tracking_image = plot_tracking(
104 | img_info["raw_img"],
105 | tracking_tlwhs,
106 | tracking_ids,
107 | names=id_face_mapping,
108 | frame_id=frame_id + 1,
109 | fps=fps,
110 | )
111 | else:
112 | tracking_image = img_info["raw_img"]
113 |
114 | data_mapping["raw_image"] = img_info["raw_img"]
115 | data_mapping["detection_bboxes"] = bboxes
116 | data_mapping["detection_landmarks"] = landmarks
117 | data_mapping["tracking_ids"] = tracking_ids
118 | data_mapping["tracking_bboxes"] = tracking_bboxes
119 |
120 | return tracking_image
121 |
122 |
123 | @torch.no_grad()
124 | def get_feature(face_image):
125 | """
126 | Extract features from a face image.
127 |
128 | Args:
129 | face_image: The input face image.
130 |
131 | Returns:
132 | numpy.ndarray: The extracted features.
133 | """
134 | face_preprocess = transforms.Compose(
135 | [
136 | transforms.ToTensor(),
137 | transforms.Resize((112, 112)),
138 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
139 | ]
140 | )
141 |
142 | # Convert to RGB
143 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
144 |
145 |     # Preprocess the RGB image (resize, normalize, move to device)
146 | face_image = face_preprocess(face_image).unsqueeze(0).to(device)
147 |
148 | # Inference to get feature
149 | emb_img_face = recognizer(face_image).cpu().numpy()
150 |
151 | # Convert to array
152 | images_emb = emb_img_face / np.linalg.norm(emb_img_face)
153 |
154 | return images_emb
155 |
156 |
157 | def recognition(face_image):
158 | """
159 | Recognize a face image.
160 |
161 | Args:
162 | face_image: The input face image.
163 |
164 | Returns:
165 | tuple: A tuple containing the recognition score and name.
166 | """
167 | # Get feature from face
168 | query_emb = get_feature(face_image)
169 |
170 | score, id_min = compare_encodings(query_emb, images_embs)
171 | name = images_names[id_min]
172 | score = score[0]
173 |
174 | return score, name
175 |
176 |
177 | def mapping_bbox(box1, box2):
178 | """
179 | Calculate the Intersection over Union (IoU) between two bounding boxes.
180 |
181 | Args:
182 | box1 (tuple): The first bounding box (x_min, y_min, x_max, y_max).
183 | box2 (tuple): The second bounding box (x_min, y_min, x_max, y_max).
184 |
185 | Returns:
186 | float: The IoU score.
187 | """
188 | # Calculate the intersection area
189 | x_min_inter = max(box1[0], box2[0])
190 | y_min_inter = max(box1[1], box2[1])
191 | x_max_inter = min(box1[2], box2[2])
192 | y_max_inter = min(box1[3], box2[3])
193 |
194 | intersection_area = max(0, x_max_inter - x_min_inter + 1) * max(
195 | 0, y_max_inter - y_min_inter + 1
196 | )
197 |
198 | # Calculate the area of each bounding box
199 | area_box1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
200 | area_box2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
201 |
202 | # Calculate the union area
203 | union_area = area_box1 + area_box2 - intersection_area
204 |
205 | # Calculate IoU
206 | iou = intersection_area / union_area
207 |
208 | return iou
209 |
210 |
211 | def tracking(detector, args):
212 | """
213 | Face tracking in a separate thread.
214 |
215 | Args:
216 | detector: The face detector.
217 | args (dict): Tracking configuration parameters.
218 | """
219 | # Initialize variables for measuring frame rate
220 | start_time = time.time_ns()
221 | frame_count = 0
222 | fps = -1
223 |
224 | # Initialize a tracker and a timer
225 | tracker = BYTETracker(args=args, frame_rate=30)
226 | frame_id = 0
227 |
228 | cap = cv2.VideoCapture(0)
229 |
230 | while True:
231 | _, img = cap.read()
232 |
233 | tracking_image = process_tracking(img, detector, tracker, args, frame_id, fps)
234 |
235 | # Calculate and display the frame rate
236 | frame_count += 1
237 | if frame_count >= 30:
238 | fps = 1e9 * frame_count / (time.time_ns() - start_time)
239 | frame_count = 0
240 | start_time = time.time_ns()
241 |
242 | cv2.imshow("Face Recognition", tracking_image)
243 |
244 | # Check for user exit input
245 | ch = cv2.waitKey(1)
246 | if ch == 27 or ch == ord("q") or ch == ord("Q"):
247 | break
248 |         frame_id += 1  # advance the frame counter shown in the overlay
249 |
250 | def recognize():
251 | """Face recognition in a separate thread."""
252 | while True:
253 | raw_image = data_mapping["raw_image"]
254 | detection_landmarks = data_mapping["detection_landmarks"]
255 | detection_bboxes = data_mapping["detection_bboxes"]
256 | tracking_ids = data_mapping["tracking_ids"]
257 | tracking_bboxes = data_mapping["tracking_bboxes"]
258 |
259 | for i in range(len(tracking_bboxes)):
260 | for j in range(len(detection_bboxes)):
261 | mapping_score = mapping_bbox(box1=tracking_bboxes[i], box2=detection_bboxes[j])
262 | if mapping_score > 0.9:
263 | face_alignment = norm_crop(img=raw_image, landmark=detection_landmarks[j])
264 |
265 | score, name = recognition(face_image=face_alignment)
266 | if name is not None:
267 | if score < 0.25:
268 |                         caption = "UNKNOWN"
269 | else:
270 | caption = f"{name}:{score:.2f}"
271 |
272 | id_face_mapping[tracking_ids[i]] = caption
273 |
274 | detection_bboxes = np.delete(detection_bboxes, j, axis=0)
275 | detection_landmarks = np.delete(detection_landmarks, j, axis=0)
276 |
277 | break
278 |
279 | if tracking_bboxes == []:
280 | print("Waiting for a person...")
281 |
282 |
283 | def main():
284 | """Main function to start face tracking and recognition threads."""
285 | file_name = "./face_tracking/config/config_tracking.yaml"
286 | config_tracking = load_config(file_name)
287 |
288 | # Start tracking thread
289 | thread_track = threading.Thread(
290 | target=tracking,
291 | args=(
292 | detector,
293 | config_tracking,
294 | ),
295 | )
296 | thread_track.start()
297 |
298 | # Start recognition thread
299 | thread_recognize = threading.Thread(target=recognize)
300 | thread_recognize.start()
301 |
302 |
303 | if __name__ == "__main__":
304 | main()
305 |
--------------------------------------------------------------------------------
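The recognition thread pairs a tracked box with a detection by IoU (> 0.9), aligns the crop, and then looks up the nearest stored embedding. The actual lookup lives in compare_encodings in face_recognition/arcface/utils.py, so the sketch below only reproduces the idea with plain NumPy cosine similarity and the 0.25 unknown threshold used above; all embeddings, names, and values here are made up.

import numpy as np

rng = np.random.default_rng(0)

# Stand-ins for the gallery loaded by read_features(): unit-norm embeddings plus names.
images_embs = rng.normal(size=(3, 512))
images_embs /= np.linalg.norm(images_embs, axis=1, keepdims=True)
images_names = np.array(["lam", "phuoc", "phuoc"])

# A probe embedding close to the second gallery entry, normalized like get_feature() does.
query_emb = images_embs[1:2] + 0.05 * rng.normal(size=(1, 512))
query_emb /= np.linalg.norm(query_emb)

# Cosine similarity reduces to a dot product on unit vectors; pick the best match.
scores = (query_emb @ images_embs.T).flatten()
best = int(np.argmax(scores))
score, name = scores[best], images_names[best]

caption = f"{name}:{score:.2f}" if score >= 0.25 else "UNKNOWN"
print(caption)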
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2023.11.17
2 | charset-normalizer==3.3.2
3 | colorama==0.4.6
4 | coloredlogs==15.0.1
5 | contourpy==1.2.0
6 | cycler==0.12.1
7 | flatbuffers==23.5.26
8 | fonttools==4.46.0
9 | humanfriendly==10.0
10 | idna==3.6
11 | imageio==2.33.0
12 | importlib-resources==6.1.1
13 | kiwisolver==1.4.5
14 | lazy_loader==0.3
15 | matplotlib==3.8.2
16 | mpmath==1.3.0
17 | networkx==3.2.1
18 | numpy==1.23.5
19 | onnxruntime==1.16.3
20 | opencv-python==4.8.1.78
21 | packaging==23.2
22 | pandas==2.1.3
23 | Pillow==10.1.0
24 | protobuf==4.25.1
25 | pyparsing==3.1.1
26 | pyreadline3==3.4.1
27 | python-dateutil==2.8.2
28 | pytz==2023.3.post1
29 | PyYAML==6.0.1
30 | requests==2.31.0
31 | scikit-image==0.22.0
32 | scipy==1.11.4
33 | seaborn==0.13.0
34 | six==1.16.0
35 | sympy==1.12
36 | tifffile==2023.9.26
37 | tqdm==4.66.1
38 | typing_extensions==4.8.0
39 | tzdata==2023.3
40 | urllib3==2.1.0
41 | zipp==3.17.0
42 |
--------------------------------------------------------------------------------
/tracking.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 | import yaml
5 |
6 | from face_detection.scrfd.detector import SCRFD
7 | from face_detection.yolov5_face.detector import Yolov5Face
8 | from face_tracking.tracker.byte_tracker import BYTETracker
9 | from face_tracking.tracker.visualize import plot_tracking
10 |
11 |
12 | # Function to load a YAML configuration file
13 | def load_config(file_name):
14 | with open(file_name, "r") as stream:
15 | try:
16 | return yaml.safe_load(stream)
17 | except yaml.YAMLError as exc:
18 | print(exc)
19 |
20 |
21 | # Function for performing object detection and tracking
22 | def inference(detector, args):
23 | # Open a video capture object
24 | cap = cv2.VideoCapture(0)
25 |
26 | # Initialize variables for measuring frame rate
27 | start_time = time.time_ns()
28 | frame_count = 0
29 | fps = -1
30 |
31 | # Initialize a tracker and a timer
32 | tracker = BYTETracker(args=args, frame_rate=30)
33 | frame_id = 0
34 |
35 | while True:
36 | # Read a frame from the video capture
37 | ret_val, frame = cap.read()
38 |
39 | if ret_val:
40 | # Perform face detection and tracking on the frame
41 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame)
42 |
43 | if outputs is not None:
44 | online_targets = tracker.update(
45 | outputs, [img_info["height"], img_info["width"]], (128, 128)
46 | )
47 | online_tlwhs = []
48 | online_ids = []
49 | online_scores = []
50 |
51 | for t in online_targets:
52 | tlwh = t.tlwh
53 | tid = t.track_id
54 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"]
55 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical:
56 | online_tlwhs.append(tlwh)
57 | online_ids.append(tid)
58 | online_scores.append(t.score)
59 |
60 | online_im = plot_tracking(
61 | img_info["raw_img"],
62 | online_tlwhs,
63 | online_ids,
64 | frame_id=frame_id + 1,
65 | fps=fps,
66 | )
67 | else:
68 | online_im = img_info["raw_img"]
69 |
70 | # Calculate and display the frame rate
71 | frame_count += 1
72 | if frame_count >= 30:
73 | fps = 1e9 * frame_count / (time.time_ns() - start_time)
74 | frame_count = 0
75 | start_time = time.time_ns()
76 |
77 | # # Draw bounding boxes and landmarks on the frame
78 | # for i in range(len(bboxes)):
79 | # # Get location of the face
80 | # x1, y1, x2, y2, score = bboxes[i]
81 | # cv2.rectangle(online_im, (x1, y1), (x2, y2), (200, 200, 230), 2)
82 |
83 | cv2.imshow("Face Tracking", online_im)
84 |
85 | # Check for user exit input
86 | ch = cv2.waitKey(1)
87 | if ch == 27 or ch == ord("q") or ch == ord("Q"):
88 | break
89 | else:
90 | break
91 | frame_id += 1
92 |
93 |
94 | def main():
95 | file_name = "./face_tracking/config/config_tracking.yaml"
96 | config_tracking = load_config(file_name)
97 | # detector = Yolov5Face(
98 | # model_file="face_detection/yolov5_face/weights/yolov5m-face.pt"
99 | # )
100 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
101 |
102 | inference(detector=detector, args=config_tracking)
103 |
104 |
105 | if __name__ == "__main__":
106 | main()
107 |
--------------------------------------------------------------------------------
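Both entry points read face_tracking/config/config_tracking.yaml via load_config() and index the result as a dict. Only aspect_ratio_thresh and min_box_area are referenced directly in the scripts above; the remaining keys in the sketch below are assumptions based on the arguments a ByteTrack-style tracker conventionally expects, shown as the in-memory dict the scripts work with rather than as the YAML file itself.

# Hypothetical contents of config_tracking.yaml, expressed as the dict yaml.safe_load returns.
config_tracking = {
    "track_thresh": 0.5,         # detection score for starting/keeping a track (assumed key)
    "track_buffer": 30,          # frames a lost track is kept alive (assumed key)
    "match_thresh": 0.8,         # IoU association threshold (assumed key)
    "aspect_ratio_thresh": 1.6,  # boxes with w/h above this are skipped (used in the scripts)
    "min_box_area": 10,          # boxes smaller than this are skipped (used in the scripts)
}

# e.g. inference(detector=SCRFD(...), args=config_tracking)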