├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE.md
├── README.md
├── add_persons.py
├── assets
│   ├── bytetrack.png
│   ├── face-detection.gif
│   ├── face-detection2.gif
│   ├── face-recognition.gif
│   ├── result.jpg
│   ├── sequence-diagram.png
│   ├── train_image.jpg
│   └── workflow.png
├── datasets
│   ├── backup
│   │   ├── lam
│   │   │   └── lam.jpg
│   │   └── phuoc
│   │       ├── avatar2.png
│   │       ├── phuoc.jpg
│   │       └── quare.jpg
│   ├── data
│   │   ├── lam
│   │   │   └── 0.jpg
│   │   └── phuoc
│   │       ├── 0.jpg
│   │       ├── 1.jpg
│   │       └── 2.jpg
│   └── face_features
│       └── feature.npz
├── detect.py
├── face_align.py
├── face_alignment
│   └── alignment.py
├── face_detection
│   ├── retinaface
│   │   ├── LICENSE.MIT
│   │   ├── README.md
│   │   ├── camera_test.py
│   │   ├── convert_to_onnx.py
│   │   ├── data
│   │   │   ├── FDDB
│   │   │   │   └── img_list.txt
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   ├── data_augment.py
│   │   │   └── wider_face.py
│   │   ├── detect.py
│   │   ├── layers
│   │   │   ├── __init__.py
│   │   │   ├── functions
│   │   │   │   └── prior_box.py
│   │   │   └── modules
│   │   │       ├── __init__.py
│   │   │       └── multibox_loss.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── net.py
│   │   │   └── retinaface.py
│   │   └── utils
│   │       ├── __init__.py
│   │       ├── box_utils.py
│   │       ├── nms
│   │       │   ├── __init__.py
│   │       │   └── py_cpu_nms.py
│   │       └── timer.py
│   ├── scrfd
│   │   ├── detector.py
│   │   └── weights
│   │       └── README.md
│   └── yolov5_face
│       ├── README.md
│       ├── detector.py
│       ├── models
│       │   ├── __init__.py
│       │   ├── blazeface.yaml
│       │   ├── blazeface_fpn.yaml
│       │   ├── common.py
│       │   ├── experimental.py
│       │   ├── yolo.py
│       │   ├── yolov5l.yaml
│       │   ├── yolov5l6.yaml
│       │   ├── yolov5m.yaml
│       │   ├── yolov5m6.yaml
│       │   ├── yolov5n-0.5.yaml
│       │   ├── yolov5n.yaml
│       │   ├── yolov5n6.yaml
│       │   ├── yolov5s.yaml
│       │   └── yolov5s6.yaml
│       ├── utils
│       │   ├── __init__.py
│       │   ├── activations.py
│       │   ├── autoanchor.py
│       │   ├── datasets.py
│       │   ├── face_datasets.py
│       │   ├── general.py
│       │   ├── google_utils.py
│       │   ├── infer_utils.py
│       │   ├── loss.py
│       │   ├── metrics.py
│       │   ├── plots.py
│       │   ├── torch_utils.py
│       │   └── wandb_logging
│       │       ├── __init__.py
│       │       ├── log_dataset.py
│       │       └── wandb_utils.py
│       └── weights
│           └── README.md
├── face_recognition
│   └── arcface
│       ├── model.py
│       ├── utils.py
│       └── weights
│           └── README.md
├── face_tracking
│   ├── config
│   │   └── config_tracking.yaml
│   ├── pretrained
│   │   └── README.md
│   └── tracker
│       ├── basetrack.py
│       ├── byte_tracker.py
│       ├── kalman_filter.py
│       ├── matching.py
│       └── visualize.py
├── recognize.py
├── requirements.txt
└── tracking.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.mp4
2 |
3 | .env
4 | # *.npz
5 | index.faiss
6 | test*
7 | # *.ipynb
8 | NOTE.md
9 | data-elastic-search
10 | qdrant-vector-database
11 | architectures
12 | *.csv
13 | data.csv
14 |
15 | image-search-engine/assets/uploaded_images/*
16 | !image-search-engine/assets/uploaded_images/.gitkeep
17 |
18 | # Model
19 | *.pth
20 | *.pt
21 | *.onnx
22 |
23 | # Byte-compiled / optimized / DLL files
24 | __pycache__/
25 | *.py[cod]
26 | *$py.class
27 |
28 | # C extensions
29 | *.so
30 |
31 | # Distribution / packaging
32 | .Python
33 | build/
34 | develop-eggs/
35 | dist/
36 | downloads/
37 | eggs/
38 | .eggs/
39 | lib/
40 | lib64/
41 | parts/
42 | sdist/
43 | var/
44 | wheels/
45 | pip-wheel-metadata/
46 | share/python-wheels/
47 | *.egg-info/
48 | .installed.cfg
49 | MANIFEST
50 |
51 | # PyInstaller
52 | # Usually these files are written by a python script from a template
53 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
54 | *.manifest
55 | *.spec
56 |
57 | # Installer logs
58 | pip-log.txt
59 | pip-delete-this-directory.txt
60 |
61 | # Unit test / coverage reports
62 | htmlcov/
63 | .tox/
64 | .nox/
65 | .coverage
66 | .coverage.*
67 | .cache
68 | nosetests.xml
69 | coverage.xml
70 | *.cover
71 | *.py,cover
72 | .hypothesis/
73 | .pytest_cache/
74 |
75 | # Translations
76 | *.mo
77 | *.pot
78 |
79 | # Django stuff:
80 | *.log
81 | local_settings.py
82 | db.sqlite3
83 | db.sqlite3-journal
84 |
85 | # Flask stuff:
86 | instance/
87 | .webassets-cache
88 |
89 | # Scrapy stuff:
90 | .scrapy
91 |
92 | # Sphinx documentation
93 | docs/_build/
94 |
95 | # PyBuilder
96 | target/
97 |
98 | # Jupyter Notebook
99 | .ipynb_checkpoints
100 |
101 | # IPython
102 | profile_default/
103 | ipython_config.py
104 |
105 | # pyenv
106 | .python-version
107 |
108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
109 | __pypackages__/
110 |
111 | # Celery stuff
112 | celerybeat-schedule
113 | celerybeat.pid
114 |
115 | # SageMath parsed files
116 | *.sage.py
117 |
118 | # Environment variable
119 | # .env
120 | # .env*
121 |
122 | # Environments
123 | .venv/
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # It's better to unpack these files and commit the raw source because
150 | # git has its own built in compression methods.
151 | *.7z
152 | *.jar
153 | *.rar
154 | *.zip
155 | *.gz
156 | *.gzip
157 | *.tgz
158 | *.bzip
159 | *.bzip2
160 | *.bz2
161 | *.xz
162 | *.lzma
163 | *.cab
164 | *.xar
165 |
166 | # Packing-only formats
167 | *.iso
168 | *.tar
169 |
170 | # Package management formats
171 | *.dmg
172 | *.xpi
173 | *.gem
174 | *.egg
175 | *.deb
176 | *.rpm
177 | *.msi
178 | *.msm
179 | *.msp
180 | *.txz
181 |
182 | # Backup
183 | *.bak
184 | *.gho
185 | *.ori
186 | *.orig
187 | *.tmp
188 |
189 | # GPG
190 | secring.*
191 |
192 | # OpenSSL-related files best not committed
193 | ## Certificate Authority
194 | *.ca
195 |
196 | ## Certificate
197 | *.crt
198 |
199 | ## Certificate Sign Request
200 | *.csr
201 |
202 | ## Certificate
203 | *.der
204 |
205 | ## Key database file
206 | *.kdb
207 |
208 | ## OCSP request data
209 | *.org
210 |
211 | ## PKCS #12
212 | *.p12
213 |
214 | ## PEM-encoded certificate data
215 | *.pem
216 |
217 | ## Random number seed
218 | *.rnd
219 |
220 | ## SSLeay data
221 | *.ssleay
222 |
223 | ## S/MIME message
224 | *.smime
225 |
226 | # ide
227 | .idea/
228 |
229 | # others
230 | migrations/
231 |
232 | # Databases
233 | *.db
234 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | exclude: "^\
2 | (third-party/.*)\
3 | "
4 |
5 | repos:
6 | - repo: https://github.com/pre-commit/pre-commit-hooks
7 | rev: v4.1.0
8 | hooks:
9 | - id: check-merge-conflict # checks for some markers such as "<<<<<<<", "=======", and ">>>>>>>".
10 | - id: detect-private-key # detects the presence of private keys.
11 | - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline.
12 | - id: requirements-txt-fixer # sorts entries in requirements.txt.
13 | - id: trailing-whitespace # trims trailing whitespace at the end of lines.
14 |
15 | # Format YAML and other files
16 | - repo: https://github.com/pre-commit/mirrors-prettier
17 | rev: v2.5.1
18 | hooks:
19 | - id: prettier
20 | files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$
21 |
22 | # Sort the order of importing libs
23 | - repo: https://github.com/PyCQA/isort
24 | rev: 5.12.0
25 | hooks:
26 | - id: isort
27 | args: [--profile=black, --line-length=100]
28 |
29 | # Format Python files
30 | - repo: https://github.com/psf/black
31 | rev: 23.7.0
32 | hooks:
33 | - id: black
34 | args: [--line-length=100]
35 |
36 | # - repo: https://github.com/PyCQA/flake8
37 | # rev: 6.1.0
38 | # hooks:
39 | # - id: flake8
40 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2022 Vector Nguyễn
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Real-Time Face Recognition
2 |
3 |
4 |
5 |
6 | Face Recognition
7 |
8 |
9 | ## Table of Contents
10 |
11 | - [Architecture](#architecture)
12 | - [How to use](#how-to-use)
13 | - [Create Environment and Install Packages](#create-environment-and-install-packages)
14 | - [Add new persons to datasets](#add-new-persons-to-datasets)
15 | - [Technology](#technology)
16 | - [Face Detection](#face-detection)
17 | - [Face Recognition](#face-recognition)
18 | - [Face Tracking](#face-tracking)
19 | - [Matching Algorithm](#matching-algorithm)
20 | - [Reference](#reference)
21 |
22 | ## Architecture
23 |
24 |
25 |
26 |
27 | Sequence Diagram
28 |
29 |
30 | ## How to use
31 |
32 | ### Create Environment and Install Packages
33 |
34 | ```shell
35 | conda create -n face-dev python=3.9
36 | ```
37 |
38 | ```shell
39 | conda activate face-dev
40 | ```
41 |
42 | ```shell
43 | pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html
44 | pip install -r requirements.txt
45 | ```
46 |
47 | ### Add new persons to datasets
48 |
49 | 1. **Create a folder named after the person**
50 |
51 | ```
52 | datasets/
53 | ├── backup
54 | ├── data
55 | ├── face_features
56 | └── new_persons
57 |     ├── name-person1
58 |     └── name-person2
59 | ```
60 |
61 | 2. **Add the person's photos to that folder**
62 |
63 | ```
64 | datasets/
65 | ├── backup
66 | ├── data
67 | ├── face_features
68 | └── new_persons
69 |     ├── name-person1
70 |     │   ├── image1.jpg
71 |     │   └── image2.jpg
72 |     └── name-person2
73 |         ├── image1.jpg
74 |         └── image2.jpg
75 | ```
76 |
77 | 3. **Run the script to add the new persons** (the stored embeddings can be inspected as shown after step 4)
78 |
79 | ```shell
80 | python add_persons.py
81 | ```
82 |
83 | 4. **Run the recognition script**
84 |
85 | ```shell
86 | python recognize.py
87 | ```
88 |
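
After step 3, `add_persons.py` saves the extracted embeddings with `np.savez_compressed` to the `--features-path` (default `./datasets/face_features/feature.npz`). A quick way to inspect the archive, using the same key names as `add_persons.py`:

```python
import numpy as np

# Load the feature archive written by add_persons.py (default --features-path).
data = np.load("./datasets/face_features/feature.npz")

names = data["images_name"]  # one label (person name) per stored face crop
embs = data["images_emb"]    # one L2-normalized ArcFace embedding per stored face crop

print(names.shape, embs.shape)
```
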
89 | ## Technology
90 |
91 | ### Face Detection
92 |
93 | 1. **Retinaface**
94 |
95 | - Retinaface is a powerful face detection algorithm known for its accuracy and speed. It utilizes a single deep convolutional network to detect faces in an image with high precision.
96 |
97 | 2. **Yolov5-face**
98 |
99 | - Yolov5-face is based on the YOLO (You Only Look Once) architecture, specializing in face detection. It provides real-time face detection with a focus on efficiency and accuracy.
100 |
101 | 3. **SCRFD**
102 | - SCRFD (Sample and Computation Redistribution for Efficient Face Detection) is designed for real-time face detection across various scales. It is particularly effective at detecting faces at different resolutions within the same image. A minimal usage sketch of the detector interface is shown below.
103 |
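The SCRFD and YOLOv5-face detectors are wrapped behind the same small interface used in `detect.py` and `add_persons.py`. A minimal sketch with the SCRFD wrapper, assuming the ONNX weights from `face_detection/scrfd/weights/README.md` have already been downloaded:

```python
import cv2

from face_detection.scrfd.detector import SCRFD

# Assumes scrfd_2.5g_bnkps.onnx has already been downloaded to this path.
detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")

image = cv2.imread("datasets/backup/phuoc/phuoc.jpg")
bboxes, landmarks = detector.detect(image=image)

for (x1, y1, x2, y2, score), points in zip(bboxes, landmarks):
    # Each detection: a bounding box, a confidence score, and five facial landmarks.
    print(score, (x1, y1, x2, y2), points)
```
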
104 | ### Face Recognition
105 |
106 | 1. **ArcFace**
107 |
108 | - ArcFace is a state-of-the-art face recognition algorithm that focuses on learning highly discriminative features for face verification and identification. It is known for its robustness to variations in lighting, pose, and facial expressions. A condensed sketch of the embedding step used in this repository is shown below.
109 |
110 |
111 |
112 |
113 | ArcFace
114 |
115 |
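A condensed sketch of the embedding step, mirroring `get_feature` in `add_persons.py` (it assumes the ArcFace weights referenced there have been downloaded):

```python
import cv2
import numpy as np
import torch
from torchvision import transforms

from face_recognition.arcface.model import iresnet_inference

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Assumes arcface_r100.pth has been downloaded as described in the weights README.
recognizer = iresnet_inference(
    model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
)

preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((112, 112)),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


@torch.no_grad()
def embed(face_bgr):
    """Return a unit-length embedding for one BGR face crop."""
    face_rgb = cv2.cvtColor(face_bgr, cv2.COLOR_BGR2RGB)
    emb = recognizer(preprocess(face_rgb).unsqueeze(0).to(device))[0].cpu().numpy()
    return emb / np.linalg.norm(emb)  # L2-normalize so cosine similarity becomes a dot product
```
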
116 | ### Face Tracking
117 |
118 | 1. **ByteTrack**
119 |
120 |
121 |
122 |
123 | ByteTrack is a simple, fast and strong multi-object tracker.
124 |
125 |
126 | ### Matching Algorithm
127 |
128 | 1. **Cosine Similarity Algorithm**
129 |
130 | - The cosine similarity algorithm is employed for matching faces based on the cosine of the angle between their feature vectors. It measures the similarity between two faces' feature representations, providing an effective approach for face recognition. A minimal matching sketch is shown below.
131 |
132 |
133 |
134 |
135 | Cosine Similarity Algorithm
136 |
137 |
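A minimal NumPy sketch of this matching step (illustrative, not the repository's exact implementation). Because the stored embeddings are L2-normalized, cosine similarity reduces to a dot product; the threshold below is an assumption and should be tuned:

```python
import numpy as np


def match(query_emb, gallery_embs, gallery_names, threshold=0.5):
    """Return (name, score) for the best match, or (None, score) below the threshold.

    Assumes query_emb and each row of gallery_embs are L2-normalized, as in
    add_persons.py, so the dot product equals the cosine of the angle.
    """
    scores = gallery_embs @ query_emb
    best = int(np.argmax(scores))
    name = gallery_names[best] if scores[best] >= threshold else None
    return name, float(scores[best])
```
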
138 | ## Reference
139 |
140 | - [ByteTrack](https://github.com/ifzhang/ByteTrack)
141 | - [Yolov5-face](https://github.com/deepcam-cn/yolov5-face)
142 | - [InsightFace - ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch)
143 | - [InsightFace-REST](https://github.com/SthPhoenix/InsightFace-REST)
144 |
--------------------------------------------------------------------------------
/add_persons.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import shutil
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | from torchvision import transforms
9 |
10 | from face_detection.scrfd.detector import SCRFD
11 | from face_detection.yolov5_face.detector import Yolov5Face
12 | from face_recognition.arcface.model import iresnet_inference
13 | from face_recognition.arcface.utils import read_features
14 |
15 | # Check if CUDA is available and set the device accordingly
16 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
17 |
18 | # Initialize the face detector (Choose one of the detectors)
19 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")
20 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
21 |
22 | # Initialize the face recognizer
23 | recognizer = iresnet_inference(
24 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
25 | )
26 |
27 |
28 | @torch.no_grad()
29 | def get_feature(face_image):
30 | """
31 | Extract facial features from an image using the face recognition model.
32 |
33 | Args:
34 | face_image (numpy.ndarray): Input facial image.
35 |
36 | Returns:
37 | numpy.ndarray: Extracted facial features.
38 | """
39 | # Define a series of image preprocessing steps
40 | face_preprocess = transforms.Compose(
41 | [
42 | transforms.ToTensor(),
43 | transforms.Resize((112, 112)),
44 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
45 | ]
46 | )
47 |
48 | # Convert the image to RGB format
49 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
50 |
51 | # Apply the defined preprocessing to the image
52 | face_image = face_preprocess(face_image).unsqueeze(0).to(device)
53 |
54 | # Use the model to obtain facial features
55 | emb_img_face = recognizer(face_image)[0].cpu().numpy()
56 |
57 | # Normalize the features
58 | images_emb = emb_img_face / np.linalg.norm(emb_img_face)
59 | return images_emb
60 |
61 |
62 | def add_persons(backup_dir, add_persons_dir, faces_save_dir, features_path):
63 | """
64 | Add a new person to the face recognition database.
65 |
66 | Args:
67 | backup_dir (str): Directory to save backup data.
68 | add_persons_dir (str): Directory containing images of the new person.
69 | faces_save_dir (str): Directory to save the extracted faces.
70 | features_path (str): Path to save face features.
71 | """
72 | # Initialize lists to store names and features of added images
73 | images_name = []
74 | images_emb = []
75 |
76 | # Read the folder with images of the new person, extract faces, and save them
77 | for name_person in os.listdir(add_persons_dir):
78 | person_image_path = os.path.join(add_persons_dir, name_person)
79 |
80 | # Create a directory to save the faces of the person
81 | person_face_path = os.path.join(faces_save_dir, name_person)
82 | os.makedirs(person_face_path, exist_ok=True)
83 |
84 | for image_name in os.listdir(person_image_path):
85 | if image_name.endswith(("png", "jpg", "jpeg")):
86 | input_image = cv2.imread(os.path.join(person_image_path, image_name))
87 |
88 | # Detect faces and landmarks using the face detector
89 | bboxes, landmarks = detector.detect(image=input_image)
90 |
91 | # Extract faces
92 | for i in range(len(bboxes)):
93 | # Get the number of files in the person's path
94 | number_files = len(os.listdir(person_face_path))
95 |
96 | # Get the location of the face
97 | x1, y1, x2, y2, score = bboxes[i]
98 |
99 | # Extract the face from the image
100 | face_image = input_image[y1:y2, x1:x2]
101 |
102 | # Path to save the face
103 | path_save_face = os.path.join(person_face_path, f"{number_files}.jpg")
104 |
105 | # Save the face to the database
106 | cv2.imwrite(path_save_face, face_image)
107 |
108 | # Extract features from the face
109 | images_emb.append(get_feature(face_image=face_image))
110 | images_name.append(name_person)
111 |
112 | # Check if no new person is found
113 | if images_emb == [] and images_name == []:
114 | print("No new person found!")
115 | return None
116 |
117 | # Convert lists to arrays
118 | images_emb = np.array(images_emb)
119 | images_name = np.array(images_name)
120 |
121 | # Read existing features if available
122 | features = read_features(features_path)
123 |
124 | if features is not None:
125 | # Unpack existing features
126 | old_images_name, old_images_emb = features
127 |
128 | # Combine new features with existing features
129 | images_name = np.hstack((old_images_name, images_name))
130 | images_emb = np.vstack((old_images_emb, images_emb))
131 |
132 | print("Update features!")
133 |
134 | # Save the combined features
135 | np.savez_compressed(features_path, images_name=images_name, images_emb=images_emb)
136 |
137 | # Move the data of the new person to the backup data directory
138 | for sub_dir in os.listdir(add_persons_dir):
139 | dir_to_move = os.path.join(add_persons_dir, sub_dir)
140 | shutil.move(dir_to_move, backup_dir, copy_function=shutil.copytree)
141 |
142 | print("Successfully added new person!")
143 |
144 |
145 | if __name__ == "__main__":
146 | # Parse command line arguments
147 | parser = argparse.ArgumentParser()
148 | parser.add_argument(
149 | "--backup-dir",
150 | type=str,
151 | default="./datasets/backup",
152 | help="Directory to save person data.",
153 | )
154 | parser.add_argument(
155 | "--add-persons-dir",
156 | type=str,
157 | default="./datasets/new_persons",
158 | help="Directory to add new persons.",
159 | )
160 | parser.add_argument(
161 | "--faces-save-dir",
162 | type=str,
163 | default="./datasets/data/",
164 | help="Directory to save faces.",
165 | )
166 | parser.add_argument(
167 | "--features-path",
168 | type=str,
169 | default="./datasets/face_features/feature",
170 | help="Path to save face features.",
171 | )
172 | opt = parser.parse_args()
173 |
174 | # Run the main function
175 | add_persons(**vars(opt))
176 |
--------------------------------------------------------------------------------
/assets/bytetrack.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/bytetrack.png
--------------------------------------------------------------------------------
/assets/face-detection.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection.gif
--------------------------------------------------------------------------------
/assets/face-detection2.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection2.gif
--------------------------------------------------------------------------------
/assets/face-recognition.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-recognition.gif
--------------------------------------------------------------------------------
/assets/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/result.jpg
--------------------------------------------------------------------------------
/assets/sequence-diagram.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/sequence-diagram.png
--------------------------------------------------------------------------------
/assets/train_image.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/train_image.jpg
--------------------------------------------------------------------------------
/assets/workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/workflow.png
--------------------------------------------------------------------------------
/datasets/backup/lam/lam.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/lam/lam.jpg
--------------------------------------------------------------------------------
/datasets/backup/phuoc/avatar2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/avatar2.png
--------------------------------------------------------------------------------
/datasets/backup/phuoc/phuoc.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/phuoc.jpg
--------------------------------------------------------------------------------
/datasets/backup/phuoc/quare.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/quare.jpg
--------------------------------------------------------------------------------
/datasets/data/lam/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/lam/0.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/0.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/1.jpg
--------------------------------------------------------------------------------
/datasets/data/phuoc/2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/2.jpg
--------------------------------------------------------------------------------
/datasets/face_features/feature.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/face_features/feature.npz
--------------------------------------------------------------------------------
/detect.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 |
5 | from face_detection.scrfd.detector import SCRFD
6 | from face_detection.yolov5_face.detector import Yolov5Face
7 |
8 | # Initialize the face detector
9 | detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5m-face.pt")
10 | # detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
11 |
12 |
13 | def main():
14 | # Open the camera
15 | cap = cv2.VideoCapture(0)
16 |
17 | # Initialize variables for measuring frame rate
18 | start = time.time_ns()
19 | frame_count = 0
20 | fps = -1
21 |
22 | # Save video
23 | frame_width = int(cap.get(3))
24 | frame_height = int(cap.get(4))
25 | size = (frame_width, frame_height)
26 | video = cv2.VideoWriter(
27 | "results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size
28 | )
29 |
30 | # Read frames from the camera
31 | while True:
32 | # Capture a frame from the camera
33 | _, frame = cap.read()
34 |
35 | # Get faces and landmarks using the face detector
36 | bboxes, landmarks = detector.detect(image=frame)
37 | h, w, c = frame.shape
38 |
39 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness
40 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]
41 |
42 | # Draw bounding boxes and landmarks on the frame
43 | for i in range(len(bboxes)):
44 | # Get location of the face
45 | x1, y1, x2, y2, score = bboxes[i]
46 | cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)
47 |
48 | # Draw facial landmarks
49 | for id, key_point in enumerate(landmarks[i]):
50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1)
51 |
52 | # Calculate and display the frame rate
53 | frame_count += 1
54 | if frame_count >= 30:
55 | end = time.time_ns()
56 | fps = 1e9 * frame_count / (end - start)
57 | frame_count = 0
58 | start = time.time_ns()
59 |
60 | if fps > 0:
61 | fps_label = "FPS: %.2f" % fps
62 | cv2.putText(
63 | frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2
64 | )
65 |
66 | # Save the frame to the video
67 | video.write(frame)
68 |
69 | # Show the result in a window
70 | cv2.imshow("Face Detection", frame)
71 |
72 | # Press 'Q' on the keyboard to exit
73 | if cv2.waitKey(25) & 0xFF == ord("q"):
74 | break
75 |
76 | # Release video and camera, and close all OpenCV windows
77 | video.release()
78 | cap.release()
79 | cv2.destroyAllWindows()
80 | cv2.waitKey(0)
81 |
82 |
83 | if __name__ == "__main__":
84 | main()
85 |
--------------------------------------------------------------------------------
/face_align.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 |
5 | from face_alignment.alignment import norm_crop
6 | from face_detection.scrfd.detector import SCRFD
7 | from face_detection.yolov5_face.detector import Yolov5Face
8 |
9 | # Initialize the face detector
10 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-0.5.pt")
11 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
12 |
13 |
14 | def main():
15 | # Open the camera
16 | cap = cv2.VideoCapture(0)
17 |
18 | # Initialize variables for measuring frame rate
19 | start = time.time_ns()
20 | frame_count = 0
21 | fps = -1
22 |
23 | # Save video
24 | frame_width = int(cap.get(3))
25 | frame_height = int(cap.get(4))
26 | size = (frame_width, frame_height)
27 | video = cv2.VideoWriter("results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size)
28 |
29 | # Read frames from the camera
30 | while True:
31 | # Capture a frame from the camera
32 | _, frame = cap.read()
33 |
34 | # Get faces and landmarks using the face detector
35 | bboxes, landmarks = detector.detect(image=frame)
36 | h, w, c = frame.shape
37 |
38 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness
39 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)]
40 |
41 | # Draw bounding boxes and landmarks on the frame
42 | for i in range(len(bboxes)):
43 | # Get location of the face
44 | x1, y1, x2, y2, score = bboxes[i]
45 | face = frame[y1:y2, x1:x2]
46 | cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2)
47 |
48 | # Draw facial landmarks
49 | for id, key_point in enumerate(landmarks[i]):
50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1)
51 |
52 | align = norm_crop(frame, landmarks[i])
53 |
54 | # Calculate and display the frame rate
55 | frame_count += 1
56 | if frame_count >= 30:
57 | end = time.time_ns()
58 | fps = 1e9 * frame_count / (end - start)
59 | frame_count = 0
60 | start = time.time_ns()
61 |
62 | if fps > 0:
63 | fps_label = "FPS: %.2f" % fps
64 | cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
65 |
66 | # Save the frame to the video
67 | video.write(frame)
68 |
69 | # Show the result in a window
70 | cv2.imshow("Face Detection", frame)
71 | cv2.imshow("Face align", align)
72 |
73 | # Press 'Q' on the keyboard to exit
74 | if cv2.waitKey(25) & 0xFF == ord("q"):
75 | break
76 |
77 | # Release video and camera, and close all OpenCV windows
78 | video.release()
79 | cap.release()
80 | cv2.destroyAllWindows()
81 | cv2.waitKey(0)
82 |
83 |
84 | if __name__ == "__main__":
85 | main()
86 |
--------------------------------------------------------------------------------
/face_alignment/alignment.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | from skimage import transform as trans
4 |
5 | # Define a standard set of destination landmarks for ArcFace alignment
6 | arcface_dst = np.array(
7 | [
8 | [38.2946, 51.6963],
9 | [73.5318, 51.5014],
10 | [56.0252, 71.7366],
11 | [41.5493, 92.3655],
12 | [70.7299, 92.2041],
13 | ],
14 | dtype=np.float32,
15 | )
16 |
17 |
18 | def estimate_norm(lmk, image_size=112, mode="arcface"):
19 | """
20 | Estimate the transformation matrix for aligning facial landmarks.
21 |
22 | Args:
23 | lmk (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks.
24 | image_size (int): Desired output image size.
25 | mode (str): Alignment mode, currently only "arcface" is supported.
26 |
27 | Returns:
28 | numpy.ndarray: Transformation matrix (2x3) for aligning facial landmarks.
29 | """
30 | # Check input conditions
31 | assert lmk.shape == (5, 2)
32 | assert image_size % 112 == 0 or image_size % 128 == 0
33 |
34 | # Adjust ratio and x-coordinate difference based on image size
35 | if image_size % 112 == 0:
36 | ratio = float(image_size) / 112.0
37 | diff_x = 0
38 | else:
39 | ratio = float(image_size) / 128.0
40 | diff_x = 8.0 * ratio
41 |
42 | # Scale and shift the destination landmarks
43 | dst = arcface_dst * ratio
44 | dst[:, 0] += diff_x
45 |
46 | # Estimate the similarity transformation
47 | tform = trans.SimilarityTransform()
48 | tform.estimate(lmk, dst)
49 | M = tform.params[0:2, :]
50 |
51 | return M
52 |
53 |
54 | def norm_crop(img, landmark, image_size=112, mode="arcface"):
55 | """
56 | Normalize and crop a facial image based on provided landmarks.
57 |
58 | Args:
59 | img (numpy.ndarray): Input facial image.
60 | landmark (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks.
61 | image_size (int): Desired output image size.
62 | mode (str): Alignment mode, currently only "arcface" is supported.
63 |
64 | Returns:
65 | numpy.ndarray: Normalized and cropped facial image.
66 | """
67 | # Estimate the transformation matrix
68 | M = estimate_norm(landmark, image_size, mode)
69 |
70 | # Apply the affine transformation to the image
71 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0)
72 |
73 | return warped
74 |
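# Illustrative usage (not part of the original module), assuming a detector from
# this repository that returns five (x, y) landmarks per face, as in face_align.py:
#
#   bboxes, landmarks = detector.detect(image=frame)
#   aligned = norm_crop(frame, landmarks[0])  # 112x112 crop ready for ArcFace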
--------------------------------------------------------------------------------
/face_detection/retinaface/LICENSE.MIT:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/face_detection/retinaface/README.md:
--------------------------------------------------------------------------------
1 | Get weights:
2 | [Google drive](https://drive.google.com/drive/folders/1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1?usp=drive_link)
3 |
4 | Run
5 |
6 | Using Camera
7 | backbone: resnet50
8 |
9 | ```
10 | python camera_test.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu
11 | ```
12 |
13 | backbone: mobilenet0.25
14 |
15 | ```
16 | python camera_test.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu
17 | ```
18 |
19 | Using Image
20 | Change the test image in ./curve and update the image path in detect.py (line 87).
21 |
22 | backbone: resnet50
23 |
24 | ```
25 | python detect.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu
26 | ```
27 |
28 | backbone: mobilenet0.25
29 |
30 | ```
31 | python detect.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu
32 | ```
33 |
--------------------------------------------------------------------------------
/face_detection/retinaface/camera_test.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 | import time
6 |
7 | import cv2
8 | import numpy as np
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from data import cfg_mnet, cfg_re50
12 | from layers.functions.prior_box import PriorBox
13 | from models.retinaface import RetinaFace
14 | from utils.box_utils import decode, decode_landm
15 | from utils.nms.py_cpu_nms import py_cpu_nms
16 |
17 | parser = argparse.ArgumentParser(description="Retinaface")
18 |
19 | parser.add_argument(
20 | "-m",
21 | "--trained_model",
22 | default="./weights/Resnet50_Final.pth",
23 | type=str,
24 | help="Trained state_dict file path to open",
25 | )
26 | parser.add_argument(
27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50"
28 | )
29 | parser.add_argument(
30 | "--cpu", action="store_true", default=False, help="Use cpu inference"
31 | )
32 | parser.add_argument(
33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold"
34 | )
35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k")
36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold")
37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k")
38 | parser.add_argument(
39 | "-s",
40 | "--save_image",
41 | action="store_true",
42 | default=True,
43 | help="show detection results",
44 | )
45 | parser.add_argument(
46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold"
47 | )
48 | args = parser.parse_args()
49 |
50 |
51 | def check_keys(model, pretrained_state_dict):
52 | ckpt_keys = set(pretrained_state_dict.keys())
53 | model_keys = set(model.state_dict().keys())
54 | used_pretrained_keys = model_keys & ckpt_keys
55 | unused_pretrained_keys = ckpt_keys - model_keys
56 | missing_keys = model_keys - ckpt_keys
57 | print("Missing keys:{}".format(len(missing_keys)))
58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
59 | print("Used keys:{}".format(len(used_pretrained_keys)))
60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
61 | return True
62 |
63 |
64 | def remove_prefix(state_dict, prefix):
65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
66 | print("remove prefix '{}'".format(prefix))
67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
68 | return {f(key): value for key, value in state_dict.items()}
69 |
70 |
71 | def load_model(model, pretrained_path, load_to_cpu):
72 | print("Loading pretrained model from {}".format(pretrained_path))
73 | if load_to_cpu:
74 | pretrained_dict = torch.load(
75 | pretrained_path, map_location=lambda storage, loc: storage
76 | )
77 | else:
78 | device = torch.cuda.current_device()
79 | pretrained_dict = torch.load(
80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
81 | )
82 | if "state_dict" in pretrained_dict.keys():
83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
84 | else:
85 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
86 | check_keys(model, pretrained_dict)
87 | model.load_state_dict(pretrained_dict, strict=False)
88 | return model
89 |
90 |
91 | if __name__ == "__main__":
92 | torch.set_grad_enabled(False)
93 | cfg = None
94 | if args.network == "mobile0.25":
95 | cfg = cfg_mnet
96 | elif args.network == "resnet50":
97 | cfg = cfg_re50
98 | # net and model
99 | net = RetinaFace(cfg=cfg, phase="test")
100 | net = load_model(net, args.trained_model, args.cpu)
101 | net.eval()
102 | print("Finished loading model!")
103 | print(net)
104 | cudnn.benchmark = True
105 | device = torch.device("cpu" if args.cpu else "cuda")
106 | net = net.to(device)
107 |
108 | resize = 1
109 |
110 | cam = cv2.VideoCapture(0)
111 | fps = cam.get(cv2.CAP_PROP_FPS)
112 | print(fps)
113 |
114 | # testing begin
115 | # for i in range(10):
116 | # image_path = "./curve/test.jpg"
117 | # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
118 |
119 | while True:
120 | _, img_raw = cam.read()
121 |
122 | img = np.float32(img_raw)
123 |
124 | im_height, im_width, _ = img.shape
125 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
126 | img -= (104, 117, 123)
127 | img = img.transpose(2, 0, 1)
128 | img = torch.from_numpy(img).unsqueeze(0)
129 | img = img.to(device)
130 | scale = scale.to(device)
131 |
132 | tic = time.time()
133 | loc, conf, landms = net(img) # forward pass
134 | print("net forward time: {:.4f}".format(time.time() - tic))
135 |
136 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
137 | priors = priorbox.forward()
138 | priors = priors.to(device)
139 | prior_data = priors.data
140 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
141 | boxes = boxes * scale / resize
142 | boxes = boxes.cpu().numpy()
143 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
144 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
145 | scale1 = torch.Tensor(
146 | [
147 | img.shape[3],
148 | img.shape[2],
149 | img.shape[3],
150 | img.shape[2],
151 | img.shape[3],
152 | img.shape[2],
153 | img.shape[3],
154 | img.shape[2],
155 | img.shape[3],
156 | img.shape[2],
157 | ]
158 | )
159 | scale1 = scale1.to(device)
160 | landms = landms * scale1 / resize
161 | landms = landms.cpu().numpy()
162 |
163 | # ignore low scores
164 | inds = np.where(scores > args.confidence_threshold)[0]
165 | boxes = boxes[inds]
166 | landms = landms[inds]
167 | scores = scores[inds]
168 |
169 | # keep top-K before NMS
170 | order = scores.argsort()[::-1][: args.top_k]
171 | boxes = boxes[order]
172 | landms = landms[order]
173 | scores = scores[order]
174 |
175 | # do NMS
176 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
177 | keep = py_cpu_nms(dets, args.nms_threshold)
178 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
179 | dets = dets[keep, :]
180 | landms = landms[keep]
181 |
182 | # keep top-K faster NMS
183 | dets = dets[: args.keep_top_k, :]
184 | landms = landms[: args.keep_top_k, :]
185 |
186 | dets = np.concatenate((dets, landms), axis=1)
187 |
188 | # show image
189 | if args.save_image:
190 | for b in dets:
191 | if b[4] < args.vis_thres:
192 | continue
193 | text = "{:.4f}".format(b[4])
194 | b = list(map(int, b))
195 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
196 | cx = b[0]
197 | cy = b[1] + 12
198 | cv2.putText(
199 | img_raw,
200 | text,
201 | (cx, cy),
202 | cv2.FONT_HERSHEY_DUPLEX,
203 | 0.5,
204 | (255, 255, 255),
205 | )
206 |
207 | # landms
208 | # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
209 | # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
210 | # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
211 | # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
212 | # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
213 |
214 | # save image
215 | name = "test.jpg"
216 | cv2.imwrite(name, img_raw)
217 | cv2.imshow("camera", img_raw)
218 |
--------------------------------------------------------------------------------
/face_detection/retinaface/convert_to_onnx.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 |
6 | import cv2
7 | import numpy as np
8 | import torch
9 | import torch.backends.cudnn as cudnn
10 | from data import cfg_mnet, cfg_re50
11 | from layers.functions.prior_box import PriorBox
12 | from models.retinaface import RetinaFace
13 | from utils.box_utils import decode, decode_landm
14 | from utils.nms.py_cpu_nms import py_cpu_nms
15 | from utils.timer import Timer
16 |
17 | parser = argparse.ArgumentParser(description="Test")
18 | parser.add_argument(
19 | "-m",
20 | "--trained_model",
21 | default="./weights/mobilenet0.25_Final.pth",
22 | type=str,
23 | help="Trained state_dict file path to open",
24 | )
25 | parser.add_argument(
26 | "--network", default="mobile0.25", help="Backbone network mobile0.25 or resnet50"
27 | )
28 | parser.add_argument(
29 | "--long_side",
30 | default=640,
31 | help="when origin_size is false, long_side is scaled size(320 or 640 for long side)",
32 | )
33 | parser.add_argument(
34 | "--cpu", action="store_true", default=True, help="Use cpu inference"
35 | )
36 |
37 | args = parser.parse_args()
38 |
39 |
40 | def check_keys(model, pretrained_state_dict):
41 | ckpt_keys = set(pretrained_state_dict.keys())
42 | model_keys = set(model.state_dict().keys())
43 | used_pretrained_keys = model_keys & ckpt_keys
44 | unused_pretrained_keys = ckpt_keys - model_keys
45 | missing_keys = model_keys - ckpt_keys
46 | print("Missing keys:{}".format(len(missing_keys)))
47 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
48 | print("Used keys:{}".format(len(used_pretrained_keys)))
49 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
50 | return True
51 |
52 |
53 | def remove_prefix(state_dict, prefix):
54 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
55 | print("remove prefix '{}'".format(prefix))
56 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
57 | return {f(key): value for key, value in state_dict.items()}
58 |
59 |
60 | def load_model(model, pretrained_path, load_to_cpu):
61 | print("Loading pretrained model from {}".format(pretrained_path))
62 | if load_to_cpu:
63 | pretrained_dict = torch.load(
64 | pretrained_path, map_location=lambda storage, loc: storage
65 | )
66 | else:
67 | device = torch.cuda.current_device()
68 | pretrained_dict = torch.load(
69 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
70 | )
71 | if "state_dict" in pretrained_dict.keys():
72 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
73 | else:
74 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
75 | check_keys(model, pretrained_dict)
76 | model.load_state_dict(pretrained_dict, strict=False)
77 | return model
78 |
79 |
80 | if __name__ == "__main__":
81 | torch.set_grad_enabled(False)
82 | cfg = None
83 | if args.network == "mobile0.25":
84 | cfg = cfg_mnet
85 | elif args.network == "resnet50":
86 | cfg = cfg_re50
87 | # net and model
88 | net = RetinaFace(cfg=cfg, phase="test")
89 | net = load_model(net, args.trained_model, args.cpu)
90 | net.eval()
91 | print("Finished loading model!")
92 | print(net)
93 | device = torch.device("cpu" if args.cpu else "cuda")
94 | net = net.to(device)
95 |
96 | # ------------------------ export -----------------------------
97 | output_onnx = "FaceDetector.onnx"
98 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx))
99 | input_names = ["input0"]
100 | output_names = ["output0"]
101 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device)
102 |
103 | torch_out = torch.onnx._export(
104 | net,
105 | inputs,
106 | output_onnx,
107 | export_params=True,
108 | verbose=False,
109 | input_names=input_names,
110 | output_names=output_names,
111 | )
112 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import *
2 | from .data_augment import *
3 | from .wider_face import WiderFaceDetection, detection_collate
4 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/config.py:
--------------------------------------------------------------------------------
1 | # config.py
2 |
3 | cfg_mnet = {
4 | "name": "mobilenet0.25",
5 | "min_sizes": [[16, 32], [64, 128], [256, 512]],
6 | "steps": [8, 16, 32],
7 | "variance": [0.1, 0.2],
8 | "clip": False,
9 | "loc_weight": 2.0,
10 | "gpu_train": True,
11 | "batch_size": 32,
12 | "ngpu": 1,
13 | "epoch": 250,
14 | "decay1": 190,
15 | "decay2": 220,
16 | "image_size": 640,
17 | "pretrain": True,
18 | "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},
19 | "in_channel": 32,
20 | "out_channel": 64,
21 | }
22 |
23 | cfg_re50 = {
24 | "name": "Resnet50",
25 | "min_sizes": [[16, 32], [64, 128], [256, 512]],
26 | "steps": [8, 16, 32],
27 | "variance": [0.1, 0.2],
28 | "clip": False,
29 | "loc_weight": 2.0,
30 | "gpu_train": True,
31 | "batch_size": 24,
32 | "ngpu": 4,
33 | "epoch": 100,
34 | "decay1": 70,
35 | "decay2": 90,
36 | "image_size": 840,
37 | "pretrain": True,
38 | "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3},
39 | "in_channel": 256,
40 | "out_channel": 256,
41 | }
42 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/data_augment.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import cv2
4 | import numpy as np
5 | from utils.box_utils import matrix_iof
6 |
7 |
8 | def _crop(image, boxes, labels, landm, img_dim):
9 | height, width, _ = image.shape
10 | pad_image_flag = True
11 |
12 | for _ in range(250):
13 | """
14 | if random.uniform(0, 1) <= 0.2:
15 | scale = 1.0
16 | else:
17 | scale = random.uniform(0.3, 1.0)
18 | """
19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0]
20 | scale = random.choice(PRE_SCALES)
21 | short_side = min(width, height)
22 | w = int(scale * short_side)
23 | h = w
24 |
25 | if width == w:
26 | l = 0
27 | else:
28 | l = random.randrange(width - w)
29 | if height == h:
30 | t = 0
31 | else:
32 | t = random.randrange(height - h)
33 | roi = np.array((l, t, l + w, t + h))
34 |
35 | value = matrix_iof(boxes, roi[np.newaxis])
36 | flag = value >= 1
37 | if not flag.any():
38 | continue
39 |
40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2
41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1)
42 | boxes_t = boxes[mask_a].copy()
43 | labels_t = labels[mask_a].copy()
44 | landms_t = landm[mask_a].copy()
45 | landms_t = landms_t.reshape([-1, 5, 2])
46 |
47 | if boxes_t.shape[0] == 0:
48 | continue
49 |
50 | image_t = image[roi[1] : roi[3], roi[0] : roi[2]]
51 |
52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2])
53 | boxes_t[:, :2] -= roi[:2]
54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:])
55 | boxes_t[:, 2:] -= roi[:2]
56 |
57 | # landm
58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2]
59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0]))
60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2])
61 | landms_t = landms_t.reshape([-1, 10])
62 |
63 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale
64 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim
65 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim
66 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0
67 | boxes_t = boxes_t[mask_b]
68 | labels_t = labels_t[mask_b]
69 | landms_t = landms_t[mask_b]
70 |
71 | if boxes_t.shape[0] == 0:
72 | continue
73 |
74 | pad_image_flag = False
75 |
76 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag
77 | return image, boxes, labels, landm, pad_image_flag
78 |
79 |
80 | def _distort(image):
81 | def _convert(image, alpha=1, beta=0):
82 | tmp = image.astype(float) * alpha + beta
83 | tmp[tmp < 0] = 0
84 | tmp[tmp > 255] = 255
85 | image[:] = tmp
86 |
87 | image = image.copy()
88 |
89 | if random.randrange(2):
90 | # brightness distortion
91 | if random.randrange(2):
92 | _convert(image, beta=random.uniform(-32, 32))
93 |
94 | # contrast distortion
95 | if random.randrange(2):
96 | _convert(image, alpha=random.uniform(0.5, 1.5))
97 |
98 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
99 |
100 | # saturation distortion
101 | if random.randrange(2):
102 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
103 |
104 | # hue distortion
105 | if random.randrange(2):
106 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
107 | tmp %= 180
108 | image[:, :, 0] = tmp
109 |
110 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
111 |
112 | else:
113 | # brightness distortion
114 | if random.randrange(2):
115 | _convert(image, beta=random.uniform(-32, 32))
116 |
117 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
118 |
119 | # saturation distortion
120 | if random.randrange(2):
121 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5))
122 |
123 | # hue distortion
124 | if random.randrange(2):
125 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18)
126 | tmp %= 180
127 | image[:, :, 0] = tmp
128 |
129 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
130 |
131 | # contrast distortion
132 | if random.randrange(2):
133 | _convert(image, alpha=random.uniform(0.5, 1.5))
134 |
135 | return image
136 |
137 |
138 | def _expand(image, boxes, fill, p):
139 | if random.randrange(2):
140 | return image, boxes
141 |
142 | height, width, depth = image.shape
143 |
144 | scale = random.uniform(1, p)
145 | w = int(scale * width)
146 | h = int(scale * height)
147 |
148 | left = random.randint(0, w - width)
149 | top = random.randint(0, h - height)
150 |
151 | boxes_t = boxes.copy()
152 | boxes_t[:, :2] += (left, top)
153 | boxes_t[:, 2:] += (left, top)
154 | expand_image = np.empty((h, w, depth), dtype=image.dtype)
155 | expand_image[:, :] = fill
156 | expand_image[top : top + height, left : left + width] = image
157 | image = expand_image
158 |
159 | return image, boxes_t
160 |
161 |
162 | def _mirror(image, boxes, landms):
163 | _, width, _ = image.shape
164 | if random.randrange(2):
165 | image = image[:, ::-1]
166 | boxes = boxes.copy()
167 | boxes[:, 0::2] = width - boxes[:, 2::-2]
168 |
169 | # landm
170 | landms = landms.copy()
171 | landms = landms.reshape([-1, 5, 2])
172 | landms[:, :, 0] = width - landms[:, :, 0]
173 | tmp = landms[:, 1, :].copy()
174 | landms[:, 1, :] = landms[:, 0, :]
175 | landms[:, 0, :] = tmp
176 | tmp1 = landms[:, 4, :].copy()
177 | landms[:, 4, :] = landms[:, 3, :]
178 | landms[:, 3, :] = tmp1
179 | landms = landms.reshape([-1, 10])
180 |
181 | return image, boxes, landms
182 |
183 |
184 | def _pad_to_square(image, rgb_mean, pad_image_flag):
185 | if not pad_image_flag:
186 | return image
187 | height, width, _ = image.shape
188 | long_side = max(width, height)
189 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype)
190 | image_t[:, :] = rgb_mean
191 | image_t[0 : 0 + height, 0 : 0 + width] = image
192 | return image_t
193 |
194 |
195 | def _resize_subtract_mean(image, insize, rgb_mean):
196 | interp_methods = [
197 | cv2.INTER_LINEAR,
198 | cv2.INTER_CUBIC,
199 | cv2.INTER_AREA,
200 | cv2.INTER_NEAREST,
201 | cv2.INTER_LANCZOS4,
202 | ]
203 | interp_method = interp_methods[random.randrange(5)]
204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method)
205 | image = image.astype(np.float32)
206 | image -= rgb_mean
207 | return image.transpose(2, 0, 1)
208 |
209 |
210 | class preproc(object):
211 | def __init__(self, img_dim, rgb_means):
212 | self.img_dim = img_dim
213 | self.rgb_means = rgb_means
214 |
215 | def __call__(self, image, targets):
216 | assert targets.shape[0] > 0, "this image does not have gt"
217 |
218 | boxes = targets[:, :4].copy()
219 | labels = targets[:, -1].copy()
220 | landm = targets[:, 4:-1].copy()
221 |
222 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(
223 | image, boxes, labels, landm, self.img_dim
224 | )
225 | image_t = _distort(image_t)
226 | image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag)
227 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t)
228 | height, width, _ = image_t.shape
229 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means)
230 | boxes_t[:, 0::2] /= width
231 | boxes_t[:, 1::2] /= height
232 |
233 | landm_t[:, 0::2] /= width
234 | landm_t[:, 1::2] /= height
235 |
236 | labels_t = np.expand_dims(labels_t, 1)
237 | targets_t = np.hstack((boxes_t, landm_t, labels_t))
238 |
239 | return image_t, targets_t
240 |
--------------------------------------------------------------------------------
/face_detection/retinaface/data/wider_face.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path
3 | import sys
4 |
5 | import cv2
6 | import numpy as np
7 | import torch
8 | import torch.utils.data as data
9 |
10 |
11 | class WiderFaceDetection(data.Dataset):
12 | def __init__(self, txt_path, preproc=None):
13 | self.preproc = preproc
14 | self.imgs_path = []
15 | self.words = []
16 | f = open(txt_path, "r")
17 | lines = f.readlines()
18 | isFirst = True
19 | labels = []
20 | for line in lines:
21 | line = line.rstrip()
22 | if line.startswith("#"):
23 | if isFirst is True:
24 | isFirst = False
25 | else:
26 | labels_copy = labels.copy()
27 | self.words.append(labels_copy)
28 | labels.clear()
29 | path = line[2:]
30 | path = txt_path.replace("label.txt", "images/") + path
31 | self.imgs_path.append(path)
32 | else:
33 | line = line.split(" ")
34 | label = [float(x) for x in line]
35 | labels.append(label)
36 |
37 | self.words.append(labels)
38 |
39 | def __len__(self):
40 | return len(self.imgs_path)
41 |
42 | def __getitem__(self, index):
43 | img = cv2.imread(self.imgs_path[index])
44 | height, width, _ = img.shape
45 |
46 | labels = self.words[index]
47 | annotations = np.zeros((0, 15))
48 | if len(labels) == 0:
49 | return annotations
50 | for idx, label in enumerate(labels):
51 | annotation = np.zeros((1, 15))
52 | # bbox
53 | annotation[0, 0] = label[0] # x1
54 | annotation[0, 1] = label[1] # y1
55 | annotation[0, 2] = label[0] + label[2] # x2
56 | annotation[0, 3] = label[1] + label[3] # y2
57 |
58 | # landmarks
59 | annotation[0, 4] = label[4] # l0_x
60 | annotation[0, 5] = label[5] # l0_y
61 | annotation[0, 6] = label[7] # l1_x
62 | annotation[0, 7] = label[8] # l1_y
63 | annotation[0, 8] = label[10] # l2_x
64 | annotation[0, 9] = label[11] # l2_y
65 | annotation[0, 10] = label[13] # l3_x
66 | annotation[0, 11] = label[14] # l3_y
67 | annotation[0, 12] = label[16] # l4_x
68 | annotation[0, 13] = label[17] # l4_y
69 | if annotation[0, 4] < 0:
70 | annotation[0, 14] = -1
71 | else:
72 | annotation[0, 14] = 1
73 |
74 | annotations = np.append(annotations, annotation, axis=0)
75 | target = np.array(annotations)
76 | if self.preproc is not None:
77 | img, target = self.preproc(img, target)
78 |
79 | return torch.from_numpy(img), target
80 |
81 |
82 | def detection_collate(batch):
83 | """Custom collate fn for dealing with batches of images that have a different
84 | number of associated object annotations (bounding boxes).
85 |
86 | Arguments:
87 | batch: (tuple) A tuple of tensor images and lists of annotations
88 |
89 | Return:
90 | A tuple containing:
91 | 1) (tensor) batch of images stacked on their 0 dim
92 | 2) (list of tensors) annotations for a given image are stacked on 0 dim
93 | """
94 | targets = []
95 | imgs = []
96 | for _, sample in enumerate(batch):
97 | for _, tup in enumerate(sample):
98 | if torch.is_tensor(tup):
99 | imgs.append(tup)
100 | elif isinstance(tup, type(np.empty(0))):
101 | annos = torch.from_numpy(tup).float()
102 | targets.append(annos)
103 |
104 | return (torch.stack(imgs, 0), targets)
105 |
--------------------------------------------------------------------------------
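`WiderFaceDetection` parses the WIDER FACE `label.txt` format (a `# <relative image path>` line followed by one annotation row per face), and because images carry different numbers of faces, `detection_collate` stacks the images but leaves the targets as a list. A hedged wiring sketch with placeholder paths, assuming it runs from `face_detection/retinaface`:

```python
# Sketch: combine the dataset, preproc, and detection_collate in a DataLoader.
# The label.txt path is a placeholder and must follow the WIDER FACE layout
# that WiderFaceDetection.__init__ parses.
import torch.utils.data as data

from data.data_augment import preproc
from data.wider_face import WiderFaceDetection, detection_collate

training_dataset = "./data/widerface/train/label.txt"  # placeholder path

dataset = WiderFaceDetection(training_dataset, preproc(640, (104, 117, 123)))
loader = data.DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=0,
    collate_fn=detection_collate,  # stacks images, keeps per-image targets as a list
)

images, targets = next(iter(loader))
print(images.shape)  # torch.Size([8, 3, 640, 640])
print(len(targets))  # 8 tensors, each of shape (num_faces_i, 15)
```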
/face_detection/retinaface/detect.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import os
5 | import time
6 |
7 | import cv2
8 | import numpy as np
9 | import torch
10 | import torch.backends.cudnn as cudnn
11 | from data import cfg_mnet, cfg_re50
12 | from layers.functions.prior_box import PriorBox
13 | from models.retinaface import RetinaFace
14 | from utils.box_utils import decode, decode_landm
15 | from utils.nms.py_cpu_nms import py_cpu_nms
16 |
17 | parser = argparse.ArgumentParser(description="Retinaface")
18 |
19 | parser.add_argument(
20 | "-m",
21 | "--trained_model",
22 | default="./weights/Resnet50_Final.pth",
23 | type=str,
24 | help="Trained state_dict file path to open",
25 | )
26 | parser.add_argument(
27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50"
28 | )
29 | parser.add_argument(
30 | "--cpu", action="store_true", default=False, help="Use cpu inference"
31 | )
32 | parser.add_argument(
33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold"
34 | )
35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k")
36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold")
37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k")
38 | parser.add_argument(
39 | "-s",
40 | "--save_image",
41 | action="store_true",
42 | default=True,
43 | help="show detection results",
44 | )
45 | parser.add_argument(
46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold"
47 | )
48 | args = parser.parse_args()
49 |
50 |
51 | def check_keys(model, pretrained_state_dict):
52 | ckpt_keys = set(pretrained_state_dict.keys())
53 | model_keys = set(model.state_dict().keys())
54 | used_pretrained_keys = model_keys & ckpt_keys
55 | unused_pretrained_keys = ckpt_keys - model_keys
56 | missing_keys = model_keys - ckpt_keys
57 | print("Missing keys:{}".format(len(missing_keys)))
58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys)))
59 | print("Used keys:{}".format(len(used_pretrained_keys)))
60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint"
61 | return True
62 |
63 |
64 | def remove_prefix(state_dict, prefix):
65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'"""
66 | print("remove prefix '{}'".format(prefix))
67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
68 | return {f(key): value for key, value in state_dict.items()}
69 |
70 |
71 | def load_model(model, pretrained_path, load_to_cpu):
72 | print("Loading pretrained model from {}".format(pretrained_path))
73 | if load_to_cpu:
74 | pretrained_dict = torch.load(
75 | pretrained_path, map_location=lambda storage, loc: storage
76 | )
77 | else:
78 | device = torch.cuda.current_device()
79 | pretrained_dict = torch.load(
80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device)
81 | )
82 | if "state_dict" in pretrained_dict.keys():
83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.")
84 | else:
85 | pretrained_dict = remove_prefix(pretrained_dict, "module.")
86 | check_keys(model, pretrained_dict)
87 | model.load_state_dict(pretrained_dict, strict=False)
88 | return model
89 |
90 |
91 | if __name__ == "__main__":
92 | torch.set_grad_enabled(False)
93 | cfg = None
94 | if args.network == "mobile0.25":
95 | cfg = cfg_mnet
96 | elif args.network == "resnet50":
97 | cfg = cfg_re50
98 | # net and model
99 | net = RetinaFace(cfg=cfg, phase="test")
100 | net = load_model(net, args.trained_model, args.cpu)
101 | net.eval()
102 | print("Finished loading model!")
103 | print(net)
104 | cudnn.benchmark = True
105 | device = torch.device("cpu" if args.cpu else "cuda")
106 | net = net.to(device)
107 |
108 | resize = 1
109 |
110 | # testing begin
111 | for i in range(10):
112 | image_path = "./curve/test.jpg"
113 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
114 |
115 | img = np.float32(img_raw)
116 |
117 | im_height, im_width, _ = img.shape
118 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
119 | img -= (104, 117, 123)
120 | img = img.transpose(2, 0, 1)
121 | img = torch.from_numpy(img).unsqueeze(0)
122 | img = img.to(device)
123 | scale = scale.to(device)
124 |
125 | tic = time.time()
126 | loc, conf, landms = net(img) # forward pass
127 | print("net forward time: {:.4f}".format(time.time() - tic))
128 |
129 | priorbox = PriorBox(cfg, image_size=(im_height, im_width))
130 | priors = priorbox.forward()
131 | priors = priors.to(device)
132 | prior_data = priors.data
133 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
134 | boxes = boxes * scale / resize
135 | boxes = boxes.cpu().numpy()
136 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
137 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"])
138 | scale1 = torch.Tensor(
139 | [
140 | img.shape[3],
141 | img.shape[2],
142 | img.shape[3],
143 | img.shape[2],
144 | img.shape[3],
145 | img.shape[2],
146 | img.shape[3],
147 | img.shape[2],
148 | img.shape[3],
149 | img.shape[2],
150 | ]
151 | )
152 | scale1 = scale1.to(device)
153 | landms = landms * scale1 / resize
154 | landms = landms.cpu().numpy()
155 |
156 | # ignore low scores
157 | inds = np.where(scores > args.confidence_threshold)[0]
158 | boxes = boxes[inds]
159 | landms = landms[inds]
160 | scores = scores[inds]
161 |
162 | # keep top-K before NMS
163 | order = scores.argsort()[::-1][: args.top_k]
164 | boxes = boxes[order]
165 | landms = landms[order]
166 | scores = scores[order]
167 |
168 | # do NMS
169 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
170 | keep = py_cpu_nms(dets, args.nms_threshold)
171 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
172 | dets = dets[keep, :]
173 | landms = landms[keep]
174 |
175 | # keep top-K after NMS
176 | dets = dets[: args.keep_top_k, :]
177 | landms = landms[: args.keep_top_k, :]
178 |
179 | dets = np.concatenate((dets, landms), axis=1)
180 |
181 | # show image
182 | if args.save_image:
183 | for b in dets:
184 | if b[4] < args.vis_thres:
185 | continue
186 | text = "{:.4f}".format(b[4])
187 | b = list(map(int, b))
188 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
189 | cx = b[0]
190 | cy = b[1] + 12
191 | cv2.putText(
192 | img_raw,
193 | text,
194 | (cx, cy),
195 | cv2.FONT_HERSHEY_DUPLEX,
196 | 0.5,
197 | (255, 255, 255),
198 | )
199 |
200 | # landms
201 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
202 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
203 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
204 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
205 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
206 | # save image
207 |
208 | name = "test.jpg"
209 | cv2.imwrite(name, img_raw)
210 |
--------------------------------------------------------------------------------
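The loop above decodes prior-relative regression offsets back into box corners and landmark points before thresholding and NMS. The sketch below isolates that decode step with zero offsets so the tensor shapes are explicit; it assumes the working directory is `face_detection/retinaface`:

```python
# Sketch: shapes of the decode step used in detect.py, with zero offsets
# (so the output is simply the prior grid in corner form).
import torch

from data import cfg_mnet
from layers.functions.prior_box import PriorBox
from utils.box_utils import decode, decode_landm

im_height, im_width = 480, 640
priors = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward()

loc = torch.zeros(priors.shape[0], 4)          # stand-in for loc.squeeze(0)
landms_raw = torch.zeros(priors.shape[0], 10)  # stand-in for landms.squeeze(0)

boxes = decode(loc, priors, cfg_mnet["variance"])                # (num_priors, 4), normalized
landms = decode_landm(landms_raw, priors, cfg_mnet["variance"])  # (num_priors, 10)
print(priors.shape, boxes.shape, landms.shape)
```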
/face_detection/retinaface/layers/__init__.py:
--------------------------------------------------------------------------------
1 | from .functions import *
2 | from .modules import *
3 |
--------------------------------------------------------------------------------
/face_detection/retinaface/layers/functions/prior_box.py:
--------------------------------------------------------------------------------
1 | from itertools import product as product
2 | from math import ceil
3 |
4 | import numpy as np
5 | import torch
6 |
7 |
8 | class PriorBox(object):
9 | def __init__(self, cfg, image_size=None, phase="train"):
10 | super(PriorBox, self).__init__()
11 | self.min_sizes = cfg["min_sizes"]
12 | self.steps = cfg["steps"]
13 | self.clip = cfg["clip"]
14 | self.image_size = image_size
15 | self.feature_maps = [
16 | [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)]
17 | for step in self.steps
18 | ]
19 | self.name = "s"
20 |
21 | def forward(self):
22 | anchors = []
23 | for k, f in enumerate(self.feature_maps):
24 | min_sizes = self.min_sizes[k]
25 | for i, j in product(range(f[0]), range(f[1])):
26 | for min_size in min_sizes:
27 | s_kx = min_size / self.image_size[1]
28 | s_ky = min_size / self.image_size[0]
29 | dense_cx = [
30 | x * self.steps[k] / self.image_size[1] for x in [j + 0.5]
31 | ]
32 | dense_cy = [
33 | y * self.steps[k] / self.image_size[0] for y in [i + 0.5]
34 | ]
35 | for cy, cx in product(dense_cy, dense_cx):
36 | anchors += [cx, cy, s_kx, s_ky]
37 |
38 | # back to torch land
39 | output = torch.Tensor(anchors).view(-1, 4)
40 | if self.clip:
41 | output.clamp_(max=1, min=0)
42 | return output
43 |
--------------------------------------------------------------------------------
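Each feature-map cell gets one anchor per entry of `min_sizes[k]`, so the total prior count is the sum over levels of `ceil(H / step) * ceil(W / step) * len(min_sizes[k])`. A sketch verifying that count; the cfg values below are illustrative stand-ins for the real ones in `data/config.py`:

```python
# Sketch: count the priors produced for a 640x640 input.
from math import ceil

from layers.functions.prior_box import PriorBox

cfg = {  # illustrative values; see data/config.py for the real configs
    "min_sizes": [[16, 32], [64, 128], [256, 512]],
    "steps": [8, 16, 32],
    "clip": False,
}
image_size = (640, 640)

priors = PriorBox(cfg, image_size=image_size).forward()

expected = sum(
    ceil(image_size[0] / s) * ceil(image_size[1] / s) * len(m)
    for s, m in zip(cfg["steps"], cfg["min_sizes"])
)
print(priors.shape)                 # (expected, 4) in normalized (cx, cy, w, h)
print(priors.shape[0] == expected)  # True
```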
/face_detection/retinaface/layers/modules/__init__.py:
--------------------------------------------------------------------------------
1 | from .multibox_loss import MultiBoxLoss
2 |
3 | __all__ = ["MultiBoxLoss"]
4 |
--------------------------------------------------------------------------------
/face_detection/retinaface/layers/modules/multibox_loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from data import cfg_mnet
5 | from torch.autograd import Variable
6 | from utils.box_utils import log_sum_exp, match
7 |
8 | GPU = cfg_mnet["gpu_train"]
9 |
10 |
11 | class MultiBoxLoss(nn.Module):
12 | """SSD Weighted Loss Function
13 | Compute Targets:
14 | 1) Produce Confidence Target Indices by matching ground truth boxes
15 | with (default) 'priorboxes' that have jaccard index > threshold parameter
16 | (default threshold: 0.5).
17 | 2) Produce localization target by 'encoding' variance into offsets of ground
18 | truth boxes and their matched 'priorboxes'.
19 | 3) Hard negative mining to filter the excessive number of negative examples
20 | that comes with using a large number of default bounding boxes.
21 | (default negative:positive ratio 3:1)
22 | Objective Loss:
23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss
25 | weighted by α which is set to 1 by cross val.
26 | Args:
27 | c: class confidences,
28 | l: predicted boxes,
29 | g: ground truth boxes
30 | N: number of matched default boxes
31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details.
32 | """
33 |
34 | def __init__(
35 | self,
36 | num_classes,
37 | overlap_thresh,
38 | prior_for_matching,
39 | bkg_label,
40 | neg_mining,
41 | neg_pos,
42 | neg_overlap,
43 | encode_target,
44 | ):
45 | super(MultiBoxLoss, self).__init__()
46 | self.num_classes = num_classes
47 | self.threshold = overlap_thresh
48 | self.background_label = bkg_label
49 | self.encode_target = encode_target
50 | self.use_prior_for_matching = prior_for_matching
51 | self.do_neg_mining = neg_mining
52 | self.negpos_ratio = neg_pos
53 | self.neg_overlap = neg_overlap
54 | self.variance = [0.1, 0.2]
55 |
56 | def forward(self, predictions, priors, targets):
57 | """Multibox Loss
58 | Args:
59 | predictions (tuple): A tuple containing loc preds, conf preds,
60 | and prior boxes from SSD net.
61 | conf shape: torch.size(batch_size,num_priors,num_classes)
62 | loc shape: torch.size(batch_size,num_priors,4)
63 | priors shape: torch.size(num_priors,4)
64 |
65 | ground_truth (tensor): Ground truth boxes and labels for a batch,
66 | shape: [batch_size,num_objs,5] (last idx is the label).
67 | """
68 |
69 | loc_data, conf_data, landm_data = predictions
70 | priors = priors
71 | num = loc_data.size(0)
72 | num_priors = priors.size(0)
73 |
74 | # match priors (default boxes) and ground truth boxes
75 | loc_t = torch.Tensor(num, num_priors, 4)
76 | landm_t = torch.Tensor(num, num_priors, 10)
77 | conf_t = torch.LongTensor(num, num_priors)
78 | for idx in range(num):
79 | truths = targets[idx][:, :4].data
80 | labels = targets[idx][:, -1].data
81 | landms = targets[idx][:, 4:14].data
82 | defaults = priors.data
83 | match(
84 | self.threshold,
85 | truths,
86 | defaults,
87 | self.variance,
88 | labels,
89 | landms,
90 | loc_t,
91 | conf_t,
92 | landm_t,
93 | idx,
94 | )
95 | if GPU:
96 | loc_t = loc_t.cuda()
97 | conf_t = conf_t.cuda()
98 | landm_t = landm_t.cuda()
99 |
100 | zeros = torch.tensor(0).cuda()
101 | # landm Loss (Smooth L1)
102 | # Shape: [batch,num_priors,10]
103 | pos1 = conf_t > zeros
104 | num_pos_landm = pos1.long().sum(1, keepdim=True)
105 | N1 = max(num_pos_landm.data.sum().float(), 1)
106 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data)
107 | landm_p = landm_data[pos_idx1].view(-1, 10)
108 | landm_t = landm_t[pos_idx1].view(-1, 10)
109 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction="sum")
110 |
111 | pos = conf_t != zeros
112 | conf_t[pos] = 1
113 |
114 | # Localization Loss (Smooth L1)
115 | # Shape: [batch,num_priors,4]
116 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
117 | loc_p = loc_data[pos_idx].view(-1, 4)
118 | loc_t = loc_t[pos_idx].view(-1, 4)
119 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction="sum")
120 |
121 | # Compute max conf across batch for hard negative mining
122 | batch_conf = conf_data.view(-1, self.num_classes)
123 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1))
124 |
125 | # Hard Negative Mining
126 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now
127 | loss_c = loss_c.view(num, -1)
128 | _, loss_idx = loss_c.sort(1, descending=True)
129 | _, idx_rank = loss_idx.sort(1)
130 | num_pos = pos.long().sum(1, keepdim=True)
131 | num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
132 | neg = idx_rank < num_neg.expand_as(idx_rank)
133 |
134 | # Confidence Loss Including Positive and Negative Examples
135 | pos_idx = pos.unsqueeze(2).expand_as(conf_data)
136 | neg_idx = neg.unsqueeze(2).expand_as(conf_data)
137 | conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes)
138 | targets_weighted = conf_t[(pos + neg).gt(0)]
139 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction="sum")
140 |
141 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N
142 | N = max(num_pos.data.sum().float(), 1)
143 | loss_l /= N
144 | loss_c /= N
145 | loss_landm /= N1
146 |
147 | return loss_l, loss_c, loss_landm
148 |
--------------------------------------------------------------------------------
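A hedged construction sketch for the loss: the argument values are illustrative stand-ins (the training script is not part of this listing), the import assumes the working directory is `face_detection/retinaface`, and `forward` as written requires a CUDA device because it moves its target buffers with `.cuda()`:

```python
# Sketch: constructing MultiBoxLoss and the inputs forward() expects.
# Argument values are illustrative, not taken from this repository's
# training configuration.
from layers.modules import MultiBoxLoss

criterion = MultiBoxLoss(
    num_classes=2,          # face vs. background
    overlap_thresh=0.35,
    prior_for_matching=True,
    bkg_label=0,
    neg_mining=True,
    neg_pos=7,              # 7:1 negative:positive mining ratio
    neg_overlap=0.35,
    encode_target=False,
)

# predictions: (loc, conf, landm) from RetinaFace in "train" phase,
# priors:      output of PriorBox.forward(),
# targets:     list of (num_faces, 15) tensors, one per image.
# loss_l, loss_c, loss_landm = criterion((loc, conf, landm), priors, targets)
```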
/face_detection/retinaface/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/models/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/models/net.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torchvision.models as models
7 | import torchvision.models._utils as _utils
8 | from torch.autograd import Variable
9 |
10 |
11 | def conv_bn(inp, oup, stride=1, leaky=0):
12 | return nn.Sequential(
13 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
14 | nn.BatchNorm2d(oup),
15 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
16 | )
17 |
18 |
19 | def conv_bn_no_relu(inp, oup, stride):
20 | return nn.Sequential(
21 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
22 | nn.BatchNorm2d(oup),
23 | )
24 |
25 |
26 | def conv_bn1X1(inp, oup, stride, leaky=0):
27 | return nn.Sequential(
28 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False),
29 | nn.BatchNorm2d(oup),
30 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
31 | )
32 |
33 |
34 | def conv_dw(inp, oup, stride, leaky=0.1):
35 | return nn.Sequential(
36 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
37 | nn.BatchNorm2d(inp),
38 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
39 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
40 | nn.BatchNorm2d(oup),
41 | nn.LeakyReLU(negative_slope=leaky, inplace=True),
42 | )
43 |
44 |
45 | class SSH(nn.Module):
46 | def __init__(self, in_channel, out_channel):
47 | super(SSH, self).__init__()
48 | assert out_channel % 4 == 0
49 | leaky = 0
50 | if out_channel <= 64:
51 | leaky = 0.1
52 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1)
53 |
54 | self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky)
55 | self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
56 |
57 | self.conv7X7_2 = conv_bn(
58 | out_channel // 4, out_channel // 4, stride=1, leaky=leaky
59 | )
60 | self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1)
61 |
62 | def forward(self, input):
63 | conv3X3 = self.conv3X3(input)
64 |
65 | conv5X5_1 = self.conv5X5_1(input)
66 | conv5X5 = self.conv5X5_2(conv5X5_1)
67 |
68 | conv7X7_2 = self.conv7X7_2(conv5X5_1)
69 | conv7X7 = self.conv7x7_3(conv7X7_2)
70 |
71 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1)
72 | out = F.relu(out)
73 | return out
74 |
75 |
76 | class FPN(nn.Module):
77 | def __init__(self, in_channels_list, out_channels):
78 | super(FPN, self).__init__()
79 | leaky = 0
80 | if out_channels <= 64:
81 | leaky = 0.1
82 | self.output1 = conv_bn1X1(
83 | in_channels_list[0], out_channels, stride=1, leaky=leaky
84 | )
85 | self.output2 = conv_bn1X1(
86 | in_channels_list[1], out_channels, stride=1, leaky=leaky
87 | )
88 | self.output3 = conv_bn1X1(
89 | in_channels_list[2], out_channels, stride=1, leaky=leaky
90 | )
91 |
92 | self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky)
93 | self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky)
94 |
95 | def forward(self, input):
96 | # names = list(input.keys())
97 | input = list(input.values())
98 |
99 | output1 = self.output1(input[0])
100 | output2 = self.output2(input[1])
101 | output3 = self.output3(input[2])
102 |
103 | up3 = F.interpolate(
104 | output3, size=[output2.size(2), output2.size(3)], mode="nearest"
105 | )
106 | output2 = output2 + up3
107 | output2 = self.merge2(output2)
108 |
109 | up2 = F.interpolate(
110 | output2, size=[output1.size(2), output1.size(3)], mode="nearest"
111 | )
112 | output1 = output1 + up2
113 | output1 = self.merge1(output1)
114 |
115 | out = [output1, output2, output3]
116 | return out
117 |
118 |
119 | class MobileNetV1(nn.Module):
120 | def __init__(self):
121 | super(MobileNetV1, self).__init__()
122 | self.stage1 = nn.Sequential(
123 | conv_bn(3, 8, 2, leaky=0.1), # 3
124 | conv_dw(8, 16, 1), # 7
125 | conv_dw(16, 32, 2), # 11
126 | conv_dw(32, 32, 1), # 19
127 | conv_dw(32, 64, 2), # 27
128 | conv_dw(64, 64, 1), # 43
129 | )
130 | self.stage2 = nn.Sequential(
131 | conv_dw(64, 128, 2), # 43 + 16 = 59
132 | conv_dw(128, 128, 1), # 59 + 32 = 91
133 | conv_dw(128, 128, 1), # 91 + 32 = 123
134 | conv_dw(128, 128, 1), # 123 + 32 = 155
135 | conv_dw(128, 128, 1), # 155 + 32 = 187
136 | conv_dw(128, 128, 1), # 187 + 32 = 219
137 | )
138 | self.stage3 = nn.Sequential(
139 | conv_dw(128, 256, 2), # 219 +3 2 = 241
140 | conv_dw(256, 256, 1), # 241 + 64 = 301
141 | )
142 | self.avg = nn.AdaptiveAvgPool2d((1, 1))
143 | self.fc = nn.Linear(256, 1000)
144 |
145 | def forward(self, x):
146 | x = self.stage1(x)
147 | x = self.stage2(x)
148 | x = self.stage3(x)
149 | x = self.avg(x)
150 | # x = self.model(x)
151 | x = x.view(-1, 256)
152 | x = self.fc(x)
153 | return x
154 |
--------------------------------------------------------------------------------
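`MobileNetV1`'s three stages emit 64-, 128-, and 256-channel feature maps at strides 8, 16, and 32; these are the intermediate features RetinaFace's FPN consumes. A quick shape check on a dummy input:

```python
# Sketch: stage-by-stage output shapes of MobileNetV1 on a dummy 640x640 input.
import torch

from models.net import MobileNetV1

net = MobileNetV1().eval()
x = torch.randn(1, 3, 640, 640)

with torch.no_grad():
    c3 = net.stage1(x)   # (1,  64, 80, 80)  stride 8
    c4 = net.stage2(c3)  # (1, 128, 40, 40)  stride 16
    c5 = net.stage3(c4)  # (1, 256, 20, 20)  stride 32

print(c3.shape, c4.shape, c5.shape)
```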
/face_detection/retinaface/models/retinaface.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 | import torchvision.models._utils as _utils
7 | import torchvision.models.detection.backbone_utils as backbone_utils
8 | from models.net import FPN as FPN
9 | from models.net import SSH as SSH
10 | from models.net import MobileNetV1 as MobileNetV1
11 |
12 |
13 | class ClassHead(nn.Module):
14 | def __init__(self, inchannels=512, num_anchors=3):
15 | super(ClassHead, self).__init__()
16 | self.num_anchors = num_anchors
17 | self.conv1x1 = nn.Conv2d(
18 | inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0
19 | )
20 |
21 | def forward(self, x):
22 | out = self.conv1x1(x)
23 | out = out.permute(0, 2, 3, 1).contiguous()
24 |
25 | return out.view(out.shape[0], -1, 2)
26 |
27 |
28 | class BboxHead(nn.Module):
29 | def __init__(self, inchannels=512, num_anchors=3):
30 | super(BboxHead, self).__init__()
31 | self.conv1x1 = nn.Conv2d(
32 | inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0
33 | )
34 |
35 | def forward(self, x):
36 | out = self.conv1x1(x)
37 | out = out.permute(0, 2, 3, 1).contiguous()
38 |
39 | return out.view(out.shape[0], -1, 4)
40 |
41 |
42 | class LandmarkHead(nn.Module):
43 | def __init__(self, inchannels=512, num_anchors=3):
44 | super(LandmarkHead, self).__init__()
45 | self.conv1x1 = nn.Conv2d(
46 | inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0
47 | )
48 |
49 | def forward(self, x):
50 | out = self.conv1x1(x)
51 | out = out.permute(0, 2, 3, 1).contiguous()
52 |
53 | return out.view(out.shape[0], -1, 10)
54 |
55 |
56 | class RetinaFace(nn.Module):
57 | def __init__(self, cfg=None, phase="train"):
58 | """
59 | :param cfg: Network related settings.
60 | :param phase: train or test.
61 | """
62 | super(RetinaFace, self).__init__()
63 | self.phase = phase
64 | backbone = None
65 | if cfg["name"] == "mobilenet0.25":
66 | backbone = MobileNetV1()
67 | if cfg["pretrain"]:
68 | checkpoint = torch.load(
69 | "./weights/mobilenetV1X0.25_pretrain.tar",
70 | map_location=torch.device("cpu"),
71 | )
72 | from collections import OrderedDict
73 |
74 | new_state_dict = OrderedDict()
75 | for k, v in checkpoint["state_dict"].items():
76 | name = k[7:] # remove module.
77 | new_state_dict[name] = v
78 | # load params
79 | backbone.load_state_dict(new_state_dict)
80 | elif cfg["name"] == "Resnet50":
81 | import torchvision.models as models
82 |
83 | backbone = models.resnet50(pretrained=cfg["pretrain"])
84 |
85 | self.body = _utils.IntermediateLayerGetter(backbone, cfg["return_layers"])
86 | in_channels_stage2 = cfg["in_channel"]
87 | in_channels_list = [
88 | in_channels_stage2 * 2,
89 | in_channels_stage2 * 4,
90 | in_channels_stage2 * 8,
91 | ]
92 | out_channels = cfg["out_channel"]
93 | self.fpn = FPN(in_channels_list, out_channels)
94 | self.ssh1 = SSH(out_channels, out_channels)
95 | self.ssh2 = SSH(out_channels, out_channels)
96 | self.ssh3 = SSH(out_channels, out_channels)
97 |
98 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg["out_channel"])
99 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"])
100 | self.LandmarkHead = self._make_landmark_head(
101 | fpn_num=3, inchannels=cfg["out_channel"]
102 | )
103 |
104 | def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2):
105 | classhead = nn.ModuleList()
106 | for i in range(fpn_num):
107 | classhead.append(ClassHead(inchannels, anchor_num))
108 | return classhead
109 |
110 | def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2):
111 | bboxhead = nn.ModuleList()
112 | for i in range(fpn_num):
113 | bboxhead.append(BboxHead(inchannels, anchor_num))
114 | return bboxhead
115 |
116 | def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2):
117 | landmarkhead = nn.ModuleList()
118 | for i in range(fpn_num):
119 | landmarkhead.append(LandmarkHead(inchannels, anchor_num))
120 | return landmarkhead
121 |
122 | def forward(self, inputs):
123 | out = self.body(inputs)
124 |
125 | # FPN
126 | fpn = self.fpn(out)
127 |
128 | # SSH
129 | feature1 = self.ssh1(fpn[0])
130 | feature2 = self.ssh2(fpn[1])
131 | feature3 = self.ssh3(fpn[2])
132 | features = [feature1, feature2, feature3]
133 |
134 | bbox_regressions = torch.cat(
135 | [self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1
136 | )
137 | classifications = torch.cat(
138 | [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1
139 | )
140 | ldm_regressions = torch.cat(
141 | [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1
142 | )
143 |
144 | if self.phase == "train":
145 | output = (bbox_regressions, classifications, ldm_regressions)
146 | else:
147 | output = (
148 | bbox_regressions,
149 | F.softmax(classifications, dim=-1),
150 | ldm_regressions,
151 | )
152 | return output
153 |
--------------------------------------------------------------------------------
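In `test` phase the network returns `(boxes, softmaxed scores, landmarks)` with one row per anchor, lining up index-for-index with the `PriorBox` grid. The sketch below runs an untrained mobilenet0.25 backbone; the cfg dict mirrors the relevant `cfg_mnet` fields from `data/config.py` (stated here as assumptions), with `pretrain` disabled so no checkpoint is needed:

```python
# Sketch: forward pass of an untrained RetinaFace with a mobilenet0.25 backbone.
# The cfg values are assumptions mirroring cfg_mnet in data/config.py.
import torch

from models.retinaface import RetinaFace

cfg = {
    "name": "mobilenet0.25",
    "pretrain": False,  # skip loading mobilenetV1X0.25_pretrain.tar
    "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3},
    "in_channel": 32,   # -> FPN inputs [64, 128, 256]
    "out_channel": 64,
}

net = RetinaFace(cfg=cfg, phase="test").eval()

with torch.no_grad():
    loc, conf, landms = net(torch.randn(1, 3, 640, 640))

print(loc.shape)     # (1, 16800, 4)  -- 2 anchors per cell over strides 8/16/32
print(conf.shape)    # (1, 16800, 2)  -- softmaxed in "test" phase
print(landms.shape)  # (1, 16800, 10)
```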
/face_detection/retinaface/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/utils/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/nms/__init__.py
--------------------------------------------------------------------------------
/face_detection/retinaface/utils/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 |
11 | def py_cpu_nms(dets, thresh):
12 | """Pure Python NMS baseline."""
13 | x1 = dets[:, 0]
14 | y1 = dets[:, 1]
15 | x2 = dets[:, 2]
16 | y2 = dets[:, 3]
17 | scores = dets[:, 4]
18 |
19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
20 | order = scores.argsort()[::-1]
21 |
22 | keep = []
23 | while order.size > 0:
24 | i = order[0]
25 | keep.append(i)
26 | xx1 = np.maximum(x1[i], x1[order[1:]])
27 | yy1 = np.maximum(y1[i], y1[order[1:]])
28 | xx2 = np.minimum(x2[i], x2[order[1:]])
29 | yy2 = np.minimum(y2[i], y2[order[1:]])
30 |
31 | w = np.maximum(0.0, xx2 - xx1 + 1)
32 | h = np.maximum(0.0, yy2 - yy1 + 1)
33 | inter = w * h
34 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
35 |
36 | inds = np.where(ovr <= thresh)[0]
37 | order = order[inds + 1]
38 |
39 | return keep
40 |
--------------------------------------------------------------------------------
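A tiny worked example: the second box overlaps the first with IoU ≈ 0.86 and is suppressed at `thresh=0.4`; the third box is disjoint and kept:

```python
# Example: NMS keeps the highest-scoring box of an overlapping pair.
import numpy as np

from utils.nms.py_cpu_nms import py_cpu_nms

dets = np.array(
    [
        [10, 10, 60, 60, 0.9],      # kept (highest score)
        [12, 12, 62, 62, 0.8],      # suppressed, IoU with box 0 ~ 0.86
        [100, 100, 150, 150, 0.7],  # kept, no overlap
    ],
    dtype=np.float32,
)

keep = py_cpu_nms(dets, thresh=0.4)
print(keep)  # indices of kept boxes: 0 and 2
```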
/face_detection/retinaface/utils/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 |
11 | class Timer(object):
12 | """A simple timer."""
13 |
14 | def __init__(self):
15 | self.total_time = 0.0
16 | self.calls = 0
17 | self.start_time = 0.0
18 | self.diff = 0.0
19 | self.average_time = 0.0
20 |
21 | def tic(self):
22 | # using time.time instead of time.clock because time.clock
23 | # does not normalize for multithreading
24 | self.start_time = time.time()
25 |
26 | def toc(self, average=True):
27 | self.diff = time.time() - self.start_time
28 | self.total_time += self.diff
29 | self.calls += 1
30 | self.average_time = self.total_time / self.calls
31 | if average:
32 | return self.average_time
33 | else:
34 | return self.diff
35 |
36 | def clear(self):
37 | self.total_time = 0.0
38 | self.calls = 0
39 | self.start_time = 0.0
40 | self.diff = 0.0
41 | self.average_time = 0.0
42 |
--------------------------------------------------------------------------------
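Usage sketch: each `tic`/`toc` pair adds to `total_time`, and `toc(average=True)` (the default) returns the running mean rather than the last interval:

```python
# Sketch: time a repeated operation and report per-call and average durations.
import time

from utils.timer import Timer

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)                               # stand-in for a forward pass
    print("last:", round(t.toc(average=False), 3))

print("mean:", round(t.average_time, 3), "over", t.calls, "calls")
```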
/face_detection/scrfd/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1C9RzReAihJQRl8EJOX6vQj7qbHBPmzME?usp=sharing
4 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/README.md:
--------------------------------------------------------------------------------
1 | # Reference
2 |
3 | https://github.com/deepcam-cn/yolov5-face
4 |
5 | Pretrained weights (Google Drive): https://drive.google.com/drive/folders/1UMG4hBor8CFipYm7y71_iTigHjZ4AkaH?usp=sharing
6 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/detector.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import cv2
5 | import numpy as np
6 | import torch
7 |
8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
9 | sys.path.append(BASE_DIR)
10 |
11 | from models.experimental import attempt_load
12 | from utils.datasets import letterbox
13 | from utils.general import check_img_size, non_max_suppression_face, scale_coords
14 |
15 |
16 | class Yolov5Face(object):
17 | def __init__(self, model_file=None):
18 | """
19 | Initialize the Detector class.
20 |
21 | :param model_file: Path to the YOLOv5 face model weights file (e.g. yolov5n-0.5.pt).
22 | """
23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24 |
25 | self.device = device
26 | self.model = attempt_load(model_file, map_location=device)
27 |
28 | # Parameters
29 | self.size_convert = 128 # Size for image conversion
30 | self.conf_thres = 0.4 # Confidence threshold
31 | self.iou_thres = 0.5 # Intersection over Union threshold
32 |
33 | def resize_image(self, img0, img_size):
34 | """
35 | Resize the input image.
36 |
37 | :param img0: The input image to be resized.
38 | :param img_size: The desired size for the image.
39 |
40 | :return: The resized and preprocessed image.
41 | """
42 | h0, w0 = img0.shape[:2] # Original height and width
43 | r = img_size / max(h0, w0) # Resize image to img_size
44 |
45 | if r != 1:
46 | interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR
47 | img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp)
48 |
49 | imgsz = check_img_size(img_size, s=self.model.stride.max()) # Check img_size
50 | img = letterbox(img0, new_shape=imgsz)[0]
51 |
52 | img = img[:, :, ::-1].transpose(2, 0, 1).copy() # BGR to RGB, to 3x416x416
53 | img = torch.from_numpy(img).to(self.device)
54 | img = img.float() # uint8 to fp16/32
55 | img /= 255.0 # 0 - 255 to 0.0 - 1.0
56 |
57 | return img
58 |
59 | def scale_coords_landmarks(self, img1_shape, coords, img0_shape, ratio_pad=None):
60 | """
61 | Rescale coordinates from img1_shape to img0_shape.
62 |
63 | :param img1_shape: Shape of the source image.
64 | :param coords: Coordinates to be rescaled.
65 | :param img0_shape: Shape of the target image.
66 | :param ratio_pad: Padding ratio.
67 |
68 | :return: Rescaled coordinates.
69 | """
70 | if ratio_pad is None: # Calculate from img0_shape
71 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
72 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (
73 | img1_shape[0] - img0_shape[0] * gain
74 | ) / 2
75 | else:
76 | gain = ratio_pad[0][0]
77 | pad = ratio_pad[1]
78 |
79 | coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding
80 | coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding
81 | coords[:, :10] /= gain
82 | coords[:, :10] = coords[:, :10].clamp(
83 | 0, img0_shape[1]
84 | ) # Clamp x and y coordinates
85 |
86 | # Reshape the coordinates into the desired format
87 | coords = coords.reshape(-1, 5, 2)
88 | return coords
89 |
90 | def detect(self, image):
91 | """
92 | Perform face detection on the input image.
93 |
94 | :param image: The input image for face detection.
95 |
96 | :return: Detected bounding boxes and landmarks.
97 | """
98 | # Resize image
99 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert)
100 |
101 | # Via yolov5-face
102 | with torch.no_grad():
103 | pred = self.model(img[None, :])[0]
104 |
105 | # Apply NMS
106 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0]
107 | bboxes = np.int32(
108 | scale_coords(img.shape[1:], det[:, :5], image.shape).round().cpu().numpy()
109 | )
110 |
111 | landmarks = np.int32(
112 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape)
113 | .round()
114 | .cpu()
115 | .numpy()
116 | )
117 |
118 | return bboxes, landmarks
119 |
120 | def detect_tracking(self, image):
121 | """
122 | Perform face detection on the input image and format the results for the tracker.
123 |
124 | :param image: The input image.
125 |
126 | :return: Tracking results and image information.
127 | """
128 | height, width = image.shape[:2]
129 | img_info = {"id": 0}
130 | img_info["height"] = height
131 | img_info["width"] = width
132 | img_info["raw_img"] = image
133 |
134 | # Resize image
135 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert)
136 |
137 | # Via yolov5-face
138 | with torch.no_grad():
139 | pred = self.model(img[None, :])[0]
140 |
141 | scale = min(
142 | img.shape[1] / float(image.shape[0]), img.shape[2] / float(image.shape[1])
143 | )
144 |
145 | # Apply NMS
146 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0]
147 |
148 | bboxes = scale_coords(img.shape[1:], det[:, :4], image.shape)
149 | scores = det[:, 4:5]
150 | outputs = torch.cat((bboxes, scores), dim=1)
151 | outputs[:, :4] *= scale
152 |
153 | bboxes = np.int32(bboxes.round().cpu().numpy())
154 |
155 | landmarks = np.int32(
156 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape)
157 | .round()
158 | .cpu()
159 | .numpy()
160 | )
161 |
162 | return outputs, img_info, bboxes, landmarks
163 |
--------------------------------------------------------------------------------
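A hedged usage sketch for `Yolov5Face.detect`: the weights filename below is a placeholder (see `face_detection/yolov5_face/weights/README.md` for the download link), the input image path is arbitrary, and the import assumes the repository root is on `PYTHONPATH`:

```python
# Sketch: run YOLOv5-face detection on one image and draw the results.
import cv2

from face_detection.yolov5_face.detector import Yolov5Face

# Placeholder weights path; download from the link in weights/README.md.
detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")

image = cv2.imread("datasets/data/phuoc/0.jpg")  # any test image
bboxes, landmarks = detector.detect(image)

# bboxes:    (N, 5) int32 -- x1, y1, x2, y2 plus a rounded score column
# landmarks: (N, 5, 2) int32 -- five (x, y) facial keypoints per face
for box, pts in zip(bboxes, landmarks):
    x1, y1, x2, y2 = map(int, box[:4])
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    for x, y in pts:
        cv2.circle(image, (int(x), int(y)), 2, (0, 0, 255), -1)

cv2.imwrite("output.jpg", image)
```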
/face_detection/yolov5_face/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/models/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/blazeface.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [5, 6, 10, 13, 21, 26] # P3/8
9 | - [55, 72, 225, 304, 438, 553] # P4/16
10 |
11 | # YOLOv5 backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [
15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2
16 | [-1, 2, BlazeBlock, [24]], # 1
17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4
18 | [-1, 2, BlazeBlock, [48]], # 3
19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8
20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5
21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16
22 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 7
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [64, 1, 1]], # 8 (P4/32-large)
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3
30 | [-1, 1, Conv, [64, 1, 1]], # 11 (P3/8-medium)
31 |
32 | [[11, 8], 1, Detect, [nc, anchors]], # Detect(P3, P4)
33 | ]
34 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/blazeface_fpn.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [5, 6, 10, 13, 21, 26] # P3/8
9 | - [55, 72, 225, 304, 438, 553] # P4/16
10 |
11 | # YOLOv5 backbone
12 | backbone:
13 | # [from, number, module, args]
14 | [
15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2
16 | [-1, 2, BlazeBlock, [24]], # 1
17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4
18 | [-1, 2, BlazeBlock, [48]], # 3
19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8
20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5
21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16
22 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 7
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [48, 1, 1]], # 8
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3
30 | [-1, 1, Conv, [48, 1, 1]], # 11 (P3/8-medium)
31 |
32 | [-1, 1, nn.MaxPool2d, [3, 2, 1]], # 12
33 | [[-1, 7], 1, Concat, [1]], # cat backbone P3
34 | [-1, 1, Conv, [48, 1, 1]], # 14 (P4/16-large)
35 |
36 | [[11, 14], 1, Detect, [nc, anchors]], # Detect(P3, P4)
37 | ]
38 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/experimental.py:
--------------------------------------------------------------------------------
1 | # This file contains experimental modules
2 |
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | from models.common import Conv, DWConv
7 | from utils.google_utils import attempt_download
8 |
9 |
10 | class CrossConv(nn.Module):
11 | # Cross Convolution Downsample
12 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
13 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
14 | super(CrossConv, self).__init__()
15 | c_ = int(c2 * e) # hidden channels
16 | self.cv1 = Conv(c1, c_, (1, k), (1, s))
17 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
18 | self.add = shortcut and c1 == c2
19 |
20 | def forward(self, x):
21 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
22 |
23 |
24 | class Sum(nn.Module):
25 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
26 | def __init__(self, n, weight=False): # n: number of inputs
27 | super(Sum, self).__init__()
28 | self.weight = weight # apply weights boolean
29 | self.iter = range(n - 1) # iter object
30 | if weight:
31 | self.w = nn.Parameter(
32 | -torch.arange(1.0, n) / 2, requires_grad=True
33 | ) # layer weights
34 |
35 | def forward(self, x):
36 | y = x[0] # no weight
37 | if self.weight:
38 | w = torch.sigmoid(self.w) * 2
39 | for i in self.iter:
40 | y = y + x[i + 1] * w[i]
41 | else:
42 | for i in self.iter:
43 | y = y + x[i + 1]
44 | return y
45 |
46 |
47 | class GhostConv(nn.Module):
48 | # Ghost Convolution https://github.com/huawei-noah/ghostnet
49 | def __init__(
50 | self, c1, c2, k=1, s=1, g=1, act=True
51 | ): # ch_in, ch_out, kernel, stride, groups
52 | super(GhostConv, self).__init__()
53 | c_ = c2 // 2 # hidden channels
54 | self.cv1 = Conv(c1, c_, k, s, None, g, act)
55 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act)
56 |
57 | def forward(self, x):
58 | y = self.cv1(x)
59 | return torch.cat([y, self.cv2(y)], 1)
60 |
61 |
62 | class GhostBottleneck(nn.Module):
63 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
64 | def __init__(self, c1, c2, k, s):
65 | super(GhostBottleneck, self).__init__()
66 | c_ = c2 // 2
67 | self.conv = nn.Sequential(
68 | GhostConv(c1, c_, 1, 1), # pw
69 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw
70 | GhostConv(c_, c2, 1, 1, act=False),
71 | ) # pw-linear
72 | self.shortcut = (
73 | nn.Sequential(
74 | DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)
75 | )
76 | if s == 2
77 | else nn.Identity()
78 | )
79 |
80 | def forward(self, x):
81 | return self.conv(x) + self.shortcut(x)
82 |
83 |
84 | class MixConv2d(nn.Module):
85 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
86 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
87 | super(MixConv2d, self).__init__()
88 | groups = len(k)
89 | if equal_ch: # equal c_ per group
90 | i = torch.linspace(0, groups - 1e-6, c2).floor() # c2 indices
91 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels
92 | else: # equal weight.numel() per group
93 | b = [c2] + [0] * groups
94 | a = np.eye(groups + 1, groups, k=-1)
95 | a -= np.roll(a, 1, axis=1)
96 | a *= np.array(k) ** 2
97 | a[0] = 1
98 | c_ = np.linalg.lstsq(a, b, rcond=None)[
99 | 0
100 | ].round() # solve for equal weight indices, ax = b
101 |
102 | self.m = nn.ModuleList(
103 | [
104 | nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False)
105 | for g in range(groups)
106 | ]
107 | )
108 | self.bn = nn.BatchNorm2d(c2)
109 | self.act = nn.LeakyReLU(0.1, inplace=True)
110 |
111 | def forward(self, x):
112 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
113 |
114 |
115 | class Ensemble(nn.ModuleList):
116 | # Ensemble of models
117 | def __init__(self):
118 | super(Ensemble, self).__init__()
119 |
120 | def forward(self, x, augment=False):
121 | y = []
122 | for module in self:
123 | y.append(module(x, augment)[0])
124 | # y = torch.stack(y).max(0)[0] # max ensemble
125 | # y = torch.stack(y).mean(0) # mean ensemble
126 | y = torch.cat(y, 1) # nms ensemble
127 | return y, None # inference, train output
128 |
129 |
130 | def attempt_load(weights, map_location=None):
131 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
132 | model = Ensemble()
133 | for w in weights if isinstance(weights, list) else [weights]:
134 | attempt_download(w)
135 | model.append(
136 | torch.load(w, map_location=map_location)["model"].float().fuse().eval()
137 | ) # load FP32 model
138 |
139 | # # Compatibility updates
140 | # for m in model.modules():
141 | # if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
142 | # m.inplace = True # pytorch 1.7.0 compatibility
143 | # elif type(m) is Conv:
144 | # m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
145 |
146 | if len(model) == 1:
147 | return model[-1] # return model
148 | # else:
149 | # print("Ensemble created with %s\n" % weights)
150 | # for k in ["names", "stride"]:
151 | # setattr(model, k, getattr(model[-1], k))
152 | # return model # return ensemble
153 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5l.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5l6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5m.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5m6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.67 # model depth multiple
4 | width_multiple: 0.75 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n-0.5.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 0.5 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32
22 | [-1, 3, ShuffleV2Block, [512, 1]], # 6
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [128, 1, 1]],
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
30 | [-1, 1, C3, [128, False]], # 10
31 |
32 | [-1, 1, Conv, [128, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small)
36 |
37 | [-1, 1, Conv, [128, 3, 2]],
38 | [[-1, 11], 1, Concat, [1]], # cat head P4
39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium)
40 |
41 | [-1, 1, Conv, [128, 3, 2]],
42 | [[-1, 7], 1, Concat, [1]], # cat head P5
43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large)
44 |
45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
46 | ]
47 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32
22 | [-1, 3, ShuffleV2Block, [512, 1]], # 6
23 | ]
24 |
25 | # YOLOv5 head
26 | head: [
27 | [-1, 1, Conv, [128, 1, 1]],
28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
30 | [-1, 1, C3, [128, False]], # 10
31 |
32 | [-1, 1, Conv, [128, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small)
36 |
37 | [-1, 1, Conv, [128, 3, 2]],
38 | [[-1, 11], 1, Concat, [1]], # cat head P4
39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium)
40 |
41 | [-1, 1, Conv, [128, 3, 2]],
42 | [[-1, 7], 1, Concat, [1]], # cat head P5
43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large)
44 |
45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
46 | ]
47 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5n6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 1.0 # model depth multiple
4 | width_multiple: 1.0 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4
18 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8
19 | [-1, 3, ShuffleV2Block, [128, 1]], # 2
20 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16
21 | [-1, 7, ShuffleV2Block, [256, 1]], # 4
22 | [-1, 1, ShuffleV2Block, [384, 2]], # 5-P5/32
23 | [-1, 3, ShuffleV2Block, [384, 1]], # 6
24 | [-1, 1, ShuffleV2Block, [512, 2]], # 7-P6/64
25 | [-1, 3, ShuffleV2Block, [512, 1]], # 8
26 | ]
27 |
28 | # YOLOv5 head
29 | head: [
30 | [-1, 1, Conv, [128, 1, 1]],
31 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
32 | [[-1, 6], 1, Concat, [1]], # cat backbone P5
33 | [-1, 1, C3, [128, False]], # 12
34 |
35 | [-1, 1, Conv, [128, 1, 1]],
36 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
37 | [[-1, 4], 1, Concat, [1]], # cat backbone P4
38 | [-1, 1, C3, [128, False]], # 16 (P4/16-medium)
39 |
40 | [-1, 1, Conv, [128, 1, 1]],
41 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
42 | [[-1, 2], 1, Concat, [1]], # cat backbone P3
43 | [-1, 1, C3, [128, False]], # 20 (P3/8-small)
44 |
45 | [-1, 1, Conv, [128, 3, 2]],
46 | [[-1, 17], 1, Concat, [1]], # cat head P4
47 | [-1, 1, C3, [128, False]], # 23 (P4/16-medium)
48 |
49 | [-1, 1, Conv, [128, 3, 2]],
50 | [[-1, 13], 1, Concat, [1]], # cat head P5
51 | [-1, 1, C3, [128, False]], # 26 (P5/32-large)
52 |
53 | [-1, 1, Conv, [128, 3, 2]],
54 | [[-1, 9], 1, Concat, [1]], # cat head P6
55 | [-1, 1, C3, [128, False]], # 29 (P6/64-large)
56 |
57 | [[20, 23, 26, 29], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
58 | ]
59 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5s.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.5 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [4, 5, 8, 10, 13, 16] # P3/8
9 | - [23, 29, 43, 55, 73, 105] # P4/16
10 | - [146, 217, 231, 300, 335, 433] # P5/32
11 |
12 | # YOLOv5 backbone
13 | backbone:
14 | # [from, number, module, args]
15 | [
16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P2/4
17 | [-1, 3, C3, [128]],
18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
19 | [-1, 9, C3, [256]],
20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
21 | [-1, 9, C3, [512]],
22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32
23 | [-1, 1, SPP, [1024, [3, 5, 7]]],
24 | [-1, 3, C3, [1024, False]], # 8
25 | ]
26 |
27 | # YOLOv5 head
28 | head: [
29 | [-1, 1, Conv, [512, 1, 1]],
30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
32 | [-1, 3, C3, [512, False]], # 12
33 |
34 | [-1, 1, Conv, [256, 1, 1]],
35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small)
38 |
39 | [-1, 1, Conv, [256, 3, 2]],
40 | [[-1, 13], 1, Concat, [1]], # cat head P4
41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium)
42 |
43 | [-1, 1, Conv, [512, 3, 2]],
44 | [[-1, 9], 1, Concat, [1]], # cat head P5
45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large)
46 |
47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
48 | ]
49 |
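50 | # Note: following YOLOv5's parse_model convention, depth_multiple scales each layer's
51 | # "number" (repeat count) and width_multiple scales its channel argument (rounded to a
52 | # multiple of 8). With the values above, [-1, 9, C3, [256]] is built as 3 repeats of C3
53 | # with 128 output channels (round(9 * 0.33) = 3, 256 * 0.5 = 128).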
--------------------------------------------------------------------------------
/face_detection/yolov5_face/models/yolov5s6.yaml:
--------------------------------------------------------------------------------
1 | # parameters
2 | nc: 1 # number of classes
3 | depth_multiple: 0.33 # model depth multiple
4 | width_multiple: 0.50 # layer channel multiple
5 |
6 | # anchors
7 | anchors:
8 | - [6, 7, 9, 11, 13, 16] # P3/8
9 | - [18, 23, 26, 33, 37, 47] # P4/16
10 | - [54, 67, 77, 104, 112, 154] # P5/32
11 | - [174, 238, 258, 355, 445, 568] # P6/64
12 |
13 | # YOLOv5 backbone
14 | backbone:
15 | # [from, number, module, args]
16 | [
17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P2/4
18 | [-1, 3, C3, [128]],
19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8
20 | [-1, 9, C3, [256]],
21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16
22 | [-1, 9, C3, [512]],
23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32
24 | [-1, 3, C3, [768]],
25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64
26 | [-1, 1, SPP, [1024, [3, 5, 7]]],
27 | [-1, 3, C3, [1024, False]], # 10
28 | ]
29 |
30 | # YOLOv5 head
31 | head: [
32 | [-1, 1, Conv, [768, 1, 1]],
33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5
35 | [-1, 3, C3, [768, False]], # 14
36 |
37 | [-1, 1, Conv, [512, 1, 1]],
38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4
40 | [-1, 3, C3, [512, False]], # 18
41 |
42 | [-1, 1, Conv, [256, 1, 1]],
43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]],
44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3
45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small)
46 |
47 | [-1, 1, Conv, [256, 3, 2]],
48 | [[-1, 19], 1, Concat, [1]], # cat head P4
49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium)
50 |
51 | [-1, 1, Conv, [512, 3, 2]],
52 | [[-1, 15], 1, Concat, [1]], # cat head P5
53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large)
54 |
55 | [-1, 1, Conv, [768, 3, 2]],
56 | [[-1, 11], 1, Concat, [1]], # cat head P6
57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge)
58 |
59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6)
60 | ]
61 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/activations.py:
--------------------------------------------------------------------------------
1 | # Activation functions
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU()
10 | @staticmethod
11 | def forward(x):
12 | return x * torch.sigmoid(x)
13 |
14 |
15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish()
16 | @staticmethod
17 | def forward(x):
18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML
19 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for torchscript, CoreML and ONNX
20 |
21 |
22 | class MemoryEfficientSwish(nn.Module):
23 | class F(torch.autograd.Function):
24 | @staticmethod
25 | def forward(ctx, x):
26 | ctx.save_for_backward(x)
27 | return x * torch.sigmoid(x)
28 |
29 | @staticmethod
30 | def backward(ctx, grad_output):
31 | x = ctx.saved_tensors[0]
32 | sx = torch.sigmoid(x)
33 | return grad_output * (sx * (1 + x * (1 - sx)))
34 |
35 | def forward(self, x):
36 | return self.F.apply(x)
37 |
38 |
39 | # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
40 | class Mish(nn.Module):
41 | @staticmethod
42 | def forward(x):
43 | return x * F.softplus(x).tanh()
44 |
45 |
46 | class MemoryEfficientMish(nn.Module):
47 | class F(torch.autograd.Function):
48 | @staticmethod
49 | def forward(ctx, x):
50 | ctx.save_for_backward(x)
51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
52 |
53 | @staticmethod
54 | def backward(ctx, grad_output):
55 | x = ctx.saved_tensors[0]
56 | sx = torch.sigmoid(x)
57 | fx = F.softplus(x).tanh()
58 | return grad_output * (fx + x * sx * (1 - fx * fx))
59 |
60 | def forward(self, x):
61 | return self.F.apply(x)
62 |
63 |
64 | # FReLU https://arxiv.org/abs/2007.11824 -------------------------------------------------------------------------------
65 | class FReLU(nn.Module):
66 | def __init__(self, c1, k=3): # ch_in, kernel
67 | super().__init__()
68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
69 | self.bn = nn.BatchNorm2d(c1)
70 |
71 | def forward(self, x):
72 | return torch.max(x, self.bn(self.conv(x)))
73 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/autoanchor.py:
--------------------------------------------------------------------------------
1 | # Auto-anchor utils
2 |
3 | import numpy as np
4 | import torch
5 | import yaml
6 | from scipy.cluster.vq import kmeans
7 | from tqdm import tqdm
8 | from utils.general import colorstr
9 |
10 |
11 | def check_anchor_order(m):
12 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
13 | a = m.anchor_grid.prod(-1).view(-1) # anchor area
14 | da = a[-1] - a[0] # delta a
15 | ds = m.stride[-1] - m.stride[0] # delta s
16 | if da.sign() != ds.sign(): # anchor order does not match stride order
17 | print("Reversing anchor order")
18 | m.anchors[:] = m.anchors.flip(0)
19 | m.anchor_grid[:] = m.anchor_grid.flip(0)
20 |
21 |
22 | def check_anchors(dataset, model, thr=4.0, imgsz=640):
23 | # Check anchor fit to data, recompute if necessary
24 | prefix = colorstr("autoanchor: ")
25 | print(f"\n{prefix}Analyzing anchors... ", end="")
26 | m = (
27 | model.module.model[-1] if hasattr(model, "module") else model.model[-1]
28 | ) # Detect()
29 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
30 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
31 | wh = torch.tensor(
32 | np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])
33 | ).float() # wh
34 |
35 | def metric(k): # compute metric
36 | r = wh[:, None] / k[None]
37 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric
38 | best = x.max(1)[0] # best_x
39 | aat = (x > 1.0 / thr).float().sum(1).mean() # anchors above threshold
40 | bpr = (best > 1.0 / thr).float().mean() # best possible recall
41 | return bpr, aat
42 |
43 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
44 | print(f"anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}", end="")
45 | if bpr < 0.98: # threshold to recompute
46 | print(". Attempting to improve anchors, please wait...")
47 | na = m.anchor_grid.numel() // 2 # number of anchors
48 | new_anchors = kmean_anchors(
49 | dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False
50 | )
51 | new_bpr = metric(new_anchors.reshape(-1, 2))[0]
52 | if new_bpr > bpr: # replace anchors
53 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as(
54 | m.anchors
55 | )
56 | m.anchor_grid[:] = new_anchors.clone().view_as(
57 | m.anchor_grid
58 | ) # for inference
59 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(
60 | m.anchors.device
61 | ).view(
62 | -1, 1, 1
63 | ) # loss
64 | check_anchor_order(m)
65 | print(
66 | f"{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future."
67 | )
68 | else:
69 | print(
70 | f"{prefix}Original anchors better than new anchors. Proceeding with original anchors."
71 | )
72 | print("") # newline
73 |
74 |
75 | def kmean_anchors(
76 | path="./data/coco128.yaml", n=9, img_size=640, thr=4.0, gen=1000, verbose=True
77 | ):
78 | """Creates kmeans-evolved anchors from training dataset
79 |
80 | Arguments:
81 | path: path to dataset *.yaml, or a loaded dataset
82 | n: number of anchors
83 | img_size: image size used for training
84 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0
85 | gen: generations to evolve anchors using genetic algorithm
86 | verbose: print all results
87 |
88 | Return:
89 | k: kmeans evolved anchors
90 |
91 | Usage:
92 | from utils.autoanchor import *; _ = kmean_anchors()
93 | """
94 | thr = 1.0 / thr
95 | prefix = colorstr("autoanchor: ")
96 |
97 | def metric(k, wh): # compute metrics
98 | r = wh[:, None] / k[None]
99 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric
100 | # x = wh_iou(wh, torch.tensor(k)) # iou metric
101 | return x, x.max(1)[0] # x, best_x
102 |
103 | def anchor_fitness(k): # mutation fitness
104 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh)
105 | return (best * (best > thr).float()).mean() # fitness
106 |
107 | def print_results(k):
108 | k = k[np.argsort(k.prod(1))] # sort small to large
109 | x, best = metric(k, wh0)
110 | bpr, aat = (best > thr).float().mean(), (
111 | x > thr
112 | ).float().mean() * n # best possible recall, anch > thr
113 | print(
114 | f"{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr"
115 | )
116 | print(
117 | f"{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, "
118 | f"past_thr={x[x > thr].mean():.3f}-mean: ",
119 | end="",
120 | )
121 | for i, x in enumerate(k):
122 | print(
123 | "%i,%i" % (round(x[0]), round(x[1])),
124 | end=", " if i < len(k) - 1 else "\n",
125 | ) # use in *.cfg
126 | return k
127 |
128 | if isinstance(path, str): # *.yaml file
129 | with open(path) as f:
130 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict
131 | from utils.datasets import LoadImagesAndLabels
132 |
133 | dataset = LoadImagesAndLabels(data_dict["train"], augment=True, rect=True)
134 | else:
135 | dataset = path # dataset
136 |
137 | # Get label wh
138 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True)
139 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh
140 |
141 | # Filter
142 | i = (wh0 < 3.0).any(1).sum()
143 | if i:
144 | print(
145 | f"{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size."
146 | )
147 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
148 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1
149 |
150 | # Kmeans calculation
151 | print(f"{prefix}Running kmeans for {n} anchors on {len(wh)} points...")
152 | s = wh.std(0) # sigmas for whitening
153 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
154 | k *= s
155 | wh = torch.tensor(wh, dtype=torch.float32) # filtered
156 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered
157 | k = print_results(k)
158 |
159 | # Plot
160 | # k, d = [None] * 20, [None] * 20
161 | # for i in tqdm(range(1, 21)):
162 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance
163 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True)
164 | # ax = ax.ravel()
165 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.')
166 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh
167 | # ax[0].hist(wh[wh[:, 0]<100, 0],400)
168 | # ax[1].hist(wh[wh[:, 1]<100, 1],400)
169 | # fig.savefig('wh.png', dpi=200)
170 |
171 | # Evolve
172 | npr = np.random
173 | f, sh, mp, s = (
174 | anchor_fitness(k),
175 | k.shape,
176 | 0.9,
177 | 0.1,
178 | ) # fitness, generations, mutation prob, sigma
179 | pbar = tqdm(
180 | range(gen), desc=f"{prefix}Evolving anchors with Genetic Algorithm:"
181 | ) # progress bar
182 | for _ in pbar:
183 | v = np.ones(sh)
184 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
185 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(
186 | 0.3, 3.0
187 | )
188 | kg = (k.copy() * v).clip(min=2.0)
189 | fg = anchor_fitness(kg)
190 | if fg > f:
191 | f, k = fg, kg.copy()
192 | pbar.desc = (
193 | f"{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}"
194 | )
195 | if verbose:
196 | print_results(k)
197 |
198 | return print_results(k)
199 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/google_utils.py:
--------------------------------------------------------------------------------
1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries
2 |
3 | import os
4 | import platform
5 | import subprocess
6 | import time
7 | from pathlib import Path
8 |
9 | import requests
10 | import torch
11 |
12 |
13 | def gsutil_getsize(url=""):
14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
15 | s = subprocess.check_output(f"gsutil du {url}", shell=True).decode("utf-8")
16 | return eval(s.split(" ")[0]) if len(s) else 0 # bytes
17 |
18 |
19 | def attempt_download(file, repo="ultralytics/yolov5"):
20 | # Attempt file download if does not exist
21 | file = Path(str(file).strip().replace("'", "").lower())
22 |
23 | if not file.exists():
24 | try:
25 | response = requests.get(
26 | f"https://api.github.com/repos/{repo}/releases/latest"
27 | ).json() # github api
28 | assets = [
29 | x["name"] for x in response["assets"]
30 | ] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
31 | tag = response["tag_name"] # i.e. 'v1.0'
32 | except: # fallback plan
33 | assets = ["yolov5s.pt", "yolov5m.pt", "yolov5l.pt", "yolov5x.pt"]
34 | tag = (
35 | subprocess.check_output("git tag", shell=True)
36 | .decode("utf-8")
37 | .split("\n")[-2]
38 | )
39 |
40 | name = file.name
41 | if name in assets:
42 | msg = f"{file} missing, try downloading from https://github.com/{repo}/releases/"
43 | redundant = False # second download option
44 | try: # GitHub
45 | url = f"https://github.com/{repo}/releases/download/{tag}/{name}"
46 | print(f"Downloading {url} to {file}...")
47 | torch.hub.download_url_to_file(url, file)
48 | assert file.exists() and file.stat().st_size > 1e6 # check
49 | except Exception as e: # GCP
50 | print(f"Download error: {e}")
51 | assert redundant, "No secondary mirror"
52 | url = f"https://storage.googleapis.com/{repo}/ckpt/{name}"
53 | print(f"Downloading {url} to {file}...")
54 | os.system(
55 | f"curl -L {url} -o {file}"
56 | ) # torch.hub.download_url_to_file(url, weights)
57 | finally:
58 | if not file.exists() or file.stat().st_size < 1e6: # check
59 | file.unlink(missing_ok=True) # remove partial downloads
60 | print(f"ERROR: Download failure: {msg}")
61 | print("")
62 | return
63 |
64 |
65 | def gdrive_download(id="16TiPfZj7htmTyhntwcZyEEAejOUxuT6m", file="tmp.zip"):
66 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download()
67 | t = time.time()
68 | file = Path(file)
69 | cookie = Path("cookie") # gdrive cookie
70 | print(
71 | f"Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ",
72 | end="",
73 | )
74 | file.unlink(missing_ok=True) # remove existing file
75 | cookie.unlink(missing_ok=True) # remove existing cookie
76 |
77 | # Attempt file download
78 | out = "NUL" if platform.system() == "Windows" else "/dev/null"
79 | os.system(
80 | f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}'
81 | )
82 | if os.path.exists("cookie"): # large file
83 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
84 | else: # small file
85 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
86 | r = os.system(s) # execute, capture return
87 | cookie.unlink(missing_ok=True) # remove existing cookie
88 |
89 | # Error check
90 | if r != 0:
91 | file.unlink(missing_ok=True) # remove partial
92 | print("Download error ") # raise Exception('Download error')
93 | return r
94 |
95 | # Unzip if archive
96 | if file.suffix == ".zip":
97 | print("unzipping... ", end="")
98 | os.system(f"unzip -q {file}") # unzip
99 | file.unlink() # remove zip to free space
100 |
101 | print(f"Done ({time.time() - t:.1f}s)")
102 | return r
103 |
104 |
105 | def get_token(cookie="./cookie"):
106 | with open(cookie) as f:
107 | for line in f:
108 | if "download" in line:
109 | return line.split()[-1]
110 | return ""
111 |
112 |
113 | # def upload_blob(bucket_name, source_file_name, destination_blob_name):
114 | # # Uploads a file to a bucket
115 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
116 | #
117 | # storage_client = storage.Client()
118 | # bucket = storage_client.get_bucket(bucket_name)
119 | # blob = bucket.blob(destination_blob_name)
120 | #
121 | # blob.upload_from_filename(source_file_name)
122 | #
123 | # print('File {} uploaded to {}.'.format(
124 | # source_file_name,
125 | # destination_blob_name))
126 | #
127 | #
128 | # def download_blob(bucket_name, source_blob_name, destination_file_name):
129 | # # Uploads a blob from a bucket
130 | # storage_client = storage.Client()
131 | # bucket = storage_client.get_bucket(bucket_name)
132 | # blob = bucket.blob(source_blob_name)
133 | #
134 | # blob.download_to_filename(destination_file_name)
135 | #
136 | # print('Blob {} downloaded to {}.'.format(
137 | # source_blob_name,
138 | # destination_file_name))
139 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/infer_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def decode_infer(output, stride, gt_per_grid, numclass): # gt_per_grid and numclass must be passed in explicitly (there is no self here)
5 | # logging.info(torch.tensor(output.shape[0]))
6 | # logging.info(output.shape)
7 | # # bz is batch-size
8 | # bz = tuple(torch.tensor(output.shape[0]))
9 | # gridsize = tuple(torch.tensor(output.shape[-1]))
10 | # logging.info(gridsize)
11 | sh = torch.tensor(output.shape)
12 | bz = sh[0]
13 | gridsize = sh[-1]
14 |
15 | output = output.permute(0, 2, 3, 1)
16 | output = output.view(bz, gridsize, gridsize, gt_per_grid, 5 + numclass)
17 | x1y1, x2y2, conf, prob = torch.split(output, [2, 2, 1, numclass], dim=4)
18 |
19 | shiftx = torch.arange(0, gridsize, dtype=torch.float32)
20 | shifty = torch.arange(0, gridsize, dtype=torch.float32)
21 | shifty, shiftx = torch.meshgrid([shiftx, shifty])
22 | shiftx = shiftx.unsqueeze(-1).repeat(bz, 1, 1, gt_per_grid)
23 | shifty = shifty.unsqueeze(-1).repeat(bz, 1, 1, gt_per_grid)
24 |
25 | xy_grid = torch.stack([shiftx, shifty], dim=4).cuda()
26 | x1y1 = (xy_grid + 0.5 - torch.exp(x1y1)) * stride
27 | x2y2 = (xy_grid + 0.5 + torch.exp(x2y2)) * stride
28 |
29 | xyxy = torch.cat((x1y1, x2y2), dim=4)
30 | conf = torch.sigmoid(conf)
31 | prob = torch.sigmoid(prob)
32 | output = torch.cat((xyxy, conf, prob), 4)
33 | output = output.view(bz, -1, 5 + numclass)
34 | return output
35 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/metrics.py:
--------------------------------------------------------------------------------
1 | # Model validation metrics
2 |
3 | from pathlib import Path
4 |
5 | import matplotlib.pyplot as plt
6 | import numpy as np
7 | import torch
8 |
9 | from . import general
10 |
11 |
12 | def fitness(x):
13 | # Model fitness as a weighted combination of metrics
14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
15 | return (x[:, :4] * w).sum(1)
16 |
17 |
18 | def ap_per_class(
19 | tp,
20 | conf,
21 | pred_cls,
22 | target_cls,
23 | plot=False,
24 | save_dir="precision-recall_curve.png",
25 | names=[],
26 | ):
27 | """Compute the average precision, given the recall and precision curves.
28 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
29 | # Arguments
30 | tp: True positives (nparray, nx1 or nx10).
31 | conf: Objectness value from 0-1 (nparray).
32 | pred_cls: Predicted object classes (nparray).
33 | target_cls: True object classes (nparray).
34 | plot: Plot precision-recall curve at mAP@0.5
35 | save_dir: Plot save directory
36 | # Returns
37 | The average precision as computed in py-faster-rcnn.
38 | """
39 |
40 | # Sort by objectness
41 | i = np.argsort(-conf)
42 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
43 |
44 | # Find unique classes
45 | unique_classes = np.unique(target_cls)
46 |
47 | # Create Precision-Recall curve and compute AP for each class
48 | px, py = np.linspace(0, 1, 1000), [] # for plotting
49 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
50 | s = [
51 | unique_classes.shape[0],
52 | tp.shape[1],
53 | ] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
54 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
55 | for ci, c in enumerate(unique_classes):
56 | i = pred_cls == c
57 | n_l = (target_cls == c).sum() # number of labels
58 | n_p = i.sum() # number of predictions
59 |
60 | if n_p == 0 or n_l == 0:
61 | continue
62 | else:
63 | # Accumulate FPs and TPs
64 | fpc = (1 - tp[i]).cumsum(0)
65 | tpc = tp[i].cumsum(0)
66 |
67 | # Recall
68 | recall = tpc / (n_l + 1e-16) # recall curve
69 | r[ci] = np.interp(
70 | -pr_score, -conf[i], recall[:, 0]
71 | ) # r at pr_score, negative x, xp because xp decreases
72 |
73 | # Precision
74 | precision = tpc / (tpc + fpc) # precision curve
75 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score
76 |
77 | # AP from recall-precision curve
78 | for j in range(tp.shape[1]):
79 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
80 | if plot and (j == 0):
81 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
82 |
83 | # Compute F1 score (harmonic mean of precision and recall)
84 | f1 = 2 * p * r / (p + r + 1e-16)
85 |
86 | if plot:
87 | plot_pr_curve(px, py, ap, save_dir, names)
88 |
89 | return p, r, ap, f1, unique_classes.astype("int32")
90 |
91 |
92 | def compute_ap(recall, precision):
93 | """Compute the average precision, given the recall and precision curves
94 | # Arguments
95 | recall: The recall curve (list)
96 | precision: The precision curve (list)
97 | # Returns
98 | Average precision, precision curve, recall curve
99 | """
100 |
101 | # Append sentinel values to beginning and end
102 | mrec = np.concatenate(([0.0], recall, [recall[-1] + 0.01]))
103 | mpre = np.concatenate(([1.0], precision, [0.0]))
104 |
105 | # Compute the precision envelope
106 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
107 |
108 | # Integrate area under curve
109 | method = "interp" # methods: 'continuous', 'interp'
110 | if method == "interp":
111 | x = np.linspace(0, 1, 101) # 101-point interp (COCO)
112 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
113 | else: # 'continuous'
114 | i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
115 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
116 |
117 | return ap, mpre, mrec
118 |
119 |
120 | class ConfusionMatrix:
121 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
122 | def __init__(self, nc, conf=0.25, iou_thres=0.45):
123 | self.matrix = np.zeros((nc + 1, nc + 1))
124 | self.nc = nc # number of classes
125 | self.conf = conf
126 | self.iou_thres = iou_thres
127 |
128 | def process_batch(self, detections, labels):
129 | """
130 | Update the confusion matrix with a batch of detections and ground-truth labels.
131 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
132 | Arguments:
133 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class
134 | labels (Array[M, 5]), class, x1, y1, x2, y2
135 | Returns:
136 | None, updates confusion matrix accordingly
137 | """
138 | detections = detections[detections[:, 4] > self.conf]
139 | gt_classes = labels[:, 0].int()
140 | detection_classes = detections[:, 5].int()
141 | iou = general.box_iou(labels[:, 1:], detections[:, :4])
142 |
143 | x = torch.where(iou > self.iou_thres)
144 | if x[0].shape[0]:
145 | matches = (
146 | torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1)
147 | .cpu()
148 | .numpy()
149 | )
150 | if x[0].shape[0] > 1:
151 | matches = matches[matches[:, 2].argsort()[::-1]]
152 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
153 | matches = matches[matches[:, 2].argsort()[::-1]]
154 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
155 | else:
156 | matches = np.zeros((0, 3))
157 |
158 | n = matches.shape[0] > 0
159 | m0, m1, _ = matches.transpose().astype(np.int16)
160 | for i, gc in enumerate(gt_classes):
161 | j = m0 == i
162 | if n and sum(j) == 1:
163 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct
164 | else:
165 | self.matrix[gc, self.nc] += 1 # background FP
166 |
167 | if n:
168 | for i, dc in enumerate(detection_classes):
169 | if not any(m1 == i):
170 | self.matrix[self.nc, dc] += 1 # background FN
171 |
172 | def matrix(self):
173 | return self.matrix
174 |
175 | def plot(self, save_dir="", names=()):
176 | try:
177 | import seaborn as sn
178 |
179 | array = self.matrix / (
180 | self.matrix.sum(0).reshape(1, self.nc + 1) + 1e-6
181 | ) # normalize
182 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00)
183 |
184 | fig = plt.figure(figsize=(12, 9), tight_layout=True)
185 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size
186 | labels = (0 < len(names) < 99) and len(
187 | names
188 | ) == self.nc # apply names to ticklabels
189 | sn.heatmap(
190 | array,
191 | annot=self.nc < 30,
192 | annot_kws={"size": 8},
193 | cmap="Blues",
194 | fmt=".2f",
195 | square=True,
196 | xticklabels=names + ["background FN"] if labels else "auto",
197 | yticklabels=names + ["background FP"] if labels else "auto",
198 | ).set_facecolor((1, 1, 1))
199 | fig.axes[0].set_xlabel("True")
200 | fig.axes[0].set_ylabel("Predicted")
201 | fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250)
202 | except Exception as e:
203 | pass
204 |
205 | def print(self):
206 | for i in range(self.nc + 1):
207 | print(" ".join(map(str, self.matrix[i])))
208 |
209 |
210 | # Plots ----------------------------------------------------------------------------------------------------------------
211 |
212 |
213 | def plot_pr_curve(px, py, ap, save_dir=".", names=()):
214 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
215 | py = np.stack(py, axis=1)
216 |
217 | if 0 < len(names) < 21: # show mAP in legend if < 21 classes
218 | for i, y in enumerate(py.T):
219 | ax.plot(
220 | px, y, linewidth=1, label=f"{names[i]} %.3f" % ap[i, 0]
221 | ) # plot(recall, precision)
222 | else:
223 | ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision)
224 |
225 | ax.plot(
226 | px,
227 | py.mean(1),
228 | linewidth=3,
229 | color="blue",
230 | label="all classes %.3f mAP@0.5" % ap[:, 0].mean(),
231 | )
232 | ax.set_xlabel("Recall")
233 | ax.set_ylabel("Precision")
234 | ax.set_xlim(0, 1)
235 | ax.set_ylim(0, 1)
236 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
237 | fig.savefig(Path(save_dir) / "precision_recall_curve.png", dpi=250)
238 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/wandb_logging/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/wandb_logging/__init__.py
--------------------------------------------------------------------------------
/face_detection/yolov5_face/utils/wandb_logging/log_dataset.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import yaml
4 | from wandb_utils import WandbLogger
5 |
6 | WANDB_ARTIFACT_PREFIX = "wandb-artifact://"
7 |
8 |
9 | def create_dataset_artifact(opt):
10 | with open(opt.data) as f:
11 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict
12 | logger = WandbLogger(opt, "", None, data, job_type="Dataset Creation")
13 |
14 |
15 | if __name__ == "__main__":
16 | parser = argparse.ArgumentParser()
17 | parser.add_argument(
18 | "--data", type=str, default="data/coco128.yaml", help="data.yaml path"
19 | )
20 | parser.add_argument(
21 | "--single-cls", action="store_true", help="train as single-class dataset"
22 | )
23 | parser.add_argument(
24 | "--project", type=str, default="YOLOv5", help="name of W&B Project"
25 | )
26 | opt = parser.parse_args()
27 | opt.resume = False # Explicitly disallow resume check for dataset upload job
28 |
29 | create_dataset_artifact(opt)
30 |
--------------------------------------------------------------------------------
/face_detection/yolov5_face/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1CGq-2AfcSyWGwZWs9sIzQ1BXhRkPGgxF?usp=sharing
4 |
--------------------------------------------------------------------------------
/face_recognition/arcface/model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from torch import nn
4 |
5 |
6 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
7 | """3x3 convolution with padding"""
8 | return nn.Conv2d(
9 | in_planes,
10 | out_planes,
11 | kernel_size=3,
12 | stride=stride,
13 | padding=dilation,
14 | groups=groups,
15 | bias=False,
16 | dilation=dilation,
17 | )
18 |
19 |
20 | def conv1x1(in_planes, out_planes, stride=1):
21 | """1x1 convolution"""
22 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
23 |
24 |
25 | class IBasicBlock(nn.Module):
26 | expansion = 1
27 |
28 | def __init__(
29 | self,
30 | inplanes,
31 | planes,
32 | stride=1,
33 | downsample=None,
34 | groups=1,
35 | base_width=64,
36 | dilation=1,
37 | ):
38 | super(IBasicBlock, self).__init__()
39 | if groups != 1 or base_width != 64:
40 | raise ValueError("BasicBlock only supports groups=1 and base_width=64")
41 | if dilation > 1:
42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
43 | self.bn1 = nn.BatchNorm2d(
44 | inplanes,
45 | eps=1e-05,
46 | )
47 | self.conv1 = conv3x3(inplanes, planes)
48 | self.bn2 = nn.BatchNorm2d(
49 | planes,
50 | eps=1e-05,
51 | )
52 | self.prelu = nn.PReLU(planes)
53 | self.conv2 = conv3x3(planes, planes, stride)
54 | self.bn3 = nn.BatchNorm2d(
55 | planes,
56 | eps=1e-05,
57 | )
58 | self.downsample = downsample
59 | self.stride = stride
60 |
61 | def forward(self, x):
62 | identity = x
63 | out = self.bn1(x)
64 | out = self.conv1(out)
65 | out = self.bn2(out)
66 | out = self.prelu(out)
67 | out = self.conv2(out)
68 | out = self.bn3(out)
69 | if self.downsample is not None:
70 | identity = self.downsample(x)
71 | out += identity
72 | return out
73 |
74 |
75 | class IResNet(nn.Module):
76 | fc_scale = 7 * 7
77 |
78 | def __init__(
79 | self,
80 | block,
81 | layers,
82 | dropout=0,
83 | num_features=512,
84 | zero_init_residual=False,
85 | groups=1,
86 | width_per_group=64,
87 | replace_stride_with_dilation=None,
88 | fp16=False,
89 | ):
90 | super(IResNet, self).__init__()
91 | self.fp16 = fp16
92 | self.inplanes = 64
93 | self.dilation = 1
94 | if replace_stride_with_dilation is None:
95 | replace_stride_with_dilation = [False, False, False]
96 | if len(replace_stride_with_dilation) != 3:
97 | raise ValueError(
98 | "replace_stride_with_dilation should be None "
99 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
100 | )
101 | self.groups = groups
102 | self.base_width = width_per_group
103 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
104 | self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05)
105 | self.prelu = nn.PReLU(self.inplanes)
106 | self.layer1 = self._make_layer(block, 64, layers[0], stride=2)
107 | self.layer2 = self._make_layer(
108 | block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
109 | )
110 | self.layer3 = self._make_layer(
111 | block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
112 | )
113 | self.layer4 = self._make_layer(
114 | block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
115 | )
116 | self.bn2 = nn.BatchNorm2d(
117 | 512 * block.expansion,
118 | eps=1e-05,
119 | )
120 | self.dropout = nn.Dropout(p=dropout, inplace=True)
121 | self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features)
122 | self.features = nn.BatchNorm1d(num_features, eps=1e-05)
123 | nn.init.constant_(self.features.weight, 1.0)
124 | self.features.weight.requires_grad = False
125 |
126 | for m in self.modules():
127 | if isinstance(m, nn.Conv2d):
128 | nn.init.normal_(m.weight, 0, 0.1)
129 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
130 | nn.init.constant_(m.weight, 1)
131 | nn.init.constant_(m.bias, 0)
132 |
133 | if zero_init_residual:
134 | for m in self.modules():
135 | if isinstance(m, IBasicBlock):
136 | nn.init.constant_(m.bn2.weight, 0)
137 |
138 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
139 | downsample = None
140 | previous_dilation = self.dilation
141 | if dilate:
142 | self.dilation *= stride
143 | stride = 1
144 | if stride != 1 or self.inplanes != planes * block.expansion:
145 | downsample = nn.Sequential(
146 | conv1x1(self.inplanes, planes * block.expansion, stride),
147 | nn.BatchNorm2d(
148 | planes * block.expansion,
149 | eps=1e-05,
150 | ),
151 | )
152 | layers = []
153 | layers.append(
154 | block(
155 | self.inplanes,
156 | planes,
157 | stride,
158 | downsample,
159 | self.groups,
160 | self.base_width,
161 | previous_dilation,
162 | )
163 | )
164 | self.inplanes = planes * block.expansion
165 | for _ in range(1, blocks):
166 | layers.append(
167 | block(
168 | self.inplanes,
169 | planes,
170 | groups=self.groups,
171 | base_width=self.base_width,
172 | dilation=self.dilation,
173 | )
174 | )
175 |
176 | return nn.Sequential(*layers)
177 |
178 | def forward(self, x):
179 | with torch.cuda.amp.autocast(self.fp16):
180 | x = self.conv1(x)
181 | x = self.bn1(x)
182 | x = self.prelu(x)
183 | x = self.layer1(x)
184 | x = self.layer2(x)
185 | x = self.layer3(x)
186 | x = self.layer4(x)
187 | x = self.bn2(x)
188 | x = torch.flatten(x, 1)
189 | x = self.dropout(x)
190 | x = self.fc(x.float() if self.fp16 else x)
191 | x = self.features(x)
192 | x = F.normalize(x, dim=1)
193 | return x
194 |
195 |
196 | def _iresnet(arch, block, layers, pretrained, progress, **kwargs):
197 | model = IResNet(block, layers, **kwargs)
198 | if pretrained:
199 | raise ValueError()
200 | return model
201 |
202 |
203 | def iresnet18(pretrained=False, progress=True, **kwargs):
204 | return _iresnet("iresnet18", IBasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs)
205 |
206 |
207 | def iresnet34(pretrained=False, progress=True, **kwargs):
208 | return _iresnet("iresnet34", IBasicBlock, [3, 4, 6, 3], pretrained, progress, **kwargs)
209 |
210 |
211 | def iresnet50(pretrained=False, progress=True, **kwargs):
212 | return _iresnet("iresnet50", IBasicBlock, [3, 4, 14, 3], pretrained, progress, **kwargs)
213 |
214 |
215 | def iresnet100(pretrained=False, progress=True, **kwargs):
216 | return _iresnet("iresnet100", IBasicBlock, [3, 13, 30, 3], pretrained, progress, **kwargs)
217 |
218 |
219 | def iresnet200(pretrained=False, progress=True, **kwargs):
220 | return _iresnet("iresnet200", IBasicBlock, [6, 26, 60, 6], pretrained, progress, **kwargs)
221 |
222 |
223 | def iresnet_inference(model_name, path, device="cuda"):
224 | if model_name == "r18":
225 | model = iresnet18()
226 | elif model_name == "r34":
227 | model = iresnet34()
228 | elif model_name == "r50":
229 | model = iresnet50()
230 | elif model_name == "r100":
231 | model = iresnet100()
232 | else:
233 | raise ValueError()
234 |
235 | weight = torch.load(path, map_location=device)
236 |
237 | model.load_state_dict(weight)
238 | model.to(device)
239 |
240 | return model.eval()
241 |
--------------------------------------------------------------------------------
/face_recognition/arcface/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def read_features(feature_path):
5 | try:
6 | data = np.load(feature_path + ".npz", allow_pickle=True)
7 | images_name = data["images_name"]
8 | images_emb = data["images_emb"]
9 |
10 | return images_name, images_emb
11 | except Exception: # feature file missing or unreadable
12 | return None
13 |
14 |
15 | def compare_encodings(encoding, encodings):
16 | sims = np.dot(encodings, encoding.T) # cosine similarities (the ArcFace embeddings are L2-normalized)
17 | pare_index = np.argmax(sims) # index of the best-matching known embedding
18 | score = sims[pare_index]
19 | return score, pare_index
20 |
--------------------------------------------------------------------------------
/face_recognition/arcface/weights/README.md:
--------------------------------------------------------------------------------
1 | ## Download Weights:
2 |
3 | - https://drive.google.com/drive/folders/1CHHb_7wbvfjKPFNKVBb76lL5sVfBLcv5?usp=sharing
4 |
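5 | ## Usage
6 |
7 | A minimal sketch of loading a downloaded checkpoint with `iresnet_inference` from
8 | `face_recognition/arcface/model.py`, run from the repository root. The file name
9 | `arcface_r100.pth` below is only an example; use the actual name of the downloaded file.
10 |
11 | ```python
12 | import torch
13 |
14 | from face_recognition.arcface.model import iresnet_inference
15 |
16 | # Build an iresnet100 backbone, load the ArcFace weights and switch to eval mode.
17 | device = "cuda" if torch.cuda.is_available() else "cpu"
18 | model = iresnet_inference(
19 |     model_name="r100",
20 |     path="face_recognition/arcface/weights/arcface_r100.pth",
21 |     device=device,
22 | )
23 |
24 | # The model maps an aligned 112x112 RGB face tensor to a 512-D L2-normalized embedding.
25 | face = torch.randn(1, 3, 112, 112, device=device)
26 | with torch.no_grad():
27 |     embedding = model(face)
28 | print(embedding.shape)  # torch.Size([1, 512])
29 | ```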
--------------------------------------------------------------------------------
/face_tracking/config/config_tracking.yaml:
--------------------------------------------------------------------------------
1 | device: cpu # device used for tracking inference
2 | fps: 30 # frame rate of the input video stream
3 | match_thresh: 0.8 # IoU matching threshold for the first association step
4 | min_box_area: 10 # discard tracked boxes smaller than this area
5 | save_result: True # save the annotated output video
6 | track_buffer: 30 # number of frames to keep lost tracks before removing them
7 | track_thresh: 0.5 # detection score threshold for high-confidence boxes
8 | aspect_ratio_thresh: 1.6 # discard boxes whose aspect ratio exceeds this value
9 | ckpt: bytetrack_s_mot17.pth.tar # ByteTrack checkpoint (see face_tracking/pretrained)
10 | fp16: True # use half-precision inference
11 |
--------------------------------------------------------------------------------
/face_tracking/pretrained/README.md:
--------------------------------------------------------------------------------
1 | ## Model zoo
2 |
3 | | Model | MOTA | IDF1 | IDs | FPS |
4 | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ---- | --- | ---- |
5 | | bytetrack_x_mot17 [[google]](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view?usp=sharing), [[baidu(code:ic0i)]](https://pan.baidu.com/s/1OJKrcQa_JP9zofC6ZtGBpw) | 90.0 | 83.3 | 422 | 29.6 |
6 | | bytetrack_l_mot17 [[google]](https://drive.google.com/file/d/1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz/view?usp=sharing), [[baidu(code:1cml)]](https://pan.baidu.com/s/1242adimKM6TYdeLU2qnuRA) | 88.7 | 80.7 | 460 | 43.7 |
7 | | bytetrack_m_mot17 [[google]](https://drive.google.com/file/d/11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun/view?usp=sharing), [[baidu(code:u3m4)]](https://pan.baidu.com/s/1fKemO1uZfvNSLzJfURO4TQ) | 87.0 | 80.1 | 477 | 54.1 |
8 | | bytetrack_s_mot17 [[google]](https://drive.google.com/file/d/1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj/view?usp=sharing), [[baidu(code:qflm)]](https://pan.baidu.com/s/1PiP1kQfgxAIrnGUbFP6Wfg) | 79.2 | 74.3 | 533 | 64.5 |
9 |
10 | ## Reference
11 |
12 | - https://github.com/ifzhang/ByteTrack?tab=readme-ov-file#model-zoo
13 |
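14 | ## Usage
15 |
16 | The tracking configuration in `face_tracking/config/config_tracking.yaml` points at
17 | `bytetrack_s_mot17.pth.tar` from this zoo. Below is a minimal sketch (assuming the YAML file
18 | is simply loaded into a dict and the script is run from the repository root) of constructing
19 | the `BYTETracker` defined in `face_tracking/tracker/byte_tracker.py` with that configuration:
20 |
21 | ```python
22 | import yaml
23 |
24 | from face_tracking.tracker.byte_tracker import BYTETracker
25 |
26 | # Load the tracking settings as a plain dict; BYTETracker reads
27 | # "track_thresh", "track_buffer" and "match_thresh" from it.
28 | with open("face_tracking/config/config_tracking.yaml") as f:
29 |     config = yaml.safe_load(f)
30 |
31 | tracker = BYTETracker(args=config, frame_rate=config["fps"])
32 |
33 | # For every frame, pass the detector output (boxes and scores) to
34 | # tracker.update(output_results, img_info, img_size) to obtain the active tracks.
35 | ```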
--------------------------------------------------------------------------------
/face_tracking/tracker/basetrack.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 |
3 | import numpy as np
4 |
5 |
6 | class TrackState(object):
7 | New = 0
8 | Tracked = 1
9 | Lost = 2
10 | Removed = 3
11 |
12 |
13 | class BaseTrack(object):
14 | _count = 0
15 |
16 | track_id = 0
17 | is_activated = False
18 | state = TrackState.New
19 |
20 | history = OrderedDict()
21 | features = []
22 | curr_feature = None
23 | score = 0
24 | start_frame = 0
25 | frame_id = 0
26 | time_since_update = 0
27 |
28 | # multi-camera
29 | location = (np.inf, np.inf)
30 |
31 | @property
32 | def end_frame(self):
33 | return self.frame_id
34 |
35 | @staticmethod
36 | def next_id():
37 | BaseTrack._count += 1
38 | return BaseTrack._count
39 |
40 | def activate(self, *args):
41 | raise NotImplementedError
42 |
43 | def predict(self):
44 | raise NotImplementedError
45 |
46 | def update(self, *args, **kwargs):
47 | raise NotImplementedError
48 |
49 | def mark_lost(self):
50 | self.state = TrackState.Lost
51 |
52 | def mark_removed(self):
53 | self.state = TrackState.Removed
54 |
--------------------------------------------------------------------------------
/face_tracking/tracker/byte_tracker.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import torch
5 |
6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
7 | sys.path.append(BASE_DIR)
8 |
9 | import matching
10 | import numpy as np
11 |
12 | from .basetrack import BaseTrack, TrackState
13 | from .kalman_filter import KalmanFilter
14 |
15 |
16 | class STrack(BaseTrack):
17 | shared_kalman = KalmanFilter()
18 |
19 | def __init__(self, tlwh, score):
20 | # wait activate
21 | self._tlwh = np.asarray(tlwh, dtype=np.float64)
22 | self.kalman_filter = None
23 | self.mean, self.covariance = None, None
24 | self.is_activated = False
25 |
26 | self.score = score
27 | self.tracklet_len = 0
28 |
29 | def predict(self):
30 | mean_state = self.mean.copy()
31 | if self.state != TrackState.Tracked:
32 | mean_state[7] = 0
33 | self.mean, self.covariance = self.kalman_filter.predict(
34 | mean_state, self.covariance
35 | )
36 |
37 | @staticmethod
38 | def multi_predict(stracks):
39 | if len(stracks) > 0:
40 | multi_mean = np.asarray([st.mean.copy() for st in stracks])
41 | multi_covariance = np.asarray([st.covariance for st in stracks])
42 | for i, st in enumerate(stracks):
43 | if st.state != TrackState.Tracked:
44 | multi_mean[i][7] = 0
45 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(
46 | multi_mean, multi_covariance
47 | )
48 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):
49 | stracks[i].mean = mean
50 | stracks[i].covariance = cov
51 |
52 | def activate(self, kalman_filter, frame_id):
53 | """Start a new tracklet"""
54 | self.kalman_filter = kalman_filter
55 | self.track_id = self.next_id()
56 | self.mean, self.covariance = self.kalman_filter.initiate(
57 | self.tlwh_to_xyah(self._tlwh)
58 | )
59 |
60 | self.tracklet_len = 0
61 | self.state = TrackState.Tracked
62 | if frame_id == 1:
63 | self.is_activated = True
64 | # self.is_activated = True
65 | self.frame_id = frame_id
66 | self.start_frame = frame_id
67 |
68 | def re_activate(self, new_track, frame_id, new_id=False):
69 | self.mean, self.covariance = self.kalman_filter.update(
70 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
71 | )
72 | self.tracklet_len = 0
73 | self.state = TrackState.Tracked
74 | self.is_activated = True
75 | self.frame_id = frame_id
76 | if new_id:
77 | self.track_id = self.next_id()
78 | self.score = new_track.score
79 |
80 | def update(self, new_track, frame_id):
81 | """
82 | Update a matched track
83 | :type new_track: STrack
84 | :type frame_id: int
85 | :type update_feature: bool
86 | :return:
87 | """
88 | self.frame_id = frame_id
89 | self.tracklet_len += 1
90 |
91 | new_tlwh = new_track.tlwh
92 | self.mean, self.covariance = self.kalman_filter.update(
93 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)
94 | )
95 | self.state = TrackState.Tracked
96 | self.is_activated = True
97 |
98 | self.score = new_track.score
99 |
100 | @property
101 | # @jit(nopython=True)
102 | def tlwh(self):
103 | """Get current position in bounding box format `(top left x, top left y,
104 | width, height)`.
105 | """
106 | if self.mean is None:
107 | return self._tlwh.copy()
108 | ret = self.mean[:4].copy()
109 | ret[2] *= ret[3]
110 | ret[:2] -= ret[2:] / 2
111 | return ret
112 |
113 | @property
114 | # @jit(nopython=True)
115 | def tlbr(self):
116 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
117 | `(top left, bottom right)`.
118 | """
119 | ret = self.tlwh.copy()
120 | ret[2:] += ret[:2]
121 | return ret
122 |
123 | @staticmethod
124 | # @jit(nopython=True)
125 | def tlwh_to_xyah(tlwh):
126 | """Convert bounding box to format `(center x, center y, aspect ratio,
127 | height)`, where the aspect ratio is `width / height`.
128 | """
129 | ret = np.asarray(tlwh).copy()
130 | ret[:2] += ret[2:] / 2
131 | ret[2] /= ret[3]
132 | return ret
133 |
134 | def to_xyah(self):
135 | return self.tlwh_to_xyah(self.tlwh)
136 |
137 | @staticmethod
138 | # @jit(nopython=True)
139 | def tlbr_to_tlwh(tlbr):
140 | ret = np.asarray(tlbr).copy()
141 | ret[2:] -= ret[:2]
142 | return ret
143 |
144 | @staticmethod
145 | # @jit(nopython=True)
146 | def tlwh_to_tlbr(tlwh):
147 | ret = np.asarray(tlwh).copy()
148 | ret[2:] += ret[:2]
149 | return ret
150 |
151 | def __repr__(self):
152 | return "OT_{}_({}-{})".format(self.track_id, self.start_frame, self.end_frame)
153 |
154 |
155 | class BYTETracker(object):
156 | def __init__(self, args, frame_rate=30):
157 | self.tracked_stracks = [] # type: list[STrack]
158 | self.lost_stracks = [] # type: list[STrack]
159 | self.removed_stracks = [] # type: list[STrack]
160 |
161 | self.frame_id = 0
162 | self.args = args
163 | # self.det_thresh = args.track_thresh
164 | self.det_thresh = args["track_thresh"] + 0.1
165 | self.buffer_size = int(frame_rate / 30.0 * args["track_buffer"])
166 | self.max_time_lost = self.buffer_size
167 | self.kalman_filter = KalmanFilter()
168 |
169 | def update(self, output_results, img_info, img_size):
170 | self.frame_id += 1
171 | activated_starcks = []
172 | refind_stracks = []
173 | lost_stracks = []
174 | removed_stracks = []
175 |
176 | if output_results.shape[1] == 5:
177 | scores = output_results[:, 4]
178 | bboxes = output_results[:, :4]
179 | else:
180 | output_results = output_results.cpu().numpy()
181 | scores = output_results[:, 4] * output_results[:, 5]
182 | bboxes = output_results[:, :4] # x1y1x2y2
183 | img_h, img_w = img_info[0], img_info[1]
184 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w))
185 | bboxes /= scale
186 |
187 | remain_inds = scores > self.args["track_thresh"]
188 | inds_low = scores > 0.1
189 | inds_high = scores < self.args["track_thresh"]
190 |
191 | inds_second = np.logical_and(inds_low, inds_high)
192 | dets_second = bboxes[inds_second.to(torch.bool)]
193 | dets = bboxes[remain_inds]
194 | scores_keep = scores[remain_inds]
195 | scores_second = scores[inds_second.to(torch.bool)]
196 |
197 | if len(dets) > 0:
198 | """Detections"""
199 | detections = [
200 | STrack(STrack.tlbr_to_tlwh(tlbr), s)
201 | for (tlbr, s) in zip(dets, scores_keep)
202 | ]
203 | else:
204 | detections = []
205 |
206 | """ Add newly detected tracklets to tracked_stracks"""
207 | unconfirmed = []
208 | tracked_stracks = [] # type: list[STrack]
209 | for track in self.tracked_stracks:
210 | if not track.is_activated:
211 | unconfirmed.append(track)
212 | else:
213 | tracked_stracks.append(track)
214 |
215 | """ Step 2: First association, with high score detection boxes"""
216 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
217 | # Predict the current location with KF
218 | STrack.multi_predict(strack_pool)
219 | dists = matching.iou_distance(strack_pool, detections)
220 | # if not self.args.mot20:
221 | # dists = matching.fuse_score(dists, detections)
222 | matches, u_track, u_detection = matching.linear_assignment(
223 | dists, thresh=self.args["match_thresh"]
224 | )
225 |
226 | for itracked, idet in matches:
227 | track = strack_pool[itracked]
228 | det = detections[idet]
229 | if track.state == TrackState.Tracked:
230 | track.update(detections[idet], self.frame_id)
231 | activated_starcks.append(track)
232 | else:
233 | track.re_activate(det, self.frame_id, new_id=False)
234 | refind_stracks.append(track)
235 |
236 | """ Step 3: Second association, with low score detection boxes"""
237 | # associate the remaining unmatched tracks with the low score detections
238 | if len(dets_second) > 0:
239 | """Detections"""
240 | detections_second = [
241 | STrack(STrack.tlbr_to_tlwh(tlbr), s)
242 | for (tlbr, s) in zip(dets_second, scores_second)
243 | ]
244 | else:
245 | detections_second = []
246 | r_tracked_stracks = [
247 | strack_pool[i]
248 | for i in u_track
249 | if strack_pool[i].state == TrackState.Tracked
250 | ]
251 | dists = matching.iou_distance(r_tracked_stracks, detections_second)
252 | matches, u_track, u_detection_second = matching.linear_assignment(
253 | dists, thresh=0.5
254 | )
255 | for itracked, idet in matches:
256 | track = r_tracked_stracks[itracked]
257 | det = detections_second[idet]
258 | if track.state == TrackState.Tracked:
259 | track.update(det, self.frame_id)
260 | activated_starcks.append(track)
261 | else:
262 | track.re_activate(det, self.frame_id, new_id=False)
263 | refind_stracks.append(track)
264 |
265 | for it in u_track:
266 | track = r_tracked_stracks[it]
267 | if not track.state == TrackState.Lost:
268 | track.mark_lost()
269 | lost_stracks.append(track)
270 |
271 | """Deal with unconfirmed tracks, usually tracks with only one beginning frame"""
272 | detections = [detections[i] for i in u_detection]
273 | dists = matching.iou_distance(unconfirmed, detections)
274 | # if not self.args.mot20:
275 | # dists = matching.fuse_score(dists, detections)
276 | matches, u_unconfirmed, u_detection = matching.linear_assignment(
277 | dists, thresh=0.7
278 | )
279 | for itracked, idet in matches:
280 | unconfirmed[itracked].update(detections[idet], self.frame_id)
281 | activated_starcks.append(unconfirmed[itracked])
282 | for it in u_unconfirmed:
283 | track = unconfirmed[it]
284 | track.mark_removed()
285 | removed_stracks.append(track)
286 |
287 | """ Step 4: Init new stracks"""
288 | for inew in u_detection:
289 | track = detections[inew]
290 | if track.score < self.det_thresh:
291 | continue
292 | track.activate(self.kalman_filter, self.frame_id)
293 | activated_starcks.append(track)
294 | """ Step 5: Update state"""
295 | for track in self.lost_stracks:
296 | if self.frame_id - track.end_frame > self.max_time_lost:
297 | track.mark_removed()
298 | removed_stracks.append(track)
299 |
300 | # print('Ramained match {} s'.format(t4-t3))
301 |
302 | self.tracked_stracks = [
303 | t for t in self.tracked_stracks if t.state == TrackState.Tracked
304 | ]
305 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)
306 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
307 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
308 | self.lost_stracks.extend(lost_stracks)
309 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
310 | self.removed_stracks.extend(removed_stracks)
311 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(
312 | self.tracked_stracks, self.lost_stracks
313 | )
314 | # return only the currently activated tracks
315 | output_stracks = [track for track in self.tracked_stracks if track.is_activated]
316 |
317 | return output_stracks
318 |
319 |
320 | def joint_stracks(tlista, tlistb):
321 | exists = {}
322 | res = []
323 | for t in tlista:
324 | exists[t.track_id] = 1
325 | res.append(t)
326 | for t in tlistb:
327 | tid = t.track_id
328 | if not exists.get(tid, 0):
329 | exists[tid] = 1
330 | res.append(t)
331 | return res
332 |
333 |
334 | def sub_stracks(tlista, tlistb):
335 | stracks = {}
336 | for t in tlista:
337 | stracks[t.track_id] = t
338 | for t in tlistb:
339 | tid = t.track_id
340 | if stracks.get(tid, 0):
341 | del stracks[tid]
342 | return list(stracks.values())
343 |
344 |
345 | def remove_duplicate_stracks(stracksa, stracksb):
346 | pdist = matching.iou_distance(stracksa, stracksb)
347 | pairs = np.where(pdist < 0.15)
348 | dupa, dupb = list(), list()
349 | for p, q in zip(*pairs):
350 | timep = stracksa[p].frame_id - stracksa[p].start_frame
351 | timeq = stracksb[q].frame_id - stracksb[q].start_frame
352 | if timep > timeq:
353 | dupb.append(q)
354 | else:
355 | dupa.append(p)
356 | resa = [t for i, t in enumerate(stracksa) if not i in dupa]
357 | resb = [t for i, t in enumerate(stracksb) if not i in dupb]
358 | return resa, resb
359 |
--------------------------------------------------------------------------------
/face_tracking/tracker/kalman_filter.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 | import scipy.linalg
4 |
5 | """
6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of
7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv
8 | function and used as Mahalanobis gating threshold.
9 | """
10 | chi2inv95 = {
11 | 1: 3.8415,
12 | 2: 5.9915,
13 | 3: 7.8147,
14 | 4: 9.4877,
15 | 5: 11.070,
16 | 6: 12.592,
17 | 7: 14.067,
18 | 8: 15.507,
19 | 9: 16.919,
20 | }
21 |
22 |
23 | class KalmanFilter(object):
24 | """
25 | A simple Kalman filter for tracking bounding boxes in image space.
26 |
27 | The 8-dimensional state space
28 |
29 | x, y, a, h, vx, vy, va, vh
30 |
31 | contains the bounding box center position (x, y), aspect ratio a, height h,
32 | and their respective velocities.
33 |
34 | Object motion follows a constant velocity model. The bounding box location
35 | (x, y, a, h) is taken as direct observation of the state space (linear
36 | observation model).
37 |
38 | """
39 |
40 | def __init__(self):
41 | ndim, dt = 4, 1.0
42 |
43 | # Create Kalman filter model matrices.
44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim)
45 | for i in range(ndim):
46 | self._motion_mat[i, ndim + i] = dt
47 | self._update_mat = np.eye(ndim, 2 * ndim)
48 |
49 | # Motion and observation uncertainty are chosen relative to the current
50 | # state estimate. These weights control the amount of uncertainty in
51 | # the model. This is a bit hacky.
52 | self._std_weight_position = 1.0 / 20
53 | self._std_weight_velocity = 1.0 / 160
54 |
55 | def initiate(self, measurement):
56 | """Create track from unassociated measurement.
57 |
58 | Parameters
59 | ----------
60 | measurement : ndarray
61 | Bounding box coordinates (x, y, a, h) with center position (x, y),
62 | aspect ratio a, and height h.
63 |
64 | Returns
65 | -------
66 | (ndarray, ndarray)
67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8
68 | dimensional) of the new track. Unobserved velocities are initialized
69 | to 0 mean.
70 |
71 | """
72 | mean_pos = measurement
73 | mean_vel = np.zeros_like(mean_pos)
74 | mean = np.r_[mean_pos, mean_vel]
75 |
76 | std = [
77 | 2 * self._std_weight_position * measurement[3],
78 | 2 * self._std_weight_position * measurement[3],
79 | 1e-2,
80 | 2 * self._std_weight_position * measurement[3],
81 | 10 * self._std_weight_velocity * measurement[3],
82 | 10 * self._std_weight_velocity * measurement[3],
83 | 1e-5,
84 | 10 * self._std_weight_velocity * measurement[3],
85 | ]
86 | covariance = np.diag(np.square(std))
87 | return mean, covariance
88 |
89 | def predict(self, mean, covariance):
90 | """Run Kalman filter prediction step.
91 |
92 | Parameters
93 | ----------
94 | mean : ndarray
95 | The 8 dimensional mean vector of the object state at the previous
96 | time step.
97 | covariance : ndarray
98 | The 8x8 dimensional covariance matrix of the object state at the
99 | previous time step.
100 |
101 | Returns
102 | -------
103 | (ndarray, ndarray)
104 | Returns the mean vector and covariance matrix of the predicted
105 | state. Unobserved velocities are initialized to 0 mean.
106 |
107 | """
108 | std_pos = [
109 | self._std_weight_position * mean[3],
110 | self._std_weight_position * mean[3],
111 | 1e-2,
112 | self._std_weight_position * mean[3],
113 | ]
114 | std_vel = [
115 | self._std_weight_velocity * mean[3],
116 | self._std_weight_velocity * mean[3],
117 | 1e-5,
118 | self._std_weight_velocity * mean[3],
119 | ]
120 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
121 |
122 | # mean = np.dot(self._motion_mat, mean)
123 | mean = np.dot(mean, self._motion_mat.T)
124 | covariance = (
125 | np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T))
126 | + motion_cov
127 | )
128 |
129 | return mean, covariance
130 |
131 | def project(self, mean, covariance):
132 | """Project state distribution to measurement space.
133 |
134 | Parameters
135 | ----------
136 | mean : ndarray
137 | The state's mean vector (8 dimensional array).
138 | covariance : ndarray
139 | The state's covariance matrix (8x8 dimensional).
140 |
141 | Returns
142 | -------
143 | (ndarray, ndarray)
144 | Returns the projected mean and covariance matrix of the given state
145 | estimate.
146 |
147 | """
148 | std = [
149 | self._std_weight_position * mean[3],
150 | self._std_weight_position * mean[3],
151 | 1e-1,
152 | self._std_weight_position * mean[3],
153 | ]
154 | innovation_cov = np.diag(np.square(std))
155 |
156 | mean = np.dot(self._update_mat, mean)
157 | covariance = np.linalg.multi_dot(
158 | (self._update_mat, covariance, self._update_mat.T)
159 | )
160 | return mean, covariance + innovation_cov
161 |
162 | def multi_predict(self, mean, covariance):
163 | """Run Kalman filter prediction step (Vectorized version).
164 | Parameters
165 | ----------
166 | mean : ndarray
167 | The Nx8 dimensional mean matrix of the object states at the previous
168 | time step.
169 | covariance : ndarray
170 |             The Nx8x8 dimensional covariance matrices of the object states at the
171 | previous time step.
172 | Returns
173 | -------
174 | (ndarray, ndarray)
175 | Returns the mean vector and covariance matrix of the predicted
176 | state. Unobserved velocities are initialized to 0 mean.
177 | """
178 | std_pos = [
179 | self._std_weight_position * mean[:, 3],
180 | self._std_weight_position * mean[:, 3],
181 | 1e-2 * np.ones_like(mean[:, 3]),
182 | self._std_weight_position * mean[:, 3],
183 | ]
184 | std_vel = [
185 | self._std_weight_velocity * mean[:, 3],
186 | self._std_weight_velocity * mean[:, 3],
187 | 1e-5 * np.ones_like(mean[:, 3]),
188 | self._std_weight_velocity * mean[:, 3],
189 | ]
190 | sqr = np.square(np.r_[std_pos, std_vel]).T
191 |
192 | motion_cov = []
193 | for i in range(len(mean)):
194 | motion_cov.append(np.diag(sqr[i]))
195 | motion_cov = np.asarray(motion_cov)
196 |
197 | mean = np.dot(mean, self._motion_mat.T)
198 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))
199 | covariance = np.dot(left, self._motion_mat.T) + motion_cov
200 |
201 | return mean, covariance
202 |
203 | def update(self, mean, covariance, measurement):
204 | """Run Kalman filter correction step.
205 |
206 | Parameters
207 | ----------
208 | mean : ndarray
209 | The predicted state's mean vector (8 dimensional).
210 | covariance : ndarray
211 | The state's covariance matrix (8x8 dimensional).
212 | measurement : ndarray
213 | The 4 dimensional measurement vector (x, y, a, h), where (x, y)
214 | is the center position, a the aspect ratio, and h the height of the
215 | bounding box.
216 |
217 | Returns
218 | -------
219 | (ndarray, ndarray)
220 | Returns the measurement-corrected state distribution.
221 |
222 | """
223 | projected_mean, projected_cov = self.project(mean, covariance)
224 |
225 | chol_factor, lower = scipy.linalg.cho_factor(
226 | projected_cov, lower=True, check_finite=False
227 | )
228 | kalman_gain = scipy.linalg.cho_solve(
229 | (chol_factor, lower),
230 | np.dot(covariance, self._update_mat.T).T,
231 | check_finite=False,
232 | ).T
233 | innovation = measurement - projected_mean
234 |
235 | new_mean = mean + np.dot(innovation, kalman_gain.T)
236 | new_covariance = covariance - np.linalg.multi_dot(
237 | (kalman_gain, projected_cov, kalman_gain.T)
238 | )
239 | return new_mean, new_covariance
240 |
241 | def gating_distance(
242 | self, mean, covariance, measurements, only_position=False, metric="maha"
243 | ):
244 | """Compute gating distance between state distribution and measurements.
245 | A suitable distance threshold can be obtained from `chi2inv95`. If
246 | `only_position` is False, the chi-square distribution has 4 degrees of
247 | freedom, otherwise 2.
248 | Parameters
249 | ----------
250 | mean : ndarray
251 | Mean vector over the state distribution (8 dimensional).
252 | covariance : ndarray
253 | Covariance of the state distribution (8x8 dimensional).
254 | measurements : ndarray
255 | An Nx4 dimensional matrix of N measurements, each in
256 | format (x, y, a, h) where (x, y) is the bounding box center
257 | position, a the aspect ratio, and h the height.
258 | only_position : Optional[bool]
259 | If True, distance computation is done with respect to the bounding
260 | box center position only.
261 | Returns
262 | -------
263 | ndarray
264 | Returns an array of length N, where the i-th element contains the
265 | squared Mahalanobis distance between (mean, covariance) and
266 | `measurements[i]`.
267 | """
268 | mean, covariance = self.project(mean, covariance)
269 | if only_position:
270 | mean, covariance = mean[:2], covariance[:2, :2]
271 | measurements = measurements[:, :2]
272 |
273 | d = measurements - mean
274 | if metric == "gaussian":
275 | return np.sum(d * d, axis=1)
276 | elif metric == "maha":
277 | cholesky_factor = np.linalg.cholesky(covariance)
278 | z = scipy.linalg.solve_triangular(
279 | cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True
280 | )
281 | squared_maha = np.sum(z * z, axis=0)
282 | return squared_maha
283 | else:
284 | raise ValueError("invalid distance metric")
285 |
--------------------------------------------------------------------------------
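Usage sketch (illustrative, not part of the repository): initiate a track from a single (x, y, a, h) measurement, run one prediction step, gate candidate detections with the chi2inv95 table, and correct with the closest accepted one. The import path is an assumption and only works when this directory is on sys.path.

import numpy as np

from kalman_filter import KalmanFilter, chi2inv95  # assumed import path

kf = KalmanFilter()

# One bounding box observed as (center_x, center_y, aspect_ratio, height).
measurement = np.array([320.0, 240.0, 0.75, 120.0])
mean, cov = kf.initiate(measurement)      # 8-dim state, 8x8 covariance

mean, cov = kf.predict(mean, cov)         # constant-velocity prediction

# Gate candidate detections by squared Mahalanobis distance (4 degrees of freedom).
candidates = np.array([[322.0, 243.0, 0.74, 121.0],
                       [500.0, 100.0, 0.80, 90.0]])
d2 = kf.gating_distance(mean, cov, candidates, metric="maha")
keep = d2 <= chi2inv95[4]

# Correct the state with the closest accepted measurement, if any.
if keep.any():
    best = candidates[keep][np.argmin(d2[keep])]
    mean, cov = kf.update(mean, cov, best)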
/face_tracking/tracker/matching.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 | import numpy as np
5 | from scipy.optimize import linear_sum_assignment
6 | from scipy.spatial.distance import cdist
7 |
8 | # Put this directory on the path before importing the sibling kalman_filter module.
9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
10 | sys.path.append(BASE_DIR)
11 |
12 | import kalman_filter
13 |
14 |
15 | def linear_assignment(cost_matrix, thresh):
16 | if cost_matrix.size == 0:
17 | return (
18 | np.empty((0, 2), dtype=int),
19 | tuple(range(cost_matrix.shape[0])),
20 | tuple(range(cost_matrix.shape[1])),
21 | )
22 |
23 | row_ind, col_ind = linear_sum_assignment(cost_matrix)
24 | matches = np.array(
25 | [[r, c] for r, c in zip(row_ind, col_ind) if cost_matrix[r, c] <= thresh]
26 | ).reshape(-1, 2)
27 | unmatched_a = np.array([i for i in range(cost_matrix.shape[0]) if i not in matches[:, 0]])
28 | unmatched_b = np.array([i for i in range(cost_matrix.shape[1]) if i not in matches[:, 1]])
29 |
30 | return matches, tuple(unmatched_a), tuple(unmatched_b)
31 |
32 |
33 | def bbox_iou(box1, box2):
34 | """
35 | Compute the IoU of two bounding boxes.
36 | """
37 | # Determine the coordinates of each of the boxes
38 | x1, y1, x2, y2 = box1
39 | x1_p, y1_p, x2_p, y2_p = box2
40 |
41 | # Calculate the area of intersection rectangle
42 | xi1 = max(x1, x1_p)
43 | yi1 = max(y1, y1_p)
44 | xi2 = min(x2, x2_p)
45 | yi2 = min(y2, y2_p)
46 | inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0)
47 |
48 | # Calculate each box area
49 | box1_area = (x2 - x1) * (y2 - y1)
50 | box2_area = (x2_p - x1_p) * (y2_p - y1_p)
51 |
52 | # Calculate union area
53 | union_area = box1_area + box2_area - inter_area
54 |
55 | # Calculate IoU
56 | iou = inter_area / union_area
57 |
58 | return iou
59 |
60 |
61 | def ious(atlbrs, btlbrs):
62 | """
63 | Compute cost based on IoU
64 | :type atlbrs: list[tlbr] | np.ndarray
65 | :type btlbrs: list[tlbr] | np.ndarray
66 |
67 | :rtype ious np.ndarray
68 | """
69 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
70 | for i, box1 in enumerate(atlbrs):
71 | for j, box2 in enumerate(btlbrs):
72 | ious[i, j] = bbox_iou(box1, box2)
73 | return ious
74 |
75 |
76 | def iou_distance(atracks, btracks):
77 | """
78 | Compute cost based on IoU
79 | :type atracks: list[STrack]
80 | :type btracks: list[STrack]
81 |
82 | :rtype cost_matrix np.ndarray
83 | """
84 |
85 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
86 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)
87 | ):
88 | atlbrs = atracks
89 | btlbrs = btracks
90 | else:
91 | atlbrs = [track.tlbr for track in atracks]
92 | btlbrs = [track.tlbr for track in btracks]
93 | _ious = ious(atlbrs, btlbrs)
94 | cost_matrix = 1 - _ious
95 |
96 | return cost_matrix
97 |
98 |
99 | def v_iou_distance(atracks, btracks):
100 | """
101 | Compute cost based on IoU
102 | :type atracks: list[STrack]
103 | :type btracks: list[STrack]
104 |
105 | :rtype cost_matrix np.ndarray
106 | """
107 |
108 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
109 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)
110 | ):
111 | atlbrs = atracks
112 | btlbrs = btracks
113 | else:
114 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]
115 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]
116 | _ious = ious(atlbrs, btlbrs)
117 | cost_matrix = 1 - _ious
118 |
119 | return cost_matrix
120 |
121 |
122 | def embedding_distance(tracks, detections, metric="cosine"):
123 | """
124 | :param tracks: list[STrack]
125 | :param detections: list[BaseTrack]
126 | :param metric:
127 | :return: cost_matrix np.ndarray
128 | """
129 |
130 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64)
131 | if cost_matrix.size == 0:
132 | return cost_matrix
133 | det_features = np.asarray(
134 | [track.curr_feat for track in detections], dtype=np.float64
135 | )
136 | # for i, track in enumerate(tracks):
137 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
138 | track_features = np.asarray(
139 | [track.smooth_feat for track in tracks], dtype=np.float64
140 | )
141 | cost_matrix = np.maximum(
142 | 0.0, cdist(track_features, det_features, metric)
143 | )  # Normalized features
144 | return cost_matrix
145 |
146 |
147 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
148 | if cost_matrix.size == 0:
149 | return cost_matrix
150 | gating_dim = 2 if only_position else 4
151 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
152 | measurements = np.asarray([det.to_xyah() for det in detections])
153 | for row, track in enumerate(tracks):
154 | gating_distance = kf.gating_distance(
155 | track.mean, track.covariance, measurements, only_position
156 | )
157 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
158 | return cost_matrix
159 |
160 |
161 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):
162 | if cost_matrix.size == 0:
163 | return cost_matrix
164 | gating_dim = 2 if only_position else 4
165 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
166 | measurements = np.asarray([det.to_xyah() for det in detections])
167 | for row, track in enumerate(tracks):
168 | gating_distance = kf.gating_distance(
169 | track.mean, track.covariance, measurements, only_position, metric="maha"
170 | )
171 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
172 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
173 | return cost_matrix
174 |
175 |
176 | def fuse_iou(cost_matrix, tracks, detections):
177 | if cost_matrix.size == 0:
178 | return cost_matrix
179 | reid_sim = 1 - cost_matrix
180 | iou_dist = iou_distance(tracks, detections)
181 | iou_sim = 1 - iou_dist
182 | fuse_sim = reid_sim * (1 + iou_sim) / 2
183 | det_scores = np.array([det.score for det in detections])
184 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
185 | # fuse_sim = fuse_sim * (1 + det_scores) / 2
186 | fuse_cost = 1 - fuse_sim
187 | return fuse_cost
188 |
189 |
190 | def fuse_score(cost_matrix, detections):
191 | if cost_matrix.size == 0:
192 | return cost_matrix
193 | iou_sim = 1 - cost_matrix
194 | det_scores = np.array([det.score for det in detections])
195 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)
196 | fuse_sim = iou_sim * det_scores
197 | fuse_cost = 1 - fuse_sim
198 | return fuse_cost
199 |
--------------------------------------------------------------------------------
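Usage sketch (illustrative, not part of the repository): the typical way these helpers combine inside the tracker, computing an IoU cost matrix for raw (x1, y1, x2, y2) boxes and solving the assignment under a cost threshold. The import path is an assumption and only works when this directory is on sys.path.

import numpy as np

from matching import iou_distance, linear_assignment  # assumed import path

# Toy boxes in (x1, y1, x2, y2) format; iou_distance accepts raw ndarrays as well as tracks.
track_boxes = [np.array([100, 100, 200, 200]), np.array([400, 400, 480, 500])]
det_boxes = [np.array([105, 98, 198, 205]), np.array([50, 300, 120, 380])]

cost = iou_distance(track_boxes, det_boxes)      # cost = 1 - IoU, shape (2, 2)
matches, u_track, u_det = linear_assignment(cost, thresh=0.8)

for t_idx, d_idx in matches:
    print(f"track {t_idx} <-> detection {d_idx}, IoU = {1 - cost[t_idx, d_idx]:.2f}")
print("unmatched tracks:", u_track, "unmatched detections:", u_det)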
/face_tracking/tracker/visualize.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 | __all__ = ["vis"]
5 |
6 |
7 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
8 | for i in range(len(boxes)):
9 | box = boxes[i]
10 | cls_id = int(cls_ids[i])
11 | score = scores[i]
12 | if score < conf:
13 | continue
14 | x0 = int(box[0])
15 | y0 = int(box[1])
16 | x1 = int(box[2])
17 | y1 = int(box[3])
18 |
19 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()
20 | text = "{}:{:.1f}%".format(class_names[cls_id], score * 100)
21 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)
22 | font = cv2.FONT_HERSHEY_SIMPLEX
23 |
24 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
25 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
26 |
27 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()
28 | cv2.rectangle(
29 | img,
30 | (x0, y0 + 1),
31 | (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
32 | txt_bk_color,
33 | -1,
34 | )
35 | cv2.putText(
36 | img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1
37 | )
38 |
39 | return img
40 |
41 |
42 | def get_color(idx):
43 | idx = idx * 3
44 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
45 |
46 | return color
47 |
48 |
49 | def plot_tracking(
50 | image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0.0, ids2=None, names=[]
51 | ):
52 | im = np.ascontiguousarray(np.copy(image))
53 | im_h, im_w = im.shape[:2]
54 |
55 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
56 |
57 | # text_scale = max(1, image.shape[1] / 1600.)
58 | # text_thickness = 2
59 | # line_thickness = max(1, int(image.shape[1] / 500.))
60 | text_scale = 2
61 | text_thickness = 2
62 | line_thickness = 3
63 |
64 | radius = max(5, int(im_w / 140.0))
65 | cv2.putText(
66 | im,
67 | "frame: %d fps: %.2f num: %d" % (frame_id, fps, len(tlwhs)),
68 | (0, int(15 * text_scale)),
69 | cv2.FONT_HERSHEY_PLAIN,
70 | 2,
71 | (0, 0, 255),
72 | thickness=2,
73 | )
74 |
75 | for i, tlwh in enumerate(tlwhs):
76 | x1, y1, w, h = tlwh
77 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
78 | obj_id = int(obj_ids[i])
79 | id_text = "{}".format(int(obj_id))
80 |         if obj_id in names:
81 | id_text = id_text + ": " + names[obj_id]
82 | if ids2 is not None:
83 | id_text = id_text + ", {}".format(int(ids2[i]))
84 | color = get_color(abs(obj_id))
85 | cv2.rectangle(
86 | im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness
87 | )
88 | cv2.putText(
89 | im,
90 | id_text,
91 | (intbox[0], intbox[1]),
92 | cv2.FONT_HERSHEY_PLAIN,
93 | text_scale,
94 | (0, 0, 255),
95 | thickness=text_thickness,
96 | )
97 | return im
98 |
99 |
100 | _COLORS = (
101 | np.array(
102 | [
103 | 0.000,
104 | 0.447,
105 | 0.741,
106 | 0.850,
107 | 0.325,
108 | 0.098,
109 | 0.929,
110 | 0.694,
111 | 0.125,
112 | 0.494,
113 | 0.184,
114 | 0.556,
115 | 0.466,
116 | 0.674,
117 | 0.188,
118 | 0.301,
119 | 0.745,
120 | 0.933,
121 | 0.635,
122 | 0.078,
123 | 0.184,
124 | 0.300,
125 | 0.300,
126 | 0.300,
127 | 0.600,
128 | 0.600,
129 | 0.600,
130 | 1.000,
131 | 0.000,
132 | 0.000,
133 | 1.000,
134 | 0.500,
135 | 0.000,
136 | 0.749,
137 | 0.749,
138 | 0.000,
139 | 0.000,
140 | 1.000,
141 | 0.000,
142 | 0.000,
143 | 0.000,
144 | 1.000,
145 | 0.667,
146 | 0.000,
147 | 1.000,
148 | 0.333,
149 | 0.333,
150 | 0.000,
151 | 0.333,
152 | 0.667,
153 | 0.000,
154 | 0.333,
155 | 1.000,
156 | 0.000,
157 | 0.667,
158 | 0.333,
159 | 0.000,
160 | 0.667,
161 | 0.667,
162 | 0.000,
163 | 0.667,
164 | 1.000,
165 | 0.000,
166 | 1.000,
167 | 0.333,
168 | 0.000,
169 | 1.000,
170 | 0.667,
171 | 0.000,
172 | 1.000,
173 | 1.000,
174 | 0.000,
175 | 0.000,
176 | 0.333,
177 | 0.500,
178 | 0.000,
179 | 0.667,
180 | 0.500,
181 | 0.000,
182 | 1.000,
183 | 0.500,
184 | 0.333,
185 | 0.000,
186 | 0.500,
187 | 0.333,
188 | 0.333,
189 | 0.500,
190 | 0.333,
191 | 0.667,
192 | 0.500,
193 | 0.333,
194 | 1.000,
195 | 0.500,
196 | 0.667,
197 | 0.000,
198 | 0.500,
199 | 0.667,
200 | 0.333,
201 | 0.500,
202 | 0.667,
203 | 0.667,
204 | 0.500,
205 | 0.667,
206 | 1.000,
207 | 0.500,
208 | 1.000,
209 | 0.000,
210 | 0.500,
211 | 1.000,
212 | 0.333,
213 | 0.500,
214 | 1.000,
215 | 0.667,
216 | 0.500,
217 | 1.000,
218 | 1.000,
219 | 0.500,
220 | 0.000,
221 | 0.333,
222 | 1.000,
223 | 0.000,
224 | 0.667,
225 | 1.000,
226 | 0.000,
227 | 1.000,
228 | 1.000,
229 | 0.333,
230 | 0.000,
231 | 1.000,
232 | 0.333,
233 | 0.333,
234 | 1.000,
235 | 0.333,
236 | 0.667,
237 | 1.000,
238 | 0.333,
239 | 1.000,
240 | 1.000,
241 | 0.667,
242 | 0.000,
243 | 1.000,
244 | 0.667,
245 | 0.333,
246 | 1.000,
247 | 0.667,
248 | 0.667,
249 | 1.000,
250 | 0.667,
251 | 1.000,
252 | 1.000,
253 | 1.000,
254 | 0.000,
255 | 1.000,
256 | 1.000,
257 | 0.333,
258 | 1.000,
259 | 1.000,
260 | 0.667,
261 | 1.000,
262 | 0.333,
263 | 0.000,
264 | 0.000,
265 | 0.500,
266 | 0.000,
267 | 0.000,
268 | 0.667,
269 | 0.000,
270 | 0.000,
271 | 0.833,
272 | 0.000,
273 | 0.000,
274 | 1.000,
275 | 0.000,
276 | 0.000,
277 | 0.000,
278 | 0.167,
279 | 0.000,
280 | 0.000,
281 | 0.333,
282 | 0.000,
283 | 0.000,
284 | 0.500,
285 | 0.000,
286 | 0.000,
287 | 0.667,
288 | 0.000,
289 | 0.000,
290 | 0.833,
291 | 0.000,
292 | 0.000,
293 | 1.000,
294 | 0.000,
295 | 0.000,
296 | 0.000,
297 | 0.167,
298 | 0.000,
299 | 0.000,
300 | 0.333,
301 | 0.000,
302 | 0.000,
303 | 0.500,
304 | 0.000,
305 | 0.000,
306 | 0.667,
307 | 0.000,
308 | 0.000,
309 | 0.833,
310 | 0.000,
311 | 0.000,
312 | 1.000,
313 | 0.000,
314 | 0.000,
315 | 0.000,
316 | 0.143,
317 | 0.143,
318 | 0.143,
319 | 0.286,
320 | 0.286,
321 | 0.286,
322 | 0.429,
323 | 0.429,
324 | 0.429,
325 | 0.571,
326 | 0.571,
327 | 0.571,
328 | 0.714,
329 | 0.714,
330 | 0.714,
331 | 0.857,
332 | 0.857,
333 | 0.857,
334 | 0.000,
335 | 0.447,
336 | 0.741,
337 | 0.314,
338 | 0.717,
339 | 0.741,
340 | 0.50,
341 | 0.5,
342 | 0,
343 | ]
344 | )
345 | .astype(np.float32)
346 | .reshape(-1, 3)
347 | )
348 |
--------------------------------------------------------------------------------
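Usage sketch (illustrative, not part of the repository): rendering a couple of toy tracks onto a blank frame with plot_tracking; the id-to-name mapping has the same shape that recognize.py passes in. The import path is an assumption and only works when this directory is on sys.path.

import cv2
import numpy as np

from visualize import plot_tracking  # assumed import path

frame = np.full((480, 640, 3), 255, dtype=np.uint8)  # blank white frame
tlwhs = [(100, 80, 120, 160), (350, 150, 90, 130)]   # (top-left x, top-left y, width, height)
track_ids = [1, 2]
names = {1: "phuoc"}                                 # optional track-id -> recognized-name mapping

annotated = plot_tracking(frame, tlwhs, track_ids, frame_id=1, fps=30.0, names=names)
cv2.imwrite("tracking_demo.jpg", annotated)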
/recognize.py:
--------------------------------------------------------------------------------
1 | import threading
2 | import time
3 |
4 | import cv2
5 | import numpy as np
6 | import torch
7 | import yaml
8 | from torchvision import transforms
9 |
10 | from face_alignment.alignment import norm_crop
11 | from face_detection.scrfd.detector import SCRFD
12 | from face_detection.yolov5_face.detector import Yolov5Face
13 | from face_recognition.arcface.model import iresnet_inference
14 | from face_recognition.arcface.utils import compare_encodings, read_features
15 | from face_tracking.tracker.byte_tracker import BYTETracker
16 | from face_tracking.tracker.visualize import plot_tracking
17 |
18 | # Device configuration
19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
20 |
21 | # Face detector (choose one)
22 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
23 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt")
24 |
25 | # Face recognizer
26 | recognizer = iresnet_inference(
27 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
28 | )
29 |
30 | # Load precomputed face features and names
31 | images_names, images_embs = read_features(feature_path="./datasets/face_features/feature")
32 |
33 | # Mapping of face IDs to names
34 | id_face_mapping = {}
35 |
36 | # Data mapping for tracking information
37 | data_mapping = {
38 | "raw_image": [],
39 | "tracking_ids": [],
40 | "detection_bboxes": [],
41 | "detection_landmarks": [],
42 | "tracking_bboxes": [],
43 | }
44 |
45 |
46 | def load_config(file_name):
47 | """
48 | Load a YAML configuration file.
49 |
50 | Args:
51 | file_name (str): The path to the YAML configuration file.
52 |
53 | Returns:
54 | dict: The loaded configuration as a dictionary.
55 | """
56 | with open(file_name, "r") as stream:
57 | try:
58 | return yaml.safe_load(stream)
59 | except yaml.YAMLError as exc:
60 | print(exc)
61 |
62 |
63 | def process_tracking(frame, detector, tracker, args, frame_id, fps):
64 | """
65 | Process tracking for a frame.
66 |
67 | Args:
68 | frame: The input frame.
69 | detector: The face detector.
70 | tracker: The object tracker.
71 | args (dict): Tracking configuration parameters.
72 | frame_id (int): The frame ID.
73 | fps (float): Frames per second.
74 |
75 | Returns:
76 | numpy.ndarray: The processed tracking image.
77 | """
78 | # Face detection and tracking
79 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame)
80 |
81 | tracking_tlwhs = []
82 | tracking_ids = []
83 | tracking_scores = []
84 | tracking_bboxes = []
85 |
86 | if outputs is not None:
87 | online_targets = tracker.update(
88 | outputs, [img_info["height"], img_info["width"]], (128, 128)
89 | )
90 |
91 | for i in range(len(online_targets)):
92 | t = online_targets[i]
93 | tlwh = t.tlwh
94 | tid = t.track_id
95 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"]
96 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical:
97 | x1, y1, w, h = tlwh
98 | tracking_bboxes.append([x1, y1, x1 + w, y1 + h])
99 | tracking_tlwhs.append(tlwh)
100 | tracking_ids.append(tid)
101 | tracking_scores.append(t.score)
102 |
103 | tracking_image = plot_tracking(
104 | img_info["raw_img"],
105 | tracking_tlwhs,
106 | tracking_ids,
107 | names=id_face_mapping,
108 | frame_id=frame_id + 1,
109 | fps=fps,
110 | )
111 | else:
112 | tracking_image = img_info["raw_img"]
113 |
114 | data_mapping["raw_image"] = img_info["raw_img"]
115 | data_mapping["detection_bboxes"] = bboxes
116 | data_mapping["detection_landmarks"] = landmarks
117 | data_mapping["tracking_ids"] = tracking_ids
118 | data_mapping["tracking_bboxes"] = tracking_bboxes
119 |
120 | return tracking_image
121 |
122 |
123 | @torch.no_grad()
124 | def get_feature(face_image):
125 | """
126 | Extract features from a face image.
127 |
128 | Args:
129 | face_image: The input face image.
130 |
131 | Returns:
132 | numpy.ndarray: The extracted features.
133 | """
134 | face_preprocess = transforms.Compose(
135 | [
136 | transforms.ToTensor(),
137 | transforms.Resize((112, 112)),
138 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
139 | ]
140 | )
141 |
142 | # Convert to RGB
143 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB)
144 |
145 |     # Preprocess the RGB image (resize, normalize, move to device)
146 | face_image = face_preprocess(face_image).unsqueeze(0).to(device)
147 |
148 | # Inference to get feature
149 | emb_img_face = recognizer(face_image).cpu().numpy()
150 |
151 | # Convert to array
152 | images_emb = emb_img_face / np.linalg.norm(emb_img_face)
153 |
154 | return images_emb
155 |
156 |
157 | def recognition(face_image):
158 | """
159 | Recognize a face image.
160 |
161 | Args:
162 | face_image: The input face image.
163 |
164 | Returns:
165 | tuple: A tuple containing the recognition score and name.
166 | """
167 | # Get feature from face
168 | query_emb = get_feature(face_image)
169 |
170 | score, id_min = compare_encodings(query_emb, images_embs)
171 | name = images_names[id_min]
172 | score = score[0]
173 |
174 | return score, name
175 |
176 |
177 | def mapping_bbox(box1, box2):
178 | """
179 | Calculate the Intersection over Union (IoU) between two bounding boxes.
180 |
181 | Args:
182 | box1 (tuple): The first bounding box (x_min, y_min, x_max, y_max).
183 | box2 (tuple): The second bounding box (x_min, y_min, x_max, y_max).
184 |
185 | Returns:
186 | float: The IoU score.
187 | """
188 | # Calculate the intersection area
189 | x_min_inter = max(box1[0], box2[0])
190 | y_min_inter = max(box1[1], box2[1])
191 | x_max_inter = min(box1[2], box2[2])
192 | y_max_inter = min(box1[3], box2[3])
193 |
194 | intersection_area = max(0, x_max_inter - x_min_inter + 1) * max(
195 | 0, y_max_inter - y_min_inter + 1
196 | )
197 |
198 | # Calculate the area of each bounding box
199 | area_box1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1)
200 | area_box2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1)
201 |
202 | # Calculate the union area
203 | union_area = area_box1 + area_box2 - intersection_area
204 |
205 | # Calculate IoU
206 | iou = intersection_area / union_area
207 |
208 | return iou
209 |
210 |
211 | def tracking(detector, args):
212 | """
213 | Face tracking in a separate thread.
214 |
215 | Args:
216 | detector: The face detector.
217 | args (dict): Tracking configuration parameters.
218 | """
219 | # Initialize variables for measuring frame rate
220 | start_time = time.time_ns()
221 | frame_count = 0
222 | fps = -1
223 |
224 | # Initialize a tracker and a timer
225 | tracker = BYTETracker(args=args, frame_rate=30)
226 | frame_id = 0
227 |
228 | cap = cv2.VideoCapture(0)
229 |
230 | while True:
231 | _, img = cap.read()
232 |
233 | tracking_image = process_tracking(img, detector, tracker, args, frame_id, fps)
234 |
235 | # Calculate and display the frame rate
236 | frame_count += 1
237 | if frame_count >= 30:
238 | fps = 1e9 * frame_count / (time.time_ns() - start_time)
239 | frame_count = 0
240 | start_time = time.time_ns()
241 |
242 | cv2.imshow("Face Recognition", tracking_image)
243 |
244 | # Check for user exit input
245 | ch = cv2.waitKey(1)
246 | if ch == 27 or ch == ord("q") or ch == ord("Q"):
247 | break
248 |         frame_id += 1  # advance the frame counter shown in the overlay
249 |
250 | def recognize():
251 | """Face recognition in a separate thread."""
252 | while True:
253 | raw_image = data_mapping["raw_image"]
254 | detection_landmarks = data_mapping["detection_landmarks"]
255 | detection_bboxes = data_mapping["detection_bboxes"]
256 | tracking_ids = data_mapping["tracking_ids"]
257 | tracking_bboxes = data_mapping["tracking_bboxes"]
258 |
259 | for i in range(len(tracking_bboxes)):
260 | for j in range(len(detection_bboxes)):
261 | mapping_score = mapping_bbox(box1=tracking_bboxes[i], box2=detection_bboxes[j])
262 | if mapping_score > 0.9:
263 | face_alignment = norm_crop(img=raw_image, landmark=detection_landmarks[j])
264 |
265 | score, name = recognition(face_image=face_alignment)
266 | if name is not None:
267 | if score < 0.25:
268 |                         caption = "UNKNOWN"
269 | else:
270 | caption = f"{name}:{score:.2f}"
271 |
272 | id_face_mapping[tracking_ids[i]] = caption
273 |
274 | detection_bboxes = np.delete(detection_bboxes, j, axis=0)
275 | detection_landmarks = np.delete(detection_landmarks, j, axis=0)
276 |
277 | break
278 |
279 | if tracking_bboxes == []:
280 | print("Waiting for a person...")
281 |
282 |
283 | def main():
284 | """Main function to start face tracking and recognition threads."""
285 | file_name = "./face_tracking/config/config_tracking.yaml"
286 | config_tracking = load_config(file_name)
287 |
288 | # Start tracking thread
289 | thread_track = threading.Thread(
290 | target=tracking,
291 | args=(
292 | detector,
293 | config_tracking,
294 | ),
295 | )
296 | thread_track.start()
297 |
298 | # Start recognition thread
299 | thread_recognize = threading.Thread(target=recognize)
300 | thread_recognize.start()
301 |
302 |
303 | if __name__ == "__main__":
304 | main()
305 |
--------------------------------------------------------------------------------
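The recognition thread pairs a tracked box with a detection by IoU (> 0.9), aligns the crop, and then looks up the nearest stored embedding. The actual lookup lives in compare_encodings in face_recognition/arcface/utils.py, so the sketch below only reproduces the idea with plain NumPy cosine similarity and the 0.25 unknown threshold used above; all embeddings, names, and values here are made up.

import numpy as np

rng = np.random.default_rng(0)

# Stand-ins for the gallery loaded by read_features(): unit-norm embeddings plus names.
images_embs = rng.normal(size=(3, 512))
images_embs /= np.linalg.norm(images_embs, axis=1, keepdims=True)
images_names = np.array(["lam", "phuoc", "phuoc"])

# A probe embedding close to the second gallery entry, normalized like get_feature() does.
query_emb = images_embs[1:2] + 0.05 * rng.normal(size=(1, 512))
query_emb /= np.linalg.norm(query_emb)

# Cosine similarity reduces to a dot product on unit vectors; pick the best match.
scores = (query_emb @ images_embs.T).flatten()
best = int(np.argmax(scores))
score, name = scores[best], images_names[best]

caption = f"{name}:{score:.2f}" if score >= 0.25 else "UNKNOWN"
print(caption)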
/requirements.txt:
--------------------------------------------------------------------------------
1 | certifi==2023.11.17
2 | charset-normalizer==3.3.2
3 | colorama==0.4.6
4 | coloredlogs==15.0.1
5 | contourpy==1.2.0
6 | cycler==0.12.1
7 | flatbuffers==23.5.26
8 | fonttools==4.46.0
9 | humanfriendly==10.0
10 | idna==3.6
11 | imageio==2.33.0
12 | importlib-resources==6.1.1
13 | kiwisolver==1.4.5
14 | lazy_loader==0.3
15 | matplotlib==3.8.2
16 | mpmath==1.3.0
17 | networkx==3.2.1
18 | numpy==1.23.5
19 | onnxruntime==1.16.3
20 | opencv-python==4.8.1.78
21 | packaging==23.2
22 | pandas==2.1.3
23 | Pillow==10.1.0
24 | protobuf==4.25.1
25 | pyparsing==3.1.1
26 | pyreadline3==3.4.1
27 | python-dateutil==2.8.2
28 | pytz==2023.3.post1
29 | PyYAML==6.0.1
30 | requests==2.31.0
31 | scikit-image==0.22.0
32 | scipy==1.11.4
33 | seaborn==0.13.0
34 | six==1.16.0
35 | sympy==1.12
36 | tifffile==2023.9.26
37 | tqdm==4.66.1
38 | typing_extensions==4.8.0
39 | tzdata==2023.3
40 | urllib3==2.1.0
41 | zipp==3.17.0
42 |
--------------------------------------------------------------------------------
/tracking.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import cv2
4 | import yaml
5 |
6 | from face_detection.scrfd.detector import SCRFD
7 | from face_detection.yolov5_face.detector import Yolov5Face
8 | from face_tracking.tracker.byte_tracker import BYTETracker
9 | from face_tracking.tracker.visualize import plot_tracking
10 |
11 |
12 | # Function to load a YAML configuration file
13 | def load_config(file_name):
14 | with open(file_name, "r") as stream:
15 | try:
16 | return yaml.safe_load(stream)
17 | except yaml.YAMLError as exc:
18 | print(exc)
19 |
20 |
21 | # Function for performing object detection and tracking
22 | def inference(detector, args):
23 | # Open a video capture object
24 | cap = cv2.VideoCapture(0)
25 |
26 | # Initialize variables for measuring frame rate
27 | start_time = time.time_ns()
28 | frame_count = 0
29 | fps = -1
30 |
31 | # Initialize a tracker and a timer
32 | tracker = BYTETracker(args=args, frame_rate=30)
33 | frame_id = 0
34 |
35 | while True:
36 | # Read a frame from the video capture
37 | ret_val, frame = cap.read()
38 |
39 | if ret_val:
40 | # Perform face detection and tracking on the frame
41 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame)
42 |
43 | if outputs is not None:
44 | online_targets = tracker.update(
45 | outputs, [img_info["height"], img_info["width"]], (128, 128)
46 | )
47 | online_tlwhs = []
48 | online_ids = []
49 | online_scores = []
50 |
51 | for t in online_targets:
52 | tlwh = t.tlwh
53 | tid = t.track_id
54 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"]
55 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical:
56 | online_tlwhs.append(tlwh)
57 | online_ids.append(tid)
58 | online_scores.append(t.score)
59 |
60 | online_im = plot_tracking(
61 | img_info["raw_img"],
62 | online_tlwhs,
63 | online_ids,
64 | frame_id=frame_id + 1,
65 | fps=fps,
66 | )
67 | else:
68 | online_im = img_info["raw_img"]
69 |
70 | # Calculate and display the frame rate
71 | frame_count += 1
72 | if frame_count >= 30:
73 | fps = 1e9 * frame_count / (time.time_ns() - start_time)
74 | frame_count = 0
75 | start_time = time.time_ns()
76 |
77 | # # Draw bounding boxes and landmarks on the frame
78 | # for i in range(len(bboxes)):
79 | # # Get location of the face
80 | # x1, y1, x2, y2, score = bboxes[i]
81 | # cv2.rectangle(online_im, (x1, y1), (x2, y2), (200, 200, 230), 2)
82 |
83 | cv2.imshow("Face Tracking", online_im)
84 |
85 | # Check for user exit input
86 | ch = cv2.waitKey(1)
87 | if ch == 27 or ch == ord("q") or ch == ord("Q"):
88 | break
89 | else:
90 | break
91 | frame_id += 1
92 |
93 |
94 | def main():
95 | file_name = "./face_tracking/config/config_tracking.yaml"
96 | config_tracking = load_config(file_name)
97 | # detector = Yolov5Face(
98 | # model_file="face_detection/yolov5_face/weights/yolov5m-face.pt"
99 | # )
100 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
101 |
102 | inference(detector=detector, args=config_tracking)
103 |
104 |
105 | if __name__ == "__main__":
106 | main()
107 |
--------------------------------------------------------------------------------
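Both entry points read face_tracking/config/config_tracking.yaml via load_config() and index the result as a dict. Only aspect_ratio_thresh and min_box_area are referenced directly in the scripts above; the remaining keys in the sketch below are assumptions based on the arguments a ByteTrack-style tracker conventionally expects, shown as the in-memory dict the scripts work with rather than as the YAML file itself.

# Hypothetical contents of config_tracking.yaml, expressed as the dict yaml.safe_load returns.
config_tracking = {
    "track_thresh": 0.5,         # detection score for starting/keeping a track (assumed key)
    "track_buffer": 30,          # frames a lost track is kept alive (assumed key)
    "match_thresh": 0.8,         # IoU association threshold (assumed key)
    "aspect_ratio_thresh": 1.6,  # boxes with w/h above this are skipped (used in the scripts)
    "min_box_area": 10,          # boxes smaller than this are skipped (used in the scripts)
}

# e.g. inference(detector=SCRFD(...), args=config_tracking)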