├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE.md ├── README.md ├── add_persons.py ├── assets ├── bytetrack.png ├── face-detection.gif ├── face-detection2.gif ├── face-recognition.gif ├── result.jpg ├── sequence-diagram.png ├── train_image.jpg └── workflow.png ├── datasets ├── backup │ ├── lam │ │ └── lam.jpg │ └── phuoc │ │ ├── avatar2.png │ │ ├── phuoc.jpg │ │ └── quare.jpg ├── data │ ├── lam │ │ └── 0.jpg │ └── phuoc │ │ ├── 0.jpg │ │ ├── 1.jpg │ │ └── 2.jpg └── face_features │ └── feature.npz ├── detect.py ├── face_align.py ├── face_alignment └── alignment.py ├── face_detection ├── retinaface │ ├── LICENSE.MIT │ ├── README.md │ ├── camera_test.py │ ├── convert_to_onnx.py │ ├── data │ │ ├── FDDB │ │ │ └── img_list.txt │ │ ├── __init__.py │ │ ├── config.py │ │ ├── data_augment.py │ │ └── wider_face.py │ ├── detect.py │ ├── layers │ │ ├── __init__.py │ │ ├── functions │ │ │ └── prior_box.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ └── multibox_loss.py │ ├── models │ │ ├── __init__.py │ │ ├── net.py │ │ └── retinaface.py │ └── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── nms │ │ ├── __init__.py │ │ └── py_cpu_nms.py │ │ └── timer.py ├── scrfd │ ├── detector.py │ └── weights │ │ └── README.md └── yolov5_face │ ├── README.md │ ├── detector.py │ ├── models │ ├── __init__.py │ ├── blazeface.yaml │ ├── blazeface_fpn.yaml │ ├── common.py │ ├── experimental.py │ ├── yolo.py │ ├── yolov5l.yaml │ ├── yolov5l6.yaml │ ├── yolov5m.yaml │ ├── yolov5m6.yaml │ ├── yolov5n-0.5.yaml │ ├── yolov5n.yaml │ ├── yolov5n6.yaml │ ├── yolov5s.yaml │ └── yolov5s6.yaml │ ├── utils │ ├── __init__.py │ ├── activations.py │ ├── autoanchor.py │ ├── datasets.py │ ├── face_datasets.py │ ├── general.py │ ├── google_utils.py │ ├── infer_utils.py │ ├── loss.py │ ├── metrics.py │ ├── plots.py │ ├── torch_utils.py │ └── wandb_logging │ │ ├── __init__.py │ │ ├── log_dataset.py │ │ └── wandb_utils.py │ └── weights │ └── README.md ├── face_recognition └── arcface │ ├── model.py │ ├── utils.py │ └── weights │ └── README.md ├── face_tracking ├── config │ └── config_tracking.yaml ├── pretrained │ └── README.md └── tracker │ ├── basetrack.py │ ├── byte_tracker.py │ ├── kalman_filter.py │ ├── matching.py │ └── visualize.py ├── recognize.py ├── requirements.txt └── tracking.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.mp4 2 | 3 | .env 4 | # *.npz 5 | index.faiss 6 | test* 7 | # *.ipynb 8 | NOTE.md 9 | data-elastic-search 10 | qdrant-vector-database 11 | architectures 12 | *.csv 13 | data.csv 14 | 15 | image-search-engine/assets/uploaded_images/* 16 | !image-search-engine/assets/uploaded_images/.gitkeep 17 | 18 | # Model 19 | *.pth 20 | *.pt 21 | *.onnx 22 | 23 | # Byte-compiled / optimized / DLL files 24 | __pycache__/ 25 | *.py[cod] 26 | *$py.class 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Distribution / packaging 32 | .Python 33 | build/ 34 | develop-eggs/ 35 | dist/ 36 | downloads/ 37 | eggs/ 38 | .eggs/ 39 | lib/ 40 | lib64/ 41 | parts/ 42 | sdist/ 43 | var/ 44 | wheels/ 45 | pip-wheel-metadata/ 46 | share/python-wheels/ 47 | *.egg-info/ 48 | .installed.cfg 49 | MANIFEST 50 | 51 | # PyInstaller 52 | # Usually these files are written by a python script from a template 53 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
54 | *.manifest 55 | *.spec 56 | 57 | # Installer logs 58 | pip-log.txt 59 | pip-delete-this-directory.txt 60 | 61 | # Unit test / coverage reports 62 | htmlcov/ 63 | .tox/ 64 | .nox/ 65 | .coverage 66 | .coverage.* 67 | .cache 68 | nosetests.xml 69 | coverage.xml 70 | *.cover 71 | *.py,cover 72 | .hypothesis/ 73 | .pytest_cache/ 74 | 75 | # Translations 76 | *.mo 77 | *.pot 78 | 79 | # Django stuff: 80 | *.log 81 | local_settings.py 82 | db.sqlite3 83 | db.sqlite3-journal 84 | 85 | # Flask stuff: 86 | instance/ 87 | .webassets-cache 88 | 89 | # Scrapy stuff: 90 | .scrapy 91 | 92 | # Sphinx documentation 93 | docs/_build/ 94 | 95 | # PyBuilder 96 | target/ 97 | 98 | # Jupyter Notebook 99 | .ipynb_checkpoints 100 | 101 | # IPython 102 | profile_default/ 103 | ipython_config.py 104 | 105 | # pyenv 106 | .python-version 107 | 108 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 109 | __pypackages__/ 110 | 111 | # Celery stuff 112 | celerybeat-schedule 113 | celerybeat.pid 114 | 115 | # SageMath parsed files 116 | *.sage.py 117 | 118 | # Environment variable 119 | # .env 120 | # .env* 121 | 122 | # Environments 123 | .venv/ 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # It's better to unpack these files and commit the raw source because 150 | # git has its own built in compression methods. 151 | *.7z 152 | *.jar 153 | *.rar 154 | *.zip 155 | *.gz 156 | *.gzip 157 | *.tgz 158 | *.bzip 159 | *.bzip2 160 | *.bz2 161 | *.xz 162 | *.lzma 163 | *.cab 164 | *.xar 165 | 166 | # Packing-only formats 167 | *.iso 168 | *.tar 169 | 170 | # Package management formats 171 | *.dmg 172 | *.xpi 173 | *.gem 174 | *.egg 175 | *.deb 176 | *.rpm 177 | *.msi 178 | *.msm 179 | *.msp 180 | *.txz 181 | 182 | # Backup 183 | *.bak 184 | *.gho 185 | *.ori 186 | *.orig 187 | *.tmp 188 | 189 | # GPG 190 | secring.* 191 | 192 | # OpenSSL-related files best not committed 193 | ## Certificate Authority 194 | *.ca 195 | 196 | ## Certificate 197 | *.crt 198 | 199 | ## Certificate Sign Request 200 | *.csr 201 | 202 | ## Certificate 203 | *.der 204 | 205 | ## Key database file 206 | *.kdb 207 | 208 | ## OSCP request data 209 | *.org 210 | 211 | ## PKCS #12 212 | *.p12 213 | 214 | ## PEM-encoded certificate data 215 | *.pem 216 | 217 | ## Random number seed 218 | *.rnd 219 | 220 | ## SSLeay data 221 | *.ssleay 222 | 223 | ## S/MIME message 224 | *.smime 225 | 226 | # ide 227 | .idea/ 228 | 229 | # others 230 | migrations/ 231 | 232 | # BBDD 233 | *.db 234 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: "^\ 2 | (third-party/.*)\ 3 | " 4 | 5 | repos: 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v4.1.0 8 | hooks: 9 | - id: check-merge-conflict # checks for some markers such as "<<<<<<<", "=======", and ">>>>>>>". 10 | - id: detect-private-key # detects the presence of private keys. 11 | - id: end-of-file-fixer # ensures that a file is either empty, or ends with one newline. 12 | - id: requirements-txt-fixer # sorts entries in requirements.txt. 
13 | - id: trailing-whitespace # trims trailing whitespace at the end of lines. 14 | 15 | # Format YAML and other files 16 | - repo: https://github.com/pre-commit/mirrors-prettier 17 | rev: v2.5.1 18 | hooks: 19 | - id: prettier 20 | files: \.(js|ts|jsx|tsx|css|less|html|json|markdown|md|yaml|yml)$ 21 | 22 | # Sort the order of importing libs 23 | - repo: https://github.com/PyCQA/isort 24 | rev: 5.12.0 25 | hooks: 26 | - id: isort 27 | args: [--profile=black, --line-length=100] 28 | 29 | # Format Python files 30 | - repo: https://github.com/psf/black 31 | rev: 23.7.0 32 | hooks: 33 | - id: black 34 | args: [--line-length=100] 35 | 36 | # - repo: https://github.com/PyCQA/flake8 37 | # rev: 6.1.0 38 | # hooks: 39 | # - id: flake8 40 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Vector Nguyễn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Real-Time Face Recognition 2 | 3 |

4 | Face Recognition 5 |
6 | Face Recognition 7 |

8 | 9 | ## Table of Contents 10 | 11 | - [Architecture](#architecture) 12 | - [How to use](#how-to-use) 13 | - [Create Environment and Install Packages](#create-environment-and-install-packages) 14 | - [Add new persons to datasets](#add-new-persons-to-datasets) 15 | - [Technology](#technology) 16 | - [Face Detection](#face-detection) 17 | - [Face Recognition](#face-recognition) 18 | - [Face Tracking](#face-tracking) 19 | - [Matching Algorithm](#matching-algorithm) 20 | - [Reference](#reference) 21 | 22 | ## Architecture 23 | 24 |

25 | Sequence Diagram 26 |
27 | Sequence Diagram 28 |

29 | 30 | ## How to use 31 | 32 | ### Create Environment and Install Packages 33 | 34 | ```shell 35 | conda create -n face-dev python=3.9 36 | ``` 37 | 38 | ```shell 39 | conda activate face-dev 40 | ``` 41 | 42 | ```shell 43 | pip install torch==1.9.1+cpu torchvision==0.10.1+cpu torchaudio==0.9.1 -f https://download.pytorch.org/whl/torch_stable.html 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | ### Add new persons to datasets 48 | 49 | 1. **Create a folder named after the person** 50 | 51 | ``` 52 | datasets/ 53 | ├── backup 54 | ├── data 55 | ├── face_features 56 | └── new_persons 57 | ├── name-person1 58 | └── name-person2 59 | ``` 60 | 61 | 2. **Add the person's photos to that folder** 62 | 63 | ``` 64 | datasets/ 65 | ├── backup 66 | ├── data 67 | ├── face_features 68 | └── new_persons 69 | ├── name-person1 70 | │ └── image1.jpg 71 | │ └── image2.jpg 72 | └── name-person2 73 | └── image1.jpg 74 | └── image2.jpg 75 | ``` 76 | 77 | 3. **Run to add new persons** 78 | 79 | ```shell 80 | python add_persons.py 81 | ``` 82 | 83 | 4. **Run to recognize** 84 | 85 | ```shell 86 | python recognize.py 87 | ``` 88 | 89 | ## Technology 90 | 91 | ### Face Detection 92 | 93 | 1. **Retinaface** 94 | 95 | - Retinaface is a powerful face detection algorithm known for its accuracy and speed. It utilizes a single deep convolutional network to detect faces in an image with high precision. 96 | 97 | 2. **Yolov5-face** 98 | 99 | - Yolov5-face is based on the YOLO (You Only Look Once) architecture, specializing in face detection. It provides real-time face detection with a focus on efficiency and accuracy. 100 | 101 | 3. **SCRFD** 102 | - SCRFD (Sample and Computation Redistribution for Efficient Face Detection) is designed for real-time face detection across various scales. It is particularly effective in detecting faces at different resolutions within the same image. 103 | 104 | ### Face Recognition 105 | 106 | 1. **ArcFace** 107 | 108 | - ArcFace is a state-of-the-art face recognition algorithm that focuses on learning highly discriminative features for face verification and identification. It is known for its robustness to variations in lighting, pose, and facial expressions. 109 | 110 |

111 | ArcFace 112 |
113 | ArcFace 114 |

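For reference, the sketch below condenses how these pieces fit together in this repository: detect faces with the bundled `SCRFD` wrapper, align each face with `norm_crop`, and convert it into a normalized ArcFace embedding with `iresnet_inference`, mirroring what `add_persons.py` and `face_align.py` already do. It is a minimal sketch rather than a replacement for those scripts; the weight paths are the defaults from `add_persons.py`, and the input image path is a placeholder.

```python
import cv2
import numpy as np
import torch
from torchvision import transforms

from face_alignment.alignment import norm_crop
from face_detection.scrfd.detector import SCRFD
from face_recognition.arcface.model import iresnet_inference

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Detector and recognizer, initialized the same way as in add_persons.py
detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx")
recognizer = iresnet_inference(
    model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device
)

# Preprocessing applied before the ArcFace model (same as add_persons.py)
preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Resize((112, 112)),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)


@torch.no_grad()
def embed_faces(image_path):
    """Detect faces and return one L2-normalized ArcFace embedding per face."""
    image = cv2.imread(image_path)
    bboxes, landmarks = detector.detect(image=image)

    embeddings = []
    for keypoints in landmarks:
        # Align the face to the 112x112 ArcFace template using its 5 landmarks
        face = norm_crop(image, keypoints)
        face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        emb = recognizer(preprocess(face).unsqueeze(0).to(device))[0].cpu().numpy()
        embeddings.append(emb / np.linalg.norm(emb))
    return bboxes, np.array(embeddings)
```

The embeddings produced this way are what `add_persons.py` stores in `datasets/face_features/feature.npz` and what the matching step below compares against.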
115 | 116 | ### Face Tracking 117 | 118 | 1. **ByteTrack** 119 | 120 |

121 | ByteTrack 122 |
123 | ByteTrack is a simple, fast and strong multi-object tracker. 124 |

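The tracker bundled under `face_tracking/tracker/` is the ByteTrack implementation referenced above, and `tracking.py` connects it to the face detector. The sketch below shows the expected interface, assuming `byte_tracker.py` keeps the original ByteTrack signature (`BYTETracker.update` consuming an `(N, 5)` array of `[x1, y1, x2, y2, score]` detections) and that `config_tracking.yaml` is a flat mapping of its arguments such as `track_thresh`, `track_buffer`, and `match_thresh`; check those files for the exact layout this project uses.

```python
from types import SimpleNamespace

import numpy as np
import yaml

from face_tracking.tracker.byte_tracker import BYTETracker

# Tracking hyper-parameters; the flat-dict assumption is ours, so verify it
# against face_tracking/config/config_tracking.yaml.
with open("face_tracking/config/config_tracking.yaml") as f:
    args = SimpleNamespace(**yaml.safe_load(f))

tracker = BYTETracker(args, frame_rate=30)


def track_faces(detections, frame_height, frame_width):
    """Assign persistent track IDs to one frame's face detections.

    detections: (N, 5) array of [x1, y1, x2, y2, score] from the face detector.
    """
    targets = tracker.update(
        np.asarray(detections), [frame_height, frame_width], (frame_height, frame_width)
    )
    # Each track exposes a (top-left x, top-left y, width, height) box and a stable ID.
    return [(t.track_id, t.tlwh) for t in targets]
```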
125 | 126 | ### Matching Algorithm 127 | 128 | 1. **Cosine Similarity Algorithm** 129 | 130 | - Matching uses cosine similarity: the cosine of the angle between two feature vectors measures how close two face embeddings are. Because the stored embeddings are L2-normalized, the similarity is simply the dot product of the two vectors. 131 | 132 |

133 | Cosine Similarity Algorithm 134 |
135 | Cosine Similarity Algorithm 136 |

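A minimal sketch of this matching step, built on the feature store written by `add_persons.py` (`read_features` returns the saved names together with their L2-normalized embeddings); the `0.5` threshold is purely illustrative and not a value taken from this project:

```python
import numpy as np

from face_recognition.arcface.utils import read_features

# Names and embeddings saved by add_persons.py (its default features path).
images_name, images_emb = read_features("./datasets/face_features/feature")


def match_face(query_emb, threshold=0.5):
    """Return the best-matching identity for one face embedding, or UNKNOWN."""
    query_emb = query_emb / np.linalg.norm(query_emb)
    scores = images_emb @ query_emb  # cosine similarity, since both sides are unit vectors
    best = int(np.argmax(scores))
    if scores[best] < threshold:
        return "UNKNOWN", float(scores[best])
    return images_name[best], float(scores[best])
```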
137 | 138 | ## Reference 139 | 140 | - [ByteTrack](https://github.com/ifzhang/ByteTrack) 141 | - [Yolov5-face](https://github.com/deepcam-cn/yolov5-face) 142 | - [InsightFace - ArcFace](https://github.com/deepinsight/insightface/tree/master/recognition/arcface_torch) 143 | - [InsightFace-REST](https://github.com/SthPhoenix/InsightFace-REST) 144 | -------------------------------------------------------------------------------- /add_persons.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import shutil 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | from torchvision import transforms 9 | 10 | from face_detection.scrfd.detector import SCRFD 11 | from face_detection.yolov5_face.detector import Yolov5Face 12 | from face_recognition.arcface.model import iresnet_inference 13 | from face_recognition.arcface.utils import read_features 14 | 15 | # Check if CUDA is available and set the device accordingly 16 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 17 | 18 | # Initialize the face detector (Choose one of the detectors) 19 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt") 20 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx") 21 | 22 | # Initialize the face recognizer 23 | recognizer = iresnet_inference( 24 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device 25 | ) 26 | 27 | 28 | @torch.no_grad() 29 | def get_feature(face_image): 30 | """ 31 | Extract facial features from an image using the face recognition model. 32 | 33 | Args: 34 | face_image (numpy.ndarray): Input facial image. 35 | 36 | Returns: 37 | numpy.ndarray: Extracted facial features. 38 | """ 39 | # Define a series of image preprocessing steps 40 | face_preprocess = transforms.Compose( 41 | [ 42 | transforms.ToTensor(), 43 | transforms.Resize((112, 112)), 44 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 45 | ] 46 | ) 47 | 48 | # Convert the image to RGB format 49 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB) 50 | 51 | # Apply the defined preprocessing to the image 52 | face_image = face_preprocess(face_image).unsqueeze(0).to(device) 53 | 54 | # Use the model to obtain facial features 55 | emb_img_face = recognizer(face_image)[0].cpu().numpy() 56 | 57 | # Normalize the features 58 | images_emb = emb_img_face / np.linalg.norm(emb_img_face) 59 | return images_emb 60 | 61 | 62 | def add_persons(backup_dir, add_persons_dir, faces_save_dir, features_path): 63 | """ 64 | Add a new person to the face recognition database. 65 | 66 | Args: 67 | backup_dir (str): Directory to save backup data. 68 | add_persons_dir (str): Directory containing images of the new person. 69 | faces_save_dir (str): Directory to save the extracted faces. 70 | features_path (str): Path to save face features. 
71 | """ 72 | # Initialize lists to store names and features of added images 73 | images_name = [] 74 | images_emb = [] 75 | 76 | # Read the folder with images of the new person, extract faces, and save them 77 | for name_person in os.listdir(add_persons_dir): 78 | person_image_path = os.path.join(add_persons_dir, name_person) 79 | 80 | # Create a directory to save the faces of the person 81 | person_face_path = os.path.join(faces_save_dir, name_person) 82 | os.makedirs(person_face_path, exist_ok=True) 83 | 84 | for image_name in os.listdir(person_image_path): 85 | if image_name.endswith(("png", "jpg", "jpeg")): 86 | input_image = cv2.imread(os.path.join(person_image_path, image_name)) 87 | 88 | # Detect faces and landmarks using the face detector 89 | bboxes, landmarks = detector.detect(image=input_image) 90 | 91 | # Extract faces 92 | for i in range(len(bboxes)): 93 | # Get the number of files in the person's path 94 | number_files = len(os.listdir(person_face_path)) 95 | 96 | # Get the location of the face 97 | x1, y1, x2, y2, score = bboxes[i] 98 | 99 | # Extract the face from the image 100 | face_image = input_image[y1:y2, x1:x2] 101 | 102 | # Path to save the face 103 | path_save_face = os.path.join(person_face_path, f"{number_files}.jpg") 104 | 105 | # Save the face to the database 106 | cv2.imwrite(path_save_face, face_image) 107 | 108 | # Extract features from the face 109 | images_emb.append(get_feature(face_image=face_image)) 110 | images_name.append(name_person) 111 | 112 | # Check if no new person is found 113 | if images_emb == [] and images_name == []: 114 | print("No new person found!") 115 | return None 116 | 117 | # Convert lists to arrays 118 | images_emb = np.array(images_emb) 119 | images_name = np.array(images_name) 120 | 121 | # Read existing features if available 122 | features = read_features(features_path) 123 | 124 | if features is not None: 125 | # Unpack existing features 126 | old_images_name, old_images_emb = features 127 | 128 | # Combine new features with existing features 129 | images_name = np.hstack((old_images_name, images_name)) 130 | images_emb = np.vstack((old_images_emb, images_emb)) 131 | 132 | print("Update features!") 133 | 134 | # Save the combined features 135 | np.savez_compressed(features_path, images_name=images_name, images_emb=images_emb) 136 | 137 | # Move the data of the new person to the backup data directory 138 | for sub_dir in os.listdir(add_persons_dir): 139 | dir_to_move = os.path.join(add_persons_dir, sub_dir) 140 | shutil.move(dir_to_move, backup_dir, copy_function=shutil.copytree) 141 | 142 | print("Successfully added new person!") 143 | 144 | 145 | if __name__ == "__main__": 146 | # Parse command line arguments 147 | parser = argparse.ArgumentParser() 148 | parser.add_argument( 149 | "--backup-dir", 150 | type=str, 151 | default="./datasets/backup", 152 | help="Directory to save person data.", 153 | ) 154 | parser.add_argument( 155 | "--add-persons-dir", 156 | type=str, 157 | default="./datasets/new_persons", 158 | help="Directory to add new persons.", 159 | ) 160 | parser.add_argument( 161 | "--faces-save-dir", 162 | type=str, 163 | default="./datasets/data/", 164 | help="Directory to save faces.", 165 | ) 166 | parser.add_argument( 167 | "--features-path", 168 | type=str, 169 | default="./datasets/face_features/feature", 170 | help="Path to save face features.", 171 | ) 172 | opt = parser.parse_args() 173 | 174 | # Run the main function 175 | add_persons(**vars(opt)) 176 | 
-------------------------------------------------------------------------------- /assets/bytetrack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/bytetrack.png -------------------------------------------------------------------------------- /assets/face-detection.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection.gif -------------------------------------------------------------------------------- /assets/face-detection2.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-detection2.gif -------------------------------------------------------------------------------- /assets/face-recognition.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/face-recognition.gif -------------------------------------------------------------------------------- /assets/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/result.jpg -------------------------------------------------------------------------------- /assets/sequence-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/sequence-diagram.png -------------------------------------------------------------------------------- /assets/train_image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/train_image.jpg -------------------------------------------------------------------------------- /assets/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/assets/workflow.png -------------------------------------------------------------------------------- /datasets/backup/lam/lam.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/lam/lam.jpg -------------------------------------------------------------------------------- /datasets/backup/phuoc/avatar2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/avatar2.png -------------------------------------------------------------------------------- /datasets/backup/phuoc/phuoc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/phuoc.jpg 
-------------------------------------------------------------------------------- /datasets/backup/phuoc/quare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/backup/phuoc/quare.jpg -------------------------------------------------------------------------------- /datasets/data/lam/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/lam/0.jpg -------------------------------------------------------------------------------- /datasets/data/phuoc/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/0.jpg -------------------------------------------------------------------------------- /datasets/data/phuoc/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/1.jpg -------------------------------------------------------------------------------- /datasets/data/phuoc/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/data/phuoc/2.jpg -------------------------------------------------------------------------------- /datasets/face_features/feature.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/datasets/face_features/feature.npz -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import cv2 4 | 5 | from face_detection.scrfd.detector import SCRFD 6 | from face_detection.yolov5_face.detector import Yolov5Face 7 | 8 | # Initialize the face detector 9 | detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5m-face.pt") 10 | # detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx") 11 | 12 | 13 | def main(): 14 | # Open the camera 15 | cap = cv2.VideoCapture(0) 16 | 17 | # Initialize variables for measuring frame rate 18 | start = time.time_ns() 19 | frame_count = 0 20 | fps = -1 21 | 22 | # Save video 23 | frame_width = int(cap.get(3)) 24 | frame_height = int(cap.get(4)) 25 | size = (frame_width, frame_height) 26 | video = cv2.VideoWriter( 27 | "results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size 28 | ) 29 | 30 | # Read frames from the camera 31 | while True: 32 | # Capture a frame from the camera 33 | _, frame = cap.read() 34 | 35 | # Get faces and landmarks using the face detector 36 | bboxes, landmarks = detector.detect(image=frame) 37 | h, w, c = frame.shape 38 | 39 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness 40 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)] 41 | 42 | # Draw bounding boxes and landmarks on the frame 43 | for i in range(len(bboxes)): 44 | # Get location of the face 45 | x1, y1, x2, y2, score = bboxes[i] 46 | 
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2) 47 | 48 | # Draw facial landmarks 49 | for id, key_point in enumerate(landmarks[i]): 50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1) 51 | 52 | # Calculate and display the frame rate 53 | frame_count += 1 54 | if frame_count >= 30: 55 | end = time.time_ns() 56 | fps = 1e9 * frame_count / (end - start) 57 | frame_count = 0 58 | start = time.time_ns() 59 | 60 | if fps > 0: 61 | fps_label = "FPS: %.2f" % fps 62 | cv2.putText( 63 | frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2 64 | ) 65 | 66 | # Save the frame to the video 67 | video.write(frame) 68 | 69 | # Show the result in a window 70 | cv2.imshow("Face Detection", frame) 71 | 72 | # Press 'Q' on the keyboard to exit 73 | if cv2.waitKey(25) & 0xFF == ord("q"): 74 | break 75 | 76 | # Release video and camera, and close all OpenCV windows 77 | video.release() 78 | cap.release() 79 | cv2.destroyAllWindows() 80 | cv2.waitKey(0) 81 | 82 | 83 | if __name__ == "__main__": 84 | main() 85 | -------------------------------------------------------------------------------- /face_align.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import cv2 4 | 5 | from face_alignment.alignment import norm_crop 6 | from face_detection.scrfd.detector import SCRFD 7 | from face_detection.yolov5_face.detector import Yolov5Face 8 | 9 | # Initialize the face detector 10 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-0.5.pt") 11 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx") 12 | 13 | 14 | def main(): 15 | # Open the camera 16 | cap = cv2.VideoCapture(0) 17 | 18 | # Initialize variables for measuring frame rate 19 | start = time.time_ns() 20 | frame_count = 0 21 | fps = -1 22 | 23 | # Save video 24 | frame_width = int(cap.get(3)) 25 | frame_height = int(cap.get(4)) 26 | size = (frame_width, frame_height) 27 | video = cv2.VideoWriter("results/face-detection.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, size) 28 | 29 | # Read frames from the camera 30 | while True: 31 | # Capture a frame from the camera 32 | _, frame = cap.read() 33 | 34 | # Get faces and landmarks using the face detector 35 | bboxes, landmarks = detector.detect(image=frame) 36 | h, w, c = frame.shape 37 | 38 | tl = 1 or round(0.002 * (h + w) / 2) + 1 # Line and font thickness 39 | clors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255)] 40 | 41 | # Draw bounding boxes and landmarks on the frame 42 | for i in range(len(bboxes)): 43 | # Get location of the face 44 | x1, y1, x2, y2, score = bboxes[i] 45 | face = frame[y1:y2, x1:x2] 46 | cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 146, 230), 2) 47 | 48 | # Draw facial landmarks 49 | for id, key_point in enumerate(landmarks[i]): 50 | cv2.circle(frame, tuple(key_point), tl + 1, clors[id], -1) 51 | 52 | align = norm_crop(frame, landmarks[i]) 53 | 54 | # Calculate and display the frame rate 55 | frame_count += 1 56 | if frame_count >= 30: 57 | end = time.time_ns() 58 | fps = 1e9 * frame_count / (end - start) 59 | frame_count = 0 60 | start = time.time_ns() 61 | 62 | if fps > 0: 63 | fps_label = "FPS: %.2f" % fps 64 | cv2.putText(frame, fps_label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2) 65 | 66 | # Save the frame to the video 67 | video.write(frame) 68 | 69 | # Show the result in a window 70 | cv2.imshow("Face Detection", frame) 71 | cv2.imshow("Face align", align) 72 | 73 | # Press 'Q' on the keyboard 
to exit 74 | if cv2.waitKey(25) & 0xFF == ord("q"): 75 | break 76 | 77 | # Release video and camera, and close all OpenCV windows 78 | video.release() 79 | cap.release() 80 | cv2.destroyAllWindows() 81 | cv2.waitKey(0) 82 | 83 | 84 | if __name__ == "__main__": 85 | main() 86 | -------------------------------------------------------------------------------- /face_alignment/alignment.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from skimage import transform as trans 4 | 5 | # Define a standard set of destination landmarks for ArcFace alignment 6 | arcface_dst = np.array( 7 | [ 8 | [38.2946, 51.6963], 9 | [73.5318, 51.5014], 10 | [56.0252, 71.7366], 11 | [41.5493, 92.3655], 12 | [70.7299, 92.2041], 13 | ], 14 | dtype=np.float32, 15 | ) 16 | 17 | 18 | def estimate_norm(lmk, image_size=112, mode="arcface"): 19 | """ 20 | Estimate the transformation matrix for aligning facial landmarks. 21 | 22 | Args: 23 | lmk (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks. 24 | image_size (int): Desired output image size. 25 | mode (str): Alignment mode, currently only "arcface" is supported. 26 | 27 | Returns: 28 | numpy.ndarray: Transformation matrix (2x3) for aligning facial landmarks. 29 | """ 30 | # Check input conditions 31 | assert lmk.shape == (5, 2) 32 | assert image_size % 112 == 0 or image_size % 128 == 0 33 | 34 | # Adjust ratio and x-coordinate difference based on image size 35 | if image_size % 112 == 0: 36 | ratio = float(image_size) / 112.0 37 | diff_x = 0 38 | else: 39 | ratio = float(image_size) / 128.0 40 | diff_x = 8.0 * ratio 41 | 42 | # Scale and shift the destination landmarks 43 | dst = arcface_dst * ratio 44 | dst[:, 0] += diff_x 45 | 46 | # Estimate the similarity transformation 47 | tform = trans.SimilarityTransform() 48 | tform.estimate(lmk, dst) 49 | M = tform.params[0:2, :] 50 | 51 | return M 52 | 53 | 54 | def norm_crop(img, landmark, image_size=112, mode="arcface"): 55 | """ 56 | Normalize and crop a facial image based on provided landmarks. 57 | 58 | Args: 59 | img (numpy.ndarray): Input facial image. 60 | landmark (numpy.ndarray): 2D array of shape (5, 2) representing facial landmarks. 61 | image_size (int): Desired output image size. 62 | mode (str): Alignment mode, currently only "arcface" is supported. 63 | 64 | Returns: 65 | numpy.ndarray: Normalized and cropped facial image. 
66 | """ 67 | # Estimate the transformation matrix 68 | M = estimate_norm(landmark, image_size, mode) 69 | 70 | # Apply the affine transformation to the image 71 | warped = cv2.warpAffine(img, M, (image_size, image_size), borderValue=0.0) 72 | 73 | return warped 74 | -------------------------------------------------------------------------------- /face_detection/retinaface/LICENSE.MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /face_detection/retinaface/README.md: -------------------------------------------------------------------------------- 1 |

Get weights:

2 | [Google drive](https://drive.google.com/drive/folders/1oZRSG0ZegbVkVwUd8wUIQx8W7yfZ_ki1?usp=drive_link) 3 | 4 |

Run

5 | 6 |

Using Camera

7 | backbone: resnet50 8 | 9 | ``` 10 | python camera_test.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu 11 | ``` 12 | 13 | backbone: mobilenet0.25 14 | 15 | ``` 16 | python camera_test.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu 17 | ``` 18 | 19 |

Using Image

20 | change image in ./curve, change file path in detect.py (line 87) 21 | 22 | backbone: resnet50 23 | 24 | ``` 25 | python detect.py --trained_model weights/Resnet50_Final.pth --network resnet50 --cpu 26 | ``` 27 | 28 | backbone: mobilenet0.25 29 | 30 | ``` 31 | python detect.py --trained_model weights/mobilenet0.25_Final.pth --network mobile0.25 --cpu 32 | ``` 33 | -------------------------------------------------------------------------------- /face_detection/retinaface/camera_test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import os 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from data import cfg_mnet, cfg_re50 12 | from layers.functions.prior_box import PriorBox 13 | from models.retinaface import RetinaFace 14 | from utils.box_utils import decode, decode_landm 15 | from utils.nms.py_cpu_nms import py_cpu_nms 16 | 17 | parser = argparse.ArgumentParser(description="Retinaface") 18 | 19 | parser.add_argument( 20 | "-m", 21 | "--trained_model", 22 | default="./weights/Resnet50_Final.pth", 23 | type=str, 24 | help="Trained state_dict file path to open", 25 | ) 26 | parser.add_argument( 27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50" 28 | ) 29 | parser.add_argument( 30 | "--cpu", action="store_true", default=False, help="Use cpu inference" 31 | ) 32 | parser.add_argument( 33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold" 34 | ) 35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k") 36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold") 37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k") 38 | parser.add_argument( 39 | "-s", 40 | "--save_image", 41 | action="store_true", 42 | default=True, 43 | help="show detection results", 44 | ) 45 | parser.add_argument( 46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold" 47 | ) 48 | args = parser.parse_args() 49 | 50 | 51 | def check_keys(model, pretrained_state_dict): 52 | ckpt_keys = set(pretrained_state_dict.keys()) 53 | model_keys = set(model.state_dict().keys()) 54 | used_pretrained_keys = model_keys & ckpt_keys 55 | unused_pretrained_keys = ckpt_keys - model_keys 56 | missing_keys = model_keys - ckpt_keys 57 | print("Missing keys:{}".format(len(missing_keys))) 58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys))) 59 | print("Used keys:{}".format(len(used_pretrained_keys))) 60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint" 61 | return True 62 | 63 | 64 | def remove_prefix(state_dict, prefix): 65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'""" 66 | print("remove prefix '{}'".format(prefix)) 67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 68 | return {f(key): value for key, value in state_dict.items()} 69 | 70 | 71 | def load_model(model, pretrained_path, load_to_cpu): 72 | print("Loading pretrained model from {}".format(pretrained_path)) 73 | if load_to_cpu: 74 | pretrained_dict = torch.load( 75 | pretrained_path, map_location=lambda storage, loc: storage 76 | ) 77 | else: 78 | device = torch.cuda.current_device() 79 | pretrained_dict = torch.load( 80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device) 81 | ) 82 | if "state_dict" in 
pretrained_dict.keys(): 83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.") 84 | else: 85 | pretrained_dict = remove_prefix(pretrained_dict, "module.") 86 | check_keys(model, pretrained_dict) 87 | model.load_state_dict(pretrained_dict, strict=False) 88 | return model 89 | 90 | 91 | if __name__ == "__main__": 92 | torch.set_grad_enabled(False) 93 | cfg = None 94 | if args.network == "mobile0.25": 95 | cfg = cfg_mnet 96 | elif args.network == "resnet50": 97 | cfg = cfg_re50 98 | # net and model 99 | net = RetinaFace(cfg=cfg, phase="test") 100 | net = load_model(net, args.trained_model, args.cpu) 101 | net.eval() 102 | print("Finished loading model!") 103 | print(net) 104 | cudnn.benchmark = True 105 | device = torch.device("cpu" if args.cpu else "cuda") 106 | net = net.to(device) 107 | 108 | resize = 1 109 | 110 | cam = cv2.VideoCapture(0) 111 | fps = cam.get(cv2.CAP_PROP_FPS) 112 | print(fps) 113 | 114 | # testing begin 115 | # for i in range(10): 116 | # image_path = "./curve/test.jpg" 117 | # img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 118 | 119 | while True: 120 | _, img_raw = cam.read() 121 | 122 | img = np.float32(img_raw) 123 | 124 | im_height, im_width, _ = img.shape 125 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 126 | img -= (104, 117, 123) 127 | img = img.transpose(2, 0, 1) 128 | img = torch.from_numpy(img).unsqueeze(0) 129 | img = img.to(device) 130 | scale = scale.to(device) 131 | 132 | tic = time.time() 133 | loc, conf, landms = net(img) # forward pass 134 | print("net forward time: {:.4f}".format(time.time() - tic)) 135 | 136 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 137 | priors = priorbox.forward() 138 | priors = priors.to(device) 139 | prior_data = priors.data 140 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"]) 141 | boxes = boxes * scale / resize 142 | boxes = boxes.cpu().numpy() 143 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 144 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"]) 145 | scale1 = torch.Tensor( 146 | [ 147 | img.shape[3], 148 | img.shape[2], 149 | img.shape[3], 150 | img.shape[2], 151 | img.shape[3], 152 | img.shape[2], 153 | img.shape[3], 154 | img.shape[2], 155 | img.shape[3], 156 | img.shape[2], 157 | ] 158 | ) 159 | scale1 = scale1.to(device) 160 | landms = landms * scale1 / resize 161 | landms = landms.cpu().numpy() 162 | 163 | # ignore low scores 164 | inds = np.where(scores > args.confidence_threshold)[0] 165 | boxes = boxes[inds] 166 | landms = landms[inds] 167 | scores = scores[inds] 168 | 169 | # keep top-K before NMS 170 | order = scores.argsort()[::-1][: args.top_k] 171 | boxes = boxes[order] 172 | landms = landms[order] 173 | scores = scores[order] 174 | 175 | # do NMS 176 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 177 | keep = py_cpu_nms(dets, args.nms_threshold) 178 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 179 | dets = dets[keep, :] 180 | landms = landms[keep] 181 | 182 | # keep top-K faster NMS 183 | dets = dets[: args.keep_top_k, :] 184 | landms = landms[: args.keep_top_k, :] 185 | 186 | dets = np.concatenate((dets, landms), axis=1) 187 | 188 | # show image 189 | if args.save_image: 190 | for b in dets: 191 | if b[4] < args.vis_thres: 192 | continue 193 | text = "{:.4f}".format(b[4]) 194 | b = list(map(int, b)) 195 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 196 | cx = b[0] 197 | cy = b[1] + 12 198 | cv2.putText( 199 | 
img_raw, 200 | text, 201 | (cx, cy), 202 | cv2.FONT_HERSHEY_DUPLEX, 203 | 0.5, 204 | (255, 255, 255), 205 | ) 206 | 207 | # landms 208 | # cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) 209 | # cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) 210 | # cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) 211 | # cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) 212 | # cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) 213 | 214 | # save image 215 | name = "test.jpg" 216 | cv2.imwrite(name, img_raw) 217 | cv2.imshow("camera", img_raw) 218 | -------------------------------------------------------------------------------- /face_detection/retinaface/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import os 5 | 6 | import cv2 7 | import numpy as np 8 | import torch 9 | import torch.backends.cudnn as cudnn 10 | from data import cfg_mnet, cfg_re50 11 | from layers.functions.prior_box import PriorBox 12 | from models.retinaface import RetinaFace 13 | from utils.box_utils import decode, decode_landm 14 | from utils.nms.py_cpu_nms import py_cpu_nms 15 | from utils.timer import Timer 16 | 17 | parser = argparse.ArgumentParser(description="Test") 18 | parser.add_argument( 19 | "-m", 20 | "--trained_model", 21 | default="./weights/mobilenet0.25_Final.pth", 22 | type=str, 23 | help="Trained state_dict file path to open", 24 | ) 25 | parser.add_argument( 26 | "--network", default="mobile0.25", help="Backbone network mobile0.25 or resnet50" 27 | ) 28 | parser.add_argument( 29 | "--long_side", 30 | default=640, 31 | help="when origin_size is false, long_side is scaled size(320 or 640 for long side)", 32 | ) 33 | parser.add_argument( 34 | "--cpu", action="store_true", default=True, help="Use cpu inference" 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | 40 | def check_keys(model, pretrained_state_dict): 41 | ckpt_keys = set(pretrained_state_dict.keys()) 42 | model_keys = set(model.state_dict().keys()) 43 | used_pretrained_keys = model_keys & ckpt_keys 44 | unused_pretrained_keys = ckpt_keys - model_keys 45 | missing_keys = model_keys - ckpt_keys 46 | print("Missing keys:{}".format(len(missing_keys))) 47 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys))) 48 | print("Used keys:{}".format(len(used_pretrained_keys))) 49 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint" 50 | return True 51 | 52 | 53 | def remove_prefix(state_dict, prefix): 54 | """Old style model is stored with all names of parameters sharing common prefix 'module.'""" 55 | print("remove prefix '{}'".format(prefix)) 56 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 57 | return {f(key): value for key, value in state_dict.items()} 58 | 59 | 60 | def load_model(model, pretrained_path, load_to_cpu): 61 | print("Loading pretrained model from {}".format(pretrained_path)) 62 | if load_to_cpu: 63 | pretrained_dict = torch.load( 64 | pretrained_path, map_location=lambda storage, loc: storage 65 | ) 66 | else: 67 | device = torch.cuda.current_device() 68 | pretrained_dict = torch.load( 69 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device) 70 | ) 71 | if "state_dict" in pretrained_dict.keys(): 72 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.") 73 | else: 74 | pretrained_dict = remove_prefix(pretrained_dict, "module.") 75 | check_keys(model, pretrained_dict) 76 | 
model.load_state_dict(pretrained_dict, strict=False) 77 | return model 78 | 79 | 80 | if __name__ == "__main__": 81 | torch.set_grad_enabled(False) 82 | cfg = None 83 | if args.network == "mobile0.25": 84 | cfg = cfg_mnet 85 | elif args.network == "resnet50": 86 | cfg = cfg_re50 87 | # net and model 88 | net = RetinaFace(cfg=cfg, phase="test") 89 | net = load_model(net, args.trained_model, args.cpu) 90 | net.eval() 91 | print("Finished loading model!") 92 | print(net) 93 | device = torch.device("cpu" if args.cpu else "cuda") 94 | net = net.to(device) 95 | 96 | # ------------------------ export ----------------------------- 97 | output_onnx = "FaceDetector.onnx" 98 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 99 | input_names = ["input0"] 100 | output_names = ["output0"] 101 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device) 102 | 103 | torch_out = torch.onnx._export( 104 | net, 105 | inputs, 106 | output_onnx, 107 | export_params=True, 108 | verbose=False, 109 | input_names=input_names, 110 | output_names=output_names, 111 | ) 112 | -------------------------------------------------------------------------------- /face_detection/retinaface/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import * 2 | from .data_augment import * 3 | from .wider_face import WiderFaceDetection, detection_collate 4 | -------------------------------------------------------------------------------- /face_detection/retinaface/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | "name": "mobilenet0.25", 5 | "min_sizes": [[16, 32], [64, 128], [256, 512]], 6 | "steps": [8, 16, 32], 7 | "variance": [0.1, 0.2], 8 | "clip": False, 9 | "loc_weight": 2.0, 10 | "gpu_train": True, 11 | "batch_size": 32, 12 | "ngpu": 1, 13 | "epoch": 250, 14 | "decay1": 190, 15 | "decay2": 220, 16 | "image_size": 640, 17 | "pretrain": True, 18 | "return_layers": {"stage1": 1, "stage2": 2, "stage3": 3}, 19 | "in_channel": 32, 20 | "out_channel": 64, 21 | } 22 | 23 | cfg_re50 = { 24 | "name": "Resnet50", 25 | "min_sizes": [[16, 32], [64, 128], [256, 512]], 26 | "steps": [8, 16, 32], 27 | "variance": [0.1, 0.2], 28 | "clip": False, 29 | "loc_weight": 2.0, 30 | "gpu_train": True, 31 | "batch_size": 24, 32 | "ngpu": 4, 33 | "epoch": 100, 34 | "decay1": 70, 35 | "decay2": 90, 36 | "image_size": 840, 37 | "pretrain": True, 38 | "return_layers": {"layer2": 1, "layer3": 2, "layer4": 3}, 39 | "in_channel": 256, 40 | "out_channel": 256, 41 | } 42 | -------------------------------------------------------------------------------- /face_detection/retinaface/data/data_augment.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import cv2 4 | import numpy as np 5 | from utils.box_utils import matrix_iof 6 | 7 | 8 | def _crop(image, boxes, labels, landm, img_dim): 9 | height, width, _ = image.shape 10 | pad_image_flag = True 11 | 12 | for _ in range(250): 13 | """ 14 | if random.uniform(0, 1) <= 0.2: 15 | scale = 1.0 16 | else: 17 | scale = random.uniform(0.3, 1.0) 18 | """ 19 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] 20 | scale = random.choice(PRE_SCALES) 21 | short_side = min(width, height) 22 | w = int(scale * short_side) 23 | h = w 24 | 25 | if width == w: 26 | l = 0 27 | else: 28 | l = random.randrange(width - w) 29 | if height == h: 30 | t = 0 31 | else: 32 | t = random.randrange(height - h) 33 | 
roi = np.array((l, t, l + w, t + h)) 34 | 35 | value = matrix_iof(boxes, roi[np.newaxis]) 36 | flag = value >= 1 37 | if not flag.any(): 38 | continue 39 | 40 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 41 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 42 | boxes_t = boxes[mask_a].copy() 43 | labels_t = labels[mask_a].copy() 44 | landms_t = landm[mask_a].copy() 45 | landms_t = landms_t.reshape([-1, 5, 2]) 46 | 47 | if boxes_t.shape[0] == 0: 48 | continue 49 | 50 | image_t = image[roi[1] : roi[3], roi[0] : roi[2]] 51 | 52 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 53 | boxes_t[:, :2] -= roi[:2] 54 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 55 | boxes_t[:, 2:] -= roi[:2] 56 | 57 | # landm 58 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] 59 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) 60 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) 61 | landms_t = landms_t.reshape([-1, 10]) 62 | 63 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale 64 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim 65 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim 66 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0 67 | boxes_t = boxes_t[mask_b] 68 | labels_t = labels_t[mask_b] 69 | landms_t = landms_t[mask_b] 70 | 71 | if boxes_t.shape[0] == 0: 72 | continue 73 | 74 | pad_image_flag = False 75 | 76 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag 77 | return image, boxes, labels, landm, pad_image_flag 78 | 79 | 80 | def _distort(image): 81 | def _convert(image, alpha=1, beta=0): 82 | tmp = image.astype(float) * alpha + beta 83 | tmp[tmp < 0] = 0 84 | tmp[tmp > 255] = 255 85 | image[:] = tmp 86 | 87 | image = image.copy() 88 | 89 | if random.randrange(2): 90 | # brightness distortion 91 | if random.randrange(2): 92 | _convert(image, beta=random.uniform(-32, 32)) 93 | 94 | # contrast distortion 95 | if random.randrange(2): 96 | _convert(image, alpha=random.uniform(0.5, 1.5)) 97 | 98 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 99 | 100 | # saturation distortion 101 | if random.randrange(2): 102 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 103 | 104 | # hue distortion 105 | if random.randrange(2): 106 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 107 | tmp %= 180 108 | image[:, :, 0] = tmp 109 | 110 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 111 | 112 | else: 113 | # brightness distortion 114 | if random.randrange(2): 115 | _convert(image, beta=random.uniform(-32, 32)) 116 | 117 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 118 | 119 | # saturation distortion 120 | if random.randrange(2): 121 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 122 | 123 | # hue distortion 124 | if random.randrange(2): 125 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 126 | tmp %= 180 127 | image[:, :, 0] = tmp 128 | 129 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 130 | 131 | # contrast distortion 132 | if random.randrange(2): 133 | _convert(image, alpha=random.uniform(0.5, 1.5)) 134 | 135 | return image 136 | 137 | 138 | def _expand(image, boxes, fill, p): 139 | if random.randrange(2): 140 | return image, boxes 141 | 142 | height, width, depth = image.shape 143 | 144 | scale = random.uniform(1, p) 145 | w = int(scale * width) 146 | h = int(scale * height) 147 | 148 | left = random.randint(0, w - width) 149 | top = random.randint(0, h - height) 150 | 151 | boxes_t = boxes.copy() 152 | 
boxes_t[:, :2] += (left, top) 153 | boxes_t[:, 2:] += (left, top) 154 | expand_image = np.empty((h, w, depth), dtype=image.dtype) 155 | expand_image[:, :] = fill 156 | expand_image[top : top + height, left : left + width] = image 157 | image = expand_image 158 | 159 | return image, boxes_t 160 | 161 | 162 | def _mirror(image, boxes, landms): 163 | _, width, _ = image.shape 164 | if random.randrange(2): 165 | image = image[:, ::-1] 166 | boxes = boxes.copy() 167 | boxes[:, 0::2] = width - boxes[:, 2::-2] 168 | 169 | # landm 170 | landms = landms.copy() 171 | landms = landms.reshape([-1, 5, 2]) 172 | landms[:, :, 0] = width - landms[:, :, 0] 173 | tmp = landms[:, 1, :].copy() 174 | landms[:, 1, :] = landms[:, 0, :] 175 | landms[:, 0, :] = tmp 176 | tmp1 = landms[:, 4, :].copy() 177 | landms[:, 4, :] = landms[:, 3, :] 178 | landms[:, 3, :] = tmp1 179 | landms = landms.reshape([-1, 10]) 180 | 181 | return image, boxes, landms 182 | 183 | 184 | def _pad_to_square(image, rgb_mean, pad_image_flag): 185 | if not pad_image_flag: 186 | return image 187 | height, width, _ = image.shape 188 | long_side = max(width, height) 189 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) 190 | image_t[:, :] = rgb_mean 191 | image_t[0 : 0 + height, 0 : 0 + width] = image 192 | return image_t 193 | 194 | 195 | def _resize_subtract_mean(image, insize, rgb_mean): 196 | interp_methods = [ 197 | cv2.INTER_LINEAR, 198 | cv2.INTER_CUBIC, 199 | cv2.INTER_AREA, 200 | cv2.INTER_NEAREST, 201 | cv2.INTER_LANCZOS4, 202 | ] 203 | interp_method = interp_methods[random.randrange(5)] 204 | image = cv2.resize(image, (insize, insize), interpolation=interp_method) 205 | image = image.astype(np.float32) 206 | image -= rgb_mean 207 | return image.transpose(2, 0, 1) 208 | 209 | 210 | class preproc(object): 211 | def __init__(self, img_dim, rgb_means): 212 | self.img_dim = img_dim 213 | self.rgb_means = rgb_means 214 | 215 | def __call__(self, image, targets): 216 | assert targets.shape[0] > 0, "this image does not have gt" 217 | 218 | boxes = targets[:, :4].copy() 219 | labels = targets[:, -1].copy() 220 | landm = targets[:, 4:-1].copy() 221 | 222 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop( 223 | image, boxes, labels, landm, self.img_dim 224 | ) 225 | image_t = _distort(image_t) 226 | image_t = _pad_to_square(image_t, self.rgb_means, pad_image_flag) 227 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) 228 | height, width, _ = image_t.shape 229 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) 230 | boxes_t[:, 0::2] /= width 231 | boxes_t[:, 1::2] /= height 232 | 233 | landm_t[:, 0::2] /= width 234 | landm_t[:, 1::2] /= height 235 | 236 | labels_t = np.expand_dims(labels_t, 1) 237 | targets_t = np.hstack((boxes_t, landm_t, labels_t)) 238 | 239 | return image_t, targets_t 240 | -------------------------------------------------------------------------------- /face_detection/retinaface/data/wider_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | 5 | import cv2 6 | import numpy as np 7 | import torch 8 | import torch.utils.data as data 9 | 10 | 11 | class WiderFaceDetection(data.Dataset): 12 | def __init__(self, txt_path, preproc=None): 13 | self.preproc = preproc 14 | self.imgs_path = [] 15 | self.words = [] 16 | f = open(txt_path, "r") 17 | lines = f.readlines() 18 | isFirst = True 19 | labels = [] 20 | for line in lines: 21 | line = line.rstrip() 22 | if 
line.startswith("#"): 23 | if isFirst is True: 24 | isFirst = False 25 | else: 26 | labels_copy = labels.copy() 27 | self.words.append(labels_copy) 28 | labels.clear() 29 | path = line[2:] 30 | path = txt_path.replace("label.txt", "images/") + path 31 | self.imgs_path.append(path) 32 | else: 33 | line = line.split(" ") 34 | label = [float(x) for x in line] 35 | labels.append(label) 36 | 37 | self.words.append(labels) 38 | 39 | def __len__(self): 40 | return len(self.imgs_path) 41 | 42 | def __getitem__(self, index): 43 | img = cv2.imread(self.imgs_path[index]) 44 | height, width, _ = img.shape 45 | 46 | labels = self.words[index] 47 | annotations = np.zeros((0, 15)) 48 | if len(labels) == 0: 49 | return annotations 50 | for idx, label in enumerate(labels): 51 | annotation = np.zeros((1, 15)) 52 | # bbox 53 | annotation[0, 0] = label[0] # x1 54 | annotation[0, 1] = label[1] # y1 55 | annotation[0, 2] = label[0] + label[2] # x2 56 | annotation[0, 3] = label[1] + label[3] # y2 57 | 58 | # landmarks 59 | annotation[0, 4] = label[4] # l0_x 60 | annotation[0, 5] = label[5] # l0_y 61 | annotation[0, 6] = label[7] # l1_x 62 | annotation[0, 7] = label[8] # l1_y 63 | annotation[0, 8] = label[10] # l2_x 64 | annotation[0, 9] = label[11] # l2_y 65 | annotation[0, 10] = label[13] # l3_x 66 | annotation[0, 11] = label[14] # l3_y 67 | annotation[0, 12] = label[16] # l4_x 68 | annotation[0, 13] = label[17] # l4_y 69 | if annotation[0, 4] < 0: 70 | annotation[0, 14] = -1 71 | else: 72 | annotation[0, 14] = 1 73 | 74 | annotations = np.append(annotations, annotation, axis=0) 75 | target = np.array(annotations) 76 | if self.preproc is not None: 77 | img, target = self.preproc(img, target) 78 | 79 | return torch.from_numpy(img), target 80 | 81 | 82 | def detection_collate(batch): 83 | """Custom collate fn for dealing with batches of images that have a different 84 | number of associated object annotations (bounding boxes). 
85 | 86 | Arguments: 87 | batch: (tuple) A tuple of tensor images and lists of annotations 88 | 89 | Return: 90 | A tuple containing: 91 | 1) (tensor) batch of images stacked on their 0 dim 92 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 93 | """ 94 | targets = [] 95 | imgs = [] 96 | for _, sample in enumerate(batch): 97 | for _, tup in enumerate(sample): 98 | if torch.is_tensor(tup): 99 | imgs.append(tup) 100 | elif isinstance(tup, type(np.empty(0))): 101 | annos = torch.from_numpy(tup).float() 102 | targets.append(annos) 103 | 104 | return (torch.stack(imgs, 0), targets) 105 | -------------------------------------------------------------------------------- /face_detection/retinaface/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import os 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import torch 10 | import torch.backends.cudnn as cudnn 11 | from data import cfg_mnet, cfg_re50 12 | from layers.functions.prior_box import PriorBox 13 | from models.retinaface import RetinaFace 14 | from utils.box_utils import decode, decode_landm 15 | from utils.nms.py_cpu_nms import py_cpu_nms 16 | 17 | parser = argparse.ArgumentParser(description="Retinaface") 18 | 19 | parser.add_argument( 20 | "-m", 21 | "--trained_model", 22 | default="./weights/Resnet50_Final.pth", 23 | type=str, 24 | help="Trained state_dict file path to open", 25 | ) 26 | parser.add_argument( 27 | "--network", default="resnet50", help="Backbone network mobile0.25 or resnet50" 28 | ) 29 | parser.add_argument( 30 | "--cpu", action="store_true", default=False, help="Use cpu inference" 31 | ) 32 | parser.add_argument( 33 | "--confidence_threshold", default=0.02, type=float, help="confidence_threshold" 34 | ) 35 | parser.add_argument("--top_k", default=5000, type=int, help="top_k") 36 | parser.add_argument("--nms_threshold", default=0.4, type=float, help="nms_threshold") 37 | parser.add_argument("--keep_top_k", default=750, type=int, help="keep_top_k") 38 | parser.add_argument( 39 | "-s", 40 | "--save_image", 41 | action="store_true", 42 | default=True, 43 | help="show detection results", 44 | ) 45 | parser.add_argument( 46 | "--vis_thres", default=0.6, type=float, help="visualization_threshold" 47 | ) 48 | args = parser.parse_args() 49 | 50 | 51 | def check_keys(model, pretrained_state_dict): 52 | ckpt_keys = set(pretrained_state_dict.keys()) 53 | model_keys = set(model.state_dict().keys()) 54 | used_pretrained_keys = model_keys & ckpt_keys 55 | unused_pretrained_keys = ckpt_keys - model_keys 56 | missing_keys = model_keys - ckpt_keys 57 | print("Missing keys:{}".format(len(missing_keys))) 58 | print("Unused checkpoint keys:{}".format(len(unused_pretrained_keys))) 59 | print("Used keys:{}".format(len(used_pretrained_keys))) 60 | assert len(used_pretrained_keys) > 0, "load NONE from pretrained checkpoint" 61 | return True 62 | 63 | 64 | def remove_prefix(state_dict, prefix): 65 | """Old style model is stored with all names of parameters sharing common prefix 'module.'""" 66 | print("remove prefix '{}'".format(prefix)) 67 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 68 | return {f(key): value for key, value in state_dict.items()} 69 | 70 | 71 | def load_model(model, pretrained_path, load_to_cpu): 72 | print("Loading pretrained model from {}".format(pretrained_path)) 73 | if load_to_cpu: 74 | pretrained_dict = torch.load( 75 | pretrained_path, 
map_location=lambda storage, loc: storage 76 | ) 77 | else: 78 | device = torch.cuda.current_device() 79 | pretrained_dict = torch.load( 80 | pretrained_path, map_location=lambda storage, loc: storage.cuda(device) 81 | ) 82 | if "state_dict" in pretrained_dict.keys(): 83 | pretrained_dict = remove_prefix(pretrained_dict["state_dict"], "module.") 84 | else: 85 | pretrained_dict = remove_prefix(pretrained_dict, "module.") 86 | check_keys(model, pretrained_dict) 87 | model.load_state_dict(pretrained_dict, strict=False) 88 | return model 89 | 90 | 91 | if __name__ == "__main__": 92 | torch.set_grad_enabled(False) 93 | cfg = None 94 | if args.network == "mobile0.25": 95 | cfg = cfg_mnet 96 | elif args.network == "resnet50": 97 | cfg = cfg_re50 98 | # net and model 99 | net = RetinaFace(cfg=cfg, phase="test") 100 | net = load_model(net, args.trained_model, args.cpu) 101 | net.eval() 102 | print("Finished loading model!") 103 | print(net) 104 | cudnn.benchmark = True 105 | device = torch.device("cpu" if args.cpu else "cuda") 106 | net = net.to(device) 107 | 108 | resize = 1 109 | 110 | # testing begin 111 | for i in range(10): 112 | image_path = "./curve/test.jpg" 113 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 114 | 115 | img = np.float32(img_raw) 116 | 117 | im_height, im_width, _ = img.shape 118 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 119 | img -= (104, 117, 123) 120 | img = img.transpose(2, 0, 1) 121 | img = torch.from_numpy(img).unsqueeze(0) 122 | img = img.to(device) 123 | scale = scale.to(device) 124 | 125 | tic = time.time() 126 | loc, conf, landms = net(img) # forward pass 127 | print("net forward time: {:.4f}".format(time.time() - tic)) 128 | 129 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 130 | priors = priorbox.forward() 131 | priors = priors.to(device) 132 | prior_data = priors.data 133 | boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"]) 134 | boxes = boxes * scale / resize 135 | boxes = boxes.cpu().numpy() 136 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 137 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg["variance"]) 138 | scale1 = torch.Tensor( 139 | [ 140 | img.shape[3], 141 | img.shape[2], 142 | img.shape[3], 143 | img.shape[2], 144 | img.shape[3], 145 | img.shape[2], 146 | img.shape[3], 147 | img.shape[2], 148 | img.shape[3], 149 | img.shape[2], 150 | ] 151 | ) 152 | scale1 = scale1.to(device) 153 | landms = landms * scale1 / resize 154 | landms = landms.cpu().numpy() 155 | 156 | # ignore low scores 157 | inds = np.where(scores > args.confidence_threshold)[0] 158 | boxes = boxes[inds] 159 | landms = landms[inds] 160 | scores = scores[inds] 161 | 162 | # keep top-K before NMS 163 | order = scores.argsort()[::-1][: args.top_k] 164 | boxes = boxes[order] 165 | landms = landms[order] 166 | scores = scores[order] 167 | 168 | # do NMS 169 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 170 | keep = py_cpu_nms(dets, args.nms_threshold) 171 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 172 | dets = dets[keep, :] 173 | landms = landms[keep] 174 | 175 | # keep top-K faster NMS 176 | dets = dets[: args.keep_top_k, :] 177 | landms = landms[: args.keep_top_k, :] 178 | 179 | dets = np.concatenate((dets, landms), axis=1) 180 | 181 | # show image 182 | if args.save_image: 183 | for b in dets: 184 | if b[4] < args.vis_thres: 185 | continue 186 | text = "{:.4f}".format(b[4]) 187 | b = list(map(int, b)) 188 | cv2.rectangle(img_raw, (b[0], b[1]), 
(b[2], b[3]), (0, 0, 255), 2) 189 | cx = b[0] 190 | cy = b[1] + 12 191 | cv2.putText( 192 | img_raw, 193 | text, 194 | (cx, cy), 195 | cv2.FONT_HERSHEY_DUPLEX, 196 | 0.5, 197 | (255, 255, 255), 198 | ) 199 | 200 | # landms 201 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) 202 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) 203 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) 204 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) 205 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) 206 | # save image 207 | 208 | name = "test.jpg" 209 | cv2.imwrite(name, img_raw) 210 | -------------------------------------------------------------------------------- /face_detection/retinaface/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /face_detection/retinaface/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | from itertools import product as product 2 | from math import ceil 3 | 4 | import numpy as np 5 | import torch 6 | 7 | 8 | class PriorBox(object): 9 | def __init__(self, cfg, image_size=None, phase="train"): 10 | super(PriorBox, self).__init__() 11 | self.min_sizes = cfg["min_sizes"] 12 | self.steps = cfg["steps"] 13 | self.clip = cfg["clip"] 14 | self.image_size = image_size 15 | self.feature_maps = [ 16 | [ceil(self.image_size[0] / step), ceil(self.image_size[1] / step)] 17 | for step in self.steps 18 | ] 19 | self.name = "s" 20 | 21 | def forward(self): 22 | anchors = [] 23 | for k, f in enumerate(self.feature_maps): 24 | min_sizes = self.min_sizes[k] 25 | for i, j in product(range(f[0]), range(f[1])): 26 | for min_size in min_sizes: 27 | s_kx = min_size / self.image_size[1] 28 | s_ky = min_size / self.image_size[0] 29 | dense_cx = [ 30 | x * self.steps[k] / self.image_size[1] for x in [j + 0.5] 31 | ] 32 | dense_cy = [ 33 | y * self.steps[k] / self.image_size[0] for y in [i + 0.5] 34 | ] 35 | for cy, cx in product(dense_cy, dense_cx): 36 | anchors += [cx, cy, s_kx, s_ky] 37 | 38 | # back to torch land 39 | output = torch.Tensor(anchors).view(-1, 4) 40 | if self.clip: 41 | output.clamp_(max=1, min=0) 42 | return output 43 | -------------------------------------------------------------------------------- /face_detection/retinaface/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ["MultiBoxLoss"] 4 | -------------------------------------------------------------------------------- /face_detection/retinaface/layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from data import cfg_mnet 5 | from torch.autograd import Variable 6 | from utils.box_utils import log_sum_exp, match 7 | 8 | GPU = cfg_mnet["gpu_train"] 9 | 10 | 11 | class MultiBoxLoss(nn.Module): 12 | """SSD Weighted Loss Function 13 | Compute Targets: 14 | 1) Produce Confidence Target Indices by matching ground truth boxes 15 | with (default) 'priorboxes' that have jaccard index > threshold parameter 16 | (default threshold: 0.5). 17 | 2) Produce localization target by 'encoding' variance into offsets of ground 18 | truth boxes and their matched 'priorboxes'. 
19 | 3) Hard negative mining to filter the excessive number of negative examples 20 | that comes with using a large number of default bounding boxes. 21 | (default negative:positive ratio 3:1) 22 | Objective Loss: 23 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 24 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 25 | weighted by α which is set to 1 by cross val. 26 | Args: 27 | c: class confidences, 28 | l: predicted boxes, 29 | g: ground truth boxes 30 | N: number of matched default boxes 31 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | num_classes, 37 | overlap_thresh, 38 | prior_for_matching, 39 | bkg_label, 40 | neg_mining, 41 | neg_pos, 42 | neg_overlap, 43 | encode_target, 44 | ): 45 | super(MultiBoxLoss, self).__init__() 46 | self.num_classes = num_classes 47 | self.threshold = overlap_thresh 48 | self.background_label = bkg_label 49 | self.encode_target = encode_target 50 | self.use_prior_for_matching = prior_for_matching 51 | self.do_neg_mining = neg_mining 52 | self.negpos_ratio = neg_pos 53 | self.neg_overlap = neg_overlap 54 | self.variance = [0.1, 0.2] 55 | 56 | def forward(self, predictions, priors, targets): 57 | """Multibox Loss 58 | Args: 59 | predictions (tuple): A tuple containing loc preds, conf preds, 60 | and prior boxes from SSD net. 61 | conf shape: torch.size(batch_size,num_priors,num_classes) 62 | loc shape: torch.size(batch_size,num_priors,4) 63 | priors shape: torch.size(num_priors,4) 64 | 65 | ground_truth (tensor): Ground truth boxes and labels for a batch, 66 | shape: [batch_size,num_objs,5] (last idx is the label). 67 | """ 68 | 69 | loc_data, conf_data, landm_data = predictions 70 | priors = priors 71 | num = loc_data.size(0) 72 | num_priors = priors.size(0) 73 | 74 | # match priors (default boxes) and ground truth boxes 75 | loc_t = torch.Tensor(num, num_priors, 4) 76 | landm_t = torch.Tensor(num, num_priors, 10) 77 | conf_t = torch.LongTensor(num, num_priors) 78 | for idx in range(num): 79 | truths = targets[idx][:, :4].data 80 | labels = targets[idx][:, -1].data 81 | landms = targets[idx][:, 4:14].data 82 | defaults = priors.data 83 | match( 84 | self.threshold, 85 | truths, 86 | defaults, 87 | self.variance, 88 | labels, 89 | landms, 90 | loc_t, 91 | conf_t, 92 | landm_t, 93 | idx, 94 | ) 95 | if GPU: 96 | loc_t = loc_t.cuda() 97 | conf_t = conf_t.cuda() 98 | landm_t = landm_t.cuda() 99 | 100 | zeros = torch.tensor(0).cuda() 101 | # landm Loss (Smooth L1) 102 | # Shape: [batch,num_priors,10] 103 | pos1 = conf_t > zeros 104 | num_pos_landm = pos1.long().sum(1, keepdim=True) 105 | N1 = max(num_pos_landm.data.sum().float(), 1) 106 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 107 | landm_p = landm_data[pos_idx1].view(-1, 10) 108 | landm_t = landm_t[pos_idx1].view(-1, 10) 109 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction="sum") 110 | 111 | pos = conf_t != zeros 112 | conf_t[pos] = 1 113 | 114 | # Localization Loss (Smooth L1) 115 | # Shape: [batch,num_priors,4] 116 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 117 | loc_p = loc_data[pos_idx].view(-1, 4) 118 | loc_t = loc_t[pos_idx].view(-1, 4) 119 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction="sum") 120 | 121 | # Compute max conf across batch for hard negative mining 122 | batch_conf = conf_data.view(-1, self.num_classes) 123 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 124 | 125 | # Hard Negative Mining 126 | loss_c[pos.view(-1, 1)] = 0 # 
filter out pos boxes for now 127 | loss_c = loss_c.view(num, -1) 128 | _, loss_idx = loss_c.sort(1, descending=True) 129 | _, idx_rank = loss_idx.sort(1) 130 | num_pos = pos.long().sum(1, keepdim=True) 131 | num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1) 132 | neg = idx_rank < num_neg.expand_as(idx_rank) 133 | 134 | # Confidence Loss Including Positive and Negative Examples 135 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 136 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 137 | conf_p = conf_data[(pos_idx + neg_idx).gt(0)].view(-1, self.num_classes) 138 | targets_weighted = conf_t[(pos + neg).gt(0)] 139 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction="sum") 140 | 141 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 142 | N = max(num_pos.data.sum().float(), 1) 143 | loss_l /= N 144 | loss_c /= N 145 | loss_landm /= N1 146 | 147 | return loss_l, loss_c, loss_landm 148 | -------------------------------------------------------------------------------- /face_detection/retinaface/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/models/__init__.py -------------------------------------------------------------------------------- /face_detection/retinaface/models/net.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torchvision.models as models 7 | import torchvision.models._utils as _utils 8 | from torch.autograd import Variable 9 | 10 | 11 | def conv_bn(inp, oup, stride=1, leaky=0): 12 | return nn.Sequential( 13 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 14 | nn.BatchNorm2d(oup), 15 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 16 | ) 17 | 18 | 19 | def conv_bn_no_relu(inp, oup, stride): 20 | return nn.Sequential( 21 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 22 | nn.BatchNorm2d(oup), 23 | ) 24 | 25 | 26 | def conv_bn1X1(inp, oup, stride, leaky=0): 27 | return nn.Sequential( 28 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 29 | nn.BatchNorm2d(oup), 30 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 31 | ) 32 | 33 | 34 | def conv_dw(inp, oup, stride, leaky=0.1): 35 | return nn.Sequential( 36 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 37 | nn.BatchNorm2d(inp), 38 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 39 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(oup), 41 | nn.LeakyReLU(negative_slope=leaky, inplace=True), 42 | ) 43 | 44 | 45 | class SSH(nn.Module): 46 | def __init__(self, in_channel, out_channel): 47 | super(SSH, self).__init__() 48 | assert out_channel % 4 == 0 49 | leaky = 0 50 | if out_channel <= 64: 51 | leaky = 0.1 52 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel // 2, stride=1) 53 | 54 | self.conv5X5_1 = conv_bn(in_channel, out_channel // 4, stride=1, leaky=leaky) 55 | self.conv5X5_2 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) 56 | 57 | self.conv7X7_2 = conv_bn( 58 | out_channel // 4, out_channel // 4, stride=1, leaky=leaky 59 | ) 60 | self.conv7x7_3 = conv_bn_no_relu(out_channel // 4, out_channel // 4, stride=1) 61 | 62 | def forward(self, input): 63 | conv3X3 = self.conv3X3(input) 64 | 65 | conv5X5_1 = self.conv5X5_1(input) 66 | conv5X5 = self.conv5X5_2(conv5X5_1) 67 | 68 | conv7X7_2 = 
self.conv7X7_2(conv5X5_1) 69 | conv7X7 = self.conv7x7_3(conv7X7_2) 70 | 71 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 72 | out = F.relu(out) 73 | return out 74 | 75 | 76 | class FPN(nn.Module): 77 | def __init__(self, in_channels_list, out_channels): 78 | super(FPN, self).__init__() 79 | leaky = 0 80 | if out_channels <= 64: 81 | leaky = 0.1 82 | self.output1 = conv_bn1X1( 83 | in_channels_list[0], out_channels, stride=1, leaky=leaky 84 | ) 85 | self.output2 = conv_bn1X1( 86 | in_channels_list[1], out_channels, stride=1, leaky=leaky 87 | ) 88 | self.output3 = conv_bn1X1( 89 | in_channels_list[2], out_channels, stride=1, leaky=leaky 90 | ) 91 | 92 | self.merge1 = conv_bn(out_channels, out_channels, leaky=leaky) 93 | self.merge2 = conv_bn(out_channels, out_channels, leaky=leaky) 94 | 95 | def forward(self, input): 96 | # names = list(input.keys()) 97 | input = list(input.values()) 98 | 99 | output1 = self.output1(input[0]) 100 | output2 = self.output2(input[1]) 101 | output3 = self.output3(input[2]) 102 | 103 | up3 = F.interpolate( 104 | output3, size=[output2.size(2), output2.size(3)], mode="nearest" 105 | ) 106 | output2 = output2 + up3 107 | output2 = self.merge2(output2) 108 | 109 | up2 = F.interpolate( 110 | output2, size=[output1.size(2), output1.size(3)], mode="nearest" 111 | ) 112 | output1 = output1 + up2 113 | output1 = self.merge1(output1) 114 | 115 | out = [output1, output2, output3] 116 | return out 117 | 118 | 119 | class MobileNetV1(nn.Module): 120 | def __init__(self): 121 | super(MobileNetV1, self).__init__() 122 | self.stage1 = nn.Sequential( 123 | conv_bn(3, 8, 2, leaky=0.1), # 3 124 | conv_dw(8, 16, 1), # 7 125 | conv_dw(16, 32, 2), # 11 126 | conv_dw(32, 32, 1), # 19 127 | conv_dw(32, 64, 2), # 27 128 | conv_dw(64, 64, 1), # 43 129 | ) 130 | self.stage2 = nn.Sequential( 131 | conv_dw(64, 128, 2), # 43 + 16 = 59 132 | conv_dw(128, 128, 1), # 59 + 32 = 91 133 | conv_dw(128, 128, 1), # 91 + 32 = 123 134 | conv_dw(128, 128, 1), # 123 + 32 = 155 135 | conv_dw(128, 128, 1), # 155 + 32 = 187 136 | conv_dw(128, 128, 1), # 187 + 32 = 219 137 | ) 138 | self.stage3 = nn.Sequential( 139 | conv_dw(128, 256, 2), # 219 +3 2 = 241 140 | conv_dw(256, 256, 1), # 241 + 64 = 301 141 | ) 142 | self.avg = nn.AdaptiveAvgPool2d((1, 1)) 143 | self.fc = nn.Linear(256, 1000) 144 | 145 | def forward(self, x): 146 | x = self.stage1(x) 147 | x = self.stage2(x) 148 | x = self.stage3(x) 149 | x = self.avg(x) 150 | # x = self.model(x) 151 | x = x.view(-1, 256) 152 | x = self.fc(x) 153 | return x 154 | -------------------------------------------------------------------------------- /face_detection/retinaface/models/retinaface.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torchvision.models._utils as _utils 7 | import torchvision.models.detection.backbone_utils as backbone_utils 8 | from models.net import FPN as FPN 9 | from models.net import SSH as SSH 10 | from models.net import MobileNetV1 as MobileNetV1 11 | 12 | 13 | class ClassHead(nn.Module): 14 | def __init__(self, inchannels=512, num_anchors=3): 15 | super(ClassHead, self).__init__() 16 | self.num_anchors = num_anchors 17 | self.conv1x1 = nn.Conv2d( 18 | inchannels, self.num_anchors * 2, kernel_size=(1, 1), stride=1, padding=0 19 | ) 20 | 21 | def forward(self, x): 22 | out = self.conv1x1(x) 23 | out = out.permute(0, 2, 3, 1).contiguous() 24 | 25 | return 
out.view(out.shape[0], -1, 2) 26 | 27 | 28 | class BboxHead(nn.Module): 29 | def __init__(self, inchannels=512, num_anchors=3): 30 | super(BboxHead, self).__init__() 31 | self.conv1x1 = nn.Conv2d( 32 | inchannels, num_anchors * 4, kernel_size=(1, 1), stride=1, padding=0 33 | ) 34 | 35 | def forward(self, x): 36 | out = self.conv1x1(x) 37 | out = out.permute(0, 2, 3, 1).contiguous() 38 | 39 | return out.view(out.shape[0], -1, 4) 40 | 41 | 42 | class LandmarkHead(nn.Module): 43 | def __init__(self, inchannels=512, num_anchors=3): 44 | super(LandmarkHead, self).__init__() 45 | self.conv1x1 = nn.Conv2d( 46 | inchannels, num_anchors * 10, kernel_size=(1, 1), stride=1, padding=0 47 | ) 48 | 49 | def forward(self, x): 50 | out = self.conv1x1(x) 51 | out = out.permute(0, 2, 3, 1).contiguous() 52 | 53 | return out.view(out.shape[0], -1, 10) 54 | 55 | 56 | class RetinaFace(nn.Module): 57 | def __init__(self, cfg=None, phase="train"): 58 | """ 59 | :param cfg: Network related settings. 60 | :param phase: train or test. 61 | """ 62 | super(RetinaFace, self).__init__() 63 | self.phase = phase 64 | backbone = None 65 | if cfg["name"] == "mobilenet0.25": 66 | backbone = MobileNetV1() 67 | if cfg["pretrain"]: 68 | checkpoint = torch.load( 69 | "./weights/mobilenetV1X0.25_pretrain.tar", 70 | map_location=torch.device("cpu"), 71 | ) 72 | from collections import OrderedDict 73 | 74 | new_state_dict = OrderedDict() 75 | for k, v in checkpoint["state_dict"].items(): 76 | name = k[7:] # remove module. 77 | new_state_dict[name] = v 78 | # load params 79 | backbone.load_state_dict(new_state_dict) 80 | elif cfg["name"] == "Resnet50": 81 | import torchvision.models as models 82 | 83 | backbone = models.resnet50(pretrained=cfg["pretrain"]) 84 | 85 | self.body = _utils.IntermediateLayerGetter(backbone, cfg["return_layers"]) 86 | in_channels_stage2 = cfg["in_channel"] 87 | in_channels_list = [ 88 | in_channels_stage2 * 2, 89 | in_channels_stage2 * 4, 90 | in_channels_stage2 * 8, 91 | ] 92 | out_channels = cfg["out_channel"] 93 | self.fpn = FPN(in_channels_list, out_channels) 94 | self.ssh1 = SSH(out_channels, out_channels) 95 | self.ssh2 = SSH(out_channels, out_channels) 96 | self.ssh3 = SSH(out_channels, out_channels) 97 | 98 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg["out_channel"]) 99 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg["out_channel"]) 100 | self.LandmarkHead = self._make_landmark_head( 101 | fpn_num=3, inchannels=cfg["out_channel"] 102 | ) 103 | 104 | def _make_class_head(self, fpn_num=3, inchannels=64, anchor_num=2): 105 | classhead = nn.ModuleList() 106 | for i in range(fpn_num): 107 | classhead.append(ClassHead(inchannels, anchor_num)) 108 | return classhead 109 | 110 | def _make_bbox_head(self, fpn_num=3, inchannels=64, anchor_num=2): 111 | bboxhead = nn.ModuleList() 112 | for i in range(fpn_num): 113 | bboxhead.append(BboxHead(inchannels, anchor_num)) 114 | return bboxhead 115 | 116 | def _make_landmark_head(self, fpn_num=3, inchannels=64, anchor_num=2): 117 | landmarkhead = nn.ModuleList() 118 | for i in range(fpn_num): 119 | landmarkhead.append(LandmarkHead(inchannels, anchor_num)) 120 | return landmarkhead 121 | 122 | def forward(self, inputs): 123 | out = self.body(inputs) 124 | 125 | # FPN 126 | fpn = self.fpn(out) 127 | 128 | # SSH 129 | feature1 = self.ssh1(fpn[0]) 130 | feature2 = self.ssh2(fpn[1]) 131 | feature3 = self.ssh3(fpn[2]) 132 | features = [feature1, feature2, feature3] 133 | 134 | bbox_regressions = torch.cat( 135 | 
[self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1 136 | ) 137 | classifications = torch.cat( 138 | [self.ClassHead[i](feature) for i, feature in enumerate(features)], dim=1 139 | ) 140 | ldm_regressions = torch.cat( 141 | [self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1 142 | ) 143 | 144 | if self.phase == "train": 145 | output = (bbox_regressions, classifications, ldm_regressions) 146 | else: 147 | output = ( 148 | bbox_regressions, 149 | F.softmax(classifications, dim=-1), 150 | ldm_regressions, 151 | ) 152 | return output 153 | -------------------------------------------------------------------------------- /face_detection/retinaface/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/__init__.py -------------------------------------------------------------------------------- /face_detection/retinaface/utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/retinaface/utils/nms/__init__.py -------------------------------------------------------------------------------- /face_detection/retinaface/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | 11 | def py_cpu_nms(dets, thresh): 12 | """Pure Python NMS baseline.""" 13 | x1 = dets[:, 0] 14 | y1 = dets[:, 1] 15 | x2 = dets[:, 2] 16 | y2 = dets[:, 3] 17 | scores = dets[:, 4] 18 | 19 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 20 | order = scores.argsort()[::-1] 21 | 22 | keep = [] 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1 + 1) 32 | h = np.maximum(0.0, yy2 - yy1 + 1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | -------------------------------------------------------------------------------- /face_detection/retinaface/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | 14 | def __init__(self): 15 | self.total_time = 0.0 16 | self.calls = 0 17 | self.start_time = 0.0 18 | self.diff = 0.0 19 | self.average_time = 0.0 20 | 21 | def tic(self): 22 | # using time.time instead of time.clock because time time.clock 23 | # does not normalize for multithreading 24 | self.start_time = time.time() 25 | 26 | def toc(self, average=True): 27 | 
self.diff = time.time() - self.start_time 28 | self.total_time += self.diff 29 | self.calls += 1 30 | self.average_time = self.total_time / self.calls 31 | if average: 32 | return self.average_time 33 | else: 34 | return self.diff 35 | 36 | def clear(self): 37 | self.total_time = 0.0 38 | self.calls = 0 39 | self.start_time = 0.0 40 | self.diff = 0.0 41 | self.average_time = 0.0 42 | -------------------------------------------------------------------------------- /face_detection/scrfd/weights/README.md: -------------------------------------------------------------------------------- 1 | ## Download Weights: 2 | 3 | - https://drive.google.com/drive/folders/1C9RzReAihJQRl8EJOX6vQj7qbHBPmzME?usp=sharing 4 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/README.md: -------------------------------------------------------------------------------- 1 | # Reference 2 | 3 | https://github.com/deepcam-cn/yolov5-face 4 | 5 | [Pretrained] - Google Drive: https://drive.google.com/drive/folders/1UMG4hBor8CFipYm7y71_iTigHjZ4AkaH?usp=sharing 6 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | 8 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 9 | sys.path.append(BASE_DIR) 10 | 11 | from models.experimental import attempt_load 12 | from utils.datasets import letterbox 13 | from utils.general import check_img_size, non_max_suppression_face, scale_coords 14 | 15 | 16 | class Yolov5Face(object): 17 | def __init__(self, model_file=None): 18 | """ 19 | Initialize the Detector class. 20 | 21 | :param model_path: Path to the YOLOv5 model file (default is yolov5n-0.5.pt) 22 | """ 23 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 24 | 25 | self.device = device 26 | self.model = attempt_load(model_file, map_location=device) 27 | 28 | # Parameters 29 | self.size_convert = 128 # Size for image conversion 30 | self.conf_thres = 0.4 # Confidence threshold 31 | self.iou_thres = 0.5 # Intersection over Union threshold 32 | 33 | def resize_image(self, img0, img_size): 34 | """ 35 | Resize the input image. 36 | 37 | :param img0: The input image to be resized. 38 | :param img_size: The desired size for the image. 39 | 40 | :return: The resized and preprocessed image. 41 | """ 42 | h0, w0 = img0.shape[:2] # Original height and width 43 | r = img_size / max(h0, w0) # Resize image to img_size 44 | 45 | if r != 1: 46 | interp = cv2.INTER_AREA if r < 1 else cv2.INTER_LINEAR 47 | img0 = cv2.resize(img0, (int(w0 * r), int(h0 * r)), interpolation=interp) 48 | 49 | imgsz = check_img_size(img_size, s=self.model.stride.max()) # Check img_size 50 | img = letterbox(img0, new_shape=imgsz)[0] 51 | 52 | img = img[:, :, ::-1].transpose(2, 0, 1).copy() # BGR to RGB, to 3x416x416 53 | img = torch.from_numpy(img).to(self.device) 54 | img = img.float() # uint8 to fp16/32 55 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 56 | 57 | return img 58 | 59 | def scale_coords_landmarks(self, img1_shape, coords, img0_shape, ratio_pad=None): 60 | """ 61 | Rescale coordinates from img1_shape to img0_shape. 62 | 63 | :param img1_shape: Shape of the source image. 64 | :param coords: Coordinates to be rescaled. 65 | :param img0_shape: Shape of the target image. 66 | :param ratio_pad: Padding ratio. 
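            If None (the default), the gain and padding are recomputed from the two
            image shapes, mirroring the letterbox resize used in resize_image.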
67 | 68 | :return: Rescaled coordinates. 69 | """ 70 | if ratio_pad is None: # Calculate from img0_shape 71 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) 72 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, ( 73 | img1_shape[0] - img0_shape[0] * gain 74 | ) / 2 75 | else: 76 | gain = ratio_pad[0][0] 77 | pad = ratio_pad[1] 78 | 79 | coords[:, [0, 2, 4, 6, 8]] -= pad[0] # x padding 80 | coords[:, [1, 3, 5, 7, 9]] -= pad[1] # y padding 81 | coords[:, :10] /= gain 82 | coords[:, :10] = coords[:, :10].clamp( 83 | 0, img0_shape[1] 84 | ) # Clamp x and y coordinates 85 | 86 | # Reshape the coordinates into the desired format 87 | coords = coords.reshape(-1, 5, 2) 88 | return coords 89 | 90 | def detect(self, image): 91 | """ 92 | Perform face detection on the input image. 93 | 94 | :param input_image: The input image for face detection. 95 | 96 | :return: Detected bounding boxes and landmarks. 97 | """ 98 | # Resize image 99 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert) 100 | 101 | # Via yolov5-face 102 | with torch.no_grad(): 103 | pred = self.model(img[None, :])[0] 104 | 105 | # Apply NMS 106 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0] 107 | bboxes = np.int32( 108 | scale_coords(img.shape[1:], det[:, :5], image.shape).round().cpu().numpy() 109 | ) 110 | 111 | landmarks = np.int32( 112 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape) 113 | .round() 114 | .cpu() 115 | .numpy() 116 | ) 117 | 118 | return bboxes, landmarks 119 | 120 | def detect_tracking(self, image): 121 | """ 122 | Perform object tracking on the input image. 123 | 124 | :param input_image: The input image for object tracking. 125 | 126 | :return: Tracking results and image information. 
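        Illustrative usage (an editorial sketch, not code from this repository; the
        weights path is an assumption, see the pretrained Google Drive link in
        face_detection/yolov5_face/README.md):

            import cv2
            frame = cv2.imread("assets/train_image.jpg")  # any BGR image
            detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-0.5.pt")
            outputs, img_info, bboxes, landmarks = detector.detect_tracking(frame)
            # `outputs` is an (N, 5) tensor of [x1, y1, x2, y2, score] detections
            # intended for the ByteTrack tracker under face_tracking/ (see tracking.py),
            # while `bboxes` and `landmarks` are int32 arrays in original-image coordinates.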
127 | """ 128 | height, width = image.shape[:2] 129 | img_info = {"id": 0} 130 | img_info["height"] = height 131 | img_info["width"] = width 132 | img_info["raw_img"] = image 133 | 134 | # Resize image 135 | img = self.resize_image(img0=image.copy(), img_size=self.size_convert) 136 | 137 | # Via yolov5-face 138 | with torch.no_grad(): 139 | pred = self.model(img[None, :])[0] 140 | 141 | scale = min( 142 | img.shape[1] / float(image.shape[0]), img.shape[2] / float(image.shape[1]) 143 | ) 144 | 145 | # Apply NMS 146 | det = non_max_suppression_face(pred, self.conf_thres, self.iou_thres)[0] 147 | 148 | bboxes = scale_coords(img.shape[1:], det[:, :4], image.shape) 149 | scores = det[:, 4:5] 150 | outputs = torch.cat((bboxes, scores), dim=1) 151 | outputs[:, :4] *= scale 152 | 153 | bboxes = np.int32(bboxes.round().cpu().numpy()) 154 | 155 | landmarks = np.int32( 156 | self.scale_coords_landmarks(img.shape[1:], det[:, 5:15], image.shape) 157 | .round() 158 | .cpu() 159 | .numpy() 160 | ) 161 | 162 | return outputs, img_info, bboxes, landmarks 163 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/models/__init__.py -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/blazeface.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [5, 6, 10, 13, 21, 26] # P3/8 9 | - [55, 72, 225, 304, 438, 553] # P4/16 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [ 15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2 16 | [-1, 2, BlazeBlock, [24]], # 1 17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4 18 | [-1, 2, BlazeBlock, [48]], # 3 19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8 20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5 21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16 22 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 7 23 | ] 24 | 25 | # YOLOv5 head 26 | head: [ 27 | [-1, 1, Conv, [64, 1, 1]], # 8 (P4/32-large) 28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 30 | [-1, 1, Conv, [64, 1, 1]], # 11 (P3/8-medium) 31 | 32 | [[11, 8], 1, Detect, [nc, anchors]], # Detect(P3, P4) 33 | ] 34 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/blazeface_fpn.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [5, 6, 10, 13, 21, 26] # P3/8 9 | - [55, 72, 225, 304, 438, 553] # P4/16 10 | 11 | # YOLOv5 backbone 12 | backbone: 13 | # [from, number, module, args] 14 | [ 15 | [-1, 1, Conv, [24, 3, 2]], # 0-P1/2 16 | [-1, 2, BlazeBlock, [24]], # 1 17 | [-1, 1, BlazeBlock, [48, None, 2]], # 2-P2/4 18 | [-1, 2, BlazeBlock, [48]], # 3 19 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 4-P3/8 20 | [-1, 2, DoubleBlazeBlock, [96, 24]], # 5 21 | [-1, 1, DoubleBlazeBlock, [96, 24, 2]], # 6-P4/16 22 | [-1, 2, 
DoubleBlazeBlock, [96, 24]], # 7 23 | ] 24 | 25 | # YOLOv5 head 26 | head: [ 27 | [-1, 1, Conv, [48, 1, 1]], # 8 28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 29 | [[-1, 5], 1, Concat, [1]], # cat backbone P3 30 | [-1, 1, Conv, [48, 1, 1]], # 11 (P3/8-medium) 31 | 32 | [-1, 1, nn.MaxPool2d, [3, 2, 1]], # 12 33 | [[-1, 7], 1, Concat, [1]], # cat backbone P3 34 | [-1, 1, Conv, [48, 1, 1]], # 14 (P4/16-large) 35 | 36 | [[11, 14], 1, Detect, [nc, anchors]], # Detect(P3, P4) 37 | ] 38 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/experimental.py: -------------------------------------------------------------------------------- 1 | # This file contains experimental modules 2 | 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | from models.common import Conv, DWConv 7 | from utils.google_utils import attempt_download 8 | 9 | 10 | class CrossConv(nn.Module): 11 | # Cross Convolution Downsample 12 | def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False): 13 | # ch_in, ch_out, kernel, stride, groups, expansion, shortcut 14 | super(CrossConv, self).__init__() 15 | c_ = int(c2 * e) # hidden channels 16 | self.cv1 = Conv(c1, c_, (1, k), (1, s)) 17 | self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g) 18 | self.add = shortcut and c1 == c2 19 | 20 | def forward(self, x): 21 | return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) 22 | 23 | 24 | class Sum(nn.Module): 25 | # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 26 | def __init__(self, n, weight=False): # n: number of inputs 27 | super(Sum, self).__init__() 28 | self.weight = weight # apply weights boolean 29 | self.iter = range(n - 1) # iter object 30 | if weight: 31 | self.w = nn.Parameter( 32 | -torch.arange(1.0, n) / 2, requires_grad=True 33 | ) # layer weights 34 | 35 | def forward(self, x): 36 | y = x[0] # no weight 37 | if self.weight: 38 | w = torch.sigmoid(self.w) * 2 39 | for i in self.iter: 40 | y = y + x[i + 1] * w[i] 41 | else: 42 | for i in self.iter: 43 | y = y + x[i + 1] 44 | return y 45 | 46 | 47 | class GhostConv(nn.Module): 48 | # Ghost Convolution https://github.com/huawei-noah/ghostnet 49 | def __init__( 50 | self, c1, c2, k=1, s=1, g=1, act=True 51 | ): # ch_in, ch_out, kernel, stride, groups 52 | super(GhostConv, self).__init__() 53 | c_ = c2 // 2 # hidden channels 54 | self.cv1 = Conv(c1, c_, k, s, None, g, act) 55 | self.cv2 = Conv(c_, c_, 5, 1, None, c_, act) 56 | 57 | def forward(self, x): 58 | y = self.cv1(x) 59 | return torch.cat([y, self.cv2(y)], 1) 60 | 61 | 62 | class GhostBottleneck(nn.Module): 63 | # Ghost Bottleneck https://github.com/huawei-noah/ghostnet 64 | def __init__(self, c1, c2, k, s): 65 | super(GhostBottleneck, self).__init__() 66 | c_ = c2 // 2 67 | self.conv = nn.Sequential( 68 | GhostConv(c1, c_, 1, 1), # pw 69 | DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(), # dw 70 | GhostConv(c_, c2, 1, 1, act=False), 71 | ) # pw-linear 72 | self.shortcut = ( 73 | nn.Sequential( 74 | DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False) 75 | ) 76 | if s == 2 77 | else nn.Identity() 78 | ) 79 | 80 | def forward(self, x): 81 | return self.conv(x) + self.shortcut(x) 82 | 83 | 84 | class MixConv2d(nn.Module): 85 | # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595 86 | def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True): 87 | super(MixConv2d, self).__init__() 88 | groups = len(k) 89 | if equal_ch: # equal c_ per group 90 | i = torch.linspace(0, groups - 
1e-6, c2).floor() # c2 indices 91 | c_ = [(i == g).sum() for g in range(groups)] # intermediate channels 92 | else: # equal weight.numel() per group 93 | b = [c2] + [0] * groups 94 | a = np.eye(groups + 1, groups, k=-1) 95 | a -= np.roll(a, 1, axis=1) 96 | a *= np.array(k) ** 2 97 | a[0] = 1 98 | c_ = np.linalg.lstsq(a, b, rcond=None)[ 99 | 0 100 | ].round() # solve for equal weight indices, ax = b 101 | 102 | self.m = nn.ModuleList( 103 | [ 104 | nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) 105 | for g in range(groups) 106 | ] 107 | ) 108 | self.bn = nn.BatchNorm2d(c2) 109 | self.act = nn.LeakyReLU(0.1, inplace=True) 110 | 111 | def forward(self, x): 112 | return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1))) 113 | 114 | 115 | class Ensemble(nn.ModuleList): 116 | # Ensemble of models 117 | def __init__(self): 118 | super(Ensemble, self).__init__() 119 | 120 | def forward(self, x, augment=False): 121 | y = [] 122 | for module in self: 123 | y.append(module(x, augment)[0]) 124 | # y = torch.stack(y).max(0)[0] # max ensemble 125 | # y = torch.stack(y).mean(0) # mean ensemble 126 | y = torch.cat(y, 1) # nms ensemble 127 | return y, None # inference, train output 128 | 129 | 130 | def attempt_load(weights, map_location=None): 131 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 132 | model = Ensemble() 133 | for w in weights if isinstance(weights, list) else [weights]: 134 | attempt_download(w) 135 | model.append( 136 | torch.load(w, map_location=map_location)["model"].float().fuse().eval() 137 | ) # load FP32 model 138 | 139 | # # Compatibility updates 140 | # for m in model.modules(): 141 | # if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 142 | # m.inplace = True # pytorch 1.7.0 compatibility 143 | # elif type(m) is Conv: 144 | # m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 145 | 146 | if len(model) == 1: 147 | return model[-1] # return model 148 | # else: 149 | # print("Ensemble created with %s\n" % weights) 150 | # for k in ["names", "stride"]: 151 | # setattr(model, k, getattr(model[-1], k)) 152 | # return model # return ensemble 153 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5l.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4, 5, 8, 10, 13, 16] # P3/8 9 | - [23, 29, 43, 55, 73, 105] # P4/16 10 | - [146, 217, 231, 300, 335, 433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [ 16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 23 | [-1, 1, SPP, [1024, [3, 5, 7]]], 24 | [-1, 3, C3, [1024, False]], # 8 25 | ] 26 | 27 | # YOLOv5 head 28 | head: [ 29 | [-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 12 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 
13], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 9], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 46 | 47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5l6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6, 7, 9, 11, 13, 16] # P3/8 9 | - [18, 23, 26, 33, 37, 47] # P4/16 10 | - [54, 67, 77, 104, 112, 154] # P5/32 11 | - [174, 238, 258, 355, 445, 568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ 17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 10 28 | ] 29 | 30 | # YOLOv5 head 31 | head: [ 32 | [-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 14 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 18 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 19], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 15], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 11], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge) 58 | 59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5m.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4, 5, 8, 10, 13, 16] # P3/8 9 | - [23, 29, 43, 55, 73, 105] # P4/16 10 | - [146, 217, 231, 300, 335, 433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [ 16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 23 | [-1, 1, SPP, [1024, [3, 5, 7]]], 24 | [-1, 3, C3, [1024, False]], # 8 25 | ] 26 | 27 | # YOLOv5 head 28 | head: [ 29 | [-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 12 33 | 
34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 13], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 9], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 46 | 47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5m6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.67 # model depth multiple 4 | width_multiple: 0.75 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6, 7, 9, 11, 13, 16] # P3/8 9 | - [18, 23, 26, 33, 37, 47] # P4/16 10 | - [54, 67, 77, 104, 112, 154] # P5/32 11 | - [174, 238, 258, 355, 445, 568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ 17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 22 | [-1, 9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 10 28 | ] 29 | 30 | # YOLOv5 head 31 | head: [ 32 | [-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 14 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 18 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 19], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 15], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 11], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge) 58 | 59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5n-0.5.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 0.5 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4, 5, 8, 10, 13, 16] # P3/8 9 | - [23, 29, 43, 55, 73, 105] # P4/16 10 | - [146, 217, 231, 300, 335, 433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [ 16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32 22 | [-1, 3, 
ShuffleV2Block, [512, 1]], # 6 23 | ] 24 | 25 | # YOLOv5 head 26 | head: [ 27 | [-1, 1, Conv, [128, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 30 | [-1, 1, C3, [128, False]], # 10 31 | 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small) 36 | 37 | [-1, 1, Conv, [128, 3, 2]], 38 | [[-1, 11], 1, Concat, [1]], # cat head P4 39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 7], 1, Concat, [1]], # cat head P5 43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large) 44 | 45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5n.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4, 5, 8, 10, 13, 16] # P3/8 9 | - [23, 29, 43, 55, 73, 105] # P4/16 10 | - [146, 217, 231, 300, 335, 433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [ 16 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 17 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 18 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 19 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 20 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 21 | [-1, 1, ShuffleV2Block, [512, 2]], # 5-P5/32 22 | [-1, 3, ShuffleV2Block, [512, 1]], # 6 23 | ] 24 | 25 | # YOLOv5 head 26 | head: [ 27 | [-1, 1, Conv, [128, 1, 1]], 28 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 29 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 30 | [-1, 1, C3, [128, False]], # 10 31 | 32 | [-1, 1, Conv, [128, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 34 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 35 | [-1, 1, C3, [128, False]], # 14 (P3/8-small) 36 | 37 | [-1, 1, Conv, [128, 3, 2]], 38 | [[-1, 11], 1, Concat, [1]], # cat head P4 39 | [-1, 1, C3, [128, False]], # 17 (P4/16-medium) 40 | 41 | [-1, 1, Conv, [128, 3, 2]], 42 | [[-1, 7], 1, Concat, [1]], # cat head P5 43 | [-1, 1, C3, [128, False]], # 20 (P5/32-large) 44 | 45 | [[14, 17, 20], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 46 | ] 47 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5n6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 1.0 # model depth multiple 4 | width_multiple: 1.0 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6, 7, 9, 11, 13, 16] # P3/8 9 | - [18, 23, 26, 33, 37, 47] # P4/16 10 | - [54, 67, 77, 104, 112, 154] # P5/32 11 | - [174, 238, 258, 355, 445, 568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ 17 | [-1, 1, StemBlock, [32, 3, 2]], # 0-P2/4 18 | [-1, 1, ShuffleV2Block, [128, 2]], # 1-P3/8 19 | [-1, 3, ShuffleV2Block, [128, 1]], # 2 20 | [-1, 1, ShuffleV2Block, [256, 2]], # 3-P4/16 21 | [-1, 7, ShuffleV2Block, [256, 1]], # 4 22 | [-1, 1, ShuffleV2Block, [384, 2]], # 5-P5/32 23 | [-1, 3, ShuffleV2Block, [384, 1]], # 6 24 | [-1, 1, ShuffleV2Block, [512, 2]], # 7-P6/64 25 | [-1, 3, ShuffleV2Block, [512, 1]], # 8 26 | ] 27 | 28 | # YOLOv5 head 29 
| head: [ 30 | [-1, 1, Conv, [128, 1, 1]], 31 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 32 | [[-1, 6], 1, Concat, [1]], # cat backbone P5 33 | [-1, 1, C3, [128, False]], # 12 34 | 35 | [-1, 1, Conv, [128, 1, 1]], 36 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 37 | [[-1, 4], 1, Concat, [1]], # cat backbone P4 38 | [-1, 1, C3, [128, False]], # 16 (P4/8-small) 39 | 40 | [-1, 1, Conv, [128, 1, 1]], 41 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 42 | [[-1, 2], 1, Concat, [1]], # cat backbone P3 43 | [-1, 1, C3, [128, False]], # 20 (P3/8-small) 44 | 45 | [-1, 1, Conv, [128, 3, 2]], 46 | [[-1, 17], 1, Concat, [1]], # cat head P4 47 | [-1, 1, C3, [128, False]], # 23 (P4/16-medium) 48 | 49 | [-1, 1, Conv, [128, 3, 2]], 50 | [[-1, 13], 1, Concat, [1]], # cat head P5 51 | [-1, 1, C3, [128, False]], # 26 (P5/32-large) 52 | 53 | [-1, 1, Conv, [128, 3, 2]], 54 | [[-1, 9], 1, Concat, [1]], # cat head P6 55 | [-1, 1, C3, [128, False]], # 29 (P6/64-large) 56 | 57 | [[20, 23, 26, 29], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 58 | ] 59 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5s.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.5 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [4, 5, 8, 10, 13, 16] # P3/8 9 | - [23, 29, 43, 55, 73, 105] # P4/16 10 | - [146, 217, 231, 300, 335, 433] # P5/32 11 | 12 | # YOLOv5 backbone 13 | backbone: 14 | # [from, number, module, args] 15 | [ 16 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 17 | [-1, 3, C3, [128]], 18 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 19 | [-1, 9, C3, [256]], 20 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 21 | [-1, 9, C3, [512]], 22 | [-1, 1, Conv, [1024, 3, 2]], # 6-P5/32 23 | [-1, 1, SPP, [1024, [3, 5, 7]]], 24 | [-1, 3, C3, [1024, False]], # 8 25 | ] 26 | 27 | # YOLOv5 head 28 | head: [ 29 | [-1, 1, Conv, [512, 1, 1]], 30 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 31 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 32 | [-1, 3, C3, [512, False]], # 12 33 | 34 | [-1, 1, Conv, [256, 1, 1]], 35 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 36 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 37 | [-1, 3, C3, [256, False]], # 16 (P3/8-small) 38 | 39 | [-1, 1, Conv, [256, 3, 2]], 40 | [[-1, 13], 1, Concat, [1]], # cat head P4 41 | [-1, 3, C3, [512, False]], # 19 (P4/16-medium) 42 | 43 | [-1, 1, Conv, [512, 3, 2]], 44 | [[-1, 9], 1, Concat, [1]], # cat head P5 45 | [-1, 3, C3, [1024, False]], # 22 (P5/32-large) 46 | 47 | [[16, 19, 22], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) 48 | ] 49 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/models/yolov5s6.yaml: -------------------------------------------------------------------------------- 1 | # parameters 2 | nc: 1 # number of classes 3 | depth_multiple: 0.33 # model depth multiple 4 | width_multiple: 0.50 # layer channel multiple 5 | 6 | # anchors 7 | anchors: 8 | - [6, 7, 9, 11, 13, 16] # P3/8 9 | - [18, 23, 26, 33, 37, 47] # P4/16 10 | - [54, 67, 77, 104, 112, 154] # P5/32 11 | - [174, 238, 258, 355, 445, 568] # P6/64 12 | 13 | # YOLOv5 backbone 14 | backbone: 15 | # [from, number, module, args] 16 | [ 17 | [-1, 1, StemBlock, [64, 3, 2]], # 0-P1/2 18 | [-1, 3, C3, [128]], 19 | [-1, 1, Conv, [256, 3, 2]], # 2-P3/8 20 | [-1, 9, C3, [256]], 21 | [-1, 1, Conv, [512, 3, 2]], # 4-P4/16 22 | [-1, 
9, C3, [512]], 23 | [-1, 1, Conv, [768, 3, 2]], # 6-P5/32 24 | [-1, 3, C3, [768]], 25 | [-1, 1, Conv, [1024, 3, 2]], # 8-P6/64 26 | [-1, 1, SPP, [1024, [3, 5, 7]]], 27 | [-1, 3, C3, [1024, False]], # 10 28 | ] 29 | 30 | # YOLOv5 head 31 | head: [ 32 | [-1, 1, Conv, [768, 1, 1]], 33 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 34 | [[-1, 7], 1, Concat, [1]], # cat backbone P5 35 | [-1, 3, C3, [768, False]], # 14 36 | 37 | [-1, 1, Conv, [512, 1, 1]], 38 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 39 | [[-1, 5], 1, Concat, [1]], # cat backbone P4 40 | [-1, 3, C3, [512, False]], # 18 41 | 42 | [-1, 1, Conv, [256, 1, 1]], 43 | [-1, 1, nn.Upsample, [None, 2, "nearest"]], 44 | [[-1, 3], 1, Concat, [1]], # cat backbone P3 45 | [-1, 3, C3, [256, False]], # 22 (P3/8-small) 46 | 47 | [-1, 1, Conv, [256, 3, 2]], 48 | [[-1, 19], 1, Concat, [1]], # cat head P4 49 | [-1, 3, C3, [512, False]], # 25 (P4/16-medium) 50 | 51 | [-1, 1, Conv, [512, 3, 2]], 52 | [[-1, 15], 1, Concat, [1]], # cat head P5 53 | [-1, 3, C3, [768, False]], # 28 (P5/32-large) 54 | 55 | [-1, 1, Conv, [768, 3, 2]], 56 | [[-1, 11], 1, Concat, [1]], # cat head P6 57 | [-1, 3, C3, [1024, False]], # 31 (P6/64-xlarge) 58 | 59 | [[22, 25, 28, 31], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5, P6) 60 | ] 61 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/__init__.py -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/activations.py: -------------------------------------------------------------------------------- 1 | # Activation functions 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | # SiLU https://arxiv.org/pdf/1606.08415.pdf ---------------------------------------------------------------------------- 9 | class SiLU(nn.Module): # export-friendly version of nn.SiLU() 10 | @staticmethod 11 | def forward(x): 12 | return x * torch.sigmoid(x) 13 | 14 | 15 | class Hardswish(nn.Module): # export-friendly version of nn.Hardswish() 16 | @staticmethod 17 | def forward(x): 18 | # return x * F.hardsigmoid(x) # for torchscript and CoreML 19 | return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for torchscript, CoreML and ONNX 20 | 21 | 22 | class MemoryEfficientSwish(nn.Module): 23 | class F(torch.autograd.Function): 24 | @staticmethod 25 | def forward(ctx, x): 26 | ctx.save_for_backward(x) 27 | return x * torch.sigmoid(x) 28 | 29 | @staticmethod 30 | def backward(ctx, grad_output): 31 | x = ctx.saved_tensors[0] 32 | sx = torch.sigmoid(x) 33 | return grad_output * (sx * (1 + x * (1 - sx))) 34 | 35 | def forward(self, x): 36 | return self.F.apply(x) 37 | 38 | 39 | # Mish https://github.com/digantamisra98/Mish -------------------------------------------------------------------------- 40 | class Mish(nn.Module): 41 | @staticmethod 42 | def forward(x): 43 | return x * F.softplus(x).tanh() 44 | 45 | 46 | class MemoryEfficientMish(nn.Module): 47 | class F(torch.autograd.Function): 48 | @staticmethod 49 | def forward(ctx, x): 50 | ctx.save_for_backward(x) 51 | return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x))) 52 | 53 | @staticmethod 54 | def backward(ctx, grad_output): 55 | x = ctx.saved_tensors[0] 56 | sx = torch.sigmoid(x) 57 | fx = 
F.softplus(x).tanh() 58 | return grad_output * (fx + x * sx * (1 - fx * fx)) 59 | 60 | def forward(self, x): 61 | return self.F.apply(x) 62 | 63 | 64 | # FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------- 65 | class FReLU(nn.Module): 66 | def __init__(self, c1, k=3): # ch_in, kernel 67 | super().__init__() 68 | self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False) 69 | self.bn = nn.BatchNorm2d(c1) 70 | 71 | def forward(self, x): 72 | return torch.max(x, self.bn(self.conv(x))) 73 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | 3 | import numpy as np 4 | import torch 5 | import yaml 6 | from scipy.cluster.vq import kmeans 7 | from tqdm import tqdm 8 | from utils.general import colorstr 9 | 10 | 11 | def check_anchor_order(m): 12 | # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary 13 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 14 | da = a[-1] - a[0] # delta a 15 | ds = m.stride[-1] - m.stride[0] # delta s 16 | if da.sign() != ds.sign(): # same order 17 | print("Reversing anchor order") 18 | m.anchors[:] = m.anchors.flip(0) 19 | m.anchor_grid[:] = m.anchor_grid.flip(0) 20 | 21 | 22 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 23 | # Check anchor fit to data, recompute if necessary 24 | prefix = colorstr("autoanchor: ") 25 | print(f"\n{prefix}Analyzing anchors... ", end="") 26 | m = ( 27 | model.module.model[-1] if hasattr(model, "module") else model.model[-1] 28 | ) # Detect() 29 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 30 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 31 | wh = torch.tensor( 32 | np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)]) 33 | ).float() # wh 34 | 35 | def metric(k): # compute metric 36 | r = wh[:, None] / k[None] 37 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric 38 | best = x.max(1)[0] # best_x 39 | aat = (x > 1.0 / thr).float().sum(1).mean() # anchors above threshold 40 | bpr = (best > 1.0 / thr).float().mean() # best possible recall 41 | return bpr, aat 42 | 43 | bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2)) 44 | print(f"anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}", end="") 45 | if bpr < 0.98: # threshold to recompute 46 | print(". Attempting to improve anchors, please wait...") 47 | na = m.anchor_grid.numel() // 2 # number of anchors 48 | new_anchors = kmean_anchors( 49 | dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False 50 | ) 51 | new_bpr = metric(new_anchors.reshape(-1, 2))[0] 52 | if new_bpr > bpr: # replace anchors 53 | new_anchors = torch.tensor(new_anchors, device=m.anchors.device).type_as( 54 | m.anchors 55 | ) 56 | m.anchor_grid[:] = new_anchors.clone().view_as( 57 | m.anchor_grid 58 | ) # for inference 59 | m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to( 60 | m.anchors.device 61 | ).view( 62 | -1, 1, 1 63 | ) # loss 64 | check_anchor_order(m) 65 | print( 66 | f"{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future." 67 | ) 68 | else: 69 | print( 70 | f"{prefix}Original anchors better than new anchors. Proceeding with original anchors." 
71 | ) 72 | print("") # newline 73 | 74 | 75 | def kmean_anchors( 76 | path="./data/coco128.yaml", n=9, img_size=640, thr=4.0, gen=1000, verbose=True 77 | ): 78 | """Creates kmeans-evolved anchors from training dataset 79 | 80 | Arguments: 81 | path: path to dataset *.yaml, or a loaded dataset 82 | n: number of anchors 83 | img_size: image size used for training 84 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 85 | gen: generations to evolve anchors using genetic algorithm 86 | verbose: print all results 87 | 88 | Return: 89 | k: kmeans evolved anchors 90 | 91 | Usage: 92 | from utils.autoanchor import *; _ = kmean_anchors() 93 | """ 94 | thr = 1.0 / thr 95 | prefix = colorstr("autoanchor: ") 96 | 97 | def metric(k, wh): # compute metrics 98 | r = wh[:, None] / k[None] 99 | x = torch.min(r, 1.0 / r).min(2)[0] # ratio metric 100 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 101 | return x, x.max(1)[0] # x, best_x 102 | 103 | def anchor_fitness(k): # mutation fitness 104 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 105 | return (best * (best > thr).float()).mean() # fitness 106 | 107 | def print_results(k): 108 | k = k[np.argsort(k.prod(1))] # sort small to large 109 | x, best = metric(k, wh0) 110 | bpr, aat = (best > thr).float().mean(), ( 111 | x > thr 112 | ).float().mean() * n # best possible recall, anch > thr 113 | print( 114 | f"{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr" 115 | ) 116 | print( 117 | f"{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, " 118 | f"past_thr={x[x > thr].mean():.3f}-mean: ", 119 | end="", 120 | ) 121 | for i, x in enumerate(k): 122 | print( 123 | "%i,%i" % (round(x[0]), round(x[1])), 124 | end=", " if i < len(k) - 1 else "\n", 125 | ) # use in *.cfg 126 | return k 127 | 128 | if isinstance(path, str): # *.yaml file 129 | with open(path) as f: 130 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 131 | from utils.datasets import LoadImagesAndLabels 132 | 133 | dataset = LoadImagesAndLabels(data_dict["train"], augment=True, rect=True) 134 | else: 135 | dataset = path # dataset 136 | 137 | # Get label wh 138 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 139 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 140 | 141 | # Filter 142 | i = (wh0 < 3.0).any(1).sum() 143 | if i: 144 | print( 145 | f"{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size." 
146 | ) 147 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 148 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 149 | 150 | # Kmeans calculation 151 | print(f"{prefix}Running kmeans for {n} anchors on {len(wh)} points...") 152 | s = wh.std(0) # sigmas for whitening 153 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 154 | k *= s 155 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 156 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 157 | k = print_results(k) 158 | 159 | # Plot 160 | # k, d = [None] * 20, [None] * 20 161 | # for i in tqdm(range(1, 21)): 162 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 163 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 164 | # ax = ax.ravel() 165 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 166 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 167 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 168 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 169 | # fig.savefig('wh.png', dpi=200) 170 | 171 | # Evolve 172 | npr = np.random 173 | f, sh, mp, s = ( 174 | anchor_fitness(k), 175 | k.shape, 176 | 0.9, 177 | 0.1, 178 | ) # fitness, generations, mutation prob, sigma 179 | pbar = tqdm( 180 | range(gen), desc=f"{prefix}Evolving anchors with Genetic Algorithm:" 181 | ) # progress bar 182 | for _ in pbar: 183 | v = np.ones(sh) 184 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 185 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip( 186 | 0.3, 3.0 187 | ) 188 | kg = (k.copy() * v).clip(min=2.0) 189 | fg = anchor_fitness(kg) 190 | if fg > f: 191 | f, k = fg, kg.copy() 192 | pbar.desc = ( 193 | f"{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}" 194 | ) 195 | if verbose: 196 | print_results(k) 197 | 198 | return print_results(k) 199 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/google_utils.py: -------------------------------------------------------------------------------- 1 | # Google utils: https://cloud.google.com/storage/docs/reference/libraries 2 | 3 | import os 4 | import platform 5 | import subprocess 6 | import time 7 | from pathlib import Path 8 | 9 | import requests 10 | import torch 11 | 12 | 13 | def gsutil_getsize(url=""): 14 | # gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du 15 | s = subprocess.check_output(f"gsutil du {url}", shell=True).decode("utf-8") 16 | return eval(s.split(" ")[0]) if len(s) else 0 # bytes 17 | 18 | 19 | def attempt_download(file, repo="ultralytics/yolov5"): 20 | # Attempt file download if does not exist 21 | file = Path(str(file).strip().replace("'", "").lower()) 22 | 23 | if not file.exists(): 24 | try: 25 | response = requests.get( 26 | f"https://api.github.com/repos/{repo}/releases/latest" 27 | ).json() # github api 28 | assets = [ 29 | x["name"] for x in response["assets"] 30 | ] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...] 31 | tag = response["tag_name"] # i.e. 
'v1.0' 32 | except: # fallback plan 33 | assets = ["yolov5.pt", "yolov5.pt", "yolov5l.pt", "yolov5x.pt"] 34 | tag = ( 35 | subprocess.check_output("git tag", shell=True) 36 | .decode("utf-8") 37 | .split("\n")[-2] 38 | ) 39 | 40 | name = file.name 41 | if name in assets: 42 | msg = f"{file} missing, try downloading from https://github.com/{repo}/releases/" 43 | redundant = False # second download option 44 | try: # GitHub 45 | url = f"https://github.com/{repo}/releases/download/{tag}/{name}" 46 | print(f"Downloading {url} to {file}...") 47 | torch.hub.download_url_to_file(url, file) 48 | assert file.exists() and file.stat().st_size > 1e6 # check 49 | except Exception as e: # GCP 50 | print(f"Download error: {e}") 51 | assert redundant, "No secondary mirror" 52 | url = f"https://storage.googleapis.com/{repo}/ckpt/{name}" 53 | print(f"Downloading {url} to {file}...") 54 | os.system( 55 | f"curl -L {url} -o {file}" 56 | ) # torch.hub.download_url_to_file(url, weights) 57 | finally: 58 | if not file.exists() or file.stat().st_size < 1e6: # check 59 | file.unlink(missing_ok=True) # remove partial downloads 60 | print(f"ERROR: Download failure: {msg}") 61 | print("") 62 | return 63 | 64 | 65 | def gdrive_download(id="16TiPfZj7htmTyhntwcZyEEAejOUxuT6m", file="tmp.zip"): 66 | # Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download() 67 | t = time.time() 68 | file = Path(file) 69 | cookie = Path("cookie") # gdrive cookie 70 | print( 71 | f"Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ", 72 | end="", 73 | ) 74 | file.unlink(missing_ok=True) # remove existing file 75 | cookie.unlink(missing_ok=True) # remove existing cookie 76 | 77 | # Attempt file download 78 | out = "NUL" if platform.system() == "Windows" else "/dev/null" 79 | os.system( 80 | f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}' 81 | ) 82 | if os.path.exists("cookie"): # large file 83 | s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}' 84 | else: # small file 85 | s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"' 86 | r = os.system(s) # execute, capture return 87 | cookie.unlink(missing_ok=True) # remove existing cookie 88 | 89 | # Error check 90 | if r != 0: 91 | file.unlink(missing_ok=True) # remove partial 92 | print("Download error ") # raise Exception('Download error') 93 | return r 94 | 95 | # Unzip if archive 96 | if file.suffix == ".zip": 97 | print("unzipping... 
", end="") 98 | os.system(f"unzip -q {file}") # unzip 99 | file.unlink() # remove zip to free space 100 | 101 | print(f"Done ({time.time() - t:.1f}s)") 102 | return r 103 | 104 | 105 | def get_token(cookie="./cookie"): 106 | with open(cookie) as f: 107 | for line in f: 108 | if "download" in line: 109 | return line.split()[-1] 110 | return "" 111 | 112 | 113 | # def upload_blob(bucket_name, source_file_name, destination_blob_name): 114 | # # Uploads a file to a bucket 115 | # # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python 116 | # 117 | # storage_client = storage.Client() 118 | # bucket = storage_client.get_bucket(bucket_name) 119 | # blob = bucket.blob(destination_blob_name) 120 | # 121 | # blob.upload_from_filename(source_file_name) 122 | # 123 | # print('File {} uploaded to {}.'.format( 124 | # source_file_name, 125 | # destination_blob_name)) 126 | # 127 | # 128 | # def download_blob(bucket_name, source_blob_name, destination_file_name): 129 | # # Uploads a blob from a bucket 130 | # storage_client = storage.Client() 131 | # bucket = storage_client.get_bucket(bucket_name) 132 | # blob = bucket.blob(source_blob_name) 133 | # 134 | # blob.download_to_filename(destination_file_name) 135 | # 136 | # print('Blob {} downloaded to {}.'.format( 137 | # source_blob_name, 138 | # destination_file_name)) 139 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/infer_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def decode_infer(output, stride): 5 | # logging.info(torch.tensor(output.shape[0])) 6 | # logging.info(output.shape) 7 | # # bz is batch-size 8 | # bz = tuple(torch.tensor(output.shape[0])) 9 | # gridsize = tuple(torch.tensor(output.shape[-1])) 10 | # logging.info(gridsize) 11 | sh = torch.tensor(output.shape) 12 | bz = sh[0] 13 | gridsize = sh[-1] 14 | 15 | output = output.permute(0, 2, 3, 1) 16 | output = output.view(bz, gridsize, gridsize, self.gt_per_grid, 5 + self.numclass) 17 | x1y1, x2y2, conf, prob = torch.split(output, [2, 2, 1, self.numclass], dim=4) 18 | 19 | shiftx = torch.arange(0, gridsize, dtype=torch.float32) 20 | shifty = torch.arange(0, gridsize, dtype=torch.float32) 21 | shifty, shiftx = torch.meshgrid([shiftx, shifty]) 22 | shiftx = shiftx.unsqueeze(-1).repeat(bz, 1, 1, self.gt_per_grid) 23 | shifty = shifty.unsqueeze(-1).repeat(bz, 1, 1, self.gt_per_grid) 24 | 25 | xy_grid = torch.stack([shiftx, shifty], dim=4).cuda() 26 | x1y1 = (xy_grid + 0.5 - torch.exp(x1y1)) * stride 27 | x2y2 = (xy_grid + 0.5 + torch.exp(x2y2)) * stride 28 | 29 | xyxy = torch.cat((x1y1, x2y2), dim=4) 30 | conf = torch.sigmoid(conf) 31 | prob = torch.sigmoid(prob) 32 | output = torch.cat((xyxy, conf, prob), 4) 33 | output = output.view(bz, -1, 5 + self.numclass) 34 | return output 35 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/metrics.py: -------------------------------------------------------------------------------- 1 | # Model validation metrics 2 | 3 | from pathlib import Path 4 | 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | import torch 8 | 9 | from . 
import general 10 | 11 | 12 | def fitness(x): 13 | # Model fitness as a weighted combination of metrics 14 | w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95] 15 | return (x[:, :4] * w).sum(1) 16 | 17 | 18 | def ap_per_class( 19 | tp, 20 | conf, 21 | pred_cls, 22 | target_cls, 23 | plot=False, 24 | save_dir="precision-recall_curve.png", 25 | names=[], 26 | ): 27 | """Compute the average precision, given the recall and precision curves. 28 | Source: https://github.com/rafaelpadilla/Object-Detection-Metrics. 29 | # Arguments 30 | tp: True positives (nparray, nx1 or nx10). 31 | conf: Objectness value from 0-1 (nparray). 32 | pred_cls: Predicted object classes (nparray). 33 | target_cls: True object classes (nparray). 34 | plot: Plot precision-recall curve at mAP@0.5 35 | save_dir: Plot save directory 36 | # Returns 37 | The average precision as computed in py-faster-rcnn. 38 | """ 39 | 40 | # Sort by objectness 41 | i = np.argsort(-conf) 42 | tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] 43 | 44 | # Find unique classes 45 | unique_classes = np.unique(target_cls) 46 | 47 | # Create Precision-Recall curve and compute AP for each class 48 | px, py = np.linspace(0, 1, 1000), [] # for plotting 49 | pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898 50 | s = [ 51 | unique_classes.shape[0], 52 | tp.shape[1], 53 | ] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95) 54 | ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s) 55 | for ci, c in enumerate(unique_classes): 56 | i = pred_cls == c 57 | n_l = (target_cls == c).sum() # number of labels 58 | n_p = i.sum() # number of predictions 59 | 60 | if n_p == 0 or n_l == 0: 61 | continue 62 | else: 63 | # Accumulate FPs and TPs 64 | fpc = (1 - tp[i]).cumsum(0) 65 | tpc = tp[i].cumsum(0) 66 | 67 | # Recall 68 | recall = tpc / (n_l + 1e-16) # recall curve 69 | r[ci] = np.interp( 70 | -pr_score, -conf[i], recall[:, 0] 71 | ) # r at pr_score, negative x, xp because xp decreases 72 | 73 | # Precision 74 | precision = tpc / (tpc + fpc) # precision curve 75 | p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score 76 | 77 | # AP from recall-precision curve 78 | for j in range(tp.shape[1]): 79 | ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j]) 80 | if plot and (j == 0): 81 | py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5 82 | 83 | # Compute F1 score (harmonic mean of precision and recall) 84 | f1 = 2 * p * r / (p + r + 1e-16) 85 | 86 | if plot: 87 | plot_pr_curve(px, py, ap, save_dir, names) 88 | 89 | return p, r, ap, f1, unique_classes.astype("int32") 90 | 91 | 92 | def compute_ap(recall, precision): 93 | """Compute the average precision, given the recall and precision curves 94 | # Arguments 95 | recall: The recall curve (list) 96 | precision: The precision curve (list) 97 | # Returns 98 | Average precision, precision curve, recall curve 99 | """ 100 | 101 | # Append sentinel values to beginning and end 102 | mrec = np.concatenate(([0.0], recall, [recall[-1] + 0.01])) 103 | mpre = np.concatenate(([1.0], precision, [0.0])) 104 | 105 | # Compute the precision envelope 106 | mpre = np.flip(np.maximum.accumulate(np.flip(mpre))) 107 | 108 | # Integrate area under curve 109 | method = "interp" # methods: 'continuous', 'interp' 110 | if method == "interp": 111 | x = np.linspace(0, 1, 101) # 101-point interp (COCO) 112 | ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate 113 | else: # 'continuous' 114 | i = np.where(mrec[1:] != mrec[:-1])[0] # 
points where x axis (recall) changes 115 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve 116 | 117 | return ap, mpre, mrec 118 | 119 | 120 | class ConfusionMatrix: 121 | # Updated version of https://github.com/kaanakan/object_detection_confusion_matrix 122 | def __init__(self, nc, conf=0.25, iou_thres=0.45): 123 | self.matrix = np.zeros((nc + 1, nc + 1)) 124 | self.nc = nc # number of classes 125 | self.conf = conf 126 | self.iou_thres = iou_thres 127 | 128 | def process_batch(self, detections, labels): 129 | """ 130 | Return intersection-over-union (Jaccard index) of boxes. 131 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 132 | Arguments: 133 | detections (Array[N, 6]), x1, y1, x2, y2, conf, class 134 | labels (Array[M, 5]), class, x1, y1, x2, y2 135 | Returns: 136 | None, updates confusion matrix accordingly 137 | """ 138 | detections = detections[detections[:, 4] > self.conf] 139 | gt_classes = labels[:, 0].int() 140 | detection_classes = detections[:, 5].int() 141 | iou = general.box_iou(labels[:, 1:], detections[:, :4]) 142 | 143 | x = torch.where(iou > self.iou_thres) 144 | if x[0].shape[0]: 145 | matches = ( 146 | torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1) 147 | .cpu() 148 | .numpy() 149 | ) 150 | if x[0].shape[0] > 1: 151 | matches = matches[matches[:, 2].argsort()[::-1]] 152 | matches = matches[np.unique(matches[:, 1], return_index=True)[1]] 153 | matches = matches[matches[:, 2].argsort()[::-1]] 154 | matches = matches[np.unique(matches[:, 0], return_index=True)[1]] 155 | else: 156 | matches = np.zeros((0, 3)) 157 | 158 | n = matches.shape[0] > 0 159 | m0, m1, _ = matches.transpose().astype(np.int16) 160 | for i, gc in enumerate(gt_classes): 161 | j = m0 == i 162 | if n and sum(j) == 1: 163 | self.matrix[gc, detection_classes[m1[j]]] += 1 # correct 164 | else: 165 | self.matrix[gc, self.nc] += 1 # background FP 166 | 167 | if n: 168 | for i, dc in enumerate(detection_classes): 169 | if not any(m1 == i): 170 | self.matrix[self.nc, dc] += 1 # background FN 171 | 172 | def matrix(self): 173 | return self.matrix 174 | 175 | def plot(self, save_dir="", names=()): 176 | try: 177 | import seaborn as sn 178 | 179 | array = self.matrix / ( 180 | self.matrix.sum(0).reshape(1, self.nc + 1) + 1e-6 181 | ) # normalize 182 | array[array < 0.005] = np.nan # don't annotate (would appear as 0.00) 183 | 184 | fig = plt.figure(figsize=(12, 9), tight_layout=True) 185 | sn.set(font_scale=1.0 if self.nc < 50 else 0.8) # for label size 186 | labels = (0 < len(names) < 99) and len( 187 | names 188 | ) == self.nc # apply names to ticklabels 189 | sn.heatmap( 190 | array, 191 | annot=self.nc < 30, 192 | annot_kws={"size": 8}, 193 | cmap="Blues", 194 | fmt=".2f", 195 | square=True, 196 | xticklabels=names + ["background FN"] if labels else "auto", 197 | yticklabels=names + ["background FP"] if labels else "auto", 198 | ).set_facecolor((1, 1, 1)) 199 | fig.axes[0].set_xlabel("True") 200 | fig.axes[0].set_ylabel("Predicted") 201 | fig.savefig(Path(save_dir) / "confusion_matrix.png", dpi=250) 202 | except Exception as e: 203 | pass 204 | 205 | def print(self): 206 | for i in range(self.nc + 1): 207 | print(" ".join(map(str, self.matrix[i]))) 208 | 209 | 210 | # Plots ---------------------------------------------------------------------------------------------------------------- 211 | 212 | 213 | def plot_pr_curve(px, py, ap, save_dir=".", names=()): 214 | fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True) 215 | py = np.stack(py, 
axis=1) 216 | 217 | if 0 < len(names) < 21: # show mAP in legend if < 10 classes 218 | for i, y in enumerate(py.T): 219 | ax.plot( 220 | px, y, linewidth=1, label=f"{names[i]} %.3f" % ap[i, 0] 221 | ) # plot(recall, precision) 222 | else: 223 | ax.plot(px, py, linewidth=1, color="grey") # plot(recall, precision) 224 | 225 | ax.plot( 226 | px, 227 | py.mean(1), 228 | linewidth=3, 229 | color="blue", 230 | label="all classes %.3f mAP@0.5" % ap[:, 0].mean(), 231 | ) 232 | ax.set_xlabel("Recall") 233 | ax.set_ylabel("Precision") 234 | ax.set_xlim(0, 1) 235 | ax.set_ylim(0, 1) 236 | plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left") 237 | fig.savefig(Path(save_dir) / "precision_recall_curve.png", dpi=250) 238 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/wandb_logging/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vectornguyen76/face-recognition/1cc00f7baa78815099b43302c7a2933319b0d4a0/face_detection/yolov5_face/utils/wandb_logging/__init__.py -------------------------------------------------------------------------------- /face_detection/yolov5_face/utils/wandb_logging/log_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import yaml 4 | from wandb_utils import WandbLogger 5 | 6 | WANDB_ARTIFACT_PREFIX = "wandb-artifact://" 7 | 8 | 9 | def create_dataset_artifact(opt): 10 | with open(opt.data) as f: 11 | data = yaml.load(f, Loader=yaml.SafeLoader) # data dict 12 | logger = WandbLogger(opt, "", None, data, job_type="Dataset Creation") 13 | 14 | 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | "--data", type=str, default="data/coco128.yaml", help="data.yaml path" 19 | ) 20 | parser.add_argument( 21 | "--single-cls", action="store_true", help="train as single-class dataset" 22 | ) 23 | parser.add_argument( 24 | "--project", type=str, default="YOLOv5", help="name of W&B Project" 25 | ) 26 | opt = parser.parse_args() 27 | opt.resume = False # Explicitly disallow resume check for dataset upload job 28 | 29 | create_dataset_artifact(opt) 30 | -------------------------------------------------------------------------------- /face_detection/yolov5_face/weights/README.md: -------------------------------------------------------------------------------- 1 | ## Download Weights: 2 | 3 | - https://drive.google.com/drive/folders/1CGq-2AfcSyWGwZWs9sIzQ1BXhRkPGgxF?usp=sharing 4 | -------------------------------------------------------------------------------- /face_recognition/arcface/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d( 9 | in_planes, 10 | out_planes, 11 | kernel_size=3, 12 | stride=stride, 13 | padding=dilation, 14 | groups=groups, 15 | bias=False, 16 | dilation=dilation, 17 | ) 18 | 19 | 20 | def conv1x1(in_planes, out_planes, stride=1): 21 | """1x1 convolution""" 22 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 23 | 24 | 25 | class IBasicBlock(nn.Module): 26 | expansion = 1 27 | 28 | def __init__( 29 | self, 30 | inplanes, 31 | planes, 32 | stride=1, 33 | downsample=None, 34 | groups=1, 35 | base_width=64, 36 | dilation=1, 37 
| ): 38 | super(IBasicBlock, self).__init__() 39 | if groups != 1 or base_width != 64: 40 | raise ValueError("BasicBlock only supports groups=1 and base_width=64") 41 | if dilation > 1: 42 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 43 | self.bn1 = nn.BatchNorm2d( 44 | inplanes, 45 | eps=1e-05, 46 | ) 47 | self.conv1 = conv3x3(inplanes, planes) 48 | self.bn2 = nn.BatchNorm2d( 49 | planes, 50 | eps=1e-05, 51 | ) 52 | self.prelu = nn.PReLU(planes) 53 | self.conv2 = conv3x3(planes, planes, stride) 54 | self.bn3 = nn.BatchNorm2d( 55 | planes, 56 | eps=1e-05, 57 | ) 58 | self.downsample = downsample 59 | self.stride = stride 60 | 61 | def forward(self, x): 62 | identity = x 63 | out = self.bn1(x) 64 | out = self.conv1(out) 65 | out = self.bn2(out) 66 | out = self.prelu(out) 67 | out = self.conv2(out) 68 | out = self.bn3(out) 69 | if self.downsample is not None: 70 | identity = self.downsample(x) 71 | out += identity 72 | return out 73 | 74 | 75 | class IResNet(nn.Module): 76 | fc_scale = 7 * 7 77 | 78 | def __init__( 79 | self, 80 | block, 81 | layers, 82 | dropout=0, 83 | num_features=512, 84 | zero_init_residual=False, 85 | groups=1, 86 | width_per_group=64, 87 | replace_stride_with_dilation=None, 88 | fp16=False, 89 | ): 90 | super(IResNet, self).__init__() 91 | self.fp16 = fp16 92 | self.inplanes = 64 93 | self.dilation = 1 94 | if replace_stride_with_dilation is None: 95 | replace_stride_with_dilation = [False, False, False] 96 | if len(replace_stride_with_dilation) != 3: 97 | raise ValueError( 98 | "replace_stride_with_dilation should be None " 99 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation) 100 | ) 101 | self.groups = groups 102 | self.base_width = width_per_group 103 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False) 104 | self.bn1 = nn.BatchNorm2d(self.inplanes, eps=1e-05) 105 | self.prelu = nn.PReLU(self.inplanes) 106 | self.layer1 = self._make_layer(block, 64, layers[0], stride=2) 107 | self.layer2 = self._make_layer( 108 | block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0] 109 | ) 110 | self.layer3 = self._make_layer( 111 | block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1] 112 | ) 113 | self.layer4 = self._make_layer( 114 | block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2] 115 | ) 116 | self.bn2 = nn.BatchNorm2d( 117 | 512 * block.expansion, 118 | eps=1e-05, 119 | ) 120 | self.dropout = nn.Dropout(p=dropout, inplace=True) 121 | self.fc = nn.Linear(512 * block.expansion * self.fc_scale, num_features) 122 | self.features = nn.BatchNorm1d(num_features, eps=1e-05) 123 | nn.init.constant_(self.features.weight, 1.0) 124 | self.features.weight.requires_grad = False 125 | 126 | for m in self.modules(): 127 | if isinstance(m, nn.Conv2d): 128 | nn.init.normal_(m.weight, 0, 0.1) 129 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 130 | nn.init.constant_(m.weight, 1) 131 | nn.init.constant_(m.bias, 0) 132 | 133 | if zero_init_residual: 134 | for m in self.modules(): 135 | if isinstance(m, IBasicBlock): 136 | nn.init.constant_(m.bn2.weight, 0) 137 | 138 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 139 | downsample = None 140 | previous_dilation = self.dilation 141 | if dilate: 142 | self.dilation *= stride 143 | stride = 1 144 | if stride != 1 or self.inplanes != planes * block.expansion: 145 | downsample = nn.Sequential( 146 | conv1x1(self.inplanes, planes * block.expansion, stride), 147 | nn.BatchNorm2d( 
148 | planes * block.expansion, 149 | eps=1e-05, 150 | ), 151 | ) 152 | layers = [] 153 | layers.append( 154 | block( 155 | self.inplanes, 156 | planes, 157 | stride, 158 | downsample, 159 | self.groups, 160 | self.base_width, 161 | previous_dilation, 162 | ) 163 | ) 164 | self.inplanes = planes * block.expansion 165 | for _ in range(1, blocks): 166 | layers.append( 167 | block( 168 | self.inplanes, 169 | planes, 170 | groups=self.groups, 171 | base_width=self.base_width, 172 | dilation=self.dilation, 173 | ) 174 | ) 175 | 176 | return nn.Sequential(*layers) 177 | 178 | def forward(self, x): 179 | with torch.cuda.amp.autocast(self.fp16): 180 | x = self.conv1(x) 181 | x = self.bn1(x) 182 | x = self.prelu(x) 183 | x = self.layer1(x) 184 | x = self.layer2(x) 185 | x = self.layer3(x) 186 | x = self.layer4(x) 187 | x = self.bn2(x) 188 | x = torch.flatten(x, 1) 189 | x = self.dropout(x) 190 | x = self.fc(x.float() if self.fp16 else x) 191 | x = self.features(x) 192 | x = F.normalize(x, dim=1) 193 | return x 194 | 195 | 196 | def _iresnet(arch, block, layers, pretrained, progress, **kwargs): 197 | model = IResNet(block, layers, **kwargs) 198 | if pretrained: 199 | raise ValueError() 200 | return model 201 | 202 | 203 | def iresnet18(pretrained=False, progress=True, **kwargs): 204 | return _iresnet("iresnet18", IBasicBlock, [2, 2, 2, 2], pretrained, progress, **kwargs) 205 | 206 | 207 | def iresnet34(pretrained=False, progress=True, **kwargs): 208 | return _iresnet("iresnet34", IBasicBlock, [3, 4, 6, 3], pretrained, progress, **kwargs) 209 | 210 | 211 | def iresnet50(pretrained=False, progress=True, **kwargs): 212 | return _iresnet("iresnet50", IBasicBlock, [3, 4, 14, 3], pretrained, progress, **kwargs) 213 | 214 | 215 | def iresnet100(pretrained=False, progress=True, **kwargs): 216 | return _iresnet("iresnet100", IBasicBlock, [3, 13, 30, 3], pretrained, progress, **kwargs) 217 | 218 | 219 | def iresnet200(pretrained=False, progress=True, **kwargs): 220 | return _iresnet("iresnet200", IBasicBlock, [6, 26, 60, 6], pretrained, progress, **kwargs) 221 | 222 | 223 | def iresnet_inference(model_name, path, device="cuda"): 224 | if model_name == "r18": 225 | model = iresnet18() 226 | elif model_name == "r34": 227 | model = iresnet34() 228 | elif model_name == "r50": 229 | model = iresnet50() 230 | elif model_name == "r100": 231 | model = iresnet100() 232 | else: 233 | raise ValueError() 234 | 235 | weight = torch.load(path, map_location=device) 236 | 237 | model.load_state_dict(weight) 238 | model.to(device) 239 | 240 | return model.eval() 241 | -------------------------------------------------------------------------------- /face_recognition/arcface/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def read_features(feature_path): 5 | try: 6 | data = np.load(feature_path + ".npz", allow_pickle=True) 7 | images_name = data["images_name"] 8 | images_emb = data["images_emb"] 9 | 10 | return images_name, images_emb 11 | except: 12 | return None 13 | 14 | 15 | def compare_encodings(encoding, encodings): 16 | sims = np.dot(encodings, encoding.T) 17 | pare_index = np.argmax(sims) 18 | score = sims[pare_index] 19 | return score, pare_index 20 | -------------------------------------------------------------------------------- /face_recognition/arcface/weights/README.md: -------------------------------------------------------------------------------- 1 | ## Download Weights: 2 | 3 | - 
https://drive.google.com/drive/folders/1CHHb_7wbvfjKPFNKVBb76lL5sVfBLcv5?usp=sharing 4 | -------------------------------------------------------------------------------- /face_tracking/config/config_tracking.yaml: -------------------------------------------------------------------------------- 1 | device: cpu 2 | fps: 30 3 | match_thresh: 0.8 4 | min_box_area: 10 5 | save_result: True 6 | track_buffer: 30 7 | track_thresh: 0.5 8 | aspect_ratio_thresh: 1.6 9 | ckpt: bytetrack_s_mot17.pth.tar 10 | fp16: True 11 | -------------------------------------------------------------------------------- /face_tracking/pretrained/README.md: -------------------------------------------------------------------------------- 1 | ## Model zoo 2 | 3 | | Model | MOTA | IDF1 | IDs | FPS | 4 | | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | ---- | --- | ---- | 5 | | bytetrack_x_mot17 [[google]](https://drive.google.com/file/d/1P4mY0Yyd3PPTybgZkjMYhFri88nTmJX5/view?usp=sharing), [[baidu(code:ic0i)]](https://pan.baidu.com/s/1OJKrcQa_JP9zofC6ZtGBpw) | 90.0 | 83.3 | 422 | 29.6 | 6 | | bytetrack_l_mot17 [[google]](https://drive.google.com/file/d/1XwfUuCBF4IgWBWK2H7oOhQgEj9Mrb3rz/view?usp=sharing), [[baidu(code:1cml)]](https://pan.baidu.com/s/1242adimKM6TYdeLU2qnuRA) | 88.7 | 80.7 | 460 | 43.7 | 7 | | bytetrack_m_mot17 [[google]](https://drive.google.com/file/d/11Zb0NN_Uu7JwUd9e6Nk8o2_EUfxWqsun/view?usp=sharing), [[baidu(code:u3m4)]](https://pan.baidu.com/s/1fKemO1uZfvNSLzJfURO4TQ) | 87.0 | 80.1 | 477 | 54.1 | 8 | | bytetrack_s_mot17 [[google]](https://drive.google.com/file/d/1uSmhXzyV1Zvb4TJJCzpsZOIcw7CCJLxj/view?usp=sharing), [[baidu(code:qflm)]](https://pan.baidu.com/s/1PiP1kQfgxAIrnGUbFP6Wfg) | 79.2 | 74.3 | 533 | 64.5 | 9 | 10 | ## Reference 11 | 12 | - https://github.com/ifzhang/ByteTrack?tab=readme-ov-file#model-zoo 13 | -------------------------------------------------------------------------------- /face_tracking/tracker/basetrack.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import numpy as np 4 | 5 | 6 | class TrackState(object): 7 | New = 0 8 | Tracked = 1 9 | Lost = 2 10 | Removed = 3 11 | 12 | 13 | class BaseTrack(object): 14 | _count = 0 15 | 16 | track_id = 0 17 | is_activated = False 18 | state = TrackState.New 19 | 20 | history = OrderedDict() 21 | features = [] 22 | curr_feature = None 23 | score = 0 24 | start_frame = 0 25 | frame_id = 0 26 | time_since_update = 0 27 | 28 | # multi-camera 29 | location = (np.inf, np.inf) 30 | 31 | @property 32 | def end_frame(self): 33 | return self.frame_id 34 | 35 | @staticmethod 36 | def next_id(): 37 | BaseTrack._count += 1 38 | return BaseTrack._count 39 | 40 | def activate(self, *args): 41 | raise NotImplementedError 42 | 43 | def predict(self): 44 | raise NotImplementedError 45 | 46 | def update(self, *args, **kwargs): 47 | raise NotImplementedError 48 | 49 | def mark_lost(self): 50 | self.state = TrackState.Lost 51 | 52 | def mark_removed(self): 53 | self.state = TrackState.Removed 54 | -------------------------------------------------------------------------------- /face_tracking/tracker/byte_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import torch 5 | 6 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | sys.path.append(BASE_DIR) 8 | 
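# --- Minimal identification sketch for the ArcFace utilities above ---
# (face_recognition/arcface/model.py and utils.py). Kept as a commented example,
# like the commented snippets elsewhere in this repo. The weight filename and the
# random query vector are placeholders: in the full pipeline the query embedding
# comes from an aligned 112x112 face crop passed through the loaded model, which
# already returns L2-normalized 512-d features, so the dot product inside
# compare_encodings is a cosine similarity.
#
#   import numpy as np
#   from face_recognition.arcface.model import iresnet_inference
#   from face_recognition.arcface.utils import compare_encodings, read_features
#
#   model = iresnet_inference(
#       "r100", "face_recognition/arcface/weights/arcface_r100.pth", device="cpu"
#   )  # weight path is a placeholder for the downloaded ArcFace checkpoint
#   # A real query would be model(preprocessed_face); a random unit vector
#   # stands in here to keep the sketch self-contained.
#   query_emb = np.random.randn(1, 512).astype(np.float32)
#   query_emb /= np.linalg.norm(query_emb, axis=1, keepdims=True)
#   images_name, images_emb = read_features("datasets/face_features/feature")  # ".npz" appended internally
#   score, idx = compare_encodings(query_emb, images_emb)  # cosine similarity + argmax
#   print(f"Best match: {images_name[idx]} (score={float(score):.3f})")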
9 | import matching 10 | import numpy as np 11 | 12 | from .basetrack import BaseTrack, TrackState 13 | from .kalman_filter import KalmanFilter 14 | 15 | 16 | class STrack(BaseTrack): 17 | shared_kalman = KalmanFilter() 18 | 19 | def __init__(self, tlwh, score): 20 | # wait activate 21 | self._tlwh = np.asarray(tlwh, dtype=np.float64) 22 | self.kalman_filter = None 23 | self.mean, self.covariance = None, None 24 | self.is_activated = False 25 | 26 | self.score = score 27 | self.tracklet_len = 0 28 | 29 | def predict(self): 30 | mean_state = self.mean.copy() 31 | if self.state != TrackState.Tracked: 32 | mean_state[7] = 0 33 | self.mean, self.covariance = self.kalman_filter.predict( 34 | mean_state, self.covariance 35 | ) 36 | 37 | @staticmethod 38 | def multi_predict(stracks): 39 | if len(stracks) > 0: 40 | multi_mean = np.asarray([st.mean.copy() for st in stracks]) 41 | multi_covariance = np.asarray([st.covariance for st in stracks]) 42 | for i, st in enumerate(stracks): 43 | if st.state != TrackState.Tracked: 44 | multi_mean[i][7] = 0 45 | multi_mean, multi_covariance = STrack.shared_kalman.multi_predict( 46 | multi_mean, multi_covariance 47 | ) 48 | for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): 49 | stracks[i].mean = mean 50 | stracks[i].covariance = cov 51 | 52 | def activate(self, kalman_filter, frame_id): 53 | """Start a new tracklet""" 54 | self.kalman_filter = kalman_filter 55 | self.track_id = self.next_id() 56 | self.mean, self.covariance = self.kalman_filter.initiate( 57 | self.tlwh_to_xyah(self._tlwh) 58 | ) 59 | 60 | self.tracklet_len = 0 61 | self.state = TrackState.Tracked 62 | if frame_id == 1: 63 | self.is_activated = True 64 | # self.is_activated = True 65 | self.frame_id = frame_id 66 | self.start_frame = frame_id 67 | 68 | def re_activate(self, new_track, frame_id, new_id=False): 69 | self.mean, self.covariance = self.kalman_filter.update( 70 | self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh) 71 | ) 72 | self.tracklet_len = 0 73 | self.state = TrackState.Tracked 74 | self.is_activated = True 75 | self.frame_id = frame_id 76 | if new_id: 77 | self.track_id = self.next_id() 78 | self.score = new_track.score 79 | 80 | def update(self, new_track, frame_id): 81 | """ 82 | Update a matched track 83 | :type new_track: STrack 84 | :type frame_id: int 85 | :type update_feature: bool 86 | :return: 87 | """ 88 | self.frame_id = frame_id 89 | self.tracklet_len += 1 90 | 91 | new_tlwh = new_track.tlwh 92 | self.mean, self.covariance = self.kalman_filter.update( 93 | self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh) 94 | ) 95 | self.state = TrackState.Tracked 96 | self.is_activated = True 97 | 98 | self.score = new_track.score 99 | 100 | @property 101 | # @jit(nopython=True) 102 | def tlwh(self): 103 | """Get current position in bounding box format `(top left x, top left y, 104 | width, height)`. 105 | """ 106 | if self.mean is None: 107 | return self._tlwh.copy() 108 | ret = self.mean[:4].copy() 109 | ret[2] *= ret[3] 110 | ret[:2] -= ret[2:] / 2 111 | return ret 112 | 113 | @property 114 | # @jit(nopython=True) 115 | def tlbr(self): 116 | """Convert bounding box to format `(min x, min y, max x, max y)`, i.e., 117 | `(top left, bottom right)`. 118 | """ 119 | ret = self.tlwh.copy() 120 | ret[2:] += ret[:2] 121 | return ret 122 | 123 | @staticmethod 124 | # @jit(nopython=True) 125 | def tlwh_to_xyah(tlwh): 126 | """Convert bounding box to format `(center x, center y, aspect ratio, 127 | height)`, where the aspect ratio is `width / height`. 
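        For example, tlwh = (10, 20, 50, 100) maps to (35.0, 70.0, 0.5, 100.0):
        the top-left corner is shifted to the box center and the width is
        divided by the height.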
128 | """ 129 | ret = np.asarray(tlwh).copy() 130 | ret[:2] += ret[2:] / 2 131 | ret[2] /= ret[3] 132 | return ret 133 | 134 | def to_xyah(self): 135 | return self.tlwh_to_xyah(self.tlwh) 136 | 137 | @staticmethod 138 | # @jit(nopython=True) 139 | def tlbr_to_tlwh(tlbr): 140 | ret = np.asarray(tlbr).copy() 141 | ret[2:] -= ret[:2] 142 | return ret 143 | 144 | @staticmethod 145 | # @jit(nopython=True) 146 | def tlwh_to_tlbr(tlwh): 147 | ret = np.asarray(tlwh).copy() 148 | ret[2:] += ret[:2] 149 | return ret 150 | 151 | def __repr__(self): 152 | return "OT_{}_({}-{})".format(self.track_id, self.start_frame, self.end_frame) 153 | 154 | 155 | class BYTETracker(object): 156 | def __init__(self, args, frame_rate=30): 157 | self.tracked_stracks = [] # type: list[STrack] 158 | self.lost_stracks = [] # type: list[STrack] 159 | self.removed_stracks = [] # type: list[STrack] 160 | 161 | self.frame_id = 0 162 | self.args = args 163 | # self.det_thresh = args.track_thresh 164 | self.det_thresh = args["track_thresh"] + 0.1 165 | self.buffer_size = int(frame_rate / 30.0 * args["track_buffer"]) 166 | self.max_time_lost = self.buffer_size 167 | self.kalman_filter = KalmanFilter() 168 | 169 | def update(self, output_results, img_info, img_size): 170 | self.frame_id += 1 171 | activated_starcks = [] 172 | refind_stracks = [] 173 | lost_stracks = [] 174 | removed_stracks = [] 175 | 176 | if output_results.shape[1] == 5: 177 | scores = output_results[:, 4] 178 | bboxes = output_results[:, :4] 179 | else: 180 | output_results = output_results.cpu().numpy() 181 | scores = output_results[:, 4] * output_results[:, 5] 182 | bboxes = output_results[:, :4] # x1y1x2y2 183 | img_h, img_w = img_info[0], img_info[1] 184 | scale = min(img_size[0] / float(img_h), img_size[1] / float(img_w)) 185 | bboxes /= scale 186 | 187 | remain_inds = scores > self.args["track_thresh"] 188 | inds_low = scores > 0.1 189 | inds_high = scores < self.args["track_thresh"] 190 | 191 | inds_second = np.logical_and(inds_low, inds_high) 192 | dets_second = bboxes[inds_second.to(torch.bool)] 193 | dets = bboxes[remain_inds] 194 | scores_keep = scores[remain_inds] 195 | scores_second = scores[inds_second.to(torch.bool)] 196 | 197 | if len(dets) > 0: 198 | """Detections""" 199 | detections = [ 200 | STrack(STrack.tlbr_to_tlwh(tlbr), s) 201 | for (tlbr, s) in zip(dets, scores_keep) 202 | ] 203 | else: 204 | detections = [] 205 | 206 | """ Add newly detected tracklets to tracked_stracks""" 207 | unconfirmed = [] 208 | tracked_stracks = [] # type: list[STrack] 209 | for track in self.tracked_stracks: 210 | if not track.is_activated: 211 | unconfirmed.append(track) 212 | else: 213 | tracked_stracks.append(track) 214 | 215 | """ Step 2: First association, with high score detection boxes""" 216 | strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) 217 | # Predict the current location with KF 218 | STrack.multi_predict(strack_pool) 219 | dists = matching.iou_distance(strack_pool, detections) 220 | # if not self.args.mot20: 221 | # dists = matching.fuse_score(dists, detections) 222 | matches, u_track, u_detection = matching.linear_assignment( 223 | dists, thresh=self.args["match_thresh"] 224 | ) 225 | 226 | for itracked, idet in matches: 227 | track = strack_pool[itracked] 228 | det = detections[idet] 229 | if track.state == TrackState.Tracked: 230 | track.update(detections[idet], self.frame_id) 231 | activated_starcks.append(track) 232 | else: 233 | track.re_activate(det, self.frame_id, new_id=False) 234 | refind_stracks.append(track) 235 | 
236 | """ Step 3: Second association, with low score detection boxes""" 237 | # association the untrack to the low score detections 238 | if len(dets_second) > 0: 239 | """Detections""" 240 | detections_second = [ 241 | STrack(STrack.tlbr_to_tlwh(tlbr), s) 242 | for (tlbr, s) in zip(dets_second, scores_second) 243 | ] 244 | else: 245 | detections_second = [] 246 | r_tracked_stracks = [ 247 | strack_pool[i] 248 | for i in u_track 249 | if strack_pool[i].state == TrackState.Tracked 250 | ] 251 | dists = matching.iou_distance(r_tracked_stracks, detections_second) 252 | matches, u_track, u_detection_second = matching.linear_assignment( 253 | dists, thresh=0.5 254 | ) 255 | for itracked, idet in matches: 256 | track = r_tracked_stracks[itracked] 257 | det = detections_second[idet] 258 | if track.state == TrackState.Tracked: 259 | track.update(det, self.frame_id) 260 | activated_starcks.append(track) 261 | else: 262 | track.re_activate(det, self.frame_id, new_id=False) 263 | refind_stracks.append(track) 264 | 265 | for it in u_track: 266 | track = r_tracked_stracks[it] 267 | if not track.state == TrackState.Lost: 268 | track.mark_lost() 269 | lost_stracks.append(track) 270 | 271 | """Deal with unconfirmed tracks, usually tracks with only one beginning frame""" 272 | detections = [detections[i] for i in u_detection] 273 | dists = matching.iou_distance(unconfirmed, detections) 274 | # if not self.args.mot20: 275 | # dists = matching.fuse_score(dists, detections) 276 | matches, u_unconfirmed, u_detection = matching.linear_assignment( 277 | dists, thresh=0.7 278 | ) 279 | for itracked, idet in matches: 280 | unconfirmed[itracked].update(detections[idet], self.frame_id) 281 | activated_starcks.append(unconfirmed[itracked]) 282 | for it in u_unconfirmed: 283 | track = unconfirmed[it] 284 | track.mark_removed() 285 | removed_stracks.append(track) 286 | 287 | """ Step 4: Init new stracks""" 288 | for inew in u_detection: 289 | track = detections[inew] 290 | if track.score < self.det_thresh: 291 | continue 292 | track.activate(self.kalman_filter, self.frame_id) 293 | activated_starcks.append(track) 294 | """ Step 5: Update state""" 295 | for track in self.lost_stracks: 296 | if self.frame_id - track.end_frame > self.max_time_lost: 297 | track.mark_removed() 298 | removed_stracks.append(track) 299 | 300 | # print('Ramained match {} s'.format(t4-t3)) 301 | 302 | self.tracked_stracks = [ 303 | t for t in self.tracked_stracks if t.state == TrackState.Tracked 304 | ] 305 | self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) 306 | self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) 307 | self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) 308 | self.lost_stracks.extend(lost_stracks) 309 | self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) 310 | self.removed_stracks.extend(removed_stracks) 311 | self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks( 312 | self.tracked_stracks, self.lost_stracks 313 | ) 314 | # get scores of lost tracks 315 | output_stracks = [track for track in self.tracked_stracks if track.is_activated] 316 | 317 | return output_stracks 318 | 319 | 320 | def joint_stracks(tlista, tlistb): 321 | exists = {} 322 | res = [] 323 | for t in tlista: 324 | exists[t.track_id] = 1 325 | res.append(t) 326 | for t in tlistb: 327 | tid = t.track_id 328 | if not exists.get(tid, 0): 329 | exists[tid] = 1 330 | res.append(t) 331 | return res 332 | 333 | 334 | def sub_stracks(tlista, tlistb): 335 | stracks 
= {} 336 | for t in tlista: 337 | stracks[t.track_id] = t 338 | for t in tlistb: 339 | tid = t.track_id 340 | if stracks.get(tid, 0): 341 | del stracks[tid] 342 | return list(stracks.values()) 343 | 344 | 345 | def remove_duplicate_stracks(stracksa, stracksb): 346 | pdist = matching.iou_distance(stracksa, stracksb) 347 | pairs = np.where(pdist < 0.15) 348 | dupa, dupb = list(), list() 349 | for p, q in zip(*pairs): 350 | timep = stracksa[p].frame_id - stracksa[p].start_frame 351 | timeq = stracksb[q].frame_id - stracksb[q].start_frame 352 | if timep > timeq: 353 | dupb.append(q) 354 | else: 355 | dupa.append(p) 356 | resa = [t for i, t in enumerate(stracksa) if not i in dupa] 357 | resb = [t for i, t in enumerate(stracksb) if not i in dupb] 358 | return resa, resb 359 | -------------------------------------------------------------------------------- /face_tracking/tracker/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | """ 6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 8 | function and used as Mahalanobis gating threshold. 9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919, 20 | } 21 | 22 | 23 | class KalmanFilter(object): 24 | """ 25 | A simple Kalman filter for tracking bounding boxes in image space. 26 | 27 | The 8-dimensional state space 28 | 29 | x, y, a, h, vx, vy, va, vh 30 | 31 | contains the bounding box center position (x, y), aspect ratio a, height h, 32 | and their respective velocities. 33 | 34 | Object motion follows a constant velocity model. The bounding box location 35 | (x, y, a, h) is taken as direct observation of the state space (linear 36 | observation model). 37 | 38 | """ 39 | 40 | def __init__(self): 41 | ndim, dt = 4, 1.0 42 | 43 | # Create Kalman filter model matrices. 44 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 45 | for i in range(ndim): 46 | self._motion_mat[i, ndim + i] = dt 47 | self._update_mat = np.eye(ndim, 2 * ndim) 48 | 49 | # Motion and observation uncertainty are chosen relative to the current 50 | # state estimate. These weights control the amount of uncertainty in 51 | # the model. This is a bit hacky. 52 | self._std_weight_position = 1.0 / 20 53 | self._std_weight_velocity = 1.0 / 160 54 | 55 | def initiate(self, measurement): 56 | """Create track from unassociated measurement. 57 | 58 | Parameters 59 | ---------- 60 | measurement : ndarray 61 | Bounding box coordinates (x, y, a, h) with center position (x, y), 62 | aspect ratio a, and height h. 63 | 64 | Returns 65 | ------- 66 | (ndarray, ndarray) 67 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 68 | dimensional) of the new track. Unobserved velocities are initialized 69 | to 0 mean. 
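        The initial standard deviations are scaled by the measured height h,
        so larger boxes start with proportionally larger position and velocity
        uncertainty.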
70 | 71 | """ 72 | mean_pos = measurement 73 | mean_vel = np.zeros_like(mean_pos) 74 | mean = np.r_[mean_pos, mean_vel] 75 | 76 | std = [ 77 | 2 * self._std_weight_position * measurement[3], 78 | 2 * self._std_weight_position * measurement[3], 79 | 1e-2, 80 | 2 * self._std_weight_position * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 10 * self._std_weight_velocity * measurement[3], 83 | 1e-5, 84 | 10 * self._std_weight_velocity * measurement[3], 85 | ] 86 | covariance = np.diag(np.square(std)) 87 | return mean, covariance 88 | 89 | def predict(self, mean, covariance): 90 | """Run Kalman filter prediction step. 91 | 92 | Parameters 93 | ---------- 94 | mean : ndarray 95 | The 8 dimensional mean vector of the object state at the previous 96 | time step. 97 | covariance : ndarray 98 | The 8x8 dimensional covariance matrix of the object state at the 99 | previous time step. 100 | 101 | Returns 102 | ------- 103 | (ndarray, ndarray) 104 | Returns the mean vector and covariance matrix of the predicted 105 | state. Unobserved velocities are initialized to 0 mean. 106 | 107 | """ 108 | std_pos = [ 109 | self._std_weight_position * mean[3], 110 | self._std_weight_position * mean[3], 111 | 1e-2, 112 | self._std_weight_position * mean[3], 113 | ] 114 | std_vel = [ 115 | self._std_weight_velocity * mean[3], 116 | self._std_weight_velocity * mean[3], 117 | 1e-5, 118 | self._std_weight_velocity * mean[3], 119 | ] 120 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 121 | 122 | # mean = np.dot(self._motion_mat, mean) 123 | mean = np.dot(mean, self._motion_mat.T) 124 | covariance = ( 125 | np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) 126 | + motion_cov 127 | ) 128 | 129 | return mean, covariance 130 | 131 | def project(self, mean, covariance): 132 | """Project state distribution to measurement space. 133 | 134 | Parameters 135 | ---------- 136 | mean : ndarray 137 | The state's mean vector (8 dimensional array). 138 | covariance : ndarray 139 | The state's covariance matrix (8x8 dimensional). 140 | 141 | Returns 142 | ------- 143 | (ndarray, ndarray) 144 | Returns the projected mean and covariance matrix of the given state 145 | estimate. 146 | 147 | """ 148 | std = [ 149 | self._std_weight_position * mean[3], 150 | self._std_weight_position * mean[3], 151 | 1e-1, 152 | self._std_weight_position * mean[3], 153 | ] 154 | innovation_cov = np.diag(np.square(std)) 155 | 156 | mean = np.dot(self._update_mat, mean) 157 | covariance = np.linalg.multi_dot( 158 | (self._update_mat, covariance, self._update_mat.T) 159 | ) 160 | return mean, covariance + innovation_cov 161 | 162 | def multi_predict(self, mean, covariance): 163 | """Run Kalman filter prediction step (Vectorized version). 164 | Parameters 165 | ---------- 166 | mean : ndarray 167 | The Nx8 dimensional mean matrix of the object states at the previous 168 | time step. 169 | covariance : ndarray 170 | The Nx8x8 dimensional covariance matrics of the object states at the 171 | previous time step. 172 | Returns 173 | ------- 174 | (ndarray, ndarray) 175 | Returns the mean vector and covariance matrix of the predicted 176 | state. Unobserved velocities are initialized to 0 mean. 
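        This is the vectorized counterpart of predict(): all N track states are
        propagated with a single batched matrix product instead of a Python loop.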
177 | """ 178 | std_pos = [ 179 | self._std_weight_position * mean[:, 3], 180 | self._std_weight_position * mean[:, 3], 181 | 1e-2 * np.ones_like(mean[:, 3]), 182 | self._std_weight_position * mean[:, 3], 183 | ] 184 | std_vel = [ 185 | self._std_weight_velocity * mean[:, 3], 186 | self._std_weight_velocity * mean[:, 3], 187 | 1e-5 * np.ones_like(mean[:, 3]), 188 | self._std_weight_velocity * mean[:, 3], 189 | ] 190 | sqr = np.square(np.r_[std_pos, std_vel]).T 191 | 192 | motion_cov = [] 193 | for i in range(len(mean)): 194 | motion_cov.append(np.diag(sqr[i])) 195 | motion_cov = np.asarray(motion_cov) 196 | 197 | mean = np.dot(mean, self._motion_mat.T) 198 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 199 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 200 | 201 | return mean, covariance 202 | 203 | def update(self, mean, covariance, measurement): 204 | """Run Kalman filter correction step. 205 | 206 | Parameters 207 | ---------- 208 | mean : ndarray 209 | The predicted state's mean vector (8 dimensional). 210 | covariance : ndarray 211 | The state's covariance matrix (8x8 dimensional). 212 | measurement : ndarray 213 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 214 | is the center position, a the aspect ratio, and h the height of the 215 | bounding box. 216 | 217 | Returns 218 | ------- 219 | (ndarray, ndarray) 220 | Returns the measurement-corrected state distribution. 221 | 222 | """ 223 | projected_mean, projected_cov = self.project(mean, covariance) 224 | 225 | chol_factor, lower = scipy.linalg.cho_factor( 226 | projected_cov, lower=True, check_finite=False 227 | ) 228 | kalman_gain = scipy.linalg.cho_solve( 229 | (chol_factor, lower), 230 | np.dot(covariance, self._update_mat.T).T, 231 | check_finite=False, 232 | ).T 233 | innovation = measurement - projected_mean 234 | 235 | new_mean = mean + np.dot(innovation, kalman_gain.T) 236 | new_covariance = covariance - np.linalg.multi_dot( 237 | (kalman_gain, projected_cov, kalman_gain.T) 238 | ) 239 | return new_mean, new_covariance 240 | 241 | def gating_distance( 242 | self, mean, covariance, measurements, only_position=False, metric="maha" 243 | ): 244 | """Compute gating distance between state distribution and measurements. 245 | A suitable distance threshold can be obtained from `chi2inv95`. If 246 | `only_position` is False, the chi-square distribution has 4 degrees of 247 | freedom, otherwise 2. 248 | Parameters 249 | ---------- 250 | mean : ndarray 251 | Mean vector over the state distribution (8 dimensional). 252 | covariance : ndarray 253 | Covariance of the state distribution (8x8 dimensional). 254 | measurements : ndarray 255 | An Nx4 dimensional matrix of N measurements, each in 256 | format (x, y, a, h) where (x, y) is the bounding box center 257 | position, a the aspect ratio, and h the height. 258 | only_position : Optional[bool] 259 | If True, distance computation is done with respect to the bounding 260 | box center position only. 261 | Returns 262 | ------- 263 | ndarray 264 | Returns an array of length N, where the i-th element contains the 265 | squared Mahalanobis distance between (mean, covariance) and 266 | `measurements[i]`. 
267 | """ 268 | mean, covariance = self.project(mean, covariance) 269 | if only_position: 270 | mean, covariance = mean[:2], covariance[:2, :2] 271 | measurements = measurements[:, :2] 272 | 273 | d = measurements - mean 274 | if metric == "gaussian": 275 | return np.sum(d * d, axis=1) 276 | elif metric == "maha": 277 | cholesky_factor = np.linalg.cholesky(covariance) 278 | z = scipy.linalg.solve_triangular( 279 | cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True 280 | ) 281 | squared_maha = np.sum(z * z, axis=0) 282 | return squared_maha 283 | else: 284 | raise ValueError("invalid distance metric") 285 | -------------------------------------------------------------------------------- /face_tracking/tracker/matching.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | from scipy.optimize import linear_sum_assignment 6 | from scipy.spatial.distance import cdist 7 | 8 | # Put the tracker directory on sys.path before importing the local module. 9 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.append(BASE_DIR) 11 | 12 | import kalman_filter 13 | 14 | 15 | def linear_assignment(cost_matrix, thresh): 16 | if cost_matrix.size == 0: 17 | return ( 18 | np.empty((0, 2), dtype=int), 19 | tuple(range(cost_matrix.shape[0])), 20 | tuple(range(cost_matrix.shape[1])), 21 | ) 22 | 23 | row_ind, col_ind = linear_sum_assignment(cost_matrix) 24 | # Assignments whose cost exceeds the threshold are rejected and must be reported as unmatched. 25 | matches = [[r, c] for r, c in zip(row_ind, col_ind) if cost_matrix[r, c] <= thresh] 26 | matched_a = {r for r, _ in matches} 27 | matched_b = {c for _, c in matches} 28 | unmatched_a = tuple(i for i in range(cost_matrix.shape[0]) if i not in matched_a) 29 | unmatched_b = tuple(i for i in range(cost_matrix.shape[1]) if i not in matched_b) 30 | return np.asarray(matches, dtype=int).reshape(-1, 2), unmatched_a, unmatched_b 31 | 32 | 33 | def bbox_iou(box1, box2): 34 | """ 35 | Compute the IoU of two bounding boxes.
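    Boxes are corner coordinates (x1, y1, x2, y2). Worked example with
    illustrative numbers::

        bbox_iou((0, 0, 10, 10), (5, 5, 15, 15))
        # intersection = 5 * 5 = 25, union = 100 + 100 - 25 = 175
        # -> 25 / 175 ≈ 0.143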
36 | """ 37 | # Determine the coordinates of each of the boxes 38 | x1, y1, x2, y2 = box1 39 | x1_p, y1_p, x2_p, y2_p = box2 40 | 41 | # Calculate the area of intersection rectangle 42 | xi1 = max(x1, x1_p) 43 | yi1 = max(y1, y1_p) 44 | xi2 = min(x2, x2_p) 45 | yi2 = min(y2, y2_p) 46 | inter_area = max(xi2 - xi1, 0) * max(yi2 - yi1, 0) 47 | 48 | # Calculate each box area 49 | box1_area = (x2 - x1) * (y2 - y1) 50 | box2_area = (x2_p - x1_p) * (y2_p - y1_p) 51 | 52 | # Calculate union area 53 | union_area = box1_area + box2_area - inter_area 54 | 55 | # Calculate IoU 56 | iou = inter_area / union_area 57 | 58 | return iou 59 | 60 | 61 | def ious(atlbrs, btlbrs): 62 | """ 63 | Compute cost based on IoU 64 | :type atlbrs: list[tlbr] | np.ndarray 65 | :type btlbrs: list[tlbr] | np.ndarray 66 | 67 | :rtype ious np.ndarray 68 | """ 69 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64) 70 | for i, box1 in enumerate(atlbrs): 71 | for j, box2 in enumerate(btlbrs): 72 | ious[i, j] = bbox_iou(box1, box2) 73 | return ious 74 | 75 | 76 | def iou_distance(atracks, btracks): 77 | """ 78 | Compute cost based on IoU 79 | :type atracks: list[STrack] 80 | :type btracks: list[STrack] 81 | 82 | :rtype cost_matrix np.ndarray 83 | """ 84 | 85 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or ( 86 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray) 87 | ): 88 | atlbrs = atracks 89 | btlbrs = btracks 90 | else: 91 | atlbrs = [track.tlbr for track in atracks] 92 | btlbrs = [track.tlbr for track in btracks] 93 | _ious = ious(atlbrs, btlbrs) 94 | cost_matrix = 1 - _ious 95 | 96 | return cost_matrix 97 | 98 | 99 | def v_iou_distance(atracks, btracks): 100 | """ 101 | Compute cost based on IoU 102 | :type atracks: list[STrack] 103 | :type btracks: list[STrack] 104 | 105 | :rtype cost_matrix np.ndarray 106 | """ 107 | 108 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or ( 109 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray) 110 | ): 111 | atlbrs = atracks 112 | btlbrs = btracks 113 | else: 114 | atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks] 115 | btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks] 116 | _ious = ious(atlbrs, btlbrs) 117 | cost_matrix = 1 - _ious 118 | 119 | return cost_matrix 120 | 121 | 122 | def embedding_distance(tracks, detections, metric="cosine"): 123 | """ 124 | :param tracks: list[STrack] 125 | :param detections: list[BaseTrack] 126 | :param metric: distance metric passed to scipy.spatial.distance.cdist (default "cosine") 127 | :return: cost_matrix np.ndarray 128 | """ 129 | 130 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float64) 131 | if cost_matrix.size == 0: 132 | return cost_matrix 133 | det_features = np.asarray( 134 | [track.curr_feat for track in detections], dtype=np.float64 135 | ) 136 | # for i, track in enumerate(tracks): 137 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric)) 138 | track_features = np.asarray( 139 | [track.smooth_feat for track in tracks], dtype=np.float64 140 | ) 141 | cost_matrix = np.maximum( 142 | 0.0, cdist(track_features, det_features, metric) 143 | ) # Normalized features 144 | return cost_matrix 145 | 146 | 147 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False): 148 | if cost_matrix.size == 0: 149 | return cost_matrix 150 | gating_dim = 2 if only_position else 4 151 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 152 | measurements = np.asarray([det.to_xyah() for det in detections]) 153 | for row, track in enumerate(tracks):
154 | gating_distance = kf.gating_distance( 155 | track.mean, track.covariance, measurements, only_position 156 | ) 157 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 158 | return cost_matrix 159 | 160 | 161 | def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): 162 | if cost_matrix.size == 0: 163 | return cost_matrix 164 | gating_dim = 2 if only_position else 4 165 | gating_threshold = kalman_filter.chi2inv95[gating_dim] 166 | measurements = np.asarray([det.to_xyah() for det in detections]) 167 | for row, track in enumerate(tracks): 168 | gating_distance = kf.gating_distance( 169 | track.mean, track.covariance, measurements, only_position, metric="maha" 170 | ) 171 | cost_matrix[row, gating_distance > gating_threshold] = np.inf 172 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance 173 | return cost_matrix 174 | 175 | 176 | def fuse_iou(cost_matrix, tracks, detections): 177 | if cost_matrix.size == 0: 178 | return cost_matrix 179 | reid_sim = 1 - cost_matrix 180 | iou_dist = iou_distance(tracks, detections) 181 | iou_sim = 1 - iou_dist 182 | fuse_sim = reid_sim * (1 + iou_sim) / 2 183 | det_scores = np.array([det.score for det in detections]) 184 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 185 | # fuse_sim = fuse_sim * (1 + det_scores) / 2 186 | fuse_cost = 1 - fuse_sim 187 | return fuse_cost 188 | 189 | 190 | def fuse_score(cost_matrix, detections): 191 | if cost_matrix.size == 0: 192 | return cost_matrix 193 | iou_sim = 1 - cost_matrix 194 | det_scores = np.array([det.score for det in detections]) 195 | det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0) 196 | fuse_sim = iou_sim * det_scores 197 | fuse_cost = 1 - fuse_sim 198 | return fuse_cost 199 | -------------------------------------------------------------------------------- /face_tracking/tracker/visualize.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | __all__ = ["vis"] 5 | 6 | 7 | def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None): 8 | for i in range(len(boxes)): 9 | box = boxes[i] 10 | cls_id = int(cls_ids[i]) 11 | score = scores[i] 12 | if score < conf: 13 | continue 14 | x0 = int(box[0]) 15 | y0 = int(box[1]) 16 | x1 = int(box[2]) 17 | y1 = int(box[3]) 18 | 19 | color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist() 20 | text = "{}:{:.1f}%".format(class_names[cls_id], score * 100) 21 | txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255) 22 | font = cv2.FONT_HERSHEY_SIMPLEX 23 | 24 | txt_size = cv2.getTextSize(text, font, 0.4, 1)[0] 25 | cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) 26 | 27 | txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist() 28 | cv2.rectangle( 29 | img, 30 | (x0, y0 + 1), 31 | (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])), 32 | txt_bk_color, 33 | -1, 34 | ) 35 | cv2.putText( 36 | img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1 37 | ) 38 | 39 | return img 40 | 41 | 42 | def get_color(idx): 43 | idx = idx * 3 44 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) 45 | 46 | return color 47 | 48 | 49 | def plot_tracking( 50 | image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0.0, ids2=None, names=[] 51 | ): 52 | im = np.ascontiguousarray(np.copy(image)) 53 | im_h, im_w = im.shape[:2] 54 | 55 | top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 56 | 57 | # 
text_scale = max(1, image.shape[1] / 1600.) 58 | # text_thickness = 2 59 | # line_thickness = max(1, int(image.shape[1] / 500.)) 60 | text_scale = 2 61 | text_thickness = 2 62 | line_thickness = 3 63 | 64 | radius = max(5, int(im_w / 140.0)) 65 | cv2.putText( 66 | im, 67 | "frame: %d fps: %.2f num: %d" % (frame_id, fps, len(tlwhs)), 68 | (0, int(15 * text_scale)), 69 | cv2.FONT_HERSHEY_PLAIN, 70 | 2, 71 | (0, 0, 255), 72 | thickness=2, 73 | ) 74 | 75 | for i, tlwh in enumerate(tlwhs): 76 | x1, y1, w, h = tlwh 77 | intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) 78 | obj_id = int(obj_ids[i]) 79 | id_text = "{}".format(int(obj_id)) 80 | if (obj_id) in names: 81 | id_text = id_text + ": " + names[obj_id] 82 | if ids2 is not None: 83 | id_text = id_text + ", {}".format(int(ids2[i])) 84 | color = get_color(abs(obj_id)) 85 | cv2.rectangle( 86 | im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness 87 | ) 88 | cv2.putText( 89 | im, 90 | id_text, 91 | (intbox[0], intbox[1]), 92 | cv2.FONT_HERSHEY_PLAIN, 93 | text_scale, 94 | (0, 0, 255), 95 | thickness=text_thickness, 96 | ) 97 | return im 98 | 99 | 100 | _COLORS = ( 101 | np.array( 102 | [ 103 | 0.000, 104 | 0.447, 105 | 0.741, 106 | 0.850, 107 | 0.325, 108 | 0.098, 109 | 0.929, 110 | 0.694, 111 | 0.125, 112 | 0.494, 113 | 0.184, 114 | 0.556, 115 | 0.466, 116 | 0.674, 117 | 0.188, 118 | 0.301, 119 | 0.745, 120 | 0.933, 121 | 0.635, 122 | 0.078, 123 | 0.184, 124 | 0.300, 125 | 0.300, 126 | 0.300, 127 | 0.600, 128 | 0.600, 129 | 0.600, 130 | 1.000, 131 | 0.000, 132 | 0.000, 133 | 1.000, 134 | 0.500, 135 | 0.000, 136 | 0.749, 137 | 0.749, 138 | 0.000, 139 | 0.000, 140 | 1.000, 141 | 0.000, 142 | 0.000, 143 | 0.000, 144 | 1.000, 145 | 0.667, 146 | 0.000, 147 | 1.000, 148 | 0.333, 149 | 0.333, 150 | 0.000, 151 | 0.333, 152 | 0.667, 153 | 0.000, 154 | 0.333, 155 | 1.000, 156 | 0.000, 157 | 0.667, 158 | 0.333, 159 | 0.000, 160 | 0.667, 161 | 0.667, 162 | 0.000, 163 | 0.667, 164 | 1.000, 165 | 0.000, 166 | 1.000, 167 | 0.333, 168 | 0.000, 169 | 1.000, 170 | 0.667, 171 | 0.000, 172 | 1.000, 173 | 1.000, 174 | 0.000, 175 | 0.000, 176 | 0.333, 177 | 0.500, 178 | 0.000, 179 | 0.667, 180 | 0.500, 181 | 0.000, 182 | 1.000, 183 | 0.500, 184 | 0.333, 185 | 0.000, 186 | 0.500, 187 | 0.333, 188 | 0.333, 189 | 0.500, 190 | 0.333, 191 | 0.667, 192 | 0.500, 193 | 0.333, 194 | 1.000, 195 | 0.500, 196 | 0.667, 197 | 0.000, 198 | 0.500, 199 | 0.667, 200 | 0.333, 201 | 0.500, 202 | 0.667, 203 | 0.667, 204 | 0.500, 205 | 0.667, 206 | 1.000, 207 | 0.500, 208 | 1.000, 209 | 0.000, 210 | 0.500, 211 | 1.000, 212 | 0.333, 213 | 0.500, 214 | 1.000, 215 | 0.667, 216 | 0.500, 217 | 1.000, 218 | 1.000, 219 | 0.500, 220 | 0.000, 221 | 0.333, 222 | 1.000, 223 | 0.000, 224 | 0.667, 225 | 1.000, 226 | 0.000, 227 | 1.000, 228 | 1.000, 229 | 0.333, 230 | 0.000, 231 | 1.000, 232 | 0.333, 233 | 0.333, 234 | 1.000, 235 | 0.333, 236 | 0.667, 237 | 1.000, 238 | 0.333, 239 | 1.000, 240 | 1.000, 241 | 0.667, 242 | 0.000, 243 | 1.000, 244 | 0.667, 245 | 0.333, 246 | 1.000, 247 | 0.667, 248 | 0.667, 249 | 1.000, 250 | 0.667, 251 | 1.000, 252 | 1.000, 253 | 1.000, 254 | 0.000, 255 | 1.000, 256 | 1.000, 257 | 0.333, 258 | 1.000, 259 | 1.000, 260 | 0.667, 261 | 1.000, 262 | 0.333, 263 | 0.000, 264 | 0.000, 265 | 0.500, 266 | 0.000, 267 | 0.000, 268 | 0.667, 269 | 0.000, 270 | 0.000, 271 | 0.833, 272 | 0.000, 273 | 0.000, 274 | 1.000, 275 | 0.000, 276 | 0.000, 277 | 0.000, 278 | 0.167, 279 | 0.000, 280 | 0.000, 281 | 0.333, 282 | 0.000, 283 | 0.000, 284 | 0.500, 285 | 0.000, 286 | 
0.000, 287 | 0.667, 288 | 0.000, 289 | 0.000, 290 | 0.833, 291 | 0.000, 292 | 0.000, 293 | 1.000, 294 | 0.000, 295 | 0.000, 296 | 0.000, 297 | 0.167, 298 | 0.000, 299 | 0.000, 300 | 0.333, 301 | 0.000, 302 | 0.000, 303 | 0.500, 304 | 0.000, 305 | 0.000, 306 | 0.667, 307 | 0.000, 308 | 0.000, 309 | 0.833, 310 | 0.000, 311 | 0.000, 312 | 1.000, 313 | 0.000, 314 | 0.000, 315 | 0.000, 316 | 0.143, 317 | 0.143, 318 | 0.143, 319 | 0.286, 320 | 0.286, 321 | 0.286, 322 | 0.429, 323 | 0.429, 324 | 0.429, 325 | 0.571, 326 | 0.571, 327 | 0.571, 328 | 0.714, 329 | 0.714, 330 | 0.714, 331 | 0.857, 332 | 0.857, 333 | 0.857, 334 | 0.000, 335 | 0.447, 336 | 0.741, 337 | 0.314, 338 | 0.717, 339 | 0.741, 340 | 0.50, 341 | 0.5, 342 | 0, 343 | ] 344 | ) 345 | .astype(np.float32) 346 | .reshape(-1, 3) 347 | ) 348 | -------------------------------------------------------------------------------- /recognize.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | import torch 7 | import yaml 8 | from torchvision import transforms 9 | 10 | from face_alignment.alignment import norm_crop 11 | from face_detection.scrfd.detector import SCRFD 12 | from face_detection.yolov5_face.detector import Yolov5Face 13 | from face_recognition.arcface.model import iresnet_inference 14 | from face_recognition.arcface.utils import compare_encodings, read_features 15 | from face_tracking.tracker.byte_tracker import BYTETracker 16 | from face_tracking.tracker.visualize import plot_tracking 17 | 18 | # Device configuration 19 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 20 | 21 | # Face detector (choose one) 22 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx") 23 | # detector = Yolov5Face(model_file="face_detection/yolov5_face/weights/yolov5n-face.pt") 24 | 25 | # Face recognizer 26 | recognizer = iresnet_inference( 27 | model_name="r100", path="face_recognition/arcface/weights/arcface_r100.pth", device=device 28 | ) 29 | 30 | # Load precomputed face features and names 31 | images_names, images_embs = read_features(feature_path="./datasets/face_features/feature") 32 | 33 | # Mapping of face IDs to names 34 | id_face_mapping = {} 35 | 36 | # Data mapping for tracking information 37 | data_mapping = { 38 | "raw_image": [], 39 | "tracking_ids": [], 40 | "detection_bboxes": [], 41 | "detection_landmarks": [], 42 | "tracking_bboxes": [], 43 | } 44 | 45 | 46 | def load_config(file_name): 47 | """ 48 | Load a YAML configuration file. 49 | 50 | Args: 51 | file_name (str): The path to the YAML configuration file. 52 | 53 | Returns: 54 | dict: The loaded configuration as a dictionary. 55 | """ 56 | with open(file_name, "r") as stream: 57 | try: 58 | return yaml.safe_load(stream) 59 | except yaml.YAMLError as exc: 60 | print(exc) 61 | 62 | 63 | def process_tracking(frame, detector, tracker, args, frame_id, fps): 64 | """ 65 | Process tracking for a frame. 66 | 67 | Args: 68 | frame: The input frame. 69 | detector: The face detector. 70 | tracker: The object tracker. 71 | args (dict): Tracking configuration parameters. 72 | frame_id (int): The frame ID. 73 | fps (float): Frames per second. 74 | 75 | Returns: 76 | numpy.ndarray: The processed tracking image. 
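
    A hedged single-frame sketch (relies on the module-level ``detector`` defined
    above; the tracker and config are built the same way as in ``main``)::

        config_tracking = load_config("./face_tracking/config/config_tracking.yaml")
        tracker = BYTETracker(args=config_tracking, frame_rate=30)
        ok, frame = cv2.VideoCapture(0).read()
        if ok:
            vis_frame = process_tracking(frame, detector, tracker, config_tracking,
                                         frame_id=0, fps=30.0)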
77 | """ 78 | # Face detection and tracking 79 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame) 80 | 81 | tracking_tlwhs = [] 82 | tracking_ids = [] 83 | tracking_scores = [] 84 | tracking_bboxes = [] 85 | 86 | if outputs is not None: 87 | online_targets = tracker.update( 88 | outputs, [img_info["height"], img_info["width"]], (128, 128) 89 | ) 90 | 91 | for i in range(len(online_targets)): 92 | t = online_targets[i] 93 | tlwh = t.tlwh 94 | tid = t.track_id 95 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"] 96 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical: 97 | x1, y1, w, h = tlwh 98 | tracking_bboxes.append([x1, y1, x1 + w, y1 + h]) 99 | tracking_tlwhs.append(tlwh) 100 | tracking_ids.append(tid) 101 | tracking_scores.append(t.score) 102 | 103 | tracking_image = plot_tracking( 104 | img_info["raw_img"], 105 | tracking_tlwhs, 106 | tracking_ids, 107 | names=id_face_mapping, 108 | frame_id=frame_id + 1, 109 | fps=fps, 110 | ) 111 | else: 112 | tracking_image = img_info["raw_img"] 113 | 114 | data_mapping["raw_image"] = img_info["raw_img"] 115 | data_mapping["detection_bboxes"] = bboxes 116 | data_mapping["detection_landmarks"] = landmarks 117 | data_mapping["tracking_ids"] = tracking_ids 118 | data_mapping["tracking_bboxes"] = tracking_bboxes 119 | 120 | return tracking_image 121 | 122 | 123 | @torch.no_grad() 124 | def get_feature(face_image): 125 | """ 126 | Extract features from a face image. 127 | 128 | Args: 129 | face_image: The input face image. 130 | 131 | Returns: 132 | numpy.ndarray: The extracted features. 133 | """ 134 | face_preprocess = transforms.Compose( 135 | [ 136 | transforms.ToTensor(), 137 | transforms.Resize((112, 112)), 138 | transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), 139 | ] 140 | ) 141 | 142 | # Convert to RGB 143 | face_image = cv2.cvtColor(face_image, cv2.COLOR_BGR2RGB) 144 | 145 | # Preprocess image (BGR) 146 | face_image = face_preprocess(face_image).unsqueeze(0).to(device) 147 | 148 | # Inference to get feature 149 | emb_img_face = recognizer(face_image).cpu().numpy() 150 | 151 | # Convert to array 152 | images_emb = emb_img_face / np.linalg.norm(emb_img_face) 153 | 154 | return images_emb 155 | 156 | 157 | def recognition(face_image): 158 | """ 159 | Recognize a face image. 160 | 161 | Args: 162 | face_image: The input face image. 163 | 164 | Returns: 165 | tuple: A tuple containing the recognition score and name. 166 | """ 167 | # Get feature from face 168 | query_emb = get_feature(face_image) 169 | 170 | score, id_min = compare_encodings(query_emb, images_embs) 171 | name = images_names[id_min] 172 | score = score[0] 173 | 174 | return score, name 175 | 176 | 177 | def mapping_bbox(box1, box2): 178 | """ 179 | Calculate the Intersection over Union (IoU) between two bounding boxes. 180 | 181 | Args: 182 | box1 (tuple): The first bounding box (x_min, y_min, x_max, y_max). 183 | box2 (tuple): The second bounding box (x_min, y_min, x_max, y_max). 184 | 185 | Returns: 186 | float: The IoU score. 
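
    Worked example with illustrative pixel boxes (note the +1 pixel convention
    used below)::

        mapping_bbox((0, 0, 9, 9), (5, 5, 14, 14))
        # intersection = (9 - 5 + 1) * (9 - 5 + 1) = 25
        # each box area = 10 * 10 = 100, union = 175 -> IoU = 25 / 175 ≈ 0.143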
187 | """ 188 | # Calculate the intersection area 189 | x_min_inter = max(box1[0], box2[0]) 190 | y_min_inter = max(box1[1], box2[1]) 191 | x_max_inter = min(box1[2], box2[2]) 192 | y_max_inter = min(box1[3], box2[3]) 193 | 194 | intersection_area = max(0, x_max_inter - x_min_inter + 1) * max( 195 | 0, y_max_inter - y_min_inter + 1 196 | ) 197 | 198 | # Calculate the area of each bounding box 199 | area_box1 = (box1[2] - box1[0] + 1) * (box1[3] - box1[1] + 1) 200 | area_box2 = (box2[2] - box2[0] + 1) * (box2[3] - box2[1] + 1) 201 | 202 | # Calculate the union area 203 | union_area = area_box1 + area_box2 - intersection_area 204 | 205 | # Calculate IoU 206 | iou = intersection_area / union_area 207 | 208 | return iou 209 | 210 | 211 | def tracking(detector, args): 212 | """ 213 | Face tracking in a separate thread. 214 | 215 | Args: 216 | detector: The face detector. 217 | args (dict): Tracking configuration parameters. 218 | """ 219 | # Initialize variables for measuring frame rate 220 | start_time = time.time_ns() 221 | frame_count = 0 222 | fps = -1 223 | 224 | # Initialize a tracker and a timer 225 | tracker = BYTETracker(args=args, frame_rate=30) 226 | frame_id = 0 227 | 228 | cap = cv2.VideoCapture(0) 229 | 230 | while True: 231 | _, img = cap.read() 232 | 233 | tracking_image = process_tracking(img, detector, tracker, args, frame_id, fps) 234 | 235 | # Calculate and display the frame rate 236 | frame_count += 1 237 | if frame_count >= 30: 238 | fps = 1e9 * frame_count / (time.time_ns() - start_time) 239 | frame_count = 0 240 | start_time = time.time_ns() 241 | 242 | cv2.imshow("Face Recognition", tracking_image) 243 | 244 | # Check for user exit input 245 | ch = cv2.waitKey(1) 246 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 247 | break 248 | 249 | 250 | def recognize(): 251 | """Face recognition in a separate thread.""" 252 | while True: 253 | raw_image = data_mapping["raw_image"] 254 | detection_landmarks = data_mapping["detection_landmarks"] 255 | detection_bboxes = data_mapping["detection_bboxes"] 256 | tracking_ids = data_mapping["tracking_ids"] 257 | tracking_bboxes = data_mapping["tracking_bboxes"] 258 | 259 | for i in range(len(tracking_bboxes)): 260 | for j in range(len(detection_bboxes)): 261 | mapping_score = mapping_bbox(box1=tracking_bboxes[i], box2=detection_bboxes[j]) 262 | if mapping_score > 0.9: 263 | face_alignment = norm_crop(img=raw_image, landmark=detection_landmarks[j]) 264 | 265 | score, name = recognition(face_image=face_alignment) 266 | if name is not None: 267 | if score < 0.25: 268 | caption = "UN_KNOWN" 269 | else: 270 | caption = f"{name}:{score:.2f}" 271 | 272 | id_face_mapping[tracking_ids[i]] = caption 273 | 274 | detection_bboxes = np.delete(detection_bboxes, j, axis=0) 275 | detection_landmarks = np.delete(detection_landmarks, j, axis=0) 276 | 277 | break 278 | 279 | if tracking_bboxes == []: 280 | print("Waiting for a person...") 281 | 282 | 283 | def main(): 284 | """Main function to start face tracking and recognition threads.""" 285 | file_name = "./face_tracking/config/config_tracking.yaml" 286 | config_tracking = load_config(file_name) 287 | 288 | # Start tracking thread 289 | thread_track = threading.Thread( 290 | target=tracking, 291 | args=( 292 | detector, 293 | config_tracking, 294 | ), 295 | ) 296 | thread_track.start() 297 | 298 | # Start recognition thread 299 | thread_recognize = threading.Thread(target=recognize) 300 | thread_recognize.start() 301 | 302 | 303 | if __name__ == "__main__": 304 | main() 305 | 
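
# A hedged variant (sketch only, not the author's design): tracking() returns when
# the user presses "q"/ESC, but recognize() loops forever, so the process stays
# alive after the window closes. Marking the recognition worker as a daemon and
# joining only the tracking thread lets the script exit cleanly. The function name
# below is hypothetical.
def main_with_clean_exit():
    config_tracking = load_config("./face_tracking/config/config_tracking.yaml")

    # Recognition worker: daemon thread, terminates with the main process.
    threading.Thread(target=recognize, daemon=True).start()

    # Tracking owns the camera and the display loop; block until the user quits.
    thread_track = threading.Thread(target=tracking, args=(detector, config_tracking))
    thread_track.start()
    thread_track.join()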
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi==2023.11.17 2 | charset-normalizer==3.3.2 3 | colorama==0.4.6 4 | coloredlogs==15.0.1 5 | contourpy==1.2.0 6 | cycler==0.12.1 7 | flatbuffers==23.5.26 8 | fonttools==4.46.0 9 | humanfriendly==10.0 10 | idna==3.6 11 | imageio==2.33.0 12 | importlib-resources==6.1.1 13 | kiwisolver==1.4.5 14 | lazy_loader==0.3 15 | matplotlib==3.8.2 16 | mpmath==1.3.0 17 | networkx==3.2.1 18 | numpy==1.23.5 19 | onnxruntime==1.16.3 20 | opencv-python==4.8.1.78 21 | packaging==23.2 22 | pandas==2.1.3 23 | Pillow==10.1.0 24 | protobuf==4.25.1 25 | pyparsing==3.1.1 26 | pyreadline3==3.4.1 27 | python-dateutil==2.8.2 28 | pytz==2023.3.post1 29 | PyYAML==6.0.1 30 | requests==2.31.0 31 | scikit-image==0.22.0 32 | scipy==1.11.4 33 | seaborn==0.13.0 34 | six==1.16.0 35 | sympy==1.12 36 | tifffile==2023.9.26 37 | tqdm==4.66.1 38 | typing_extensions==4.8.0 39 | tzdata==2023.3 40 | urllib3==2.1.0 41 | zipp==3.17.0 42 | -------------------------------------------------------------------------------- /tracking.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import cv2 4 | import yaml 5 | 6 | from face_detection.scrfd.detector import SCRFD 7 | from face_detection.yolov5_face.detector import Yolov5Face 8 | from face_tracking.tracker.byte_tracker import BYTETracker 9 | from face_tracking.tracker.visualize import plot_tracking 10 | 11 | 12 | # Function to load a YAML configuration file 13 | def load_config(file_name): 14 | with open(file_name, "r") as stream: 15 | try: 16 | return yaml.safe_load(stream) 17 | except yaml.YAMLError as exc: 18 | print(exc) 19 | 20 | 21 | # Function for performing object detection and tracking 22 | def inference(detector, args): 23 | # Open a video capture object 24 | cap = cv2.VideoCapture(0) 25 | 26 | # Initialize variables for measuring frame rate 27 | start_time = time.time_ns() 28 | frame_count = 0 29 | fps = -1 30 | 31 | # Initialize a tracker and a timer 32 | tracker = BYTETracker(args=args, frame_rate=30) 33 | frame_id = 0 34 | 35 | while True: 36 | # Read a frame from the video capture 37 | ret_val, frame = cap.read() 38 | 39 | if ret_val: 40 | # Perform face detection and tracking on the frame 41 | outputs, img_info, bboxes, landmarks = detector.detect_tracking(image=frame) 42 | 43 | if outputs is not None: 44 | online_targets = tracker.update( 45 | outputs, [img_info["height"], img_info["width"]], (128, 128) 46 | ) 47 | online_tlwhs = [] 48 | online_ids = [] 49 | online_scores = [] 50 | 51 | for t in online_targets: 52 | tlwh = t.tlwh 53 | tid = t.track_id 54 | vertical = tlwh[2] / tlwh[3] > args["aspect_ratio_thresh"] 55 | if tlwh[2] * tlwh[3] > args["min_box_area"] and not vertical: 56 | online_tlwhs.append(tlwh) 57 | online_ids.append(tid) 58 | online_scores.append(t.score) 59 | 60 | online_im = plot_tracking( 61 | img_info["raw_img"], 62 | online_tlwhs, 63 | online_ids, 64 | frame_id=frame_id + 1, 65 | fps=fps, 66 | ) 67 | else: 68 | online_im = img_info["raw_img"] 69 | 70 | # Calculate and display the frame rate 71 | frame_count += 1 72 | if frame_count >= 30: 73 | fps = 1e9 * frame_count / (time.time_ns() - start_time) 74 | frame_count = 0 75 | start_time = time.time_ns() 76 | 77 | # # Draw bounding boxes and landmarks on the frame 78 | # for i in range(len(bboxes)): 79 | # # Get location of the face 80 | # x1, y1, x2, y2, score 
= bboxes[i] 81 | # cv2.rectangle(online_im, (x1, y1), (x2, y2), (200, 200, 230), 2) 82 | 83 | cv2.imshow("Face Tracking", online_im) 84 | 85 | # Check for user exit input 86 | ch = cv2.waitKey(1) 87 | if ch == 27 or ch == ord("q") or ch == ord("Q"): 88 | break 89 | else: 90 | break 91 | frame_id += 1 92 | 93 | 94 | def main(): 95 | file_name = "./face_tracking/config/config_tracking.yaml" 96 | config_tracking = load_config(file_name) 97 | # detector = Yolov5Face( 98 | # model_file="face_detection/yolov5_face/weights/yolov5m-face.pt" 99 | # ) 100 | detector = SCRFD(model_file="face_detection/scrfd/weights/scrfd_2.5g_bnkps.onnx") 101 | 102 | inference(detector=detector, args=config_tracking) 103 | 104 | 105 | if __name__ == "__main__": 106 | main() 107 | --------------------------------------------------------------------------------
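
A hedged usage note on tracking.py (the clip path is an illustrative assumption):
inference() opens webcam index 0; to run the same loop over a recorded clip, only
the capture source needs to change, e.g.

    cap = cv2.VideoCapture("assets/demo.mp4")  # instead of cv2.VideoCapture(0)

The rest of the loop (detection, BYTETracker update, plot_tracking, FPS overlay)
is unchanged.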