├── visionface ├── PyFaces.py ├── models │ ├── face_detection │ │ ├── mediapipe.py │ │ ├── OpenCV.py │ │ ├── YOLO.py │ │ ├── MediaPipe.py │ │ ├── YOLOEye.py │ │ ├── YOLOWolrd.py │ │ └── MTCNN.py │ ├── LandmarkDetector.py │ ├── face_embedding │ │ ├── Dlib.py │ │ ├── ArcFace.py │ │ └── FaceNet.py │ ├── Detector.py │ ├── landmark_detection │ │ ├── MediaPipeLandmark.py │ │ ├── Dlib.py │ │ └── utils.py │ └── FaceEmbedding.py ├── __init__.py ├── annotators │ ├── base.py │ ├── helper │ │ └── landmark_styles.py │ ├── FaceAnnotators.py │ ├── utils.py │ ├── landmark.py │ └── detection.py ├── commons │ ├── detection_utils.py │ ├── utils.py │ ├── download_files.py │ └── image_utils.py ├── modules │ ├── embedding.py │ ├── landmarks.py │ ├── modeling.py │ ├── detection.py │ └── recognition.py ├── db │ ├── qdrant │ │ ├── search_manager.py │ │ ├── config.py │ │ ├── data_manager.py │ │ └── collection_manager.py │ └── qdrant_client.py └── FaceAnnotators.py ├── .gitignore ├── banners ├── VisionFace2.png ├── face_analysis.jpg ├── face_detection.jpg ├── face_landmarks.jpg ├── face_recognition.jpg ├── face_verification.jpg └── face_visualization.jpg ├── requirements.txt ├── CONTRIBUTORS.md ├── LICENSE ├── setup.py └── README.md /visionface/PyFaces.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | testing/ 2 | __pycache__/ 3 | .vscode/ -------------------------------------------------------------------------------- /visionface/models/face_detection/mediapipe.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /banners/VisionFace2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/VisionFace2.png -------------------------------------------------------------------------------- /banners/face_analysis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_analysis.jpg -------------------------------------------------------------------------------- /banners/face_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_detection.jpg -------------------------------------------------------------------------------- /banners/face_landmarks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_landmarks.jpg -------------------------------------------------------------------------------- /banners/face_recognition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_recognition.jpg -------------------------------------------------------------------------------- /banners/face_verification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_verification.jpg -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | torch>=1.8.0 2 | numpy>=1.19.0 3 | opencv-python>=4.5.0 4 | Pillow>=8.0.0 5 | requests>=2.25.0 -------------------------------------------------------------------------------- /banners/face_visualization.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_visualization.jpg -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | ## Core Team 4 | - **Milad Farzalizadeh** (@miladfa7) 5 | - **Zahra Sheikhvand** (@zahra-she) 6 | 7 | ## All Contributors 8 | Thanks to everyone who contributed to VisionFace: 9 | - List all contributors here -------------------------------------------------------------------------------- /visionface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | VisionFace: Modern Face Detection & Recognition Framework 3 | """ 4 | 5 | __version__ = "1.0.0" 6 | __author__ = "VisionFace Team" 7 | __email__ = "visio.face2025@gmail.com" 8 | 9 | 10 | try: 11 | from .modules.recognition import FaceRecognition 12 | from .modules.detection import FaceDetection 13 | from .modules.embedding import FaceEmbedder 14 | from .modules.landmarks import LandmarkDetection 15 | from .annotators import FaceAnnotators 16 | 17 | __all__ = [ 18 | "FaceDetection", 19 | "FaceEmbedder", 20 | "FaceRecognition", 21 | "LandmarkDetection", 22 | "FaceAnnotators" 23 | ] 24 | except ImportError as e: 25 | print(f"Warning: Some modules could not be imported: {e}") 26 | __all__ = [] 27 | -------------------------------------------------------------------------------- /visionface/models/LandmarkDetector.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Optional 3 | import numpy as np 4 | from dataclasses import dataclass 5 | 6 | 7 | class LandmarkDetector(ABC): 8 | """ 9 | Abstract base class for a face landmark system 10 | """ 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def build_model(self) -> Any: 17 | pass 18 | 19 | @abstractmethod 20 | def detect_landmarks(self, img: np.ndarray): 21 | pass 22 | 23 | @abstractmethod 24 | def process_landmarks(self, results): 25 | pass 26 | 27 | 28 | 29 | @dataclass 30 | class DetectedLandmark2D: 31 | x: float 32 | y : float 33 | name: Optional[str] = None 34 | conf: Optional[float] = None 35 | 36 | @dataclass 37 | class DetectedLandmark3D: 38 | x: float 39 | y: float 40 | z: float 41 | name: Optional[str] = None 42 | conf: Optional[float] = None 43 | -------------------------------------------------------------------------------- /visionface/annotators/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union, List, Optional, Tuple 3 | import numpy as np 4 | from PIL import Image 5 | 6 | # Pyface modules 7 | from visionface.models.Detector import Detector 8 | from visionface.models.LandmarkDetector import DetectedLandmark3D, DetectedLandmark2D 9 | 10 | RawDetection = List[Union[int, float, str]] 11 | ImageType = Union[str, np.ndarray, Image.Image] 12 | 13 | class BaseAnnotator(ABC): 14 | @abstractmethod 15 | def annotate(self, img: ImageType, detections: 
Union[List[Detector], List[RawDetection]]) -> np.ndarray: 16 | pass 17 | 18 | class BaseLandmarkAnnotator: 19 | @abstractmethod 20 | def annotate( 21 | self, 22 | img: ImageType, 23 | landmarks: Union[ 24 | List[DetectedLandmark3D], 25 | List[DetectedLandmark2D], 26 | ], 27 | connections: Optional[List[Tuple[int, int]]] = None, 28 | is_drawing_landmarks: bool = True 29 | ) -> np.ndarray: 30 | pass -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 VisionFace Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /visionface/commons/detection_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import torch 3 | 4 | from visionface.models.Detector import DetectedFace 5 | 6 | 7 | def convert_to_square_bbox(bboxA): 8 | h = bboxA[:, 3] - bboxA[:, 1] 9 | w = bboxA[:, 2] - bboxA[:, 0] 10 | 11 | l = torch.max(w, h) 12 | bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5 13 | bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5 14 | bboxA[:, 2:4] = bboxA[:, :2] + l.repeat(2, 1).permute(1, 0) 15 | 16 | return bboxA 17 | 18 | 19 | def box_padding(boxes, w, h): 20 | boxes = boxes.trunc().int().cpu().numpy() 21 | x = boxes[:, 0] 22 | y = boxes[:, 1] 23 | ex = boxes[:, 2] 24 | ey = boxes[:, 3] 25 | 26 | x[x < 1] = 1 27 | y[y < 1] = 1 28 | ex[ex > w] = w 29 | ey[ey > h] = h 30 | 31 | return y, ey, x, ex 32 | 33 | def apply_bbox_regression(boundingbox, reg): 34 | if reg.shape[1] == 1: 35 | reg = torch.reshape(reg, (reg.shape[2], reg.shape[3])) 36 | 37 | w = boundingbox[:, 2] - boundingbox[:, 0] + 1 38 | h = boundingbox[:, 3] - boundingbox[:, 1] + 1 39 | b1 = boundingbox[:, 0] + reg[:, 0] * w 40 | b2 = boundingbox[:, 1] + reg[:, 1] * h 41 | b3 = boundingbox[:, 2] + reg[:, 2] * w 42 | b4 = boundingbox[:, 3] + reg[:, 3] * h 43 | boundingbox[:, :4] = torch.stack([b1, b2, b3, b4]).permute(1, 0) 44 | 45 | return boundingbox 46 | 47 | def select_max_conf_faces( 48 | face_detections: List[List[DetectedFace]] 49 | ) -> List[DetectedFace]: 50 | """ 51 | Selects the DetectedFace with the highest confidence from each list of detections. 52 | 53 | Parameters 54 | ---------- 55 | face_detections : List[List[DetectedFace]] 56 | A list of detection lists. 
Each inner list contains DetectedFace objects for one image. 57 | 58 | Returns 59 | ------- 60 | List[DetectedFace] 61 | A list containing the DetectedFace with the highest confidence from each image. 62 | """ 63 | return [[max(detections, key=lambda face: face.conf) for detections in face_detections if detections]] -------------------------------------------------------------------------------- /visionface/modules/embedding.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | import numpy as np 3 | 4 | from visionface.models.Detector import DetectedFace 5 | from visionface.models.FaceEmbedding import FaceEmbedding 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | 10 | class FaceEmbedder: 11 | """ 12 | A class for generating embeddings from face images 13 | using a specified face embedding model. 14 | """ 15 | def __init__(self, embedding_backbone: str = "FaceNet-VGG") -> None: 16 | """ 17 | Initializes the FaceEmbedder with the given embedding model. 18 | 19 | Parameters 20 | ---------- 21 | embedding_backbone : str, optional 22 | The name of the face embedding model to use. Default is "FaceNet-VGG". 23 | """ 24 | self.face_embedder = self.build_model(embedding_backbone) 25 | self.vector_size = self.face_embedder.output_shape 26 | 27 | def build_model(self, embedding_backbone) -> Any: 28 | """ 29 | Builds and returns the face embedding model. 30 | 31 | Parameters 32 | ---------- 33 | embedding_backbone : str 34 | The name of the model to load. 35 | 36 | Returns 37 | ------- 38 | Any 39 | An initialized face embedding model instance. 40 | """ 41 | return build_model(embedding_backbone, "face_embedding") 42 | 43 | def embed_faces( 44 | self, 45 | face_imgs: Union[str, np.ndarray, List[np.ndarray], List[str], List[DetectedFace]], 46 | normalize_embeddings: bool = True 47 | ) -> FaceEmbedding: 48 | """ 49 | Computes face embeddings for one or more face images. 50 | 51 | Parameters 52 | ---------- 53 | face_imgs : Union[str, np.ndarray, List[np.ndarray], List[str], List[DetectedFace]] 54 | A single face image or a list of face images. Each image can be a file path (str), 55 | a NumPy array, or a DetectedFace object. 56 | 57 | normalize_embeddings : bool, optional 58 | Whether to apply L2 normalization to the output embeddings. Default is True. 59 | 60 | Returns 61 | ------- 62 | FaceEmbedding 63 | An object containing embedding vectors for each face. 
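Examples
--------
A minimal usage sketch, assuming the default "FaceNet-VGG" weights can be downloaded and that "face.jpg" is a hypothetical path to an already-cropped face image:

>>> from visionface import FaceEmbedder
>>> embedder = FaceEmbedder()
>>> embedding = embedder.embed_faces("face.jpg")
>>> vectors = embedding.as_numpy()  # one input image -> array of shape (1, embedding_dim)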
64 | """ 65 | face_images = load_images(face_imgs) 66 | validated_images = validate_images(face_images) 67 | return self.face_embedder.embed(validated_images, normalize_embeddings) 68 | 69 | -------------------------------------------------------------------------------- /visionface/annotators/helper/landmark_styles.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Mapping 2 | from dataclasses import dataclass 3 | 4 | from regex import D 5 | 6 | 7 | from visionface.annotators.helper import landmark_connections 8 | 9 | RADIUS = 5 10 | RED = (48, 48, 255) 11 | GREEN = (48, 255, 48) 12 | BLUE = (192, 101, 21) 13 | YELLOW = (0, 204, 255) 14 | GRAY = (128, 128, 128) 15 | PURPLE = (128, 64, 128) 16 | PEACH = (180, 229, 255) 17 | WHITE = (224, 224, 224) 18 | CYAN = (192, 255, 48) 19 | MAGENTA = (192, 48, 255) 20 | 21 | THICKNESS_TESSELATION = 1 22 | THICKNESS_CONTOURS = 2 23 | CIRCLE_REDIUS = 2 24 | 25 | 26 | @dataclass 27 | class FaceMeshStyle: 28 | color: Tuple[int, int, int] = GRAY 29 | thickness: int = THICKNESS_TESSELATION 30 | circle_radius: int = CIRCLE_REDIUS 31 | 32 | FACEMESH_CONTOURS_CONNECTION_STYLE = { 33 | landmark_connections.FACEMESH_LIPS: 34 | FaceMeshStyle(color=WHITE, thickness=THICKNESS_CONTOURS), 35 | landmark_connections.FACEMESH_LEFT_EYE: 36 | FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS), 37 | landmark_connections.FACEMESH_LEFT_EYEBROW: 38 | FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS), 39 | landmark_connections.FACEMESH_RIGHT_EYE: 40 | FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS), 41 | landmark_connections.FACEMESH_RIGHT_EYEBROW: 42 | FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS), 43 | landmark_connections.FACEMESH_FACE_OVAL: 44 | FaceMeshStyle(color=WHITE, thickness=THICKNESS_CONTOURS) 45 | } 46 | 47 | class DefaultFaceMeshContoursStyle: 48 | def __call__(self, i: int = 0) -> Mapping[Tuple[int, int], 'FaceMeshStyle']: 49 | default_style = (FACEMESH_CONTOURS_CONNECTION_STYLE) 50 | connection_style = {} 51 | for k, v in default_style.items(): 52 | for connection in k: 53 | connection_style[connection] = v 54 | return connection_style 55 | 56 | 57 | 58 | 59 | class DefaultFaceMeshIrisConnectionsStyle: 60 | def __call__(self) -> Mapping[Tuple[int, int], 'FaceMeshStyle']: 61 | 62 | iris_style = {} 63 | 64 | left_spec = FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS) 65 | for connection in landmark_connections.FACEMESH_LEFT_IRIS: 66 | iris_style[connection] = left_spec 67 | 68 | right_spec = FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS) 69 | for connection in landmark_connections.FACEMESH_RIGHT_IRIS: 70 | iris_style[connection] = right_spec 71 | 72 | return iris_style 73 | 74 | 75 | FaceMeshContoursStyle = DefaultFaceMeshContoursStyle() 76 | FaceMeshIrisStyle = DefaultFaceMeshIrisConnectionsStyle() 77 | -------------------------------------------------------------------------------- /visionface/modules/landmarks.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | import numpy as np 3 | 4 | # VisionFace modules 5 | from visionface.models.LandmarkDetector import DetectedLandmark2D, DetectedLandmark3D 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | 10 | class LandmarkDetection: 11 | def __init__(self, detector_backbone: str = "mediapipe") -> None: 12 | """ 13 | Initialize the landmark 
detection with the specified backbone. 14 | 15 | Currently supported backbones: 16 | - "mediapipe": 3D landmark detection 17 | - "dlib": 2D landmark detection 18 | 19 | Args: 20 | detector_backbone: Backbone name for the landmark detector (e.g., "mediapipe", "dlib"). 21 | """ 22 | self.detector_backbone = detector_backbone 23 | self.landmark_detector = self.build_model() 24 | 25 | def build_model(self) -> Any: 26 | """ 27 | Builds the landmark detection model based on the specified backbone. 28 | 29 | Returns: 30 | An initialized landmark detection model. 31 | """ 32 | return build_model(self.detector_backbone, "landmark_detection") 33 | 34 | def detect_3d_landmarks( 35 | self, 36 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 37 | ) -> List[List[DetectedLandmark3D]]: 38 | """ 39 | Detect 3D facial landmarks in one or more images using the specified detection backbone. 40 | 41 | Args: 42 | images: A single image or a list of images, each can be a file path or a NumPy array. 43 | 44 | Returns: 45 | A list of lists containing DetectedLandmark3D instances with 3D coordinates. 46 | """ 47 | loaded_images = load_images(images) 48 | validated_images = validate_images(loaded_images) 49 | return self.landmark_detector.detect_landmarks(validated_images) 50 | 51 | def detect_landmarks( 52 | self, 53 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 54 | ) -> List[List[DetectedLandmark2D]]: 55 | """ 56 | Detect 2D facial landmarks in one or more images using the specified detection backbone. 57 | 58 | Args: 59 | images: A single image or a list of images, each can be a file path or a NumPy array. 60 | 61 | Returns: 62 | A list of lists containing DetectedLandmark2D instances with 2D coordinates. 63 | """ 64 | loaded_images = load_images(images) 65 | validated_images = validate_images(loaded_images) 66 | return self.landmark_detector.detect_landmarks(validated_images) 67 | -------------------------------------------------------------------------------- /visionface/commons/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from typing import List 4 | 5 | 6 | def xywh2xyxy(detection: List[int]) -> List[int]: 7 | """ 8 | Convert bounding box coordinates from [x, y, width, height] to [x1, y1, x2, y2] format. 9 | 10 | Parameters 11 | ---------- 12 | detection : List[int] 13 | Bounding box in [x, y, width, height] format where: 14 | - x, y: coordinates of the top-left corner 15 | - width, height: dimensions of the bounding box 16 | 17 | Returns 18 | ------- 19 | List[int] 20 | Bounding box in [x1, y1, x2, y2] format where: 21 | - x1, y1: coordinates of the top-left corner 22 | - x2, y2: coordinates of the bottom-right corner 23 | """ 24 | return [ 25 | detection[0], 26 | detection[1], 27 | detection[0] + detection[2], 28 | detection[1] + detection[3], 29 | ] 30 | 31 | def get_home_directory(): 32 | return "." 
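# A quick worked example of the xywh-to-xyxy conversion above, using made-up
# box values: xywh2xyxy([10, 20, 100, 50]) -> [10, 20, 110, 70]
# (the width and height are simply added to the top-left corner).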
33 | 34 | def nms_numpy(boxes, scores, threshold, method): 35 | if boxes.size == 0: 36 | return np.empty((0, 3)) 37 | 38 | x1 = boxes[:, 0].copy() 39 | y1 = boxes[:, 1].copy() 40 | x2 = boxes[:, 2].copy() 41 | y2 = boxes[:, 3].copy() 42 | s = scores 43 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 44 | 45 | I = np.argsort(s) 46 | pick = np.zeros_like(s, dtype=np.int16) 47 | counter = 0 48 | while I.size > 0: 49 | i = I[-1] 50 | pick[counter] = i 51 | counter += 1 52 | idx = I[0:-1] 53 | 54 | xx1 = np.maximum(x1[i], x1[idx]).copy() 55 | yy1 = np.maximum(y1[i], y1[idx]).copy() 56 | xx2 = np.minimum(x2[i], x2[idx]).copy() 57 | yy2 = np.minimum(y2[i], y2[idx]).copy() 58 | 59 | w = np.maximum(0.0, xx2 - xx1 + 1).copy() 60 | h = np.maximum(0.0, yy2 - yy1 + 1).copy() 61 | 62 | inter = w * h 63 | if method == 'Min': 64 | o = inter / np.minimum(area[i], area[idx]) 65 | else: 66 | o = inter / (area[i] + area[idx] - inter) 67 | I = I[np.where(o <= threshold)] 68 | 69 | pick = pick[:counter].copy() 70 | return pick 71 | 72 | def batched_nms_numpy(boxes, scores, idxs, threshold, method): 73 | device = boxes.device 74 | if boxes.numel() == 0: 75 | return torch.empty((0,), dtype=torch.int64, device=device) 76 | max_coordinate = boxes.max() 77 | offsets = idxs.to(boxes) * (max_coordinate + 1) 78 | boxes_for_nms = boxes + offsets[:, None] 79 | boxes_for_nms = boxes_for_nms.detach().numpy() 80 | scores = scores.detach().numpy() 81 | keep = nms_numpy(boxes_for_nms, scores, threshold, method) 82 | return torch.as_tensor(keep, dtype=torch.long, device=device) -------------------------------------------------------------------------------- /visionface/modules/modeling.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | # face detection models 4 | from visionface.models.face_detection import ( 5 | MediaPipe, 6 | MTCNN, 7 | YOLO, 8 | YOLOEye, 9 | YOLOWolrd, 10 | OpenCV 11 | ) 12 | from visionface.models.face_embedding import ( 13 | FaceNet 14 | ) 15 | 16 | # Landmarks detection models 17 | from visionface.models.landmark_detection import ( 18 | MediaPipeLandmark, 19 | Dlib 20 | ) 21 | 22 | 23 | def build_model(model_name: str, task: str) -> Any: 24 | """ 25 | Build and return a model instance based on the specified task and model name. 26 | 27 | This function creates and returns an appropriate model instance 28 | for the requested task using the specified model implementation. 29 | 30 | Parameters 31 | ---------- 32 | model_name : str 33 | The name of the model implementation to use (e.g., "mediapipe"). 34 | task : str 35 | The task category for which to build a model (e.g., "face_detection"). 36 | 37 | Returns 38 | ------- 39 | Any 40 | A buit model class for the specified task. 
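For example, build_model("mediapipe", "face_detection") returns an initialized MediaPipe face detector from the registry below (illustrative call; it assumes the optional mediapipe dependency is installed).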
41 | 42 | Raises 43 | ------ 44 | ValueError 45 | If the requested task is not implemented in the model registry 46 | """ 47 | models = { 48 | "face_detection": { 49 | "mediapipe": MediaPipe.MediaPipeDetector, 50 | "mtcnn": MTCNN.MTCNNDetector, 51 | "yoloe-small": YOLOEye.YOLOEyeSmallDetector, 52 | "yoloe-medium": YOLOEye.YOLOEyeMediumDetector, 53 | "yoloe-large": YOLOEye.YOLOEyeLargeDetector, 54 | "yolo-nano": YOLO.YOLONanoDetector, 55 | "yolo-small": YOLO.YOLOSmallDetector, 56 | "yolo-medium": YOLO.YOLOMediumDetector, 57 | "yolo-large": YOLO.YOLOLargeDetector, 58 | "yolow-small": YOLOWolrd.YOLOWorldSmallDetector, 59 | "yolow-medium": YOLOWolrd.YOLOWorldMediumDetector, 60 | "yolow-large": YOLOWolrd.YOLOWorldLargeDetector, 61 | "yolow-xlarge": YOLOWolrd.YOLOWorldXLargeDetector, 62 | "opencv": OpenCV.OpenCVDetector 63 | }, 64 | "landmark_detection": { 65 | "mediapipe": MediaPipeLandmark.MediaPipeFaceMeshDetector, 66 | "dlib": Dlib.DlibFaceLandmarkDetector 67 | }, 68 | "face_embedding": { 69 | "FaceNet-VGG": FaceNet.FaceNetVGG, 70 | "FaceNet-CASIA": FaceNet.FaceNetCASIA 71 | } 72 | } 73 | 74 | if models.get(task) is None: 75 | raise ValueError(f"Unimplemented task: {task}") 76 | 77 | model = models[task].get(model_name) 78 | if model is None: 79 | raise ValueError(f"Invalid model_name passed - {task}/{model_name}") 80 | return model() 81 | -------------------------------------------------------------------------------- /visionface/models/face_embedding/Dlib.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # VisionFace modules 8 | from visionface.commons.download_files import download_model_weights 9 | from visionface.models.FaceEmbedding import FaceEmbedder 10 | 11 | 12 | DLIB_WEIGHTS = "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" 13 | DLIB_WEIGHT_FILENAME = "dlib_resnet_v1.dat" 14 | 15 | class DlibFaceEmbedder(FaceEmbedder): 16 | """ 17 | Dlib-based face embedding model implementation. 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.model = DlibResNetModel() 22 | self.model_name = "Dlib" 23 | self.input_shape = (150, 150) 24 | self.output_shape = 128 25 | 26 | class DlibResNetModel(nn.Module): 27 | """ 28 | Dlib face recognition ResNet model. 29 | """ 30 | 31 | def __init__(self): 32 | self._dlib_model = self._load_dlib_model() 33 | 34 | def _load_dlib_model(self): 35 | """ 36 | Load the Dlib face recognition model. 37 | 38 | Returns: 39 | dlib.face_recognition_model_v1: Loaded Dlib face recognition model. 40 | """ 41 | try: 42 | import dlib 43 | except ModuleNotFoundError as e: 44 | raise ImportError( 45 | "Dlib is an optional dependency. Please install it using 'pip install dlib' " 46 | "to use the Dlib face embedder." 47 | ) from e 48 | 49 | # Download model weights if necessary 50 | weight_file_path = download_model_weights( 51 | filename=DLIB_WEIGHT_FILENAME, 52 | download_url=DLIB_WEIGHTS, 53 | compression_format="bz2" 54 | ) 55 | return dlib.face_recognition_model_v1(str(weight_file_path)) 56 | 57 | def forward(self, imgs: List[np.ndarray], normalize_embeddings: bool = True) -> List[List[float]]: 58 | """ 59 | Compute face embeddings for a batch of images. 60 | 61 | Args: 62 | imgs (List[np.ndarray]): List of face images. 63 | normalize_embeddings (bool): Whether to apply L2 normalization to embeddings. 
64 | 65 | Returns: 66 | torch.Tensor: Tensor of shape (batch_size, 128) with face embeddings. 67 | """ 68 | 69 | embeddings = [] 70 | 71 | for img in imgs: 72 | face_descriptor = self._dlib_model.compute_face_descriptor(img) 73 | embedding_vector = np.array(face_descriptor, dtype=np.float32) 74 | embeddings.append(embedding_vector) 75 | 76 | # Convert list of arrays to tensor 77 | embeddings_tensor = torch.tensor(embeddings) 78 | 79 | if normalize_embeddings: 80 | embeddings_tensor = F.normalize(embeddings_tensor, p=2, dim=1) 81 | 82 | return embeddings_tensor -------------------------------------------------------------------------------- /visionface/db/qdrant/search_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List, Dict, Union, Optional 3 | import numpy as np 4 | 5 | from visionface.db.qdrant.config import SearchConfig, SearchMethod 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class SearchManager: 12 | """Manages search operations for Qdrant""" 13 | 14 | def __init__(self, client): 15 | self.client = client 16 | 17 | def search_embeddings(self, 18 | collection_name: str, 19 | query_vectors: Union[np.ndarray, List[float]], 20 | config: SearchConfig = SearchConfig()) -> List[Dict]: 21 | """ 22 | Search embeddings using various methods 23 | 24 | Args: 25 | collection_name: Target collection 26 | query_vectors: Query embedding vector 27 | config: Search configuration 28 | 29 | Returns: 30 | List[Dict]: Search results 31 | """ 32 | try: 33 | # Validate config 34 | config.validate() 35 | results = [] 36 | if config.method == SearchMethod.SIMILARITY: 37 | for query_vector in query_vectors: 38 | results.extend( 39 | self._similarity_search(collection_name, query_vector, config) 40 | ) 41 | else: 42 | raise ValueError(f"Unsupported search method: {config.method}") 43 | 44 | formatted_results = self._format_results(results, config) 45 | 46 | return formatted_results 47 | 48 | except Exception as e: 49 | logger.error(f"Search failed: {e}") 50 | raise ValueError(f"Search operation failed: {e}") 51 | 52 | def _similarity_search(self, collection_name: str, query_vector: List[float], 53 | config: SearchConfig) -> List: 54 | """Perform similarity search""" 55 | return self.client.search( 56 | collection_name=collection_name, 57 | query_vector=query_vector, 58 | limit=config.limit, 59 | offset=config.offset, 60 | with_payload=config.with_payload, 61 | with_vectors=config.with_vectors, 62 | score_threshold=config.score_threshold 63 | ) 64 | 65 | def _format_results(self, results: List, config: SearchConfig) -> List[Dict]: 66 | """Format search results""" 67 | formatted_results = [] 68 | for result in results: 69 | formatted_result = { 70 | "id": result.id, 71 | "face_name": result.payload["face_name"] if config.with_payload else None, 72 | "score": getattr(result, 'score', None), 73 | "vector": result.vector if config.with_vectors else None 74 | } 75 | formatted_results.append(formatted_result) 76 | return formatted_results -------------------------------------------------------------------------------- /visionface/db/qdrant/config.py: -------------------------------------------------------------------------------- 1 | 2 | from dataclasses import asdict, dataclass 3 | from enum import Enum 4 | from typing import Any, Dict, Optional 5 | from qdrant_client.http.models import Distance 6 | 7 | 8 | class IndexType(Enum): 9 | """Available index types for payload fields""" 10 | TEXT = "text" 11 | INTEGER 
= "integer" 12 | FLOAT = "float" 13 | BOOL = "bool" 14 | GEO = "geo" 15 | DATETIME = "datetime" 16 | 17 | @dataclass 18 | class ConnectionConfig: 19 | """Configuration for Qdrant connection""" 20 | host: str = "localhost" 21 | port: int = 6333 22 | url: Optional[str] = None 23 | api_key: Optional[str] = None 24 | https: bool = False 25 | timeout: float = 5.0 26 | 27 | def validate(self) -> bool: 28 | """Validate connection configuration""" 29 | if not self.url and not self.host: 30 | raise ValueError(f"Either URL or host must be provided, url: {self.url}, host: {self.host}") 31 | if self.port <= 0: 32 | raise ValueError(f"Port must be positive, You set port to {self.port}") 33 | if self.timeout <= 0: 34 | raise ValueError(f"Timeout must be positive, {self.timeout}") 35 | return True 36 | 37 | @dataclass 38 | class CollectionConfig: 39 | """Configuration for collection creation""" 40 | name: str 41 | vector_size: int 42 | hnsw_config: Optional[Dict] = None 43 | optimizer_config: Optional[Dict] = None 44 | quantization_config: Optional[Dict] = None 45 | payload_indexes: Optional[Dict[str, IndexType]] = None 46 | replication_factor: int = 1 47 | write_consistency_factor: int = 1 48 | 49 | def to_dict(self) -> Dict[str, Any]: 50 | """Convert config to dictionary""" 51 | return asdict(self) 52 | 53 | @classmethod 54 | def from_dict(cls, data: Dict[str, Any]) -> 'CollectionConfig': 55 | """Create config from dictionary""" 56 | return cls(**data) 57 | 58 | def validate(self) -> bool: 59 | """Validate configuration parameters""" 60 | if self.vector_size <= 0: 61 | raise ValueError(f"Vector size must be positive, {self.vector_size}") 62 | if self.replication_factor <= 0: 63 | raise ValueError(f"Replication factor must be positive, {self.replication_factor}") 64 | if self.write_consistency_factor <= 0: 65 | raise ValueError(f"Write consistency factor must be positive, {self.write_consistency_factor}") 66 | return True 67 | 68 | class SearchMethod(Enum): 69 | """Available search methods""" 70 | SIMILARITY = "similarity" 71 | 72 | @dataclass 73 | class SearchConfig: 74 | """Configuration for search operations""" 75 | method: SearchMethod = SearchMethod.SIMILARITY 76 | limit: int = 10 77 | offset: int = 0 78 | with_payload: bool = True 79 | with_vectors: bool = False 80 | score_threshold: Optional[float] = None 81 | exact: bool = False 82 | hnsw_ef: Optional[int] = None 83 | quantization_rescore: Optional[bool] = None 84 | 85 | def validate(self) -> bool: 86 | """Validate search configuration""" 87 | if self.limit <= 0: 88 | raise ValueError("Limit must be positive") 89 | if self.offset < 0: 90 | raise ValueError("Offset must be non-negative") 91 | if self.score_threshold is not None and (self.score_threshold < 0 or self.score_threshold > 1): 92 | raise ValueError("Score threshold must be between 0 and 1") 93 | return True -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | # Read README file 5 | def read_readme(): 6 | with open("README.md", "r", encoding="utf-8") as fh: 7 | return fh.read() 8 | 9 | def read_requirements(): 10 | requirements_path = "requirements.txt" 11 | if os.path.exists(requirements_path): 12 | with open(requirements_path, "r", encoding="utf-8") as f: 13 | return [line.strip() for line in f if line.strip() and not line.startswith("#")] 14 | return [] 15 | 16 | __version__ = "1.0.0" 17 | 18 | 
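# Note: backends such as mediapipe, dlib and qdrant-client are imported lazily and
# installed separately (see the ImportError hints in the corresponding modules).
# A hypothetical extras mapping, if one were added here, could look like:
# EXTRA_REQUIRES = {"mediapipe": ["mediapipe"], "dlib": ["dlib"], "vectordb": ["qdrant-client"]}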
CORE_REQUIRES = [ 19 | "torch>=1.8.0", 20 | "torchvision>=0.9.0", 21 | "numpy>=1.19.0", 22 | "opencv-python>=4.5.0", 23 | "Pillow>=8.0.0", 24 | "requests>=2.25.0", 25 | ] 26 | 27 | 28 | 29 | setup( 30 | name="visionface", 31 | version=__version__, 32 | author="VisionFace Team", 33 | author_email="visio.face2025@gmail.com", 34 | description="Modern face detection, recognition & analysis framework with 12+ models", 35 | long_description=read_readme(), 36 | long_description_content_type="text/markdown", 37 | url="https://github.com/miladfa7/visionface", 38 | project_urls={ 39 | "Documentation": "https://visionface.readthedocs.io", 40 | "Source Code": "https://github.com/miladfa7/visionface", 41 | "Bug Tracker": "https://github.com/miladfa7/visionface/issues", 42 | "Changelog": "https://github.com/miladfa7/visionface/blob/main/CHANGELOG.md", 43 | }, 44 | packages=find_packages(exclude=["tests*", "docs*", "examples*"]), 45 | classifiers=[ 46 | "Development Status :: 4 - Beta", 47 | "Intended Audience :: Developers", 48 | "Intended Audience :: Science/Research", 49 | "Intended Audience :: Education", 50 | "License :: OSI Approved :: MIT License", 51 | "Operating System :: OS Independent", 52 | "Programming Language :: Python :: 3", 53 | "Programming Language :: Python :: 3.8", 54 | "Programming Language :: Python :: 3.9", 55 | "Programming Language :: Python :: 3.10", 56 | "Programming Language :: Python :: 3.11", 57 | "Programming Language :: Python :: 3.12", 58 | "Programming Language :: Python :: 3.13", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Software Development :: Libraries :: Python Modules", 62 | "Topic :: Multimedia :: Graphics :: Graphics Conversion", 63 | "Topic :: Security", 64 | "Framework :: FastAPI", 65 | ], 66 | python_requires=">=3.8", # Fixed: was >=3.10 but classifiers show 3.8+ 67 | install_requires=CORE_REQUIRES, 68 | include_package_data=True, 69 | package_data={ 70 | "visionface": [ 71 | "models/*.pth", 72 | "models/*.onnx", 73 | "configs/*.yaml", 74 | "data/*.json", 75 | ], 76 | }, 77 | entry_points={ 78 | "console_scripts": [ 79 | "visionface=visionface.cli:main", 80 | ], 81 | }, 82 | keywords=[ 83 | "computer-vision", 84 | "face-detection", 85 | "face-recognition", 86 | "facial-landmarks", 87 | "deep-learning", 88 | "pytorch", 89 | "yolo", 90 | "mediapipe", 91 | "artificial-intelligence", 92 | "biometrics", 93 | "image-processing", 94 | "real-time", 95 | "production-ready", 96 | ], 97 | zip_safe=False, 98 | platforms=["any"], 99 | license="MIT", 100 | ) -------------------------------------------------------------------------------- /visionface/FaceAnnotators.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | 3 | 4 | #VisionFace module 5 | from visionface.annotators.base import ImageType, RawDetection 6 | from visionface.models.Detector import Detector 7 | from visionface.annotators.detection import BoxCornerAnnotator, BoxAnnotator 8 | 9 | 10 | def box_annotator( 11 | img: ImageType, 12 | detections: Union[List[Detector], List[RawDetection]], 13 | color: Tuple = (245, 113, 47), 14 | thickness: int = 4, 15 | highlight: bool = True, 16 | highlight_opacity: float = 0.2, 17 | highlight_color: tuple = (255, 255, 255), 18 | ): 19 | """ 20 | Annotate an image with bounding boxes around detected face(s). 
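Typical usage (illustrative) is to pass the image together with the detections returned for it by FaceDetection.detect_faces, e.g. box_annotator(img, detections[0]) for the first image's faces; raw [x1, y1, x2, y2] lists are accepted as well.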
21 | 22 | Parameters 23 | ---------- 24 | img : ImageType 25 | The input image on which to draw annotations. Can be either a NumPy array 26 | or a PIL Image object. 27 | detections : List[Detector] 28 | A list of detection face(s) containing bounding box information. 29 | color : Tuple, optional 30 | The RGB color for the bounding boxes, default is (245, 113, 47). 31 | thickness : int, optional 32 | The thickness of the bounding box lines in pixels, default is 4. 33 | highlight : bool, optional 34 | Whether to highlight the detected regions, by default True 35 | highlight_opacity : float, optional 36 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 37 | highlight_color : tuple, optional 38 | BGR color tuple for the highlight, by default (255, 255, 255) 39 | 40 | Returns 41 | ------- 42 | ImageType 43 | The input image with bounding box annotations added. 44 | """ 45 | annotator = BoxAnnotator( 46 | color=color, 47 | thickness=thickness, 48 | ) 49 | return annotator.annotate( 50 | img=img, 51 | detections=detections, 52 | highlight=highlight, 53 | highlight_opacity=highlight_opacity, 54 | highlight_color=highlight_color 55 | ) 56 | 57 | def box_corner_annotator( 58 | img: ImageType, 59 | detections: Union[List[Detector], List[RawDetection]], 60 | color: Tuple = (245, 113, 47), 61 | thickness: int = 4, 62 | corner_length: int = 15, 63 | highlight: bool = True, 64 | highlight_opacity: float = 0.2, 65 | highlight_color: tuple = (255, 255, 255), 66 | ): 67 | """ 68 | Annotate an image with corner boxes around detected face(s). 69 | 70 | Parameters 71 | ---------- 72 | img : ImageType 73 | The input image on which to draw annotations. Can be either a NumPy array 74 | or a PIL Image object. 75 | detections : List[Detector] 76 | A list of detection face(s) containing bounding box information. 77 | color : Tuple, optional 78 | The RGB color for the corner boxes, default is (245, 113, 47). 79 | thickness : int, optional 80 | The thickness of the corner box lines in pixels, default is 4. 81 | corner_length : int, optional 82 | The length of each corner in pixels, default is 15. 83 | 84 | Returns 85 | ------- 86 | ImageType 87 | The input image with corner box annotations added. 88 | """ 89 | annotator = BoxCornerAnnotator( 90 | color=color, 91 | thickness=thickness, 92 | corner_length=corner_length, 93 | ) 94 | return annotator.annotate( 95 | img=img, 96 | detections=detections, 97 | highlight=highlight, 98 | highlight_opacity=highlight_opacity, 99 | highlight_color=highlight_color 100 | ) -------------------------------------------------------------------------------- /visionface/annotators/FaceAnnotators.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | 3 | 4 | #VisionFace module 5 | from visionface.annotators.base import ImageType, RawDetection 6 | from visionface.models.Detector import Detector 7 | from visionface.annotators.detection import BoxCornerAnnotator, BoxAnnotator 8 | 9 | 10 | def box_annotator( 11 | img: ImageType, 12 | detections: Union[List[Detector], List[RawDetection]], 13 | color: Tuple = (245, 113, 47), 14 | thickness: int = 4, 15 | highlight: bool = True, 16 | highlight_opacity: float = 0.2, 17 | highlight_color: tuple = (255, 255, 255), 18 | ): 19 | """ 20 | Annotate an image with bounding boxes around detected face(s). 21 | 22 | Parameters 23 | ---------- 24 | img : ImageType 25 | The input image on which to draw annotations. Can be either a NumPy array 26 | or a PIL Image object. 
27 | detections : List[Detector] 28 | A list of detection face(s) containing bounding box information. 29 | color : Tuple, optional 30 | The RGB color for the bounding boxes, default is (245, 113, 47). 31 | thickness : int, optional 32 | The thickness of the bounding box lines in pixels, default is 4. 33 | highlight : bool, optional 34 | Whether to highlight the detected regions, by default True 35 | highlight_opacity : float, optional 36 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 37 | highlight_color : tuple, optional 38 | BGR color tuple for the highlight, by default (255, 255, 255) 39 | 40 | Returns 41 | ------- 42 | ImageType 43 | The input image with bounding box annotations added. 44 | """ 45 | annotator = BoxAnnotator( 46 | color=color, 47 | thickness=thickness, 48 | ) 49 | return annotator.annotate( 50 | img=img, 51 | detections=detections, 52 | highlight=highlight, 53 | highlight_opacity=highlight_opacity, 54 | highlight_color=highlight_color 55 | ) 56 | 57 | def box_corner_annotator( 58 | img: ImageType, 59 | detections: Union[List[Detector], List[RawDetection]], 60 | color: Tuple = (245, 113, 47), 61 | thickness: int = 4, 62 | corner_length: int = 15, 63 | highlight: bool = True, 64 | highlight_opacity: float = 0.2, 65 | highlight_color: tuple = (255, 255, 255), 66 | ): 67 | """ 68 | Annotate an image with corner boxes around detected face(s). 69 | 70 | Parameters 71 | ---------- 72 | img : ImageType 73 | The input image on which to draw annotations. Can be either a NumPy array 74 | or a PIL Image object. 75 | detections : List[Detector] 76 | A list of detection face(s) containing bounding box information. 77 | color : Tuple, optional 78 | The RGB color for the corner boxes, default is (245, 113, 47). 79 | thickness : int, optional 80 | The thickness of the corner box lines in pixels, default is 4. 81 | corner_length : int, optional 82 | The length of each corner in pixels, default is 15. 83 | 84 | Returns 85 | ------- 86 | ImageType 87 | The input image with corner box annotations added. 88 | """ 89 | annotator = BoxCornerAnnotator( 90 | color=color, 91 | thickness=thickness, 92 | corner_length=corner_length, 93 | ) 94 | return annotator.annotate( 95 | img=img, 96 | detections=detections, 97 | highlight=highlight, 98 | highlight_opacity=highlight_opacity, 99 | highlight_color=highlight_color 100 | ) -------------------------------------------------------------------------------- /visionface/models/Detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import numpy as np 3 | from typing import Any, Union, List, Optional 4 | from dataclasses import dataclass 5 | 6 | class Detector(ABC): 7 | """ 8 | Abstract base class for a face detection system. 9 | 10 | This class defines the interface for building a detection model, 11 | running detection on images, and post-processing the results. 12 | Subclasses must implement all abstract methods. 13 | """ 14 | 15 | def __init__(self, MODEL_ID: int = 0, MIN_CONFIDENCE: float = 0.5): 16 | """ 17 | Initialize the base Detector with a confidence threshold. 18 | 19 | Args: 20 | conf (float): Minimum confidence score to consider a face detection valid. Default 0.25 21 | """ 22 | self.model_id = MODEL_ID 23 | self.conf = MIN_CONFIDENCE 24 | 25 | @abstractmethod 26 | def build_model(self) -> Any: 27 | """ 28 | Build and return the face detection model. 29 | 30 | This method should load or initialize the face detection model. 
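Concrete subclasses (for example the MediaPipe, OpenCV, or YOLO detectors registered in modules/modeling.py) typically load or download their pretrained weights here and return the ready-to-use backend object.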
31 | Returns: 32 | model (Any): The model used for detection. 33 | """ 34 | pass 35 | 36 | @abstractmethod 37 | def detect_faces(self, imgs: Union[np.ndarray, List[np.ndarray]]): 38 | """ 39 | Detect faces in a single image or a list of images. 40 | 41 | Args: 42 | imgs (Union[np.ndarray, List[np.ndarray]]): 43 | - A single image as a NumPy array with shape (H, W, 3), or 44 | - A list of such images. 45 | 46 | Returns: 47 | detections (Any): Raw output of the detection model. 48 | """ 49 | pass 50 | 51 | @abstractmethod 52 | def process_faces(self, results): 53 | """ 54 | Process the raw detections into a structured format. 55 | 56 | This could include bounding boxes, landmarks, confidence scores, etc. 57 | 58 | Args: 59 | results (Any): Raw model output from `detect_faces`. 60 | 61 | Returns: 62 | results (List[Any]): Processed list of face detection results in a consistent format. 63 | """ 64 | pass 65 | 66 | 67 | @dataclass 68 | class DetectedFace: 69 | """ 70 | Represents detected faces in an image. 71 | 72 | Attributes: 73 | x (int): The x-coordinate of the top-left corner of the face bounding box. 74 | y (int): The y-coordinate of the top-left corner of the face bounding box. 75 | w (int): The width of the face bounding box. 76 | h (int): The height of the face bounding box. 77 | conf (float): The confidence score of the face detection, typically between 0 and 1. 78 | class_name (str): The name of the detected class (e.g., "face"). 79 | """ 80 | xmin: int 81 | ymin: int 82 | xmax: int 83 | ymax: int 84 | conf: float 85 | class_name: Optional[str] = None 86 | cropped_face: Optional[np.ndarray] = None 87 | 88 | @property 89 | def xyxy(self): 90 | """ 91 | Returns the bounding box coordinates as a tuple (xmin, ymin, xmax, ymax). 92 | """ 93 | return (self.xmin, self.ymin, self.xmax, self.ymax) 94 | 95 | @property 96 | def xywh(self): 97 | """ 98 | Returns the bounding box coordinates as a tuple (x, y, w, h). 99 | """ 100 | width = self.xmax - self.xmin 101 | height = self.ymax - self.ymin 102 | return (self.xmin, self.ymin, width, height) 103 | 104 | def to_dict(self): 105 | return { 106 | "xywh": self.xywh, 107 | "xyxy": self.xyxy, 108 | "conf": self.conf, 109 | "class_name": self.class_name 110 | } -------------------------------------------------------------------------------- /visionface/models/landmark_detection/MediaPipeLandmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List 3 | import cv2 4 | 5 | # VisionFace modules 6 | from visionface.models.LandmarkDetector import LandmarkDetector, DetectedLandmark3D 7 | from visionface.models.landmark_detection.utils import medipipe_mesh_landmark_names 8 | 9 | class MediaPipeFaceMeshDetector(LandmarkDetector): 10 | """ 11 | Landmark detector that uses MediaPipe Face Mesh to extract 3D facial landmarks. 12 | """ 13 | def __init__(self): 14 | """ 15 | Initialize the MediaPipe face mesh model and load landmark names. 16 | """ 17 | self.mesh_landmark_names = medipipe_mesh_landmark_names() 18 | self.model = self.build_model() 19 | 20 | def build_model(self): 21 | """ 22 | Load the MediaPipe FaceMesh model. 23 | 24 | Returns 25 | ------- 26 | model : mediapipe.solutions.face_mesh.FaceMesh 27 | An instance of the MediaPipe FaceMesh model. 28 | """ 29 | try: 30 | import mediapipe as mp 31 | except ModuleNotFoundError as error: 32 | raise ImportError( 33 | "The 'mediapipe' library is not installed. " 34 | "It is required for MediaPipeFaceMeshDetector to work. 
" 35 | "Please install it using: pip install mediapipe" 36 | ) from error 37 | 38 | mp_face_mesh = mp.solutions.face_mesh 39 | landmark_detection = mp_face_mesh.FaceMesh( 40 | static_image_mode=True, 41 | max_num_faces=1, 42 | refine_landmarks=True, 43 | min_detection_confidence=0.5 44 | ) 45 | return landmark_detection 46 | 47 | def _detect_one(self, img: np.ndarray) -> List[DetectedLandmark3D]: 48 | """ 49 | Detect facial landmarks in a single image. 50 | 51 | Parameters 52 | ---------- 53 | img : np.ndarray 54 | The input image in BGR format. 55 | 56 | Returns 57 | ------- 58 | landmarks : List[DetectedLandmark3D] 59 | List of detected 3D landmarks for the face. 60 | """ 61 | results = self.model.process(img) 62 | if results.multi_face_landmarks: 63 | return self.process_landmarks(results) 64 | else: 65 | return [] 66 | 67 | def detect_landmarks(self, imgs: List[np.ndarray]) -> List[List[DetectedLandmark3D]]: 68 | """ 69 | Detect facial landmarks in a list of images. 70 | 71 | Parameters 72 | ---------- 73 | imgs : List[np.ndarray] 74 | List of images (each as a NumPy array in BGR format). 75 | 76 | Returns 77 | ------- 78 | List[List[DetectedLandmark3D]] 79 | A list where each element contains the detected landmarks for an image. 80 | """ 81 | return [self._detect_one(img) for img in imgs] 82 | 83 | def process_landmarks(self, results) -> List[DetectedLandmark3D]: 84 | """ 85 | Convert MediaPipe landmark results into DetectedLandmark3D objects. 86 | 87 | Parameters 88 | ---------- 89 | results : mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList 90 | The raw landmark output from the MediaPipe model. 91 | 92 | Returns 93 | ------- 94 | landmarks : List[DetectedLandmark3D] 95 | List of 3D landmarks with optional names. 96 | """ 97 | landmarks = [] 98 | for face_landmarks in results.multi_face_landmarks: 99 | for idx, lm in enumerate(face_landmarks.landmark): 100 | landmark_name = self.mesh_landmark_names.get(idx, f"unknown_{idx}") 101 | x, y, z = lm.x, lm.y, lm.z 102 | facial_landmarks = DetectedLandmark3D(x=x, y=y, z=z, name=landmark_name) 103 | landmarks.append(facial_landmarks) 104 | return landmarks -------------------------------------------------------------------------------- /visionface/annotators/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from PIL import Image 5 | from typing import List, Tuple, Union 6 | import math 7 | 8 | # VisionFace modules 9 | from visionface.annotators.base import ImageType, RawDetection 10 | from visionface.models.Detector import Detector 11 | 12 | def get_xyxy(detection: Union[Detector, RawDetection]) -> Tuple[int, int, int, int]: 13 | """ 14 | Extract bounding box coordinates from detection object or list. 15 | 16 | Args: 17 | detection: Detector or list with [x1, y1, x2, y2] 18 | 19 | Returns: 20 | Tuple of (x1, y1, x2, y2) 21 | 22 | Raises: 23 | TypeError: If detection format is not supported! 
24 | """ 25 | if hasattr(detection, 'xyxy'): 26 | return detection.xyxy 27 | elif (isinstance(detection, List) or isinstance(detection, Tuple)) and len(detection) >= 4: 28 | return int(detection[0]), int(detection[1]), int(detection[2]), int(detection[3]) 29 | else: 30 | raise TypeError(f"Unsupported detection type: {type(detection)}") 31 | 32 | 33 | def highlight_face( 34 | img: ImageType, 35 | detections: List[Detector], 36 | highlight_opacity: float = 0.2, 37 | highlight_color: Tuple[int, int, int] = (255, 255, 255), 38 | ) -> ImageType: 39 | """ 40 | Apply semi-transparent highlight to detected regions in image. 41 | 42 | Args: 43 | img: Input image 44 | detections: List of detections to highlight 45 | highlight_opacity: Opacity of highlight (0.0-1.0) 46 | highlight_color: BGR color tuple for highlight 47 | 48 | Returns: 49 | Image with highlighted regions 50 | """ 51 | overlay = img.copy() 52 | for detection in detections: 53 | x1, y1, x2, y2 = get_xyxy(detection) 54 | cv2.rectangle( 55 | overlay, 56 | (x1, y1), 57 | (x2, y2), 58 | highlight_color, 59 | -1 60 | ) 61 | cv2.addWeighted(overlay, highlight_opacity, img, 1 - highlight_opacity, 0, img) 62 | return img 63 | 64 | def convert_img_to_numpy(img: ImageType) -> np.ndarray: 65 | """ 66 | Convert different image formats to numpy array for processing. 67 | 68 | Args: 69 | img: Image as file path, numpy array, or PIL Image 70 | 71 | Returns: 72 | Image as numpy array in BGR format 73 | 74 | Raises: 75 | FileNotFoundError: If image file does not exist 76 | ValueError: If image file cannot be loaded 77 | TypeError: If image format is not supported 78 | """ 79 | if isinstance(img, str): 80 | if not os.path.exists(img): 81 | raise FileNotFoundError(f"Image file not found: {img}") 82 | img_np = cv2.imread(img) 83 | if img_np is None: 84 | raise ValueError(f"Failed to load image: {img}") 85 | return img_np 86 | 87 | elif isinstance(img, np.ndarray): 88 | return img.copy() 89 | 90 | elif isinstance(img, Image.Image): 91 | img_np = np.array(img) 92 | # Convert RGB to BGR (OpenCV format) 93 | if img_np.shape[-1] == 3: 94 | img_np = img_np[..., ::-1].copy() 95 | return img_np 96 | 97 | else: 98 | raise TypeError(f"Unsupported image type: {type(img)}") 99 | 100 | 101 | def denormalize_landmark( 102 | normalized_x: float, 103 | normalized_y: float, 104 | image_width: int, 105 | image_height: int 106 | ) -> Union[None, Tuple[int, int]]: 107 | 108 | def is_valid_normalized_value(value: float) -> bool: 109 | return (value > 0 or math.isclose(0, value)) and (value < 1 or 110 | math.isclose(1, value)) 111 | 112 | if not (is_valid_normalized_value(normalized_x) and 113 | is_valid_normalized_value(normalized_y)): 114 | # TODO: Draw coordinates even if it's outside of the image bounds. 
115 | return None 116 | x_px = min(math.floor(normalized_x * image_width), image_width - 1) 117 | y_px = min(math.floor(normalized_y * image_height), image_height - 1) 118 | return x_px, y_px 119 | -------------------------------------------------------------------------------- /visionface/modules/detection.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Union, List 2 | import numpy as np 3 | 4 | # visionface modules 5 | from visionface.models.Detector import Detector, DetectedFace 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | class FaceDetection: 10 | """ 11 | detecting faces in images using a specified detection backbone. 12 | """ 13 | 14 | def __init__(self, detector_backbone: str = "mediapipe") -> None: 15 | """ 16 | Initializes the FaceDetection class with the specified detector backbone. 17 | 18 | Parameters 19 | ---------- 20 | detector_backbone : str, optional 21 | Name of the face detection backend to use. Default is "mediapipe". 22 | """ 23 | self.face_detector = self.build_model(detector_backbone) 24 | 25 | def build_model(self, model_name: str) -> Any: 26 | """ 27 | Builds the face detection model based on the specified model name. 28 | 29 | Parameters 30 | ---------- 31 | model_name : str 32 | The name of the face detection model to use. 33 | 34 | Returns 35 | ------- 36 | Any 37 | An initialized face detection model. 38 | """ 39 | return build_model(model_name, "face_detection") 40 | 41 | def detect_faces( 42 | self, 43 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 44 | return_cropped_faces: bool = True 45 | ) -> List[List[DetectedFace]]: 46 | """ 47 | Detect faces in one or more images using the specified detector backbone. 48 | 49 | Parameters 50 | ---------- 51 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 52 | A single image or a list of images. Each image can be either a file path (str) 53 | or an image array. 54 | return_cropped_faces : bool, optional 55 | Whether to include cropped face images in each DetectedFace object. Default is True. 56 | 57 | Returns 58 | ------- 59 | List[List[DetectedFace]]: 60 | A list where each element is a list of DetectedFace objects for the corresponding input image. 61 | """ 62 | loaded_images = load_images(images) 63 | validated_images = validate_images(loaded_images) 64 | return self.face_detector.detect_faces(validated_images, return_cropped_faces) 65 | 66 | 67 | def detect_faces_with_prompt( 68 | self, 69 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 70 | prompts: Union[str, List[str]], 71 | return_cropped_faces: bool = True 72 | ) -> List[List[DetectedFace]]: 73 | """ 74 | Detect faces in one or more images using a prompt-based detection approach. 75 | 76 | Parameters 77 | ---------- 78 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 79 | A single image or a list of images. Each image can be either a file path (str) 80 | or an image array. 81 | 82 | prompts : Union[str, List[str]] 83 | A single prompt or a list of prompts describing the object(s) to detect. 84 | For example, "face". 85 | 86 | return_cropped_faces : bool, optional 87 | Whether to include cropped face images in each DetectedFace object. Default is True. 88 | 89 | Returns 90 | ------- 91 | List[List[DetectedFace]] 92 | A list where each element is a list of DetectedFace objects 93 | for the corresponding input image. 
Each detection includes bounding box 94 | coordinates, confidence score, class name, and optionally a cropped region. 95 | """ 96 | loaded_images = load_images(images) 97 | validated_images = validate_images(loaded_images) 98 | 99 | if isinstance(prompts, str): 100 | prompts = [prompts] 101 | 102 | # Optional: enforce prompt count matching image count 103 | # if len(validated_images) != len(prompts): 104 | # raise ValueError("The number of images and prompts must be the same.") 105 | 106 | return self.face_detector.detect_faces_with_prompt(validated_images, prompts, return_cropped_faces) 107 | 108 | -------------------------------------------------------------------------------- /visionface/models/FaceEmbedding.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import List, Union, Any, Tuple 3 | from dataclasses import dataclass 4 | import numpy as np 5 | import cv2 6 | import torch 7 | from torchvision.transforms import functional as F 8 | 9 | # VisionFace module 10 | from visionface.commons.image_utils import validate_images 11 | 12 | class FaceEmbedder(ABC): 13 | model: Any 14 | model_name: str 15 | input_shape: Tuple[int, int] 16 | output_shape: int 17 | 18 | def __init__(self): 19 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 20 | 21 | def _img_preprocess(self, imgs: Union[np.ndarray, List[np.ndarray]]) -> Union[torch.Tensor, np.ndarray]: 22 | """ 23 | Preprocess input images based on the model type. 24 | 25 | For PyTorch models: 26 | - Converts images to normalized float tensors [0, 1] 27 | - Returns a batched tensor of shape (N, 3, H, W) 28 | 29 | For Dlib models: 30 | - Ensures each image is resized to the target input shape 31 | - Returns a list of RGB images as NumPy arrays 32 | 33 | Args: 34 | imgs (List[np.ndarray]): List of images in BGR format (OpenCV) 35 | 36 | Returns: 37 | Union[torch.Tensor, List[np.ndarray]]: Preprocessed inputs ready for embedding 38 | """ 39 | target_h, target_w = self.input_shape 40 | batch_size = len(imgs) 41 | 42 | if self.model_name=="Dlib": 43 | batch_inputs = [] 44 | for img in imgs: 45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 46 | if img.shape[:2] != (target_h, target_w): 47 | img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR) 48 | batch_inputs.append(img) 49 | return batch_inputs 50 | else: 51 | batch_tensor = torch.empty(batch_size, 3, target_h, target_w, dtype=torch.float32, device=self.device) 52 | for i, img in enumerate(imgs): 53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 54 | if img.shape[:2] != (target_h, target_w): 55 | img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR) 56 | img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float() / 255.0 57 | batch_tensor[i] = img_tensor 58 | return batch_tensor 59 | 60 | @torch.no_grad() 61 | def embed(self, imgs: Union[np.ndarray, List[np.ndarray]], normalize_embeddings: bool = True) -> 'FaceEmbedding': 62 | """ 63 | Generate face embeddings from one or more face images. 64 | 65 | Args: 66 | imgs (Union[np.ndarray, List[np.ndarray]]): 67 | A single image with shape (H, W, 3) or a list of such images in BGR format. 68 | normalize_embeddings (bool, optional): 69 | If True, applies L2 normalization to the output embeddings. Default is True. 
70 | 71 | Returns: 72 | FaceEmbedding: 73 | An object containing the computed embedding tensor(s) with shape (N, D), 74 | where N is the number of input images and D is the embedding dimension (e.g., 128 or 512). 75 | """ 76 | 77 | # Validate input images 78 | imgs = validate_images(imgs) 79 | 80 | # Preprocess images depending on model type 81 | batch_inputs = self._img_preprocess(imgs) 82 | 83 | # Compute embeddings using the model's forward 84 | embeddings = self.model.forward(batch_inputs, normalize_embeddings) 85 | 86 | return FaceEmbedding(embeddings) 87 | 88 | 89 | @dataclass 90 | class FaceEmbedding: 91 | embeddings: torch.Tensor 92 | 93 | def __getitem__(self, idx): 94 | """Get embedding vector(s) at index idx (supports int or slice).""" 95 | return self.embeddings[idx] 96 | 97 | def batch_size(self) -> int: 98 | """Returns the batch size (number of embeddings).""" 99 | return self.embeddings.size(0) 100 | 101 | def to(self, device: torch.device): 102 | """Returns a new FaceEmbedding on the given device.""" 103 | return FaceEmbedding(self.embeddings.to(device)) 104 | 105 | def cpu(self): 106 | """Move embeddings to CPU.""" 107 | return self.to(torch.device('cpu')) 108 | 109 | def cuda(self): 110 | """Move embeddings to CUDA device.""" 111 | return self.to(torch.device('cuda')) 112 | 113 | def as_numpy(self): 114 | """Return embeddings as a NumPy array (on CPU).""" 115 | return self.embeddings.detach().cpu().numpy() 116 | 117 | def to_list(self): 118 | """Return embeddings as a list of lists (compatible with Qdrant).""" 119 | return self.embeddings.detach().cpu().numpy().tolist() -------------------------------------------------------------------------------- /visionface/db/qdrant_client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict, Union 2 | import logging 3 | 4 | import numpy as np 5 | from visionface.db.qdrant.config import CollectionConfig, ConnectionConfig, SearchConfig 6 | from visionface.db.qdrant.data_manager import DataManager 7 | from visionface.db.qdrant.search_manager import SearchManager 8 | 9 | logger = logging.getLogger(__name__) 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | class QdrantVectorDB: 13 | """ 14 | Qdrant Vector Database for face vector storage and search operation 15 | """ 16 | def __init__(self, **kwargs): 17 | try: 18 | try: 19 | from qdrant_client import QdrantClient 20 | from visionface.db.qdrant.collection_manager import CollectionManager 21 | except ImportError: 22 | logger.error("Please install qdrant-client: pip install qdrant-client") 23 | raise 24 | 25 | host = kwargs.get("host", "localhost") 26 | port = kwargs.get("port", 6333) 27 | url = kwargs.get("url", None) 28 | api_key = kwargs.get("api_key", None) 29 | https = kwargs.get("https", False) 30 | timeout = kwargs.get("timeout", 5.0) 31 | 32 | # Create connection config 33 | self.config = ConnectionConfig( 34 | host=host, port=port, url=url, 35 | api_key=api_key, https=https, timeout=timeout 36 | ) 37 | self.config.validate() 38 | 39 | # Initialize Qdrant client 40 | if url: 41 | self.client = QdrantClient(url=url, api_key=api_key, timeout=timeout) 42 | else: 43 | self.client = QdrantClient( 44 | host=host, 45 | port=port, 46 | https=https, 47 | api_key=api_key, 48 | timeout=timeout 49 | ) 50 | 51 | # Initialize managers 52 | self.collections = CollectionManager(self.client) 53 | self.search = SearchManager(self.client) 54 | self.data = DataManager(self.client) 55 | 56 | logger.info(f"Connected to 
Qdrant at {url or f'{host}:{port}'}") 57 | 58 | except Exception as e: 59 | logger.error(f"Failed to connect to Qdrant: {e}") 60 | raise ConnectionError(f"Connection failed: {e}") 61 | 62 | def create_collection( 63 | self, 64 | collection_name: str, 65 | vector_size: int 66 | ) -> bool: 67 | """Create a new collection""" 68 | config = CollectionConfig( 69 | name=collection_name, 70 | vector_size=vector_size, 71 | ) 72 | if not self.collection_exists(collection_name): 73 | self.collections.create_collection(config) 74 | else: 75 | logger.info(f"Collection [{collection_name}] already exists! ✅") 76 | 77 | def list_collections(self) -> List[str]: 78 | """List all collections""" 79 | return self.collections.list_collections() 80 | 81 | def delete_collection(self, collection_name: str) -> bool: 82 | """Delete a collection""" 83 | return self.collections.delete_collection(collection_name) 84 | 85 | def get_collection_info(self, collection_name: str) -> Optional[Dict]: 86 | """Get collection information""" 87 | return self.collections.get_collection_info(collection_name) 88 | 89 | def collection_exists(self, collection_name: str) -> bool: 90 | """Check if collection exists""" 91 | return self.collections.collection_exists(collection_name) 92 | 93 | def insert_embeddings(self, 94 | collection_name: str, 95 | embeddings: List[List[float]], 96 | payloads: Optional[List[Dict]] = None, 97 | ids: Optional[List[Union[str, int]]] = None, 98 | batch_size: int = 100) -> bool: 99 | """Insert embeddings with optional payloads""" 100 | return self.data.insert_embeddings( 101 | collection_name, embeddings, payloads, ids, batch_size 102 | ) 103 | 104 | def search_embeddings(self, 105 | collection_name: str, 106 | query_vectors: List[np.ndarray], 107 | score_threshold: Optional[float] = None, 108 | top_k: int = 5) -> List[Dict]: 109 | """Search embeddings using various methods""" 110 | config: SearchConfig = SearchConfig() 111 | config.limit = top_k 112 | config.score_threshold = score_threshold 113 | return self.search.search_embeddings( 114 | collection_name, query_vectors, config 115 | ) -------------------------------------------------------------------------------- /visionface/models/landmark_detection/Dlib.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from typing import List, Any 4 | import cv2 5 | 6 | from visionface.models.LandmarkDetector import LandmarkDetector, DetectedLandmark2D 7 | from visionface.commons.download_files import download_model_weights 8 | from visionface.models.landmark_detection.utils import dlib_landmarks_names 9 | 10 | 11 | DLIB_PREDICTOR_URL = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" 12 | DEFAULT_PREDICTOR_NAME = "shape_predictor_68_face_landmarks.dat" 13 | EXPECTED_LANDMARK_COUNT = 68 14 | 15 | class DlibFaceLandmarkDetector(LandmarkDetector): 16 | """ 17 | Landmark detector using dlib's 68-point facial shape predictor. 18 | 19 | Attributes 20 | ---------- 21 | detector : dlib.fhog_object_detector 22 | Dlib's frontal face detector. 23 | 24 | predictor : dlib.shape_predictor 25 | Dlib's facial landmark shape predictor. 26 | 27 | dlib_landmarks_names : dict 28 | Mapping of landmark indices to semantic names. 29 | 30 | dlib_landmarks : int 31 | Expected number of facial landmarks (default: 68). 
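# Illustrative sketch: end-to-end use of the QdrantVectorDB wrapper above.
# Assumes a Qdrant server is reachable on localhost:6333; the collection name,
# embeddings and payloads are made up for demonstration.
import numpy as np
from visionface.db.qdrant_client import QdrantVectorDB

db = QdrantVectorDB(host="localhost", port=6333)
db.create_collection("faces", vector_size=512)

db.insert_embeddings(
    collection_name="faces",
    embeddings=[[0.1] * 512, [0.2] * 512],
    payloads=[{"name": "alice"}, {"name": "bob"}],
)

# top_k and score_threshold are forwarded to SearchConfig internally
hits = db.search_embeddings("faces", query_vectors=[np.random.rand(512)], top_k=3)
print(hits)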
32 | """ 33 | def __init__(self): 34 | """Initialize the DlibFaceLandmarkDetector.""" 35 | self.detector, self.predictor = self.build_model() 36 | self.dlib_landmarks_names = dlib_landmarks_names() 37 | self.dlib_landmarks = EXPECTED_LANDMARK_COUNT 38 | 39 | def build_model(self) -> Any: 40 | """ 41 | Load the dlib face detector and shape predictor. 42 | 43 | Parameters 44 | ---------- 45 | predictor_name : str, optional 46 | Filename of the dlib predictor (default is shape_predictor_68_face_landmarks.dat) 47 | 48 | Returns 49 | ------- 50 | Tuple[dlib.fhog_object_detector, dlib.shape_predictor] 51 | Dlib face detector and shape predictor. 52 | """ 53 | try: 54 | import dlib 55 | except ImportError as e: 56 | raise ImportError( 57 | "dlib library is required but not installed. " 58 | "Install it using: pip install dlib or from source https://github.com/davisking/dlib" 59 | ) from e 60 | 61 | # Get the predictor file path 62 | predictor_path = download_model_weights( 63 | filename="shape_predictor_68_face_landmarks.dat", 64 | download_url=DLIB_PREDICTOR_URL, 65 | compression_format="bz2", 66 | ) 67 | # Initialize dlib components 68 | detector = dlib.get_frontal_face_detector() 69 | predictor = dlib.shape_predictor(str(predictor_path)) 70 | 71 | return detector, predictor 72 | 73 | def _detect_one(self, img: np.ndarray) -> List[DetectedLandmark2D]: 74 | """ 75 | Detect facial landmarks in a single image. 76 | 77 | Parameters 78 | ---------- 79 | img : np.ndarray 80 | The input image in BGR format. 81 | 82 | Returns 83 | ------- 84 | List[DetectedLandmark2D] 85 | List of 2D landmarks detected for all faces in the image. 86 | """ 87 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 88 | faces = self.detector(gray) 89 | all_landmarks = [self.predictor(gray, face) for face in faces] 90 | return self.process_landmarks(all_landmarks) 91 | 92 | 93 | def detect_landmarks(self, imgs: List[np.ndarray]) -> List[List[DetectedLandmark2D]]: 94 | """ 95 | Detects facial landmarks in a list of images using dlib's face detector and shape predictor. 96 | 97 | Parameters 98 | ---------- 99 | imgs : List[np.ndarray] 100 | List of images (each as a NumPy array in BGR format). 101 | 102 | Returns: 103 | List[List[DetectedLandmark2D]]: A list of detected 2D facial landmarks with coordinates and names. 104 | 105 | """ 106 | return [self._detect_one(img) for img in imgs] 107 | 108 | 109 | def process_landmarks(self, results: List) -> List[DetectedLandmark2D]: 110 | """ 111 | Convert raw dlib detection results into structured landmark data. 112 | 113 | Parameters 114 | ---------- 115 | results : List[dlib.full_object_detection] 116 | Raw landmark predictions from dlib. 117 | 118 | Returns 119 | ------- 120 | List[DetectedLandmark2D] 121 | List of structured 2D facial landmarks with names and coordinates. 
122 | """ 123 | landmarks = [] 124 | for face_landmarks in results: 125 | for idx in range(self.dlib_landmarks): 126 | name = self.dlib_landmarks_names.get(idx, f"unknown_{idx}") 127 | part = face_landmarks.part(idx) 128 | landmarks.append(DetectedLandmark2D(x=part.x, y=part.y, name=name)) 129 | return landmarks 130 | -------------------------------------------------------------------------------- /visionface/commons/download_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import bz2 3 | import logging 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | import gdown 8 | from visionface.commons.utils import get_home_directory 9 | 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class WeightsDownloadError(Exception): 15 | """Custom exception for weights download failures.""" 16 | pass 17 | 18 | 19 | 20 | def get_face_models_home() -> str: 21 | """ 22 | Get the home directory for storing model weights 23 | 24 | Returns: 25 | str: the home directory. 26 | """ 27 | return str(os.getenv("DEEPFACE_HOME", default=os.path.expanduser("~"))) 28 | 29 | 30 | def download_model_weights( 31 | filename: str, 32 | download_url: str, 33 | compression_format: Optional[str] = None, 34 | ) -> Path: 35 | """ 36 | Download and extract model weights from a URL. 37 | 38 | Args: 39 | filename: Name of the target file (without extension) 40 | download_url: URL to download the file from 41 | compression_format: File compression format ('zip', 'bz2' or None) 42 | Returns: 43 | Path to the downloaded and extracted file 44 | 45 | Raises: 46 | WeightsDownloadError: If download fails 47 | FileNotFoundError: If home directory cannot be determined 48 | """ 49 | 50 | home_dir = Path(get_face_models_home()) 51 | 52 | # Create weights directory structure 53 | weights_dir = home_dir / ".VisionFace/weights" 54 | weights_dir.mkdir(parents=True, exist_ok=True) 55 | 56 | # Define target file path 57 | target_filepath = weights_dir / filename 58 | 59 | # Check if file already exists 60 | if target_filepath.exists() and target_filepath.is_file(): 61 | logger.info(f"✓ {filename} already exists at {target_filepath}") 62 | return target_filepath 63 | 64 | # Download the file 65 | logger.info(f"Downloading {filename} model weights...") 66 | logger.info(f"Source URL: {download_url}") 67 | logger.info(f"Target directory: {weights_dir}") 68 | 69 | # Determine download filename based on compression 70 | if compression_format: 71 | download_filename = f"{filename}.{compression_format}" 72 | download_filepath = weights_dir / download_filename 73 | else: 74 | download_filename = filename 75 | download_filepath = target_filepath 76 | 77 | try: 78 | gdown.download(download_url, str(download_filepath), quiet=False) 79 | logger.info(f"✓ Successfully downloaded {download_filename}") 80 | except Exception as e: 81 | error_msg = ( 82 | f"Failed to download {filename} from {download_url}. 
" 83 | f"Please verify the URL is accessible or download manually to {target_filepath}" 84 | ) 85 | logger.error(error_msg) 86 | raise WeightsDownloadError(error_msg) from e 87 | 88 | # Extract file if compressed 89 | if compression_format: 90 | logger.info(f"Extracting {download_filename}...") 91 | _extract_compressed_file(download_filepath, target_filepath, compression_format) 92 | 93 | # Clean up compressed file after extraction 94 | try: 95 | download_filepath.unlink() 96 | logger.info(f"Removed compressed file: {download_filename}") 97 | except Exception as e: 98 | logger.warning(f"Could not remove compressed file {download_filename}: {e}") 99 | 100 | logger.info(f"Model weights ready at: {target_filepath}") 101 | return target_filepath 102 | 103 | 104 | def _extract_compressed_file( 105 | compressed_filepath: Path, 106 | target_filepath: Path, 107 | compression_format: str 108 | ) -> None: 109 | """ 110 | Extract a compressed file to the target location. 111 | 112 | Args: 113 | compressed_filepath: Path to the compressed file 114 | target_filepath: Path where extracted file should be saved 115 | compression_format: Type of compression ('bz2') 116 | 117 | Raises: 118 | WeightsDownloadError: If extraction fails 119 | """ 120 | if compression_format.lower() == "bz2": 121 | try: 122 | with bz2.BZ2File(compressed_filepath, 'rb') as compressed_file: 123 | with open(target_filepath, 'wb') as target_file: 124 | chunk_size = 64 * 1024 # 64KB chunks 125 | while True: 126 | chunk = compressed_file.read(chunk_size) 127 | if not chunk: 128 | break 129 | target_file.write(chunk) 130 | 131 | logger.info(f"✓ Successfully extracted {compressed_filepath.name} to {target_filepath.name}") 132 | 133 | except Exception as e: 134 | error_msg = f"Failed to extract {compressed_filepath}: {e}" 135 | logger.error(error_msg) 136 | raise WeightsDownloadError(error_msg) from e -------------------------------------------------------------------------------- /visionface/db/qdrant/data_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import uuid 3 | from typing import List, Dict, Union, Optional 4 | import numpy as np 5 | from qdrant_client.http.models import PointStruct, models 6 | 7 | logger = logging.getLogger(__name__) 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | class DataManager: 11 | """Manages data operations for Qdrant""" 12 | 13 | def __init__(self, client): 14 | self.client = client 15 | 16 | def insert_embeddings(self, 17 | collection_name: str, 18 | embeddings: List[List[float]], 19 | payloads: Optional[List[Dict]] = None, 20 | ids: Optional[List[Union[str, int]]] = None, 21 | batch_size: int = 100) -> bool: 22 | """ 23 | Insert embeddings with optional payloads 24 | 25 | Args: 26 | collection_name: Target collection 27 | embeddings: List of embedding vectors 28 | payloads: Optional metadata for each embedding 29 | ids: Optional custom IDs (auto-generated if None) 30 | batch_size: Batch size for insertion 31 | 32 | Returns: 33 | bool: Success status 34 | """ 35 | try: 36 | if not embeddings: 37 | logger.warning("No embeddings provided") 38 | return False 39 | 40 | # Generate IDs if not provided 41 | if ids is None: 42 | ids = [str(uuid.uuid4()) for _ in embeddings] 43 | 44 | # Ensure payloads list matches embeddings length 45 | if payloads is None: 46 | payloads = [{}] * len(embeddings) 47 | elif len(payloads) != len(embeddings): 48 | raise ValueError("Payloads length must match embeddings length") 49 | 50 | # Process in batches 
51 | total_inserted = 0 52 | batch_size = len(embeddings) if len(embeddings)<=batch_size else batch_size 53 | 54 | for i in range(0, len(embeddings), batch_size): 55 | batch_embeddings = embeddings[i:i+batch_size] 56 | batch_payloads = payloads[i:i+batch_size] 57 | batch_ids = ids[i:i+batch_size] 58 | 59 | points = [ 60 | PointStruct( 61 | id=point_id, 62 | vector=embedding, 63 | payload=payload 64 | ) 65 | for point_id, embedding, payload in zip(batch_ids, batch_embeddings, batch_payloads) 66 | ] 67 | 68 | self.client.upsert( 69 | collection_name=collection_name, 70 | points=points 71 | ) 72 | 73 | total_inserted += len(points) 74 | logger.info(f"Successfully inserted {total_inserted} embeddings into '{collection_name}' ✅") 75 | return True 76 | 77 | except Exception as e: 78 | logger.error(f"Failed to insert embeddings: {e}") 79 | raise ValueError(f"Insertion failed: {e}") 80 | 81 | def delete_embeddings(self, 82 | collection_name: str, 83 | ids: Optional[List[Union[str, int]]] = None) -> bool: 84 | """ 85 | Delete embeddings by IDs or filter conditions 86 | 87 | Args: 88 | collection_name: Target collection 89 | ids: Specific IDs to delete 90 | filter_conditions: Filter conditions for deletion 91 | 92 | Returns: 93 | bool: Success status 94 | """ 95 | try: 96 | if ids: 97 | # Delete by IDs 98 | self.client.delete( 99 | collection_name=collection_name, 100 | points_selector=models.PointIdsList(points=ids) 101 | ) 102 | logger.info(f"Deleted {len(ids)} points by ID") 103 | else: 104 | raise ValueError("Either ids must be provided for removing embeddings") 105 | 106 | return True 107 | 108 | except Exception as e: 109 | logger.error(f"Failed to delete embeddings: {e}") 110 | raise ValueError(f"Deletion failed: {e}") 111 | 112 | 113 | def get_points(self, 114 | collection_name: str, 115 | ids: List[Union[str, int]], 116 | with_payload: bool = True, 117 | with_vectors: bool = False) -> List[Dict]: 118 | """Retrieve specific points by ID""" 119 | try: 120 | points = self.client.retrieve( 121 | collection_name=collection_name, 122 | ids=ids, 123 | with_payload=with_payload, 124 | with_vectors=with_vectors 125 | ) 126 | 127 | return [ 128 | { 129 | "id": point.id, 130 | "payload": point.payload if with_payload else None, 131 | "vector": point.vector if with_vectors else None 132 | } 133 | for point in points 134 | ] 135 | except Exception as e: 136 | logger.error(f"Failed to retrieve points: {e}") 137 | return [] -------------------------------------------------------------------------------- /visionface/models/face_embedding/ArcFace.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | import os 7 | 8 | # VisionFace modules 9 | from visionface.models.FaceEmbedding import FaceEmbedder 10 | from visionface.commons.download_files import download_model_weights 11 | 12 | 13 | ARCFACE_WEIGHTS_18 = "https://download.pytorch.org/models/resnet18-5c106cde.pth" 14 | 15 | class ArcFace18(FaceEmbedder): 16 | """ 17 | ArcFace 18 model class 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.model = ResNetFace18(block=IRBlock, 22 | layers=[2, 2, 2, 2], 23 | use_se=True, 24 | pretrained="resnet-18") 25 | self.model_name = "ArcFace-18" 26 | self.input_shape = (112, 112) 27 | self.output_shape = 512 28 | 29 | 30 | class ResNetFace18(nn.Module): 31 | def __init__(self, block, layers: List, use_se: bool = True, pretrained: Optional[str] = None): 32 | self.device = 
torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | self.inplanes = 64 34 | self.use_se = use_se 35 | super(ResNetFace18, self).__init__() 36 | self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False) 37 | self.bn1 = nn.BatchNorm2d(64) 38 | self.prelu = nn.PReLU() 39 | self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2) 40 | self.layer1 = self._make_layer(block, 64, layers[0]) 41 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 42 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 43 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 44 | self.bn4 = nn.BatchNorm2d(512) 45 | self.dropout = nn.Dropout() 46 | self.fc5 = nn.Linear(512 * 8 * 8, 512) 47 | self.bn5 = nn.BatchNorm1d(512) 48 | 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | nn.init.xavier_normal_(m.weight) 52 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): 53 | nn.init.constant_(m.weight, 1) 54 | nn.init.constant_(m.bias, 0) 55 | elif isinstance(m, nn.Linear): 56 | nn.init.xavier_normal_(m.weight) 57 | nn.init.constant_(m.bias, 0) 58 | 59 | def _make_layer(self, block, planes, blocks, stride=1): 60 | downsample = None 61 | if stride != 1 or self.inplanes != planes * block.expansion: 62 | downsample = nn.Sequential( 63 | nn.Conv2d(self.inplanes, planes * block.expansion, 64 | kernel_size=1, stride=stride, bias=False), 65 | nn.BatchNorm2d(planes * block.expansion), 66 | ) 67 | layers = [] 68 | layers.append(block(self.inplanes, planes, stride, downsample, use_se=self.use_se)) 69 | self.inplanes = planes 70 | for i in range(1, blocks): 71 | layers.append(block(self.inplanes, planes, use_se=self.use_se)) 72 | 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x, normalize_embeddings=True): 76 | x = self.conv1(x) 77 | x = self.bn1(x) 78 | x = self.prelu(x) 79 | x = self.maxpool(x) 80 | 81 | x = self.layer1(x) 82 | x = self.layer2(x) 83 | x = self.layer3(x) 84 | x = self.layer4(x) 85 | x = self.bn4(x) 86 | x = self.dropout(x) 87 | x = x.view(x.size(0), -1) 88 | x = self.fc5(x) 89 | x = self.bn5(x) 90 | 91 | return x 92 | 93 | 94 | 95 | class IRBlock(nn.Module): 96 | expansion = 1 97 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True): 98 | super(IRBlock, self).__init__() 99 | self.bn0 = nn.BatchNorm2d(inplanes) 100 | self.conv1 = self._conv3x3(inplanes, inplanes) 101 | self.bn1 = nn.BatchNorm2d(inplanes) 102 | self.prelu = nn.PReLU() 103 | self.conv2 = self._conv3x3(inplanes, planes, stride) 104 | self.bn2 = nn.BatchNorm2d(planes) 105 | self.downsample = downsample 106 | self.stride = stride 107 | self.use_se = use_se 108 | if self.use_se: 109 | self.se = SEBlock(planes) 110 | 111 | def _conv3x3(self, in_planes, out_planes, stride=1): 112 | """3x3 convolution with padding""" 113 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 114 | padding=1, bias=False) 115 | 116 | def forward(self, x): 117 | residual = x 118 | out = self.bn0(x) 119 | out = self.conv1(out) 120 | out = self.bn1(out) 121 | out = self.prelu(out) 122 | 123 | out = self.conv2(out) 124 | out = self.bn2(out) 125 | if self.use_se: 126 | out = self.se(out) 127 | 128 | if self.downsample is not None: 129 | residual = self.downsample(x) 130 | 131 | out += residual 132 | out = self.prelu(out) 133 | 134 | return out 135 | 136 | 137 | class SEBlock(nn.Module): 138 | def __init__(self, channel, reduction=16): 139 | super(SEBlock, self).__init__() 140 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 141 | self.fc = 
nn.Sequential( 142 | nn.Linear(channel, channel // reduction), 143 | nn.PReLU(), 144 | nn.Linear(channel // reduction, channel), 145 | nn.Sigmoid() 146 | ) 147 | 148 | def forward(self, x): 149 | b, c, _, _ = x.size() 150 | y = self.avg_pool(x).view(b, c) 151 | y = self.fc(y).view(b, c, 1, 1) 152 | return x * y 153 | 154 | 155 | -------------------------------------------------------------------------------- /visionface/models/face_detection/OpenCV.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import logging 4 | from typing import List 5 | 6 | # VisionFace modules 7 | from visionface.models.Detector import Detector, DetectedFace 8 | from visionface.commons.download_files import download_model_weights 9 | from visionface.commons.image_utils import get_cropped_face 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | 14 | FILE_NAMES = [ 15 | "opencv_deploy.prototxt", 16 | "opencv_res10_300x300_ssd_iter_140000.caffemodel", 17 | ] 18 | 19 | FILE_URLS = [ 20 | "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt", 21 | "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel", 22 | ] 23 | 24 | 25 | class OpenCVDetector(Detector): 26 | def __init__(self): 27 | """ 28 | Initializes the OpenCV face detector using a pre-trained Caffe model. 29 | """ 30 | super().__init__() 31 | self.input_size = (300, 300) # Standard input size for the model 32 | self.model = self.build_model() 33 | 34 | def build_model(self) -> cv2.dnn_Net: 35 | """ 36 | Downloads model files and loads the OpenCV DNN face detector. 37 | 38 | Returns: 39 | cv2.dnn_Net: The loaded OpenCV DNN model. 40 | """ 41 | prototxt_name = FILE_NAMES[0] 42 | prototxt_url = FILE_URLS[0] 43 | weights_name = FILE_NAMES[1] 44 | weights_url = FILE_URLS[1] 45 | 46 | prototxt_path = download_model_weights( 47 | filename=prototxt_name, 48 | download_url=prototxt_url 49 | ) 50 | weights_path = download_model_weights( 51 | filename=weights_name, 52 | download_url=weights_url 53 | ) 54 | # Load OpenCV DNN model 55 | model = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path) 56 | # Set backend and target for better performance 57 | model.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) 58 | model.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) 59 | return model 60 | 61 | def _detect_one(self, img: np.ndarray) -> np.ndarray: 62 | """ 63 | Detects faces in a single image using the loaded OpenCV DNN model. 64 | 65 | Args: 66 | img (np.ndarray): Input image in BGR format. 67 | 68 | Returns: 69 | np.ndarray: Raw detection output from the model. 70 | """ 71 | blob = cv2.dnn.blobFromImage( 72 | img, 73 | scalefactor=1.0, 74 | size=self.input_size, 75 | mean=(104.0, 177.0, 123.0) 76 | ) 77 | self.model.setInput(blob) 78 | return self.model.forward() 79 | 80 | def detect_faces( 81 | self, 82 | imgs: List[np.ndarray], 83 | return_cropped_faces: bool = True 84 | ) -> List[List[DetectedFace]]: 85 | """ 86 | Detect faces in one or more input images using the Opencv model. 87 | 88 | Parameters: 89 | imgs (List[np.ndarray]): 90 | A single image or a list of images in BGR format. 91 | return_cropped_faces : bool, optional 92 | Whether to include cropped face images in each DetectedFace object. Default is True. 93 | 94 | Returns: 95 | List[List[DetectedFace]]: 96 | A list where each element is a list of DetectedFace objects corresponding to one input image. 
97 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 98 | and the cropped face region. 99 | """ 100 | results = [self._detect_one(img) for img in imgs] 101 | return self.process_faces(imgs, results, return_cropped_faces) 102 | 103 | def process_faces( 104 | self, 105 | imgs: List[np.ndarray], 106 | results: List[np.ndarray], 107 | return_cropped_faces: bool 108 | ) -> List[List[DetectedFace]]: 109 | """ 110 | Converts raw model outputs into structured DetectedFace objects. 111 | 112 | Args: 113 | imgs (List[np.ndarray]): List of original images. 114 | results (List[np.ndarray]): List of raw model outputs per image. 115 | return_cropped_faces: bool 116 | Whether to include cropped face images in each DetectedFace object. 117 | 118 | Returns: 119 | List[List[DetectedFace]]: List of detections for each image. 120 | """ 121 | 122 | detections = [] 123 | 124 | for idx, result in enumerate(results): 125 | img = imgs[idx] 126 | h, w = img.shape[:2] 127 | current_detections = [] 128 | face_no = 0 129 | for i in range(result.shape[2]): 130 | confidence = result[0, 0, i, 2] 131 | if confidence > self.conf: 132 | # Get bounding box coordinates 133 | box = result[0, 0, i, 3:7] * np.array([w, h, w, h]) 134 | x1, y1, x2, y2 = box.astype(int) 135 | x1, y1 = max(0, x1), max(0, y1) 136 | x2, y2 = min(w, x2), min(h, y2) 137 | cropped_face = get_cropped_face(img, [x1, y1, x2, y2]) if return_cropped_faces else None 138 | 139 | facial_info = DetectedFace( 140 | xmin=x1, 141 | ymin=y1, 142 | xmax=x2, 143 | ymax=y2, 144 | conf=round(confidence, 2), 145 | class_name="face", 146 | cropped_face=cropped_face 147 | ) 148 | current_detections.append(facial_info) 149 | face_no +=1 150 | 151 | if not len(current_detections): 152 | current_detections = DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0) 153 | 154 | logging.info( 155 | f"[OpenCVDetector] {face_no} face(s) detected in image id: {idx}, " 156 | f"min confidence threshold 0.25." 
157 | ) 158 | 159 | detections.append(current_detections) 160 | 161 | return detections -------------------------------------------------------------------------------- /visionface/models/face_detection/YOLO.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from typing import List, Any, Union 4 | from enum import Enum 5 | 6 | # VisionFace modules 7 | from visionface.models.Detector import Detector, DetectedFace 8 | from visionface.commons.image_utils import get_cropped_face 9 | from visionface.commons.download_files import download_model_weights 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | class YOLOModel(Enum): 14 | """Enum for YOLO model types.""" 15 | NANO = 0 16 | SMALL = 1 17 | MEDIUM = 2 18 | LARGE = 3 19 | 20 | WEIGHT_NAMES = [ 21 | "yolov12n-face.pt", 22 | "yolov12s-face.pt", 23 | "yolov12m-face.pt", 24 | "yolov12l-face.pt", 25 | ] 26 | 27 | WEIGHT_URLS = [ 28 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12n-face.pt", 29 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12s-face.pt", 30 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12m-face.pt", 31 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12l-face.pt", 32 | ] 33 | 34 | 35 | class YOLODetector(Detector): 36 | """ 37 | References: 38 | YOLO Face Detection: https://github.com/akanametov/yolo-face 39 | """ 40 | def __init__(self, model: YOLOModel = YOLOModel.SMALL): 41 | """ 42 | Initialize the YOLO Detector. 43 | """ 44 | import torch 45 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 46 | self.model = self.build_model(model) 47 | 48 | def build_model(self, model: YOLOModel): 49 | try: 50 | from ultralytics import YOLO 51 | except ModuleNotFoundError as error: 52 | raise ImportError( 53 | "The 'ultralytics' library is not installed. " 54 | "It is required for YOLOEyeDetector to work. " 55 | "Please install it using: pip install ultralytics" 56 | ) from error 57 | 58 | # Get the weight file (and download if necessary) 59 | model_id = model.value 60 | model_name = WEIGHT_NAMES[model_id] 61 | weight_url = WEIGHT_URLS[model_id] 62 | model_path = download_model_weights( 63 | filename=model_name, 64 | download_url=weight_url 65 | ) 66 | # Load the YOLO face model 67 | return YOLO(model_path) 68 | 69 | def detect_faces( 70 | self, 71 | imgs: List[np.ndarray], 72 | return_cropped_faces: bool = True 73 | ) -> List[List[DetectedFace]]: 74 | """ 75 | Detect faces in one or more input images using the MediaPipe model. 76 | 77 | Parameters: 78 | imgs (List[np.ndarray]): 79 | A single image or a list of images in BGR format. 80 | 81 | return_cropped_faces : bool, optional 82 | Whether to include cropped face images in each DetectedFace object. Default is True. 83 | 84 | Returns: 85 | List[List[DetectedFace]]: 86 | A list where each element is a list of DetectedFace objects corresponding to one input image. 87 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 88 | and the cropped face region. 
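# Illustrative sketch: using the YOLO backend directly when a specific model
# size is wanted. Assumes ultralytics is installed; the zero-filled frame is a
# placeholder, so no faces will actually be found.
import numpy as np
from visionface.models.face_detection.YOLO import YOLODetector, YOLOModel

detector = YOLODetector(model=YOLOModel.NANO)      # weights download on first use

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # placeholder BGR frame
faces_per_image = detector.detect_faces([frame], return_cropped_faces=False)
print(faces_per_image)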
89 | """ 90 | results = self.model.predict( 91 | imgs, 92 | verbose=False, 93 | show=False, 94 | device=self.device 95 | ) 96 | return self.process_faces(imgs, results, return_cropped_faces) 97 | 98 | def process_faces( 99 | self, 100 | imgs: List[np.ndarray], 101 | results: Any, 102 | return_cropped_faces: bool 103 | ) -> List[List[DetectedFace]]: 104 | """ 105 | Process YOLO detection results and convert them into DetectedFace objects. 106 | 107 | Parameters 108 | ---------- 109 | imgs : List[np.ndarray] 110 | A single image or a list of images (NumPy arrays). 111 | return_cropped_faces : bool 112 | Whether to include cropped face images in each DetectedFace object. 113 | 114 | results : List[ultralytics.engine.results.Results] 115 | A list of YOLO detection results, one for each input image. 116 | 117 | Returns 118 | ------- 119 | List[List[DetectedFace]] 120 | A list where each element is a list of DetectedFace objects corresponding to one input image. 121 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 122 | and the cropped face region. 123 | """ 124 | 125 | detections = [] 126 | 127 | for idx, result in enumerate(results): 128 | 129 | if result.boxes is None: 130 | continue 131 | 132 | current_detections = [] 133 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int).tolist() 134 | confidences = result.boxes.conf.cpu().numpy().tolist() 135 | img = imgs[idx] 136 | 137 | for bbox, conf in zip(bboxes, confidences): 138 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 139 | facial_info = DetectedFace( 140 | xmin=bbox[0], 141 | ymin=bbox[1], 142 | xmax=bbox[2], 143 | ymax=bbox[3], 144 | conf=round(conf, 2), 145 | class_name="face", 146 | cropped_face=cropped_face 147 | ) 148 | current_detections.append(facial_info) 149 | 150 | logging.info( 151 | f"[YOLODetector] {len(current_detections)} face(s) detected in image id: {idx}, " 152 | f"min confidence threshold 0.25." 
153 | ) 154 | 155 | detections.append(current_detections) 156 | 157 | return detections 158 | 159 | 160 | class YOLONanoDetector(YOLODetector): 161 | """YOLO Nano detector implementation""" 162 | def __init__(self): 163 | super().__init__(model=YOLOModel.NANO) 164 | 165 | class YOLOSmallDetector(YOLODetector): 166 | """YOLO Small detector implementation""" 167 | def __init__(self): 168 | super().__init__(model=YOLOModel.SMALL) 169 | 170 | class YOLOMediumDetector(YOLODetector): 171 | """YOLO Medium detector implementation""" 172 | def __init__(self): 173 | super().__init__(model=YOLOModel.MEDIUM) 174 | 175 | class YOLOLargeDetector(YOLODetector): 176 | """YOLO Large detector implementation""" 177 | def __init__(self): 178 | super().__init__(model=YOLOModel.LARGE) -------------------------------------------------------------------------------- /visionface/models/face_detection/MediaPipe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from typing import List, Any, Union 4 | 5 | # VisionFace modules 6 | from visionface.models.Detector import Detector, DetectedFace 7 | from visionface.commons.utils import xywh2xyxy 8 | from visionface.commons.image_utils import get_cropped_face 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class MediaPipeDetector(Detector): 14 | """ 15 | References: 16 | MediaPipe Face Detection: https://github.com/google-ai-edge/mediapipe/blob/master/docs/solutions/face_detection.md 17 | """ 18 | def __init__(self, MODEL_ID: int = 1, MIN_CONFIDENCE: float = 0.5): 19 | """ 20 | Initialize the MediaPipeDetector. 21 | 22 | Parameters: 23 | model_id: int, default=1 24 | The MediaPipe face detection model to use: 25 | - 0: Short-range model (optimized for faces within 2 meters) 26 | - 1: Full-range model (optimized for faces within 5 meters) 27 | 28 | min_confidence: float, default=0.5 29 | Minimum confidence threshold (0.0 to 1.0) for face detection. 30 | Detections below this threshold will be filtered out. 31 | """ 32 | if MODEL_ID not in (0, 1): 33 | raise ValueError(f"Invalid MODEL_ID: {MODEL_ID}. MediaPipe only 0 (short-range) or 1 (full-range) are supported.") 34 | 35 | super().__init__(MODEL_ID, MIN_CONFIDENCE) 36 | self.model = self.build_model() 37 | 38 | def build_model(self) -> Any: 39 | """ 40 | Build and initialize the MediaPipe face detection model. 41 | 42 | Returns: 43 | An instance of MediaPipe's FaceDetection model. 44 | 45 | Raises: 46 | ImportError: If the 'mediapipe' library is not installed. 47 | """ 48 | try: 49 | import mediapipe as mp 50 | except ModuleNotFoundError as error: 51 | raise ImportError( 52 | "The 'mediapipe' library is not installed. " 53 | "It is required for MediaPipeDetector to work. " 54 | "Please install it using: pip install mediapipe" 55 | ) from error 56 | 57 | mp_face_detection = mp.solutions.face_detection 58 | face_detection = mp_face_detection.FaceDetection( 59 | min_detection_confidence=self.conf, 60 | model_selection=self.model_id 61 | ) 62 | return face_detection 63 | 64 | def _detect_one( 65 | self, 66 | img_id: int, 67 | img: np.ndarray, 68 | return_cropped_faces: bool 69 | ) -> List[DetectedFace]: 70 | """ 71 | Detect faces in a single image using the MediaPipe model. 72 | 73 | Parameters: 74 | img_id (int): id for the image 75 | img (np.ndarray): The input image in BGR format 76 | return_cropped_faces(bool): cropped face images in each DetectedFace object. 
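# Illustrative sketch: the two MediaPipe knobs documented above. Assumes
# mediapipe is installed; the zero-filled frame is only a placeholder.
import numpy as np
from visionface.models.face_detection.MediaPipe import MediaPipeDetector

# Short-range model (faces within ~2 m) with a stricter confidence threshold
detector = MediaPipeDetector(MODEL_ID=0, MIN_CONFIDENCE=0.7)

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # placeholder BGR frame
print(detector.detect_faces([frame]))              # -> [[]] when nothing is found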
77 | 78 | Returns: 79 | List[DetectedFace]: A list of DetectedFace objects. 80 | """ 81 | # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 82 | h, w = img.shape[:2] 83 | results = self.model.process(img) 84 | if results.detections is None: 85 | return [] 86 | return self.process_faces(img, results, w, h, img_id, return_cropped_faces) 87 | 88 | def detect_faces( 89 | self, 90 | imgs: List[np.ndarray], 91 | return_cropped_faces: bool = True 92 | ) -> List[List[DetectedFace]]: 93 | """ 94 | Detect faces in one or more input images using the MediaPipe model. 95 | 96 | Parameters: 97 | imgs: List[np.ndarray]: 98 | A single image or a list of images in BGR format. 99 | 100 | return_cropped_faces : bool, optional 101 | Whether to include cropped face images in each DetectedFace object. Default is True. 102 | 103 | Returns: 104 | List[List[DetectedFace]]: 105 | A list where each element is a list of DetectedFace objects for the corresponding input image. 106 | """ 107 | # Run face detection on each image 108 | detections = [self._detect_one(img_id, img, return_cropped_faces) for img_id, img in enumerate(imgs)] 109 | return detections 110 | 111 | def process_faces( 112 | self, 113 | img: np.ndarray, 114 | results: Any, 115 | img_width: int, 116 | img_height: int, 117 | img_id: int, 118 | return_cropped_faces: bool 119 | ) -> List[DetectedFace]: 120 | """ 121 | Process the raw detection results from MediaPipe into DetectedFace objects. 122 | 123 | Parameters: 124 | img (np.ndarray): 125 | The input image in BGR or RGB format. 126 | results: Any 127 | Detection results from the MediaPipe model's process. 128 | img_width: int 129 | Width of the image in pixels. 130 | img_height: int 131 | Height of the image in pixels. 132 | return_cropped_faces : bool 133 | Whether to include cropped face images in each DetectedFace object. 134 | 135 | Returns: 136 | List[DetectedFace] 137 | A list of DetectedFace objects with face coordinates 138 | and confidence scores for each detected face. 139 | """ 140 | 141 | detections = [] 142 | 143 | for detection in results.detections: 144 | (confidence,) = detection.score 145 | bounding_box = detection.location_data.relative_bounding_box 146 | 147 | # Convert relative coordinates to absolute pixel coordinates 148 | x = int(bounding_box.xmin * img_width) 149 | w = int(bounding_box.width * img_width) 150 | y = int(bounding_box.ymin * img_height) 151 | h = int(bounding_box.height * img_height) 152 | 153 | # Convert xywh format to xyxy 154 | bbox = xywh2xyxy([x, y, w, h]) 155 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 156 | 157 | facial_info = DetectedFace( 158 | xmin=bbox[0], 159 | ymin=bbox[1], 160 | xmax=bbox[2], 161 | ymax=bbox[3], 162 | conf=round(confidence, 2), 163 | class_name="face", 164 | cropped_face=cropped_face 165 | ) 166 | detections.append(facial_info) 167 | 168 | logging.info( 169 | f"[MediaPipeDetector] {len(detections)} face(s) detected in image id: {img_id}, " 170 | f"min confidence threshold {self.conf:.2f}." 
171 | ) 172 | 173 | return detections -------------------------------------------------------------------------------- /visionface/db/qdrant/collection_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Any, Dict, List, Optional 4 | from qdrant_client.http.models import ( 5 | VectorParams, HnswConfigDiff, OptimizersConfigDiff, 6 | ScalarQuantization, ProductQuantization, BinaryQuantization, 7 | ) 8 | from qdrant_client.http.models import Distance 9 | from visionface.db.qdrant.config import CollectionConfig 10 | 11 | logger = logging.getLogger(__name__) 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | class CollectionManager: 15 | """Manages collection operations for Qdrant""" 16 | 17 | def __init__(self, client): 18 | self.client = client 19 | 20 | def create_collection(self, config: CollectionConfig) -> bool: 21 | """ 22 | Create a new collection with configuration 23 | 24 | Args: 25 | config: Collection configuration 26 | 27 | Returns: 28 | bool: Success status 29 | """ 30 | try: 31 | # Validate configuration 32 | config.validate() 33 | 34 | # Build vector params 35 | vectors_config = VectorParams( 36 | size=config.vector_size, 37 | distance=Distance.COSINE 38 | ) 39 | 40 | # Build HNSW config 41 | hnsw_config = None 42 | if config.hnsw_config: 43 | hnsw_config = HnswConfigDiff(**config.hnsw_config) 44 | 45 | # Build optimizer config 46 | optimizer_config = None 47 | if config.optimizer_config: 48 | optimizer_config = OptimizersConfigDiff(**config.optimizer_config) 49 | 50 | # Build quantization config 51 | quantization_config = self._build_quantization_config(config.quantization_config) 52 | 53 | # Create collection 54 | self.client.create_collection( 55 | collection_name=config.name, 56 | vectors_config=vectors_config, 57 | hnsw_config=hnsw_config, 58 | optimizers_config=optimizer_config, 59 | quantization_config=quantization_config, 60 | replication_factor=config.replication_factor, 61 | write_consistency_factor=config.write_consistency_factor 62 | ) 63 | 64 | logger.info(f"Collection '{config.name}' created successfully ✅") 65 | return True 66 | 67 | except Exception as e: 68 | logger.error(f"Failed to create collection '{config.name}': {e}") 69 | if "Connection refused" in str(e): 70 | logger.error( 71 | "Qdrant connection was refused. 
Make sure the Qdrant server is running.\n" 72 | "To start it with Docker, run Qdrant server locally with docker:\n" 73 | "docker run -d -p 6333:6333 qdrant/qdrant:latest", 74 | "See more launch options in, https://github.com/qdrant/qdrant#usage" 75 | ) 76 | 77 | raise ValueError(f"Collection creation failed: {e}") 78 | 79 | def _build_quantization_config(self, quantization_config: Optional[Dict]) -> Optional[Any]: 80 | """Build quantization configuration""" 81 | if not quantization_config: 82 | return None 83 | 84 | quant_type = quantization_config.get("type", "scalar") 85 | 86 | if quant_type == "scalar": 87 | return ScalarQuantization(scalar=quantization_config) 88 | elif quant_type == "product": 89 | return ProductQuantization(product=quantization_config) 90 | elif quant_type == "binary": 91 | return BinaryQuantization(binary=quantization_config) 92 | else: 93 | raise ValueError(f"Unknown quantization type: {quant_type}") 94 | 95 | def get_collection_info(self, collection_name: str) -> Optional[Dict]: 96 | """Get detailed collection information""" 97 | try: 98 | info = self.client.get_collection(collection_name) 99 | return { 100 | "name": collection_name, 101 | "status": info.status, 102 | "optimizer_status": info.optimizer_status, 103 | "vectors_count": info.vectors_count, 104 | "indexed_vectors_count": info.indexed_vectors_count, 105 | "points_count": info.points_count, 106 | "segments_count": info.segments_count, 107 | "config": { 108 | "params": info.config.params.__dict__ if info.config.params else None, 109 | "hnsw_config": info.config.hnsw_config.__dict__ if info.config.hnsw_config else None, 110 | "optimizer_config": info.config.optimizer_config.__dict__ if info.config.optimizer_config else None, 111 | "quantization_config": str(info.config.quantization_config) if info.config.quantization_config else None 112 | }, 113 | "payload_schema": info.payload_schema 114 | } 115 | except Exception as e: 116 | logger.error(f"Failed to get collection info: {e}") 117 | raise ValueError(f"Collection '{collection_name}' not found") 118 | 119 | def list_collections(self) -> List[str]: 120 | """List all collections""" 121 | try: 122 | collections = self.client.get_collections() 123 | return [collection.name for collection in collections.collections] 124 | except Exception as e: 125 | logger.error(f"Failed to list collections: {e}") 126 | return [] 127 | 128 | def delete_collection(self, collection_name: str) -> bool: 129 | """Delete a collection""" 130 | try: 131 | self.client.delete_collection(collection_name) 132 | logger.info(f"Collection '{collection_name}' deleted") 133 | return True 134 | except Exception as e: 135 | logger.error(f"Failed to delete collection '{collection_name}': {e}") 136 | return False 137 | 138 | def collection_exists(self, collection_name: str) -> bool: 139 | """Check if collection exists""" 140 | try: 141 | self.client.get_collection(collection_name) 142 | return True 143 | except: 144 | return False 145 | 146 | def refresh_collection(self, collection_name: str) -> bool: 147 | """Refresh collection (optimize indexes)""" 148 | try: 149 | self.client.update_collection( 150 | collection_name=collection_name, 151 | optimizer_config=OptimizersConfigDiff( 152 | indexing_threshold=20000 153 | ) 154 | ) 155 | logger.info(f"Collection '{collection_name}' refresh initiated") 156 | return True 157 | except Exception as e: 158 | logger.error(f"Failed to refresh collection '{collection_name}': {e}") 159 | return False 160 | 
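# Illustrative sketch: driving CollectionManager with an existing QdrantClient
# instead of the high-level QdrantVectorDB wrapper. Assumes a local Qdrant
# server and that CollectionConfig provides defaults for its optional fields;
# the collection name is made up.
from qdrant_client import QdrantClient
from visionface.db.qdrant.config import CollectionConfig
from visionface.db.qdrant.collection_manager import CollectionManager

client = QdrantClient(host="localhost", port=6333)
manager = CollectionManager(client)

config = CollectionConfig(name="faces", vector_size=512)
if not manager.collection_exists("faces"):
    manager.create_collection(config)

print(manager.list_collections())
print(manager.get_collection_info("faces"))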
-------------------------------------------------------------------------------- /visionface/models/face_detection/YOLOEye.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import logging 4 | from typing import List, Any, Union 5 | import cv2 6 | from enum import Enum 7 | 8 | # VisionFace modules 9 | from visionface.models.Detector import Detector, DetectedFace 10 | from visionface.commons.download_files import download_model_weights 11 | from visionface.commons.image_utils import get_cropped_face 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | class YOLOEModel(Enum): 16 | """Enum for YOLOE model types.""" 17 | SMALL = 0 18 | MEDIUM = 1 19 | LARGE = 2 20 | 21 | #Text/Visual Prompt models 22 | WEIGHT_NAMES = [ 23 | "yoloe-11s-seg.pt", 24 | "yoloe-11m-seg.pt", 25 | "yoloe-11l-seg.pt" 26 | ] 27 | 28 | WEIGHT_URLS = [ 29 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11s-seg.pt", 30 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11m-seg.pt", 31 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11l-seg.pt" 32 | ] 33 | 34 | class YOLOEyeDetector(Detector): 35 | """ 36 | Reference: https://github.com/THU-MIG/yoloe 37 | """ 38 | def __init__(self, model: YOLOEModel = YOLOEModel.MEDIUM): 39 | """ 40 | Initialize the YOLOEyeDetector. 41 | """ 42 | import torch 43 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | self.model = self.build_model(model) 45 | 46 | def build_model(self, model: YOLOEModel): 47 | try: 48 | from ultralytics import YOLO 49 | except ModuleNotFoundError as error: 50 | raise ImportError( 51 | "The 'ultralytics' library is not installed. " 52 | "It is required for YOLOEyeDetector to work. " 53 | "Please install it using: pip install ultralytics" 54 | ) from error 55 | 56 | # Get the weight file (and download if necessary) 57 | model_id = model.value 58 | model_name = WEIGHT_NAMES[model_id] 59 | weight_url = WEIGHT_URLS[model_id] 60 | model_path = download_model_weights( 61 | filename=model_name, 62 | download_url=weight_url 63 | ) 64 | return YOLO(model_path) 65 | 66 | def detect_faces(self, imgs: List[np.ndarray], return_cropped_faces: bool = True) -> List[List[DetectedFace]]: 67 | """ 68 | Detect faces in one or more input images using the YOLOe model. 69 | 70 | Parameters: 71 | imgs (List[np.ndarray]): 72 | A single image or a list of images in BGR format. 73 | 74 | return_cropped_faces : bool, optional 75 | Whether to include cropped face images in each DetectedFace object. Default is True. 76 | 77 | Returns: 78 | List[List[DetectedFace]]: 79 | A list where each element is a list of DetectedFace objects corresponding to one input image. 80 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 81 | """ 82 | # By default, use a generic "face" prompt for detection 83 | prompt = "face" 84 | return self.detect_faces_with_prompt(imgs, prompt, return_cropped_faces) 85 | 86 | def _set_text_prompt(self, prompts: List[str]) -> None: 87 | """ 88 | Set the text prompt for the YOLO World model. 89 | """ 90 | self.model.set_classes(prompts, self.model.get_text_pe(prompts)) 91 | 92 | def detect_faces_with_prompt( 93 | self, 94 | imgs: List[np.ndarray], 95 | prompts: List[str], 96 | return_cropped_faces: bool = True 97 | ) -> List[List[DetectedFace]]: 98 | """ 99 | Detect faces in the given image based on text prompt guidance. 
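# Illustrative sketch: prompt-guided detection with the YOLOE backend. Assumes
# ultralytics is installed; the prompt list and zero-filled frame are only
# placeholders for demonstration.
import numpy as np
from visionface.models.face_detection.YOLOEye import YOLOEyeDetector, YOLOEModel

detector = YOLOEyeDetector(model=YOLOEModel.SMALL)

frame = np.zeros((640, 640, 3), dtype=np.uint8)    # placeholder BGR frame
detections = detector.detect_faces_with_prompt([frame], prompts=["face"])
print(detections)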
100 | 101 | Args: 102 | img (np.ndarray): Input image as a NumPy array (H, W, C). 103 | prompt (Union[str, List[str]]): Either a single text prompt or a list of text prompts 104 | describing the faces to detect. 105 | return_cropped_faces : bool, optional 106 | Whether to include cropped face images in each DetectedFace object. Default is True. 107 | 108 | Returns: 109 | List[DetectedFace]: A list of detected faces that match the prompt(s). 110 | """ 111 | self._set_text_prompt(prompts) 112 | results = self.model.predict( 113 | imgs, 114 | verbose=False, 115 | show=False, 116 | device=self.device 117 | ) 118 | return self.process_faces(imgs, results, return_cropped_faces) 119 | 120 | def detect_faces_with_visual(self, imgs: List[np.ndarray]) -> List[DetectedFace]: 121 | pass 122 | 123 | def process_faces( 124 | self, 125 | imgs: List[np.ndarray], 126 | results: List[Any], 127 | return_cropped_faces: bool 128 | ) -> List[List[DetectedFace]]: 129 | """ 130 | Process the raw detections into a structured format. 131 | """ 132 | 133 | detections = [] 134 | 135 | for idx, result in enumerate(results): 136 | 137 | current_detections = [] 138 | class_id = result.boxes.cls.cpu().numpy().astype(int) 139 | class_names = np.array([result.names[i] for i in class_id]) 140 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int) 141 | confidence = result.boxes.conf.cpu().numpy() 142 | img = imgs[idx] 143 | 144 | if not len(bboxes): 145 | detections.append(DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0)) 146 | continue 147 | 148 | for bbox, conf, class_name in zip(bboxes, confidence, class_names): 149 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 150 | facial_info = DetectedFace( 151 | xmin=bbox[0], 152 | ymin=bbox[1], 153 | xmax=bbox[2], 154 | ymax=bbox[3], 155 | conf=round(conf, 2), 156 | class_name=class_name, 157 | cropped_face=cropped_face 158 | ) 159 | current_detections.append(facial_info) 160 | 161 | logging.info( 162 | f"{len(current_detections)} face(s) detected in image id: {idx}," 163 | ) 164 | 165 | detections.append(current_detections) 166 | 167 | return detections 168 | 169 | 170 | 171 | class YOLOEyeSmallDetector(YOLOEyeDetector): 172 | """YOLOEye Small detector implementation""" 173 | def __init__(self): 174 | super().__init__(model=YOLOEModel.SMALL) 175 | 176 | class YOLOEyeMediumDetector(YOLOEyeDetector): 177 | """YOLOEye Medium detector implementation""" 178 | def __init__(self): 179 | super().__init__(model=YOLOEModel.MEDIUM) 180 | 181 | class YOLOEyeLargeDetector(YOLOEyeDetector): 182 | """YOLOEye Large detector implementation""" 183 | def __init__(self): 184 | super().__init__(model=YOLOEModel.LARGE) 185 | -------------------------------------------------------------------------------- /visionface/models/face_detection/YOLOWolrd.py: -------------------------------------------------------------------------------- 1 | from click import prompt 2 | import numpy as np 3 | import logging 4 | from typing import List, Any, Union 5 | from enum import Enum 6 | 7 | # VisionFace modules 8 | from visionface.models.Detector import Detector, DetectedFace 9 | from visionface.commons.image_utils import get_cropped_face 10 | from visionface.commons.download_files import download_model_weights 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | class YOLOModel(Enum): 15 | """Enum for YOLO World model types.""" 16 | SMALL = 0 17 | MEDIUM = 1 18 | LARGE = 2 19 | XLARGE = 3 20 | 21 | WEIGHT_NAMES = [ 22 | "yolov8s-world.pt", 23 | "yolov8m-world.pt", 24 | 
"yolov8l-world.pt", 25 | "yolov8x-world.pt", 26 | ] 27 | 28 | WEIGHT_URLS = [ 29 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-world.pt", 30 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-world.pt", 31 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-world.pt", 32 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-world.pt", 33 | ] 34 | 35 | 36 | class YOLOWolrdDetector(Detector): 37 | def __init__(self, model: YOLOModel = YOLOModel.MEDIUM): 38 | """ 39 | Initialize the YOLO Detector. 40 | """ 41 | import torch 42 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | self.model = self.build_model(model) 44 | 45 | def build_model(self, model: YOLOModel): 46 | try: 47 | from ultralytics import YOLOWorld 48 | except ModuleNotFoundError as error: 49 | raise ImportError( 50 | "The 'ultralytics' library is not installed. " 51 | "It is required for YOLOEyeDetector to work. " 52 | "Please install it using: pip install ultralytics" 53 | ) from error 54 | 55 | # Get the weight file (and download if necessary) 56 | model_id = model.value 57 | model_name = WEIGHT_NAMES[model_id] 58 | weight_url = WEIGHT_URLS[model_id] 59 | model_path = download_model_weights( 60 | filename=model_name, 61 | download_url=weight_url 62 | ) 63 | return YOLOWorld(model_path) 64 | 65 | def detect_faces( 66 | self, 67 | imgs: List[np.ndarray], 68 | return_cropped_faces: bool = True 69 | ) -> List[List[DetectedFace]]: 70 | """ 71 | Detect faces in one or more input images using the YOLO Wolrd model. 72 | 73 | Parameters: 74 | imgs (List[np.ndarray]): 75 | A single image or a list of images in BGR format. 76 | 77 | return_cropped_faces : bool, optional 78 | Whether to include cropped face images in each DetectedFace object. Default is True. 79 | 80 | Returns: 81 | List[List[DetectedFace]]: 82 | A list where each element is a list of DetectedFace objects corresponding to one input image. 83 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 84 | """ 85 | # By default, use a generic "face" prompt for detection 86 | prompts = "face" 87 | return self.detect_faces_with_prompt(imgs, prompts, return_cropped_faces) 88 | 89 | def _set_text_prompt(self, prompts: List[str]) -> None: 90 | """ 91 | Set the text prompt for the YOLO World model. 92 | """ 93 | self.model.set_classes(prompts) 94 | 95 | def detect_faces_with_prompt( 96 | self, 97 | imgs: List[np.ndarray], 98 | prompts: List[str], 99 | return_cropped_faces: bool = True 100 | ) -> List[List[DetectedFace]]: 101 | """ 102 | Detect faces in the given image based on text prompt guidance. 103 | 104 | Args: 105 | img (np.ndarray): Input image as a NumPy array (H, W, C). 106 | prompts (Union[str, List[str]]): Either a single text prompt or a list of text prompts 107 | describing the faces to detect. 108 | return_cropped_faces : bool, optional 109 | Whether to include cropped face images in each DetectedFace object. Default is True. 110 | 111 | Returns: 112 | List[DetectedFace]: A list of detected faces that match the prompt(s). 
113 | """ 114 | self._set_text_prompt(prompts) 115 | results = self.model.predict( 116 | imgs, 117 | verbose=False, 118 | show=False, 119 | device=self.device 120 | ) 121 | return self.process_faces(imgs, results, return_cropped_faces) 122 | 123 | 124 | def detect_faces_with_visual(self, imgs: List[np.ndarray]) -> List[DetectedFace]: 125 | pass 126 | 127 | def process_faces( 128 | self, 129 | imgs: List[np.ndarray], 130 | results: List[Any], 131 | return_cropped_faces: bool 132 | ) -> List[List[DetectedFace]]: 133 | """ 134 | Process the raw detections into a structured format. 135 | """ 136 | 137 | detections = [] 138 | 139 | for idx, result in enumerate(results): 140 | current_detections = [] 141 | class_id = result.boxes.cls.cpu().numpy().astype(int) 142 | class_names = np.array([result.names[i] for i in class_id]) 143 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int) 144 | confidence = result.boxes.conf.cpu().numpy() 145 | img = imgs[idx] 146 | 147 | if not len(bboxes): 148 | detections.append(DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0)) 149 | continue 150 | 151 | for bbox, conf, class_name in zip(bboxes, confidence, class_names): 152 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 153 | facial_info = DetectedFace( 154 | xmin=bbox[0], 155 | ymin=bbox[1], 156 | xmax=bbox[2], 157 | ymax=bbox[3], 158 | conf=round(conf, 2), 159 | class_name=class_name, 160 | cropped_face=cropped_face 161 | ) 162 | current_detections.append(facial_info) 163 | 164 | logging.info( 165 | f"{len(current_detections)} face(s) detected in image id: {idx}," 166 | ) 167 | 168 | detections.append(current_detections) 169 | 170 | return detections 171 | 172 | 173 | class YOLOWorldSmallDetector(YOLOWolrdDetector): 174 | """YOLO Small detector implementation""" 175 | def __init__(self): 176 | super().__init__(model=YOLOModel.SMALL) 177 | 178 | class YOLOWorldMediumDetector(YOLOWolrdDetector): 179 | """YOLO Medium detector implementation""" 180 | def __init__(self): 181 | super().__init__(model=YOLOModel.MEDIUM) 182 | 183 | class YOLOWorldLargeDetector(YOLOWolrdDetector): 184 | """YOLO Large detector implementation""" 185 | def __init__(self): 186 | super().__init__(model=YOLOModel.LARGE) 187 | 188 | class YOLOWorldXLargeDetector(YOLOWolrdDetector): 189 | """YOLO XLarge detector implementation""" 190 | def __init__(self): 191 | super().__init__(model=YOLOModel.XLARGE) -------------------------------------------------------------------------------- /visionface/commons/image_utils.py: -------------------------------------------------------------------------------- 1 | # Part of this module is adapted from the DeepFace library 2 | # Source: https://github.com/serengil/deepface/blob/master/deepface/commons/image_utils.py 3 | # Original author: Alireza Makhzani and contributors 4 | 5 | import os 6 | from typing import Union, Tuple, IO, List 7 | import numpy as np 8 | import cv2 9 | from pathlib import Path 10 | import io 11 | import base64 12 | from PIL import Image 13 | import requests 14 | from torch.nn.functional import interpolate 15 | 16 | from visionface.models.Detector import DetectedFace 17 | 18 | 19 | def load_images( 20 | inputs: Union[str, np.ndarray, IO[bytes], List[Union[str, np.ndarray, IO[bytes]]]] 21 | ) -> List[Tuple[np.ndarray, str]]: 22 | """ 23 | Load one or more images from various sources. 24 | 25 | Args: 26 | inputs: A single image or a list of images. 
Each image can be: 27 | - A file path (str) 28 | - A URL (str) 29 | - A base64-encoded string (str) 30 | - A numpy array (np.ndarray) 31 | - A file-like object (IO[bytes]) 32 | 33 | Returns: 34 | List[np.ndarray]: A list of loaded images in BGR format 35 | """ 36 | if not isinstance(inputs, list): 37 | inputs = [inputs] 38 | 39 | loaded_images = [] 40 | for item in inputs: 41 | if isinstance(item, list): 42 | for i in item: 43 | if isinstance(i, DetectedFace): 44 | loaded_images.append(i.cropped_face) 45 | continue 46 | elif isinstance(item, np.ndarray): 47 | loaded_images.append(item) 48 | elif hasattr(item, 'read') and callable(item.read): 49 | if isinstance(item, io.StringIO): 50 | raise ValueError("Image requires bytes, not io.StringIO.") 51 | img_arr = load_image_from_io_object(item) 52 | loaded_images.append(img_arr) 53 | elif isinstance(item, Path): 54 | img_arr = _load_from_str(str(item)) 55 | loaded_images.append(img_arr) 56 | elif isinstance(item, str): 57 | img_arr = _load_from_str(item) 58 | loaded_images.append(img_arr) 59 | else: 60 | raise ValueError(f"Unsupported input type: {type(item)}") 61 | return loaded_images 62 | 63 | 64 | def _load_from_str(img: str) -> np.ndarray: 65 | if img.startswith("data:image/"): 66 | return load_image_from_base64(img) 67 | elif img.lower().startswith(("http://", "https://")): 68 | return load_image_from_web(url=img) 69 | elif not os.path.isfile(img): 70 | raise ValueError(f"{img} is not exists") 71 | elif not img.isascii(): 72 | raise ValueError(f"Input image must not have non-English characters - {img}") 73 | else: 74 | img_obj_bgr = cv2.imread(img) 75 | return img_obj_bgr 76 | 77 | 78 | def load_image_from_io_object(obj: IO[bytes]) -> np.ndarray: 79 | """ 80 | Load image from an object that supports being read 81 | Args: 82 | obj: a file like object. 83 | Returns: 84 | img (np.ndarray): The decoded image as a numpy array (OpenCV format). 85 | """ 86 | try: 87 | _ = obj.seek(0) 88 | except (AttributeError, TypeError, io.UnsupportedOperation): 89 | seekable = False 90 | obj = io.BytesIO(obj.read()) 91 | else: 92 | seekable = True 93 | try: 94 | nparr = np.frombuffer(obj.read(), np.uint8) 95 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 96 | if img is None: 97 | raise ValueError("Failed to decode image") 98 | return img 99 | finally: 100 | if not seekable: 101 | obj.close() 102 | 103 | 104 | def load_image_from_io_object(obj: IO[bytes]) -> np.ndarray: 105 | """ 106 | Load image from an object that supports being read 107 | Args: 108 | obj: a file like object. 109 | Returns: 110 | img (np.ndarray): The decoded image as a numpy array (OpenCV format). 111 | """ 112 | try: 113 | _ = obj.seek(0) 114 | except (AttributeError, TypeError, io.UnsupportedOperation): 115 | seekable = False 116 | obj = io.BytesIO(obj.read()) 117 | else: 118 | seekable = True 119 | try: 120 | nparr = np.frombuffer(obj.read(), np.uint8) 121 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 122 | if img is None: 123 | raise ValueError("Failed to decode image") 124 | return img 125 | finally: 126 | if not seekable: 127 | obj.close() 128 | 129 | 130 | def load_image_from_base64(uri: str) -> np.ndarray: 131 | """ 132 | Load image from base64 string. 133 | Args: 134 | uri: a base64 string. 135 | Returns: 136 | numpy array: the loaded image. 
137 | """ 138 | 139 | encoded_data_parts = uri.split(",") 140 | 141 | if len(encoded_data_parts) < 2: 142 | raise ValueError("format error in base64 encoded string") 143 | 144 | encoded_data = encoded_data_parts[1] 145 | decoded_bytes = base64.b64decode(encoded_data) 146 | 147 | # similar to find functionality, we are just considering these extensions 148 | # content type is safer option than file extension 149 | with Image.open(io.BytesIO(decoded_bytes)) as img: 150 | file_type = img.format.lower() 151 | if file_type not in {"jpeg", "png"}: 152 | raise ValueError(f"Input image can be jpg or png, but it is {file_type}") 153 | 154 | nparr = np.frombuffer(decoded_bytes, np.uint8) 155 | img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 156 | # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) 157 | return img_bgr 158 | 159 | 160 | 161 | def load_image_from_web(url: str) -> np.ndarray: 162 | """ 163 | Loading an image from web 164 | Args: 165 | url: link for the image 166 | Returns: 167 | img (np.ndarray): equivalent to pre-loaded image from opencv (BGR format) 168 | """ 169 | response = requests.get(url, stream=True, timeout=60) 170 | response.raise_for_status() 171 | image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8) 172 | img = cv2.imdecode(image_array, cv2.IMREAD_COLOR) 173 | return img 174 | 175 | 176 | def validate_images(imgs: Union[np.ndarray, List[np.ndarray]]) -> List[np.ndarray]: 177 | """ 178 | Validates and standardizes image input for model processing. 179 | 180 | Args: 181 | imgs (Union[np.ndarray, List[np.ndarray]]): 182 | A single image with shape (H, W, 3) or a list of such images as NumPy arrays. 183 | 184 | Returns: 185 | List[np.ndarray]: A list of validated images, each with shape (H, W, 3). 186 | """ 187 | if isinstance(imgs, np.ndarray): 188 | imgs = [imgs] 189 | elif not isinstance(imgs, list): 190 | raise ValueError(f"Expected input to be a numpy array or list, but got {type(imgs)}") 191 | 192 | if not imgs: 193 | raise ValueError("Empty image list provided for face processing!") 194 | 195 | for i, img in enumerate(imgs): 196 | if not isinstance(img, np.ndarray): 197 | raise ValueError(f"Image {i} is not a numpy array. Got {type(img)} instead.") 198 | if img.ndim != 3 or img.shape[2] != 3: 199 | raise ValueError(f"Image {i} must have shape (H, W, 3), got {img.shape}") 200 | 201 | return imgs 202 | 203 | 204 | def get_cropped_face(img: np.ndarray, bbox: List[int]) -> np.ndarray: 205 | """ 206 | Crop a face region from the input image using the detected bounding box. 207 | 208 | Parameters: 209 | img (np.ndarray): The input image in BGR or RGB format. 210 | bbox (List[int]): Bounding box coordinates in [x1, y1, x2, y2] format. 211 | 212 | Returns: 213 | np.ndarray: 214 | The cropped face image as a NumPy array. If the bounding box is 215 | partially out of bounds, it will be clipped to fit within the image dimensions. 
216 | """ 217 | h, w = img.shape[:2] 218 | x1 = max(0, bbox[0]) 219 | y1 = max(0, bbox[1]) 220 | x2 = min(w, bbox[2]) 221 | y2 = min(h, bbox[3]) 222 | cropped_face = img[y1:y2, x1:x2] 223 | return cropped_face 224 | 225 | 226 | def image_resample(img, sz): 227 | im_data = interpolate(img, size=sz, mode="area") 228 | return im_data 229 | -------------------------------------------------------------------------------- /visionface/annotators/landmark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from typing import List, Union, Tuple, Optional, Mapping 4 | 5 | # VisionFace modules 6 | from visionface.annotators.base import BaseLandmarkAnnotator 7 | from visionface.models.LandmarkDetector import DetectedLandmark3D, DetectedLandmark2D 8 | from visionface.annotators.utils import denormalize_landmark 9 | from visionface.annotators.helper.landmark_connections import ( 10 | FACEMESH_TESSELATION, 11 | FACEMESH_CONTOURS, 12 | FACEMESH_IRISES, 13 | DLIB_FACE_LANDMARK_CONNECTIONS 14 | 15 | ) 16 | from visionface.annotators.helper.landmark_styles import ( 17 | FaceMeshStyle, 18 | FaceMeshContoursStyle, 19 | FaceMeshIrisStyle 20 | ) 21 | 22 | MEDIAPIPE_FACEMESH_CONNECTIONS = [ 23 | FACEMESH_TESSELATION, 24 | FACEMESH_CONTOURS, 25 | FACEMESH_IRISES 26 | ] 27 | DLIB_LANDMARK_CONNECTIONS = [ 28 | 29 | ] 30 | MEDIAPIPE_FACEMESH_STYLE = [ 31 | FaceMeshStyle(), 32 | FaceMeshContoursStyle(), 33 | FaceMeshIrisStyle() 34 | 35 | ] 36 | 37 | class MediaPipeFaceMeshAnnotator(BaseLandmarkAnnotator): 38 | def __init__( 39 | self, 40 | color: Tuple[int, int, int] = (255, 255, 255), 41 | thickness: int = 1, 42 | circle_radius: int = 2 43 | ): 44 | self.color = color 45 | self.thickness = thickness 46 | self.circle_radius = circle_radius 47 | 48 | def annotate( 49 | self, 50 | img: np.ndarray, 51 | landmarks: List[DetectedLandmark3D], 52 | connections: List[List[Tuple[int, int]]] = MEDIAPIPE_FACEMESH_CONNECTIONS, 53 | is_drawing_landmarks: bool = True 54 | ) -> np.ndarray: 55 | 56 | image_rows, image_cols, _ = img.shape 57 | idx_to_coordinates = {} 58 | 59 | for idx, lm in enumerate(landmarks): 60 | landmark_px = denormalize_landmark( 61 | normalized_x=lm.x, 62 | normalized_y=lm.y, 63 | image_width=image_cols, 64 | image_height=image_rows 65 | ) 66 | 67 | if landmark_px: 68 | idx_to_coordinates[idx] = landmark_px 69 | 70 | if connections: 71 | num_landmarks = len(landmarks) 72 | for cidx, connection_list in enumerate(connections): 73 | for connection in connection_list: 74 | start_idx = connection[0] 75 | end_idx = connection[1] 76 | if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks): 77 | raise ValueError(f'Landmark index is out of range. 
Invalid connection ' 78 | f'from landmark #{start_idx} to landmark #{end_idx}.') 79 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 80 | drawing_spec = MEDIAPIPE_FACEMESH_STYLE[cidx][connection] if isinstance( 81 | MEDIAPIPE_FACEMESH_STYLE[cidx], Mapping) else MEDIAPIPE_FACEMESH_STYLE[cidx] 82 | cv2.line(img, idx_to_coordinates[start_idx], 83 | idx_to_coordinates[end_idx], self.color, 84 | self.thickness) 85 | 86 | if is_drawing_landmarks: 87 | for idx, landmark_px in idx_to_coordinates.items(): 88 | circle_border_radius = max(self.circle_radius + 1, int(self.circle_radius * 1.2)) 89 | cv2.circle(img, landmark_px, circle_border_radius, self.color, self.thickness) 90 | # Fill color into the circle 91 | cv2.circle(img, landmark_px, self.circle_radius, self.color, self.thickness) 92 | 93 | return img 94 | 95 | class FaceLandmarkAnnotator(BaseLandmarkAnnotator): 96 | """ 97 | A facial landmark annotator that visualizes detected landmarks and their connections. 98 | 99 | Attributes: 100 | line_color (Tuple[int, int, int]): BGR color values for connection lines. Default is (0, 255, 0) - green. 101 | line_thickness (int): Thickness of connection lines in pixels. Default is 1. 102 | circle_color (Tuple[int, int, int]): BGR color values for landmark points. Default is (255, 255, 255) - white. 103 | circle_radius (int): Radius of landmark circles in pixels. Default is 2. 104 | 105 | Example: 106 | >>> from VisionFace.models.landmark_detection.Dlib import DlibFaceLandmarkDetector 107 | >>> from VisionFace.annotators.landmark import FaceLandmarkAnnotator 108 | >>> from VisionFace.annotators.helper.landmark_connections import DLIB_FACE_LANDMARK_CONNECTIONS 109 | >>> 110 | >>> detector = DlibFaceLandmarkDetector() 111 | >>> annotator = FaceLandmarkAnnotator( 112 | ... line_color=(0, 255, 0), 113 | ... circle_color=(255, 0, 0), 114 | ... circle_radius=3 115 | ... ) 116 | >>> 117 | >>> img = cv2.imread("face_image.jpg") 118 | >>> landmarks = detector.detect_landmarks(img) 119 | >>> annotated_img = annotator.annotate( 120 | ... img=img, 121 | ... landmarks=landmarks, 122 | ... connections=DLIB_FACE_LANDMARK_CONNECTIONS 123 | ... ) 124 | """ 125 | 126 | def __init__( 127 | self, 128 | line_color: Tuple[int, int, int] = (0, 255, 0), 129 | line_thickness: int = 1, 130 | circle_color: Tuple[int, int, int] = (255, 255, 255), 131 | circle_radius: int = 2 132 | ): 133 | """ 134 | Initialize the FaceLandmarkAnnotator with visualization parameters. 135 | 136 | Args: 137 | line_color (Tuple[int, int, int], optional): BGR color tuple for connection lines. Defaults to (0, 255, 0) - green. 138 | line_thickness (int, optional): Thickness of connection lines in pixels. Defaults to 1. 139 | circle_color (Tuple[int, int, int], optional): BGR color for landmark circles. Defaults to (255, 255, 255) - white. 140 | circle_radius (int, optional): Radius of landmark circles in pixels. Defaults to 2. 141 | """ 142 | self.line_color = line_color 143 | self.line_thickness = line_thickness 144 | self.circle_color = circle_color 145 | self.circle_radius = circle_radius 146 | 147 | def annotate( 148 | self, 149 | img: np.ndarray, 150 | landmarks: List[DetectedLandmark2D], 151 | connections: List[Tuple[int, int]] = "", 152 | is_drawing_landmarks: bool = True 153 | ) -> np.ndarray: 154 | """ 155 | Annotate an image with facial landmarks and their connections. 156 | 157 | Args: 158 | img (np.ndarray): Input image as a numpy array 159 | landmarks (List[DetectedLandmark2D]): List of detected facial landmarks. 
160 | Each landmark should have 'x' and 'y' attributes representing pixel coordinates. 161 | connections (List[Tuple[int, int]], optional): landmark connections for drawing facial feature outlines. 162 | is_drawing_landmarks (bool, optional): Whether to draw landmark annotations. 163 | If False, returns the original image unchanged. Defaults to True. 164 | 165 | Returns: 166 | np.ndarray: The annotated image with landmarks and connections drawn. 167 | 168 | """ 169 | if connections and is_drawing_landmarks: 170 | # Draw connection lines 171 | for connection in connections: 172 | start_idx = connection[0] 173 | end_idx = connection[1] 174 | if start_idx < len(landmarks) and end_idx < len(landmarks): 175 | start_point = [landmarks[start_idx].x, landmarks[start_idx].y] 176 | end_point = [landmarks[end_idx].x, landmarks[end_idx].y] 177 | img = cv2.line(img, start_point, end_point, self.line_color, self.line_thickness) 178 | 179 | # Draw landmark points 180 | for point in landmarks: 181 | landmark = [point.x, point.y] 182 | cv2.circle(img, landmark, self.circle_radius, self.circle_color, -1) 183 | 184 | return img 185 | -------------------------------------------------------------------------------- /visionface/annotators/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from typing import List, Union 4 | 5 | # VisionFace modules 6 | from visionface.annotators.base import BaseAnnotator, ImageType, RawDetection 7 | from visionface.models.Detector import Detector 8 | from visionface.annotators.utils import ( 9 | highlight_face, 10 | convert_img_to_numpy, 11 | get_xyxy 12 | ) 13 | 14 | class BoxAnnotator(BaseAnnotator): 15 | """ 16 | A class for drawing complete rectangular bounding boxes on an image using provided detections. 17 | 18 | Parameters 19 | ---------- 20 | color : tuple, optional 21 | The BGR color tuple for the bounding box lines, by default (245, 113, 47) 22 | thickness : int, optional 23 | The thickness of the bounding box lines in pixels, by default 4 24 | 25 | Attributes 26 | ---------- 27 | color : tuple 28 | The BGR color tuple used for drawing bounding box lines 29 | thickness : int 30 | The thickness of the bounding box lines in pixels 31 | 32 | Examples 33 | -------- 34 | >>> # Using Face Detector 35 | >>> from VisionFace import VisionFace, FaceAnnotators 36 | >>> detected_faces = VisionFace.detect_faces(img) 37 | >>> annotated_img = FaceAnnotators.box_annotator(img, detected_faces, highlight=True) 38 | 39 | >>> # Using raw detection lists 40 | >>> raw_detections = [[10, 20, 100, 200, 0.95, 'face'], [30, 40, 120, 220, 0.90, 'face']] 41 | >>> annotated_img = annotator.annotate(img, raw_detections, highlight=True) 42 | """ 43 | 44 | def __init__(self, color: tuple = (245, 113, 47), thickness: int = 4): 45 | self.color = color 46 | self.thickness = thickness 47 | 48 | def annotate( 49 | self, 50 | img: ImageType, 51 | detections: Union[List[Detector], List[RawDetection]], 52 | highlight: bool = True, 53 | highlight_opacity: float = 0.2, 54 | highlight_color: tuple = (255, 255, 255), 55 | ) -> ImageType: 56 | """ 57 | Annotate the image with complete rectangular bounding boxes for each detection. 58 | 59 | This method draws full rectangular bounding boxes around each detected region 60 | defined by the provided detections. Optionally, it can also highlight 61 | the detected regions with a semi-transparent overlay. 
62 | 63 | Parameters 64 | ---------- 65 | img : ImageType 66 | The input image to annotate (can be a file path string, numpy array, or PIL Image) 67 | detections : Union[List[Detector], List[RawDetection]] 68 | List of detections, where each detection can be either: 69 | - Detector object with an xyxy property returning (x1, y1, x2, y2) 70 | - RawDetection list in format [x1, y1, x2, y2, confidence, class_name] 71 | highlight : bool, optional 72 | Whether to highlight the detected regions, by default True 73 | highlight_opacity : float, optional 74 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 75 | highlight_color : tuple, optional 76 | BGR color tuple for the highlight, by default (255, 255, 255) 77 | 78 | Returns 79 | ------- 80 | ImageType 81 | The annotated image with rectangular bounding boxes and optional highlights 82 | """ 83 | if img is None: 84 | return 85 | # Convert image to numpy for processing 86 | img = convert_img_to_numpy(img) 87 | 88 | # Apply highlighting if enabled 89 | if highlight: 90 | img = highlight_face( 91 | img, 92 | detections, 93 | highlight_opacity=highlight_opacity, 94 | highlight_color=highlight_color 95 | ) 96 | 97 | # Draw complete rectangular bounding boxes 98 | for detection in detections: 99 | x1, y1, x2, y2 = get_xyxy(detection) 100 | cv2.rectangle(img, (x1, y1), (x2, y2), self.color, thickness=self.thickness) 101 | 102 | return img 103 | 104 | class BoxCornerAnnotator(BaseAnnotator): 105 | """ 106 | A class for drawing box corners on an image using provided detections. 107 | 108 | Parameters 109 | ---------- 110 | color : tuple, optional 111 | The BGR color tuple for the corner lines, by default (245, 113, 47) 112 | thickness : int, optional 113 | The thickness of the corner lines in pixels, by default 4 114 | corner_length : int, optional 115 | The length of each corner segment in pixels, by default 15 116 | 117 | Attributes 118 | ---------- 119 | color : tuple 120 | The BGR color tuple used for drawing corner lines 121 | thickness : int 122 | The thickness of the corner lines in pixels 123 | corner_length : int 124 | The length of each corner segment in pixels 125 | 126 | Examples 127 | -------- 128 | >>> # Using Face Detector 129 | >>> from VisionFace import VisionFace, FaceAnnotators 130 | >>> detected_faces = VisionFace.detect_faces(img) 131 | >>> annotated_img = FaceAnnotators.box_corner_annotator(img, detected_faces, highlight=True) 132 | 133 | >>> # Using raw detection lists 134 | >>> raw_detections = [[10, 20, 100, 200, 0.95, 'face'], [30, 40, 120, 220, 0.90, 'face']] 135 | >>> annotated_img = annotator.annotate(img, raw_detections, highlight=True) 136 | """ 137 | 138 | def __init__(self, color: tuple = (245, 113, 47), thickness: int = 4, corner_length: int = 15): 139 | self.color = color 140 | self.thickness = thickness 141 | self.corner_length = corner_length 142 | 143 | def annotate( 144 | self, 145 | img: ImageType, 146 | detections: Union[List[Detector], List[RawDetection]], 147 | highlight: bool = True, 148 | highlight_opacity: float = 0.2, 149 | highlight_color: tuple = (255, 255, 255), 150 | ) -> ImageType: 151 | """ 152 | Annotate the image with corner boxes for each detection. 153 | 154 | This method draws L-shaped corners at each corner of the bounding boxes 155 | defined by the provided detections. Optionally, it can also highlight 156 | the detected regions with a semi-transparent overlay. 
157 | 158 | Parameters 159 | ---------- 160 | img : ImageType 161 | The input image to annotate (can be a file path string, numpy array, or PIL Image) 162 | detections : Union[List[Detector], List[RawDetection]] 163 | List of detections, where each detection can be either: 164 | - Detector object with an xyxy property returning (x1, y1, x2, y2) 165 | - RawDetection list in format [x1, y1, x2, y2, confidence, class_name] 166 | highlight : bool, optional 167 | Whether to highlight the detected regions, by default True 168 | highlight_opacity : float, optional 169 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 170 | highlight_color : tuple, optional 171 | BGR color tuple for the highlight, by default (255, 255, 255) 172 | 173 | Returns 174 | ------- 175 | ImageType 176 | The annotated image with box corners and optional highlights 177 | """ 178 | # Convert image to numpy for processing 179 | img = convert_img_to_numpy(img) 180 | 181 | # Apply highlighting if enabled 182 | if highlight: 183 | img = highlight_face( 184 | img, 185 | detections, 186 | highlight_opacity=highlight_opacity, 187 | highlight_color=highlight_color 188 | ) 189 | 190 | # Draw box corners 191 | for detection in detections: 192 | x1, y1, x2, y2 = get_xyxy(detection) 193 | corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)] 194 | for x, y in corners: 195 | x_end = x + self.corner_length if x == x1 else x - self.corner_length 196 | cv2.line(img, (x, y), (x_end, y), self.color, thickness=self.thickness) 197 | 198 | y_end = y + self.corner_length if y == y1 else y - self.corner_length 199 | cv2.line(img, (x, y), (x, y_end), self.color, thickness=self.thickness) 200 | 201 | return img -------------------------------------------------------------------------------- /visionface/modules/recognition.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Union 2 | 3 | import numpy as np 4 | from torch import embedding 5 | 6 | from visionface.commons.detection_utils import select_max_conf_faces 7 | from visionface.models.Detector import DetectedFace 8 | from visionface.modules.detection import FaceDetection 9 | from visionface.modules.embedding import FaceEmbedder 10 | from visionface.commons.image_utils import load_images, validate_images 11 | 12 | from visionface.modules.modeling import build_model 13 | 14 | 15 | class FaceRecognition: 16 | """ 17 | FaceRecognition pipeline for face detection, embedding, storage, and search. 18 | """ 19 | def __init__( 20 | self, 21 | detector_backbone: str = "yolo-small", 22 | embedding_backbone: str = "FaceNet-VGG", 23 | db_backend: str = "qdrant", 24 | db_config: Optional[Dict] = None 25 | ) -> None: 26 | """ 27 | Initialize the face recognition system with specified components. 28 | 29 | Args: 30 | detector_backbone: 31 | Backbone name for the face detector (e.g., "yolo-small", "mtcnn"). 32 | embedding_backbone: 33 | Backbone name for the face embedder (e.g., "FaceNet-VGG", "ArcFace"). 34 | db_backend: 35 | Database backend name. Supported values include: 36 | - 'qdrant' 37 | - 'milvus' 38 | - 'file' 39 | db_config: 40 | Optional dictionary for configuring the vector database connection. 41 | This is primarily used when `db_backend='qdrant'`. Supported keys include: 42 | 43 | - host (str): Hostname of the Qdrant server. Default is `"localhost"`. 44 | - port (int): Port number of the Qdrant server. Default is `6333`. 45 | - url (str, optional): Full URL (overrides host and port if provided). 
46 | - api_key (str, optional): API key for secure Qdrant access. 47 | - https (bool): Whether to use HTTPS instead of HTTP. Default is `False`. 48 | - timeout (float): Timeout duration in seconds for requests. Default is `5.0`. 49 | """ 50 | self.face_detector = FaceDetection(detector_backbone=detector_backbone) 51 | self.face_embedder = FaceEmbedder(embedding_backbone=embedding_backbone) 52 | self.db = self._init_db_backend(db_backend, db_config or {}) 53 | 54 | def _init_db_backend(self, db_backend: str, db_config: Dict) -> Any: 55 | """ 56 | Initializes the vector database backend. 57 | 58 | Args: 59 | db_backend: The name of the backend (e.g., 'qdrant'). 60 | db_config: Configuration parameters for the backend. 61 | 62 | Returns: 63 | A vector database client instance. 64 | """ 65 | if db_backend == "qdrant": 66 | from visionface.db.qdrant_client import QdrantVectorDB 67 | return QdrantVectorDB(**db_config) 68 | elif db_backend == "milvus": 69 | pass 70 | elif db_backend == "file": 71 | pass 72 | else: 73 | raise ValueError(f"Unsupported DB backend: {db_backend}") 74 | 75 | def _compute_embeddings( 76 | self, 77 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 78 | normalize_embeddings: bool = True 79 | ) -> List[List[float]]: 80 | """ 81 | Detects and embeds the most confident face in each image. 82 | 83 | Args: 84 | images: Image(s) as file path(s) or NumPy array(s). 85 | normalize: Whether to normalize the embedding vectors. 86 | 87 | Returns: 88 | List of face embedding vectors. 89 | """ 90 | detections = self.face_detector.detect_faces(images, return_cropped_faces=True) 91 | top_faces = select_max_conf_faces(detections) 92 | embeddings = self.face_embedder.embed_faces(top_faces, normalize_embeddings=normalize_embeddings) 93 | return embeddings.to_list() 94 | 95 | def upsert_faces( 96 | self, 97 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 98 | labels: Union[str, List[str]], 99 | collection_name: str, 100 | batch_size: int = 10, 101 | normalize_embeddings: bool = True 102 | ) -> None: 103 | """ 104 | Detect, embed, and store faces in a collection with automatic face selection and upserting. 105 | 106 | Parameters: 107 | ---------- 108 | images (Union[str, np.ndarray, List[np.ndarray], List[str]]): 109 | Input image(s) containing faces to process and store. Can be: 110 | - str: Path to a single image file 111 | - np.ndarray: Single image as a numpy array (H, W, C format expected) 112 | - List[np.ndarray]: Multiple images as numpy arrays 113 | - List[str]: Multiple image file paths 114 | 115 | labels (Union[str, List[str]]): 116 | Label(s) to associate with the detected faces. 117 | 118 | collection_name (str): 119 | Name of the face collection where embeddings will be stored. If the collection 120 | doesn't exist, it will be created automatically. 121 | 122 | batch_size (int, optional): 123 | Number of images to process simultaneously in each batch. Larger batch sizes 124 | can improve processing speed but require more memory. Defaults to 10. 125 | 126 | normalize_embeddings (bool, optional): 127 | Whether to L2-normalize the computed face embeddings before storage. Defaults to True. 128 | 129 | Returns: 130 | ---------- 131 | None: This method doesn't return a value but modifies the collection state. 
132 | """ 133 | vector_size = self.face_embedder.vector_size 134 | self.db.create_collection(collection_name, vector_size=vector_size) 135 | 136 | embeddings = self._compute_embeddings(images, normalize_embeddings) 137 | payloads = [{"face_name": label} for label in labels] 138 | 139 | self.db.insert_embeddings( 140 | collection_name=collection_name, 141 | embeddings=embeddings, 142 | payloads=payloads, 143 | batch_size=batch_size 144 | ) 145 | 146 | 147 | def search_faces( 148 | self, 149 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 150 | collection_name: str, 151 | score_threshold: Optional[float] = None, 152 | top_k: int = 5, 153 | ) -> List[Dict]: 154 | """ 155 | Search for similar faces in a specified collection using facial recognition embeddings. 156 | 157 | Parameters 158 | ---------- 159 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 160 | A single image or a list of images. Each image can be either a file path (str) 161 | or an image array. 162 | collection_name (str): 163 | Name of the face collection to search within. The collection must exist 164 | and contain pre-indexed face embeddings. 165 | 166 | score_threshold (Optional[float], optional): 167 | Minimum similarity score threshold for returned matches. Only faces with 168 | similarity scores above this threshold will be included in results. 169 | If None, no filtering is applied. Range typically [0.0, 1.0] where 170 | higher values indicate greater similarity. Defaults to None. 171 | 172 | top_k (int, optional): 173 | Maximum number of most similar faces to return per input image. 174 | Results are ordered by similarity score in descending order. 175 | Defaults to 5. 176 | 177 | Returns: 178 | ---------- 179 | List[Dict]: 180 | List of search results, one dictionary per input image. Each dictionary 181 | contains the top-k most similar faces found in the collection. 
182 | """ 183 | embeddings = self._compute_embeddings(images) 184 | return self.db.search_embeddings( 185 | collection_name=collection_name, 186 | query_vectors=embeddings, 187 | score_threshold=score_threshold, 188 | top_k=top_k 189 | ) 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /visionface/models/landmark_detection/utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | def medipipe_mesh_landmark_names(): 5 | FACE_MESH_LANDMARK_NAMES = { 6 | # Face contour (jawline and outer face boundary) 7 | 0: "face_contour_0", 1: "nose_tip", 2: "nose_bridge_2", 3: "nose_bridge_3", 4: "nose_bridge_4", 8 | 5: "nose_bridge_5", 6: "nose_bridge_6", 7: "left_eye_inner_7", 8: "nose_bridge_8", 9: "forehead_center_9", 9 | 10: "forehead_center_10", 11: "face_contour_11", 12: "face_contour_12", 13: "upper_lip_center_13", 10 | 14: "lower_lip_center_14", 15: "face_contour_15", 16: "face_contour_16", 17: "upper_lip_17", 11 | 18: "chin_18", 19: "face_contour_19", 20: "face_contour_20", 12 | 13 | # Left eye region (from viewer's perspective) 14 | 33: "left_eye_outer_33", 7: "left_eye_inner_7", 163: "left_eye_lower_163", 144: "left_eye_lower_144", 15 | 145: "left_eye_lower_145", 153: "left_eye_lower_153", 154: "left_eye_lower_154", 155: "left_eye_lower_155", 16 | 133: "left_eye_outer_133", 173: "left_eye_upper_173", 157: "left_eye_upper_157", 158: "left_eye_upper_158", 17 | 159: "left_eye_upper_159", 160: "left_eye_upper_160", 161: "left_eye_upper_161", 246: "left_eye_lower_246", 18 | 19 | # Right eye region 20 | 362: "right_eye_inner_362", 382: "right_eye_upper_382", 381: "right_eye_upper_381", 380: "right_eye_upper_380", 21 | 374: "right_eye_upper_374", 373: "right_eye_upper_373", 390: "right_eye_upper_390", 249: "right_eye_outer_249", 22 | 263: "right_eye_outer_263", 466: "right_eye_lower_466", 388: "right_eye_lower_388", 387: "right_eye_lower_387", 23 | 386: "right_eye_lower_386", 385: "right_eye_lower_385", 384: "right_eye_lower_384", 398: "right_eye_upper_398", 24 | 25 | # Left eyebrow 26 | 46: "left_eyebrow_inner_46", 53: "left_eyebrow_53", 52: "left_eyebrow_52", 51: "left_eyebrow_51", 27 | 48: "left_eyebrow_48", 115: "left_eyebrow_115", 131: "left_eyebrow_outer_131", 134: "left_eyebrow_134", 28 | 102: "left_eyebrow_102", 49: "left_eyebrow_49", 220: "left_eyebrow_220", 305: "left_eyebrow_305", 29 | 30 | # Right eyebrow 31 | 276: "right_eyebrow_inner_276", 283: "right_eyebrow_283", 282: "right_eyebrow_282", 295: "right_eyebrow_295", 32 | 285: "right_eyebrow_285", 336: "right_eyebrow_336", 296: "right_eyebrow_296", 334: "right_eyebrow_334", 33 | 293: "right_eyebrow_293", 300: "right_eyebrow_300", 441: "right_eyebrow_outer_441", 34 | 35 | # Nose detailed points 36 | 168: "nose_bridge_168", 195: "nostril_left_195", 197: "nostril_left_197", 196: "nostril_left_196", 37 | 3: "nose_bridge_3", 51: "nose_left_51", 48: "nose_left_48", 115: "nose_left_115", 131: "nose_left_131", 38 | 134: "nose_left_134", 102: "nose_left_102", 49: "nose_left_49", 220: "nose_left_220", 305: "nose_left_305", 39 | 278: "nose_right_278", 279: "nose_right_279", 420: "nostril_right_420", 456: "nostril_right_456", 40 | 248: "nose_right_248", 281: "nose_right_281", 275: "nose_right_275", 41 | 42 | # Lips outer boundary 43 | 61: "mouth_left_corner_61", 84: "upper_lip_left_84", 17: "upper_lip_17", 314: "upper_lip_right_314", 44 | 405: "mouth_right_corner_405", 320: "lower_lip_right_320", 307: "lower_lip_307", 375: "lower_lip_375", 
45 | 321: "lower_lip_321", 308: "lower_lip_308", 324: "lower_lip_324", 318: "lower_lip_318", 46 | 47 | # Lips inner boundary 48 | 78: "inner_lip_upper_78", 95: "inner_lip_upper_95", 88: "inner_lip_upper_88", 178: "inner_lip_upper_178", 49 | 87: "inner_lip_upper_87", 14: "inner_lip_lower_14", 317: "inner_lip_lower_317", 402: "inner_lip_lower_402", 50 | 318: "inner_lip_lower_318", 324: "inner_lip_lower_324", 308: "inner_lip_lower_308", 415: "inner_lip_lower_415", 51 | 52 | # Additional mouth points 53 | 291: "mouth_right_corner_291", 303: "mouth_upper_303", 267: "mouth_lower_267", 269: "mouth_lower_269", 54 | 270: "mouth_lower_270", 267: "mouth_lower_267", 271: "mouth_lower_271", 272: "mouth_lower_272", 55 | 56 | # Chin and jaw 57 | 175: "chin_left_175", 199: "chin_bottom_199", 175: "chin_right_175", 18: "chin_center_18", 58 | 175: "jaw_left_175", 199: "jaw_bottom_199", 175: "jaw_right_175", 59 | 60 | # Cheek regions 61 | 116: "left_cheek_116", 117: "left_cheek_117", 118: "left_cheek_118", 119: "left_cheek_119", 62 | 120: "left_cheek_120", 121: "left_cheek_121", 126: "left_cheek_126", 142: "left_cheek_142", 63 | 36: "left_cheek_36", 205: "left_cheek_205", 206: "left_cheek_206", 207: "left_cheek_207", 64 | 213: "left_cheek_213", 192: "left_cheek_192", 147: "left_cheek_147", 65 | 66 | 345: "right_cheek_345", 346: "right_cheek_346", 347: "right_cheek_347", 348: "right_cheek_348", 67 | 349: "right_cheek_349", 350: "right_cheek_350", 451: "right_cheek_451", 452: "right_cheek_452", 68 | 453: "right_cheek_453", 464: "right_cheek_464", 435: "right_cheek_435", 410: "right_cheek_410", 69 | 454: "right_cheek_454", 70 | 71 | # Forehead points 72 | 151: "forehead_151", 337: "forehead_337", 299: "forehead_299", 333: "forehead_333", 73 | 298: "forehead_298", 301: "forehead_301", 284: "forehead_284", 251: "forehead_251", 74 | 389: "forehead_389", 356: "forehead_356", 454: "forehead_454", 323: "forehead_323", 75 | 361: "forehead_361", 340: "forehead_340", 76 | 77 | # Temple regions 78 | 103: "left_temple_103", 67: "left_temple_67", 109: "left_temple_109", 338: "temple_338", 79 | 332: "right_temple_332", 297: "right_temple_297", 80 | } 81 | 82 | # Fill remaining indices with generic names 83 | for i in range(478): 84 | if i not in FACE_MESH_LANDMARK_NAMES: 85 | # Determine general region based on index ranges 86 | if i < 17: 87 | FACE_MESH_LANDMARK_NAMES[i] = f"face_contour_{i}" 88 | elif 17 <= i < 68: 89 | FACE_MESH_LANDMARK_NAMES[i] = f"right_eyebrow_region_{i}" 90 | elif 68 <= i < 103: 91 | FACE_MESH_LANDMARK_NAMES[i] = f"nose_bridge_region_{i}" 92 | elif 103 <= i < 134: 93 | FACE_MESH_LANDMARK_NAMES[i] = f"right_eye_region_{i}" 94 | elif 134 <= i < 155: 95 | FACE_MESH_LANDMARK_NAMES[i] = f"left_eye_region_{i}" 96 | elif 155 <= i < 180: 97 | FACE_MESH_LANDMARK_NAMES[i] = f"left_eyebrow_region_{i}" 98 | elif 180 <= i < 200: 99 | FACE_MESH_LANDMARK_NAMES[i] = f"nose_tip_region_{i}" 100 | elif 200 <= i < 220: 101 | FACE_MESH_LANDMARK_NAMES[i] = f"nostril_region_{i}" 102 | elif 220 <= i < 250: 103 | FACE_MESH_LANDMARK_NAMES[i] = f"cheek_region_{i}" 104 | elif 250 <= i < 300: 105 | FACE_MESH_LANDMARK_NAMES[i] = f"mouth_region_{i}" 106 | elif 300 <= i < 340: 107 | FACE_MESH_LANDMARK_NAMES[i] = f"chin_jaw_region_{i}" 108 | elif 340 <= i < 400: 109 | FACE_MESH_LANDMARK_NAMES[i] = f"right_face_region_{i}" 110 | else: 111 | FACE_MESH_LANDMARK_NAMES[i] = f"face_mesh_{i}" 112 | 113 | return FACE_MESH_LANDMARK_NAMES 114 | 115 | 116 | def dlib_landmarks_names(): 117 | DLIB_LANDMARK_NAMES = { 118 | 0: "jaw_0", 1: 
"jaw_1", 2: "jaw_2", 3: "jaw_3", 4: "jaw_4", 5: "jaw_5", 119 | 6: "jaw_6", 7: "jaw_7", 8: "jaw_8", 9: "jaw_9", 10: "jaw_10", 11: "jaw_11", 120 | 12: "jaw_12", 13: "jaw_13", 14: "jaw_14", 15: "jaw_15", 16: "jaw_16", 121 | 122 | 17: "right_eyebrow_17", 18: "right_eyebrow_18", 19: "right_eyebrow_19", 123 | 20: "right_eyebrow_20", 21: "right_eyebrow_21", 124 | 125 | 22: "left_eyebrow_22", 23: "left_eyebrow_23", 24: "left_eyebrow_24", 126 | 25: "left_eyebrow_25", 26: "left_eyebrow_26", 127 | 128 | 27: "nose_27", 28: "nose_28", 29: "nose_29", 30: "nose_30", 129 | 31: "nose_31", 32: "nose_32", 33: "nose_33", 34: "nose_34", 35: "nose_35", 130 | 131 | 36: "right_eye_36", 37: "right_eye_37", 38: "right_eye_38", 132 | 39: "right_eye_39", 40: "right_eye_40", 41: "right_eye_41", 133 | 134 | 42: "left_eye_42", 43: "left_eye_43", 44: "left_eye_44", 135 | 45: "left_eye_45", 46: "left_eye_46", 47: "left_eye_47", 136 | 137 | 48: "mouth_48", 49: "mouth_49", 50: "mouth_50", 51: "mouth_51", 138 | 52: "mouth_52", 53: "mouth_53", 54: "mouth_54", 55: "mouth_55", 139 | 56: "mouth_56", 57: "mouth_57", 58: "mouth_58", 59: "mouth_59", 140 | 60: "mouth_60", 61: "mouth_61", 62: "mouth_62", 63: "mouth_63", 141 | 64: "mouth_64", 65: "mouth_65", 66: "mouth_66", 67: "mouth_67" 142 | } 143 | 144 | return DLIB_LANDMARK_NAMES -------------------------------------------------------------------------------- /visionface/models/face_embedding/FaceNet.py: -------------------------------------------------------------------------------- 1 | # This module is adapted from: 2 | # https://github.com/timesler/facenet-pytorch 3 | # 4 | # Original Author: Tim Esler (timesler) 5 | # License: MIT License 6 | 7 | from typing import Optional 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchvision.transforms import functional as FT 12 | 13 | # VisionFace modules 14 | from visionface.models.FaceEmbedding import FaceEmbedder 15 | from visionface.commons.download_files import download_model_weights 16 | 17 | 18 | FACENET_VGG_WEIGHTS = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180402-114759-vggface2.pt" 19 | FACENET_CASIA_WEIGHTS = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180408-102900-casia-webface.pt" 20 | 21 | 22 | class FaceNetVGG(FaceEmbedder): 23 | """ 24 | FaceNet-VGG model class 25 | """ 26 | def __init__(self): 27 | super().__init__() 28 | self.model = InceptionResnetV1(pretrained="vggface2") 29 | self.model_name = "FaceNet-VGG" 30 | self.input_shape = (160, 160) 31 | self.output_shape = 512 32 | 33 | class FaceNetCASIA(FaceEmbedder): 34 | """ 35 | FaceNet-CASIA model class 36 | """ 37 | def __init__(self): 38 | super().__init__() 39 | self.model = InceptionResnetV1(pretrained="casia-webface") 40 | self.model_name = "FaceNet-CASIA" 41 | self.input_shape = (160, 160) 42 | self.output_shape = 512 43 | 44 | 45 | class InceptionResnetV1(nn.Module): 46 | """ 47 | FaceNet(InceptionResnetV1) model class 48 | """ 49 | def __init__( 50 | self, 51 | pretrained: Optional[str] = None, 52 | normalize_embeddings: bool = True, 53 | dropout_prob: float = 0.6, 54 | ) -> None: 55 | """ 56 | InceptionResnetV1 model for face embedding extraction, based on FaceNet. 57 | Supports pretrained weights from VGGFace2 or CASIA-WebFace. 58 | 59 | Args: 60 | pretrained (str, optional): One of 'vggface2' or 'casia-webface'. 61 | normalize_embeddings (bool, optional): Whether to L2-normalize embeddings. Default is True. 
62 | dropout_prob (float, optional): Dropout probability before the embedding layer. Default is 0.6. 63 | 64 | Raises: 65 | Exception: If `pretrained` is None or invalid. 66 | """ 67 | super().__init__() 68 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 69 | self.normalize_embeddings = normalize_embeddings 70 | if pretrained == 'vggface2': 71 | tmp_classes = 8631 72 | weight_url = FACENET_VGG_WEIGHTS 73 | elif pretrained == 'casia-webface': 74 | tmp_classes = 10575 75 | weight_url = FACENET_CASIA_WEIGHTS 76 | elif pretrained is None: 77 | raise Exception('"pretrained" must be specified') 78 | # Define layers 79 | self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2) 80 | self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1) 81 | self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1) 82 | self.maxpool_3a = nn.MaxPool2d(3, stride=2) 83 | self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1) 84 | self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1) 85 | self.conv2d_4b = BasicConv2d(192, 256, kernel_size=3, stride=2) 86 | self.repeat_1 = nn.Sequential( 87 | Block35(scale=0.17), 88 | Block35(scale=0.17), 89 | Block35(scale=0.17), 90 | Block35(scale=0.17), 91 | Block35(scale=0.17), 92 | ) 93 | self.mixed_6a = Mixed_6a() 94 | self.repeat_2 = nn.Sequential( 95 | Block17(scale=0.10), 96 | Block17(scale=0.10), 97 | Block17(scale=0.10), 98 | Block17(scale=0.10), 99 | Block17(scale=0.10), 100 | Block17(scale=0.10), 101 | Block17(scale=0.10), 102 | Block17(scale=0.10), 103 | Block17(scale=0.10), 104 | Block17(scale=0.10), 105 | ) 106 | self.mixed_7a = Mixed_7a() 107 | self.repeat_3 = nn.Sequential( 108 | Block8(scale=0.20), 109 | Block8(scale=0.20), 110 | Block8(scale=0.20), 111 | Block8(scale=0.20), 112 | Block8(scale=0.20), 113 | ) 114 | self.block8 = Block8(noReLU=True) 115 | self.avgpool_1a = nn.AdaptiveAvgPool2d(1) 116 | self.dropout = nn.Dropout(dropout_prob) 117 | self.last_linear = nn.Linear(1792, 512, bias=False) 118 | self.last_bn = nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True) 119 | 120 | if pretrained is not None: 121 | self.logits = nn.Linear(512, tmp_classes) 122 | model_path = download_model_weights(filename=f"facenet_{pretrained}.pt", 123 | download_url=weight_url) 124 | state_dict = torch.load(model_path, weights_only=False) 125 | self.load_state_dict(state_dict) 126 | 127 | self.to(self.device) 128 | self.eval() 129 | 130 | def forward(self, inputs: torch.Tensor , normalize_embeddings: bool = True) -> torch.Tensor: 131 | """ 132 | Computes embeddings from input image tensors. 133 | 134 | Args: 135 | inputs (torch.Tensor): A batch of images as a 4D tensor of shape (B, C, H, W) 136 | normalize_embeddings (bool, optional): If True, L2-normalizes the output embeddings. 137 | Defaults to True. 138 | 139 | Returns: 140 | torch.Tensor: A tensor of shape (B, D) containing the image embeddings, 141 | where D is the embedding dimension. 
142 | """ 143 | x = self.conv2d_1a(inputs) 144 | x = self.conv2d_2a(x) 145 | x = self.conv2d_2b(x) 146 | x = self.maxpool_3a(x) 147 | x = self.conv2d_3b(x) 148 | x = self.conv2d_4a(x) 149 | x = self.conv2d_4b(x) 150 | x = self.repeat_1(x) 151 | x = self.mixed_6a(x) 152 | x = self.repeat_2(x) 153 | x = self.mixed_7a(x) 154 | x = self.repeat_3(x) 155 | x = self.block8(x) 156 | x = self.avgpool_1a(x) 157 | x = self.dropout(x) 158 | x = self.last_linear(x.view(x.shape[0], -1)) 159 | x = self.last_bn(x) 160 | if normalize_embeddings: 161 | # Normalize embeddings (L2) 162 | x = F.normalize(x, p=2, dim=1) 163 | return x 164 | 165 | class BasicConv2d(nn.Module): 166 | 167 | def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): 168 | super().__init__() 169 | self.conv = nn.Conv2d( 170 | in_planes, out_planes, 171 | kernel_size=kernel_size, stride=stride, 172 | padding=padding, bias=False 173 | ) # verify bias false 174 | self.bn = nn.BatchNorm2d( 175 | out_planes, 176 | eps=0.001, # value found in tensorflow 177 | momentum=0.1, # default pytorch value 178 | affine=True 179 | ) 180 | self.relu = nn.ReLU(inplace=False) 181 | 182 | def forward(self, x): 183 | x = self.conv(x) 184 | x = self.bn(x) 185 | x = self.relu(x) 186 | return x 187 | 188 | 189 | class Block35(nn.Module): 190 | 191 | def __init__(self, scale=1.0): 192 | super().__init__() 193 | 194 | self.scale = scale 195 | 196 | self.branch0 = BasicConv2d(256, 32, kernel_size=1, stride=1) 197 | 198 | self.branch1 = nn.Sequential( 199 | BasicConv2d(256, 32, kernel_size=1, stride=1), 200 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) 201 | ) 202 | 203 | self.branch2 = nn.Sequential( 204 | BasicConv2d(256, 32, kernel_size=1, stride=1), 205 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1), 206 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) 207 | ) 208 | 209 | self.conv2d = nn.Conv2d(96, 256, kernel_size=1, stride=1) 210 | self.relu = nn.ReLU(inplace=False) 211 | 212 | def forward(self, x): 213 | x0 = self.branch0(x) 214 | x1 = self.branch1(x) 215 | x2 = self.branch2(x) 216 | out = torch.cat((x0, x1, x2), 1) 217 | out = self.conv2d(out) 218 | out = out * self.scale + x 219 | out = self.relu(out) 220 | return out 221 | 222 | 223 | class Block17(nn.Module): 224 | 225 | def __init__(self, scale=1.0): 226 | super().__init__() 227 | 228 | self.scale = scale 229 | 230 | self.branch0 = BasicConv2d(896, 128, kernel_size=1, stride=1) 231 | 232 | self.branch1 = nn.Sequential( 233 | BasicConv2d(896, 128, kernel_size=1, stride=1), 234 | BasicConv2d(128, 128, kernel_size=(1,7), stride=1, padding=(0,3)), 235 | BasicConv2d(128, 128, kernel_size=(7,1), stride=1, padding=(3,0)) 236 | ) 237 | 238 | self.conv2d = nn.Conv2d(256, 896, kernel_size=1, stride=1) 239 | self.relu = nn.ReLU(inplace=False) 240 | 241 | def forward(self, x): 242 | x0 = self.branch0(x) 243 | x1 = self.branch1(x) 244 | out = torch.cat((x0, x1), 1) 245 | out = self.conv2d(out) 246 | out = out * self.scale + x 247 | out = self.relu(out) 248 | return out 249 | 250 | 251 | class Block8(nn.Module): 252 | 253 | def __init__(self, scale=1.0, noReLU=False): 254 | super().__init__() 255 | 256 | self.scale = scale 257 | self.noReLU = noReLU 258 | 259 | self.branch0 = BasicConv2d(1792, 192, kernel_size=1, stride=1) 260 | 261 | self.branch1 = nn.Sequential( 262 | BasicConv2d(1792, 192, kernel_size=1, stride=1), 263 | BasicConv2d(192, 192, kernel_size=(1,3), stride=1, padding=(0,1)), 264 | BasicConv2d(192, 192, kernel_size=(3,1), stride=1, padding=(1,0)) 265 
| ) 266 | 267 | self.conv2d = nn.Conv2d(384, 1792, kernel_size=1, stride=1) 268 | if not self.noReLU: 269 | self.relu = nn.ReLU(inplace=False) 270 | 271 | def forward(self, x): 272 | x0 = self.branch0(x) 273 | x1 = self.branch1(x) 274 | out = torch.cat((x0, x1), 1) 275 | out = self.conv2d(out) 276 | out = out * self.scale + x 277 | if not self.noReLU: 278 | out = self.relu(out) 279 | return out 280 | 281 | 282 | class Mixed_6a(nn.Module): 283 | 284 | def __init__(self): 285 | super().__init__() 286 | 287 | self.branch0 = BasicConv2d(256, 384, kernel_size=3, stride=2) 288 | 289 | self.branch1 = nn.Sequential( 290 | BasicConv2d(256, 192, kernel_size=1, stride=1), 291 | BasicConv2d(192, 192, kernel_size=3, stride=1, padding=1), 292 | BasicConv2d(192, 256, kernel_size=3, stride=2) 293 | ) 294 | 295 | self.branch2 = nn.MaxPool2d(3, stride=2) 296 | 297 | def forward(self, x): 298 | x0 = self.branch0(x) 299 | x1 = self.branch1(x) 300 | x2 = self.branch2(x) 301 | out = torch.cat((x0, x1, x2), 1) 302 | return out 303 | 304 | 305 | class Mixed_7a(nn.Module): 306 | 307 | def __init__(self): 308 | super().__init__() 309 | 310 | self.branch0 = nn.Sequential( 311 | BasicConv2d(896, 256, kernel_size=1, stride=1), 312 | BasicConv2d(256, 384, kernel_size=3, stride=2) 313 | ) 314 | 315 | self.branch1 = nn.Sequential( 316 | BasicConv2d(896, 256, kernel_size=1, stride=1), 317 | BasicConv2d(256, 256, kernel_size=3, stride=2) 318 | ) 319 | 320 | self.branch2 = nn.Sequential( 321 | BasicConv2d(896, 256, kernel_size=1, stride=1), 322 | BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1), 323 | BasicConv2d(256, 256, kernel_size=3, stride=2) 324 | ) 325 | 326 | self.branch3 = nn.MaxPool2d(3, stride=2) 327 | 328 | def forward(self, x): 329 | x0 = self.branch0(x) 330 | x1 = self.branch1(x) 331 | x2 = self.branch2(x) 332 | x3 = self.branch3(x) 333 | out = torch.cat((x0, x1, x2, x3), 1) 334 | return out 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VisionFace 2 | 3 |
4 | 5 | ![VisionFace](https://github.com/user-attachments/assets/52ac9123-304c-4098-a1e5-f413d03bfec9) 6 | 7 | [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) 8 | [![PyPI version](https://badge.fury.io/py/visionface.svg)](https://badge.fury.io/py/visionface) 9 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 10 | 11 | 12 | **Modern face detection, recognition & analysis in 3 lines of code** 13 | 14 | VisionFace is a state-of-the-art, open-source framework for comprehensive face analysis, built with PyTorch. It provides a unified interface for face detection, recognition, landmark detection, and visualization with support for multiple cutting-edge models. 15 | 16 | [Quick Start](#-quick-start) • [Examples](#-examples) • [Models](#-models) • [API Docs](https://visionface.readthedocs.io) 17 | 18 |
19 | 20 | ## ✨ What VisionFace Does 21 | 22 |
23 | 
24 | <table align="center">
25 |   <tr>
26 |     <td align="center"><img src="banners/face_detection.jpg" alt="Face Detection"><br><b>Face Detection</b></td>
27 |     <td align="center"><img src="banners/face_recognition.jpg" alt="Face Recognition"><br><b>Face Recognition</b></td>
28 |     <td align="center"><img src="banners/face_landmarks.jpg" alt="Face Landmarks"><br><b>Face Landmarks</b></td>
29 |   </tr>
30 |   <tr>
31 |     <td align="center"><img src="banners/face_analysis.jpg" alt="Face Analysis"><br><b>Face Analysis</b></td>
32 |     <td align="center"><img src="banners/face_verification.jpg" alt="Face Verification"><br><b>Face Verification</b></td>
33 |     <td align="center"><img src="banners/face_visualization.jpg" alt="Face Visualization"><br><b>Face Visualization</b></td>
34 |   </tr>
35 | </table>
36 | 
78 | 79 | 80 | - **Detect faces** in images with 12+ models (YOLO, MediaPipe, MTCNN...) 81 | - **Recognize faces** with vector search and embedding models 82 | - **Extract landmarks** (68-point, 468-point face mesh) 83 | - **Batch process** thousands of images efficiently 84 | - **Production-ready** with Docker support and REST API 85 | 86 | ## 🚀 Quick Start 87 | 88 | ```bash 89 | pip install visionface 90 | ``` 91 | 92 | ### Face Detection 93 | The `Face Detection` module is your gateway to identifying faces in any image. Built for both beginners and experts, it provides a unified interface to 12+ cutting-edge detection models. 94 | 95 | ✨ **Key Features:** 96 | * **Multiple Input Sources**: Image Files, URLs, PIL images, NumPy arrays 97 | * **Flexible Processing**: Single image or batch processing thousands of images efficiently 98 | * **12+ State-of-the-Art Models**: From ultra-fast mobile models to high-precision detectors 99 | * **One-Line Detection**: Get results with just ```detector.detect_faces(image)``` 100 | * **Rich Outputs**: Bounding boxes, confidence scores, cropped faces ready to use 101 | 102 | ![face_detection_2](https://github.com/user-attachments/assets/6cb7e953-3448-486e-b6b4-32c654da1fce) 103 | 104 | 📝 **Quick Example:** 105 | 106 | ```python 107 | import cv2 108 | from visionface import FaceDetection, FaceAnnotators 109 | 110 | # 1. Initialize detector 111 | detector = FaceDetection(detector_backbone="yolo-small") 112 | 113 | # 2. Detect faces 114 | image = cv2.imread("your_image.jpg") 115 | faces = detector.detect_faces(image) 116 | 117 | # 3. Visualize results 118 | result = FaceAnnotators.box_annotator(image, faces[0]) 119 | cv2.imwrite("detected.jpg", result) 120 | ``` 121 | 122 | ### Face Recognition 123 | The `Face Recognition` module identifies individuals by generating embeddings and comparing them in a vector database. The process includes three stages: detecting faces, creating embeddings with the chosen model, and searching the database to find the closest matches. 124 | 125 | ✨ **Key Features**: 126 | 127 | * **Multi-model support**: Choose from high-accuracy embedding backbones such as FaceNet-VGG, FaceNet-CASIA, and Dlib. 128 | * **Vector DB Integration**: Store and query embeddings using Qdrant, Milvus, or local file-based storage. 129 | * **Scalable Search**: Efficiently match thousands or millions of faces with fast search. 130 | * **Flexible Enrollment**: Add faces one-by-one or in batches with associated labels. 131 | * **Threshold & Ranking**: Control similarity thresholds and retrieve top-k matches for robust recognition results. 132 | 133 | ![face)recognition](https://github.com/user-attachments/assets/55f83bc1-93ec-479d-a86b-820c7cef0605) 134 | 135 | ```python 136 | from visionface import FaceRecognition 137 | 138 | # 1. Setup recognition system 139 | fr = FaceRecognition(detector_backbone="yolo-small", 140 | embedding_backbone="FaceNet-VGG", 141 | db_backend="qdrant") 142 | 143 | # 2. Add known faces 144 | fr.upsert_faces( 145 | images=["john.jpg", "jane.jpg", "bob.jpg"], 146 | labels=["John", "Jane", "Bob"], 147 | collection_name="employees" 148 | ) 149 | 150 | # 3. 
Search for matches 151 | matches = fr.search_faces("query_face_image.jpg", 152 | collection_name="employees", 153 | score_threshold=0.7, 154 | top_k=3) 155 | 156 | for match in matches: 157 | print(f"Found: {match['face_name']} (confidence: {match['score']:.2f})") 158 | ``` 159 | 160 | ### Face Embeddings 161 | The `Face Embeddings` module transforms each detected face into a high-dimensional numeric vector (embedding) that captures its unique features. 162 | These embeddings can be used for: 163 | 164 | * **Face verification**: Check if two faces belong to the same perso 165 | * **Recognition**: Match against a database of known faces 166 | * **Clustering**: Group similar faces automatically 167 | * **Advanced analytics**: 168 | 169 | **✨ Supported Embedding Models:** 170 | `FaceNet-VGG`, `FaceNet-CASIA`, `Dlib` 171 | 172 | 📝 **Quick Example:** 173 | 174 | ```python 175 | from visionface import FaceEmbedder 176 | 177 | # 1. Initialize embedder 178 | embedder = FaceEmbedder(embedding_backbone="FaceNet-VGG") 179 | 180 | # 2. Generate embeddings for face images 181 | embeddings = embedder.embed_faces( 182 | face_imgs=["face1.jpg", "face2.jpg"], 183 | normalize_embeddings=True # L2 normalization 184 | ) 185 | 186 | # 3. Use embeddings 187 | for i, embedding in enumerate(embeddings): 188 | print(f"Face {i+1} embedding shape: {embedding.shape}") # (512,) 189 | # Use for: face verification, clustering, custom databases 190 | ``` 191 | 192 | ### Face Landmarks 193 | The `Landmarks` module identifies key facial features with pixel-perfect accuracy. From eye positions to lip contours, get detailed facial geometry for advanced applications. 194 | 195 | ✨ **Key Features:** 196 | 197 | * **Multiple Input Sources**: Image Files, URLs, PIL images, NumPy arrays 198 | * **Flexible Processing**: Single image or batch processing thousands of images efficiently 199 | * **2D & 3D Support**: Standard 2D points or full 3D face mesh 200 | * **Rich Annotations**: Built-in visualization with customizable styling 201 | * **Multiple Backends**: MediaPipe (468 points) or Dlib (68 points) 202 | 203 | ![face_landmarks](https://github.com/user-attachments/assets/9b8264d1-2ea7-442c-ab08-7d11d35f1824) 204 | 205 | 📝 **Quick Example:** 206 | 207 | ```python 208 | from visionface import LandmarkDetection 209 | from visionface.annotators.landmark import MediaPipeFaceMeshAnnotator 210 | 211 | landmark_detector = LandmarkDetection(detector_backbone="mediapipe") 212 | image = cv2.imread("your_image.jpg") 213 | 214 | # Get 468 facial landmarks 215 | landmarks = landmark_detector.detect_3d_landmarks(image) 216 | 217 | # Visualize with connections 218 | vizualizer = MediaPipeFaceMeshAnnotator(thickness=2, circle_radius=3) 219 | result = vizualizer.annotate( 220 | image, landmarks[0], connections=True 221 | ) 222 | cv2.imwrite("detected_landmarks.jpg", result) 223 | ``` 224 | 225 | ## 💡 Examples 226 | 227 |
228 | 🎯 Real-time Face Detection 229 | 230 | ```python 231 | import cv2 232 | from visionface import FaceDetection, FaceAnnotators 233 | 234 | detector = FaceDetection(detector_backbone="yolo-nano") # Fastest model 235 | cap = cv2.VideoCapture(0) 236 | 237 | while True: 238 | ret, frame = cap.read() 239 | faces = detector.detect_faces(frame) 240 | annotated = FaceAnnotators.box_annotator(frame, faces) 241 | 242 | cv2.imshow('Face Detection', annotated) 243 | if cv2.waitKey(1) & 0xFF == ord('q'): 244 | break 245 | 246 | cap.release() 247 | cv2.destroyAllWindows() 248 | ``` 249 |
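<details>
<summary>🗣️ Prompt-Guided Detection (YOLO-World)</summary>

The `yolow-*` backbones are open-vocabulary detectors. The sketch below drives the low-level `YOLOWorldSmallDetector` class directly through its `detect_faces_with_prompt` method (both defined in `visionface/models/face_detection/YOLOWolrd.py`); the prompt text and file names are only illustrative.

```python
import cv2
from visionface import FaceAnnotators
from visionface.models.face_detection.YOLOWolrd import YOLOWorldSmallDetector

detector = YOLOWorldSmallDetector()  # downloads the yolov8s-world weights on first use
image = cv2.imread("your_image.jpg")

# Text prompts describe what to detect; one prompt list is shared by all images in the batch
faces = detector.detect_faces_with_prompt([image], prompts=["face"])

# faces[0] holds the DetectedFace objects for the first (and only) input image
annotated = FaceAnnotators.box_annotator(image, faces[0])
cv2.imwrite("prompted_detection.jpg", annotated)
```
</details>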
250 | 251 |
252 | 📊 Batch Processing 253 | 254 | ```python 255 | from visionface import FaceDetection 256 | import glob 257 | 258 | detector = FaceDetection(detector_backbone="yolo-medium") 259 | 260 | # Process entire folder 261 | image_paths = glob.glob("photos/*.jpg") 262 | images = [cv2.imread(path) for path in image_paths] 263 | 264 | # Detect all faces at once 265 | all_detections = detector.detect_faces(images) 266 | 267 | # Save cropped faces 268 | for i, detections in enumerate(all_detections): 269 | for j, face in enumerate(detections): 270 | if face.cropped_face is not None: 271 | cv2.imwrite(f"faces/image_{i}_face_{j}.jpg", face.cropped_face) 272 | ``` 273 |
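<details>
<summary>🔐 Face Verification with Embeddings</summary>

Verification ("do two photos show the same person?") is listed as a use case of the embeddings module but not shown above. This is a minimal sketch built on `FaceEmbedder`: it assumes `embed_faces` accepts image paths and returns one L2-normalized vector per input (as in the Face Embeddings example), and the 0.7 threshold is illustrative, not a tuned value.

```python
import numpy as np
from visionface import FaceEmbedder

embedder = FaceEmbedder(embedding_backbone="FaceNet-VGG")

# One embedding per image; with L2 normalization the dot product equals cosine similarity
emb_a, emb_b = embedder.embed_faces(
    face_imgs=["person_a.jpg", "person_b.jpg"],
    normalize_embeddings=True
)

similarity = float(np.dot(np.asarray(emb_a, dtype=float), np.asarray(emb_b, dtype=float)))
print(f"Cosine similarity: {similarity:.3f} -> same person: {similarity >= 0.7}")
```
</details>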
274 | 275 |
276 | 🏢 Employee Recognition System 277 | 278 | ```python 279 | from visionface import FaceRecognition 280 | import os 281 | 282 | # Initialize system 283 | fr = FaceRecognition(db_backend="qdrant") 284 | 285 | # Auto-enroll from employee photos folder 286 | def enroll_employees(folder_path): 287 | for filename in os.listdir(folder_path): 288 | if filename.endswith(('.jpg', '.png')): 289 | name = filename.split('.')[0] # Use filename as name 290 | image_path = os.path.join(folder_path, filename) 291 | 292 | fr.upsert_faces( 293 | images=[image_path], 294 | labels=[name], 295 | collection_name="company_employees" 296 | ) 297 | print(f"Enrolled: {name}") 298 | 299 | # Enroll all employees 300 | enroll_employees("employee_photos/") 301 | 302 | # Check security camera feed 303 | def identify_person(camera_image): 304 | results = fr.search_faces( 305 | camera_image, 306 | collection_name="company_employees", 307 | score_threshold=0.8, 308 | top_k=1 309 | ) 310 | 311 | if results[0]: # If match found 312 | return results[0][0]['face_name'] 313 | return "Unknown person" 314 | ``` 315 |
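<details>
<summary>🌐 Mixed Input Sources</summary>

Detection is documented to accept image files, URLs, PIL images, and NumPy arrays (see `visionface/commons/image_utils.py`). A short sketch, assuming `FaceDetection.detect_faces` routes its inputs through that loader just as the recognition pipeline does with file paths; the file names and URL below are placeholders.

```python
import cv2
from visionface import FaceDetection

detector = FaceDetection(detector_backbone="yolo-small")

inputs = [
    "local_photo.jpg",                        # file path
    "https://example.com/group_photo.jpg",    # URL (placeholder)
    cv2.imread("another_photo.jpg"),          # NumPy array in BGR format
]

all_faces = detector.detect_faces(inputs)
for i, faces in enumerate(all_faces):
    print(f"Input {i}: {len(faces)} face(s) detected")
```
</details>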
316 | 317 | ## 🎯 Models 318 | 319 | **Choose the right model for your use case:** 320 | 321 | | Use Case | Speed | Accuracy | Recommended Model | 322 | |----------|-------|----------|------------------| 323 | | 🚀 **Real-time apps** | ⚡⚡⚡ | ⭐⭐ | `yolo-nano`, `mediapipe` | 324 | | 🎯 **General purpose** | ⚡⚡ | ⭐⭐⭐ | `yolo-small` (default) | 325 | | 🔍 **High accuracy** | ⚡ | ⭐⭐⭐⭐ | `yolo-large`, `mtcnn` | 326 | | 📱 **Mobile/Edge** | ⚡⚡⚡ | ⭐⭐ | `mediapipe`, `yolo-nano` | 327 | | 🎭 **Landmarks needed** | ⚡⚡ | ⭐⭐⭐ | `mediapipe`, `dlib` | 328 | 329 |
330 | 📋 Complete Model List 331 | 332 | **Detection Models:** 333 | - `yolo-nano`, `yolo-small`, `yolo-medium`, `yolo-large` 334 | - `yoloe-small`, `yoloe-medium`, `yoloe-large` (prompt-based) 335 | - `yolow-small`, `yolow-medium`, `yolow-large`, `yolow-xlarge` (open-vocabulary) 336 | - `mediapipe`, `mtcnn`, `opencv` 337 | 338 | **Embedding Models:** 339 | - `FaceNet-VGG` (512D) - Balanced accuracy/speed 340 | - `FaceNet-CASIA` (512D) - High precision 341 | - `Dlib` (128D) - Lightweight 342 | 343 | **Landmark Models:** 344 | - `mediapipe` - 468 points + 3D mesh 345 | - `dlib` - 68 points, robust 346 |
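
Because every component is selected by a single backbone string, switching between the entries above is a one-line change, which also makes quick speed comparisons easy. The image path and the pair of backbones in this sketch are placeholders; benchmark on a representative sample of your own data.

```python
import time
import cv2
from visionface import FaceDetection

image = cv2.imread("your_image.jpg")

for backbone in ["yolo-nano", "yolo-large"]:  # any detection backbones from the list above
    detector = FaceDetection(detector_backbone=backbone)
    start = time.perf_counter()
    detector.detect_faces(image)
    elapsed_ms = (time.perf_counter() - start) * 1000
    print(f"{backbone}: detect_faces took {elapsed_ms:.1f} ms")
```
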
347 | 348 | 349 | ## 📚 Documentation 350 | 351 | - 📖 [Full Documentation](https://visionface.readthedocs.io) 352 | - 🎓 [Tutorials & Guides](https://visionface.readthedocs.io/tutorials) 353 | - 🔌 [REST API Reference](https://visionface.readthedocs.io/api) 354 | - 💡 [Use Case Examples](https://github.com/username/visionface/tree/main/examples) 355 | 356 | ## 🤝 Contributing 357 | We welcome contributions! See our [Contributing Guide](CONTRIBUTING.md). 358 | 359 | 360 | 361 | 362 |
363 | 364 | 365 | **Quick ways to help:** 366 | - ⭐ Star the repo 367 | - 🐛 Report bugs 368 | - 💡 Request features 369 | - 📝 Improve docs 370 | - 🔧 Submit PRs 371 | 372 | ## 📄 License 373 | 374 | MIT License - see [LICENSE](LICENSE) file. 375 | 376 | ## 🙏 Citation 377 | 378 | ```bibtex 379 | @software{VisionFace2025, 380 | title = {VisionFace: Modern Face Detection & Recognition Framework}, 381 | author = {VisionFace Team}, 382 | year = {2025}, 383 | url = {https://github.com/miladfa7/visionface} 384 | } 385 | ``` 386 | 387 | --- 388 | 389 |
390 | 391 | **[⬆ Back to Top](#visionface)** • **Made with ❤️ by the VisionFace team** 392 | 393 |
394 | -------------------------------------------------------------------------------- /visionface/models/face_detection/MTCNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | import logging 5 | from typing import List, Any, Union 6 | from enum import Enum 7 | from torchvision.ops.boxes import batched_nms 8 | 9 | # VisionFace modules 10 | from visionface.models.Detector import Detector, DetectedFace 11 | from visionface.commons.download_files import download_model_weights 12 | from visionface.commons.image_utils import image_resample, get_cropped_face 13 | from visionface.commons.utils import batched_nms_numpy 14 | from visionface.commons.detection_utils import ( 15 | convert_to_square_bbox, 16 | box_padding, 17 | apply_bbox_regression 18 | ) 19 | 20 | logging.basicConfig(level=logging.INFO) 21 | 22 | 23 | class MTCCNModel(Enum): 24 | ONET = 0 25 | PNET = 1 26 | RNET = 2 27 | 28 | WEIGHT_NAMES = [ 29 | "mtccn-onet-face.pt", 30 | "mtccn-pnet-face.pt", 31 | "mtccn-rnet-face.pt", 32 | ] 33 | 34 | WEIGHT_URLS = [ 35 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/onet.pt", 36 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/pnet.pt", 37 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/rnet.pt", 38 | ] 39 | 40 | 41 | class MTCNNDetector(Detector): 42 | """MTCNN face detection module. 43 | 44 | This class loads pretrained P-, R-, and O-nets and returns bounding boxes for detected faces 45 | 46 | Keyword Arguments: 47 | min_face_size {int} -- Minimum face size to search for. (default: {20}) 48 | thresholds {list} -- MTCNN face detection thresholds (default: {[0.6, 0.7, 0.7]}) 49 | factor {float} -- Factor used to create a scaling pyramid of face sizes. (default: {0.709}) 50 | post_process {bool} -- Whether or not to post process images tensors before returning. 51 | (default: {True}) 52 | select_largest {bool} -- If True, if multiple faces are detected, the largest is returned. 53 | If False, the face with the highest detection probability is returned. 54 | (default: {True}) 55 | selection_method {string} -- Which heuristic to use for selection. Default None. If 56 | specified, will override select_largest: 57 | "probability": highest probability selected 58 | "largest": largest box selected 59 | "largest_over_threshold": largest box over a certain probability selected 60 | "center_weighted_size": box size minus weighted squared offset from image center 61 | (default: {None}) 62 | keep_all {bool} -- If True, all detected faces are returned, in the order dictated by the 63 | select_largest parameter. (default: {False}) 64 | device {torch.device} -- The device on which to run neural net passes. 
(default: {None}) 65 | """ 66 | 67 | def __init__( 68 | self, 69 | min_face_size=20, 70 | thresholds=[0.6, 0.7, 0.7], 71 | factor=0.709, 72 | post_process=True, 73 | select_largest=True, 74 | selection_method=None, 75 | keep_all=True, 76 | device=None 77 | ): 78 | super().__init__() 79 | 80 | # MTCNN specific parameters 81 | self.device = device if device else torch.device('cuda' if torch.cuda.is_available() else 'cpu') 82 | self.min_face_size = min_face_size 83 | self.thresholds = thresholds 84 | self.factor = factor 85 | self.post_process = post_process 86 | self.select_largest = select_largest 87 | self.keep_all = keep_all 88 | self.selection_method = selection_method 89 | 90 | # Initialize model components 91 | self.pnet = None 92 | self.rnet = None 93 | self.onet = None 94 | 95 | # Build the models 96 | self.model = self.build_model() 97 | 98 | if not self.selection_method: 99 | self.selection_method = 'largest' if self.select_largest else 'probability' 100 | 101 | def build_model(self) -> Any: 102 | """ 103 | Build and return the MTCNN face detection model. 104 | This method loads the P-Net, R-Net, and O-Net components. 105 | 106 | Returns: 107 | dict: Dictionary containing the mtcnn network components 108 | """ 109 | self.pnet = PNet() 110 | self.rnet = RNet() 111 | self.onet = ONet() 112 | 113 | self.pnet.to(self.device) 114 | self.rnet.to(self.device) 115 | self.onet.to(self.device) 116 | 117 | return { 118 | 'pnet': self.pnet, 119 | 'rnet': self.rnet, 120 | 'onet': self.onet 121 | } 122 | 123 | def detect_faces( 124 | self, 125 | imgs: List[np.ndarray], 126 | return_cropped_faces: bool = True 127 | ) -> List[List[DetectedFace]]: 128 | """ 129 | Detect faces in one or more input images using the MTCNN model. 130 | 131 | Parameters: 132 | imgs (List[np.ndarray]): 133 | A single image or a list of images in BGR format. 134 | 135 | Args: 136 | imgs (Union[np.ndarray, List[np.ndarray]]): 137 | - A single image as a NumPy array with shape (H, W, 3), or 138 | - A list of such images. 139 | return_cropped_faces : bool, optional 140 | Whether to include cropped face images in each DetectedFace object. Default is True. 141 | 142 | Returns: 143 | List[List[DetectedFace]]: 144 | A list where each element is a list of DetectedFace objects corresponding to one input image. 145 | Each DetectedFace object contains: 146 | - Bounding box coordinates (xmin, ymin, xmax, ymax) 147 | - Confidence score (conf) 148 | - Class name ("face") 149 | - The cropped face region (cropped_face), extracted from the original image. 150 | """ 151 | processed_imgs = self._preprocess_images(imgs) 152 | batch_boxes = self._run_mtcnn_pipeline(processed_imgs) 153 | return self.process_faces(imgs, batch_boxes, return_cropped_faces) 154 | 155 | def process_faces( 156 | self, 157 | imgs: List[np.ndarray], 158 | results: np.ndarray, 159 | return_cropped_faces: bool 160 | ) -> List[List[DetectedFace]]: 161 | """ 162 | Process MTCNN detection results and convert them into DetectedFace objects. 163 | 164 | Args: 165 | imgs (List[np.ndarray]): 166 | A list of input images (as NumPy arrays). 167 | 168 | results (np.ndarray): 169 | A NumPy array of shape (batch_size, num_faces, 5), where each detected face is represented by 170 | [x1, y1, x2, y2, confidence_score]. Each sub-array corresponds to detections for a single image. 171 | 172 | return_cropped_faces : bool, 173 | Whether to include cropped face images in each DetectedFace object. Default is True. 
174 | 175 | Returns: 176 | List[List[DetectedFace]]: 177 | A list where each element is a list of DetectedFace objects corresponding to one input image. 178 | Each DetectedFace object contains: 179 | - Bounding box coordinates (xmin, ymin, xmax, ymax) 180 | - Confidence score (conf) 181 | - Class name ("face") 182 | - The cropped face region (cropped_face), extracted from the original image. 183 | """ 184 | 185 | detections = [] 186 | 187 | for idx, bboxes in enumerate(results): 188 | img = imgs[idx] 189 | current_detections = [] 190 | face_no = 0 191 | for bbox in bboxes: 192 | cropped_face = get_cropped_face(img, bbox[:-1]) if return_cropped_faces else None 193 | class_name = "face" if bbox[2] != 0 and bbox[3] != 0 else None 194 | facial_info = DetectedFace( 195 | xmin=bbox[0], 196 | ymin=bbox[1], 197 | xmax=bbox[2], 198 | ymax=bbox[3], 199 | conf=round(bbox[4], 2), 200 | class_name = class_name, 201 | cropped_face=cropped_face 202 | ) 203 | current_detections.append(facial_info) 204 | face_no = face_no+1 if class_name is not None else face_no 205 | 206 | logging.info( 207 | f"[MTCNNDetector] {face_no} face(s) detected in image id: {idx}, " 208 | f"min confidence threshold 0.25." 209 | ) 210 | 211 | detections.append(current_detections) 212 | 213 | return detections 214 | 215 | def _preprocess_images(self, imgs: List[np.ndarray]) -> torch.Tensor: 216 | """Preprocess input images for MTCNN.""" 217 | 218 | if any(img.size != imgs[0].size for img in imgs): 219 | raise Exception("MTCNN batch processing only compatible with equal-dimension images.") 220 | 221 | imgs = np.stack([np.uint8(img) for img in imgs]) 222 | imgs = torch.as_tensor(imgs.copy(), device=self.device) 223 | return imgs 224 | 225 | def _run_mtcnn_pipeline(self, imgs: torch.Tensor) -> List[np.ndarray]: 226 | """ 227 | Run the complete MTCNN detection pipeline. 
228 | """ 229 | model_dtype = next(self.pnet.parameters()).dtype 230 | imgs = imgs.permute(0, 3, 1, 2).type(model_dtype) 231 | 232 | batch_size = len(imgs) 233 | h, w = imgs.shape[2:4] 234 | minsize = self.min_face_size 235 | threshold = self.thresholds 236 | factor = self.factor 237 | 238 | m = 12.0 / minsize 239 | minl = min(h, w) 240 | minl = minl * m 241 | 242 | # Create scale pyramid 243 | scale_i = m 244 | scales = [] 245 | while minl >= 12: 246 | scales.append(scale_i) 247 | scale_i = scale_i * factor 248 | minl = minl * factor 249 | 250 | # First stage 251 | boxes = [] 252 | image_inds = [] 253 | scale_picks = [] 254 | all_i = 0 255 | offset = 0 256 | 257 | for scale in scales: 258 | im_data = image_resample(imgs, (int(h * scale + 1), int(w * scale + 1))) 259 | im_data = (im_data - 127.5) * 0.0078125 260 | reg, probs = self.pnet(im_data) 261 | 262 | boxes_scale, image_inds_scale = generate_bounding_box(reg, probs[:, 1], scale, threshold[0]) 263 | boxes.append(boxes_scale) 264 | image_inds.append(image_inds_scale) 265 | 266 | pick = batched_nms(boxes_scale[:, :4], boxes_scale[:, 4], image_inds_scale, 0.5) 267 | scale_picks.append(pick + offset) 268 | offset += boxes_scale.shape[0] 269 | 270 | boxes = torch.cat(boxes, dim=0) 271 | image_inds = torch.cat(image_inds, dim=0) 272 | scale_picks = torch.cat(scale_picks, dim=0) 273 | 274 | # NMS within each scale + image 275 | boxes, image_inds = boxes[scale_picks], image_inds[scale_picks] 276 | 277 | # NMS within each image 278 | pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) 279 | boxes, image_inds = boxes[pick], image_inds[pick] 280 | 281 | regw = boxes[:, 2] - boxes[:, 0] 282 | regh = boxes[:, 3] - boxes[:, 1] 283 | qq1 = boxes[:, 0] + boxes[:, 5] * regw 284 | qq2 = boxes[:, 1] + boxes[:, 6] * regh 285 | qq3 = boxes[:, 2] + boxes[:, 7] * regw 286 | qq4 = boxes[:, 3] + boxes[:, 8] * regh 287 | boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0) 288 | boxes = convert_to_square_bbox(boxes) 289 | y, ey, x, ex = box_padding(boxes, w, h) 290 | 291 | # Second stage 292 | if len(boxes) > 0: 293 | im_data = [] 294 | for k in range(len(y)): 295 | if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): 296 | img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) 297 | im_data.append(image_resample(img_k, (24, 24))) 298 | im_data = torch.cat(im_data, dim=0) 299 | im_data = (im_data - 127.5) * 0.0078125 300 | 301 | # This is equivalent to out = rnet(im_data) to avoid GPU out of memory. 
302 | out = fixed_batch_process(im_data, self.rnet) 303 | 304 | out0 = out[0].permute(1, 0) 305 | out1 = out[1].permute(1, 0) 306 | score = out1[1, :] 307 | ipass = score > threshold[1] 308 | boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) 309 | image_inds = image_inds[ipass] 310 | mv = out0[:, ipass].permute(1, 0) 311 | 312 | # NMS within each image 313 | pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) 314 | boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick] 315 | boxes = apply_bbox_regression(boxes, mv) 316 | boxes = convert_to_square_bbox(boxes) 317 | 318 | # Third stage 319 | if len(boxes) > 0: 320 | y, ey, x, ex = box_padding(boxes, w, h) 321 | im_data = [] 322 | for k in range(len(y)): 323 | if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): 324 | img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) 325 | im_data.append(image_resample(img_k, (48, 48))) 326 | im_data = torch.cat(im_data, dim=0) 327 | im_data = (im_data - 127.5) * 0.0078125 328 | 329 | # This is equivalent to out = onet(im_data) to avoid GPU out of memory. 330 | out = fixed_batch_process(im_data, self.onet) 331 | 332 | out0 = out[0].permute(1, 0) 333 | out1 = out[1].permute(1, 0) 334 | out2 = out[2].permute(1, 0) 335 | score = out2[1, :] 336 | ipass = score > threshold[2] 337 | boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) 338 | image_inds = image_inds[ipass] 339 | mv = out0[:, ipass].permute(1, 0) 340 | 341 | boxes = apply_bbox_regression(boxes, mv) 342 | 343 | # NMS within each image using "Min" strategy 344 | pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min') 345 | boxes, image_inds = boxes[pick], image_inds[pick] 346 | 347 | boxes = boxes.detach().numpy() 348 | image_inds = image_inds.cpu() 349 | 350 | # Group boxes by image 351 | batch_boxes = [] 352 | for b_i in range(batch_size): 353 | b_i_inds = np.where(image_inds == b_i) 354 | batch_boxes.append(boxes[b_i_inds].copy()) 355 | 356 | # Post-process boxes and probabilities 357 | boxes, probs = [], [] 358 | for box in batch_boxes: 359 | box = np.array(box) 360 | if len(box) == 0: 361 | boxes.append(None) 362 | probs.append([None]) 363 | elif self.select_largest: 364 | box_order = np.argsort((box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1] 365 | box = box[box_order] 366 | boxes.append(box[:, :4]) 367 | probs.append(box[:, 4]) 368 | else: 369 | boxes.append(box[:, :4]) 370 | probs.append(box[:, 4]) 371 | 372 | boxes = np.array(boxes, dtype=object) 373 | probs = np.array(probs, dtype=object) 374 | 375 | return self._combine_boxes_and_probs(boxes, probs) 376 | 377 | def _combine_boxes_and_probs( 378 | self, 379 | boxes: List[Union[np.ndarray, None]], 380 | probs: List[Union[np.ndarray, None]] 381 | ) -> np.ndarray: 382 | combined = [] 383 | for b, p in zip(boxes, probs): 384 | if b is None or p is None: 385 | combined.append(np.array([[0, 0, 0, 0, 0]])) 386 | else: 387 | p = np.expand_dims(p, axis=1) # shape (N, 1) 388 | combined.append(np.concatenate((b.astype(np.int32), p), axis=1)) # shape (N, 5) 389 | return combined 390 | 391 | class PNet(nn.Module): 392 | """MTCNN PNet. 
393 | 394 | Keyword Arguments: 395 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 396 | """ 397 | 398 | def __init__(self, pretrained=True): 399 | super().__init__() 400 | 401 | self.conv1 = nn.Conv2d(3, 10, kernel_size=3) 402 | self.prelu1 = nn.PReLU(10) 403 | self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True) 404 | self.conv2 = nn.Conv2d(10, 16, kernel_size=3) 405 | self.prelu2 = nn.PReLU(16) 406 | self.conv3 = nn.Conv2d(16, 32, kernel_size=3) 407 | self.prelu3 = nn.PReLU(32) 408 | self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1) 409 | self.softmax4_1 = nn.Softmax(dim=1) 410 | self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1) 411 | 412 | self.training = False 413 | 414 | if pretrained: 415 | model_id = MTCCNModel.PNET.value 416 | model_name = WEIGHT_NAMES[model_id] 417 | weight_url = WEIGHT_URLS[model_id] 418 | model_path = download_model_weights( 419 | filename=model_name, 420 | download_url=weight_url 421 | ) 422 | state_dict = torch.load(model_path, weights_only=False) 423 | self.load_state_dict(state_dict) 424 | 425 | def forward(self, x): 426 | x = self.conv1(x) 427 | x = self.prelu1(x) 428 | x = self.pool1(x) 429 | x = self.conv2(x) 430 | x = self.prelu2(x) 431 | x = self.conv3(x) 432 | x = self.prelu3(x) 433 | a = self.conv4_1(x) 434 | a = self.softmax4_1(a) 435 | b = self.conv4_2(x) 436 | return b, a 437 | 438 | class RNet(nn.Module): 439 | """MTCNN RNet. 440 | 441 | Keyword Arguments: 442 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 443 | """ 444 | 445 | def __init__(self, pretrained=True): 446 | super().__init__() 447 | 448 | self.conv1 = nn.Conv2d(3, 28, kernel_size=3) 449 | self.prelu1 = nn.PReLU(28) 450 | self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) 451 | self.conv2 = nn.Conv2d(28, 48, kernel_size=3) 452 | self.prelu2 = nn.PReLU(48) 453 | self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) 454 | self.conv3 = nn.Conv2d(48, 64, kernel_size=2) 455 | self.prelu3 = nn.PReLU(64) 456 | self.dense4 = nn.Linear(576, 128) 457 | self.prelu4 = nn.PReLU(128) 458 | self.dense5_1 = nn.Linear(128, 2) 459 | self.softmax5_1 = nn.Softmax(dim=1) 460 | self.dense5_2 = nn.Linear(128, 4) 461 | 462 | self.training = False 463 | 464 | if pretrained: 465 | model_id = MTCCNModel.RNET.value 466 | model_name = WEIGHT_NAMES[model_id] 467 | weight_url = WEIGHT_URLS[model_id] 468 | model_path = download_model_weights( 469 | filename=model_name, 470 | download_url=weight_url 471 | ) 472 | state_dict = torch.load(model_path, weights_only=False) 473 | self.load_state_dict(state_dict) 474 | 475 | def forward(self, x): 476 | x = self.conv1(x) 477 | x = self.prelu1(x) 478 | x = self.pool1(x) 479 | x = self.conv2(x) 480 | x = self.prelu2(x) 481 | x = self.pool2(x) 482 | x = self.conv3(x) 483 | x = self.prelu3(x) 484 | x = x.permute(0, 3, 2, 1).contiguous() 485 | x = self.dense4(x.view(x.shape[0], -1)) 486 | x = self.prelu4(x) 487 | a = self.dense5_1(x) 488 | a = self.softmax5_1(a) 489 | b = self.dense5_2(x) 490 | return b, a 491 | 492 | 493 | class ONet(nn.Module): 494 | """MTCNN ONet. 
495 | 496 | Keyword Arguments: 497 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 498 | """ 499 | 500 | def __init__(self, pretrained=True): 501 | super().__init__() 502 | 503 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3) 504 | self.prelu1 = nn.PReLU(32) 505 | self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) 506 | self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 507 | self.prelu2 = nn.PReLU(64) 508 | self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) 509 | self.conv3 = nn.Conv2d(64, 64, kernel_size=3) 510 | self.prelu3 = nn.PReLU(64) 511 | self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True) 512 | self.conv4 = nn.Conv2d(64, 128, kernel_size=2) 513 | self.prelu4 = nn.PReLU(128) 514 | self.dense5 = nn.Linear(1152, 256) 515 | self.prelu5 = nn.PReLU(256) 516 | self.dense6_1 = nn.Linear(256, 2) 517 | self.softmax6_1 = nn.Softmax(dim=1) 518 | self.dense6_2 = nn.Linear(256, 4) 519 | self.dense6_3 = nn.Linear(256, 10) 520 | 521 | self.training = False 522 | 523 | if pretrained: 524 | model_id = MTCCNModel.ONET.value 525 | model_name = WEIGHT_NAMES[model_id] 526 | weight_url = WEIGHT_URLS[model_id] 527 | model_path = download_model_weights( 528 | filename=model_name, 529 | download_url=weight_url 530 | ) 531 | state_dict = torch.load(model_path, weights_only=False) 532 | self.load_state_dict(state_dict) 533 | 534 | def forward(self, x): 535 | x = self.conv1(x) 536 | x = self.prelu1(x) 537 | x = self.pool1(x) 538 | x = self.conv2(x) 539 | x = self.prelu2(x) 540 | x = self.pool2(x) 541 | x = self.conv3(x) 542 | x = self.prelu3(x) 543 | x = self.pool3(x) 544 | x = self.conv4(x) 545 | x = self.prelu4(x) 546 | x = x.permute(0, 3, 2, 1).contiguous() 547 | x = self.dense5(x.view(x.shape[0], -1)) 548 | x = self.prelu5(x) 549 | a = self.dense6_1(x) 550 | a = self.softmax6_1(a) 551 | b = self.dense6_2(x) 552 | c = self.dense6_3(x) 553 | return b, c, a 554 | 555 | 556 | def fixed_batch_process(im_data, model): 557 | batch_size = 512 558 | out = [] 559 | for i in range(0, len(im_data), batch_size): 560 | batch = im_data[i:(i+batch_size)] 561 | out.append(model(batch)) 562 | 563 | return tuple(torch.cat(v, dim=0) for v in zip(*out)) 564 | 565 | def generate_bounding_box(reg, probs, scale, thresh): 566 | stride = 2 567 | cellsize = 12 568 | 569 | reg = reg.permute(1, 0, 2, 3) 570 | 571 | mask = probs >= thresh 572 | mask_inds = mask.nonzero() 573 | image_inds = mask_inds[:, 0] 574 | score = probs[mask] 575 | reg = reg[:, mask].permute(1, 0) 576 | bb = mask_inds[:, 1:].type(reg.dtype).flip(1) 577 | q1 = ((stride * bb + 1) / scale).floor() 578 | q2 = ((stride * bb + cellsize - 1 + 1) / scale).floor() 579 | boundingbox = torch.cat([q1, q2, score.unsqueeze(1), reg], dim=1) 580 | return boundingbox, image_inds --------------------------------------------------------------------------------