├── visionface ├── PyFaces.py ├── models │ ├── face_detection │ │ ├── mediapipe.py │ │ ├── OpenCV.py │ │ ├── YOLO.py │ │ ├── MediaPipe.py │ │ ├── YOLOEye.py │ │ ├── YOLOWolrd.py │ │ └── MTCNN.py │ ├── LandmarkDetector.py │ ├── face_embedding │ │ ├── Dlib.py │ │ ├── ArcFace.py │ │ └── FaceNet.py │ ├── Detector.py │ ├── landmark_detection │ │ ├── MediaPipeLandmark.py │ │ ├── Dlib.py │ │ └── utils.py │ └── FaceEmbedding.py ├── __init__.py ├── annotators │ ├── base.py │ ├── helper │ │ └── landmark_styles.py │ ├── FaceAnnotators.py │ ├── utils.py │ ├── landmark.py │ └── detection.py ├── commons │ ├── detection_utils.py │ ├── utils.py │ ├── download_files.py │ └── image_utils.py ├── modules │ ├── embedding.py │ ├── landmarks.py │ ├── modeling.py │ ├── detection.py │ └── recognition.py ├── db │ ├── qdrant │ │ ├── search_manager.py │ │ ├── config.py │ │ ├── data_manager.py │ │ └── collection_manager.py │ └── qdrant_client.py └── FaceAnnotators.py ├── .gitignore ├── banners ├── VisionFace2.png ├── face_analysis.jpg ├── face_detection.jpg ├── face_landmarks.jpg ├── face_recognition.jpg ├── face_verification.jpg └── face_visualization.jpg ├── requirements.txt ├── CONTRIBUTORS.md ├── LICENSE ├── setup.py └── README.md /visionface/PyFaces.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | testing/ 2 | __pycache__/ 3 | .vscode/ -------------------------------------------------------------------------------- /visionface/models/face_detection/mediapipe.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /banners/VisionFace2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/VisionFace2.png -------------------------------------------------------------------------------- /banners/face_analysis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_analysis.jpg -------------------------------------------------------------------------------- /banners/face_detection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_detection.jpg -------------------------------------------------------------------------------- /banners/face_landmarks.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_landmarks.jpg -------------------------------------------------------------------------------- /banners/face_recognition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_recognition.jpg -------------------------------------------------------------------------------- /banners/face_verification.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_verification.jpg -------------------------------------------------------------------------------- /requirements.txt: 
-------------------------------------------------------------------------------- 1 | torch>=1.8.0 2 | numpy>=1.19.0 3 | opencv-python>=4.5.0 4 | Pillow>=8.0.0 5 | requests>=2.25.0 -------------------------------------------------------------------------------- /banners/face_visualization.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/miladfa7/visionface/HEAD/banners/face_visualization.jpg -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | ## Core Team 4 | - **Milad Farzalizadeh** (@miladfa7) 5 | - **Zahra Sheikhvand** (@zahra-she) 6 | 7 | ## All Contributors 8 | Thanks to everyone who contributed to VisionFace: 9 | - List all contributors here -------------------------------------------------------------------------------- /visionface/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | VisionFace: Modern Face Detection & Recognition Framework 3 | """ 4 | 5 | __version__ = "1.0.0" 6 | __author__ = "VisionFace Team" 7 | __email__ = "visio.face2025@gmail.com" 8 | 9 | 10 | try: 11 | from .modules.recognition import FaceRecognition 12 | from .modules.detection import FaceDetection 13 | from .modules.embedding import FaceEmbedder 14 | from .modules.landmarks import LandmarkDetection 15 | from .annotators import FaceAnnotators 16 | 17 | __all__ = [ 18 | "FaceDetection", 19 | "FaceEmbedder", 20 | "FaceRecognition", 21 | "LandmarkDetection", 22 | "FaceAnnotators" 23 | ] 24 | except ImportError as e: 25 | print(f"Warning: Some modules could not be imported: {e}") 26 | __all__ = [] 27 | -------------------------------------------------------------------------------- /visionface/models/LandmarkDetector.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Any, Optional 3 | import numpy as np 4 | from dataclasses import dataclass 5 | 6 | 7 | class LandmarkDetector(ABC): 8 | """ 9 | Abstract base class for a face landmark system 10 | """ 11 | 12 | def __init__(self): 13 | pass 14 | 15 | @abstractmethod 16 | def build_model(self) -> Any: 17 | pass 18 | 19 | @abstractmethod 20 | def detect_landmarks(self, img: np.ndarray): 21 | pass 22 | 23 | @abstractmethod 24 | def process_landmarks(self, results): 25 | pass 26 | 27 | 28 | 29 | @dataclass 30 | class DetectedLandmark2D: 31 | x: float 32 | y : float 33 | name: Optional[str] = None 34 | conf: Optional[float] = None 35 | 36 | @dataclass 37 | class DetectedLandmark3D: 38 | x: float 39 | y: float 40 | z: float 41 | name: Optional[str] = None 42 | conf: Optional[float] = None 43 | -------------------------------------------------------------------------------- /visionface/annotators/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import Union, List, Optional, Tuple 3 | import numpy as np 4 | from PIL import Image 5 | 6 | # Pyface modules 7 | from visionface.models.Detector import Detector 8 | from visionface.models.LandmarkDetector import DetectedLandmark3D, DetectedLandmark2D 9 | 10 | RawDetection = List[Union[int, float, str]] 11 | ImageType = Union[str, np.ndarray, Image.Image] 12 | 13 | class BaseAnnotator(ABC): 14 | @abstractmethod 15 | def annotate(self, img: ImageType, detections: 
Union[List[Detector], List[RawDetection]]) -> np.ndarray: 16 | pass 17 | 18 | class BaseLandmarkAnnotator: 19 | @abstractmethod 20 | def annotate( 21 | self, 22 | img: ImageType, 23 | landmarks: Union[ 24 | List[DetectedLandmark3D], 25 | List[DetectedLandmark2D], 26 | ], 27 | connections: Optional[List[Tuple[int, int]]] = None, 28 | is_drawing_landmarks: bool = True 29 | ) -> np.ndarray: 30 | pass -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 VisionFace Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /visionface/commons/detection_utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import torch 3 | 4 | from visionface.models.Detector import DetectedFace 5 | 6 | 7 | def convert_to_square_bbox(bboxA): 8 | h = bboxA[:, 3] - bboxA[:, 1] 9 | w = bboxA[:, 2] - bboxA[:, 0] 10 | 11 | l = torch.max(w, h) 12 | bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5 13 | bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5 14 | bboxA[:, 2:4] = bboxA[:, :2] + l.repeat(2, 1).permute(1, 0) 15 | 16 | return bboxA 17 | 18 | 19 | def box_padding(boxes, w, h): 20 | boxes = boxes.trunc().int().cpu().numpy() 21 | x = boxes[:, 0] 22 | y = boxes[:, 1] 23 | ex = boxes[:, 2] 24 | ey = boxes[:, 3] 25 | 26 | x[x < 1] = 1 27 | y[y < 1] = 1 28 | ex[ex > w] = w 29 | ey[ey > h] = h 30 | 31 | return y, ey, x, ex 32 | 33 | def apply_bbox_regression(boundingbox, reg): 34 | if reg.shape[1] == 1: 35 | reg = torch.reshape(reg, (reg.shape[2], reg.shape[3])) 36 | 37 | w = boundingbox[:, 2] - boundingbox[:, 0] + 1 38 | h = boundingbox[:, 3] - boundingbox[:, 1] + 1 39 | b1 = boundingbox[:, 0] + reg[:, 0] * w 40 | b2 = boundingbox[:, 1] + reg[:, 1] * h 41 | b3 = boundingbox[:, 2] + reg[:, 2] * w 42 | b4 = boundingbox[:, 3] + reg[:, 3] * h 43 | boundingbox[:, :4] = torch.stack([b1, b2, b3, b4]).permute(1, 0) 44 | 45 | return boundingbox 46 | 47 | def select_max_conf_faces( 48 | face_detections: List[List[DetectedFace]] 49 | ) -> List[DetectedFace]: 50 | """ 51 | Selects the DetectedFace with the highest confidence from each list of detections. 52 | 53 | Parameters 54 | ---------- 55 | face_detections : List[List[DetectedFace]] 56 | A list of detection lists. 
Each inner list contains DetectedFace objects for one image. 57 | 58 | Returns 59 | ------- 60 | List[DetectedFace] 61 | A list containing the DetectedFace with the highest confidence from each image. 62 | """ 63 | return [[max(detections, key=lambda face: face.conf) for detections in face_detections if detections]] -------------------------------------------------------------------------------- /visionface/modules/embedding.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | import numpy as np 3 | 4 | from visionface.models.Detector import DetectedFace 5 | from visionface.models.FaceEmbedding import FaceEmbedding 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | 10 | class FaceEmbedder: 11 | """ 12 | A class for generating embeddings from face images 13 | using a specified face embedding model. 14 | """ 15 | def __init__(self, embedding_backbone: str = "FaceNet-VGG") -> None: 16 | """ 17 | Initializes the FaceEmbedder with the given embedding model. 18 | 19 | Parameters 20 | ---------- 21 | embedding_backbone : str, optional 22 | The name of the face embedding model to use. Default is "FaceNet-VGG". 23 | """ 24 | self.face_embedder = self.build_model(embedding_backbone) 25 | self.vector_size = self.face_embedder.output_shape 26 | 27 | def build_model(self, embedding_backbone) -> Any: 28 | """ 29 | Builds and returns the face embedding model. 30 | 31 | Parameters 32 | ---------- 33 | embedding_backbone : str 34 | The name of the model to load. 35 | 36 | Returns 37 | ------- 38 | Any 39 | An initialized face embedding model instance. 40 | """ 41 | return build_model(embedding_backbone, "face_embedding") 42 | 43 | def embed_faces( 44 | self, 45 | face_imgs: Union[str, np.ndarray, List[np.ndarray], List[str], List[DetectedFace]], 46 | normalize_embeddings: bool = True 47 | ) -> FaceEmbedding: 48 | """ 49 | Computes face embeddings for one or more face images. 50 | 51 | Parameters 52 | ---------- 53 | face_imgs : Union[str, np.ndarray, List[np.ndarray], List[str], List[DetectedFace]] 54 | A single face image or a list of face images. Each image can be a file path (str), 55 | a NumPy array, or a DetectedFace object. 56 | 57 | normalize_embeddings : bool, optional 58 | Whether to apply L2 normalization to the output embeddings. Default is True. 59 | 60 | Returns 61 | ------- 62 | FaceEmbedding 63 | An object containing embedding vectors for each face. 
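Examples
--------
A minimal usage sketch, assuming the default "FaceNet-VGG" weights can be downloaded and that "face.jpg" is a hypothetical path to an already-cropped face image:

>>> from visionface import FaceEmbedder
>>> embedder = FaceEmbedder()
>>> embedding = embedder.embed_faces("face.jpg")
>>> vectors = embedding.as_numpy()  # one input image -> array of shape (1, embedding_dim)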
64 | """ 65 | face_images = load_images(face_imgs) 66 | validated_images = validate_images(face_images) 67 | return self.face_embedder.embed(validated_images, normalize_embeddings) 68 | 69 | -------------------------------------------------------------------------------- /visionface/annotators/helper/landmark_styles.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, Mapping 2 | from dataclasses import dataclass 3 | 4 | from regex import D 5 | 6 | 7 | from visionface.annotators.helper import landmark_connections 8 | 9 | RADIUS = 5 10 | RED = (48, 48, 255) 11 | GREEN = (48, 255, 48) 12 | BLUE = (192, 101, 21) 13 | YELLOW = (0, 204, 255) 14 | GRAY = (128, 128, 128) 15 | PURPLE = (128, 64, 128) 16 | PEACH = (180, 229, 255) 17 | WHITE = (224, 224, 224) 18 | CYAN = (192, 255, 48) 19 | MAGENTA = (192, 48, 255) 20 | 21 | THICKNESS_TESSELATION = 1 22 | THICKNESS_CONTOURS = 2 23 | CIRCLE_REDIUS = 2 24 | 25 | 26 | @dataclass 27 | class FaceMeshStyle: 28 | color: Tuple[int, int, int] = GRAY 29 | thickness: int = THICKNESS_TESSELATION 30 | circle_radius: int = CIRCLE_REDIUS 31 | 32 | FACEMESH_CONTOURS_CONNECTION_STYLE = { 33 | landmark_connections.FACEMESH_LIPS: 34 | FaceMeshStyle(color=WHITE, thickness=THICKNESS_CONTOURS), 35 | landmark_connections.FACEMESH_LEFT_EYE: 36 | FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS), 37 | landmark_connections.FACEMESH_LEFT_EYEBROW: 38 | FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS), 39 | landmark_connections.FACEMESH_RIGHT_EYE: 40 | FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS), 41 | landmark_connections.FACEMESH_RIGHT_EYEBROW: 42 | FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS), 43 | landmark_connections.FACEMESH_FACE_OVAL: 44 | FaceMeshStyle(color=WHITE, thickness=THICKNESS_CONTOURS) 45 | } 46 | 47 | class DefaultFaceMeshContoursStyle: 48 | def __call__(self, i: int = 0) -> Mapping[Tuple[int, int], 'FaceMeshStyle']: 49 | default_style = (FACEMESH_CONTOURS_CONNECTION_STYLE) 50 | connection_style = {} 51 | for k, v in default_style.items(): 52 | for connection in k: 53 | connection_style[connection] = v 54 | return connection_style 55 | 56 | 57 | 58 | 59 | class DefaultFaceMeshIrisConnectionsStyle: 60 | def __call__(self) -> Mapping[Tuple[int, int], 'FaceMeshStyle']: 61 | 62 | iris_style = {} 63 | 64 | left_spec = FaceMeshStyle(color=GREEN, thickness=THICKNESS_CONTOURS) 65 | for connection in landmark_connections.FACEMESH_LEFT_IRIS: 66 | iris_style[connection] = left_spec 67 | 68 | right_spec = FaceMeshStyle(color=RED, thickness=THICKNESS_CONTOURS) 69 | for connection in landmark_connections.FACEMESH_RIGHT_IRIS: 70 | iris_style[connection] = right_spec 71 | 72 | return iris_style 73 | 74 | 75 | FaceMeshContoursStyle = DefaultFaceMeshContoursStyle() 76 | FaceMeshIrisStyle = DefaultFaceMeshIrisConnectionsStyle() 77 | -------------------------------------------------------------------------------- /visionface/modules/landmarks.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Union 2 | import numpy as np 3 | 4 | # VisionFace modules 5 | from visionface.models.LandmarkDetector import DetectedLandmark2D, DetectedLandmark3D 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | 10 | class LandmarkDetection: 11 | def __init__(self, detector_backbone: str = "mediapipe") -> None: 12 | """ 13 | Initialize the landmark 
detection with the specified backbone. 14 | 15 | Currently supported backbones: 16 | - "mediapipe": 3D landmark detection 17 | - "dlib": 2D landmark detection 18 | 19 | Args: 20 | detector_backbone: Backbone name for the landmark detector (e.g., "mediapipe", "dlib"). 21 | """ 22 | self.detector_backbone = detector_backbone 23 | self.landmark_detector = self.build_model() 24 | 25 | def build_model(self) -> Any: 26 | """ 27 | Builds the landmark detection model based on the specified backbone. 28 | 29 | Returns: 30 | An initialized landmark detection model. 31 | """ 32 | return build_model(self.detector_backbone, "landmark_detection") 33 | 34 | def detect_3d_landmarks( 35 | self, 36 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 37 | ) -> List[List[DetectedLandmark3D]]: 38 | """ 39 | Detect 3D facial landmarks in one or more images using the specified detection backbone. 40 | 41 | Args: 42 | images: A single image or a list of images, each can be a file path or a NumPy array. 43 | 44 | Returns: 45 | A list of lists containing DetectedLandmark3D instances with 3D coordinates. 46 | """ 47 | loaded_images = load_images(images) 48 | validated_images = validate_images(loaded_images) 49 | return self.landmark_detector.detect_landmarks(validated_images) 50 | 51 | def detect_landmarks( 52 | self, 53 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 54 | ) -> List[List[DetectedLandmark2D]]: 55 | """ 56 | Detect 2D facial landmarks in one or more images using the specified detection backbone. 57 | 58 | Args: 59 | images: A single image or a list of images, each can be a file path or a NumPy array. 60 | 61 | Returns: 62 | A list of lists containing DetectedLandmark2D instances with 2D coordinates. 63 | """ 64 | loaded_images = load_images(images) 65 | validated_images = validate_images(loaded_images) 66 | return self.landmark_detector.detect_landmarks(validated_images) 67 | -------------------------------------------------------------------------------- /visionface/commons/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from typing import List 4 | 5 | 6 | def xywh2xyxy(detection: List[int]) -> List[int]: 7 | """ 8 | Convert bounding box coordinates from [x, y, width, height] to [x1, y1, x2, y2] format. 9 | 10 | Parameters 11 | ---------- 12 | detection : List[int] 13 | Bounding box in [x, y, width, height] format where: 14 | - x, y: coordinates of the top-left corner 15 | - width, height: dimensions of the bounding box 16 | 17 | Returns 18 | ------- 19 | List[int] 20 | Bounding box in [x1, y1, x2, y2] format where: 21 | - x1, y1: coordinates of the top-left corner 22 | - x2, y2: coordinates of the bottom-right corner 23 | """ 24 | return [ 25 | detection[0], 26 | detection[1], 27 | detection[0] + detection[2], 28 | detection[1] + detection[3], 29 | ] 30 | 31 | def get_home_directory(): 32 | return "." 
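# A quick worked example of the xywh-to-xyxy conversion above, using made-up
# box values: xywh2xyxy([10, 20, 100, 50]) -> [10, 20, 110, 70]
# (the width and height are simply added to the top-left corner).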
33 | 34 | def nms_numpy(boxes, scores, threshold, method): 35 | if boxes.size == 0: 36 | return np.empty((0, 3)) 37 | 38 | x1 = boxes[:, 0].copy() 39 | y1 = boxes[:, 1].copy() 40 | x2 = boxes[:, 2].copy() 41 | y2 = boxes[:, 3].copy() 42 | s = scores 43 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 44 | 45 | I = np.argsort(s) 46 | pick = np.zeros_like(s, dtype=np.int16) 47 | counter = 0 48 | while I.size > 0: 49 | i = I[-1] 50 | pick[counter] = i 51 | counter += 1 52 | idx = I[0:-1] 53 | 54 | xx1 = np.maximum(x1[i], x1[idx]).copy() 55 | yy1 = np.maximum(y1[i], y1[idx]).copy() 56 | xx2 = np.minimum(x2[i], x2[idx]).copy() 57 | yy2 = np.minimum(y2[i], y2[idx]).copy() 58 | 59 | w = np.maximum(0.0, xx2 - xx1 + 1).copy() 60 | h = np.maximum(0.0, yy2 - yy1 + 1).copy() 61 | 62 | inter = w * h 63 | if method == 'Min': 64 | o = inter / np.minimum(area[i], area[idx]) 65 | else: 66 | o = inter / (area[i] + area[idx] - inter) 67 | I = I[np.where(o <= threshold)] 68 | 69 | pick = pick[:counter].copy() 70 | return pick 71 | 72 | def batched_nms_numpy(boxes, scores, idxs, threshold, method): 73 | device = boxes.device 74 | if boxes.numel() == 0: 75 | return torch.empty((0,), dtype=torch.int64, device=device) 76 | max_coordinate = boxes.max() 77 | offsets = idxs.to(boxes) * (max_coordinate + 1) 78 | boxes_for_nms = boxes + offsets[:, None] 79 | boxes_for_nms = boxes_for_nms.detach().numpy() 80 | scores = scores.detach().numpy() 81 | keep = nms_numpy(boxes_for_nms, scores, threshold, method) 82 | return torch.as_tensor(keep, dtype=torch.long, device=device) -------------------------------------------------------------------------------- /visionface/modules/modeling.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | # face detection models 4 | from visionface.models.face_detection import ( 5 | MediaPipe, 6 | MTCNN, 7 | YOLO, 8 | YOLOEye, 9 | YOLOWolrd, 10 | OpenCV 11 | ) 12 | from visionface.models.face_embedding import ( 13 | FaceNet 14 | ) 15 | 16 | # Landmarks detection models 17 | from visionface.models.landmark_detection import ( 18 | MediaPipeLandmark, 19 | Dlib 20 | ) 21 | 22 | 23 | def build_model(model_name: str, task: str) -> Any: 24 | """ 25 | Build and return a model instance based on the specified task and model name. 26 | 27 | This function creates and returns an appropriate model instance 28 | for the requested task using the specified model implementation. 29 | 30 | Parameters 31 | ---------- 32 | model_name : str 33 | The name of the model implementation to use (e.g., "mediapipe"). 34 | task : str 35 | The task category for which to build a model (e.g., "face_detection"). 36 | 37 | Returns 38 | ------- 39 | Any 40 | A buit model class for the specified task. 
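For example, build_model("mediapipe", "face_detection") returns an initialized MediaPipe face detector from the registry below (illustrative call; it assumes the optional mediapipe dependency is installed).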
41 | 42 | Raises 43 | ------ 44 | ValueError 45 | If the requested task is not implemented in the model registry 46 | """ 47 | models = { 48 | "face_detection": { 49 | "mediapipe": MediaPipe.MediaPipeDetector, 50 | "mtcnn": MTCNN.MTCNNDetector, 51 | "yoloe-small": YOLOEye.YOLOEyeSmallDetector, 52 | "yoloe-medium": YOLOEye.YOLOEyeMediumDetector, 53 | "yoloe-large": YOLOEye.YOLOEyeLargeDetector, 54 | "yolo-nano": YOLO.YOLONanoDetector, 55 | "yolo-small": YOLO.YOLOSmallDetector, 56 | "yolo-medium": YOLO.YOLOMediumDetector, 57 | "yolo-large": YOLO.YOLOLargeDetector, 58 | "yolow-small": YOLOWolrd.YOLOWorldSmallDetector, 59 | "yolow-medium": YOLOWolrd.YOLOWorldMediumDetector, 60 | "yolow-large": YOLOWolrd.YOLOWorldLargeDetector, 61 | "yolow-xlarge": YOLOWolrd.YOLOWorldXLargeDetector, 62 | "opencv": OpenCV.OpenCVDetector 63 | }, 64 | "landmark_detection": { 65 | "mediapipe": MediaPipeLandmark.MediaPipeFaceMeshDetector, 66 | "dlib": Dlib.DlibFaceLandmarkDetector 67 | }, 68 | "face_embedding": { 69 | "FaceNet-VGG": FaceNet.FaceNetVGG, 70 | "FaceNet-CASIA": FaceNet.FaceNetCASIA 71 | } 72 | } 73 | 74 | if models.get(task) is None: 75 | raise ValueError(f"Unimplemented task: {task}") 76 | 77 | model = models[task].get(model_name) 78 | if model is None: 79 | raise ValueError(f"Invalid model_name passed - {task}/{model_name}") 80 | return model() 81 | -------------------------------------------------------------------------------- /visionface/models/face_embedding/Dlib.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | # VisionFace modules 8 | from visionface.commons.download_files import download_model_weights 9 | from visionface.models.FaceEmbedding import FaceEmbedder 10 | 11 | 12 | DLIB_WEIGHTS = "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" 13 | DLIB_WEIGHT_FILENAME = "dlib_resnet_v1.dat" 14 | 15 | class DlibFaceEmbedder(FaceEmbedder): 16 | """ 17 | Dlib-based face embedding model implementation. 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.model = DlibResNetModel() 22 | self.model_name = "Dlib" 23 | self.input_shape = (150, 150) 24 | self.output_shape = 128 25 | 26 | class DlibResNetModel(nn.Module): 27 | """ 28 | Dlib face recognition ResNet model. 29 | """ 30 | 31 | def __init__(self): 32 | self._dlib_model = self._load_dlib_model() 33 | 34 | def _load_dlib_model(self): 35 | """ 36 | Load the Dlib face recognition model. 37 | 38 | Returns: 39 | dlib.face_recognition_model_v1: Loaded Dlib face recognition model. 40 | """ 41 | try: 42 | import dlib 43 | except ModuleNotFoundError as e: 44 | raise ImportError( 45 | "Dlib is an optional dependency. Please install it using 'pip install dlib' " 46 | "to use the Dlib face embedder." 47 | ) from e 48 | 49 | # Download model weights if necessary 50 | weight_file_path = download_model_weights( 51 | filename=DLIB_WEIGHT_FILENAME, 52 | download_url=DLIB_WEIGHTS, 53 | compression_format="bz2" 54 | ) 55 | return dlib.face_recognition_model_v1(str(weight_file_path)) 56 | 57 | def forward(self, imgs: List[np.ndarray], normalize_embeddings: bool = True) -> List[List[float]]: 58 | """ 59 | Compute face embeddings for a batch of images. 60 | 61 | Args: 62 | imgs (List[np.ndarray]): List of face images. 63 | normalize_embeddings (bool): Whether to apply L2 normalization to embeddings. 
64 | 65 | Returns: 66 | torch.Tensor: Tensor of shape (batch_size, 128) with face embeddings. 67 | """ 68 | 69 | embeddings = [] 70 | 71 | for img in imgs: 72 | face_descriptor = self._dlib_model.compute_face_descriptor(img) 73 | embedding_vector = np.array(face_descriptor, dtype=np.float32) 74 | embeddings.append(embedding_vector) 75 | 76 | # Convert list of arrays to tensor 77 | embeddings_tensor = torch.tensor(embeddings) 78 | 79 | if normalize_embeddings: 80 | embeddings_tensor = F.normalize(embeddings_tensor, p=2, dim=1) 81 | 82 | return embeddings_tensor -------------------------------------------------------------------------------- /visionface/db/qdrant/search_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import List, Dict, Union, Optional 3 | import numpy as np 4 | 5 | from visionface.db.qdrant.config import SearchConfig, SearchMethod 6 | 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | class SearchManager: 12 | """Manages search operations for Qdrant""" 13 | 14 | def __init__(self, client): 15 | self.client = client 16 | 17 | def search_embeddings(self, 18 | collection_name: str, 19 | query_vectors: Union[np.ndarray, List[float]], 20 | config: SearchConfig = SearchConfig()) -> List[Dict]: 21 | """ 22 | Search embeddings using various methods 23 | 24 | Args: 25 | collection_name: Target collection 26 | query_vectors: Query embedding vector 27 | config: Search configuration 28 | 29 | Returns: 30 | List[Dict]: Search results 31 | """ 32 | try: 33 | # Validate config 34 | config.validate() 35 | results = [] 36 | if config.method == SearchMethod.SIMILARITY: 37 | for query_vector in query_vectors: 38 | results.extend( 39 | self._similarity_search(collection_name, query_vector, config) 40 | ) 41 | else: 42 | raise ValueError(f"Unsupported search method: {config.method}") 43 | 44 | formatted_results = self._format_results(results, config) 45 | 46 | return formatted_results 47 | 48 | except Exception as e: 49 | logger.error(f"Search failed: {e}") 50 | raise ValueError(f"Search operation failed: {e}") 51 | 52 | def _similarity_search(self, collection_name: str, query_vector: List[float], 53 | config: SearchConfig) -> List: 54 | """Perform similarity search""" 55 | return self.client.search( 56 | collection_name=collection_name, 57 | query_vector=query_vector, 58 | limit=config.limit, 59 | offset=config.offset, 60 | with_payload=config.with_payload, 61 | with_vectors=config.with_vectors, 62 | score_threshold=config.score_threshold 63 | ) 64 | 65 | def _format_results(self, results: List, config: SearchConfig) -> List[Dict]: 66 | """Format search results""" 67 | formatted_results = [] 68 | for result in results: 69 | formatted_result = { 70 | "id": result.id, 71 | "face_name": result.payload["face_name"] if config.with_payload else None, 72 | "score": getattr(result, 'score', None), 73 | "vector": result.vector if config.with_vectors else None 74 | } 75 | formatted_results.append(formatted_result) 76 | return formatted_results -------------------------------------------------------------------------------- /visionface/db/qdrant/config.py: -------------------------------------------------------------------------------- 1 | 2 | from dataclasses import asdict, dataclass 3 | from enum import Enum 4 | from typing import Any, Dict, Optional 5 | from qdrant_client.http.models import Distance 6 | 7 | 8 | class IndexType(Enum): 9 | """Available index types for payload fields""" 10 | TEXT = "text" 11 | INTEGER 
= "integer" 12 | FLOAT = "float" 13 | BOOL = "bool" 14 | GEO = "geo" 15 | DATETIME = "datetime" 16 | 17 | @dataclass 18 | class ConnectionConfig: 19 | """Configuration for Qdrant connection""" 20 | host: str = "localhost" 21 | port: int = 6333 22 | url: Optional[str] = None 23 | api_key: Optional[str] = None 24 | https: bool = False 25 | timeout: float = 5.0 26 | 27 | def validate(self) -> bool: 28 | """Validate connection configuration""" 29 | if not self.url and not self.host: 30 | raise ValueError(f"Either URL or host must be provided, url: {self.url}, host: {self.host}") 31 | if self.port <= 0: 32 | raise ValueError(f"Port must be positive, You set port to {self.port}") 33 | if self.timeout <= 0: 34 | raise ValueError(f"Timeout must be positive, {self.timeout}") 35 | return True 36 | 37 | @dataclass 38 | class CollectionConfig: 39 | """Configuration for collection creation""" 40 | name: str 41 | vector_size: int 42 | hnsw_config: Optional[Dict] = None 43 | optimizer_config: Optional[Dict] = None 44 | quantization_config: Optional[Dict] = None 45 | payload_indexes: Optional[Dict[str, IndexType]] = None 46 | replication_factor: int = 1 47 | write_consistency_factor: int = 1 48 | 49 | def to_dict(self) -> Dict[str, Any]: 50 | """Convert config to dictionary""" 51 | return asdict(self) 52 | 53 | @classmethod 54 | def from_dict(cls, data: Dict[str, Any]) -> 'CollectionConfig': 55 | """Create config from dictionary""" 56 | return cls(**data) 57 | 58 | def validate(self) -> bool: 59 | """Validate configuration parameters""" 60 | if self.vector_size <= 0: 61 | raise ValueError(f"Vector size must be positive, {self.vector_size}") 62 | if self.replication_factor <= 0: 63 | raise ValueError(f"Replication factor must be positive, {self.replication_factor}") 64 | if self.write_consistency_factor <= 0: 65 | raise ValueError(f"Write consistency factor must be positive, {self.write_consistency_factor}") 66 | return True 67 | 68 | class SearchMethod(Enum): 69 | """Available search methods""" 70 | SIMILARITY = "similarity" 71 | 72 | @dataclass 73 | class SearchConfig: 74 | """Configuration for search operations""" 75 | method: SearchMethod = SearchMethod.SIMILARITY 76 | limit: int = 10 77 | offset: int = 0 78 | with_payload: bool = True 79 | with_vectors: bool = False 80 | score_threshold: Optional[float] = None 81 | exact: bool = False 82 | hnsw_ef: Optional[int] = None 83 | quantization_rescore: Optional[bool] = None 84 | 85 | def validate(self) -> bool: 86 | """Validate search configuration""" 87 | if self.limit <= 0: 88 | raise ValueError("Limit must be positive") 89 | if self.offset < 0: 90 | raise ValueError("Offset must be non-negative") 91 | if self.score_threshold is not None and (self.score_threshold < 0 or self.score_threshold > 1): 92 | raise ValueError("Score threshold must be between 0 and 1") 93 | return True -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | # Read README file 5 | def read_readme(): 6 | with open("README.md", "r", encoding="utf-8") as fh: 7 | return fh.read() 8 | 9 | def read_requirements(): 10 | requirements_path = "requirements.txt" 11 | if os.path.exists(requirements_path): 12 | with open(requirements_path, "r", encoding="utf-8") as f: 13 | return [line.strip() for line in f if line.strip() and not line.startswith("#")] 14 | return [] 15 | 16 | __version__ = "1.0.0" 17 | 18 | 
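# Note: backends such as mediapipe, dlib and qdrant-client are imported lazily and
# installed separately (see the ImportError hints in the corresponding modules).
# A hypothetical extras mapping, if one were added here, could look like:
# EXTRA_REQUIRES = {"mediapipe": ["mediapipe"], "dlib": ["dlib"], "vectordb": ["qdrant-client"]}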
CORE_REQUIRES = [ 19 | "torch>=1.8.0", 20 | "torchvision>=0.9.0", 21 | "numpy>=1.19.0", 22 | "opencv-python>=4.5.0", 23 | "Pillow>=8.0.0", 24 | "requests>=2.25.0", 25 | ] 26 | 27 | 28 | 29 | setup( 30 | name="visionface", 31 | version=__version__, 32 | author="VisionFace Team", 33 | author_email="visio.face2025@gmail.com", 34 | description="Modern face detection, recognition & analysis framework with 12+ models", 35 | long_description=read_readme(), 36 | long_description_content_type="text/markdown", 37 | url="https://github.com/miladfa7/visionface", 38 | project_urls={ 39 | "Documentation": "https://visionface.readthedocs.io", 40 | "Source Code": "https://github.com/miladfa7/visionface", 41 | "Bug Tracker": "https://github.com/miladfa7/visionface/issues", 42 | "Changelog": "https://github.com/miladfa7/visionface/blob/main/CHANGELOG.md", 43 | }, 44 | packages=find_packages(exclude=["tests*", "docs*", "examples*"]), 45 | classifiers=[ 46 | "Development Status :: 4 - Beta", 47 | "Intended Audience :: Developers", 48 | "Intended Audience :: Science/Research", 49 | "Intended Audience :: Education", 50 | "License :: OSI Approved :: MIT License", 51 | "Operating System :: OS Independent", 52 | "Programming Language :: Python :: 3", 53 | "Programming Language :: Python :: 3.8", 54 | "Programming Language :: Python :: 3.9", 55 | "Programming Language :: Python :: 3.10", 56 | "Programming Language :: Python :: 3.11", 57 | "Programming Language :: Python :: 3.12", 58 | "Programming Language :: Python :: 3.13", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Software Development :: Libraries :: Python Modules", 62 | "Topic :: Multimedia :: Graphics :: Graphics Conversion", 63 | "Topic :: Security", 64 | "Framework :: FastAPI", 65 | ], 66 | python_requires=">=3.8", # Fixed: was >=3.10 but classifiers show 3.8+ 67 | install_requires=CORE_REQUIRES, 68 | include_package_data=True, 69 | package_data={ 70 | "visionface": [ 71 | "models/*.pth", 72 | "models/*.onnx", 73 | "configs/*.yaml", 74 | "data/*.json", 75 | ], 76 | }, 77 | entry_points={ 78 | "console_scripts": [ 79 | "visionface=visionface.cli:main", 80 | ], 81 | }, 82 | keywords=[ 83 | "computer-vision", 84 | "face-detection", 85 | "face-recognition", 86 | "facial-landmarks", 87 | "deep-learning", 88 | "pytorch", 89 | "yolo", 90 | "mediapipe", 91 | "artificial-intelligence", 92 | "biometrics", 93 | "image-processing", 94 | "real-time", 95 | "production-ready", 96 | ], 97 | zip_safe=False, 98 | platforms=["any"], 99 | license="MIT", 100 | ) -------------------------------------------------------------------------------- /visionface/FaceAnnotators.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | 3 | 4 | #VisionFace module 5 | from visionface.annotators.base import ImageType, RawDetection 6 | from visionface.models.Detector import Detector 7 | from visionface.annotators.detection import BoxCornerAnnotator, BoxAnnotator 8 | 9 | 10 | def box_annotator( 11 | img: ImageType, 12 | detections: Union[List[Detector], List[RawDetection]], 13 | color: Tuple = (245, 113, 47), 14 | thickness: int = 4, 15 | highlight: bool = True, 16 | highlight_opacity: float = 0.2, 17 | highlight_color: tuple = (255, 255, 255), 18 | ): 19 | """ 20 | Annotate an image with bounding boxes around detected face(s). 
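Typical usage (illustrative) is to pass the image together with the detections returned for it by FaceDetection.detect_faces, e.g. box_annotator(img, detections[0]) for the first image's faces; raw [x1, y1, x2, y2] lists are accepted as well.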
21 | 22 | Parameters 23 | ---------- 24 | img : ImageType 25 | The input image on which to draw annotations. Can be either a NumPy array 26 | or a PIL Image object. 27 | detections : List[Detector] 28 | A list of detection face(s) containing bounding box information. 29 | color : Tuple, optional 30 | The RGB color for the bounding boxes, default is (245, 113, 47). 31 | thickness : int, optional 32 | The thickness of the bounding box lines in pixels, default is 4. 33 | highlight : bool, optional 34 | Whether to highlight the detected regions, by default True 35 | highlight_opacity : float, optional 36 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 37 | highlight_color : tuple, optional 38 | BGR color tuple for the highlight, by default (255, 255, 255) 39 | 40 | Returns 41 | ------- 42 | ImageType 43 | The input image with bounding box annotations added. 44 | """ 45 | annotator = BoxAnnotator( 46 | color=color, 47 | thickness=thickness, 48 | ) 49 | return annotator.annotate( 50 | img=img, 51 | detections=detections, 52 | highlight=highlight, 53 | highlight_opacity=highlight_opacity, 54 | highlight_color=highlight_color 55 | ) 56 | 57 | def box_corner_annotator( 58 | img: ImageType, 59 | detections: Union[List[Detector], List[RawDetection]], 60 | color: Tuple = (245, 113, 47), 61 | thickness: int = 4, 62 | corner_length: int = 15, 63 | highlight: bool = True, 64 | highlight_opacity: float = 0.2, 65 | highlight_color: tuple = (255, 255, 255), 66 | ): 67 | """ 68 | Annotate an image with corner boxes around detected face(s). 69 | 70 | Parameters 71 | ---------- 72 | img : ImageType 73 | The input image on which to draw annotations. Can be either a NumPy array 74 | or a PIL Image object. 75 | detections : List[Detector] 76 | A list of detection face(s) containing bounding box information. 77 | color : Tuple, optional 78 | The RGB color for the corner boxes, default is (245, 113, 47). 79 | thickness : int, optional 80 | The thickness of the corner box lines in pixels, default is 4. 81 | corner_length : int, optional 82 | The length of each corner in pixels, default is 15. 83 | 84 | Returns 85 | ------- 86 | ImageType 87 | The input image with corner box annotations added. 88 | """ 89 | annotator = BoxCornerAnnotator( 90 | color=color, 91 | thickness=thickness, 92 | corner_length=corner_length, 93 | ) 94 | return annotator.annotate( 95 | img=img, 96 | detections=detections, 97 | highlight=highlight, 98 | highlight_opacity=highlight_opacity, 99 | highlight_color=highlight_color 100 | ) -------------------------------------------------------------------------------- /visionface/annotators/FaceAnnotators.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Tuple 2 | 3 | 4 | #VisionFace module 5 | from visionface.annotators.base import ImageType, RawDetection 6 | from visionface.models.Detector import Detector 7 | from visionface.annotators.detection import BoxCornerAnnotator, BoxAnnotator 8 | 9 | 10 | def box_annotator( 11 | img: ImageType, 12 | detections: Union[List[Detector], List[RawDetection]], 13 | color: Tuple = (245, 113, 47), 14 | thickness: int = 4, 15 | highlight: bool = True, 16 | highlight_opacity: float = 0.2, 17 | highlight_color: tuple = (255, 255, 255), 18 | ): 19 | """ 20 | Annotate an image with bounding boxes around detected face(s). 21 | 22 | Parameters 23 | ---------- 24 | img : ImageType 25 | The input image on which to draw annotations. Can be either a NumPy array 26 | or a PIL Image object. 
27 | detections : List[Detector] 28 | A list of detection face(s) containing bounding box information. 29 | color : Tuple, optional 30 | The RGB color for the bounding boxes, default is (245, 113, 47). 31 | thickness : int, optional 32 | The thickness of the bounding box lines in pixels, default is 4. 33 | highlight : bool, optional 34 | Whether to highlight the detected regions, by default True 35 | highlight_opacity : float, optional 36 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 37 | highlight_color : tuple, optional 38 | BGR color tuple for the highlight, by default (255, 255, 255) 39 | 40 | Returns 41 | ------- 42 | ImageType 43 | The input image with bounding box annotations added. 44 | """ 45 | annotator = BoxAnnotator( 46 | color=color, 47 | thickness=thickness, 48 | ) 49 | return annotator.annotate( 50 | img=img, 51 | detections=detections, 52 | highlight=highlight, 53 | highlight_opacity=highlight_opacity, 54 | highlight_color=highlight_color 55 | ) 56 | 57 | def box_corner_annotator( 58 | img: ImageType, 59 | detections: Union[List[Detector], List[RawDetection]], 60 | color: Tuple = (245, 113, 47), 61 | thickness: int = 4, 62 | corner_length: int = 15, 63 | highlight: bool = True, 64 | highlight_opacity: float = 0.2, 65 | highlight_color: tuple = (255, 255, 255), 66 | ): 67 | """ 68 | Annotate an image with corner boxes around detected face(s). 69 | 70 | Parameters 71 | ---------- 72 | img : ImageType 73 | The input image on which to draw annotations. Can be either a NumPy array 74 | or a PIL Image object. 75 | detections : List[Detector] 76 | A list of detection face(s) containing bounding box information. 77 | color : Tuple, optional 78 | The RGB color for the corner boxes, default is (245, 113, 47). 79 | thickness : int, optional 80 | The thickness of the corner box lines in pixels, default is 4. 81 | corner_length : int, optional 82 | The length of each corner in pixels, default is 15. 83 | 84 | Returns 85 | ------- 86 | ImageType 87 | The input image with corner box annotations added. 88 | """ 89 | annotator = BoxCornerAnnotator( 90 | color=color, 91 | thickness=thickness, 92 | corner_length=corner_length, 93 | ) 94 | return annotator.annotate( 95 | img=img, 96 | detections=detections, 97 | highlight=highlight, 98 | highlight_opacity=highlight_opacity, 99 | highlight_color=highlight_color 100 | ) -------------------------------------------------------------------------------- /visionface/models/Detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | import numpy as np 3 | from typing import Any, Union, List, Optional 4 | from dataclasses import dataclass 5 | 6 | class Detector(ABC): 7 | """ 8 | Abstract base class for a face detection system. 9 | 10 | This class defines the interface for building a detection model, 11 | running detection on images, and post-processing the results. 12 | Subclasses must implement all abstract methods. 13 | """ 14 | 15 | def __init__(self, MODEL_ID: int = 0, MIN_CONFIDENCE: float = 0.5): 16 | """ 17 | Initialize the base Detector with a confidence threshold. 18 | 19 | Args: 20 | conf (float): Minimum confidence score to consider a face detection valid. Default 0.25 21 | """ 22 | self.model_id = MODEL_ID 23 | self.conf = MIN_CONFIDENCE 24 | 25 | @abstractmethod 26 | def build_model(self) -> Any: 27 | """ 28 | Build and return the face detection model. 29 | 30 | This method should load or initialize the face detection model. 
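Concrete subclasses (for example the MediaPipe, OpenCV, or YOLO detectors registered in modules/modeling.py) typically load or download their pretrained weights here and return the ready-to-use backend object.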
31 | Returns: 32 | model (Any): The model used for detection. 33 | """ 34 | pass 35 | 36 | @abstractmethod 37 | def detect_faces(self, imgs: Union[np.ndarray, List[np.ndarray]]): 38 | """ 39 | Detect faces in a single image or a list of images. 40 | 41 | Args: 42 | imgs (Union[np.ndarray, List[np.ndarray]]): 43 | - A single image as a NumPy array with shape (H, W, 3), or 44 | - A list of such images. 45 | 46 | Returns: 47 | detections (Any): Raw output of the detection model. 48 | """ 49 | pass 50 | 51 | @abstractmethod 52 | def process_faces(self, results): 53 | """ 54 | Process the raw detections into a structured format. 55 | 56 | This could include bounding boxes, landmarks, confidence scores, etc. 57 | 58 | Args: 59 | results (Any): Raw model output from `detect_faces`. 60 | 61 | Returns: 62 | results (List[Any]): Processed list of face detection results in a consistent format. 63 | """ 64 | pass 65 | 66 | 67 | @dataclass 68 | class DetectedFace: 69 | """ 70 | Represents detected faces in an image. 71 | 72 | Attributes: 73 | x (int): The x-coordinate of the top-left corner of the face bounding box. 74 | y (int): The y-coordinate of the top-left corner of the face bounding box. 75 | w (int): The width of the face bounding box. 76 | h (int): The height of the face bounding box. 77 | conf (float): The confidence score of the face detection, typically between 0 and 1. 78 | class_name (str): The name of the detected class (e.g., "face"). 79 | """ 80 | xmin: int 81 | ymin: int 82 | xmax: int 83 | ymax: int 84 | conf: float 85 | class_name: Optional[str] = None 86 | cropped_face: Optional[np.ndarray] = None 87 | 88 | @property 89 | def xyxy(self): 90 | """ 91 | Returns the bounding box coordinates as a tuple (xmin, ymin, xmax, ymax). 92 | """ 93 | return (self.xmin, self.ymin, self.xmax, self.ymax) 94 | 95 | @property 96 | def xywh(self): 97 | """ 98 | Returns the bounding box coordinates as a tuple (x, y, w, h). 99 | """ 100 | width = self.xmax - self.xmin 101 | height = self.ymax - self.ymin 102 | return (self.xmin, self.ymin, width, height) 103 | 104 | def to_dict(self): 105 | return { 106 | "xywh": self.xywh, 107 | "xyxy": self.xyxy, 108 | "conf": self.conf, 109 | "class_name": self.class_name 110 | } -------------------------------------------------------------------------------- /visionface/models/landmark_detection/MediaPipeLandmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List 3 | import cv2 4 | 5 | # VisionFace modules 6 | from visionface.models.LandmarkDetector import LandmarkDetector, DetectedLandmark3D 7 | from visionface.models.landmark_detection.utils import medipipe_mesh_landmark_names 8 | 9 | class MediaPipeFaceMeshDetector(LandmarkDetector): 10 | """ 11 | Landmark detector that uses MediaPipe Face Mesh to extract 3D facial landmarks. 12 | """ 13 | def __init__(self): 14 | """ 15 | Initialize the MediaPipe face mesh model and load landmark names. 16 | """ 17 | self.mesh_landmark_names = medipipe_mesh_landmark_names() 18 | self.model = self.build_model() 19 | 20 | def build_model(self): 21 | """ 22 | Load the MediaPipe FaceMesh model. 23 | 24 | Returns 25 | ------- 26 | model : mediapipe.solutions.face_mesh.FaceMesh 27 | An instance of the MediaPipe FaceMesh model. 28 | """ 29 | try: 30 | import mediapipe as mp 31 | except ModuleNotFoundError as error: 32 | raise ImportError( 33 | "The 'mediapipe' library is not installed. " 34 | "It is required for MediaPipeFaceMeshDetector to work. 
" 35 | "Please install it using: pip install mediapipe" 36 | ) from error 37 | 38 | mp_face_mesh = mp.solutions.face_mesh 39 | landmark_detection = mp_face_mesh.FaceMesh( 40 | static_image_mode=True, 41 | max_num_faces=1, 42 | refine_landmarks=True, 43 | min_detection_confidence=0.5 44 | ) 45 | return landmark_detection 46 | 47 | def _detect_one(self, img: np.ndarray) -> List[DetectedLandmark3D]: 48 | """ 49 | Detect facial landmarks in a single image. 50 | 51 | Parameters 52 | ---------- 53 | img : np.ndarray 54 | The input image in BGR format. 55 | 56 | Returns 57 | ------- 58 | landmarks : List[DetectedLandmark3D] 59 | List of detected 3D landmarks for the face. 60 | """ 61 | results = self.model.process(img) 62 | if results.multi_face_landmarks: 63 | return self.process_landmarks(results) 64 | else: 65 | return [] 66 | 67 | def detect_landmarks(self, imgs: List[np.ndarray]) -> List[List[DetectedLandmark3D]]: 68 | """ 69 | Detect facial landmarks in a list of images. 70 | 71 | Parameters 72 | ---------- 73 | imgs : List[np.ndarray] 74 | List of images (each as a NumPy array in BGR format). 75 | 76 | Returns 77 | ------- 78 | List[List[DetectedLandmark3D]] 79 | A list where each element contains the detected landmarks for an image. 80 | """ 81 | return [self._detect_one(img) for img in imgs] 82 | 83 | def process_landmarks(self, results) -> List[DetectedLandmark3D]: 84 | """ 85 | Convert MediaPipe landmark results into DetectedLandmark3D objects. 86 | 87 | Parameters 88 | ---------- 89 | results : mediapipe.framework.formats.landmark_pb2.NormalizedLandmarkList 90 | The raw landmark output from the MediaPipe model. 91 | 92 | Returns 93 | ------- 94 | landmarks : List[DetectedLandmark3D] 95 | List of 3D landmarks with optional names. 96 | """ 97 | landmarks = [] 98 | for face_landmarks in results.multi_face_landmarks: 99 | for idx, lm in enumerate(face_landmarks.landmark): 100 | landmark_name = self.mesh_landmark_names.get(idx, f"unknown_{idx}") 101 | x, y, z = lm.x, lm.y, lm.z 102 | facial_landmarks = DetectedLandmark3D(x=x, y=y, z=z, name=landmark_name) 103 | landmarks.append(facial_landmarks) 104 | return landmarks -------------------------------------------------------------------------------- /visionface/annotators/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from PIL import Image 5 | from typing import List, Tuple, Union 6 | import math 7 | 8 | # VisionFace modules 9 | from visionface.annotators.base import ImageType, RawDetection 10 | from visionface.models.Detector import Detector 11 | 12 | def get_xyxy(detection: Union[Detector, RawDetection]) -> Tuple[int, int, int, int]: 13 | """ 14 | Extract bounding box coordinates from detection object or list. 15 | 16 | Args: 17 | detection: Detector or list with [x1, y1, x2, y2] 18 | 19 | Returns: 20 | Tuple of (x1, y1, x2, y2) 21 | 22 | Raises: 23 | TypeError: If detection format is not supported! 
24 | """ 25 | if hasattr(detection, 'xyxy'): 26 | return detection.xyxy 27 | elif (isinstance(detection, List) or isinstance(detection, Tuple)) and len(detection) >= 4: 28 | return int(detection[0]), int(detection[1]), int(detection[2]), int(detection[3]) 29 | else: 30 | raise TypeError(f"Unsupported detection type: {type(detection)}") 31 | 32 | 33 | def highlight_face( 34 | img: ImageType, 35 | detections: List[Detector], 36 | highlight_opacity: float = 0.2, 37 | highlight_color: Tuple[int, int, int] = (255, 255, 255), 38 | ) -> ImageType: 39 | """ 40 | Apply semi-transparent highlight to detected regions in image. 41 | 42 | Args: 43 | img: Input image 44 | detections: List of detections to highlight 45 | highlight_opacity: Opacity of highlight (0.0-1.0) 46 | highlight_color: BGR color tuple for highlight 47 | 48 | Returns: 49 | Image with highlighted regions 50 | """ 51 | overlay = img.copy() 52 | for detection in detections: 53 | x1, y1, x2, y2 = get_xyxy(detection) 54 | cv2.rectangle( 55 | overlay, 56 | (x1, y1), 57 | (x2, y2), 58 | highlight_color, 59 | -1 60 | ) 61 | cv2.addWeighted(overlay, highlight_opacity, img, 1 - highlight_opacity, 0, img) 62 | return img 63 | 64 | def convert_img_to_numpy(img: ImageType) -> np.ndarray: 65 | """ 66 | Convert different image formats to numpy array for processing. 67 | 68 | Args: 69 | img: Image as file path, numpy array, or PIL Image 70 | 71 | Returns: 72 | Image as numpy array in BGR format 73 | 74 | Raises: 75 | FileNotFoundError: If image file does not exist 76 | ValueError: If image file cannot be loaded 77 | TypeError: If image format is not supported 78 | """ 79 | if isinstance(img, str): 80 | if not os.path.exists(img): 81 | raise FileNotFoundError(f"Image file not found: {img}") 82 | img_np = cv2.imread(img) 83 | if img_np is None: 84 | raise ValueError(f"Failed to load image: {img}") 85 | return img_np 86 | 87 | elif isinstance(img, np.ndarray): 88 | return img.copy() 89 | 90 | elif isinstance(img, Image.Image): 91 | img_np = np.array(img) 92 | # Convert RGB to BGR (OpenCV format) 93 | if img_np.shape[-1] == 3: 94 | img_np = img_np[..., ::-1].copy() 95 | return img_np 96 | 97 | else: 98 | raise TypeError(f"Unsupported image type: {type(img)}") 99 | 100 | 101 | def denormalize_landmark( 102 | normalized_x: float, 103 | normalized_y: float, 104 | image_width: int, 105 | image_height: int 106 | ) -> Union[None, Tuple[int, int]]: 107 | 108 | def is_valid_normalized_value(value: float) -> bool: 109 | return (value > 0 or math.isclose(0, value)) and (value < 1 or 110 | math.isclose(1, value)) 111 | 112 | if not (is_valid_normalized_value(normalized_x) and 113 | is_valid_normalized_value(normalized_y)): 114 | # TODO: Draw coordinates even if it's outside of the image bounds. 
115 | return None 116 | x_px = min(math.floor(normalized_x * image_width), image_width - 1) 117 | y_px = min(math.floor(normalized_y * image_height), image_height - 1) 118 | return x_px, y_px 119 | -------------------------------------------------------------------------------- /visionface/modules/detection.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Union, List 2 | import numpy as np 3 | 4 | # visionface modules 5 | from visionface.models.Detector import Detector, DetectedFace 6 | from visionface.modules.modeling import build_model 7 | from visionface.commons.image_utils import load_images, validate_images 8 | 9 | class FaceDetection: 10 | """ 11 | detecting faces in images using a specified detection backbone. 12 | """ 13 | 14 | def __init__(self, detector_backbone: str = "mediapipe") -> None: 15 | """ 16 | Initializes the FaceDetection class with the specified detector backbone. 17 | 18 | Parameters 19 | ---------- 20 | detector_backbone : str, optional 21 | Name of the face detection backend to use. Default is "mediapipe". 22 | """ 23 | self.face_detector = self.build_model(detector_backbone) 24 | 25 | def build_model(self, model_name: str) -> Any: 26 | """ 27 | Builds the face detection model based on the specified model name. 28 | 29 | Parameters 30 | ---------- 31 | model_name : str 32 | The name of the face detection model to use. 33 | 34 | Returns 35 | ------- 36 | Any 37 | An initialized face detection model. 38 | """ 39 | return build_model(model_name, "face_detection") 40 | 41 | def detect_faces( 42 | self, 43 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 44 | return_cropped_faces: bool = True 45 | ) -> List[List[DetectedFace]]: 46 | """ 47 | Detect faces in one or more images using the specified detector backbone. 48 | 49 | Parameters 50 | ---------- 51 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 52 | A single image or a list of images. Each image can be either a file path (str) 53 | or an image array. 54 | return_cropped_faces : bool, optional 55 | Whether to include cropped face images in each DetectedFace object. Default is True. 56 | 57 | Returns 58 | ------- 59 | List[List[DetectedFace]]: 60 | A list where each element is a list of DetectedFace objects for the corresponding input image. 61 | """ 62 | loaded_images = load_images(images) 63 | validated_images = validate_images(loaded_images) 64 | return self.face_detector.detect_faces(validated_images, return_cropped_faces) 65 | 66 | 67 | def detect_faces_with_prompt( 68 | self, 69 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 70 | prompts: Union[str, List[str]], 71 | return_cropped_faces: bool = True 72 | ) -> List[List[DetectedFace]]: 73 | """ 74 | Detect faces in one or more images using a prompt-based detection approach. 75 | 76 | Parameters 77 | ---------- 78 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 79 | A single image or a list of images. Each image can be either a file path (str) 80 | or an image array. 81 | 82 | prompts : Union[str, List[str]] 83 | A single prompt or a list of prompts describing the object(s) to detect. 84 | For example, "face". 85 | 86 | return_cropped_faces : bool, optional 87 | Whether to include cropped face images in each DetectedFace object. Default is True. 88 | 89 | Returns 90 | ------- 91 | List[List[DetectedFace]] 92 | A list where each element is a list of DetectedFace objects 93 | for the corresponding input image. 
Each detection includes bounding box 94 | coordinates, confidence score, class name, and optionally a cropped region. 95 | """ 96 | loaded_images = load_images(images) 97 | validated_images = validate_images(loaded_images) 98 | 99 | if isinstance(prompts, str): 100 | prompts = [prompts] 101 | 102 | # Optional: enforce prompt count matching image count 103 | # if len(validated_images) != len(prompts): 104 | # raise ValueError("The number of images and prompts must be the same.") 105 | 106 | return self.face_detector.detect_faces_with_prompt(validated_images, prompts, return_cropped_faces) 107 | 108 | -------------------------------------------------------------------------------- /visionface/models/FaceEmbedding.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from typing import List, Union, Any, Tuple 3 | from dataclasses import dataclass 4 | import numpy as np 5 | import cv2 6 | import torch 7 | from torchvision.transforms import functional as F 8 | 9 | # VisionFace module 10 | from visionface.commons.image_utils import validate_images 11 | 12 | class FaceEmbedder(ABC): 13 | model: Any 14 | model_name: str 15 | input_shape: Tuple[int, int] 16 | output_shape: int 17 | 18 | def __init__(self): 19 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 20 | 21 | def _img_preprocess(self, imgs: Union[np.ndarray, List[np.ndarray]]) -> Union[torch.Tensor, np.ndarray]: 22 | """ 23 | Preprocess input images based on the model type. 24 | 25 | For PyTorch models: 26 | - Converts images to normalized float tensors [0, 1] 27 | - Returns a batched tensor of shape (N, 3, H, W) 28 | 29 | For Dlib models: 30 | - Ensures each image is resized to the target input shape 31 | - Returns a list of RGB images as NumPy arrays 32 | 33 | Args: 34 | imgs (List[np.ndarray]): List of images in BGR format (OpenCV) 35 | 36 | Returns: 37 | Union[torch.Tensor, List[np.ndarray]]: Preprocessed inputs ready for embedding 38 | """ 39 | target_h, target_w = self.input_shape 40 | batch_size = len(imgs) 41 | 42 | if self.model_name=="Dlib": 43 | batch_inputs = [] 44 | for img in imgs: 45 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 46 | if img.shape[:2] != (target_h, target_w): 47 | img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR) 48 | batch_inputs.append(img) 49 | return batch_inputs 50 | else: 51 | batch_tensor = torch.empty(batch_size, 3, target_h, target_w, dtype=torch.float32, device=self.device) 52 | for i, img in enumerate(imgs): 53 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 54 | if img.shape[:2] != (target_h, target_w): 55 | img = cv2.resize(img, (target_w, target_h), interpolation=cv2.INTER_LINEAR) 56 | img_tensor = torch.from_numpy(img.transpose(2, 0, 1)).float() / 255.0 57 | batch_tensor[i] = img_tensor 58 | return batch_tensor 59 | 60 | @torch.no_grad() 61 | def embed(self, imgs: Union[np.ndarray, List[np.ndarray]], normalize_embeddings: bool = True) -> 'FaceEmbedding': 62 | """ 63 | Generate face embeddings from one or more face images. 64 | 65 | Args: 66 | imgs (Union[np.ndarray, List[np.ndarray]]): 67 | A single image with shape (H, W, 3) or a list of such images in BGR format. 68 | normalize_embeddings (bool, optional): 69 | If True, applies L2 normalization to the output embeddings. Default is True. 
70 | 71 | Returns: 72 | FaceEmbedding: 73 | An object containing the computed embedding tensor(s) with shape (N, D), 74 | where N is the number of input images and D is the embedding dimension (e.g., 128 or 512). 75 | """ 76 | 77 | # Validate input images 78 | imgs = validate_images(imgs) 79 | 80 | # Preprocess images depending on model type 81 | batch_inputs = self._img_preprocess(imgs) 82 | 83 | # Compute embeddings using the model's forward 84 | embeddings = self.model.forward(batch_inputs, normalize_embeddings) 85 | 86 | return FaceEmbedding(embeddings) 87 | 88 | 89 | @dataclass 90 | class FaceEmbedding: 91 | embeddings: torch.Tensor 92 | 93 | def __getitem__(self, idx): 94 | """Get embedding vector(s) at index idx (supports int or slice).""" 95 | return self.embeddings[idx] 96 | 97 | def batch_size(self) -> int: 98 | """Returns the batch size (number of embeddings).""" 99 | return self.embeddings.size(0) 100 | 101 | def to(self, device: torch.device): 102 | """Returns a new FaceEmbedding on the given device.""" 103 | return FaceEmbedding(self.embeddings.to(device)) 104 | 105 | def cpu(self): 106 | """Move embeddings to CPU.""" 107 | return self.to(torch.device('cpu')) 108 | 109 | def cuda(self): 110 | """Move embeddings to CUDA device.""" 111 | return self.to(torch.device('cuda')) 112 | 113 | def as_numpy(self): 114 | """Return embeddings as a NumPy array (on CPU).""" 115 | return self.embeddings.detach().cpu().numpy() 116 | 117 | def to_list(self): 118 | """Return embeddings as a list of lists (compatible with Qdrant).""" 119 | return self.embeddings.detach().cpu().numpy().tolist() -------------------------------------------------------------------------------- /visionface/db/qdrant_client.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List, Dict, Union 2 | import logging 3 | 4 | import numpy as np 5 | from visionface.db.qdrant.config import CollectionConfig, ConnectionConfig, SearchConfig 6 | from visionface.db.qdrant.data_manager import DataManager 7 | from visionface.db.qdrant.search_manager import SearchManager 8 | 9 | logger = logging.getLogger(__name__) 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | class QdrantVectorDB: 13 | """ 14 | Qdrant Vector Database for face vector storage and search operation 15 | """ 16 | def __init__(self, **kwargs): 17 | try: 18 | try: 19 | from qdrant_client import QdrantClient 20 | from visionface.db.qdrant.collection_manager import CollectionManager 21 | except ImportError: 22 | logger.error("Please install qdrant-client: pip install qdrant-client") 23 | raise 24 | 25 | host = kwargs.get("host", "localhost") 26 | port = kwargs.get("port", 6333) 27 | url = kwargs.get("url", None) 28 | api_key = kwargs.get("api_key", None) 29 | https = kwargs.get("https", False) 30 | timeout = kwargs.get("timeout", 5.0) 31 | 32 | # Create connection config 33 | self.config = ConnectionConfig( 34 | host=host, port=port, url=url, 35 | api_key=api_key, https=https, timeout=timeout 36 | ) 37 | self.config.validate() 38 | 39 | # Initialize Qdrant client 40 | if url: 41 | self.client = QdrantClient(url=url, api_key=api_key, timeout=timeout) 42 | else: 43 | self.client = QdrantClient( 44 | host=host, 45 | port=port, 46 | https=https, 47 | api_key=api_key, 48 | timeout=timeout 49 | ) 50 | 51 | # Initialize managers 52 | self.collections = CollectionManager(self.client) 53 | self.search = SearchManager(self.client) 54 | self.data = DataManager(self.client) 55 | 56 | logger.info(f"Connected to 
Qdrant at {url or f'{host}:{port}'}") 57 | 58 | except Exception as e: 59 | logger.error(f"Failed to connect to Qdrant: {e}") 60 | raise ConnectionError(f"Connection failed: {e}") 61 | 62 | def create_collection( 63 | self, 64 | collection_name: str, 65 | vector_size: int 66 | ) -> bool: 67 | """Create a new collection""" 68 | config = CollectionConfig( 69 | name=collection_name, 70 | vector_size=vector_size, 71 | ) 72 | if not self.collection_exists(collection_name): 73 | self.collections.create_collection(config) 74 | else: 75 | logger.info(f"Collection [{collection_name}] already exists! ✅") 76 | 77 | def list_collections(self) -> List[str]: 78 | """List all collections""" 79 | return self.collections.list_collections() 80 | 81 | def delete_collection(self, collection_name: str) -> bool: 82 | """Delete a collection""" 83 | return self.collections.delete_collection(collection_name) 84 | 85 | def get_collection_info(self, collection_name: str) -> Optional[Dict]: 86 | """Get collection information""" 87 | return self.collections.get_collection_info(collection_name) 88 | 89 | def collection_exists(self, collection_name: str) -> bool: 90 | """Check if collection exists""" 91 | return self.collections.collection_exists(collection_name) 92 | 93 | def insert_embeddings(self, 94 | collection_name: str, 95 | embeddings: List[List[float]], 96 | payloads: Optional[List[Dict]] = None, 97 | ids: Optional[List[Union[str, int]]] = None, 98 | batch_size: int = 100) -> bool: 99 | """Insert embeddings with optional payloads""" 100 | return self.data.insert_embeddings( 101 | collection_name, embeddings, payloads, ids, batch_size 102 | ) 103 | 104 | def search_embeddings(self, 105 | collection_name: str, 106 | query_vectors: List[np.ndarray], 107 | score_threshold: Optional[float] = None, 108 | top_k: int = 5) -> List[Dict]: 109 | """Search embeddings using various methods""" 110 | config: SearchConfig = SearchConfig() 111 | config.limit = top_k 112 | config.score_threshold = score_threshold 113 | return self.search.search_embeddings( 114 | collection_name, query_vectors, config 115 | ) -------------------------------------------------------------------------------- /visionface/models/landmark_detection/Dlib.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from typing import List, Any 4 | import cv2 5 | 6 | from visionface.models.LandmarkDetector import LandmarkDetector, DetectedLandmark2D 7 | from visionface.commons.download_files import download_model_weights 8 | from visionface.models.landmark_detection.utils import dlib_landmarks_names 9 | 10 | 11 | DLIB_PREDICTOR_URL = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" 12 | DEFAULT_PREDICTOR_NAME = "shape_predictor_68_face_landmarks.dat" 13 | EXPECTED_LANDMARK_COUNT = 68 14 | 15 | class DlibFaceLandmarkDetector(LandmarkDetector): 16 | """ 17 | Landmark detector using dlib's 68-point facial shape predictor. 18 | 19 | Attributes 20 | ---------- 21 | detector : dlib.fhog_object_detector 22 | Dlib's frontal face detector. 23 | 24 | predictor : dlib.shape_predictor 25 | Dlib's facial landmark shape predictor. 26 | 27 | dlib_landmarks_names : dict 28 | Mapping of landmark indices to semantic names. 29 | 30 | dlib_landmarks : int 31 | Expected number of facial landmarks (default: 68). 
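# Illustrative sketch: end-to-end use of the QdrantVectorDB wrapper above.
# Assumes a Qdrant server is reachable on localhost:6333; the collection name,
# embeddings and payloads are made up for demonstration.
import numpy as np
from visionface.db.qdrant_client import QdrantVectorDB

db = QdrantVectorDB(host="localhost", port=6333)
db.create_collection("faces", vector_size=512)

db.insert_embeddings(
    collection_name="faces",
    embeddings=[[0.1] * 512, [0.2] * 512],
    payloads=[{"name": "alice"}, {"name": "bob"}],
)

# top_k and score_threshold are forwarded to SearchConfig internally
hits = db.search_embeddings("faces", query_vectors=[np.random.rand(512)], top_k=3)
print(hits)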
32 | """ 33 | def __init__(self): 34 | """Initialize the DlibFaceLandmarkDetector.""" 35 | self.detector, self.predictor = self.build_model() 36 | self.dlib_landmarks_names = dlib_landmarks_names() 37 | self.dlib_landmarks = EXPECTED_LANDMARK_COUNT 38 | 39 | def build_model(self) -> Any: 40 | """ 41 | Load the dlib face detector and shape predictor. 42 | 43 | Parameters 44 | ---------- 45 | predictor_name : str, optional 46 | Filename of the dlib predictor (default is shape_predictor_68_face_landmarks.dat) 47 | 48 | Returns 49 | ------- 50 | Tuple[dlib.fhog_object_detector, dlib.shape_predictor] 51 | Dlib face detector and shape predictor. 52 | """ 53 | try: 54 | import dlib 55 | except ImportError as e: 56 | raise ImportError( 57 | "dlib library is required but not installed. " 58 | "Install it using: pip install dlib or from source https://github.com/davisking/dlib" 59 | ) from e 60 | 61 | # Get the predictor file path 62 | predictor_path = download_model_weights( 63 | filename="shape_predictor_68_face_landmarks.dat", 64 | download_url=DLIB_PREDICTOR_URL, 65 | compression_format="bz2", 66 | ) 67 | # Initialize dlib components 68 | detector = dlib.get_frontal_face_detector() 69 | predictor = dlib.shape_predictor(str(predictor_path)) 70 | 71 | return detector, predictor 72 | 73 | def _detect_one(self, img: np.ndarray) -> List[DetectedLandmark2D]: 74 | """ 75 | Detect facial landmarks in a single image. 76 | 77 | Parameters 78 | ---------- 79 | img : np.ndarray 80 | The input image in BGR format. 81 | 82 | Returns 83 | ------- 84 | List[DetectedLandmark2D] 85 | List of 2D landmarks detected for all faces in the image. 86 | """ 87 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 88 | faces = self.detector(gray) 89 | all_landmarks = [self.predictor(gray, face) for face in faces] 90 | return self.process_landmarks(all_landmarks) 91 | 92 | 93 | def detect_landmarks(self, imgs: List[np.ndarray]) -> List[List[DetectedLandmark2D]]: 94 | """ 95 | Detects facial landmarks in a list of images using dlib's face detector and shape predictor. 96 | 97 | Parameters 98 | ---------- 99 | imgs : List[np.ndarray] 100 | List of images (each as a NumPy array in BGR format). 101 | 102 | Returns: 103 | List[List[DetectedLandmark2D]]: A list of detected 2D facial landmarks with coordinates and names. 104 | 105 | """ 106 | return [self._detect_one(img) for img in imgs] 107 | 108 | 109 | def process_landmarks(self, results: List) -> List[DetectedLandmark2D]: 110 | """ 111 | Convert raw dlib detection results into structured landmark data. 112 | 113 | Parameters 114 | ---------- 115 | results : List[dlib.full_object_detection] 116 | Raw landmark predictions from dlib. 117 | 118 | Returns 119 | ------- 120 | List[DetectedLandmark2D] 121 | List of structured 2D facial landmarks with names and coordinates. 
122 | """ 123 | landmarks = [] 124 | for face_landmarks in results: 125 | for idx in range(self.dlib_landmarks): 126 | name = self.dlib_landmarks_names.get(idx, f"unknown_{idx}") 127 | part = face_landmarks.part(idx) 128 | landmarks.append(DetectedLandmark2D(x=part.x, y=part.y, name=name)) 129 | return landmarks 130 | -------------------------------------------------------------------------------- /visionface/commons/download_files.py: -------------------------------------------------------------------------------- 1 | import os 2 | import bz2 3 | import logging 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | import gdown 8 | from visionface.commons.utils import get_home_directory 9 | 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class WeightsDownloadError(Exception): 15 | """Custom exception for weights download failures.""" 16 | pass 17 | 18 | 19 | 20 | def get_face_models_home() -> str: 21 | """ 22 | Get the home directory for storing model weights 23 | 24 | Returns: 25 | str: the home directory. 26 | """ 27 | return str(os.getenv("DEEPFACE_HOME", default=os.path.expanduser("~"))) 28 | 29 | 30 | def download_model_weights( 31 | filename: str, 32 | download_url: str, 33 | compression_format: Optional[str] = None, 34 | ) -> Path: 35 | """ 36 | Download and extract model weights from a URL. 37 | 38 | Args: 39 | filename: Name of the target file (without extension) 40 | download_url: URL to download the file from 41 | compression_format: File compression format ('zip', 'bz2' or None) 42 | Returns: 43 | Path to the downloaded and extracted file 44 | 45 | Raises: 46 | WeightsDownloadError: If download fails 47 | FileNotFoundError: If home directory cannot be determined 48 | """ 49 | 50 | home_dir = Path(get_face_models_home()) 51 | 52 | # Create weights directory structure 53 | weights_dir = home_dir / ".VisionFace/weights" 54 | weights_dir.mkdir(parents=True, exist_ok=True) 55 | 56 | # Define target file path 57 | target_filepath = weights_dir / filename 58 | 59 | # Check if file already exists 60 | if target_filepath.exists() and target_filepath.is_file(): 61 | logger.info(f"✓ {filename} already exists at {target_filepath}") 62 | return target_filepath 63 | 64 | # Download the file 65 | logger.info(f"Downloading {filename} model weights...") 66 | logger.info(f"Source URL: {download_url}") 67 | logger.info(f"Target directory: {weights_dir}") 68 | 69 | # Determine download filename based on compression 70 | if compression_format: 71 | download_filename = f"{filename}.{compression_format}" 72 | download_filepath = weights_dir / download_filename 73 | else: 74 | download_filename = filename 75 | download_filepath = target_filepath 76 | 77 | try: 78 | gdown.download(download_url, str(download_filepath), quiet=False) 79 | logger.info(f"✓ Successfully downloaded {download_filename}") 80 | except Exception as e: 81 | error_msg = ( 82 | f"Failed to download {filename} from {download_url}. 
" 83 | f"Please verify the URL is accessible or download manually to {target_filepath}" 84 | ) 85 | logger.error(error_msg) 86 | raise WeightsDownloadError(error_msg) from e 87 | 88 | # Extract file if compressed 89 | if compression_format: 90 | logger.info(f"Extracting {download_filename}...") 91 | _extract_compressed_file(download_filepath, target_filepath, compression_format) 92 | 93 | # Clean up compressed file after extraction 94 | try: 95 | download_filepath.unlink() 96 | logger.info(f"Removed compressed file: {download_filename}") 97 | except Exception as e: 98 | logger.warning(f"Could not remove compressed file {download_filename}: {e}") 99 | 100 | logger.info(f"Model weights ready at: {target_filepath}") 101 | return target_filepath 102 | 103 | 104 | def _extract_compressed_file( 105 | compressed_filepath: Path, 106 | target_filepath: Path, 107 | compression_format: str 108 | ) -> None: 109 | """ 110 | Extract a compressed file to the target location. 111 | 112 | Args: 113 | compressed_filepath: Path to the compressed file 114 | target_filepath: Path where extracted file should be saved 115 | compression_format: Type of compression ('bz2') 116 | 117 | Raises: 118 | WeightsDownloadError: If extraction fails 119 | """ 120 | if compression_format.lower() == "bz2": 121 | try: 122 | with bz2.BZ2File(compressed_filepath, 'rb') as compressed_file: 123 | with open(target_filepath, 'wb') as target_file: 124 | chunk_size = 64 * 1024 # 64KB chunks 125 | while True: 126 | chunk = compressed_file.read(chunk_size) 127 | if not chunk: 128 | break 129 | target_file.write(chunk) 130 | 131 | logger.info(f"✓ Successfully extracted {compressed_filepath.name} to {target_filepath.name}") 132 | 133 | except Exception as e: 134 | error_msg = f"Failed to extract {compressed_filepath}: {e}" 135 | logger.error(error_msg) 136 | raise WeightsDownloadError(error_msg) from e -------------------------------------------------------------------------------- /visionface/db/qdrant/data_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import uuid 3 | from typing import List, Dict, Union, Optional 4 | import numpy as np 5 | from qdrant_client.http.models import PointStruct, models 6 | 7 | logger = logging.getLogger(__name__) 8 | logging.basicConfig(level=logging.INFO) 9 | 10 | class DataManager: 11 | """Manages data operations for Qdrant""" 12 | 13 | def __init__(self, client): 14 | self.client = client 15 | 16 | def insert_embeddings(self, 17 | collection_name: str, 18 | embeddings: List[List[float]], 19 | payloads: Optional[List[Dict]] = None, 20 | ids: Optional[List[Union[str, int]]] = None, 21 | batch_size: int = 100) -> bool: 22 | """ 23 | Insert embeddings with optional payloads 24 | 25 | Args: 26 | collection_name: Target collection 27 | embeddings: List of embedding vectors 28 | payloads: Optional metadata for each embedding 29 | ids: Optional custom IDs (auto-generated if None) 30 | batch_size: Batch size for insertion 31 | 32 | Returns: 33 | bool: Success status 34 | """ 35 | try: 36 | if not embeddings: 37 | logger.warning("No embeddings provided") 38 | return False 39 | 40 | # Generate IDs if not provided 41 | if ids is None: 42 | ids = [str(uuid.uuid4()) for _ in embeddings] 43 | 44 | # Ensure payloads list matches embeddings length 45 | if payloads is None: 46 | payloads = [{}] * len(embeddings) 47 | elif len(payloads) != len(embeddings): 48 | raise ValueError("Payloads length must match embeddings length") 49 | 50 | # Process in batches 
51 | total_inserted = 0 52 | batch_size = len(embeddings) if len(embeddings)<=batch_size else batch_size 53 | 54 | for i in range(0, len(embeddings), batch_size): 55 | batch_embeddings = embeddings[i:i+batch_size] 56 | batch_payloads = payloads[i:i+batch_size] 57 | batch_ids = ids[i:i+batch_size] 58 | 59 | points = [ 60 | PointStruct( 61 | id=point_id, 62 | vector=embedding, 63 | payload=payload 64 | ) 65 | for point_id, embedding, payload in zip(batch_ids, batch_embeddings, batch_payloads) 66 | ] 67 | 68 | self.client.upsert( 69 | collection_name=collection_name, 70 | points=points 71 | ) 72 | 73 | total_inserted += len(points) 74 | logger.info(f"Successfully inserted {total_inserted} embeddings into '{collection_name}' ✅") 75 | return True 76 | 77 | except Exception as e: 78 | logger.error(f"Failed to insert embeddings: {e}") 79 | raise ValueError(f"Insertion failed: {e}") 80 | 81 | def delete_embeddings(self, 82 | collection_name: str, 83 | ids: Optional[List[Union[str, int]]] = None) -> bool: 84 | """ 85 | Delete embeddings by IDs or filter conditions 86 | 87 | Args: 88 | collection_name: Target collection 89 | ids: Specific IDs to delete 90 | filter_conditions: Filter conditions for deletion 91 | 92 | Returns: 93 | bool: Success status 94 | """ 95 | try: 96 | if ids: 97 | # Delete by IDs 98 | self.client.delete( 99 | collection_name=collection_name, 100 | points_selector=models.PointIdsList(points=ids) 101 | ) 102 | logger.info(f"Deleted {len(ids)} points by ID") 103 | else: 104 | raise ValueError("Either ids must be provided for removing embeddings") 105 | 106 | return True 107 | 108 | except Exception as e: 109 | logger.error(f"Failed to delete embeddings: {e}") 110 | raise ValueError(f"Deletion failed: {e}") 111 | 112 | 113 | def get_points(self, 114 | collection_name: str, 115 | ids: List[Union[str, int]], 116 | with_payload: bool = True, 117 | with_vectors: bool = False) -> List[Dict]: 118 | """Retrieve specific points by ID""" 119 | try: 120 | points = self.client.retrieve( 121 | collection_name=collection_name, 122 | ids=ids, 123 | with_payload=with_payload, 124 | with_vectors=with_vectors 125 | ) 126 | 127 | return [ 128 | { 129 | "id": point.id, 130 | "payload": point.payload if with_payload else None, 131 | "vector": point.vector if with_vectors else None 132 | } 133 | for point in points 134 | ] 135 | except Exception as e: 136 | logger.error(f"Failed to retrieve points: {e}") 137 | return [] -------------------------------------------------------------------------------- /visionface/models/face_embedding/ArcFace.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | import os 7 | 8 | # VisionFace modules 9 | from visionface.models.FaceEmbedding import FaceEmbedder 10 | from visionface.commons.download_files import download_model_weights 11 | 12 | 13 | ARCFACE_WEIGHTS_18 = "https://download.pytorch.org/models/resnet18-5c106cde.pth" 14 | 15 | class ArcFace18(FaceEmbedder): 16 | """ 17 | ArcFace 18 model class 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.model = ResNetFace18(block=IRBlock, 22 | layers=[2, 2, 2, 2], 23 | use_se=True, 24 | pretrained="resnet-18") 25 | self.model_name = "ArcFace-18" 26 | self.input_shape = (112, 112) 27 | self.output_shape = 512 28 | 29 | 30 | class ResNetFace18(nn.Module): 31 | def __init__(self, block, layers: List, use_se: bool = True, pretrained: Optional[str] = None): 32 | self.device = 
torch.device("cuda" if torch.cuda.is_available() else "cpu") 33 | self.inplanes = 64 34 | self.use_se = use_se 35 | super(ResNetFace18, self).__init__() 36 | self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1, bias=False) 37 | self.bn1 = nn.BatchNorm2d(64) 38 | self.prelu = nn.PReLU() 39 | self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2) 40 | self.layer1 = self._make_layer(block, 64, layers[0]) 41 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 42 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 43 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 44 | self.bn4 = nn.BatchNorm2d(512) 45 | self.dropout = nn.Dropout() 46 | self.fc5 = nn.Linear(512 * 8 * 8, 512) 47 | self.bn5 = nn.BatchNorm1d(512) 48 | 49 | for m in self.modules(): 50 | if isinstance(m, nn.Conv2d): 51 | nn.init.xavier_normal_(m.weight) 52 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d): 53 | nn.init.constant_(m.weight, 1) 54 | nn.init.constant_(m.bias, 0) 55 | elif isinstance(m, nn.Linear): 56 | nn.init.xavier_normal_(m.weight) 57 | nn.init.constant_(m.bias, 0) 58 | 59 | def _make_layer(self, block, planes, blocks, stride=1): 60 | downsample = None 61 | if stride != 1 or self.inplanes != planes * block.expansion: 62 | downsample = nn.Sequential( 63 | nn.Conv2d(self.inplanes, planes * block.expansion, 64 | kernel_size=1, stride=stride, bias=False), 65 | nn.BatchNorm2d(planes * block.expansion), 66 | ) 67 | layers = [] 68 | layers.append(block(self.inplanes, planes, stride, downsample, use_se=self.use_se)) 69 | self.inplanes = planes 70 | for i in range(1, blocks): 71 | layers.append(block(self.inplanes, planes, use_se=self.use_se)) 72 | 73 | return nn.Sequential(*layers) 74 | 75 | def forward(self, x, normalize_embeddings=True): 76 | x = self.conv1(x) 77 | x = self.bn1(x) 78 | x = self.prelu(x) 79 | x = self.maxpool(x) 80 | 81 | x = self.layer1(x) 82 | x = self.layer2(x) 83 | x = self.layer3(x) 84 | x = self.layer4(x) 85 | x = self.bn4(x) 86 | x = self.dropout(x) 87 | x = x.view(x.size(0), -1) 88 | x = self.fc5(x) 89 | x = self.bn5(x) 90 | 91 | return x 92 | 93 | 94 | 95 | class IRBlock(nn.Module): 96 | expansion = 1 97 | def __init__(self, inplanes, planes, stride=1, downsample=None, use_se=True): 98 | super(IRBlock, self).__init__() 99 | self.bn0 = nn.BatchNorm2d(inplanes) 100 | self.conv1 = self._conv3x3(inplanes, inplanes) 101 | self.bn1 = nn.BatchNorm2d(inplanes) 102 | self.prelu = nn.PReLU() 103 | self.conv2 = self._conv3x3(inplanes, planes, stride) 104 | self.bn2 = nn.BatchNorm2d(planes) 105 | self.downsample = downsample 106 | self.stride = stride 107 | self.use_se = use_se 108 | if self.use_se: 109 | self.se = SEBlock(planes) 110 | 111 | def _conv3x3(self, in_planes, out_planes, stride=1): 112 | """3x3 convolution with padding""" 113 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 114 | padding=1, bias=False) 115 | 116 | def forward(self, x): 117 | residual = x 118 | out = self.bn0(x) 119 | out = self.conv1(out) 120 | out = self.bn1(out) 121 | out = self.prelu(out) 122 | 123 | out = self.conv2(out) 124 | out = self.bn2(out) 125 | if self.use_se: 126 | out = self.se(out) 127 | 128 | if self.downsample is not None: 129 | residual = self.downsample(x) 130 | 131 | out += residual 132 | out = self.prelu(out) 133 | 134 | return out 135 | 136 | 137 | class SEBlock(nn.Module): 138 | def __init__(self, channel, reduction=16): 139 | super(SEBlock, self).__init__() 140 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 141 | self.fc = 
nn.Sequential( 142 | nn.Linear(channel, channel // reduction), 143 | nn.PReLU(), 144 | nn.Linear(channel // reduction, channel), 145 | nn.Sigmoid() 146 | ) 147 | 148 | def forward(self, x): 149 | b, c, _, _ = x.size() 150 | y = self.avg_pool(x).view(b, c) 151 | y = self.fc(y).view(b, c, 1, 1) 152 | return x * y 153 | 154 | 155 | -------------------------------------------------------------------------------- /visionface/models/face_detection/OpenCV.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import logging 4 | from typing import List 5 | 6 | # VisionFace modules 7 | from visionface.models.Detector import Detector, DetectedFace 8 | from visionface.commons.download_files import download_model_weights 9 | from visionface.commons.image_utils import get_cropped_face 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | 14 | FILE_NAMES = [ 15 | "opencv_deploy.prototxt", 16 | "opencv_res10_300x300_ssd_iter_140000.caffemodel", 17 | ] 18 | 19 | FILE_URLS = [ 20 | "https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt", 21 | "https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel", 22 | ] 23 | 24 | 25 | class OpenCVDetector(Detector): 26 | def __init__(self): 27 | """ 28 | Initializes the OpenCV face detector using a pre-trained Caffe model. 29 | """ 30 | super().__init__() 31 | self.input_size = (300, 300) # Standard input size for the model 32 | self.model = self.build_model() 33 | 34 | def build_model(self) -> cv2.dnn_Net: 35 | """ 36 | Downloads model files and loads the OpenCV DNN face detector. 37 | 38 | Returns: 39 | cv2.dnn_Net: The loaded OpenCV DNN model. 40 | """ 41 | prototxt_name = FILE_NAMES[0] 42 | prototxt_url = FILE_URLS[0] 43 | weights_name = FILE_NAMES[1] 44 | weights_url = FILE_URLS[1] 45 | 46 | prototxt_path = download_model_weights( 47 | filename=prototxt_name, 48 | download_url=prototxt_url 49 | ) 50 | weights_path = download_model_weights( 51 | filename=weights_name, 52 | download_url=weights_url 53 | ) 54 | # Load OpenCV DNN model 55 | model = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path) 56 | # Set backend and target for better performance 57 | model.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) 58 | model.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU) 59 | return model 60 | 61 | def _detect_one(self, img: np.ndarray) -> np.ndarray: 62 | """ 63 | Detects faces in a single image using the loaded OpenCV DNN model. 64 | 65 | Args: 66 | img (np.ndarray): Input image in BGR format. 67 | 68 | Returns: 69 | np.ndarray: Raw detection output from the model. 70 | """ 71 | blob = cv2.dnn.blobFromImage( 72 | img, 73 | scalefactor=1.0, 74 | size=self.input_size, 75 | mean=(104.0, 177.0, 123.0) 76 | ) 77 | self.model.setInput(blob) 78 | return self.model.forward() 79 | 80 | def detect_faces( 81 | self, 82 | imgs: List[np.ndarray], 83 | return_cropped_faces: bool = True 84 | ) -> List[List[DetectedFace]]: 85 | """ 86 | Detect faces in one or more input images using the Opencv model. 87 | 88 | Parameters: 89 | imgs (List[np.ndarray]): 90 | A single image or a list of images in BGR format. 91 | return_cropped_faces : bool, optional 92 | Whether to include cropped face images in each DetectedFace object. Default is True. 93 | 94 | Returns: 95 | List[List[DetectedFace]]: 96 | A list where each element is a list of DetectedFace objects corresponding to one input image. 
97 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 98 | and the cropped face region. 99 | """ 100 | results = [self._detect_one(img) for img in imgs] 101 | return self.process_faces(imgs, results, return_cropped_faces) 102 | 103 | def process_faces( 104 | self, 105 | imgs: List[np.ndarray], 106 | results: List[np.ndarray], 107 | return_cropped_faces: bool 108 | ) -> List[List[DetectedFace]]: 109 | """ 110 | Converts raw model outputs into structured DetectedFace objects. 111 | 112 | Args: 113 | imgs (List[np.ndarray]): List of original images. 114 | results (List[np.ndarray]): List of raw model outputs per image. 115 | return_cropped_faces: bool 116 | Whether to include cropped face images in each DetectedFace object. 117 | 118 | Returns: 119 | List[List[DetectedFace]]: List of detections for each image. 120 | """ 121 | 122 | detections = [] 123 | 124 | for idx, result in enumerate(results): 125 | img = imgs[idx] 126 | h, w = img.shape[:2] 127 | current_detections = [] 128 | face_no = 0 129 | for i in range(result.shape[2]): 130 | confidence = result[0, 0, i, 2] 131 | if confidence > self.conf: 132 | # Get bounding box coordinates 133 | box = result[0, 0, i, 3:7] * np.array([w, h, w, h]) 134 | x1, y1, x2, y2 = box.astype(int) 135 | x1, y1 = max(0, x1), max(0, y1) 136 | x2, y2 = min(w, x2), min(h, y2) 137 | cropped_face = get_cropped_face(img, [x1, y1, x2, y2]) if return_cropped_faces else None 138 | 139 | facial_info = DetectedFace( 140 | xmin=x1, 141 | ymin=y1, 142 | xmax=x2, 143 | ymax=y2, 144 | conf=round(confidence, 2), 145 | class_name="face", 146 | cropped_face=cropped_face 147 | ) 148 | current_detections.append(facial_info) 149 | face_no +=1 150 | 151 | if not len(current_detections): 152 | current_detections = DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0) 153 | 154 | logging.info( 155 | f"[OpenCVDetector] {face_no} face(s) detected in image id: {idx}, " 156 | f"min confidence threshold 0.25." 
157 | ) 158 | 159 | detections.append(current_detections) 160 | 161 | return detections -------------------------------------------------------------------------------- /visionface/models/face_detection/YOLO.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from typing import List, Any, Union 4 | from enum import Enum 5 | 6 | # VisionFace modules 7 | from visionface.models.Detector import Detector, DetectedFace 8 | from visionface.commons.image_utils import get_cropped_face 9 | from visionface.commons.download_files import download_model_weights 10 | 11 | logging.basicConfig(level=logging.INFO) 12 | 13 | class YOLOModel(Enum): 14 | """Enum for YOLO model types.""" 15 | NANO = 0 16 | SMALL = 1 17 | MEDIUM = 2 18 | LARGE = 3 19 | 20 | WEIGHT_NAMES = [ 21 | "yolov12n-face.pt", 22 | "yolov12s-face.pt", 23 | "yolov12m-face.pt", 24 | "yolov12l-face.pt", 25 | ] 26 | 27 | WEIGHT_URLS = [ 28 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12n-face.pt", 29 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12s-face.pt", 30 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12m-face.pt", 31 | "https://github.com/akanametov/yolo-face/releases/download/v0.0.0/yolov12l-face.pt", 32 | ] 33 | 34 | 35 | class YOLODetector(Detector): 36 | """ 37 | References: 38 | YOLO Face Detection: https://github.com/akanametov/yolo-face 39 | """ 40 | def __init__(self, model: YOLOModel = YOLOModel.SMALL): 41 | """ 42 | Initialize the YOLO Detector. 43 | """ 44 | import torch 45 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 46 | self.model = self.build_model(model) 47 | 48 | def build_model(self, model: YOLOModel): 49 | try: 50 | from ultralytics import YOLO 51 | except ModuleNotFoundError as error: 52 | raise ImportError( 53 | "The 'ultralytics' library is not installed. " 54 | "It is required for YOLOEyeDetector to work. " 55 | "Please install it using: pip install ultralytics" 56 | ) from error 57 | 58 | # Get the weight file (and download if necessary) 59 | model_id = model.value 60 | model_name = WEIGHT_NAMES[model_id] 61 | weight_url = WEIGHT_URLS[model_id] 62 | model_path = download_model_weights( 63 | filename=model_name, 64 | download_url=weight_url 65 | ) 66 | # Load the YOLO face model 67 | return YOLO(model_path) 68 | 69 | def detect_faces( 70 | self, 71 | imgs: List[np.ndarray], 72 | return_cropped_faces: bool = True 73 | ) -> List[List[DetectedFace]]: 74 | """ 75 | Detect faces in one or more input images using the MediaPipe model. 76 | 77 | Parameters: 78 | imgs (List[np.ndarray]): 79 | A single image or a list of images in BGR format. 80 | 81 | return_cropped_faces : bool, optional 82 | Whether to include cropped face images in each DetectedFace object. Default is True. 83 | 84 | Returns: 85 | List[List[DetectedFace]]: 86 | A list where each element is a list of DetectedFace objects corresponding to one input image. 87 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 88 | and the cropped face region. 
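# Illustrative sketch: using the YOLO backend directly when a specific model
# size is wanted. Assumes ultralytics is installed; the zero-filled frame is a
# placeholder, so no faces will actually be found.
import numpy as np
from visionface.models.face_detection.YOLO import YOLODetector, YOLOModel

detector = YOLODetector(model=YOLOModel.NANO)      # weights download on first use

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # placeholder BGR frame
faces_per_image = detector.detect_faces([frame], return_cropped_faces=False)
print(faces_per_image)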
89 | """ 90 | results = self.model.predict( 91 | imgs, 92 | verbose=False, 93 | show=False, 94 | device=self.device 95 | ) 96 | return self.process_faces(imgs, results, return_cropped_faces) 97 | 98 | def process_faces( 99 | self, 100 | imgs: List[np.ndarray], 101 | results: Any, 102 | return_cropped_faces: bool 103 | ) -> List[List[DetectedFace]]: 104 | """ 105 | Process YOLO detection results and convert them into DetectedFace objects. 106 | 107 | Parameters 108 | ---------- 109 | imgs : List[np.ndarray] 110 | A single image or a list of images (NumPy arrays). 111 | return_cropped_faces : bool 112 | Whether to include cropped face images in each DetectedFace object. 113 | 114 | results : List[ultralytics.engine.results.Results] 115 | A list of YOLO detection results, one for each input image. 116 | 117 | Returns 118 | ------- 119 | List[List[DetectedFace]] 120 | A list where each element is a list of DetectedFace objects corresponding to one input image. 121 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 122 | and the cropped face region. 123 | """ 124 | 125 | detections = [] 126 | 127 | for idx, result in enumerate(results): 128 | 129 | if result.boxes is None: 130 | continue 131 | 132 | current_detections = [] 133 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int).tolist() 134 | confidences = result.boxes.conf.cpu().numpy().tolist() 135 | img = imgs[idx] 136 | 137 | for bbox, conf in zip(bboxes, confidences): 138 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 139 | facial_info = DetectedFace( 140 | xmin=bbox[0], 141 | ymin=bbox[1], 142 | xmax=bbox[2], 143 | ymax=bbox[3], 144 | conf=round(conf, 2), 145 | class_name="face", 146 | cropped_face=cropped_face 147 | ) 148 | current_detections.append(facial_info) 149 | 150 | logging.info( 151 | f"[YOLODetector] {len(current_detections)} face(s) detected in image id: {idx}, " 152 | f"min confidence threshold 0.25." 
153 | ) 154 | 155 | detections.append(current_detections) 156 | 157 | return detections 158 | 159 | 160 | class YOLONanoDetector(YOLODetector): 161 | """YOLO Nano detector implementation""" 162 | def __init__(self): 163 | super().__init__(model=YOLOModel.NANO) 164 | 165 | class YOLOSmallDetector(YOLODetector): 166 | """YOLO Small detector implementation""" 167 | def __init__(self): 168 | super().__init__(model=YOLOModel.SMALL) 169 | 170 | class YOLOMediumDetector(YOLODetector): 171 | """YOLO Medium detector implementation""" 172 | def __init__(self): 173 | super().__init__(model=YOLOModel.MEDIUM) 174 | 175 | class YOLOLargeDetector(YOLODetector): 176 | """YOLO Large detector implementation""" 177 | def __init__(self): 178 | super().__init__(model=YOLOModel.LARGE) -------------------------------------------------------------------------------- /visionface/models/face_detection/MediaPipe.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | from typing import List, Any, Union 4 | 5 | # VisionFace modules 6 | from visionface.models.Detector import Detector, DetectedFace 7 | from visionface.commons.utils import xywh2xyxy 8 | from visionface.commons.image_utils import get_cropped_face 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | 12 | 13 | class MediaPipeDetector(Detector): 14 | """ 15 | References: 16 | MediaPipe Face Detection: https://github.com/google-ai-edge/mediapipe/blob/master/docs/solutions/face_detection.md 17 | """ 18 | def __init__(self, MODEL_ID: int = 1, MIN_CONFIDENCE: float = 0.5): 19 | """ 20 | Initialize the MediaPipeDetector. 21 | 22 | Parameters: 23 | model_id: int, default=1 24 | The MediaPipe face detection model to use: 25 | - 0: Short-range model (optimized for faces within 2 meters) 26 | - 1: Full-range model (optimized for faces within 5 meters) 27 | 28 | min_confidence: float, default=0.5 29 | Minimum confidence threshold (0.0 to 1.0) for face detection. 30 | Detections below this threshold will be filtered out. 31 | """ 32 | if MODEL_ID not in (0, 1): 33 | raise ValueError(f"Invalid MODEL_ID: {MODEL_ID}. MediaPipe only 0 (short-range) or 1 (full-range) are supported.") 34 | 35 | super().__init__(MODEL_ID, MIN_CONFIDENCE) 36 | self.model = self.build_model() 37 | 38 | def build_model(self) -> Any: 39 | """ 40 | Build and initialize the MediaPipe face detection model. 41 | 42 | Returns: 43 | An instance of MediaPipe's FaceDetection model. 44 | 45 | Raises: 46 | ImportError: If the 'mediapipe' library is not installed. 47 | """ 48 | try: 49 | import mediapipe as mp 50 | except ModuleNotFoundError as error: 51 | raise ImportError( 52 | "The 'mediapipe' library is not installed. " 53 | "It is required for MediaPipeDetector to work. " 54 | "Please install it using: pip install mediapipe" 55 | ) from error 56 | 57 | mp_face_detection = mp.solutions.face_detection 58 | face_detection = mp_face_detection.FaceDetection( 59 | min_detection_confidence=self.conf, 60 | model_selection=self.model_id 61 | ) 62 | return face_detection 63 | 64 | def _detect_one( 65 | self, 66 | img_id: int, 67 | img: np.ndarray, 68 | return_cropped_faces: bool 69 | ) -> List[DetectedFace]: 70 | """ 71 | Detect faces in a single image using the MediaPipe model. 72 | 73 | Parameters: 74 | img_id (int): id for the image 75 | img (np.ndarray): The input image in BGR format 76 | return_cropped_faces(bool): cropped face images in each DetectedFace object. 
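# Illustrative sketch: the two MediaPipe knobs documented above. Assumes
# mediapipe is installed; the zero-filled frame is only a placeholder.
import numpy as np
from visionface.models.face_detection.MediaPipe import MediaPipeDetector

# Short-range model (faces within ~2 m) with a stricter confidence threshold
detector = MediaPipeDetector(MODEL_ID=0, MIN_CONFIDENCE=0.7)

frame = np.zeros((480, 640, 3), dtype=np.uint8)    # placeholder BGR frame
print(detector.detect_faces([frame]))              # -> [[]] when nothing is found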
77 | 78 | Returns: 79 | List[DetectedFace]: A list of DetectedFace objects. 80 | """ 81 | # img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 82 | h, w = img.shape[:2] 83 | results = self.model.process(img) 84 | if results.detections is None: 85 | return [] 86 | return self.process_faces(img, results, w, h, img_id, return_cropped_faces) 87 | 88 | def detect_faces( 89 | self, 90 | imgs: List[np.ndarray], 91 | return_cropped_faces: bool = True 92 | ) -> List[List[DetectedFace]]: 93 | """ 94 | Detect faces in one or more input images using the MediaPipe model. 95 | 96 | Parameters: 97 | imgs: List[np.ndarray]: 98 | A single image or a list of images in BGR format. 99 | 100 | return_cropped_faces : bool, optional 101 | Whether to include cropped face images in each DetectedFace object. Default is True. 102 | 103 | Returns: 104 | List[List[DetectedFace]]: 105 | A list where each element is a list of DetectedFace objects for the corresponding input image. 106 | """ 107 | # Run face detection on each image 108 | detections = [self._detect_one(img_id, img, return_cropped_faces) for img_id, img in enumerate(imgs)] 109 | return detections 110 | 111 | def process_faces( 112 | self, 113 | img: np.ndarray, 114 | results: Any, 115 | img_width: int, 116 | img_height: int, 117 | img_id: int, 118 | return_cropped_faces: bool 119 | ) -> List[DetectedFace]: 120 | """ 121 | Process the raw detection results from MediaPipe into DetectedFace objects. 122 | 123 | Parameters: 124 | img (np.ndarray): 125 | The input image in BGR or RGB format. 126 | results: Any 127 | Detection results from the MediaPipe model's process. 128 | img_width: int 129 | Width of the image in pixels. 130 | img_height: int 131 | Height of the image in pixels. 132 | return_cropped_faces : bool 133 | Whether to include cropped face images in each DetectedFace object. 134 | 135 | Returns: 136 | List[DetectedFace] 137 | A list of DetectedFace objects with face coordinates 138 | and confidence scores for each detected face. 139 | """ 140 | 141 | detections = [] 142 | 143 | for detection in results.detections: 144 | (confidence,) = detection.score 145 | bounding_box = detection.location_data.relative_bounding_box 146 | 147 | # Convert relative coordinates to absolute pixel coordinates 148 | x = int(bounding_box.xmin * img_width) 149 | w = int(bounding_box.width * img_width) 150 | y = int(bounding_box.ymin * img_height) 151 | h = int(bounding_box.height * img_height) 152 | 153 | # Convert xywh format to xyxy 154 | bbox = xywh2xyxy([x, y, w, h]) 155 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 156 | 157 | facial_info = DetectedFace( 158 | xmin=bbox[0], 159 | ymin=bbox[1], 160 | xmax=bbox[2], 161 | ymax=bbox[3], 162 | conf=round(confidence, 2), 163 | class_name="face", 164 | cropped_face=cropped_face 165 | ) 166 | detections.append(facial_info) 167 | 168 | logging.info( 169 | f"[MediaPipeDetector] {len(detections)} face(s) detected in image id: {img_id}, " 170 | f"min confidence threshold {self.conf:.2f}." 
171 | ) 172 | 173 | return detections -------------------------------------------------------------------------------- /visionface/db/qdrant/collection_manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from typing import Any, Dict, List, Optional 4 | from qdrant_client.http.models import ( 5 | VectorParams, HnswConfigDiff, OptimizersConfigDiff, 6 | ScalarQuantization, ProductQuantization, BinaryQuantization, 7 | ) 8 | from qdrant_client.http.models import Distance 9 | from visionface.db.qdrant.config import CollectionConfig 10 | 11 | logger = logging.getLogger(__name__) 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | class CollectionManager: 15 | """Manages collection operations for Qdrant""" 16 | 17 | def __init__(self, client): 18 | self.client = client 19 | 20 | def create_collection(self, config: CollectionConfig) -> bool: 21 | """ 22 | Create a new collection with configuration 23 | 24 | Args: 25 | config: Collection configuration 26 | 27 | Returns: 28 | bool: Success status 29 | """ 30 | try: 31 | # Validate configuration 32 | config.validate() 33 | 34 | # Build vector params 35 | vectors_config = VectorParams( 36 | size=config.vector_size, 37 | distance=Distance.COSINE 38 | ) 39 | 40 | # Build HNSW config 41 | hnsw_config = None 42 | if config.hnsw_config: 43 | hnsw_config = HnswConfigDiff(**config.hnsw_config) 44 | 45 | # Build optimizer config 46 | optimizer_config = None 47 | if config.optimizer_config: 48 | optimizer_config = OptimizersConfigDiff(**config.optimizer_config) 49 | 50 | # Build quantization config 51 | quantization_config = self._build_quantization_config(config.quantization_config) 52 | 53 | # Create collection 54 | self.client.create_collection( 55 | collection_name=config.name, 56 | vectors_config=vectors_config, 57 | hnsw_config=hnsw_config, 58 | optimizers_config=optimizer_config, 59 | quantization_config=quantization_config, 60 | replication_factor=config.replication_factor, 61 | write_consistency_factor=config.write_consistency_factor 62 | ) 63 | 64 | logger.info(f"Collection '{config.name}' created successfully ✅") 65 | return True 66 | 67 | except Exception as e: 68 | logger.error(f"Failed to create collection '{config.name}': {e}") 69 | if "Connection refused" in str(e): 70 | logger.error( 71 | "Qdrant connection was refused. 
Make sure the Qdrant server is running.\n" 72 | "To start it with Docker, run Qdrant server locally with docker:\n" 73 | "docker run -d -p 6333:6333 qdrant/qdrant:latest", 74 | "See more launch options in, https://github.com/qdrant/qdrant#usage" 75 | ) 76 | 77 | raise ValueError(f"Collection creation failed: {e}") 78 | 79 | def _build_quantization_config(self, quantization_config: Optional[Dict]) -> Optional[Any]: 80 | """Build quantization configuration""" 81 | if not quantization_config: 82 | return None 83 | 84 | quant_type = quantization_config.get("type", "scalar") 85 | 86 | if quant_type == "scalar": 87 | return ScalarQuantization(scalar=quantization_config) 88 | elif quant_type == "product": 89 | return ProductQuantization(product=quantization_config) 90 | elif quant_type == "binary": 91 | return BinaryQuantization(binary=quantization_config) 92 | else: 93 | raise ValueError(f"Unknown quantization type: {quant_type}") 94 | 95 | def get_collection_info(self, collection_name: str) -> Optional[Dict]: 96 | """Get detailed collection information""" 97 | try: 98 | info = self.client.get_collection(collection_name) 99 | return { 100 | "name": collection_name, 101 | "status": info.status, 102 | "optimizer_status": info.optimizer_status, 103 | "vectors_count": info.vectors_count, 104 | "indexed_vectors_count": info.indexed_vectors_count, 105 | "points_count": info.points_count, 106 | "segments_count": info.segments_count, 107 | "config": { 108 | "params": info.config.params.__dict__ if info.config.params else None, 109 | "hnsw_config": info.config.hnsw_config.__dict__ if info.config.hnsw_config else None, 110 | "optimizer_config": info.config.optimizer_config.__dict__ if info.config.optimizer_config else None, 111 | "quantization_config": str(info.config.quantization_config) if info.config.quantization_config else None 112 | }, 113 | "payload_schema": info.payload_schema 114 | } 115 | except Exception as e: 116 | logger.error(f"Failed to get collection info: {e}") 117 | raise ValueError(f"Collection '{collection_name}' not found") 118 | 119 | def list_collections(self) -> List[str]: 120 | """List all collections""" 121 | try: 122 | collections = self.client.get_collections() 123 | return [collection.name for collection in collections.collections] 124 | except Exception as e: 125 | logger.error(f"Failed to list collections: {e}") 126 | return [] 127 | 128 | def delete_collection(self, collection_name: str) -> bool: 129 | """Delete a collection""" 130 | try: 131 | self.client.delete_collection(collection_name) 132 | logger.info(f"Collection '{collection_name}' deleted") 133 | return True 134 | except Exception as e: 135 | logger.error(f"Failed to delete collection '{collection_name}': {e}") 136 | return False 137 | 138 | def collection_exists(self, collection_name: str) -> bool: 139 | """Check if collection exists""" 140 | try: 141 | self.client.get_collection(collection_name) 142 | return True 143 | except: 144 | return False 145 | 146 | def refresh_collection(self, collection_name: str) -> bool: 147 | """Refresh collection (optimize indexes)""" 148 | try: 149 | self.client.update_collection( 150 | collection_name=collection_name, 151 | optimizer_config=OptimizersConfigDiff( 152 | indexing_threshold=20000 153 | ) 154 | ) 155 | logger.info(f"Collection '{collection_name}' refresh initiated") 156 | return True 157 | except Exception as e: 158 | logger.error(f"Failed to refresh collection '{collection_name}': {e}") 159 | return False 160 | 
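# Illustrative sketch: driving CollectionManager with an existing QdrantClient
# instead of the high-level QdrantVectorDB wrapper. Assumes a local Qdrant
# server and that CollectionConfig provides defaults for its optional fields;
# the collection name is made up.
from qdrant_client import QdrantClient
from visionface.db.qdrant.config import CollectionConfig
from visionface.db.qdrant.collection_manager import CollectionManager

client = QdrantClient(host="localhost", port=6333)
manager = CollectionManager(client)

config = CollectionConfig(name="faces", vector_size=512)
if not manager.collection_exists("faces"):
    manager.create_collection(config)

print(manager.list_collections())
print(manager.get_collection_info("faces"))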
-------------------------------------------------------------------------------- /visionface/models/face_detection/YOLOEye.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import logging 4 | from typing import List, Any, Union 5 | import cv2 6 | from enum import Enum 7 | 8 | # VisionFace modules 9 | from visionface.models.Detector import Detector, DetectedFace 10 | from visionface.commons.download_files import download_model_weights 11 | from visionface.commons.image_utils import get_cropped_face 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | class YOLOEModel(Enum): 16 | """Enum for YOLOE model types.""" 17 | SMALL = 0 18 | MEDIUM = 1 19 | LARGE = 2 20 | 21 | #Text/Visual Prompt models 22 | WEIGHT_NAMES = [ 23 | "yoloe-11s-seg.pt", 24 | "yoloe-11m-seg.pt", 25 | "yoloe-11l-seg.pt" 26 | ] 27 | 28 | WEIGHT_URLS = [ 29 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11s-seg.pt", 30 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11m-seg.pt", 31 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yoloe-11l-seg.pt" 32 | ] 33 | 34 | class YOLOEyeDetector(Detector): 35 | """ 36 | Reference: https://github.com/THU-MIG/yoloe 37 | """ 38 | def __init__(self, model: YOLOEModel = YOLOEModel.MEDIUM): 39 | """ 40 | Initialize the YOLOEyeDetector. 41 | """ 42 | import torch 43 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 44 | self.model = self.build_model(model) 45 | 46 | def build_model(self, model: YOLOEModel): 47 | try: 48 | from ultralytics import YOLO 49 | except ModuleNotFoundError as error: 50 | raise ImportError( 51 | "The 'ultralytics' library is not installed. " 52 | "It is required for YOLOEyeDetector to work. " 53 | "Please install it using: pip install ultralytics" 54 | ) from error 55 | 56 | # Get the weight file (and download if necessary) 57 | model_id = model.value 58 | model_name = WEIGHT_NAMES[model_id] 59 | weight_url = WEIGHT_URLS[model_id] 60 | model_path = download_model_weights( 61 | filename=model_name, 62 | download_url=weight_url 63 | ) 64 | return YOLO(model_path) 65 | 66 | def detect_faces(self, imgs: List[np.ndarray], return_cropped_faces: bool = True) -> List[List[DetectedFace]]: 67 | """ 68 | Detect faces in one or more input images using the YOLOe model. 69 | 70 | Parameters: 71 | imgs (List[np.ndarray]): 72 | A single image or a list of images in BGR format. 73 | 74 | return_cropped_faces : bool, optional 75 | Whether to include cropped face images in each DetectedFace object. Default is True. 76 | 77 | Returns: 78 | List[List[DetectedFace]]: 79 | A list where each element is a list of DetectedFace objects corresponding to one input image. 80 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 81 | """ 82 | # By default, use a generic "face" prompt for detection 83 | prompt = "face" 84 | return self.detect_faces_with_prompt(imgs, prompt, return_cropped_faces) 85 | 86 | def _set_text_prompt(self, prompts: List[str]) -> None: 87 | """ 88 | Set the text prompt for the YOLO World model. 89 | """ 90 | self.model.set_classes(prompts, self.model.get_text_pe(prompts)) 91 | 92 | def detect_faces_with_prompt( 93 | self, 94 | imgs: List[np.ndarray], 95 | prompts: List[str], 96 | return_cropped_faces: bool = True 97 | ) -> List[List[DetectedFace]]: 98 | """ 99 | Detect faces in the given image based on text prompt guidance. 
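# Illustrative sketch: prompt-guided detection with the YOLOE backend. Assumes
# ultralytics is installed; the prompt list and zero-filled frame are only
# placeholders for demonstration.
import numpy as np
from visionface.models.face_detection.YOLOEye import YOLOEyeDetector, YOLOEModel

detector = YOLOEyeDetector(model=YOLOEModel.SMALL)

frame = np.zeros((640, 640, 3), dtype=np.uint8)    # placeholder BGR frame
detections = detector.detect_faces_with_prompt([frame], prompts=["face"])
print(detections)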
100 | 101 | Args: 102 | img (np.ndarray): Input image as a NumPy array (H, W, C). 103 | prompt (Union[str, List[str]]): Either a single text prompt or a list of text prompts 104 | describing the faces to detect. 105 | return_cropped_faces : bool, optional 106 | Whether to include cropped face images in each DetectedFace object. Default is True. 107 | 108 | Returns: 109 | List[DetectedFace]: A list of detected faces that match the prompt(s). 110 | """ 111 | self._set_text_prompt(prompts) 112 | results = self.model.predict( 113 | imgs, 114 | verbose=False, 115 | show=False, 116 | device=self.device 117 | ) 118 | return self.process_faces(imgs, results, return_cropped_faces) 119 | 120 | def detect_faces_with_visual(self, imgs: List[np.ndarray]) -> List[DetectedFace]: 121 | pass 122 | 123 | def process_faces( 124 | self, 125 | imgs: List[np.ndarray], 126 | results: List[Any], 127 | return_cropped_faces: bool 128 | ) -> List[List[DetectedFace]]: 129 | """ 130 | Process the raw detections into a structured format. 131 | """ 132 | 133 | detections = [] 134 | 135 | for idx, result in enumerate(results): 136 | 137 | current_detections = [] 138 | class_id = result.boxes.cls.cpu().numpy().astype(int) 139 | class_names = np.array([result.names[i] for i in class_id]) 140 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int) 141 | confidence = result.boxes.conf.cpu().numpy() 142 | img = imgs[idx] 143 | 144 | if not len(bboxes): 145 | detections.append(DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0)) 146 | continue 147 | 148 | for bbox, conf, class_name in zip(bboxes, confidence, class_names): 149 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 150 | facial_info = DetectedFace( 151 | xmin=bbox[0], 152 | ymin=bbox[1], 153 | xmax=bbox[2], 154 | ymax=bbox[3], 155 | conf=round(conf, 2), 156 | class_name=class_name, 157 | cropped_face=cropped_face 158 | ) 159 | current_detections.append(facial_info) 160 | 161 | logging.info( 162 | f"{len(current_detections)} face(s) detected in image id: {idx}," 163 | ) 164 | 165 | detections.append(current_detections) 166 | 167 | return detections 168 | 169 | 170 | 171 | class YOLOEyeSmallDetector(YOLOEyeDetector): 172 | """YOLOEye Small detector implementation""" 173 | def __init__(self): 174 | super().__init__(model=YOLOEModel.SMALL) 175 | 176 | class YOLOEyeMediumDetector(YOLOEyeDetector): 177 | """YOLOEye Medium detector implementation""" 178 | def __init__(self): 179 | super().__init__(model=YOLOEModel.MEDIUM) 180 | 181 | class YOLOEyeLargeDetector(YOLOEyeDetector): 182 | """YOLOEye Large detector implementation""" 183 | def __init__(self): 184 | super().__init__(model=YOLOEModel.LARGE) 185 | -------------------------------------------------------------------------------- /visionface/models/face_detection/YOLOWolrd.py: -------------------------------------------------------------------------------- 1 | from click import prompt 2 | import numpy as np 3 | import logging 4 | from typing import List, Any, Union 5 | from enum import Enum 6 | 7 | # VisionFace modules 8 | from visionface.models.Detector import Detector, DetectedFace 9 | from visionface.commons.image_utils import get_cropped_face 10 | from visionface.commons.download_files import download_model_weights 11 | 12 | logging.basicConfig(level=logging.INFO) 13 | 14 | class YOLOModel(Enum): 15 | """Enum for YOLO World model types.""" 16 | SMALL = 0 17 | MEDIUM = 1 18 | LARGE = 2 19 | XLARGE = 3 20 | 21 | WEIGHT_NAMES = [ 22 | "yolov8s-world.pt", 23 | "yolov8m-world.pt", 24 | 
"yolov8l-world.pt", 25 | "yolov8x-world.pt", 26 | ] 27 | 28 | WEIGHT_URLS = [ 29 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s-world.pt", 30 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m-world.pt", 31 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l-world.pt", 32 | "https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x-world.pt", 33 | ] 34 | 35 | 36 | class YOLOWolrdDetector(Detector): 37 | def __init__(self, model: YOLOModel = YOLOModel.MEDIUM): 38 | """ 39 | Initialize the YOLO Detector. 40 | """ 41 | import torch 42 | self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 43 | self.model = self.build_model(model) 44 | 45 | def build_model(self, model: YOLOModel): 46 | try: 47 | from ultralytics import YOLOWorld 48 | except ModuleNotFoundError as error: 49 | raise ImportError( 50 | "The 'ultralytics' library is not installed. " 51 | "It is required for YOLOEyeDetector to work. " 52 | "Please install it using: pip install ultralytics" 53 | ) from error 54 | 55 | # Get the weight file (and download if necessary) 56 | model_id = model.value 57 | model_name = WEIGHT_NAMES[model_id] 58 | weight_url = WEIGHT_URLS[model_id] 59 | model_path = download_model_weights( 60 | filename=model_name, 61 | download_url=weight_url 62 | ) 63 | return YOLOWorld(model_path) 64 | 65 | def detect_faces( 66 | self, 67 | imgs: List[np.ndarray], 68 | return_cropped_faces: bool = True 69 | ) -> List[List[DetectedFace]]: 70 | """ 71 | Detect faces in one or more input images using the YOLO Wolrd model. 72 | 73 | Parameters: 74 | imgs (List[np.ndarray]): 75 | A single image or a list of images in BGR format. 76 | 77 | return_cropped_faces : bool, optional 78 | Whether to include cropped face images in each DetectedFace object. Default is True. 79 | 80 | Returns: 81 | List[List[DetectedFace]]: 82 | A list where each element is a list of DetectedFace objects corresponding to one input image. 83 | Each DetectedFace includes the bounding box coordinates, confidence score, class name, 84 | """ 85 | # By default, use a generic "face" prompt for detection 86 | prompts = "face" 87 | return self.detect_faces_with_prompt(imgs, prompts, return_cropped_faces) 88 | 89 | def _set_text_prompt(self, prompts: List[str]) -> None: 90 | """ 91 | Set the text prompt for the YOLO World model. 92 | """ 93 | self.model.set_classes(prompts) 94 | 95 | def detect_faces_with_prompt( 96 | self, 97 | imgs: List[np.ndarray], 98 | prompts: List[str], 99 | return_cropped_faces: bool = True 100 | ) -> List[List[DetectedFace]]: 101 | """ 102 | Detect faces in the given image based on text prompt guidance. 103 | 104 | Args: 105 | img (np.ndarray): Input image as a NumPy array (H, W, C). 106 | prompts (Union[str, List[str]]): Either a single text prompt or a list of text prompts 107 | describing the faces to detect. 108 | return_cropped_faces : bool, optional 109 | Whether to include cropped face images in each DetectedFace object. Default is True. 110 | 111 | Returns: 112 | List[DetectedFace]: A list of detected faces that match the prompt(s). 
113 | """ 114 | self._set_text_prompt(prompts) 115 | results = self.model.predict( 116 | imgs, 117 | verbose=False, 118 | show=False, 119 | device=self.device 120 | ) 121 | return self.process_faces(imgs, results, return_cropped_faces) 122 | 123 | 124 | def detect_faces_with_visual(self, imgs: List[np.ndarray]) -> List[DetectedFace]: 125 | pass 126 | 127 | def process_faces( 128 | self, 129 | imgs: List[np.ndarray], 130 | results: List[Any], 131 | return_cropped_faces: bool 132 | ) -> List[List[DetectedFace]]: 133 | """ 134 | Process the raw detections into a structured format. 135 | """ 136 | 137 | detections = [] 138 | 139 | for idx, result in enumerate(results): 140 | current_detections = [] 141 | class_id = result.boxes.cls.cpu().numpy().astype(int) 142 | class_names = np.array([result.names[i] for i in class_id]) 143 | bboxes = result.boxes.xyxy.cpu().numpy().astype(int) 144 | confidence = result.boxes.conf.cpu().numpy() 145 | img = imgs[idx] 146 | 147 | if not len(bboxes): 148 | detections.append(DetectedFace(xmin=0, ymin=0, xmax=0, ymax=0, conf=0)) 149 | continue 150 | 151 | for bbox, conf, class_name in zip(bboxes, confidence, class_names): 152 | cropped_face = get_cropped_face(img, bbox) if return_cropped_faces else None 153 | facial_info = DetectedFace( 154 | xmin=bbox[0], 155 | ymin=bbox[1], 156 | xmax=bbox[2], 157 | ymax=bbox[3], 158 | conf=round(conf, 2), 159 | class_name=class_name, 160 | cropped_face=cropped_face 161 | ) 162 | current_detections.append(facial_info) 163 | 164 | logging.info( 165 | f"{len(current_detections)} face(s) detected in image id: {idx}," 166 | ) 167 | 168 | detections.append(current_detections) 169 | 170 | return detections 171 | 172 | 173 | class YOLOWorldSmallDetector(YOLOWolrdDetector): 174 | """YOLO Small detector implementation""" 175 | def __init__(self): 176 | super().__init__(model=YOLOModel.SMALL) 177 | 178 | class YOLOWorldMediumDetector(YOLOWolrdDetector): 179 | """YOLO Medium detector implementation""" 180 | def __init__(self): 181 | super().__init__(model=YOLOModel.MEDIUM) 182 | 183 | class YOLOWorldLargeDetector(YOLOWolrdDetector): 184 | """YOLO Large detector implementation""" 185 | def __init__(self): 186 | super().__init__(model=YOLOModel.LARGE) 187 | 188 | class YOLOWorldXLargeDetector(YOLOWolrdDetector): 189 | """YOLO XLarge detector implementation""" 190 | def __init__(self): 191 | super().__init__(model=YOLOModel.XLARGE) -------------------------------------------------------------------------------- /visionface/commons/image_utils.py: -------------------------------------------------------------------------------- 1 | # Part of this module is adapted from the DeepFace library 2 | # Source: https://github.com/serengil/deepface/blob/master/deepface/commons/image_utils.py 3 | # Original author: Alireza Makhzani and contributors 4 | 5 | import os 6 | from typing import Union, Tuple, IO, List 7 | import numpy as np 8 | import cv2 9 | from pathlib import Path 10 | import io 11 | import base64 12 | from PIL import Image 13 | import requests 14 | from torch.nn.functional import interpolate 15 | 16 | from visionface.models.Detector import DetectedFace 17 | 18 | 19 | def load_images( 20 | inputs: Union[str, np.ndarray, IO[bytes], List[Union[str, np.ndarray, IO[bytes]]]] 21 | ) -> List[Tuple[np.ndarray, str]]: 22 | """ 23 | Load one or more images from various sources. 24 | 25 | Args: 26 | inputs: A single image or a list of images. 
Each image can be: 27 | - A file path (str) 28 | - A URL (str) 29 | - A base64-encoded string (str) 30 | - A numpy array (np.ndarray) 31 | - A file-like object (IO[bytes]) 32 | 33 | Returns: 34 | List[np.ndarray]: A list of loaded images in BGR format 35 | """ 36 | if not isinstance(inputs, list): 37 | inputs = [inputs] 38 | 39 | loaded_images = [] 40 | for item in inputs: 41 | if isinstance(item, list): 42 | for i in item: 43 | if isinstance(i, DetectedFace): 44 | loaded_images.append(i.cropped_face) 45 | continue 46 | elif isinstance(item, np.ndarray): 47 | loaded_images.append(item) 48 | elif hasattr(item, 'read') and callable(item.read): 49 | if isinstance(item, io.StringIO): 50 | raise ValueError("Image requires bytes, not io.StringIO.") 51 | img_arr = load_image_from_io_object(item) 52 | loaded_images.append(img_arr) 53 | elif isinstance(item, Path): 54 | img_arr = _load_from_str(str(item)) 55 | loaded_images.append(img_arr) 56 | elif isinstance(item, str): 57 | img_arr = _load_from_str(item) 58 | loaded_images.append(img_arr) 59 | else: 60 | raise ValueError(f"Unsupported input type: {type(item)}") 61 | return loaded_images 62 | 63 | 64 | def _load_from_str(img: str) -> np.ndarray: 65 | if img.startswith("data:image/"): 66 | return load_image_from_base64(img) 67 | elif img.lower().startswith(("http://", "https://")): 68 | return load_image_from_web(url=img) 69 | elif not os.path.isfile(img): 70 | raise ValueError(f"{img} is not exists") 71 | elif not img.isascii(): 72 | raise ValueError(f"Input image must not have non-English characters - {img}") 73 | else: 74 | img_obj_bgr = cv2.imread(img) 75 | return img_obj_bgr 76 | 77 | 78 | def load_image_from_io_object(obj: IO[bytes]) -> np.ndarray: 79 | """ 80 | Load image from an object that supports being read 81 | Args: 82 | obj: a file like object. 83 | Returns: 84 | img (np.ndarray): The decoded image as a numpy array (OpenCV format). 85 | """ 86 | try: 87 | _ = obj.seek(0) 88 | except (AttributeError, TypeError, io.UnsupportedOperation): 89 | seekable = False 90 | obj = io.BytesIO(obj.read()) 91 | else: 92 | seekable = True 93 | try: 94 | nparr = np.frombuffer(obj.read(), np.uint8) 95 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 96 | if img is None: 97 | raise ValueError("Failed to decode image") 98 | return img 99 | finally: 100 | if not seekable: 101 | obj.close() 102 | 103 | 104 | def load_image_from_io_object(obj: IO[bytes]) -> np.ndarray: 105 | """ 106 | Load image from an object that supports being read 107 | Args: 108 | obj: a file like object. 109 | Returns: 110 | img (np.ndarray): The decoded image as a numpy array (OpenCV format). 111 | """ 112 | try: 113 | _ = obj.seek(0) 114 | except (AttributeError, TypeError, io.UnsupportedOperation): 115 | seekable = False 116 | obj = io.BytesIO(obj.read()) 117 | else: 118 | seekable = True 119 | try: 120 | nparr = np.frombuffer(obj.read(), np.uint8) 121 | img = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 122 | if img is None: 123 | raise ValueError("Failed to decode image") 124 | return img 125 | finally: 126 | if not seekable: 127 | obj.close() 128 | 129 | 130 | def load_image_from_base64(uri: str) -> np.ndarray: 131 | """ 132 | Load image from base64 string. 133 | Args: 134 | uri: a base64 string. 135 | Returns: 136 | numpy array: the loaded image. 
137 | """ 138 | 139 | encoded_data_parts = uri.split(",") 140 | 141 | if len(encoded_data_parts) < 2: 142 | raise ValueError("format error in base64 encoded string") 143 | 144 | encoded_data = encoded_data_parts[1] 145 | decoded_bytes = base64.b64decode(encoded_data) 146 | 147 | # similar to find functionality, we are just considering these extensions 148 | # content type is safer option than file extension 149 | with Image.open(io.BytesIO(decoded_bytes)) as img: 150 | file_type = img.format.lower() 151 | if file_type not in {"jpeg", "png"}: 152 | raise ValueError(f"Input image can be jpg or png, but it is {file_type}") 153 | 154 | nparr = np.frombuffer(decoded_bytes, np.uint8) 155 | img_bgr = cv2.imdecode(nparr, cv2.IMREAD_COLOR) 156 | # img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) 157 | return img_bgr 158 | 159 | 160 | 161 | def load_image_from_web(url: str) -> np.ndarray: 162 | """ 163 | Loading an image from web 164 | Args: 165 | url: link for the image 166 | Returns: 167 | img (np.ndarray): equivalent to pre-loaded image from opencv (BGR format) 168 | """ 169 | response = requests.get(url, stream=True, timeout=60) 170 | response.raise_for_status() 171 | image_array = np.asarray(bytearray(response.raw.read()), dtype=np.uint8) 172 | img = cv2.imdecode(image_array, cv2.IMREAD_COLOR) 173 | return img 174 | 175 | 176 | def validate_images(imgs: Union[np.ndarray, List[np.ndarray]]) -> List[np.ndarray]: 177 | """ 178 | Validates and standardizes image input for model processing. 179 | 180 | Args: 181 | imgs (Union[np.ndarray, List[np.ndarray]]): 182 | A single image with shape (H, W, 3) or a list of such images as NumPy arrays. 183 | 184 | Returns: 185 | List[np.ndarray]: A list of validated images, each with shape (H, W, 3). 186 | """ 187 | if isinstance(imgs, np.ndarray): 188 | imgs = [imgs] 189 | elif not isinstance(imgs, list): 190 | raise ValueError(f"Expected input to be a numpy array or list, but got {type(imgs)}") 191 | 192 | if not imgs: 193 | raise ValueError("Empty image list provided for face processing!") 194 | 195 | for i, img in enumerate(imgs): 196 | if not isinstance(img, np.ndarray): 197 | raise ValueError(f"Image {i} is not a numpy array. Got {type(img)} instead.") 198 | if img.ndim != 3 or img.shape[2] != 3: 199 | raise ValueError(f"Image {i} must have shape (H, W, 3), got {img.shape}") 200 | 201 | return imgs 202 | 203 | 204 | def get_cropped_face(img: np.ndarray, bbox: List[int]) -> np.ndarray: 205 | """ 206 | Crop a face region from the input image using the detected bounding box. 207 | 208 | Parameters: 209 | img (np.ndarray): The input image in BGR or RGB format. 210 | bbox (List[int]): Bounding box coordinates in [x1, y1, x2, y2] format. 211 | 212 | Returns: 213 | np.ndarray: 214 | The cropped face image as a NumPy array. If the bounding box is 215 | partially out of bounds, it will be clipped to fit within the image dimensions. 
216 | """ 217 | h, w = img.shape[:2] 218 | x1 = max(0, bbox[0]) 219 | y1 = max(0, bbox[1]) 220 | x2 = min(w, bbox[2]) 221 | y2 = min(h, bbox[3]) 222 | cropped_face = img[y1:y2, x1:x2] 223 | return cropped_face 224 | 225 | 226 | def image_resample(img, sz): 227 | im_data = interpolate(img, size=sz, mode="area") 228 | return im_data 229 | -------------------------------------------------------------------------------- /visionface/annotators/landmark.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from typing import List, Union, Tuple, Optional, Mapping 4 | 5 | # VisionFace modules 6 | from visionface.annotators.base import BaseLandmarkAnnotator 7 | from visionface.models.LandmarkDetector import DetectedLandmark3D, DetectedLandmark2D 8 | from visionface.annotators.utils import denormalize_landmark 9 | from visionface.annotators.helper.landmark_connections import ( 10 | FACEMESH_TESSELATION, 11 | FACEMESH_CONTOURS, 12 | FACEMESH_IRISES, 13 | DLIB_FACE_LANDMARK_CONNECTIONS 14 | 15 | ) 16 | from visionface.annotators.helper.landmark_styles import ( 17 | FaceMeshStyle, 18 | FaceMeshContoursStyle, 19 | FaceMeshIrisStyle 20 | ) 21 | 22 | MEDIAPIPE_FACEMESH_CONNECTIONS = [ 23 | FACEMESH_TESSELATION, 24 | FACEMESH_CONTOURS, 25 | FACEMESH_IRISES 26 | ] 27 | DLIB_LANDMARK_CONNECTIONS = [ 28 | 29 | ] 30 | MEDIAPIPE_FACEMESH_STYLE = [ 31 | FaceMeshStyle(), 32 | FaceMeshContoursStyle(), 33 | FaceMeshIrisStyle() 34 | 35 | ] 36 | 37 | class MediaPipeFaceMeshAnnotator(BaseLandmarkAnnotator): 38 | def __init__( 39 | self, 40 | color: Tuple[int, int, int] = (255, 255, 255), 41 | thickness: int = 1, 42 | circle_radius: int = 2 43 | ): 44 | self.color = color 45 | self.thickness = thickness 46 | self.circle_radius = circle_radius 47 | 48 | def annotate( 49 | self, 50 | img: np.ndarray, 51 | landmarks: List[DetectedLandmark3D], 52 | connections: List[List[Tuple[int, int]]] = MEDIAPIPE_FACEMESH_CONNECTIONS, 53 | is_drawing_landmarks: bool = True 54 | ) -> np.ndarray: 55 | 56 | image_rows, image_cols, _ = img.shape 57 | idx_to_coordinates = {} 58 | 59 | for idx, lm in enumerate(landmarks): 60 | landmark_px = denormalize_landmark( 61 | normalized_x=lm.x, 62 | normalized_y=lm.y, 63 | image_width=image_cols, 64 | image_height=image_rows 65 | ) 66 | 67 | if landmark_px: 68 | idx_to_coordinates[idx] = landmark_px 69 | 70 | if connections: 71 | num_landmarks = len(landmarks) 72 | for cidx, connection_list in enumerate(connections): 73 | for connection in connection_list: 74 | start_idx = connection[0] 75 | end_idx = connection[1] 76 | if not (0 <= start_idx < num_landmarks and 0 <= end_idx < num_landmarks): 77 | raise ValueError(f'Landmark index is out of range. 
Invalid connection ' 78 | f'from landmark #{start_idx} to landmark #{end_idx}.') 79 | if start_idx in idx_to_coordinates and end_idx in idx_to_coordinates: 80 | drawing_spec = MEDIAPIPE_FACEMESH_STYLE[cidx][connection] if isinstance( 81 | MEDIAPIPE_FACEMESH_STYLE[cidx], Mapping) else MEDIAPIPE_FACEMESH_STYLE[cidx] 82 | cv2.line(img, idx_to_coordinates[start_idx], 83 | idx_to_coordinates[end_idx], self.color, 84 | self.thickness) 85 | 86 | if is_drawing_landmarks: 87 | for idx, landmark_px in idx_to_coordinates.items(): 88 | circle_border_radius = max(self.circle_radius + 1, int(self.circle_radius * 1.2)) 89 | cv2.circle(img, landmark_px, circle_border_radius, self.color, self.thickness) 90 | # Fill color into the circle 91 | cv2.circle(img, landmark_px, self.circle_radius, self.color, self.thickness) 92 | 93 | return img 94 | 95 | class FaceLandmarkAnnotator(BaseLandmarkAnnotator): 96 | """ 97 | A facial landmark annotator that visualizes detected landmarks and their connections. 98 | 99 | Attributes: 100 | line_color (Tuple[int, int, int]): BGR color values for connection lines. Default is (0, 255, 0) - green. 101 | line_thickness (int): Thickness of connection lines in pixels. Default is 1. 102 | circle_color (Tuple[int, int, int]): BGR color values for landmark points. Default is (255, 255, 255) - white. 103 | circle_radius (int): Radius of landmark circles in pixels. Default is 2. 104 | 105 | Example: 106 | >>> from VisionFace.models.landmark_detection.Dlib import DlibFaceLandmarkDetector 107 | >>> from VisionFace.annotators.landmark import FaceLandmarkAnnotator 108 | >>> from VisionFace.annotators.helper.landmark_connections import DLIB_FACE_LANDMARK_CONNECTIONS 109 | >>> 110 | >>> detector = DlibFaceLandmarkDetector() 111 | >>> annotator = FaceLandmarkAnnotator( 112 | ... line_color=(0, 255, 0), 113 | ... circle_color=(255, 0, 0), 114 | ... circle_radius=3 115 | ... ) 116 | >>> 117 | >>> img = cv2.imread("face_image.jpg") 118 | >>> landmarks = detector.detect_landmarks(img) 119 | >>> annotated_img = annotator.annotate( 120 | ... img=img, 121 | ... landmarks=landmarks, 122 | ... connections=DLIB_FACE_LANDMARK_CONNECTIONS 123 | ... ) 124 | """ 125 | 126 | def __init__( 127 | self, 128 | line_color: Tuple[int, int, int] = (0, 255, 0), 129 | line_thickness: int = 1, 130 | circle_color: Tuple[int, int, int] = (255, 255, 255), 131 | circle_radius: int = 2 132 | ): 133 | """ 134 | Initialize the FaceLandmarkAnnotator with visualization parameters. 135 | 136 | Args: 137 | line_color (Tuple[int, int, int], optional): BGR color tuple for connection lines. Defaults to (0, 255, 0) - green. 138 | line_thickness (int, optional): Thickness of connection lines in pixels. Defaults to 1. 139 | circle_color (Tuple[int, int, int], optional): BGR color for landmark circles. Defaults to (255, 255, 255) - white. 140 | circle_radius (int, optional): Radius of landmark circles in pixels. Defaults to 2. 141 | """ 142 | self.line_color = line_color 143 | self.line_thickness = line_thickness 144 | self.circle_color = circle_color 145 | self.circle_radius = circle_radius 146 | 147 | def annotate( 148 | self, 149 | img: np.ndarray, 150 | landmarks: List[DetectedLandmark2D], 151 | connections: List[Tuple[int, int]] = "", 152 | is_drawing_landmarks: bool = True 153 | ) -> np.ndarray: 154 | """ 155 | Annotate an image with facial landmarks and their connections. 156 | 157 | Args: 158 | img (np.ndarray): Input image as a numpy array 159 | landmarks (List[DetectedLandmark2D]): List of detected facial landmarks. 
160 | Each landmark should have 'x' and 'y' attributes representing pixel coordinates. 161 | connections (List[Tuple[int, int]], optional): landmark connections for drawing facial feature outlines. 162 | is_drawing_landmarks (bool, optional): Whether to draw landmark annotations. 163 | If False, returns the original image unchanged. Defaults to True. 164 | 165 | Returns: 166 | np.ndarray: The annotated image with landmarks and connections drawn. 167 | 168 | """ 169 | if connections and is_drawing_landmarks: 170 | # Draw connection lines 171 | for connection in connections: 172 | start_idx = connection[0] 173 | end_idx = connection[1] 174 | if start_idx < len(landmarks) and end_idx < len(landmarks): 175 | start_point = [landmarks[start_idx].x, landmarks[start_idx].y] 176 | end_point = [landmarks[end_idx].x, landmarks[end_idx].y] 177 | img = cv2.line(img, start_point, end_point, self.line_color, self.line_thickness) 178 | 179 | # Draw landmark points 180 | for point in landmarks: 181 | landmark = [point.x, point.y] 182 | cv2.circle(img, landmark, self.circle_radius, self.circle_color, -1) 183 | 184 | return img 185 | -------------------------------------------------------------------------------- /visionface/annotators/detection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from typing import List, Union 4 | 5 | # VisionFace modules 6 | from visionface.annotators.base import BaseAnnotator, ImageType, RawDetection 7 | from visionface.models.Detector import Detector 8 | from visionface.annotators.utils import ( 9 | highlight_face, 10 | convert_img_to_numpy, 11 | get_xyxy 12 | ) 13 | 14 | class BoxAnnotator(BaseAnnotator): 15 | """ 16 | A class for drawing complete rectangular bounding boxes on an image using provided detections. 17 | 18 | Parameters 19 | ---------- 20 | color : tuple, optional 21 | The BGR color tuple for the bounding box lines, by default (245, 113, 47) 22 | thickness : int, optional 23 | The thickness of the bounding box lines in pixels, by default 4 24 | 25 | Attributes 26 | ---------- 27 | color : tuple 28 | The BGR color tuple used for drawing bounding box lines 29 | thickness : int 30 | The thickness of the bounding box lines in pixels 31 | 32 | Examples 33 | -------- 34 | >>> # Using Face Detector 35 | >>> from VisionFace import VisionFace, FaceAnnotators 36 | >>> detected_faces = VisionFace.detect_faces(img) 37 | >>> annotated_img = FaceAnnotators.box_annotator(img, detected_faces, highlight=True) 38 | 39 | >>> # Using raw detection lists 40 | >>> raw_detections = [[10, 20, 100, 200, 0.95, 'face'], [30, 40, 120, 220, 0.90, 'face']] 41 | >>> annotated_img = annotator.annotate(img, raw_detections, highlight=True) 42 | """ 43 | 44 | def __init__(self, color: tuple = (245, 113, 47), thickness: int = 4): 45 | self.color = color 46 | self.thickness = thickness 47 | 48 | def annotate( 49 | self, 50 | img: ImageType, 51 | detections: Union[List[Detector], List[RawDetection]], 52 | highlight: bool = True, 53 | highlight_opacity: float = 0.2, 54 | highlight_color: tuple = (255, 255, 255), 55 | ) -> ImageType: 56 | """ 57 | Annotate the image with complete rectangular bounding boxes for each detection. 58 | 59 | This method draws full rectangular bounding boxes around each detected region 60 | defined by the provided detections. Optionally, it can also highlight 61 | the detected regions with a semi-transparent overlay. 
62 | 63 | Parameters 64 | ---------- 65 | img : ImageType 66 | The input image to annotate (can be a file path string, numpy array, or PIL Image) 67 | detections : Union[List[Detector], List[RawDetection]] 68 | List of detections, where each detection can be either: 69 | - Detector object with an xyxy property returning (x1, y1, x2, y2) 70 | - RawDetection list in format [x1, y1, x2, y2, confidence, class_name] 71 | highlight : bool, optional 72 | Whether to highlight the detected regions, by default True 73 | highlight_opacity : float, optional 74 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 75 | highlight_color : tuple, optional 76 | BGR color tuple for the highlight, by default (255, 255, 255) 77 | 78 | Returns 79 | ------- 80 | ImageType 81 | The annotated image with rectangular bounding boxes and optional highlights 82 | """ 83 | if img is None: 84 | return 85 | # Convert image to numpy for processing 86 | img = convert_img_to_numpy(img) 87 | 88 | # Apply highlighting if enabled 89 | if highlight: 90 | img = highlight_face( 91 | img, 92 | detections, 93 | highlight_opacity=highlight_opacity, 94 | highlight_color=highlight_color 95 | ) 96 | 97 | # Draw complete rectangular bounding boxes 98 | for detection in detections: 99 | x1, y1, x2, y2 = get_xyxy(detection) 100 | cv2.rectangle(img, (x1, y1), (x2, y2), self.color, thickness=self.thickness) 101 | 102 | return img 103 | 104 | class BoxCornerAnnotator(BaseAnnotator): 105 | """ 106 | A class for drawing box corners on an image using provided detections. 107 | 108 | Parameters 109 | ---------- 110 | color : tuple, optional 111 | The BGR color tuple for the corner lines, by default (245, 113, 47) 112 | thickness : int, optional 113 | The thickness of the corner lines in pixels, by default 4 114 | corner_length : int, optional 115 | The length of each corner segment in pixels, by default 15 116 | 117 | Attributes 118 | ---------- 119 | color : tuple 120 | The BGR color tuple used for drawing corner lines 121 | thickness : int 122 | The thickness of the corner lines in pixels 123 | corner_length : int 124 | The length of each corner segment in pixels 125 | 126 | Examples 127 | -------- 128 | >>> # Using Face Detector 129 | >>> from VisionFace import VisionFace, FaceAnnotators 130 | >>> detected_faces = VisionFace.detect_faces(img) 131 | >>> annotated_img = FaceAnnotators.box_corner_annotator(img, detected_faces, highlight=True) 132 | 133 | >>> # Using raw detection lists 134 | >>> raw_detections = [[10, 20, 100, 200, 0.95, 'face'], [30, 40, 120, 220, 0.90, 'face']] 135 | >>> annotated_img = annotator.annotate(img, raw_detections, highlight=True) 136 | """ 137 | 138 | def __init__(self, color: tuple = (245, 113, 47), thickness: int = 4, corner_length: int = 15): 139 | self.color = color 140 | self.thickness = thickness 141 | self.corner_length = corner_length 142 | 143 | def annotate( 144 | self, 145 | img: ImageType, 146 | detections: Union[List[Detector], List[RawDetection]], 147 | highlight: bool = True, 148 | highlight_opacity: float = 0.2, 149 | highlight_color: tuple = (255, 255, 255), 150 | ) -> ImageType: 151 | """ 152 | Annotate the image with corner boxes for each detection. 153 | 154 | This method draws L-shaped corners at each corner of the bounding boxes 155 | defined by the provided detections. Optionally, it can also highlight 156 | the detected regions with a semi-transparent overlay. 
157 | 158 | Parameters 159 | ---------- 160 | img : ImageType 161 | The input image to annotate (can be a file path string, numpy array, or PIL Image) 162 | detections : Union[List[Detector], List[RawDetection]] 163 | List of detections, where each detection can be either: 164 | - Detector object with an xyxy property returning (x1, y1, x2, y2) 165 | - RawDetection list in format [x1, y1, x2, y2, confidence, class_name] 166 | highlight : bool, optional 167 | Whether to highlight the detected regions, by default True 168 | highlight_opacity : float, optional 169 | Opacity of the highlight overlay (0.0 to 1.0), by default 0.2 170 | highlight_color : tuple, optional 171 | BGR color tuple for the highlight, by default (255, 255, 255) 172 | 173 | Returns 174 | ------- 175 | ImageType 176 | The annotated image with box corners and optional highlights 177 | """ 178 | # Convert image to numpy for processing 179 | img = convert_img_to_numpy(img) 180 | 181 | # Apply highlighting if enabled 182 | if highlight: 183 | img = highlight_face( 184 | img, 185 | detections, 186 | highlight_opacity=highlight_opacity, 187 | highlight_color=highlight_color 188 | ) 189 | 190 | # Draw box corners 191 | for detection in detections: 192 | x1, y1, x2, y2 = get_xyxy(detection) 193 | corners = [(x1, y1), (x2, y1), (x1, y2), (x2, y2)] 194 | for x, y in corners: 195 | x_end = x + self.corner_length if x == x1 else x - self.corner_length 196 | cv2.line(img, (x, y), (x_end, y), self.color, thickness=self.thickness) 197 | 198 | y_end = y + self.corner_length if y == y1 else y - self.corner_length 199 | cv2.line(img, (x, y), (x, y_end), self.color, thickness=self.thickness) 200 | 201 | return img -------------------------------------------------------------------------------- /visionface/modules/recognition.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional, Union 2 | 3 | import numpy as np 4 | from torch import embedding 5 | 6 | from visionface.commons.detection_utils import select_max_conf_faces 7 | from visionface.models.Detector import DetectedFace 8 | from visionface.modules.detection import FaceDetection 9 | from visionface.modules.embedding import FaceEmbedder 10 | from visionface.commons.image_utils import load_images, validate_images 11 | 12 | from visionface.modules.modeling import build_model 13 | 14 | 15 | class FaceRecognition: 16 | """ 17 | FaceRecognition pipeline for face detection, embedding, storage, and search. 18 | """ 19 | def __init__( 20 | self, 21 | detector_backbone: str = "yolo-small", 22 | embedding_backbone: str = "FaceNet-VGG", 23 | db_backend: str = "qdrant", 24 | db_config: Optional[Dict] = None 25 | ) -> None: 26 | """ 27 | Initialize the face recognition system with specified components. 28 | 29 | Args: 30 | detector_backbone: 31 | Backbone name for the face detector (e.g., "yolo-small", "mtcnn"). 32 | embedding_backbone: 33 | Backbone name for the face embedder (e.g., "FaceNet-VGG", "ArcFace"). 34 | db_backend: 35 | Database backend name. Supported values include: 36 | - 'qdrant' 37 | - 'milvus' 38 | - 'file' 39 | db_config: 40 | Optional dictionary for configuring the vector database connection. 41 | This is primarily used when `db_backend='qdrant'`. Supported keys include: 42 | 43 | - host (str): Hostname of the Qdrant server. Default is `"localhost"`. 44 | - port (int): Port number of the Qdrant server. Default is `6333`. 45 | - url (str, optional): Full URL (overrides host and port if provided). 
46 | - api_key (str, optional): API key for secure Qdrant access. 47 | - https (bool): Whether to use HTTPS instead of HTTP. Default is `False`. 48 | - timeout (float): Timeout duration in seconds for requests. Default is `5.0`. 49 | """ 50 | self.face_detector = FaceDetection(detector_backbone=detector_backbone) 51 | self.face_embedder = FaceEmbedder(embedding_backbone=embedding_backbone) 52 | self.db = self._init_db_backend(db_backend, db_config or {}) 53 | 54 | def _init_db_backend(self, db_backend: str, db_config: Dict) -> Any: 55 | """ 56 | Initializes the vector database backend. 57 | 58 | Args: 59 | db_backend: The name of the backend (e.g., 'qdrant'). 60 | db_config: Configuration parameters for the backend. 61 | 62 | Returns: 63 | A vector database client instance. 64 | """ 65 | if db_backend == "qdrant": 66 | from visionface.db.qdrant_client import QdrantVectorDB 67 | return QdrantVectorDB(**db_config) 68 | elif db_backend == "milvus": 69 | pass 70 | elif db_backend == "file": 71 | pass 72 | else: 73 | raise ValueError(f"Unsupported DB backend: {db_backend}") 74 | 75 | def _compute_embeddings( 76 | self, 77 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 78 | normalize_embeddings: bool = True 79 | ) -> List[List[float]]: 80 | """ 81 | Detects and embeds the most confident face in each image. 82 | 83 | Args: 84 | images: Image(s) as file path(s) or NumPy array(s). 85 | normalize: Whether to normalize the embedding vectors. 86 | 87 | Returns: 88 | List of face embedding vectors. 89 | """ 90 | detections = self.face_detector.detect_faces(images, return_cropped_faces=True) 91 | top_faces = select_max_conf_faces(detections) 92 | embeddings = self.face_embedder.embed_faces(top_faces, normalize_embeddings=normalize_embeddings) 93 | return embeddings.to_list() 94 | 95 | def upsert_faces( 96 | self, 97 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 98 | labels: Union[str, List[str]], 99 | collection_name: str, 100 | batch_size: int = 10, 101 | normalize_embeddings: bool = True 102 | ) -> None: 103 | """ 104 | Detect, embed, and store faces in a collection with automatic face selection and upserting. 105 | 106 | Parameters: 107 | ---------- 108 | images (Union[str, np.ndarray, List[np.ndarray], List[str]]): 109 | Input image(s) containing faces to process and store. Can be: 110 | - str: Path to a single image file 111 | - np.ndarray: Single image as a numpy array (H, W, C format expected) 112 | - List[np.ndarray]: Multiple images as numpy arrays 113 | - List[str]: Multiple image file paths 114 | 115 | labels (Union[str, List[str]]): 116 | Label(s) to associate with the detected faces. 117 | 118 | collection_name (str): 119 | Name of the face collection where embeddings will be stored. If the collection 120 | doesn't exist, it will be created automatically. 121 | 122 | batch_size (int, optional): 123 | Number of images to process simultaneously in each batch. Larger batch sizes 124 | can improve processing speed but require more memory. Defaults to 10. 125 | 126 | normalize_embeddings (bool, optional): 127 | Whether to L2-normalize the computed face embeddings before storage. Defaults to True. 128 | 129 | Returns: 130 | ---------- 131 | None: This method doesn't return a value but modifies the collection state. 
132 | """ 133 | vector_size = self.face_embedder.vector_size 134 | self.db.create_collection(collection_name, vector_size=vector_size) 135 | 136 | embeddings = self._compute_embeddings(images, normalize_embeddings) 137 | payloads = [{"face_name": label} for label in labels] 138 | 139 | self.db.insert_embeddings( 140 | collection_name=collection_name, 141 | embeddings=embeddings, 142 | payloads=payloads, 143 | batch_size=batch_size 144 | ) 145 | 146 | 147 | def search_faces( 148 | self, 149 | images: Union[str, np.ndarray, List[np.ndarray], List[str]], 150 | collection_name: str, 151 | score_threshold: Optional[float] = None, 152 | top_k: int = 5, 153 | ) -> List[Dict]: 154 | """ 155 | Search for similar faces in a specified collection using facial recognition embeddings. 156 | 157 | Parameters 158 | ---------- 159 | images : Union[str, np.ndarray, List[str], List[np.ndarray]] 160 | A single image or a list of images. Each image can be either a file path (str) 161 | or an image array. 162 | collection_name (str): 163 | Name of the face collection to search within. The collection must exist 164 | and contain pre-indexed face embeddings. 165 | 166 | score_threshold (Optional[float], optional): 167 | Minimum similarity score threshold for returned matches. Only faces with 168 | similarity scores above this threshold will be included in results. 169 | If None, no filtering is applied. Range typically [0.0, 1.0] where 170 | higher values indicate greater similarity. Defaults to None. 171 | 172 | top_k (int, optional): 173 | Maximum number of most similar faces to return per input image. 174 | Results are ordered by similarity score in descending order. 175 | Defaults to 5. 176 | 177 | Returns: 178 | ---------- 179 | List[Dict]: 180 | List of search results, one dictionary per input image. Each dictionary 181 | contains the top-k most similar faces found in the collection. 
182 | """ 183 | embeddings = self._compute_embeddings(images) 184 | return self.db.search_embeddings( 185 | collection_name=collection_name, 186 | query_vectors=embeddings, 187 | score_threshold=score_threshold, 188 | top_k=top_k 189 | ) 190 | 191 | 192 | 193 | 194 | -------------------------------------------------------------------------------- /visionface/models/landmark_detection/utils.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | def medipipe_mesh_landmark_names(): 5 | FACE_MESH_LANDMARK_NAMES = { 6 | # Face contour (jawline and outer face boundary) 7 | 0: "face_contour_0", 1: "nose_tip", 2: "nose_bridge_2", 3: "nose_bridge_3", 4: "nose_bridge_4", 8 | 5: "nose_bridge_5", 6: "nose_bridge_6", 7: "left_eye_inner_7", 8: "nose_bridge_8", 9: "forehead_center_9", 9 | 10: "forehead_center_10", 11: "face_contour_11", 12: "face_contour_12", 13: "upper_lip_center_13", 10 | 14: "lower_lip_center_14", 15: "face_contour_15", 16: "face_contour_16", 17: "upper_lip_17", 11 | 18: "chin_18", 19: "face_contour_19", 20: "face_contour_20", 12 | 13 | # Left eye region (from viewer's perspective) 14 | 33: "left_eye_outer_33", 7: "left_eye_inner_7", 163: "left_eye_lower_163", 144: "left_eye_lower_144", 15 | 145: "left_eye_lower_145", 153: "left_eye_lower_153", 154: "left_eye_lower_154", 155: "left_eye_lower_155", 16 | 133: "left_eye_outer_133", 173: "left_eye_upper_173", 157: "left_eye_upper_157", 158: "left_eye_upper_158", 17 | 159: "left_eye_upper_159", 160: "left_eye_upper_160", 161: "left_eye_upper_161", 246: "left_eye_lower_246", 18 | 19 | # Right eye region 20 | 362: "right_eye_inner_362", 382: "right_eye_upper_382", 381: "right_eye_upper_381", 380: "right_eye_upper_380", 21 | 374: "right_eye_upper_374", 373: "right_eye_upper_373", 390: "right_eye_upper_390", 249: "right_eye_outer_249", 22 | 263: "right_eye_outer_263", 466: "right_eye_lower_466", 388: "right_eye_lower_388", 387: "right_eye_lower_387", 23 | 386: "right_eye_lower_386", 385: "right_eye_lower_385", 384: "right_eye_lower_384", 398: "right_eye_upper_398", 24 | 25 | # Left eyebrow 26 | 46: "left_eyebrow_inner_46", 53: "left_eyebrow_53", 52: "left_eyebrow_52", 51: "left_eyebrow_51", 27 | 48: "left_eyebrow_48", 115: "left_eyebrow_115", 131: "left_eyebrow_outer_131", 134: "left_eyebrow_134", 28 | 102: "left_eyebrow_102", 49: "left_eyebrow_49", 220: "left_eyebrow_220", 305: "left_eyebrow_305", 29 | 30 | # Right eyebrow 31 | 276: "right_eyebrow_inner_276", 283: "right_eyebrow_283", 282: "right_eyebrow_282", 295: "right_eyebrow_295", 32 | 285: "right_eyebrow_285", 336: "right_eyebrow_336", 296: "right_eyebrow_296", 334: "right_eyebrow_334", 33 | 293: "right_eyebrow_293", 300: "right_eyebrow_300", 441: "right_eyebrow_outer_441", 34 | 35 | # Nose detailed points 36 | 168: "nose_bridge_168", 195: "nostril_left_195", 197: "nostril_left_197", 196: "nostril_left_196", 37 | 3: "nose_bridge_3", 51: "nose_left_51", 48: "nose_left_48", 115: "nose_left_115", 131: "nose_left_131", 38 | 134: "nose_left_134", 102: "nose_left_102", 49: "nose_left_49", 220: "nose_left_220", 305: "nose_left_305", 39 | 278: "nose_right_278", 279: "nose_right_279", 420: "nostril_right_420", 456: "nostril_right_456", 40 | 248: "nose_right_248", 281: "nose_right_281", 275: "nose_right_275", 41 | 42 | # Lips outer boundary 43 | 61: "mouth_left_corner_61", 84: "upper_lip_left_84", 17: "upper_lip_17", 314: "upper_lip_right_314", 44 | 405: "mouth_right_corner_405", 320: "lower_lip_right_320", 307: "lower_lip_307", 375: "lower_lip_375", 
45 | 321: "lower_lip_321", 308: "lower_lip_308", 324: "lower_lip_324", 318: "lower_lip_318", 46 | 47 | # Lips inner boundary 48 | 78: "inner_lip_upper_78", 95: "inner_lip_upper_95", 88: "inner_lip_upper_88", 178: "inner_lip_upper_178", 49 | 87: "inner_lip_upper_87", 14: "inner_lip_lower_14", 317: "inner_lip_lower_317", 402: "inner_lip_lower_402", 50 | 318: "inner_lip_lower_318", 324: "inner_lip_lower_324", 308: "inner_lip_lower_308", 415: "inner_lip_lower_415", 51 | 52 | # Additional mouth points 53 | 291: "mouth_right_corner_291", 303: "mouth_upper_303", 267: "mouth_lower_267", 269: "mouth_lower_269", 54 | 270: "mouth_lower_270", 267: "mouth_lower_267", 271: "mouth_lower_271", 272: "mouth_lower_272", 55 | 56 | # Chin and jaw 57 | 175: "chin_left_175", 199: "chin_bottom_199", 175: "chin_right_175", 18: "chin_center_18", 58 | 175: "jaw_left_175", 199: "jaw_bottom_199", 175: "jaw_right_175", 59 | 60 | # Cheek regions 61 | 116: "left_cheek_116", 117: "left_cheek_117", 118: "left_cheek_118", 119: "left_cheek_119", 62 | 120: "left_cheek_120", 121: "left_cheek_121", 126: "left_cheek_126", 142: "left_cheek_142", 63 | 36: "left_cheek_36", 205: "left_cheek_205", 206: "left_cheek_206", 207: "left_cheek_207", 64 | 213: "left_cheek_213", 192: "left_cheek_192", 147: "left_cheek_147", 65 | 66 | 345: "right_cheek_345", 346: "right_cheek_346", 347: "right_cheek_347", 348: "right_cheek_348", 67 | 349: "right_cheek_349", 350: "right_cheek_350", 451: "right_cheek_451", 452: "right_cheek_452", 68 | 453: "right_cheek_453", 464: "right_cheek_464", 435: "right_cheek_435", 410: "right_cheek_410", 69 | 454: "right_cheek_454", 70 | 71 | # Forehead points 72 | 151: "forehead_151", 337: "forehead_337", 299: "forehead_299", 333: "forehead_333", 73 | 298: "forehead_298", 301: "forehead_301", 284: "forehead_284", 251: "forehead_251", 74 | 389: "forehead_389", 356: "forehead_356", 454: "forehead_454", 323: "forehead_323", 75 | 361: "forehead_361", 340: "forehead_340", 76 | 77 | # Temple regions 78 | 103: "left_temple_103", 67: "left_temple_67", 109: "left_temple_109", 338: "temple_338", 79 | 332: "right_temple_332", 297: "right_temple_297", 80 | } 81 | 82 | # Fill remaining indices with generic names 83 | for i in range(478): 84 | if i not in FACE_MESH_LANDMARK_NAMES: 85 | # Determine general region based on index ranges 86 | if i < 17: 87 | FACE_MESH_LANDMARK_NAMES[i] = f"face_contour_{i}" 88 | elif 17 <= i < 68: 89 | FACE_MESH_LANDMARK_NAMES[i] = f"right_eyebrow_region_{i}" 90 | elif 68 <= i < 103: 91 | FACE_MESH_LANDMARK_NAMES[i] = f"nose_bridge_region_{i}" 92 | elif 103 <= i < 134: 93 | FACE_MESH_LANDMARK_NAMES[i] = f"right_eye_region_{i}" 94 | elif 134 <= i < 155: 95 | FACE_MESH_LANDMARK_NAMES[i] = f"left_eye_region_{i}" 96 | elif 155 <= i < 180: 97 | FACE_MESH_LANDMARK_NAMES[i] = f"left_eyebrow_region_{i}" 98 | elif 180 <= i < 200: 99 | FACE_MESH_LANDMARK_NAMES[i] = f"nose_tip_region_{i}" 100 | elif 200 <= i < 220: 101 | FACE_MESH_LANDMARK_NAMES[i] = f"nostril_region_{i}" 102 | elif 220 <= i < 250: 103 | FACE_MESH_LANDMARK_NAMES[i] = f"cheek_region_{i}" 104 | elif 250 <= i < 300: 105 | FACE_MESH_LANDMARK_NAMES[i] = f"mouth_region_{i}" 106 | elif 300 <= i < 340: 107 | FACE_MESH_LANDMARK_NAMES[i] = f"chin_jaw_region_{i}" 108 | elif 340 <= i < 400: 109 | FACE_MESH_LANDMARK_NAMES[i] = f"right_face_region_{i}" 110 | else: 111 | FACE_MESH_LANDMARK_NAMES[i] = f"face_mesh_{i}" 112 | 113 | return FACE_MESH_LANDMARK_NAMES 114 | 115 | 116 | def dlib_landmarks_names(): 117 | DLIB_LANDMARK_NAMES = { 118 | 0: "jaw_0", 1: 
"jaw_1", 2: "jaw_2", 3: "jaw_3", 4: "jaw_4", 5: "jaw_5", 119 | 6: "jaw_6", 7: "jaw_7", 8: "jaw_8", 9: "jaw_9", 10: "jaw_10", 11: "jaw_11", 120 | 12: "jaw_12", 13: "jaw_13", 14: "jaw_14", 15: "jaw_15", 16: "jaw_16", 121 | 122 | 17: "right_eyebrow_17", 18: "right_eyebrow_18", 19: "right_eyebrow_19", 123 | 20: "right_eyebrow_20", 21: "right_eyebrow_21", 124 | 125 | 22: "left_eyebrow_22", 23: "left_eyebrow_23", 24: "left_eyebrow_24", 126 | 25: "left_eyebrow_25", 26: "left_eyebrow_26", 127 | 128 | 27: "nose_27", 28: "nose_28", 29: "nose_29", 30: "nose_30", 129 | 31: "nose_31", 32: "nose_32", 33: "nose_33", 34: "nose_34", 35: "nose_35", 130 | 131 | 36: "right_eye_36", 37: "right_eye_37", 38: "right_eye_38", 132 | 39: "right_eye_39", 40: "right_eye_40", 41: "right_eye_41", 133 | 134 | 42: "left_eye_42", 43: "left_eye_43", 44: "left_eye_44", 135 | 45: "left_eye_45", 46: "left_eye_46", 47: "left_eye_47", 136 | 137 | 48: "mouth_48", 49: "mouth_49", 50: "mouth_50", 51: "mouth_51", 138 | 52: "mouth_52", 53: "mouth_53", 54: "mouth_54", 55: "mouth_55", 139 | 56: "mouth_56", 57: "mouth_57", 58: "mouth_58", 59: "mouth_59", 140 | 60: "mouth_60", 61: "mouth_61", 62: "mouth_62", 63: "mouth_63", 141 | 64: "mouth_64", 65: "mouth_65", 66: "mouth_66", 67: "mouth_67" 142 | } 143 | 144 | return DLIB_LANDMARK_NAMES -------------------------------------------------------------------------------- /visionface/models/face_embedding/FaceNet.py: -------------------------------------------------------------------------------- 1 | # This module is adapted from: 2 | # https://github.com/timesler/facenet-pytorch 3 | # 4 | # Original Author: Tim Esler (timesler) 5 | # License: MIT License 6 | 7 | from typing import Optional 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | from torchvision.transforms import functional as FT 12 | 13 | # VisionFace modules 14 | from visionface.models.FaceEmbedding import FaceEmbedder 15 | from visionface.commons.download_files import download_model_weights 16 | 17 | 18 | FACENET_VGG_WEIGHTS = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180402-114759-vggface2.pt" 19 | FACENET_CASIA_WEIGHTS = "https://github.com/timesler/facenet-pytorch/releases/download/v2.2.9/20180408-102900-casia-webface.pt" 20 | 21 | 22 | class FaceNetVGG(FaceEmbedder): 23 | """ 24 | FaceNet-VGG model class 25 | """ 26 | def __init__(self): 27 | super().__init__() 28 | self.model = InceptionResnetV1(pretrained="vggface2") 29 | self.model_name = "FaceNet-VGG" 30 | self.input_shape = (160, 160) 31 | self.output_shape = 512 32 | 33 | class FaceNetCASIA(FaceEmbedder): 34 | """ 35 | FaceNet-CASIA model class 36 | """ 37 | def __init__(self): 38 | super().__init__() 39 | self.model = InceptionResnetV1(pretrained="casia-webface") 40 | self.model_name = "FaceNet-CASIA" 41 | self.input_shape = (160, 160) 42 | self.output_shape = 512 43 | 44 | 45 | class InceptionResnetV1(nn.Module): 46 | """ 47 | FaceNet(InceptionResnetV1) model class 48 | """ 49 | def __init__( 50 | self, 51 | pretrained: Optional[str] = None, 52 | normalize_embeddings: bool = True, 53 | dropout_prob: float = 0.6, 54 | ) -> None: 55 | """ 56 | InceptionResnetV1 model for face embedding extraction, based on FaceNet. 57 | Supports pretrained weights from VGGFace2 or CASIA-WebFace. 58 | 59 | Args: 60 | pretrained (str, optional): One of 'vggface2' or 'casia-webface'. 61 | normalize_embeddings (bool, optional): Whether to L2-normalize embeddings. Default is True. 
62 | dropout_prob (float, optional): Dropout probability before the embedding layer. Default is 0.6. 63 | 64 | Raises: 65 | Exception: If `pretrained` is None or invalid. 66 | """ 67 | super().__init__() 68 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 69 | self.normalize_embeddings = normalize_embeddings 70 | if pretrained == 'vggface2': 71 | tmp_classes = 8631 72 | weight_url = FACENET_VGG_WEIGHTS 73 | elif pretrained == 'casia-webface': 74 | tmp_classes = 10575 75 | weight_url = FACENET_CASIA_WEIGHTS 76 | elif pretrained is None: 77 | raise Exception('"pretrained" must be specified') 78 | # Define layers 79 | self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2) 80 | self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1) 81 | self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1) 82 | self.maxpool_3a = nn.MaxPool2d(3, stride=2) 83 | self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1) 84 | self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1) 85 | self.conv2d_4b = BasicConv2d(192, 256, kernel_size=3, stride=2) 86 | self.repeat_1 = nn.Sequential( 87 | Block35(scale=0.17), 88 | Block35(scale=0.17), 89 | Block35(scale=0.17), 90 | Block35(scale=0.17), 91 | Block35(scale=0.17), 92 | ) 93 | self.mixed_6a = Mixed_6a() 94 | self.repeat_2 = nn.Sequential( 95 | Block17(scale=0.10), 96 | Block17(scale=0.10), 97 | Block17(scale=0.10), 98 | Block17(scale=0.10), 99 | Block17(scale=0.10), 100 | Block17(scale=0.10), 101 | Block17(scale=0.10), 102 | Block17(scale=0.10), 103 | Block17(scale=0.10), 104 | Block17(scale=0.10), 105 | ) 106 | self.mixed_7a = Mixed_7a() 107 | self.repeat_3 = nn.Sequential( 108 | Block8(scale=0.20), 109 | Block8(scale=0.20), 110 | Block8(scale=0.20), 111 | Block8(scale=0.20), 112 | Block8(scale=0.20), 113 | ) 114 | self.block8 = Block8(noReLU=True) 115 | self.avgpool_1a = nn.AdaptiveAvgPool2d(1) 116 | self.dropout = nn.Dropout(dropout_prob) 117 | self.last_linear = nn.Linear(1792, 512, bias=False) 118 | self.last_bn = nn.BatchNorm1d(512, eps=0.001, momentum=0.1, affine=True) 119 | 120 | if pretrained is not None: 121 | self.logits = nn.Linear(512, tmp_classes) 122 | model_path = download_model_weights(filename=f"facenet_{pretrained}.pt", 123 | download_url=weight_url) 124 | state_dict = torch.load(model_path, weights_only=False) 125 | self.load_state_dict(state_dict) 126 | 127 | self.to(self.device) 128 | self.eval() 129 | 130 | def forward(self, inputs: torch.Tensor , normalize_embeddings: bool = True) -> torch.Tensor: 131 | """ 132 | Computes embeddings from input image tensors. 133 | 134 | Args: 135 | inputs (torch.Tensor): A batch of images as a 4D tensor of shape (B, C, H, W) 136 | normalize_embeddings (bool, optional): If True, L2-normalizes the output embeddings. 137 | Defaults to True. 138 | 139 | Returns: 140 | torch.Tensor: A tensor of shape (B, D) containing the image embeddings, 141 | where D is the embedding dimension. 
142 | """ 143 | x = self.conv2d_1a(inputs) 144 | x = self.conv2d_2a(x) 145 | x = self.conv2d_2b(x) 146 | x = self.maxpool_3a(x) 147 | x = self.conv2d_3b(x) 148 | x = self.conv2d_4a(x) 149 | x = self.conv2d_4b(x) 150 | x = self.repeat_1(x) 151 | x = self.mixed_6a(x) 152 | x = self.repeat_2(x) 153 | x = self.mixed_7a(x) 154 | x = self.repeat_3(x) 155 | x = self.block8(x) 156 | x = self.avgpool_1a(x) 157 | x = self.dropout(x) 158 | x = self.last_linear(x.view(x.shape[0], -1)) 159 | x = self.last_bn(x) 160 | if normalize_embeddings: 161 | # Normalize embeddings (L2) 162 | x = F.normalize(x, p=2, dim=1) 163 | return x 164 | 165 | class BasicConv2d(nn.Module): 166 | 167 | def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0): 168 | super().__init__() 169 | self.conv = nn.Conv2d( 170 | in_planes, out_planes, 171 | kernel_size=kernel_size, stride=stride, 172 | padding=padding, bias=False 173 | ) # verify bias false 174 | self.bn = nn.BatchNorm2d( 175 | out_planes, 176 | eps=0.001, # value found in tensorflow 177 | momentum=0.1, # default pytorch value 178 | affine=True 179 | ) 180 | self.relu = nn.ReLU(inplace=False) 181 | 182 | def forward(self, x): 183 | x = self.conv(x) 184 | x = self.bn(x) 185 | x = self.relu(x) 186 | return x 187 | 188 | 189 | class Block35(nn.Module): 190 | 191 | def __init__(self, scale=1.0): 192 | super().__init__() 193 | 194 | self.scale = scale 195 | 196 | self.branch0 = BasicConv2d(256, 32, kernel_size=1, stride=1) 197 | 198 | self.branch1 = nn.Sequential( 199 | BasicConv2d(256, 32, kernel_size=1, stride=1), 200 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) 201 | ) 202 | 203 | self.branch2 = nn.Sequential( 204 | BasicConv2d(256, 32, kernel_size=1, stride=1), 205 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1), 206 | BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1) 207 | ) 208 | 209 | self.conv2d = nn.Conv2d(96, 256, kernel_size=1, stride=1) 210 | self.relu = nn.ReLU(inplace=False) 211 | 212 | def forward(self, x): 213 | x0 = self.branch0(x) 214 | x1 = self.branch1(x) 215 | x2 = self.branch2(x) 216 | out = torch.cat((x0, x1, x2), 1) 217 | out = self.conv2d(out) 218 | out = out * self.scale + x 219 | out = self.relu(out) 220 | return out 221 | 222 | 223 | class Block17(nn.Module): 224 | 225 | def __init__(self, scale=1.0): 226 | super().__init__() 227 | 228 | self.scale = scale 229 | 230 | self.branch0 = BasicConv2d(896, 128, kernel_size=1, stride=1) 231 | 232 | self.branch1 = nn.Sequential( 233 | BasicConv2d(896, 128, kernel_size=1, stride=1), 234 | BasicConv2d(128, 128, kernel_size=(1,7), stride=1, padding=(0,3)), 235 | BasicConv2d(128, 128, kernel_size=(7,1), stride=1, padding=(3,0)) 236 | ) 237 | 238 | self.conv2d = nn.Conv2d(256, 896, kernel_size=1, stride=1) 239 | self.relu = nn.ReLU(inplace=False) 240 | 241 | def forward(self, x): 242 | x0 = self.branch0(x) 243 | x1 = self.branch1(x) 244 | out = torch.cat((x0, x1), 1) 245 | out = self.conv2d(out) 246 | out = out * self.scale + x 247 | out = self.relu(out) 248 | return out 249 | 250 | 251 | class Block8(nn.Module): 252 | 253 | def __init__(self, scale=1.0, noReLU=False): 254 | super().__init__() 255 | 256 | self.scale = scale 257 | self.noReLU = noReLU 258 | 259 | self.branch0 = BasicConv2d(1792, 192, kernel_size=1, stride=1) 260 | 261 | self.branch1 = nn.Sequential( 262 | BasicConv2d(1792, 192, kernel_size=1, stride=1), 263 | BasicConv2d(192, 192, kernel_size=(1,3), stride=1, padding=(0,1)), 264 | BasicConv2d(192, 192, kernel_size=(3,1), stride=1, padding=(1,0)) 265 
| ) 266 | 267 | self.conv2d = nn.Conv2d(384, 1792, kernel_size=1, stride=1) 268 | if not self.noReLU: 269 | self.relu = nn.ReLU(inplace=False) 270 | 271 | def forward(self, x): 272 | x0 = self.branch0(x) 273 | x1 = self.branch1(x) 274 | out = torch.cat((x0, x1), 1) 275 | out = self.conv2d(out) 276 | out = out * self.scale + x 277 | if not self.noReLU: 278 | out = self.relu(out) 279 | return out 280 | 281 | 282 | class Mixed_6a(nn.Module): 283 | 284 | def __init__(self): 285 | super().__init__() 286 | 287 | self.branch0 = BasicConv2d(256, 384, kernel_size=3, stride=2) 288 | 289 | self.branch1 = nn.Sequential( 290 | BasicConv2d(256, 192, kernel_size=1, stride=1), 291 | BasicConv2d(192, 192, kernel_size=3, stride=1, padding=1), 292 | BasicConv2d(192, 256, kernel_size=3, stride=2) 293 | ) 294 | 295 | self.branch2 = nn.MaxPool2d(3, stride=2) 296 | 297 | def forward(self, x): 298 | x0 = self.branch0(x) 299 | x1 = self.branch1(x) 300 | x2 = self.branch2(x) 301 | out = torch.cat((x0, x1, x2), 1) 302 | return out 303 | 304 | 305 | class Mixed_7a(nn.Module): 306 | 307 | def __init__(self): 308 | super().__init__() 309 | 310 | self.branch0 = nn.Sequential( 311 | BasicConv2d(896, 256, kernel_size=1, stride=1), 312 | BasicConv2d(256, 384, kernel_size=3, stride=2) 313 | ) 314 | 315 | self.branch1 = nn.Sequential( 316 | BasicConv2d(896, 256, kernel_size=1, stride=1), 317 | BasicConv2d(256, 256, kernel_size=3, stride=2) 318 | ) 319 | 320 | self.branch2 = nn.Sequential( 321 | BasicConv2d(896, 256, kernel_size=1, stride=1), 322 | BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1), 323 | BasicConv2d(256, 256, kernel_size=3, stride=2) 324 | ) 325 | 326 | self.branch3 = nn.MaxPool2d(3, stride=2) 327 | 328 | def forward(self, x): 329 | x0 = self.branch0(x) 330 | x1 = self.branch1(x) 331 | x2 = self.branch2(x) 332 | x3 = self.branch3(x) 333 | out = torch.cat((x0, x1, x2, x3), 1) 334 | return out 335 | 336 | 337 | 338 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VisionFace 2 | 3 |
4 | 5 | ![VisionFace](https://github.com/user-attachments/assets/52ac9123-304c-4098-a1e5-f413d03bfec9) 6 | 7 | [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/) 8 | [![PyPI version](https://badge.fury.io/py/visionface.svg)](https://badge.fury.io/py/visionface) 9 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 10 | 11 | 12 | **Modern face detection, recognition & analysis in 3 lines of code** 13 | 14 | VisionFace is a state-of-the-art, open-source framework for comprehensive face analysis, built with PyTorch. It provides a unified interface for face detection, recognition, landmark detection, and visualization with support for multiple cutting-edge models. 15 | 16 | [Quick Start](#-quick-start) • [Examples](#-examples) • [Models](#-models) • [API Docs](https://visionface.readthedocs.io) 17 | 18 |
19 | 20 | ## ✨ What VisionFace Does 21 | 22 |
23 | 
24 | <table align="center">
25 |   <tr>
26 |     <td align="center"><img src="banners/face_detection.jpg" alt="Face Detection"><br><b>Face Detection</b></td>
27 |     <td align="center"><img src="banners/face_recognition.jpg" alt="Face Recognition"><br><b>Face Recognition</b></td>
28 |     <td align="center"><img src="banners/face_landmarks.jpg" alt="Face Landmarks"><br><b>Face Landmarks</b></td>
29 |   </tr>
30 |   <tr>
31 |     <td align="center"><img src="banners/face_analysis.jpg" alt="Face Analysis"><br><b>Face Analysis</b></td>
32 |     <td align="center"><img src="banners/face_verification.jpg" alt="Face Verification"><br><b>Face Verification</b></td>
33 |     <td align="center"><img src="banners/face_visualization.jpg" alt="Face Visualization"><br><b>Face Visualization</b></td>
34 |   </tr>
35 | </table>
36 | 
78 | 79 | 80 | - **Detect faces** in images with 12+ models (YOLO, MediaPipe, MTCNN...) 81 | - **Recognize faces** with vector search and embedding models 82 | - **Extract landmarks** (68-point, 468-point face mesh) 83 | - **Batch process** thousands of images efficiently 84 | - **Production-ready** with Docker support and REST API 85 | 86 | ## 🚀 Quick Start 87 | 88 | ```bash 89 | pip install visionface 90 | ``` 91 | 92 | ### Face Detection 93 | The `Face Detection` module is your gateway to identifying faces in any image. Built for both beginners and experts, it provides a unified interface to 12+ cutting-edge detection models. 94 | 95 | ✨ **Key Features:** 96 | * **Multiple Input Sources**: Image Files, URLs, PIL images, NumPy arrays 97 | * **Flexible Processing**: Single image or batch processing thousands of images efficiently 98 | * **12+ State-of-the-Art Models**: From ultra-fast mobile models to high-precision detectors 99 | * **One-Line Detection**: Get results with just ```detector.detect_faces(image)``` 100 | * **Rich Outputs**: Bounding boxes, confidence scores, cropped faces ready to use 101 | 102 | ![face_detection_2](https://github.com/user-attachments/assets/6cb7e953-3448-486e-b6b4-32c654da1fce) 103 | 104 | 📝 **Quick Example:** 105 | 106 | ```python 107 | import cv2 108 | from visionface import FaceDetection, FaceAnnotators 109 | 110 | # 1. Initialize detector 111 | detector = FaceDetection(detector_backbone="yolo-small") 112 | 113 | # 2. Detect faces 114 | image = cv2.imread("your_image.jpg") 115 | faces = detector.detect_faces(image) 116 | 117 | # 3. Visualize results 118 | result = FaceAnnotators.box_annotator(image, faces[0]) 119 | cv2.imwrite("detected.jpg", result) 120 | ``` 121 | 122 | ### Face Recognition 123 | The `Face Recognition` module identifies individuals by generating embeddings and comparing them in a vector database. The process includes three stages: detecting faces, creating embeddings with the chosen model, and searching the database to find the closest matches. 124 | 125 | ✨ **Key Features**: 126 | 127 | * **Multi-model support**: Choose from high-accuracy embedding backbones such as FaceNet-VGG, FaceNet-CASIA, and Dlib. 128 | * **Vector DB Integration**: Store and query embeddings using Qdrant, Milvus, or local file-based storage. 129 | * **Scalable Search**: Efficiently match thousands or millions of faces with fast search. 130 | * **Flexible Enrollment**: Add faces one-by-one or in batches with associated labels. 131 | * **Threshold & Ranking**: Control similarity thresholds and retrieve top-k matches for robust recognition results. 132 | 133 | ![face)recognition](https://github.com/user-attachments/assets/55f83bc1-93ec-479d-a86b-820c7cef0605) 134 | 135 | ```python 136 | from visionface import FaceRecognition 137 | 138 | # 1. Setup recognition system 139 | fr = FaceRecognition(detector_backbone="yolo-small", 140 | embedding_backbone="FaceNet-VGG", 141 | db_backend="qdrant") 142 | 143 | # 2. Add known faces 144 | fr.upsert_faces( 145 | images=["john.jpg", "jane.jpg", "bob.jpg"], 146 | labels=["John", "Jane", "Bob"], 147 | collection_name="employees" 148 | ) 149 | 150 | # 3. 
Search for matches 151 | matches = fr.search_faces("query_face_image.jpg", 152 | collection_name="employees", 153 | score_threshold=0.7, 154 | top_k=3) 155 | 156 | for match in matches: 157 | print(f"Found: {match['face_name']} (confidence: {match['score']:.2f})") 158 | ``` 159 | 160 | ### Face Embeddings 161 | The `Face Embeddings` module transforms each detected face into a high-dimensional numeric vector (embedding) that captures its unique features. 162 | These embeddings can be used for: 163 | 164 | * **Face verification**: Check if two faces belong to the same perso 165 | * **Recognition**: Match against a database of known faces 166 | * **Clustering**: Group similar faces automatically 167 | * **Advanced analytics**: 168 | 169 | **✨ Supported Embedding Models:** 170 | `FaceNet-VGG`, `FaceNet-CASIA`, `Dlib` 171 | 172 | 📝 **Quick Example:** 173 | 174 | ```python 175 | from visionface import FaceEmbedder 176 | 177 | # 1. Initialize embedder 178 | embedder = FaceEmbedder(embedding_backbone="FaceNet-VGG") 179 | 180 | # 2. Generate embeddings for face images 181 | embeddings = embedder.embed_faces( 182 | face_imgs=["face1.jpg", "face2.jpg"], 183 | normalize_embeddings=True # L2 normalization 184 | ) 185 | 186 | # 3. Use embeddings 187 | for i, embedding in enumerate(embeddings): 188 | print(f"Face {i+1} embedding shape: {embedding.shape}") # (512,) 189 | # Use for: face verification, clustering, custom databases 190 | ``` 191 | 192 | ### Face Landmarks 193 | The `Landmarks` module identifies key facial features with pixel-perfect accuracy. From eye positions to lip contours, get detailed facial geometry for advanced applications. 194 | 195 | ✨ **Key Features:** 196 | 197 | * **Multiple Input Sources**: Image Files, URLs, PIL images, NumPy arrays 198 | * **Flexible Processing**: Single image or batch processing thousands of images efficiently 199 | * **2D & 3D Support**: Standard 2D points or full 3D face mesh 200 | * **Rich Annotations**: Built-in visualization with customizable styling 201 | * **Multiple Backends**: MediaPipe (468 points) or Dlib (68 points) 202 | 203 | ![face_landmarks](https://github.com/user-attachments/assets/9b8264d1-2ea7-442c-ab08-7d11d35f1824) 204 | 205 | 📝 **Quick Example:** 206 | 207 | ```python 208 | from visionface import LandmarkDetection 209 | from visionface.annotators.landmark import MediaPipeFaceMeshAnnotator 210 | 211 | landmark_detector = LandmarkDetection(detector_backbone="mediapipe") 212 | image = cv2.imread("your_image.jpg") 213 | 214 | # Get 468 facial landmarks 215 | landmarks = landmark_detector.detect_3d_landmarks(image) 216 | 217 | # Visualize with connections 218 | vizualizer = MediaPipeFaceMeshAnnotator(thickness=2, circle_radius=3) 219 | result = vizualizer.annotate( 220 | image, landmarks[0], connections=True 221 | ) 222 | cv2.imwrite("detected_landmarks.jpg", result) 223 | ``` 224 | 225 | ## 💡 Examples 226 | 227 |
228 | 🎯 Real-time Face Detection 229 | 230 | ```python 231 | import cv2 232 | from visionface import FaceDetection, FaceAnnotators 233 | 234 | detector = FaceDetection(detector_backbone="yolo-nano") # Fastest model 235 | cap = cv2.VideoCapture(0) 236 | 237 | while True: 238 | ret, frame = cap.read() 239 | faces = detector.detect_faces(frame) 240 | annotated = FaceAnnotators.box_annotator(frame, faces) 241 | 242 | cv2.imshow('Face Detection', annotated) 243 | if cv2.waitKey(1) & 0xFF == ord('q'): 244 | break 245 | 246 | cap.release() 247 | cv2.destroyAllWindows() 248 | ``` 249 |
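<details>
<summary>🗣️ Prompt-Guided Detection (YOLO-World)</summary>

The `yolow-*` backbones are open-vocabulary detectors. The sketch below drives the low-level `YOLOWorldSmallDetector` class directly through its `detect_faces_with_prompt` method (both defined in `visionface/models/face_detection/YOLOWolrd.py`); the prompt text and file names are only illustrative.

```python
import cv2
from visionface import FaceAnnotators
from visionface.models.face_detection.YOLOWolrd import YOLOWorldSmallDetector

detector = YOLOWorldSmallDetector()  # downloads the yolov8s-world weights on first use
image = cv2.imread("your_image.jpg")

# Text prompts describe what to detect; one prompt list is shared by all images in the batch
faces = detector.detect_faces_with_prompt([image], prompts=["face"])

# faces[0] holds the DetectedFace objects for the first (and only) input image
annotated = FaceAnnotators.box_annotator(image, faces[0])
cv2.imwrite("prompted_detection.jpg", annotated)
```
</details>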
250 | 251 |
252 | 📊 Batch Processing 253 | 254 | ```python 255 | from visionface import FaceDetection 256 | import glob 257 | 258 | detector = FaceDetection(detector_backbone="yolo-medium") 259 | 260 | # Process entire folder 261 | image_paths = glob.glob("photos/*.jpg") 262 | images = [cv2.imread(path) for path in image_paths] 263 | 264 | # Detect all faces at once 265 | all_detections = detector.detect_faces(images) 266 | 267 | # Save cropped faces 268 | for i, detections in enumerate(all_detections): 269 | for j, face in enumerate(detections): 270 | if face.cropped_face is not None: 271 | cv2.imwrite(f"faces/image_{i}_face_{j}.jpg", face.cropped_face) 272 | ``` 273 |
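<details>
<summary>🔐 Face Verification with Embeddings</summary>

Verification ("do two photos show the same person?") is listed as a use case of the embeddings module but not shown above. This is a minimal sketch built on `FaceEmbedder`: it assumes `embed_faces` accepts image paths and returns one L2-normalized vector per input (as in the Face Embeddings example), and the 0.7 threshold is illustrative, not a tuned value.

```python
import numpy as np
from visionface import FaceEmbedder

embedder = FaceEmbedder(embedding_backbone="FaceNet-VGG")

# One embedding per image; with L2 normalization the dot product equals cosine similarity
emb_a, emb_b = embedder.embed_faces(
    face_imgs=["person_a.jpg", "person_b.jpg"],
    normalize_embeddings=True
)

similarity = float(np.dot(np.asarray(emb_a, dtype=float), np.asarray(emb_b, dtype=float)))
print(f"Cosine similarity: {similarity:.3f} -> same person: {similarity >= 0.7}")
```
</details>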
274 | 275 |
276 | 🏢 Employee Recognition System 277 | 278 | ```python 279 | from visionface import FaceRecognition 280 | import os 281 | 282 | # Initialize system 283 | fr = FaceRecognition(db_backend="qdrant") 284 | 285 | # Auto-enroll from employee photos folder 286 | def enroll_employees(folder_path): 287 | for filename in os.listdir(folder_path): 288 | if filename.endswith(('.jpg', '.png')): 289 | name = filename.split('.')[0] # Use filename as name 290 | image_path = os.path.join(folder_path, filename) 291 | 292 | fr.upsert_faces( 293 | images=[image_path], 294 | labels=[name], 295 | collection_name="company_employees" 296 | ) 297 | print(f"Enrolled: {name}") 298 | 299 | # Enroll all employees 300 | enroll_employees("employee_photos/") 301 | 302 | # Check security camera feed 303 | def identify_person(camera_image): 304 | results = fr.search_faces( 305 | camera_image, 306 | collection_name="company_employees", 307 | score_threshold=0.8, 308 | top_k=1 309 | ) 310 | 311 | if results[0]: # If match found 312 | return results[0][0]['face_name'] 313 | return "Unknown person" 314 | ``` 315 |
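<details>
<summary>🌐 Mixed Input Sources</summary>

Detection is documented to accept image files, URLs, PIL images, and NumPy arrays (see `visionface/commons/image_utils.py`). A short sketch, assuming `FaceDetection.detect_faces` routes its inputs through that loader just as the recognition pipeline does with file paths; the file names and URL below are placeholders.

```python
import cv2
from visionface import FaceDetection

detector = FaceDetection(detector_backbone="yolo-small")

inputs = [
    "local_photo.jpg",                        # file path
    "https://example.com/group_photo.jpg",    # URL (placeholder)
    cv2.imread("another_photo.jpg"),          # NumPy array in BGR format
]

all_faces = detector.detect_faces(inputs)
for i, faces in enumerate(all_faces):
    print(f"Input {i}: {len(faces)} face(s) detected")
```
</details>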
316 | 317 | ## 🎯 Models 318 | 319 | **Choose the right model for your use case:** 320 | 321 | | Use Case | Speed | Accuracy | Recommended Model | 322 | |----------|-------|----------|------------------| 323 | | 🚀 **Real-time apps** | ⚡⚡⚡ | ⭐⭐ | `yolo-nano`, `mediapipe` | 324 | | 🎯 **General purpose** | ⚡⚡ | ⭐⭐⭐ | `yolo-small` (default) | 325 | | 🔍 **High accuracy** | ⚡ | ⭐⭐⭐⭐ | `yolo-large`, `mtcnn` | 326 | | 📱 **Mobile/Edge** | ⚡⚡⚡ | ⭐⭐ | `mediapipe`, `yolo-nano` | 327 | | 🎭 **Landmarks needed** | ⚡⚡ | ⭐⭐⭐ | `mediapipe`, `dlib` | 328 | 329 |
330 | 📋 Complete Model List 331 | 332 | **Detection Models:** 333 | - `yolo-nano`, `yolo-small`, `yolo-medium`, `yolo-large` 334 | - `yoloe-small`, `yoloe-medium`, `yoloe-large` (prompt-based) 335 | - `yolow-small`, `yolow-medium`, `yolow-large`, `yolow-xlarge` (open-vocabulary) 336 | - `mediapipe`, `mtcnn`, `opencv` 337 | 338 | **Embedding Models:** 339 | - `FaceNet-VGG` (512D) - Balanced accuracy/speed 340 | - `FaceNet-CASIA` (512D) - High precision 341 | - `Dlib` (128D) - Lightweight 342 | 343 | **Landmark Models:** 344 | - `mediapipe` - 468 points + 3D mesh 345 | - `dlib` - 68 points, robust 346 |
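
Because every component is selected by a single backbone string, switching between the entries above is a one-line change, which also makes quick speed comparisons easy. The image path and the pair of backbones in this sketch are placeholders; benchmark on a representative sample of your own data.

```python
import time
import cv2
from visionface import FaceDetection

image = cv2.imread("your_image.jpg")

for backbone in ["yolo-nano", "yolo-large"]:  # any detection backbones from the list above
    detector = FaceDetection(detector_backbone=backbone)
    start = time.perf_counter()
    detector.detect_faces(image)
    elapsed_ms = (time.perf_counter() - start) * 1000
    print(f"{backbone}: detect_faces took {elapsed_ms:.1f} ms")
```
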
347 | 348 | 349 | ## 📚 Documentation 350 | 351 | - 📖 [Full Documentation](https://visionface.readthedocs.io) 352 | - 🎓 [Tutorials & Guides](https://visionface.readthedocs.io/tutorials) 353 | - 🔌 [REST API Reference](https://visionface.readthedocs.io/api) 354 | - 💡 [Use Case Examples](https://github.com/username/visionface/tree/main/examples) 355 | 356 | ## 🤝 Contributing 357 | We welcome contributions! See our [Contributing Guide](CONTRIBUTING.md). 358 | 359 | 360 | 361 | 362 |
363 | 364 | 365 | **Quick ways to help:** 366 | - ⭐ Star the repo 367 | - 🐛 Report bugs 368 | - 💡 Request features 369 | - 📝 Improve docs 370 | - 🔧 Submit PRs 371 | 372 | ## 📄 License 373 | 374 | MIT License - see [LICENSE](LICENSE) file. 375 | 376 | ## 🙏 Citation 377 | 378 | ```bibtex 379 | @software{VisionFace2025, 380 | title = {VisionFace: Modern Face Detection & Recognition Framework}, 381 | author = {VisionFace Team}, 382 | year = {2025}, 383 | url = {https://github.com/miladfa7/visionface} 384 | } 385 | ``` 386 | 387 | --- 388 | 389 |
390 | 391 | **[⬆ Back to Top](#visionface)** • **Made with ❤️ by the VisionFace team** 392 | 393 |
394 | -------------------------------------------------------------------------------- /visionface/models/face_detection/MTCNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | import logging 5 | from typing import List, Any, Union 6 | from enum import Enum 7 | from torchvision.ops.boxes import batched_nms 8 | 9 | # VisionFace modules 10 | from visionface.models.Detector import Detector, DetectedFace 11 | from visionface.commons.download_files import download_model_weights 12 | from visionface.commons.image_utils import image_resample, get_cropped_face 13 | from visionface.commons.utils import batched_nms_numpy 14 | from visionface.commons.detection_utils import ( 15 | convert_to_square_bbox, 16 | box_padding, 17 | apply_bbox_regression 18 | ) 19 | 20 | logging.basicConfig(level=logging.INFO) 21 | 22 | 23 | class MTCCNModel(Enum): 24 | ONET = 0 25 | PNET = 1 26 | RNET = 2 27 | 28 | WEIGHT_NAMES = [ 29 | "mtccn-onet-face.pt", 30 | "mtccn-pnet-face.pt", 31 | "mtccn-rnet-face.pt", 32 | ] 33 | 34 | WEIGHT_URLS = [ 35 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/onet.pt", 36 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/pnet.pt", 37 | "https://raw.githubusercontent.com/timesler/facenet-pytorch/master/data/rnet.pt", 38 | ] 39 | 40 | 41 | class MTCNNDetector(Detector): 42 | """MTCNN face detection module. 43 | 44 | This class loads pretrained P-, R-, and O-nets and returns bounding boxes for detected faces 45 | 46 | Keyword Arguments: 47 | min_face_size {int} -- Minimum face size to search for. (default: {20}) 48 | thresholds {list} -- MTCNN face detection thresholds (default: {[0.6, 0.7, 0.7]}) 49 | factor {float} -- Factor used to create a scaling pyramid of face sizes. (default: {0.709}) 50 | post_process {bool} -- Whether or not to post process images tensors before returning. 51 | (default: {True}) 52 | select_largest {bool} -- If True, if multiple faces are detected, the largest is returned. 53 | If False, the face with the highest detection probability is returned. 54 | (default: {True}) 55 | selection_method {string} -- Which heuristic to use for selection. Default None. If 56 | specified, will override select_largest: 57 | "probability": highest probability selected 58 | "largest": largest box selected 59 | "largest_over_threshold": largest box over a certain probability selected 60 | "center_weighted_size": box size minus weighted squared offset from image center 61 | (default: {None}) 62 | keep_all {bool} -- If True, all detected faces are returned, in the order dictated by the 63 | select_largest parameter. (default: {False}) 64 | device {torch.device} -- The device on which to run neural net passes. 
(default: {None}) 65 | """ 66 | 67 | def __init__( 68 | self, 69 | min_face_size=20, 70 | thresholds=[0.6, 0.7, 0.7], 71 | factor=0.709, 72 | post_process=True, 73 | select_largest=True, 74 | selection_method=None, 75 | keep_all=True, 76 | device=None 77 | ): 78 | super().__init__() 79 | 80 | # MTCNN specific parameters 81 | self.device = device if device else torch.device('cuda' if torch.cuda.is_available() else 'cpu') 82 | self.min_face_size = min_face_size 83 | self.thresholds = thresholds 84 | self.factor = factor 85 | self.post_process = post_process 86 | self.select_largest = select_largest 87 | self.keep_all = keep_all 88 | self.selection_method = selection_method 89 | 90 | # Initialize model components 91 | self.pnet = None 92 | self.rnet = None 93 | self.onet = None 94 | 95 | # Build the models 96 | self.model = self.build_model() 97 | 98 | if not self.selection_method: 99 | self.selection_method = 'largest' if self.select_largest else 'probability' 100 | 101 | def build_model(self) -> Any: 102 | """ 103 | Build and return the MTCNN face detection model. 104 | This method loads the P-Net, R-Net, and O-Net components. 105 | 106 | Returns: 107 | dict: Dictionary containing the mtcnn network components 108 | """ 109 | self.pnet = PNet() 110 | self.rnet = RNet() 111 | self.onet = ONet() 112 | 113 | self.pnet.to(self.device) 114 | self.rnet.to(self.device) 115 | self.onet.to(self.device) 116 | 117 | return { 118 | 'pnet': self.pnet, 119 | 'rnet': self.rnet, 120 | 'onet': self.onet 121 | } 122 | 123 | def detect_faces( 124 | self, 125 | imgs: List[np.ndarray], 126 | return_cropped_faces: bool = True 127 | ) -> List[List[DetectedFace]]: 128 | """ 129 | Detect faces in one or more input images using the MTCNN model. 130 | 131 | Parameters: 132 | imgs (List[np.ndarray]): 133 | A single image or a list of images in BGR format. 134 | 135 | Args: 136 | imgs (Union[np.ndarray, List[np.ndarray]]): 137 | - A single image as a NumPy array with shape (H, W, 3), or 138 | - A list of such images. 139 | return_cropped_faces : bool, optional 140 | Whether to include cropped face images in each DetectedFace object. Default is True. 141 | 142 | Returns: 143 | List[List[DetectedFace]]: 144 | A list where each element is a list of DetectedFace objects corresponding to one input image. 145 | Each DetectedFace object contains: 146 | - Bounding box coordinates (xmin, ymin, xmax, ymax) 147 | - Confidence score (conf) 148 | - Class name ("face") 149 | - The cropped face region (cropped_face), extracted from the original image. 150 | """ 151 | processed_imgs = self._preprocess_images(imgs) 152 | batch_boxes = self._run_mtcnn_pipeline(processed_imgs) 153 | return self.process_faces(imgs, batch_boxes, return_cropped_faces) 154 | 155 | def process_faces( 156 | self, 157 | imgs: List[np.ndarray], 158 | results: np.ndarray, 159 | return_cropped_faces: bool 160 | ) -> List[List[DetectedFace]]: 161 | """ 162 | Process MTCNN detection results and convert them into DetectedFace objects. 163 | 164 | Args: 165 | imgs (List[np.ndarray]): 166 | A list of input images (as NumPy arrays). 167 | 168 | results (np.ndarray): 169 | A NumPy array of shape (batch_size, num_faces, 5), where each detected face is represented by 170 | [x1, y1, x2, y2, confidence_score]. Each sub-array corresponds to detections for a single image. 171 | 172 | return_cropped_faces : bool, 173 | Whether to include cropped face images in each DetectedFace object. Default is True. 
174 | 175 | Returns: 176 | List[List[DetectedFace]]: 177 | A list where each element is a list of DetectedFace objects corresponding to one input image. 178 | Each DetectedFace object contains: 179 | - Bounding box coordinates (xmin, ymin, xmax, ymax) 180 | - Confidence score (conf) 181 | - Class name ("face") 182 | - The cropped face region (cropped_face), extracted from the original image. 183 | """ 184 | 185 | detections = [] 186 | 187 | for idx, bboxes in enumerate(results): 188 | img = imgs[idx] 189 | current_detections = [] 190 | face_no = 0 191 | for bbox in bboxes: 192 | cropped_face = get_cropped_face(img, bbox[:-1]) if return_cropped_faces else None 193 | class_name = "face" if bbox[2] != 0 and bbox[3] != 0 else None 194 | facial_info = DetectedFace( 195 | xmin=bbox[0], 196 | ymin=bbox[1], 197 | xmax=bbox[2], 198 | ymax=bbox[3], 199 | conf=round(bbox[4], 2), 200 | class_name = class_name, 201 | cropped_face=cropped_face 202 | ) 203 | current_detections.append(facial_info) 204 | face_no = face_no+1 if class_name is not None else face_no 205 | 206 | logging.info( 207 | f"[MTCNNDetector] {face_no} face(s) detected in image id: {idx}, " 208 | f"min confidence threshold 0.25." 209 | ) 210 | 211 | detections.append(current_detections) 212 | 213 | return detections 214 | 215 | def _preprocess_images(self, imgs: List[np.ndarray]) -> torch.Tensor: 216 | """Preprocess input images for MTCNN.""" 217 | 218 | if any(img.size != imgs[0].size for img in imgs): 219 | raise Exception("MTCNN batch processing only compatible with equal-dimension images.") 220 | 221 | imgs = np.stack([np.uint8(img) for img in imgs]) 222 | imgs = torch.as_tensor(imgs.copy(), device=self.device) 223 | return imgs 224 | 225 | def _run_mtcnn_pipeline(self, imgs: torch.Tensor) -> List[np.ndarray]: 226 | """ 227 | Run the complete MTCNN detection pipeline. 
228 | """ 229 | model_dtype = next(self.pnet.parameters()).dtype 230 | imgs = imgs.permute(0, 3, 1, 2).type(model_dtype) 231 | 232 | batch_size = len(imgs) 233 | h, w = imgs.shape[2:4] 234 | minsize = self.min_face_size 235 | threshold = self.thresholds 236 | factor = self.factor 237 | 238 | m = 12.0 / minsize 239 | minl = min(h, w) 240 | minl = minl * m 241 | 242 | # Create scale pyramid 243 | scale_i = m 244 | scales = [] 245 | while minl >= 12: 246 | scales.append(scale_i) 247 | scale_i = scale_i * factor 248 | minl = minl * factor 249 | 250 | # First stage 251 | boxes = [] 252 | image_inds = [] 253 | scale_picks = [] 254 | all_i = 0 255 | offset = 0 256 | 257 | for scale in scales: 258 | im_data = image_resample(imgs, (int(h * scale + 1), int(w * scale + 1))) 259 | im_data = (im_data - 127.5) * 0.0078125 260 | reg, probs = self.pnet(im_data) 261 | 262 | boxes_scale, image_inds_scale = generate_bounding_box(reg, probs[:, 1], scale, threshold[0]) 263 | boxes.append(boxes_scale) 264 | image_inds.append(image_inds_scale) 265 | 266 | pick = batched_nms(boxes_scale[:, :4], boxes_scale[:, 4], image_inds_scale, 0.5) 267 | scale_picks.append(pick + offset) 268 | offset += boxes_scale.shape[0] 269 | 270 | boxes = torch.cat(boxes, dim=0) 271 | image_inds = torch.cat(image_inds, dim=0) 272 | scale_picks = torch.cat(scale_picks, dim=0) 273 | 274 | # NMS within each scale + image 275 | boxes, image_inds = boxes[scale_picks], image_inds[scale_picks] 276 | 277 | # NMS within each image 278 | pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) 279 | boxes, image_inds = boxes[pick], image_inds[pick] 280 | 281 | regw = boxes[:, 2] - boxes[:, 0] 282 | regh = boxes[:, 3] - boxes[:, 1] 283 | qq1 = boxes[:, 0] + boxes[:, 5] * regw 284 | qq2 = boxes[:, 1] + boxes[:, 6] * regh 285 | qq3 = boxes[:, 2] + boxes[:, 7] * regw 286 | qq4 = boxes[:, 3] + boxes[:, 8] * regh 287 | boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0) 288 | boxes = convert_to_square_bbox(boxes) 289 | y, ey, x, ex = box_padding(boxes, w, h) 290 | 291 | # Second stage 292 | if len(boxes) > 0: 293 | im_data = [] 294 | for k in range(len(y)): 295 | if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): 296 | img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) 297 | im_data.append(image_resample(img_k, (24, 24))) 298 | im_data = torch.cat(im_data, dim=0) 299 | im_data = (im_data - 127.5) * 0.0078125 300 | 301 | # This is equivalent to out = rnet(im_data) to avoid GPU out of memory. 
302 | out = fixed_batch_process(im_data, self.rnet) 303 | 304 | out0 = out[0].permute(1, 0) 305 | out1 = out[1].permute(1, 0) 306 | score = out1[1, :] 307 | ipass = score > threshold[1] 308 | boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) 309 | image_inds = image_inds[ipass] 310 | mv = out0[:, ipass].permute(1, 0) 311 | 312 | # NMS within each image 313 | pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7) 314 | boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick] 315 | boxes = apply_bbox_regression(boxes, mv) 316 | boxes = convert_to_square_bbox(boxes) 317 | 318 | # Third stage 319 | if len(boxes) > 0: 320 | y, ey, x, ex = box_padding(boxes, w, h) 321 | im_data = [] 322 | for k in range(len(y)): 323 | if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1): 324 | img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k], (x[k] - 1):ex[k]].unsqueeze(0) 325 | im_data.append(image_resample(img_k, (48, 48))) 326 | im_data = torch.cat(im_data, dim=0) 327 | im_data = (im_data - 127.5) * 0.0078125 328 | 329 | # This is equivalent to out = onet(im_data) to avoid GPU out of memory. 330 | out = fixed_batch_process(im_data, self.onet) 331 | 332 | out0 = out[0].permute(1, 0) 333 | out1 = out[1].permute(1, 0) 334 | out2 = out[2].permute(1, 0) 335 | score = out2[1, :] 336 | ipass = score > threshold[2] 337 | boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1) 338 | image_inds = image_inds[ipass] 339 | mv = out0[:, ipass].permute(1, 0) 340 | 341 | boxes = apply_bbox_regression(boxes, mv) 342 | 343 | # NMS within each image using "Min" strategy 344 | pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min') 345 | boxes, image_inds = boxes[pick], image_inds[pick] 346 | 347 | boxes = boxes.detach().numpy() 348 | image_inds = image_inds.cpu() 349 | 350 | # Group boxes by image 351 | batch_boxes = [] 352 | for b_i in range(batch_size): 353 | b_i_inds = np.where(image_inds == b_i) 354 | batch_boxes.append(boxes[b_i_inds].copy()) 355 | 356 | # Post-process boxes and probabilities 357 | boxes, probs = [], [] 358 | for box in batch_boxes: 359 | box = np.array(box) 360 | if len(box) == 0: 361 | boxes.append(None) 362 | probs.append([None]) 363 | elif self.select_largest: 364 | box_order = np.argsort((box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1]))[::-1] 365 | box = box[box_order] 366 | boxes.append(box[:, :4]) 367 | probs.append(box[:, 4]) 368 | else: 369 | boxes.append(box[:, :4]) 370 | probs.append(box[:, 4]) 371 | 372 | boxes = np.array(boxes, dtype=object) 373 | probs = np.array(probs, dtype=object) 374 | 375 | return self._combine_boxes_and_probs(boxes, probs) 376 | 377 | def _combine_boxes_and_probs( 378 | self, 379 | boxes: List[Union[np.ndarray, None]], 380 | probs: List[Union[np.ndarray, None]] 381 | ) -> np.ndarray: 382 | combined = [] 383 | for b, p in zip(boxes, probs): 384 | if b is None or p is None: 385 | combined.append(np.array([[0, 0, 0, 0, 0]])) 386 | else: 387 | p = np.expand_dims(p, axis=1) # shape (N, 1) 388 | combined.append(np.concatenate((b.astype(np.int32), p), axis=1)) # shape (N, 5) 389 | return combined 390 | 391 | class PNet(nn.Module): 392 | """MTCNN PNet. 
393 | 394 | Keyword Arguments: 395 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 396 | """ 397 | 398 | def __init__(self, pretrained=True): 399 | super().__init__() 400 | 401 | self.conv1 = nn.Conv2d(3, 10, kernel_size=3) 402 | self.prelu1 = nn.PReLU(10) 403 | self.pool1 = nn.MaxPool2d(2, 2, ceil_mode=True) 404 | self.conv2 = nn.Conv2d(10, 16, kernel_size=3) 405 | self.prelu2 = nn.PReLU(16) 406 | self.conv3 = nn.Conv2d(16, 32, kernel_size=3) 407 | self.prelu3 = nn.PReLU(32) 408 | self.conv4_1 = nn.Conv2d(32, 2, kernel_size=1) 409 | self.softmax4_1 = nn.Softmax(dim=1) 410 | self.conv4_2 = nn.Conv2d(32, 4, kernel_size=1) 411 | 412 | self.training = False 413 | 414 | if pretrained: 415 | model_id = MTCCNModel.PNET.value 416 | model_name = WEIGHT_NAMES[model_id] 417 | weight_url = WEIGHT_URLS[model_id] 418 | model_path = download_model_weights( 419 | filename=model_name, 420 | download_url=weight_url 421 | ) 422 | state_dict = torch.load(model_path, weights_only=False) 423 | self.load_state_dict(state_dict) 424 | 425 | def forward(self, x): 426 | x = self.conv1(x) 427 | x = self.prelu1(x) 428 | x = self.pool1(x) 429 | x = self.conv2(x) 430 | x = self.prelu2(x) 431 | x = self.conv3(x) 432 | x = self.prelu3(x) 433 | a = self.conv4_1(x) 434 | a = self.softmax4_1(a) 435 | b = self.conv4_2(x) 436 | return b, a 437 | 438 | class RNet(nn.Module): 439 | """MTCNN RNet. 440 | 441 | Keyword Arguments: 442 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 443 | """ 444 | 445 | def __init__(self, pretrained=True): 446 | super().__init__() 447 | 448 | self.conv1 = nn.Conv2d(3, 28, kernel_size=3) 449 | self.prelu1 = nn.PReLU(28) 450 | self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) 451 | self.conv2 = nn.Conv2d(28, 48, kernel_size=3) 452 | self.prelu2 = nn.PReLU(48) 453 | self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) 454 | self.conv3 = nn.Conv2d(48, 64, kernel_size=2) 455 | self.prelu3 = nn.PReLU(64) 456 | self.dense4 = nn.Linear(576, 128) 457 | self.prelu4 = nn.PReLU(128) 458 | self.dense5_1 = nn.Linear(128, 2) 459 | self.softmax5_1 = nn.Softmax(dim=1) 460 | self.dense5_2 = nn.Linear(128, 4) 461 | 462 | self.training = False 463 | 464 | if pretrained: 465 | model_id = MTCCNModel.RNET.value 466 | model_name = WEIGHT_NAMES[model_id] 467 | weight_url = WEIGHT_URLS[model_id] 468 | model_path = download_model_weights( 469 | filename=model_name, 470 | download_url=weight_url 471 | ) 472 | state_dict = torch.load(model_path, weights_only=False) 473 | self.load_state_dict(state_dict) 474 | 475 | def forward(self, x): 476 | x = self.conv1(x) 477 | x = self.prelu1(x) 478 | x = self.pool1(x) 479 | x = self.conv2(x) 480 | x = self.prelu2(x) 481 | x = self.pool2(x) 482 | x = self.conv3(x) 483 | x = self.prelu3(x) 484 | x = x.permute(0, 3, 2, 1).contiguous() 485 | x = self.dense4(x.view(x.shape[0], -1)) 486 | x = self.prelu4(x) 487 | a = self.dense5_1(x) 488 | a = self.softmax5_1(a) 489 | b = self.dense5_2(x) 490 | return b, a 491 | 492 | 493 | class ONet(nn.Module): 494 | """MTCNN ONet. 
495 | 496 | Keyword Arguments: 497 | pretrained {bool} -- Whether or not to load saved pretrained weights (default: {True}) 498 | """ 499 | 500 | def __init__(self, pretrained=True): 501 | super().__init__() 502 | 503 | self.conv1 = nn.Conv2d(3, 32, kernel_size=3) 504 | self.prelu1 = nn.PReLU(32) 505 | self.pool1 = nn.MaxPool2d(3, 2, ceil_mode=True) 506 | self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 507 | self.prelu2 = nn.PReLU(64) 508 | self.pool2 = nn.MaxPool2d(3, 2, ceil_mode=True) 509 | self.conv3 = nn.Conv2d(64, 64, kernel_size=3) 510 | self.prelu3 = nn.PReLU(64) 511 | self.pool3 = nn.MaxPool2d(2, 2, ceil_mode=True) 512 | self.conv4 = nn.Conv2d(64, 128, kernel_size=2) 513 | self.prelu4 = nn.PReLU(128) 514 | self.dense5 = nn.Linear(1152, 256) 515 | self.prelu5 = nn.PReLU(256) 516 | self.dense6_1 = nn.Linear(256, 2) 517 | self.softmax6_1 = nn.Softmax(dim=1) 518 | self.dense6_2 = nn.Linear(256, 4) 519 | self.dense6_3 = nn.Linear(256, 10) 520 | 521 | self.training = False 522 | 523 | if pretrained: 524 | model_id = MTCCNModel.ONET.value 525 | model_name = WEIGHT_NAMES[model_id] 526 | weight_url = WEIGHT_URLS[model_id] 527 | model_path = download_model_weights( 528 | filename=model_name, 529 | download_url=weight_url 530 | ) 531 | state_dict = torch.load(model_path, weights_only=False) 532 | self.load_state_dict(state_dict) 533 | 534 | def forward(self, x): 535 | x = self.conv1(x) 536 | x = self.prelu1(x) 537 | x = self.pool1(x) 538 | x = self.conv2(x) 539 | x = self.prelu2(x) 540 | x = self.pool2(x) 541 | x = self.conv3(x) 542 | x = self.prelu3(x) 543 | x = self.pool3(x) 544 | x = self.conv4(x) 545 | x = self.prelu4(x) 546 | x = x.permute(0, 3, 2, 1).contiguous() 547 | x = self.dense5(x.view(x.shape[0], -1)) 548 | x = self.prelu5(x) 549 | a = self.dense6_1(x) 550 | a = self.softmax6_1(a) 551 | b = self.dense6_2(x) 552 | c = self.dense6_3(x) 553 | return b, c, a 554 | 555 | 556 | def fixed_batch_process(im_data, model): 557 | batch_size = 512 558 | out = [] 559 | for i in range(0, len(im_data), batch_size): 560 | batch = im_data[i:(i+batch_size)] 561 | out.append(model(batch)) 562 | 563 | return tuple(torch.cat(v, dim=0) for v in zip(*out)) 564 | 565 | def generate_bounding_box(reg, probs, scale, thresh): 566 | stride = 2 567 | cellsize = 12 568 | 569 | reg = reg.permute(1, 0, 2, 3) 570 | 571 | mask = probs >= thresh 572 | mask_inds = mask.nonzero() 573 | image_inds = mask_inds[:, 0] 574 | score = probs[mask] 575 | reg = reg[:, mask].permute(1, 0) 576 | bb = mask_inds[:, 1:].type(reg.dtype).flip(1) 577 | q1 = ((stride * bb + 1) / scale).floor() 578 | q2 = ((stride * bb + cellsize - 1 + 1) / scale).floor() 579 | boundingbox = torch.cat([q1, q2, score.unsqueeze(1), reg], dim=1) 580 | return boundingbox, image_inds --------------------------------------------------------------------------------