├── .MATLABDriveTag ├── .gitignore ├── AngleBuffer.py ├── LICENSE ├── README.md ├── main.py ├── mediapipe_landmarks_test.py ├── requirements.txt └── test.py /.MATLABDriveTag: -------------------------------------------------------------------------------- 1 | cbc9c000-1ce9-4c60-8c76-7c3ffdb9b96f -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.mp4 6 | # C extensions 7 | *.so 8 | *.avi 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | logs/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /AngleBuffer.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import numpy as np 3 | 4 | 5 | class AngleBuffer: 6 | def __init__(self, size=40): 7 | self.size = size 8 | self.buffer = collections.deque(maxlen=size) 9 | 10 | def add(self, angles): 11 | self.buffer.append(angles) 12 | 13 | def get_average(self): 14 | return np.mean(self.buffer, axis=0) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Alireza Ghaderi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Python-Gaze-Face-Tracker 3 | 4 | ### Advanced Real-Time Eye, Facial Landmark, Head Pose, Gaze Direction Tracking System 5 | 6 | --- 7 | ![image](https://github.com/alireza787b/Python-Gaze-Face-Tracker/assets/30341941/08db0391-c13f-4252-9a88-9d32b77181b9) 8 | ![image](https://github.com/alireza787b/Python-Gaze-Face-Tracker/assets/30341941/8ad43aa9-dd3f-48b5-9e61-e375bc1db70f) 9 | 10 | 11 | 12 | 13 | ![image](https://github.com/alireza787b/Python-Gaze-Face-Tracker/assets/30341941/ce20ac3a-6785-448e-85df-4d2dd5f22040) 14 | 15 | ## Description 16 | **Python-Gaze-Face-Tracker** is a Python-based application designed for advanced real-time eye tracking, facial landmark detection, and head position (orientation) estimator and gaze estimation using OpenCV and MediaPipe technology. Specializing in uncalibrated gaze tracking and head orientation analysis this tool is an easy-to-use Python eye and facial landmark tracker. 
It excels in visualizing iris positions and offers robust logging capabilities for both eye and facial landmark data. Equipped with the ability to transmit this iris and gaze information over UDP sockets, Python-Gaze-Face-Tracker is well suited to a variety of applications, including aviation, human-computer interaction (HCI), and augmented reality (AR). The tool also includes a blink detection feature, contributing to detailed eye movement analysis and supporting head tracking. This makes it a comprehensive package for advanced gaze tracking and facial feature analysis in interactive technology applications. 17 | 18 | 19 | 20 | --- 21 | 22 | ## Features 23 | - **Real-Time Eye Tracking**: Tracks and visualizes iris and eye corner positions in real-time using webcam input. 24 | - **Facial Landmark Detection**: Detects and displays up to 468 facial landmarks. 25 | - **Data Logging**: Records tracking data to CSV files, including timestamps, eye positions, and optional facial landmark data. *Note: Enabling logging of all 468 facial landmarks can result in large log files.* 26 | - **Socket Communication**: Transmits iris tracking data (currently the left eye only) via UDP sockets for integration with other systems or applications. 27 | - **Blink Detection**: Monitors and records blink frequency, enhancing eye movement analysis. 28 | - **Real-Time Head Pose Estimation**: Accurately estimates the roll, pitch, and yaw of the user's head in real-time. 29 | - **Filtering and Smoothing**: Implements filtering and smoothing algorithms to ensure stable and accurate head orientation readings. 30 | - **Gaze Estimation**: Visualizes the direction of gaze by projecting a 3D point from the nose tip onto the 2D camera plane. 31 | - **Custom Real-Time Facial Landmark Visualization**: Utilize the `mediapipe_landmarks_test.py` script to visualize and track each of the MediaPipe facial landmark indices in real time. This feature is particularly useful for identifying the most relevant facial landmarks for your project and observing them directly in the video feed. 32 | 33 | --- 34 | 35 | ## Requirements 36 | - Python 3.x 37 | - OpenCV (opencv-python) 38 | - MediaPipe (mediapipe) 39 | - NumPy (numpy), plus Python standard libraries: `math`, `socket`, `argparse`, `time`, `csv`, `datetime`, `os` 40 | 41 | --- 42 | ## Tutorial Video 43 | 🎥 **Watch the Setup and Usage Tutorial**: Discover how to install and use the Python-Gaze-Face-Tracker with our step-by-step video guide on YouTube: [Watch Tutorial](https://www.youtube.com/watch?v=UgC2GggTks0) 44 | 45 | This video tutorial will walk you through the installation process, demonstrate how to run the code, and show you the real-time tracking features in action. 46 | 47 | 48 | --- 49 | 50 | ## Installation & Usage 51 | 52 | 1. **Clone the Repository:** 53 | ``` 54 | git clone https://github.com/alireza787b/Python-Gaze-Face-Tracker.git 55 | ``` 56 | 57 | 2. **Navigate to the Repository Directory:** 58 | ``` 59 | cd Python-Gaze-Face-Tracker 60 | ``` 61 | 62 | 3. **Install Dependencies:** 63 | ``` 64 | pip install -r requirements.txt 65 | ``` 66 | 67 | 4. **Run the Application:** 68 | ``` 69 | python main.py 70 | ``` 71 | 72 | Optionally, specify the camera source index: 73 | ``` 74 | python main.py -c <camera_source> 75 | ``` 76 | 77 | 5. **Optionally, Open the Project in VS Code:** 78 | ``` 79 | code . 80 | ``` 81 | 82 | 83 | 84 | 85 | 86 | --- 87 | 88 | ## Parameters 89 | - **USER_FACE_WIDTH**: The horizontal distance between the outer edges of the user's cheekbones in millimeters.
Adjust this value based on your face width for accurate head pose estimation. 90 | - **NOSE_TO_CAMERA_DISTANCE**: The distance from the tip of the nose to the camera lens in millimeters. Intended for future enhancements. 91 | - **PRINT_DATA**: Enable or disable console data printing for debugging. 92 | - **DEFAULT_WEBCAM**: Default camera source index. '0' usually refers to the built-in webcam. 93 | - **SHOW_ALL_FEATURES**: Display all facial landmarks on the video feed if set to True. 94 | - **LOG_DATA**: Enable or disable logging of data to a CSV file. 95 | - **LOG_ALL_FEATURES**: Log all facial landmarks to the CSV file if set to True. 96 | - **ENABLE_HEAD_POSE**: Enable the head position and orientation estimator. 97 | - **LOG_FOLDER**: Directory for storing log files. 98 | - **SERVER_IP**: IP address for UDP data transmission (default is localhost). 99 | - **SERVER_PORT**: Port number for the server to listen on. 100 | - **SHOW_ON_SCREEN_DATA**: Display blink count and head pose angles on the video feed if set to True. 101 | - **EYES_BLINK_FRAME_COUNTER**: Counter for consecutive frames with detected potential blinks. 102 | - **BLINK_THRESHOLD**: Eye aspect ratio threshold for blink detection. 103 | - **EYE_AR_CONSEC_FRAMES**: Number of consecutive frames below the threshold required to confirm a blink. 104 | - **MIN_DETECTION_CONFIDENCE**: Confidence threshold for model detection. 105 | - **MIN_TRACKING_CONFIDENCE**: Confidence threshold for model tracking. 106 | - **MOVING_AVERAGE_WINDOW**: Number of frames for calculating the moving average for smoothing angles. 107 | - **SHOW_BLINK_COUNT_ON_SCREEN**: Toggle to show the blink count on the video feed. 108 | - **IS_RECORDING**: Controls whether data is being logged automatically. Set to false to wait for the 'r' command to start logging. 109 | - **SERVER_ADDRESS**: Tuple containing the SERVER_IP and SERVER_PORT for UDP communication. 110 | 111 | 112 | --- 113 | 114 | ## Interactive Commands 115 | 116 | While running the Eye Tracking and Head Pose Estimation script, you can interact with the program using the following keyboard commands: 117 | 118 | - **'c' Key**: Calibrate Head Pose 119 | - Pressing the 'c' key recalibrates the head pose estimation to the current orientation of the user's head. This sets the current head pose as the new reference point. 120 | 121 | - **'r' Key**: Start/Stop Recording 122 | - Toggling the 'r' key starts or pauses the recording of data to log folder. 123 | 124 | - **'q' Key**: Quit Program 125 | - Pressing the 'q' key will exit the program. 126 | 127 | 128 | --- 129 | ## Data Logging & Telemetry 130 | - **CSV Logging**: The application generates CSV files with tracking data including timestamps, eye positions, and optional facial landmarks. These files are stored in the `logs` folder. 131 | 132 | - **UDP Telemetry**: The application sends iris position data through UDP sockets as defined by `SERVER_IP` and `SERVER_PORT`. The data is sent in the following order: [Timestamp, Left Eye Center X, Left Eye Center Y, Left Iris Relative Pos Dx, Left Iris Relative Pos Dy]. 
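
To consume this stream from another process, you can pair the tracker with a small receiver. The following is a minimal sketch, not part of the repository: it assumes the default `SERVER_IP`/`SERVER_PORT` of 127.0.0.1:7070 and unpacks the 24-byte packet layout described below using native (typically little-endian) byte order, which matches the sender's NumPy `tobytes()` output on most machines.

```
import socket
import struct

# Bind to the same address the tracker sends to (SERVER_IP / SERVER_PORT defaults assumed).
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 7070))

while True:
    data, _ = sock.recvfrom(1024)
    if len(data) != 24:
        continue  # skip anything that is not a full 24-byte packet
    # One int64 timestamp followed by four int32 values, native byte order.
    timestamp, l_cx, l_cy, l_dx, l_dy = struct.unpack("=qiiii", data)
    print(timestamp, l_cx, l_cy, l_dx, l_dy)
```
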
133 | 134 | ### UDP Packet Structure 135 | - **Packet Type**: Mixed (int64 for timestamp, int32 for other values) 136 | - **Packet Structure**: 137 | - Timestamp (int64) 138 | - Left Eye Center X (int32) 139 | - Left Eye Center Y (int32) 140 | - Left Iris Relative Pos Dx (int32) 141 | - Left Iris Relative Pos Dy (int32) 142 | - **Packet Size**: 24 bytes (8 bytes for int64 timestamp, 4 bytes each for the four int32 values) 143 | 144 | ### Example Packets 145 | - **Example**: 146 | - Timestamp: 1623447890123 147 | - Left Eye Center X: 315 148 | - Left Eye Center Y: 225 149 | - Left Iris Relative Pos Dx: 66 150 | - Left Iris Relative Pos Dy: -3 151 | - Packet: [1623447890123, 315, 225, 66, -3] 152 | 153 | 154 | 155 | --- 156 | 157 | ## Acknowledgements 158 | This project was initially inspired by [Asadullah Dal's iris segmentation project](https://github.com/Asadullah-Dal17/iris-Segmentation-mediapipe-python). 159 | The blink detection and gaze direction visualization feature is also contributed by Asadullah Dal. 160 | 161 | --- 162 | 163 | ## Note 164 | The **Python-Gaze-Face-Tracker** is intended for educational and research purposes and is particularly suited for applications in aviation, HCI, AR, and similar fields. 165 | 166 | --- 167 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Eye Tracking and Head Pose Estimation 3 | 4 | This script is designed to perform real-time eye tracking and head pose estimation using a webcam feed. 5 | It utilizes the MediaPipe library for facial landmark detection, which informs both eye tracking and 6 | head pose calculations. The purpose is to track the user's eye movements and head orientation, 7 | which can be applied in various domains such as HCI (Human-Computer Interaction), gaming, and accessibility tools. 8 | 9 | Features: 10 | - Real-time eye tracking to count blinks and calculate the eye aspect ratio for each frame. 11 | - Head pose estimation to determine the orientation of the user's head in terms of pitch, yaw, and roll angles. 12 | - Calibration feature to set the initial head pose as the reference zero position. 13 | - Data logging for further analysis and debugging. 14 | 15 | Requirements: 16 | - Python 3.x 17 | - OpenCV (opencv-python) 18 | - MediaPipe (mediapipe) 19 | - Other Dependencies: math, socket, argparse, time, csv, datetime, os 20 | 21 | Methodology: 22 | - The script uses the 468 facial landmarks provided by MediaPipe's FaceMesh model. 23 | - Eye tracking is achieved by calculating the Eye Aspect Ratio (EAR) for each eye and detecting blinks based on EAR thresholds. 24 | - Head pose is estimated using the solvePnP algorithm with a predefined 3D facial model and corresponding 2D landmarks detected from the camera feed. 25 | - Angles are normalized to intuitive ranges (pitch: [-90, 90], yaw and roll: [-180, 180]). 26 | 27 | Theory: 28 | - EAR is used as a simple yet effective metric for eye closure detection. 29 | - Head pose angles are derived using a perspective-n-point approach, which estimates an object's pose from its 2D image points and 3D model points. 30 | 31 | UDP Packet Structure: 32 | - The UDP packet consists of a timestamp and four other integer values. 
33 | - Packet Type: Mixed (int64 for timestamp, int32 for other values) 34 | - Packet Structure: [timestamp (int64), l_cx (int32), l_cy (int32), l_dx (int32), l_dy (int32)] 35 | - Packet Size: 24 bytes (8 bytes for int64 timestamp, 4 bytes each for the four int32 values) 36 | 37 | Example Packets: 38 | - Example 1: [1623447890123, 315, 225, 66, -3] 39 | - Example 2: [1623447891123, 227, 68, -1, 316] 40 | 41 | Parameters: 42 | You can change parameters such as face width, moving average window, webcam ID, terminal outputs, on-screen data, logging detail, etc., from the code. 43 | 44 | Author: Alireza Bagheri 45 | GitHub: https://github.com/alireza787b/Python-Gaze-Face-Tracker 46 | Email: p30planets@gmail.com 47 | LinkedIn: https://www.linkedin.com/in/alireza787b 48 | Date: November 2023 49 | 50 | Inspiration: 51 | Initially inspired by Asadullah Dal's iris segmentation project (https://github.com/Asadullah-Dal17/iris-Segmentation-mediapipe-python). 52 | The blink detection feature is also contributed by Asadullah Dal (GitHub: Asadullah-Dal17). 53 | 54 | Usage: 55 | - Run the script in a Python environment with the necessary dependencies installed. The script accepts command-line arguments for camera source configuration. 56 | - Press 'c' to recalibrate the head pose estimation to the current orientation. 57 | - Press 'r' to start/stop logging. 58 | - Press 'q' to exit the program. 59 | - Output is displayed in a window with live feed and annotations, and logged to a CSV file for further analysis. 60 | 61 | Ensure that all dependencies, especially MediaPipe, OpenCV, and NumPy, are installed before running the script. 62 | 63 | Note: 64 | This project is intended for educational and research purposes in fields like aviation, human-computer interaction, and more. 65 | """ 66 | 67 | 68 | import cv2 as cv 69 | import numpy as np 70 | import mediapipe as mp 71 | import math 72 | import socket 73 | import argparse 74 | import time 75 | import csv 76 | from datetime import datetime 77 | import os 78 | from AngleBuffer import AngleBuffer 79 | 80 | 81 | #----------------------------------------------------------------------------------------------------------------------------------- 82 | #----------------------------------------------------------------------------------------------------------------------------------- 83 | 84 | # Parameters Documentation 85 | 86 | ## User-Specific Measurements 87 | # USER_FACE_WIDTH: The horizontal distance between the outer edges of the user's cheekbones in millimeters. 88 | # This measurement is used to scale the 3D model points for head pose estimation. 89 | # Measure your face width and adjust the value accordingly. 90 | USER_FACE_WIDTH = 140 # [mm] 91 | 92 | ## Camera Parameters (not currently used in calculations) 93 | # NOSE_TO_CAMERA_DISTANCE: The distance from the tip of the nose to the camera lens in millimeters. 94 | # Intended for future use where accurate physical distance measurements may be necessary. 95 | NOSE_TO_CAMERA_DISTANCE = 600 # [mm] 96 | 97 | ## Configuration Parameters 98 | # PRINT_DATA: Enable or disable the printing of data to the console for debugging. 99 | PRINT_DATA = True 100 | 101 | # DEFAULT_WEBCAM: Default camera source index. '0' usually refers to the built-in webcam. 102 | DEFAULT_WEBCAM = 0 103 | 104 | # SHOW_ALL_FEATURES: If True, display all facial landmarks on the video feed. 105 | SHOW_ALL_FEATURES = True 106 | 107 | # LOG_DATA: Enable or disable logging of data to a CSV file. 
108 | LOG_DATA = True 109 | 110 | # LOG_ALL_FEATURES: If True, log all facial landmarks to the CSV file. 111 | LOG_ALL_FEATURES = False 112 | 113 | # ENABLE_HEAD_POSE: Enable the head position and orientation estimator. 114 | ENABLE_HEAD_POSE = True 115 | 116 | ## Logging Configuration 117 | # LOG_FOLDER: Directory where log files will be stored. 118 | LOG_FOLDER = "logs" 119 | 120 | ## Server Configuration 121 | # SERVER_IP: IP address of the server for sending data via UDP (default is localhost). 122 | SERVER_IP = "127.0.0.1" 123 | 124 | # SERVER_PORT: Port number for the server to listen on. 125 | SERVER_PORT = 7070 126 | 127 | ## Blink Detection Parameters 128 | # SHOW_ON_SCREEN_DATA: If True, display blink count and head pose angles on the video feed. 129 | SHOW_ON_SCREEN_DATA = True 130 | 131 | # TOTAL_BLINKS: Counter for the total number of blinks detected. 132 | TOTAL_BLINKS = 0 133 | 134 | # EYES_BLINK_FRAME_COUNTER: Counter for consecutive frames with detected potential blinks. 135 | EYES_BLINK_FRAME_COUNTER = 0 136 | 137 | # BLINK_THRESHOLD: Eye aspect ratio threshold below which a blink is registered. 138 | BLINK_THRESHOLD = 0.51 139 | 140 | # EYE_AR_CONSEC_FRAMES: Number of consecutive frames below the threshold required to confirm a blink. 141 | EYE_AR_CONSEC_FRAMES = 2 142 | 143 | ## Head Pose Estimation Landmark Indices 144 | # These indices correspond to the specific facial landmarks used for head pose estimation. 145 | LEFT_EYE_IRIS = [474, 475, 476, 477] 146 | RIGHT_EYE_IRIS = [469, 470, 471, 472] 147 | LEFT_EYE_OUTER_CORNER = [33] 148 | LEFT_EYE_INNER_CORNER = [133] 149 | RIGHT_EYE_OUTER_CORNER = [362] 150 | RIGHT_EYE_INNER_CORNER = [263] 151 | RIGHT_EYE_POINTS = [33, 160, 159, 158, 133, 153, 145, 144] 152 | LEFT_EYE_POINTS = [362, 385, 386, 387, 263, 373, 374, 380] 153 | NOSE_TIP_INDEX = 4 154 | CHIN_INDEX = 152 155 | LEFT_EYE_LEFT_CORNER_INDEX = 33 156 | RIGHT_EYE_RIGHT_CORNER_INDEX = 263 157 | LEFT_MOUTH_CORNER_INDEX = 61 158 | RIGHT_MOUTH_CORNER_INDEX = 291 159 | 160 | ## MediaPipe Model Confidence Parameters 161 | # These thresholds determine how confidently the model must detect or track to consider the results valid. 162 | MIN_DETECTION_CONFIDENCE = 0.8 163 | MIN_TRACKING_CONFIDENCE = 0.8 164 | 165 | ## Angle Normalization Parameters 166 | # MOVING_AVERAGE_WINDOW: The number of frames over which to calculate the moving average for smoothing angles. 167 | MOVING_AVERAGE_WINDOW = 10 168 | 169 | # Initial Calibration Flags 170 | # initial_pitch, initial_yaw, initial_roll: Store the initial head pose angles for calibration purposes. 171 | # calibrated: A flag indicating whether the initial calibration has been performed. 
172 | initial_pitch, initial_yaw, initial_roll = None, None, None 173 | calibrated = False 174 | 175 | # User-configurable parameters 176 | PRINT_DATA = True # Enable/disable data printing 177 | DEFAULT_WEBCAM = 0 # Default webcam number 178 | SHOW_ALL_FEATURES = True # Show all facial landmarks if True 179 | LOG_DATA = True # Enable logging to CSV 180 | LOG_ALL_FEATURES = False # Log all facial landmarks if True 181 | LOG_FOLDER = "logs" # Folder to store log files 182 | 183 | # Server configuration 184 | SERVER_IP = "127.0.0.1" # Set the server IP address (localhost) 185 | SERVER_PORT = 7070 # Set the server port 186 | 187 | # eyes blinking variables 188 | SHOW_BLINK_COUNT_ON_SCREEN = True # Toggle to show the blink count on the video feed 189 | TOTAL_BLINKS = 0 # Tracks the total number of blinks detected 190 | EYES_BLINK_FRAME_COUNTER = ( 191 | 0 # Counts the number of consecutive frames with a potential blink 192 | ) 193 | BLINK_THRESHOLD = 0.51 # Threshold for the eye aspect ratio to trigger a blink 194 | EYE_AR_CONSEC_FRAMES = ( 195 | 2 # Number of consecutive frames below the threshold to confirm a blink 196 | ) 197 | # SERVER_ADDRESS: Tuple containing the SERVER_IP and SERVER_PORT for UDP communication. 198 | SERVER_ADDRESS = (SERVER_IP, SERVER_PORT) 199 | 200 | 201 | #If set to false it will wait for your command (hittig 'r') to start logging. 202 | IS_RECORDING = False # Controls whether data is being logged 203 | 204 | # Command-line arguments for camera source 205 | parser = argparse.ArgumentParser(description="Eye Tracking Application") 206 | parser.add_argument( 207 | "-c", "--camSource", help="Source of camera", default=str(DEFAULT_WEBCAM) 208 | ) 209 | args = parser.parse_args() 210 | 211 | # Iris and eye corners landmarks indices 212 | LEFT_IRIS = [474, 475, 476, 477] 213 | RIGHT_IRIS = [469, 470, 471, 472] 214 | L_H_LEFT = [33] # Left eye Left Corner 215 | L_H_RIGHT = [133] # Left eye Right Corner 216 | R_H_LEFT = [362] # Right eye Left Corner 217 | R_H_RIGHT = [263] # Right eye Right Corner 218 | 219 | # Blinking Detection landmark's indices. 220 | # P0, P3, P4, P5, P8, P11, P12, P13 221 | RIGHT_EYE_POINTS = [33, 160, 159, 158, 133, 153, 145, 144] 222 | LEFT_EYE_POINTS = [362, 385, 386, 387, 263, 373, 374, 380] 223 | 224 | # Face Selected points indices for Head Pose Estimation 225 | _indices_pose = [1, 33, 61, 199, 263, 291] 226 | 227 | # Server address for UDP socket communication 228 | SERVER_ADDRESS = (SERVER_IP, 7070) 229 | 230 | 231 | # Function to calculate vector position 232 | def vector_position(point1, point2): 233 | x1, y1 = point1.ravel() 234 | x2, y2 = point2.ravel() 235 | return x2 - x1, y2 - y1 236 | 237 | 238 | def euclidean_distance_3D(points): 239 | """Calculates the Euclidean distance between two points in 3D space. 240 | 241 | Args: 242 | points: A list of 3D points. 243 | 244 | Returns: 245 | The Euclidean distance between the two points. 246 | 247 | # Comment: This function calculates the Euclidean distance between two points in 3D space. 248 | """ 249 | 250 | # Get the three points. 251 | P0, P3, P4, P5, P8, P11, P12, P13 = points 252 | 253 | # Calculate the numerator. 254 | numerator = ( 255 | np.linalg.norm(P3 - P13) ** 3 256 | + np.linalg.norm(P4 - P12) ** 3 257 | + np.linalg.norm(P5 - P11) ** 3 258 | ) 259 | 260 | # Calculate the denominator. 261 | denominator = 3 * np.linalg.norm(P0 - P8) ** 3 262 | 263 | # Calculate the distance. 
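    # Note: despite the function name, this value is a dimensionless openness ratio rather
    # than a raw distance: the averaged (cubed) vertical eyelid distances divided by the
    # (cubed) horizontal eye width. It shrinks toward zero as the eye closes, which is what
    # blinking_ratio() relies on for blink detection.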
264 | distance = numerator / denominator 265 | 266 | return distance 267 | 268 | def estimate_head_pose(landmarks, image_size): 269 | # Scale factor based on user's face width (assumes model face width is 150mm) 270 | scale_factor = USER_FACE_WIDTH / 150.0 271 | # 3D model points. 272 | model_points = np.array([ 273 | (0.0, 0.0, 0.0), # Nose tip 274 | (0.0, -330.0 * scale_factor, -65.0 * scale_factor), # Chin 275 | (-225.0 * scale_factor, 170.0 * scale_factor, -135.0 * scale_factor), # Left eye left corner 276 | (225.0 * scale_factor, 170.0 * scale_factor, -135.0 * scale_factor), # Right eye right corner 277 | (-150.0 * scale_factor, -150.0 * scale_factor, -125.0 * scale_factor), # Left Mouth corner 278 | (150.0 * scale_factor, -150.0 * scale_factor, -125.0 * scale_factor) # Right mouth corner 279 | ]) 280 | 281 | 282 | # Camera internals 283 | focal_length = image_size[1] 284 | center = (image_size[1]/2, image_size[0]/2) 285 | camera_matrix = np.array( 286 | [[focal_length, 0, center[0]], 287 | [0, focal_length, center[1]], 288 | [0, 0, 1]], dtype = "double" 289 | ) 290 | 291 | # Assuming no lens distortion 292 | dist_coeffs = np.zeros((4,1)) 293 | 294 | # 2D image points from landmarks, using defined indices 295 | image_points = np.array([ 296 | landmarks[NOSE_TIP_INDEX], # Nose tip 297 | landmarks[CHIN_INDEX], # Chin 298 | landmarks[LEFT_EYE_LEFT_CORNER_INDEX], # Left eye left corner 299 | landmarks[RIGHT_EYE_RIGHT_CORNER_INDEX], # Right eye right corner 300 | landmarks[LEFT_MOUTH_CORNER_INDEX], # Left mouth corner 301 | landmarks[RIGHT_MOUTH_CORNER_INDEX] # Right mouth corner 302 | ], dtype="double") 303 | 304 | 305 | # Solve for pose 306 | (success, rotation_vector, translation_vector) = cv.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv.SOLVEPNP_ITERATIVE) 307 | 308 | # Convert rotation vector to rotation matrix 309 | rotation_matrix, _ = cv.Rodrigues(rotation_vector) 310 | 311 | # Combine rotation matrix and translation vector to form a 3x4 projection matrix 312 | projection_matrix = np.hstack((rotation_matrix, translation_vector.reshape(-1, 1))) 313 | 314 | # Decompose the projection matrix to extract Euler angles 315 | _, _, _, _, _, _, euler_angles = cv.decomposeProjectionMatrix(projection_matrix) 316 | pitch, yaw, roll = euler_angles.flatten()[:3] 317 | 318 | 319 | # Normalize the pitch angle 320 | pitch = normalize_pitch(pitch) 321 | 322 | return pitch, yaw, roll 323 | 324 | def normalize_pitch(pitch): 325 | """ 326 | Normalize the pitch angle to be within the range of [-90, 90]. 327 | 328 | Args: 329 | pitch (float): The raw pitch angle in degrees. 330 | 331 | Returns: 332 | float: The normalized pitch angle. 333 | """ 334 | # Map the pitch angle to the range [-180, 180] 335 | if pitch > 180: 336 | pitch -= 360 337 | 338 | # Invert the pitch angle for intuitive up/down movement 339 | pitch = -pitch 340 | 341 | # Ensure that the pitch is within the range of [-90, 90] 342 | if pitch < -90: 343 | pitch = -(180 + pitch) 344 | elif pitch > 90: 345 | pitch = 180 - pitch 346 | 347 | pitch = -pitch 348 | 349 | return pitch 350 | 351 | 352 | # This function calculates the blinking ratio of a person. 353 | def blinking_ratio(landmarks): 354 | """Calculates the blinking ratio of a person. 355 | 356 | Args: 357 | landmarks: A facial landmarks in 3D normalized. 358 | 359 | Returns: 360 | The blinking ratio of the person, between 0 and 1, where 0 is fully open and 1 is fully closed. 361 | 362 | """ 363 | 364 | # Get the right eye ratio. 
365 | right_eye_ratio = euclidean_distance_3D(landmarks[RIGHT_EYE_POINTS]) 366 | 367 | # Get the left eye ratio. 368 | left_eye_ratio = euclidean_distance_3D(landmarks[LEFT_EYE_POINTS]) 369 | 370 | # Calculate the blinking ratio. 371 | ratio = (right_eye_ratio + left_eye_ratio + 1) / 2 372 | 373 | return ratio 374 | 375 | 376 | # Initializing MediaPipe face mesh and camera 377 | if PRINT_DATA: 378 | print("Initializing the face mesh and camera...") 379 | if PRINT_DATA: 380 | head_pose_status = "enabled" if ENABLE_HEAD_POSE else "disabled" 381 | print(f"Head pose estimation is {head_pose_status}.") 382 | 383 | mp_face_mesh = mp.solutions.face_mesh.FaceMesh( 384 | max_num_faces=1, 385 | refine_landmarks=True, 386 | min_detection_confidence=MIN_DETECTION_CONFIDENCE, 387 | min_tracking_confidence=MIN_TRACKING_CONFIDENCE, 388 | ) 389 | cam_source = int(args.camSource) 390 | cap = cv.VideoCapture(cam_source) 391 | 392 | # Initializing socket for data transmission 393 | iris_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 394 | 395 | # Preparing for CSV logging 396 | csv_data = [] 397 | if not os.path.exists(LOG_FOLDER): 398 | os.makedirs(LOG_FOLDER) 399 | 400 | # Column names for CSV file 401 | column_names = [ 402 | "Timestamp (ms)", 403 | "Left Eye Center X", 404 | "Left Eye Center Y", 405 | "Right Eye Center X", 406 | "Right Eye Center Y", 407 | "Left Iris Relative Pos Dx", 408 | "Left Iris Relative Pos Dy", 409 | "Right Iris Relative Pos Dx", 410 | "Right Iris Relative Pos Dy", 411 | "Total Blink Count", 412 | ] 413 | # Add head pose columns if head pose estimation is enabled 414 | if ENABLE_HEAD_POSE: 415 | column_names.extend(["Pitch", "Yaw", "Roll"]) 416 | 417 | if LOG_ALL_FEATURES: 418 | column_names.extend( 419 | [f"Landmark_{i}_X" for i in range(468)] 420 | + [f"Landmark_{i}_Y" for i in range(468)] 421 | ) 422 | 423 | # Main loop for video capture and processing 424 | try: 425 | angle_buffer = AngleBuffer(size=MOVING_AVERAGE_WINDOW) # Adjust size for smoothing 426 | 427 | while True: 428 | ret, frame = cap.read() 429 | if not ret: 430 | break 431 | 432 | # Flipping the frame for a mirror effect 433 | # I think we better not flip to correspond with real world... need to make sure later... 
434 | #frame = cv.flip(frame, 1) 435 | rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB) 436 | img_h, img_w = frame.shape[:2] 437 | results = mp_face_mesh.process(rgb_frame) 438 | 439 | if results.multi_face_landmarks: 440 | mesh_points = np.array( 441 | [ 442 | np.multiply([p.x, p.y], [img_w, img_h]).astype(int) 443 | for p in results.multi_face_landmarks[0].landmark 444 | ] 445 | ) 446 | 447 | # Get the 3D landmarks from facemesh x, y and z(z is distance from 0 points) 448 | # just normalize values 449 | mesh_points_3D = np.array( 450 | [[n.x, n.y, n.z] for n in results.multi_face_landmarks[0].landmark] 451 | ) 452 | # getting the head pose estimation 3d points 453 | head_pose_points_3D = np.multiply( 454 | mesh_points_3D[_indices_pose], [img_w, img_h, 1] 455 | ) 456 | head_pose_points_2D = mesh_points[_indices_pose] 457 | 458 | # collect nose three dimension and two dimension points 459 | nose_3D_point = np.multiply(head_pose_points_3D[0], [1, 1, 3000]) 460 | nose_2D_point = head_pose_points_2D[0] 461 | 462 | # create the camera matrix 463 | focal_length = 1 * img_w 464 | 465 | cam_matrix = np.array( 466 | [[focal_length, 0, img_h / 2], [0, focal_length, img_w / 2], [0, 0, 1]] 467 | ) 468 | 469 | # The distortion parameters 470 | dist_matrix = np.zeros((4, 1), dtype=np.float64) 471 | 472 | head_pose_points_2D = np.delete(head_pose_points_3D, 2, axis=1) 473 | head_pose_points_3D = head_pose_points_3D.astype(np.float64) 474 | head_pose_points_2D = head_pose_points_2D.astype(np.float64) 475 | # Solve PnP 476 | success, rot_vec, trans_vec = cv.solvePnP( 477 | head_pose_points_3D, head_pose_points_2D, cam_matrix, dist_matrix 478 | ) 479 | # Get rotational matrix 480 | rotation_matrix, jac = cv.Rodrigues(rot_vec) 481 | 482 | # Get angles 483 | angles, mtxR, mtxQ, Qx, Qy, Qz = cv.RQDecomp3x3(rotation_matrix) 484 | 485 | # Get the y rotation degree 486 | angle_x = angles[0] * 360 487 | angle_y = angles[1] * 360 488 | z = angles[2] * 360 489 | 490 | # if angle cross the values then 491 | threshold_angle = 10 492 | # See where the user's head tilting 493 | if angle_y < -threshold_angle: 494 | face_looks = "Left" 495 | elif angle_y > threshold_angle: 496 | face_looks = "Right" 497 | elif angle_x < -threshold_angle: 498 | face_looks = "Down" 499 | elif angle_x > threshold_angle: 500 | face_looks = "Up" 501 | else: 502 | face_looks = "Forward" 503 | if SHOW_ON_SCREEN_DATA: 504 | cv.putText( 505 | frame, 506 | f"Face Looking at {face_looks}", 507 | (img_w - 400, 80), 508 | cv.FONT_HERSHEY_TRIPLEX, 509 | 0.8, 510 | (0, 255, 0), 511 | 2, 512 | cv.LINE_AA, 513 | ) 514 | # Display the nose direction 515 | nose_3d_projection, jacobian = cv.projectPoints( 516 | nose_3D_point, rot_vec, trans_vec, cam_matrix, dist_matrix 517 | ) 518 | 519 | p1 = nose_2D_point 520 | p2 = ( 521 | int(nose_2D_point[0] + angle_y * 10), 522 | int(nose_2D_point[1] - angle_x * 10), 523 | ) 524 | 525 | cv.line(frame, p1, p2, (255, 0, 255), 3) 526 | # getting the blinking ratio 527 | eyes_aspect_ratio = blinking_ratio(mesh_points_3D) 528 | # print(f"Blinking ratio : {ratio}") 529 | # checking if ear less then or equal to required threshold if yes then 530 | # count the number of frame frame while eyes are closed. 
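            # Blink logic summary: the counter accumulates while the ratio stays at or below
            # BLINK_THRESHOLD; when the eye reopens, the event is counted as a blink only if
            # it lasted more than EYE_AR_CONSEC_FRAMES consecutive frames.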
531 | if eyes_aspect_ratio <= BLINK_THRESHOLD: 532 | EYES_BLINK_FRAME_COUNTER += 1 533 | # else check if eyes are closed is greater EYE_AR_CONSEC_FRAMES frame then 534 | # count the this as a blink 535 | # make frame counter equal to zero 536 | 537 | else: 538 | if EYES_BLINK_FRAME_COUNTER > EYE_AR_CONSEC_FRAMES: 539 | TOTAL_BLINKS += 1 540 | EYES_BLINK_FRAME_COUNTER = 0 541 | 542 | # Display all facial landmarks if enabled 543 | if SHOW_ALL_FEATURES: 544 | for point in mesh_points: 545 | cv.circle(frame, tuple(point), 1, (0, 255, 0), -1) 546 | # Process and display eye features 547 | (l_cx, l_cy), l_radius = cv.minEnclosingCircle(mesh_points[LEFT_EYE_IRIS]) 548 | (r_cx, r_cy), r_radius = cv.minEnclosingCircle(mesh_points[RIGHT_EYE_IRIS]) 549 | center_left = np.array([l_cx, l_cy], dtype=np.int32) 550 | center_right = np.array([r_cx, r_cy], dtype=np.int32) 551 | 552 | # Highlighting the irises and corners of the eyes 553 | cv.circle( 554 | frame, center_left, int(l_radius), (255, 0, 255), 2, cv.LINE_AA 555 | ) # Left iris 556 | cv.circle( 557 | frame, center_right, int(r_radius), (255, 0, 255), 2, cv.LINE_AA 558 | ) # Right iris 559 | cv.circle( 560 | frame, mesh_points[LEFT_EYE_INNER_CORNER][0], 3, (255, 255, 255), -1, cv.LINE_AA 561 | ) # Left eye right corner 562 | cv.circle( 563 | frame, mesh_points[LEFT_EYE_OUTER_CORNER][0], 3, (0, 255, 255), -1, cv.LINE_AA 564 | ) # Left eye left corner 565 | cv.circle( 566 | frame, mesh_points[RIGHT_EYE_INNER_CORNER][0], 3, (255, 255, 255), -1, cv.LINE_AA 567 | ) # Right eye right corner 568 | cv.circle( 569 | frame, mesh_points[RIGHT_EYE_OUTER_CORNER][0], 3, (0, 255, 255), -1, cv.LINE_AA 570 | ) # Right eye left corner 571 | 572 | # Calculating relative positions 573 | l_dx, l_dy = vector_position(mesh_points[LEFT_EYE_OUTER_CORNER], center_left) 574 | r_dx, r_dy = vector_position(mesh_points[RIGHT_EYE_OUTER_CORNER], center_right) 575 | 576 | # Printing data if enabled 577 | if PRINT_DATA: 578 | print(f"Total Blinks: {TOTAL_BLINKS}") 579 | print(f"Left Eye Center X: {l_cx} Y: {l_cy}") 580 | print(f"Right Eye Center X: {r_cx} Y: {r_cy}") 581 | print(f"Left Iris Relative Pos Dx: {l_dx} Dy: {l_dy}") 582 | print(f"Right Iris Relative Pos Dx: {r_dx} Dy: {r_dy}\n") 583 | # Check if head pose estimation is enabled 584 | if ENABLE_HEAD_POSE: 585 | pitch, yaw, roll = estimate_head_pose(mesh_points, (img_h, img_w)) 586 | angle_buffer.add([pitch, yaw, roll]) 587 | pitch, yaw, roll = angle_buffer.get_average() 588 | 589 | # Set initial angles on first successful estimation or recalibrate 590 | if initial_pitch is None or (key == ord('c') and calibrated): 591 | initial_pitch, initial_yaw, initial_roll = pitch, yaw, roll 592 | calibrated = True 593 | if PRINT_DATA: 594 | print("Head pose recalibrated.") 595 | 596 | # Adjust angles based on initial calibration 597 | if calibrated: 598 | pitch -= initial_pitch 599 | yaw -= initial_yaw 600 | roll -= initial_roll 601 | 602 | 603 | if PRINT_DATA: 604 | print(f"Head Pose Angles: Pitch={pitch}, Yaw={yaw}, Roll={roll}") 605 | # Logging data 606 | if LOG_DATA: 607 | timestamp = int(time.time() * 1000) # Current timestamp in milliseconds 608 | log_entry = [ 609 | timestamp, 610 | l_cx, 611 | l_cy, 612 | r_cx, 613 | r_cy, 614 | l_dx, 615 | l_dy, 616 | r_dx, 617 | r_dy, 618 | TOTAL_BLINKS, 619 | ] # Include blink count in CSV 620 | log_entry = [timestamp, l_cx, l_cy, r_cx, r_cy, l_dx, l_dy, r_dx, r_dy, TOTAL_BLINKS] # Include blink count in CSV 621 | 622 | # Append head pose data if enabled 623 | if ENABLE_HEAD_POSE: 624 | 
log_entry.extend([pitch, yaw, roll]) 625 | csv_data.append(log_entry) 626 | if LOG_ALL_FEATURES: 627 | log_entry.extend([p for point in mesh_points for p in point]) 628 | csv_data.append(log_entry) 629 | 630 | # Sending data through socket 631 | timestamp = int(time.time() * 1000) # Current timestamp in milliseconds 632 | # Create a packet with mixed types (int64 for timestamp and int32 for the rest) 633 | packet = np.array([timestamp], dtype=np.int64).tobytes() + np.array([l_cx, l_cy, l_dx, l_dy], dtype=np.int32).tobytes() 634 | 635 | SERVER_ADDRESS = ("127.0.0.1", 7070) 636 | iris_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 637 | iris_socket.sendto(packet, SERVER_ADDRESS) 638 | 639 | print(f'Sent UDP packet to {SERVER_ADDRESS}: {packet}') 640 | 641 | 642 | # Writing the on screen data on the frame 643 | if SHOW_ON_SCREEN_DATA: 644 | if IS_RECORDING: 645 | cv.circle(frame, (30, 30), 10, (0, 0, 255), -1) # Red circle at the top-left corner 646 | cv.putText(frame, f"Blinks: {TOTAL_BLINKS}", (30, 80), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA) 647 | if ENABLE_HEAD_POSE: 648 | cv.putText(frame, f"Pitch: {int(pitch)}", (30, 110), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA) 649 | cv.putText(frame, f"Yaw: {int(yaw)}", (30, 140), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA) 650 | cv.putText(frame, f"Roll: {int(roll)}", (30, 170), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA) 651 | 652 | 653 | 654 | # Displaying the processed frame 655 | cv.imshow("Eye Tracking", frame) 656 | # Handle key presses 657 | key = cv.waitKey(1) & 0xFF 658 | 659 | # Calibrate on 'c' key press 660 | if key == ord('c'): 661 | initial_pitch, initial_yaw, initial_roll = pitch, yaw, roll 662 | if PRINT_DATA: 663 | print("Head pose recalibrated.") 664 | 665 | # Inside the main loop, handle the 'r' key press 666 | if key == ord('r'): 667 | 668 | IS_RECORDING = not IS_RECORDING 669 | if IS_RECORDING: 670 | print("Recording started.") 671 | else: 672 | print("Recording paused.") 673 | 674 | 675 | # Exit on 'q' key press 676 | if key == ord('q'): 677 | if PRINT_DATA: 678 | print("Exiting program...") 679 | break 680 | 681 | except Exception as e: 682 | print(f"An error occurred: {e}") 683 | finally: 684 | # Releasing camera and closing windows 685 | cap.release() 686 | cv.destroyAllWindows() 687 | iris_socket.close() 688 | if PRINT_DATA: 689 | print("Program exited successfully.") 690 | 691 | # Writing data to CSV file 692 | if LOG_DATA and IS_RECORDING: 693 | if PRINT_DATA: 694 | print("Writing data to CSV...") 695 | timestamp_str = datetime.now().strftime("%d-%m-%Y_%H-%M-%S") 696 | csv_file_name = os.path.join( 697 | LOG_FOLDER, f"eye_tracking_log_{timestamp_str}.csv" 698 | ) 699 | with open(csv_file_name, "w", newline="") as file: 700 | writer = csv.writer(file) 701 | writer.writerow(column_names) # Writing column names 702 | writer.writerows(csv_data) # Writing data rows 703 | if PRINT_DATA: 704 | print(f"Data written to {csv_file_name}") 705 | -------------------------------------------------------------------------------- /mediapipe_landmarks_test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import mediapipe as mp 3 | import threading 4 | import queue 5 | 6 | # Parameters (easy to change) 7 | WEBCAM_NUMBER = 0 # Change this to use a different webcam 8 | MIN_DETECTION_CONFIDENCE = 0.5 9 | MIN_TRACKING_CONFIDENCE = 0.5 10 | MAX_LANDMARKS = 467 # Max landmark number in MediaPipe (0-467) 11 | 12 | # 
Initialize MediaPipe Face Mesh. 13 | mp_face_mesh = mp.solutions.face_mesh 14 | face_mesh = mp_face_mesh.FaceMesh( 15 | min_detection_confidence=MIN_DETECTION_CONFIDENCE, 16 | min_tracking_confidence=MIN_TRACKING_CONFIDENCE 17 | ) 18 | 19 | # Function to mark landmarks on the image. 20 | def mark_landmarks(image, landmarks, landmark_ids): 21 | img_height, img_width, _ = image.shape 22 | for landmark_id in landmark_ids: 23 | if 0 <= landmark_id <= MAX_LANDMARKS: 24 | landmark = landmarks.landmark[landmark_id] 25 | x = int(landmark.x * img_width) 26 | y = int(landmark.y * img_height) 27 | cv2.circle(image, (x, y), 5, (0, 255, 0), -1) # Increased dot size 28 | cv2.putText(image, str(landmark_id), (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) # Larger text 29 | return image 30 | 31 | def validate_input(user_input): 32 | try: 33 | landmark_ids = [int(id.strip()) for id in user_input.split(',') if id.strip().isdigit()] 34 | if all(0 <= id <= MAX_LANDMARKS for id in landmark_ids): 35 | return landmark_ids 36 | else: 37 | raise ValueError 38 | except ValueError: 39 | print(f"Invalid input. Please enter numbers between 0 and {MAX_LANDMARKS}, comma-separated.") 40 | return None 41 | 42 | # Function to handle user input in a separate thread. 43 | def input_thread(input_queue): 44 | while True: 45 | user_input = input() 46 | input_queue.put(user_input) 47 | 48 | def main(): 49 | print("MediaPipe Landmark Visualizer") 50 | print("Instructions:") 51 | print("1. Enter landmark IDs in the console (comma-separated, e.g., 1,5,30,150).") 52 | print("2. Press 'q' to quit the application.") 53 | print("3. You can enter new landmark IDs anytime to update the visualization.") 54 | 55 | # Open webcam. 56 | cap = cv2.VideoCapture(WEBCAM_NUMBER) 57 | if not cap.isOpened(): 58 | print(f"Could not open webcam #{WEBCAM_NUMBER}.") 59 | return 60 | 61 | landmark_ids = [] 62 | input_queue = queue.Queue() 63 | 64 | # Start the thread for handling user input. 65 | threading.Thread(target=input_thread, args=(input_queue,), daemon=True).start() 66 | 67 | try: 68 | while True: 69 | success, image = cap.read() 70 | if not success: 71 | print("Ignoring empty camera frame.") 72 | continue 73 | 74 | # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB. 75 | image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB) 76 | results = face_mesh.process(image) 77 | 78 | # Convert back to BGR for OpenCV rendering. 
79 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) 80 | 81 | if results.multi_face_landmarks: 82 | for face_landmarks in results.multi_face_landmarks: 83 | image = mark_landmarks(image, face_landmarks, landmark_ids) 84 | 85 | cv2.imshow('MediaPipe Landmarks', image) 86 | 87 | # Check for 'q' key to quit 88 | if cv2.waitKey(5) & 0xFF == ord('q'): 89 | break 90 | 91 | # Check for input from the input thread 92 | try: 93 | user_input = input_queue.get_nowait() 94 | validated_ids = validate_input(user_input) 95 | if validated_ids is not None: 96 | landmark_ids = validated_ids 97 | print("Selected Landmarks: ", ", ".join(map(str, landmark_ids))) 98 | print("To see new landmarks, type their IDs again (comma-separated) and press enter.") 99 | except queue.Empty: 100 | pass 101 | 102 | finally: 103 | cap.release() 104 | cv2.destroyAllWindows() 105 | 106 | if __name__ == '__main__': 107 | main() 108 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | mediapipe 3 | numpy 4 | scipy 5 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | print(cv2.__version__) 3 | --------------------------------------------------------------------------------