├── .MATLABDriveTag
├── .gitignore
├── AngleBuffer.py
├── LICENSE
├── README.md
├── main.py
├── mediapipe_landmarks_test.py
├── requirements.txt
└── test.py
/.MATLABDriveTag:
--------------------------------------------------------------------------------
1 | cbc9c000-1ce9-4c60-8c76-7c3ffdb9b96f
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.mp4
6 | # C extensions
7 | *.so
8 | *.avi
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | pip-wheel-metadata/
25 | share/python-wheels/
26 | *.egg-info/
27 | .installed.cfg
28 | *.egg
29 | MANIFEST
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .nox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | *.py,cover
52 | .hypothesis/
53 | .pytest_cache/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | .python-version
87 |
88 | # pipenv
89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
92 | # install all needed dependencies.
93 | #Pipfile.lock
94 |
95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
96 | __pypackages__/
97 |
98 | # Celery stuff
99 | celerybeat-schedule
100 | celerybeat.pid
101 |
102 | # SageMath parsed files
103 | *.sage.py
104 |
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | logs/
111 | ENV/
112 | env.bak/
113 | venv.bak/
114 |
115 | # Spyder project settings
116 | .spyderproject
117 | .spyproject
118 |
119 | # Rope project settings
120 | .ropeproject
121 |
122 | # mkdocs documentation
123 | /site
124 |
125 | # mypy
126 | .mypy_cache/
127 | .dmypy.json
128 | dmypy.json
129 |
130 | # Pyre type checker
131 | .pyre/
132 |
--------------------------------------------------------------------------------
/AngleBuffer.py:
--------------------------------------------------------------------------------
1 | import collections
2 | import numpy as np
3 |
4 |
5 | class AngleBuffer:
6 | def __init__(self, size=40):
7 | self.size = size
8 | self.buffer = collections.deque(maxlen=size)
9 |
10 | def add(self, angles):
11 | self.buffer.append(angles)
12 |
13 | def get_average(self):
14 | return np.mean(self.buffer, axis=0)
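
# Example usage (mirrors how main.py consumes this class): smooth noisy
# head-pose angles over a sliding window of recent frames.
#
#   angle_buffer = AngleBuffer(size=10)
#   angle_buffer.add([pitch, yaw, roll])
#   smoothed_pitch, smoothed_yaw, smoothed_roll = angle_buffer.get_average()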
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Alireza Ghaderi
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Python-Gaze-Face-Tracker
3 |
4 | ### Advanced Real-Time Eye, Facial Landmark, Head Pose, Gaze Direction Tracking System
5 |
6 | ---
7 | 
8 | 
9 |
10 |
11 |
12 |
13 | 
14 |
15 | ## Description
16 | **Python-Gaze-Face-Tracker** is a Python-based application for advanced real-time eye tracking, facial landmark detection, head position (orientation) estimation, and gaze estimation, built on OpenCV and MediaPipe. Specializing in uncalibrated gaze tracking and head orientation analysis, this tool is an easy-to-use Python eye and facial landmark tracker. It excels at visualizing iris positions and offers robust logging of both eye and facial landmark data. With the ability to transmit iris and gaze information over UDP sockets, Python-Gaze-Face-Tracker is well suited to a variety of applications, including aviation, human-computer interaction (HCI), and augmented reality (AR). The tool also includes blink detection, contributing to detailed eye movement analysis and supporting head tracking. Together, these capabilities make it a comprehensive package for advanced gaze tracking and facial feature analysis in interactive technology applications.
17 |
18 |
19 |
20 | ---
21 |
22 | ## Features
23 | - **Real-Time Eye Tracking**: Tracks and visualizes iris and eye corner positions in real-time using webcam input.
24 | - **Facial Landmark Detection**: Detects and displays up to 468 facial landmarks.
25 | - **Data Logging**: Records tracking data to CSV files, including timestamps, eye positions, and optional facial landmark data. *Note: Enabling logging of all 468 facial landmarks can result in large log files.*
26 | - **Socket Communication**: Transmits iris tracking data (currently the left eye only) via UDP sockets for integration with other systems or applications.
27 | - **Blink Detection**: Monitors and records blink frequency, enhancing eye movement analysis.
28 | - **Real-Time Head Pose Estimation**: Accurately estimates the roll, pitch, and yaw of the user's head in real-time.
29 | - **Filtering and Smoothing**: Implements filtering and smoothing algorithms to ensure stable and accurate head orientation readings.
30 | - **Gaze Estimation**: Visualizes the direction of gaze by projecting a 3D point from the nose tip onto the 2D camera plane.
31 | - **Custom Real-Time Facial Landmark Visualization**: Utilize the `mediapipe_landmarks_test.py` script to visualize and track each of the MediaPipe facial landmark indices in real time. This feature is particularly useful for identifying the most relevant facial landmarks for your project and observing them directly in the video feed.
32 |
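For example, to inspect specific landmark indices with the visualizer, run the script and type the IDs into its console prompt (these instructions are also printed when the script starts):

```
python mediapipe_landmarks_test.py
# then type comma-separated IDs, e.g. 1,5,30,150, and press Enter;
# press 'q' in the video window to quit
```
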
33 | ---
34 |
35 | ## Requirements
36 | - Python 3.x
37 | - OpenCV (opencv-python)
38 | - MediaPipe (mediapipe)
39 | - Other Python standard libraries: `math`, `socket`, `argparse`, `time`, `csv`, `datetime`, `os`
40 |
41 | ---
42 | ## Tutorial Video
43 | 🎥 **Watch the Setup and Usage Tutorial**: Discover how to install and use the Python-Gaze-Face-Tracker with our step-by-step video guide on YouTube: [Watch Tutorial](https://www.youtube.com/watch?v=UgC2GggTks0)
44 |
45 | This video tutorial will walk you through the installation process, demonstrate how to run the code, and show you the real-time tracking features in action.
46 |
47 |
48 | ---
49 |
50 | ## Installation & Usage
51 |
52 | 1. **Clone the Repository:**
53 | ```
54 | git clone https://github.com/alireza787b/Python-Gaze-Face-Tracker.git
55 | ```
56 |
57 | 2. **Navigate to the Repository Directory:**
58 | ```
59 | cd Python-Gaze-Face-Tracker
60 | ```
61 |
62 | 3. **Install Dependencies:**
63 | ```
64 | pip install -r requirements.txt
65 | ```
66 |
67 | 4. **Run the Application:**
68 | ```
69 | python main.py
70 | ```
71 |
72 | Optionally, specify the camera source (webcam index):
73 | ```
74 | python main.py -c 1
75 | ```
76 |
77 | 5. **Open the Project in VS Code (Optional):**
78 | ```
79 | code .
80 | ```
81 |
82 |
83 |
84 |
85 |
86 | ---
87 |
88 | ## Parameters
89 | - **USER_FACE_WIDTH**: The horizontal distance between the outer edges of the user's cheekbones in millimeters. Adjust this value based on your face width for accurate head pose estimation.
90 | - **NOSE_TO_CAMERA_DISTANCE**: The distance from the tip of the nose to the camera lens in millimeters. Intended for future enhancements.
91 | - **PRINT_DATA**: Enable or disable console data printing for debugging.
92 | - **DEFAULT_WEBCAM**: Default camera source index. '0' usually refers to the built-in webcam.
93 | - **SHOW_ALL_FEATURES**: Display all facial landmarks on the video feed if set to True.
94 | - **LOG_DATA**: Enable or disable logging of data to a CSV file.
95 | - **LOG_ALL_FEATURES**: Log all facial landmarks to the CSV file if set to True.
96 | - **ENABLE_HEAD_POSE**: Enable the head position and orientation estimator.
97 | - **LOG_FOLDER**: Directory for storing log files.
98 | - **SERVER_IP**: IP address for UDP data transmission (default is localhost).
99 | - **SERVER_PORT**: Port number for the server to listen on.
100 | - **SHOW_ON_SCREEN_DATA**: Display blink count and head pose angles on the video feed if set to True.
101 | - **EYES_BLINK_FRAME_COUNTER**: Counter for consecutive frames with detected potential blinks.
102 | - **BLINK_THRESHOLD**: Eye aspect ratio threshold for blink detection.
103 | - **EYE_AR_CONSEC_FRAMES**: Number of consecutive frames below the threshold required to confirm a blink.
104 | - **MIN_DETECTION_CONFIDENCE**: Confidence threshold for model detection.
105 | - **MIN_TRACKING_CONFIDENCE**: Confidence threshold for model tracking.
106 | - **MOVING_AVERAGE_WINDOW**: Number of frames for calculating the moving average for smoothing angles.
107 | - **SHOW_BLINK_COUNT_ON_SCREEN**: Toggle to show the blink count on the video feed.
108 | - **IS_RECORDING**: Controls whether data is being logged automatically. Set to false to wait for the 'r' command to start logging.
109 | - **SERVER_ADDRESS**: Tuple containing the SERVER_IP and SERVER_PORT for UDP communication.
110 |
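These parameters are plain constants defined near the top of `main.py`; adjust them directly in the code and rerun. A representative excerpt:

```
# Excerpt from main.py
USER_FACE_WIDTH = 140          # [mm] horizontal distance between the outer cheekbone edges
NOSE_TO_CAMERA_DISTANCE = 600  # [mm] reserved for future use
DEFAULT_WEBCAM = 0             # camera index
BLINK_THRESHOLD = 0.51         # eye aspect ratio threshold for a blink
EYE_AR_CONSEC_FRAMES = 2       # consecutive frames below the threshold to confirm a blink
MOVING_AVERAGE_WINDOW = 10     # frames used to smooth head pose angles
SERVER_IP = "127.0.0.1"        # UDP target for iris telemetry
SERVER_PORT = 7070
```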
111 |
112 | ---
113 |
114 | ## Interactive Commands
115 |
116 | While running the Eye Tracking and Head Pose Estimation script, you can interact with the program using the following keyboard commands:
117 |
118 | - **'c' Key**: Calibrate Head Pose
119 | - Pressing the 'c' key recalibrates the head pose estimation to the current orientation of the user's head. This sets the current head pose as the new reference point.
120 |
121 | - **'r' Key**: Start/Stop Recording
122 |   - Pressing the 'r' key toggles recording, starting or pausing the logging of data to the log folder.
123 |
124 | - **'q' Key**: Quit Program
125 | - Pressing the 'q' key will exit the program.
126 |
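Under the hood these keys are polled once per frame with OpenCV's `waitKey`; simplified from the main loop in `main.py`:

```
key = cv.waitKey(1) & 0xFF
if key == ord('c'):   # recalibrate head pose to the current orientation
    initial_pitch, initial_yaw, initial_roll = pitch, yaw, roll
if key == ord('r'):   # start/pause recording
    IS_RECORDING = not IS_RECORDING
if key == ord('q'):   # quit
    break
```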
127 |
128 | ---
129 | ## Data Logging & Telemetry
130 | - **CSV Logging**: The application generates CSV files with tracking data including timestamps, eye positions, and optional facial landmarks. These files are stored in the `logs` folder.
131 |
132 | - **UDP Telemetry**: The application sends iris position data through UDP sockets as defined by `SERVER_IP` and `SERVER_PORT`. The data is sent in the following order: [Timestamp, Left Eye Center X, Left Eye Center Y, Left Iris Relative Pos Dx, Left Iris Relative Pos Dy].
133 |
134 | ### UDP Packet Structure
135 | - **Packet Type**: Mixed (int64 for timestamp, int32 for other values)
136 | - **Packet Structure**:
137 | - Timestamp (int64)
138 | - Left Eye Center X (int32)
139 | - Left Eye Center Y (int32)
140 | - Left Iris Relative Pos Dx (int32)
141 | - Left Iris Relative Pos Dy (int32)
142 | - **Packet Size**: 24 bytes (8 bytes for int64 timestamp, 4 bytes each for the four int32 values)
143 |
144 | ### Example Packets
145 | - **Example**:
146 | - Timestamp: 1623447890123
147 | - Left Eye Center X: 315
148 | - Left Eye Center Y: 225
149 | - Left Iris Relative Pos Dx: 66
150 | - Left Iris Relative Pos Dy: -3
151 | - Packet: [1623447890123, 315, 225, 66, -3]
152 |
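To consume this telemetry on the receiving side, a minimal sketch is shown below. It assumes the packet uses the sender machine's native byte order (NumPy's default, little-endian on most platforms) and that the receiver binds to the same `SERVER_IP`/`SERVER_PORT`:

```
import socket
import struct

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("127.0.0.1", 7070))  # must match SERVER_IP / SERVER_PORT

while True:
    data, _ = sock.recvfrom(1024)
    if len(data) == 24:
        # one int64 timestamp followed by four int32 values
        timestamp, l_cx, l_cy, l_dx, l_dy = struct.unpack("<qiiii", data)
        print(timestamp, l_cx, l_cy, l_dx, l_dy)
```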
153 |
154 |
155 | ---
156 |
157 | ## Acknowledgements
158 | This project was initially inspired by [Asadullah Dal's iris segmentation project](https://github.com/Asadullah-Dal17/iris-Segmentation-mediapipe-python).
159 | The blink detection and gaze direction visualization features were also contributed by Asadullah Dal.
160 |
161 | ---
162 |
163 | ## Note
164 | The **Python-Gaze-Face-Tracker** is intended for educational and research purposes and is particularly suited for applications in aviation, HCI, AR, and similar fields.
165 |
166 | ---
167 |
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Eye Tracking and Head Pose Estimation
3 |
4 | This script is designed to perform real-time eye tracking and head pose estimation using a webcam feed.
5 | It utilizes the MediaPipe library for facial landmark detection, which informs both eye tracking and
6 | head pose calculations. The purpose is to track the user's eye movements and head orientation,
7 | which can be applied in various domains such as HCI (Human-Computer Interaction), gaming, and accessibility tools.
8 |
9 | Features:
10 | - Real-time eye tracking to count blinks and calculate the eye aspect ratio for each frame.
11 | - Head pose estimation to determine the orientation of the user's head in terms of pitch, yaw, and roll angles.
12 | - Calibration feature to set the initial head pose as the reference zero position.
13 | - Data logging for further analysis and debugging.
14 |
15 | Requirements:
16 | - Python 3.x
17 | - OpenCV (opencv-python)
18 | - MediaPipe (mediapipe)
19 | - Other Dependencies: math, socket, argparse, time, csv, datetime, os
20 |
21 | Methodology:
22 | - The script uses the 468 facial landmarks provided by MediaPipe's FaceMesh model.
23 | - Eye tracking is achieved by calculating the Eye Aspect Ratio (EAR) for each eye and detecting blinks based on EAR thresholds.
24 | - Head pose is estimated using the solvePnP algorithm with a predefined 3D facial model and corresponding 2D landmarks detected from the camera feed.
25 | - Angles are normalized to intuitive ranges (pitch: [-90, 90], yaw and roll: [-180, 180]).
26 |
27 | Theory:
28 | - EAR is used as a simple yet effective metric for eye closure detection.
29 | - Head pose angles are derived using a perspective-n-point approach, which estimates an object's pose from its 2D image points and 3D model points.
30 |
31 | UDP Packet Structure:
32 | - The UDP packet consists of a timestamp and four other integer values.
33 | - Packet Type: Mixed (int64 for timestamp, int32 for other values)
34 | - Packet Structure: [timestamp (int64), l_cx (int32), l_cy (int32), l_dx (int32), l_dy (int32)]
35 | - Packet Size: 24 bytes (8 bytes for int64 timestamp, 4 bytes each for the four int32 values)
36 |
37 | Example Packets:
38 | - Example 1: [1623447890123, 315, 225, 66, -3]
39 | - Example 2: [1623447891123, 227, 68, -1, 316]
40 |
41 | Parameters:
42 | You can change parameters such as face width, moving average window, webcam ID, terminal outputs, on-screen data, logging detail, etc., from the code.
43 |
44 | Author: Alireza Bagheri
45 | GitHub: https://github.com/alireza787b/Python-Gaze-Face-Tracker
46 | Email: p30planets@gmail.com
47 | LinkedIn: https://www.linkedin.com/in/alireza787b
48 | Date: November 2023
49 |
50 | Inspiration:
51 | Initially inspired by Asadullah Dal's iris segmentation project (https://github.com/Asadullah-Dal17/iris-Segmentation-mediapipe-python).
52 | The blink detection feature is also contributed by Asadullah Dal (GitHub: Asadullah-Dal17).
53 |
54 | Usage:
55 | - Run the script in a Python environment with the necessary dependencies installed. The script accepts command-line arguments for camera source configuration.
56 | - Press 'c' to recalibrate the head pose estimation to the current orientation.
57 | - Press 'r' to start/stop logging.
58 | - Press 'q' to exit the program.
59 | - Output is displayed in a window with live feed and annotations, and logged to a CSV file for further analysis.
60 |
61 | Ensure that all dependencies, especially MediaPipe, OpenCV, and NumPy, are installed before running the script.
62 |
63 | Note:
64 | This project is intended for educational and research purposes in fields like aviation, human-computer interaction, and more.
65 | """
66 |
67 |
68 | import cv2 as cv
69 | import numpy as np
70 | import mediapipe as mp
71 | import math
72 | import socket
73 | import argparse
74 | import time
75 | import csv
76 | from datetime import datetime
77 | import os
78 | from AngleBuffer import AngleBuffer
79 |
80 |
81 | #-----------------------------------------------------------------------------------------------------------------------------------
82 | #-----------------------------------------------------------------------------------------------------------------------------------
83 |
84 | # Parameters Documentation
85 |
86 | ## User-Specific Measurements
87 | # USER_FACE_WIDTH: The horizontal distance between the outer edges of the user's cheekbones in millimeters.
88 | # This measurement is used to scale the 3D model points for head pose estimation.
89 | # Measure your face width and adjust the value accordingly.
90 | USER_FACE_WIDTH = 140 # [mm]
91 |
92 | ## Camera Parameters (not currently used in calculations)
93 | # NOSE_TO_CAMERA_DISTANCE: The distance from the tip of the nose to the camera lens in millimeters.
94 | # Intended for future use where accurate physical distance measurements may be necessary.
95 | NOSE_TO_CAMERA_DISTANCE = 600 # [mm]
96 |
97 | ## Configuration Parameters
98 | # PRINT_DATA: Enable or disable the printing of data to the console for debugging.
99 | PRINT_DATA = True
100 |
101 | # DEFAULT_WEBCAM: Default camera source index. '0' usually refers to the built-in webcam.
102 | DEFAULT_WEBCAM = 0
103 |
104 | # SHOW_ALL_FEATURES: If True, display all facial landmarks on the video feed.
105 | SHOW_ALL_FEATURES = True
106 |
107 | # LOG_DATA: Enable or disable logging of data to a CSV file.
108 | LOG_DATA = True
109 |
110 | # LOG_ALL_FEATURES: If True, log all facial landmarks to the CSV file.
111 | LOG_ALL_FEATURES = False
112 |
113 | # ENABLE_HEAD_POSE: Enable the head position and orientation estimator.
114 | ENABLE_HEAD_POSE = True
115 |
116 | ## Logging Configuration
117 | # LOG_FOLDER: Directory where log files will be stored.
118 | LOG_FOLDER = "logs"
119 |
120 | ## Server Configuration
121 | # SERVER_IP: IP address of the server for sending data via UDP (default is localhost).
122 | SERVER_IP = "127.0.0.1"
123 |
124 | # SERVER_PORT: Port number for the server to listen on.
125 | SERVER_PORT = 7070
126 |
127 | ## Blink Detection Parameters
128 | # SHOW_ON_SCREEN_DATA: If True, display blink count and head pose angles on the video feed.
129 | SHOW_ON_SCREEN_DATA = True
130 |
131 | # TOTAL_BLINKS: Counter for the total number of blinks detected.
132 | TOTAL_BLINKS = 0
133 |
134 | # EYES_BLINK_FRAME_COUNTER: Counter for consecutive frames with detected potential blinks.
135 | EYES_BLINK_FRAME_COUNTER = 0
136 |
137 | # BLINK_THRESHOLD: Eye aspect ratio threshold below which a blink is registered.
138 | BLINK_THRESHOLD = 0.51
139 |
140 | # EYE_AR_CONSEC_FRAMES: Number of consecutive frames below the threshold required to confirm a blink.
141 | EYE_AR_CONSEC_FRAMES = 2
142 |
143 | ## Head Pose Estimation Landmark Indices
144 | # These indices correspond to the specific facial landmarks used for head pose estimation.
145 | LEFT_EYE_IRIS = [474, 475, 476, 477]
146 | RIGHT_EYE_IRIS = [469, 470, 471, 472]
147 | LEFT_EYE_OUTER_CORNER = [33]
148 | LEFT_EYE_INNER_CORNER = [133]
149 | RIGHT_EYE_OUTER_CORNER = [362]
150 | RIGHT_EYE_INNER_CORNER = [263]
151 | RIGHT_EYE_POINTS = [33, 160, 159, 158, 133, 153, 145, 144]
152 | LEFT_EYE_POINTS = [362, 385, 386, 387, 263, 373, 374, 380]
153 | NOSE_TIP_INDEX = 4
154 | CHIN_INDEX = 152
155 | LEFT_EYE_LEFT_CORNER_INDEX = 33
156 | RIGHT_EYE_RIGHT_CORNER_INDEX = 263
157 | LEFT_MOUTH_CORNER_INDEX = 61
158 | RIGHT_MOUTH_CORNER_INDEX = 291
159 |
160 | ## MediaPipe Model Confidence Parameters
161 | # These thresholds determine how confidently the model must detect or track to consider the results valid.
162 | MIN_DETECTION_CONFIDENCE = 0.8
163 | MIN_TRACKING_CONFIDENCE = 0.8
164 |
165 | ## Angle Normalization Parameters
166 | # MOVING_AVERAGE_WINDOW: The number of frames over which to calculate the moving average for smoothing angles.
167 | MOVING_AVERAGE_WINDOW = 10
168 |
169 | # Initial Calibration Flags
170 | # initial_pitch, initial_yaw, initial_roll: Store the initial head pose angles for calibration purposes.
171 | # calibrated: A flag indicating whether the initial calibration has been performed.
172 | initial_pitch, initial_yaw, initial_roll = None, None, None
173 | calibrated = False
174 |
175 | # User-configurable parameters (note: most of these re-declare the documented parameters above with identical values)
176 | PRINT_DATA = True # Enable/disable data printing
177 | DEFAULT_WEBCAM = 0 # Default webcam number
178 | SHOW_ALL_FEATURES = True # Show all facial landmarks if True
179 | LOG_DATA = True # Enable logging to CSV
180 | LOG_ALL_FEATURES = False # Log all facial landmarks if True
181 | LOG_FOLDER = "logs" # Folder to store log files
182 |
183 | # Server configuration
184 | SERVER_IP = "127.0.0.1" # Set the server IP address (localhost)
185 | SERVER_PORT = 7070 # Set the server port
186 |
187 | # eyes blinking variables
188 | SHOW_BLINK_COUNT_ON_SCREEN = True # Toggle to show the blink count on the video feed
189 | TOTAL_BLINKS = 0 # Tracks the total number of blinks detected
190 | EYES_BLINK_FRAME_COUNTER = (
191 | 0 # Counts the number of consecutive frames with a potential blink
192 | )
193 | BLINK_THRESHOLD = 0.51 # Threshold for the eye aspect ratio to trigger a blink
194 | EYE_AR_CONSEC_FRAMES = (
195 | 2 # Number of consecutive frames below the threshold to confirm a blink
196 | )
197 | # SERVER_ADDRESS: Tuple containing the SERVER_IP and SERVER_PORT for UDP communication.
198 | SERVER_ADDRESS = (SERVER_IP, SERVER_PORT)
199 |
200 |
201 | # If set to False, it will wait for your command (hitting 'r') to start logging.
202 | IS_RECORDING = False # Controls whether data is being logged
203 |
204 | # Command-line arguments for camera source
205 | parser = argparse.ArgumentParser(description="Eye Tracking Application")
206 | parser.add_argument(
207 | "-c", "--camSource", help="Source of camera", default=str(DEFAULT_WEBCAM)
208 | )
209 | args = parser.parse_args()
210 |
211 | # Iris and eye corners landmarks indices
212 | LEFT_IRIS = [474, 475, 476, 477]
213 | RIGHT_IRIS = [469, 470, 471, 472]
214 | L_H_LEFT = [33] # Left eye Left Corner
215 | L_H_RIGHT = [133] # Left eye Right Corner
216 | R_H_LEFT = [362] # Right eye Left Corner
217 | R_H_RIGHT = [263] # Right eye Right Corner
218 |
219 | # Blinking Detection landmark's indices.
220 | # P0, P3, P4, P5, P8, P11, P12, P13
221 | RIGHT_EYE_POINTS = [33, 160, 159, 158, 133, 153, 145, 144]
222 | LEFT_EYE_POINTS = [362, 385, 386, 387, 263, 373, 374, 380]
223 |
224 | # Face Selected points indices for Head Pose Estimation
225 | _indices_pose = [1, 33, 61, 199, 263, 291]
226 |
227 | # Server address for UDP socket communication
228 | SERVER_ADDRESS = (SERVER_IP, SERVER_PORT)
229 |
230 |
231 | # Function to calculate vector position
232 | def vector_position(point1, point2):
233 | x1, y1 = point1.ravel()
234 | x2, y2 = point2.ravel()
235 | return x2 - x1, y2 - y1
236 |
237 |
238 | def euclidean_distance_3D(points):
239 | """Calculates the Euclidean distance between two points in 3D space.
240 |
241 | Args:
242 | points: A list of 3D points.
243 |
244 | Returns:
245 | The Euclidean distance between the two points.
246 |
247 | # Comment: This function calculates the Euclidean distance between two points in 3D space.
248 | """
249 |
250 |     # Unpack the eight eye landmarks.
251 | P0, P3, P4, P5, P8, P11, P12, P13 = points
252 |
253 | # Calculate the numerator.
254 | numerator = (
255 | np.linalg.norm(P3 - P13) ** 3
256 | + np.linalg.norm(P4 - P12) ** 3
257 | + np.linalg.norm(P5 - P11) ** 3
258 | )
259 |
260 | # Calculate the denominator.
261 | denominator = 3 * np.linalg.norm(P0 - P8) ** 3
262 |
263 | # Calculate the distance.
264 | distance = numerator / denominator
265 |
266 | return distance
267 |
268 | def estimate_head_pose(landmarks, image_size):
269 | # Scale factor based on user's face width (assumes model face width is 150mm)
270 | scale_factor = USER_FACE_WIDTH / 150.0
271 | # 3D model points.
272 | model_points = np.array([
273 | (0.0, 0.0, 0.0), # Nose tip
274 | (0.0, -330.0 * scale_factor, -65.0 * scale_factor), # Chin
275 | (-225.0 * scale_factor, 170.0 * scale_factor, -135.0 * scale_factor), # Left eye left corner
276 | (225.0 * scale_factor, 170.0 * scale_factor, -135.0 * scale_factor), # Right eye right corner
277 | (-150.0 * scale_factor, -150.0 * scale_factor, -125.0 * scale_factor), # Left Mouth corner
278 | (150.0 * scale_factor, -150.0 * scale_factor, -125.0 * scale_factor) # Right mouth corner
279 | ])
280 |
281 |
282 | # Camera internals
283 | focal_length = image_size[1]
284 | center = (image_size[1]/2, image_size[0]/2)
285 | camera_matrix = np.array(
286 | [[focal_length, 0, center[0]],
287 | [0, focal_length, center[1]],
288 | [0, 0, 1]], dtype = "double"
289 | )
290 |
291 | # Assuming no lens distortion
292 | dist_coeffs = np.zeros((4,1))
293 |
294 | # 2D image points from landmarks, using defined indices
295 | image_points = np.array([
296 | landmarks[NOSE_TIP_INDEX], # Nose tip
297 | landmarks[CHIN_INDEX], # Chin
298 | landmarks[LEFT_EYE_LEFT_CORNER_INDEX], # Left eye left corner
299 | landmarks[RIGHT_EYE_RIGHT_CORNER_INDEX], # Right eye right corner
300 | landmarks[LEFT_MOUTH_CORNER_INDEX], # Left mouth corner
301 | landmarks[RIGHT_MOUTH_CORNER_INDEX] # Right mouth corner
302 | ], dtype="double")
303 |
304 |
305 | # Solve for pose
306 | (success, rotation_vector, translation_vector) = cv.solvePnP(model_points, image_points, camera_matrix, dist_coeffs, flags=cv.SOLVEPNP_ITERATIVE)
307 |
308 | # Convert rotation vector to rotation matrix
309 | rotation_matrix, _ = cv.Rodrigues(rotation_vector)
310 |
311 | # Combine rotation matrix and translation vector to form a 3x4 projection matrix
312 | projection_matrix = np.hstack((rotation_matrix, translation_vector.reshape(-1, 1)))
313 |
314 | # Decompose the projection matrix to extract Euler angles
315 | _, _, _, _, _, _, euler_angles = cv.decomposeProjectionMatrix(projection_matrix)
316 | pitch, yaw, roll = euler_angles.flatten()[:3]
317 |
318 |
319 | # Normalize the pitch angle
320 | pitch = normalize_pitch(pitch)
321 |
322 | return pitch, yaw, roll
323 |
324 | def normalize_pitch(pitch):
325 | """
326 | Normalize the pitch angle to be within the range of [-90, 90].
327 |
328 | Args:
329 | pitch (float): The raw pitch angle in degrees.
330 |
331 | Returns:
332 | float: The normalized pitch angle.
333 | """
334 | # Map the pitch angle to the range [-180, 180]
335 | if pitch > 180:
336 | pitch -= 360
337 |
338 | # Invert the pitch angle for intuitive up/down movement
339 | pitch = -pitch
340 |
341 | # Ensure that the pitch is within the range of [-90, 90]
342 | if pitch < -90:
343 | pitch = -(180 + pitch)
344 | elif pitch > 90:
345 | pitch = 180 - pitch
346 |
347 | pitch = -pitch
348 |
349 | return pitch
350 |
351 |
352 | # This function calculates the blinking ratio of a person.
353 | def blinking_ratio(landmarks):
354 | """Calculates the blinking ratio of a person.
355 |
356 | Args:
357 |         landmarks: Normalized 3D facial landmarks.
358 |
359 |     Returns:
360 |         The blinking ratio of the person; lower values indicate more closed eyes. A blink is registered when the ratio drops to BLINK_THRESHOLD or below for several consecutive frames.
361 |
362 | """
363 |
364 | # Get the right eye ratio.
365 | right_eye_ratio = euclidean_distance_3D(landmarks[RIGHT_EYE_POINTS])
366 |
367 | # Get the left eye ratio.
368 | left_eye_ratio = euclidean_distance_3D(landmarks[LEFT_EYE_POINTS])
369 |
370 | # Calculate the blinking ratio.
371 | ratio = (right_eye_ratio + left_eye_ratio + 1) / 2
372 |
373 | return ratio
374 |
375 |
376 | # Initializing MediaPipe face mesh and camera
377 | if PRINT_DATA:
378 | print("Initializing the face mesh and camera...")
379 | if PRINT_DATA:
380 | head_pose_status = "enabled" if ENABLE_HEAD_POSE else "disabled"
381 | print(f"Head pose estimation is {head_pose_status}.")
382 |
383 | mp_face_mesh = mp.solutions.face_mesh.FaceMesh(
384 | max_num_faces=1,
385 | refine_landmarks=True,
386 | min_detection_confidence=MIN_DETECTION_CONFIDENCE,
387 | min_tracking_confidence=MIN_TRACKING_CONFIDENCE,
388 | )
389 | cam_source = int(args.camSource)
390 | cap = cv.VideoCapture(cam_source)
391 |
392 | # Initializing socket for data transmission
393 | iris_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
394 |
395 | # Preparing for CSV logging
396 | csv_data = []
397 | if not os.path.exists(LOG_FOLDER):
398 | os.makedirs(LOG_FOLDER)
399 |
400 | # Column names for CSV file
401 | column_names = [
402 | "Timestamp (ms)",
403 | "Left Eye Center X",
404 | "Left Eye Center Y",
405 | "Right Eye Center X",
406 | "Right Eye Center Y",
407 | "Left Iris Relative Pos Dx",
408 | "Left Iris Relative Pos Dy",
409 | "Right Iris Relative Pos Dx",
410 | "Right Iris Relative Pos Dy",
411 | "Total Blink Count",
412 | ]
413 | # Add head pose columns if head pose estimation is enabled
414 | if ENABLE_HEAD_POSE:
415 | column_names.extend(["Pitch", "Yaw", "Roll"])
416 |
417 | if LOG_ALL_FEATURES:
418 | column_names.extend(
419 | [f"Landmark_{i}_X" for i in range(468)]
420 | + [f"Landmark_{i}_Y" for i in range(468)]
421 | )
422 |
423 | # Main loop for video capture and processing
424 | try:
425 | angle_buffer = AngleBuffer(size=MOVING_AVERAGE_WINDOW) # Adjust size for smoothing
426 |
427 | while True:
428 | ret, frame = cap.read()
429 | if not ret:
430 | break
431 |
432 | # Flipping the frame for a mirror effect
433 | # I think we better not flip to correspond with real world... need to make sure later...
434 | #frame = cv.flip(frame, 1)
435 | rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
436 | img_h, img_w = frame.shape[:2]
437 | results = mp_face_mesh.process(rgb_frame)
438 |
439 | if results.multi_face_landmarks:
440 | mesh_points = np.array(
441 | [
442 | np.multiply([p.x, p.y], [img_w, img_h]).astype(int)
443 | for p in results.multi_face_landmarks[0].landmark
444 | ]
445 | )
446 |
447 |             # Get the normalized 3D landmarks from FaceMesh
448 |             # (x, y, z; z represents the landmark's relative depth)
449 | mesh_points_3D = np.array(
450 | [[n.x, n.y, n.z] for n in results.multi_face_landmarks[0].landmark]
451 | )
452 | # getting the head pose estimation 3d points
453 | head_pose_points_3D = np.multiply(
454 | mesh_points_3D[_indices_pose], [img_w, img_h, 1]
455 | )
456 | head_pose_points_2D = mesh_points[_indices_pose]
457 |
458 | # collect nose three dimension and two dimension points
459 | nose_3D_point = np.multiply(head_pose_points_3D[0], [1, 1, 3000])
460 | nose_2D_point = head_pose_points_2D[0]
461 |
462 | # create the camera matrix
463 | focal_length = 1 * img_w
464 |
465 | cam_matrix = np.array(
466 | [[focal_length, 0, img_h / 2], [0, focal_length, img_w / 2], [0, 0, 1]]
467 | )
468 |
469 | # The distortion parameters
470 | dist_matrix = np.zeros((4, 1), dtype=np.float64)
471 |
472 | head_pose_points_2D = np.delete(head_pose_points_3D, 2, axis=1)
473 | head_pose_points_3D = head_pose_points_3D.astype(np.float64)
474 | head_pose_points_2D = head_pose_points_2D.astype(np.float64)
475 | # Solve PnP
476 | success, rot_vec, trans_vec = cv.solvePnP(
477 | head_pose_points_3D, head_pose_points_2D, cam_matrix, dist_matrix
478 | )
479 | # Get rotational matrix
480 | rotation_matrix, jac = cv.Rodrigues(rot_vec)
481 |
482 | # Get angles
483 | angles, mtxR, mtxQ, Qx, Qy, Qz = cv.RQDecomp3x3(rotation_matrix)
484 |
485 |             # Extract the rotation around the x, y, and z axes
486 | angle_x = angles[0] * 360
487 | angle_y = angles[1] * 360
488 | z = angles[2] * 360
489 |
490 |             # Classify head direction once an angle crosses the threshold
491 |             threshold_angle = 10
492 |             # Determine which way the user's head is tilting
493 | if angle_y < -threshold_angle:
494 | face_looks = "Left"
495 | elif angle_y > threshold_angle:
496 | face_looks = "Right"
497 | elif angle_x < -threshold_angle:
498 | face_looks = "Down"
499 | elif angle_x > threshold_angle:
500 | face_looks = "Up"
501 | else:
502 | face_looks = "Forward"
503 | if SHOW_ON_SCREEN_DATA:
504 | cv.putText(
505 | frame,
506 | f"Face Looking at {face_looks}",
507 | (img_w - 400, 80),
508 | cv.FONT_HERSHEY_TRIPLEX,
509 | 0.8,
510 | (0, 255, 0),
511 | 2,
512 | cv.LINE_AA,
513 | )
514 | # Display the nose direction
515 | nose_3d_projection, jacobian = cv.projectPoints(
516 | nose_3D_point, rot_vec, trans_vec, cam_matrix, dist_matrix
517 | )
518 |
519 | p1 = nose_2D_point
520 | p2 = (
521 | int(nose_2D_point[0] + angle_y * 10),
522 | int(nose_2D_point[1] - angle_x * 10),
523 | )
524 |
525 | cv.line(frame, p1, p2, (255, 0, 255), 3)
526 | # getting the blinking ratio
527 | eyes_aspect_ratio = blinking_ratio(mesh_points_3D)
528 | # print(f"Blinking ratio : {ratio}")
529 |             # If the eye aspect ratio is at or below the blink threshold,
530 |             # count the number of consecutive frames the eyes stay closed.
531 | if eyes_aspect_ratio <= BLINK_THRESHOLD:
532 | EYES_BLINK_FRAME_COUNTER += 1
533 |             # Otherwise, if the eyes were closed for more than EYE_AR_CONSEC_FRAMES
534 |             # consecutive frames, count this as a blink
535 |             # and reset the frame counter.
536 |
537 | else:
538 | if EYES_BLINK_FRAME_COUNTER > EYE_AR_CONSEC_FRAMES:
539 | TOTAL_BLINKS += 1
540 | EYES_BLINK_FRAME_COUNTER = 0
541 |
542 | # Display all facial landmarks if enabled
543 | if SHOW_ALL_FEATURES:
544 | for point in mesh_points:
545 | cv.circle(frame, tuple(point), 1, (0, 255, 0), -1)
546 | # Process and display eye features
547 | (l_cx, l_cy), l_radius = cv.minEnclosingCircle(mesh_points[LEFT_EYE_IRIS])
548 | (r_cx, r_cy), r_radius = cv.minEnclosingCircle(mesh_points[RIGHT_EYE_IRIS])
549 | center_left = np.array([l_cx, l_cy], dtype=np.int32)
550 | center_right = np.array([r_cx, r_cy], dtype=np.int32)
551 |
552 | # Highlighting the irises and corners of the eyes
553 | cv.circle(
554 | frame, center_left, int(l_radius), (255, 0, 255), 2, cv.LINE_AA
555 | ) # Left iris
556 | cv.circle(
557 | frame, center_right, int(r_radius), (255, 0, 255), 2, cv.LINE_AA
558 | ) # Right iris
559 | cv.circle(
560 | frame, mesh_points[LEFT_EYE_INNER_CORNER][0], 3, (255, 255, 255), -1, cv.LINE_AA
561 | ) # Left eye right corner
562 | cv.circle(
563 | frame, mesh_points[LEFT_EYE_OUTER_CORNER][0], 3, (0, 255, 255), -1, cv.LINE_AA
564 | ) # Left eye left corner
565 | cv.circle(
566 | frame, mesh_points[RIGHT_EYE_INNER_CORNER][0], 3, (255, 255, 255), -1, cv.LINE_AA
567 | ) # Right eye right corner
568 | cv.circle(
569 | frame, mesh_points[RIGHT_EYE_OUTER_CORNER][0], 3, (0, 255, 255), -1, cv.LINE_AA
570 | ) # Right eye left corner
571 |
572 | # Calculating relative positions
573 | l_dx, l_dy = vector_position(mesh_points[LEFT_EYE_OUTER_CORNER], center_left)
574 | r_dx, r_dy = vector_position(mesh_points[RIGHT_EYE_OUTER_CORNER], center_right)
575 |
576 | # Printing data if enabled
577 | if PRINT_DATA:
578 | print(f"Total Blinks: {TOTAL_BLINKS}")
579 | print(f"Left Eye Center X: {l_cx} Y: {l_cy}")
580 | print(f"Right Eye Center X: {r_cx} Y: {r_cy}")
581 | print(f"Left Iris Relative Pos Dx: {l_dx} Dy: {l_dy}")
582 | print(f"Right Iris Relative Pos Dx: {r_dx} Dy: {r_dy}\n")
583 | # Check if head pose estimation is enabled
584 | if ENABLE_HEAD_POSE:
585 | pitch, yaw, roll = estimate_head_pose(mesh_points, (img_h, img_w))
586 | angle_buffer.add([pitch, yaw, roll])
587 | pitch, yaw, roll = angle_buffer.get_average()
588 |
589 | # Set initial angles on first successful estimation or recalibrate
590 | if initial_pitch is None or (key == ord('c') and calibrated):
591 | initial_pitch, initial_yaw, initial_roll = pitch, yaw, roll
592 | calibrated = True
593 | if PRINT_DATA:
594 | print("Head pose recalibrated.")
595 |
596 | # Adjust angles based on initial calibration
597 | if calibrated:
598 | pitch -= initial_pitch
599 | yaw -= initial_yaw
600 | roll -= initial_roll
601 |
602 |
603 | if PRINT_DATA:
604 | print(f"Head Pose Angles: Pitch={pitch}, Yaw={yaw}, Roll={roll}")
605 | # Logging data
606 | if LOG_DATA:
607 | timestamp = int(time.time() * 1000) # Current timestamp in milliseconds
608 | log_entry = [
609 | timestamp,
610 | l_cx,
611 | l_cy,
612 | r_cx,
613 | r_cy,
614 | l_dx,
615 | l_dy,
616 | r_dx,
617 | r_dy,
618 | TOTAL_BLINKS,
619 | ] # Include blink count in CSV
620 |
621 |
622 |                 # Append head pose data if enabled
623 |                 if ENABLE_HEAD_POSE:
624 |                     log_entry.extend([pitch, yaw, roll])
625 |                 # Append all landmark coordinates if enabled
626 |                 if LOG_ALL_FEATURES:
627 |                     log_entry.extend([p for point in mesh_points for p in point])
628 |                 csv_data.append(log_entry)
629 |
630 | # Sending data through socket
631 | timestamp = int(time.time() * 1000) # Current timestamp in milliseconds
632 | # Create a packet with mixed types (int64 for timestamp and int32 for the rest)
633 | packet = np.array([timestamp], dtype=np.int64).tobytes() + np.array([l_cx, l_cy, l_dx, l_dy], dtype=np.int32).tobytes()
634 |
635 |             # Reuse the UDP socket and SERVER_ADDRESS configured before the main loop
636 |             iris_socket.sendto(packet, SERVER_ADDRESS)
637 |
638 |             if PRINT_DATA:
639 |                 print(f'Sent UDP packet to {SERVER_ADDRESS}: {packet}')
640 |
641 |
642 | # Writing the on screen data on the frame
643 | if SHOW_ON_SCREEN_DATA:
644 | if IS_RECORDING:
645 | cv.circle(frame, (30, 30), 10, (0, 0, 255), -1) # Red circle at the top-left corner
646 | cv.putText(frame, f"Blinks: {TOTAL_BLINKS}", (30, 80), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA)
647 | if ENABLE_HEAD_POSE:
648 | cv.putText(frame, f"Pitch: {int(pitch)}", (30, 110), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA)
649 | cv.putText(frame, f"Yaw: {int(yaw)}", (30, 140), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA)
650 | cv.putText(frame, f"Roll: {int(roll)}", (30, 170), cv.FONT_HERSHEY_DUPLEX, 0.8, (0, 255, 0), 2, cv.LINE_AA)
651 |
652 |
653 |
654 | # Displaying the processed frame
655 | cv.imshow("Eye Tracking", frame)
656 | # Handle key presses
657 | key = cv.waitKey(1) & 0xFF
658 |
659 | # Calibrate on 'c' key press
660 | if key == ord('c'):
661 | initial_pitch, initial_yaw, initial_roll = pitch, yaw, roll
662 | if PRINT_DATA:
663 | print("Head pose recalibrated.")
664 |
665 | # Inside the main loop, handle the 'r' key press
666 | if key == ord('r'):
667 |
668 | IS_RECORDING = not IS_RECORDING
669 | if IS_RECORDING:
670 | print("Recording started.")
671 | else:
672 | print("Recording paused.")
673 |
674 |
675 | # Exit on 'q' key press
676 | if key == ord('q'):
677 | if PRINT_DATA:
678 | print("Exiting program...")
679 | break
680 |
681 | except Exception as e:
682 | print(f"An error occurred: {e}")
683 | finally:
684 | # Releasing camera and closing windows
685 | cap.release()
686 | cv.destroyAllWindows()
687 | iris_socket.close()
688 | if PRINT_DATA:
689 | print("Program exited successfully.")
690 |
691 | # Writing data to CSV file
692 | if LOG_DATA and IS_RECORDING:
693 | if PRINT_DATA:
694 | print("Writing data to CSV...")
695 | timestamp_str = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")
696 | csv_file_name = os.path.join(
697 | LOG_FOLDER, f"eye_tracking_log_{timestamp_str}.csv"
698 | )
699 | with open(csv_file_name, "w", newline="") as file:
700 | writer = csv.writer(file)
701 | writer.writerow(column_names) # Writing column names
702 | writer.writerows(csv_data) # Writing data rows
703 | if PRINT_DATA:
704 | print(f"Data written to {csv_file_name}")
705 |
--------------------------------------------------------------------------------
/mediapipe_landmarks_test.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import mediapipe as mp
3 | import threading
4 | import queue
5 |
6 | # Parameters (easy to change)
7 | WEBCAM_NUMBER = 0 # Change this to use a different webcam
8 | MIN_DETECTION_CONFIDENCE = 0.5
9 | MIN_TRACKING_CONFIDENCE = 0.5
10 | MAX_LANDMARKS = 467 # Max landmark number in MediaPipe (0-467)
11 |
12 | # Initialize MediaPipe Face Mesh.
13 | mp_face_mesh = mp.solutions.face_mesh
14 | face_mesh = mp_face_mesh.FaceMesh(
15 | min_detection_confidence=MIN_DETECTION_CONFIDENCE,
16 | min_tracking_confidence=MIN_TRACKING_CONFIDENCE
17 | )
18 |
19 | # Function to mark landmarks on the image.
20 | def mark_landmarks(image, landmarks, landmark_ids):
21 | img_height, img_width, _ = image.shape
22 | for landmark_id in landmark_ids:
23 | if 0 <= landmark_id <= MAX_LANDMARKS:
24 | landmark = landmarks.landmark[landmark_id]
25 | x = int(landmark.x * img_width)
26 | y = int(landmark.y * img_height)
27 | cv2.circle(image, (x, y), 5, (0, 255, 0), -1) # Increased dot size
28 | cv2.putText(image, str(landmark_id), (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2) # Larger text
29 | return image
30 |
31 | def validate_input(user_input):
32 | try:
33 | landmark_ids = [int(id.strip()) for id in user_input.split(',') if id.strip().isdigit()]
34 | if all(0 <= id <= MAX_LANDMARKS for id in landmark_ids):
35 | return landmark_ids
36 | else:
37 | raise ValueError
38 | except ValueError:
39 | print(f"Invalid input. Please enter numbers between 0 and {MAX_LANDMARKS}, comma-separated.")
40 | return None
41 |
42 | # Function to handle user input in a separate thread.
43 | def input_thread(input_queue):
44 | while True:
45 | user_input = input()
46 | input_queue.put(user_input)
47 |
48 | def main():
49 | print("MediaPipe Landmark Visualizer")
50 | print("Instructions:")
51 | print("1. Enter landmark IDs in the console (comma-separated, e.g., 1,5,30,150).")
52 | print("2. Press 'q' to quit the application.")
53 | print("3. You can enter new landmark IDs anytime to update the visualization.")
54 |
55 | # Open webcam.
56 | cap = cv2.VideoCapture(WEBCAM_NUMBER)
57 | if not cap.isOpened():
58 | print(f"Could not open webcam #{WEBCAM_NUMBER}.")
59 | return
60 |
61 | landmark_ids = []
62 | input_queue = queue.Queue()
63 |
64 | # Start the thread for handling user input.
65 | threading.Thread(target=input_thread, args=(input_queue,), daemon=True).start()
66 |
67 | try:
68 | while True:
69 | success, image = cap.read()
70 | if not success:
71 | print("Ignoring empty camera frame.")
72 | continue
73 |
74 | # Flip the image horizontally for a later selfie-view display, and convert the BGR image to RGB.
75 | image = cv2.cvtColor(cv2.flip(image, 1), cv2.COLOR_BGR2RGB)
76 | results = face_mesh.process(image)
77 |
78 | # Convert back to BGR for OpenCV rendering.
79 | image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
80 |
81 | if results.multi_face_landmarks:
82 | for face_landmarks in results.multi_face_landmarks:
83 | image = mark_landmarks(image, face_landmarks, landmark_ids)
84 |
85 | cv2.imshow('MediaPipe Landmarks', image)
86 |
87 | # Check for 'q' key to quit
88 | if cv2.waitKey(5) & 0xFF == ord('q'):
89 | break
90 |
91 | # Check for input from the input thread
92 | try:
93 | user_input = input_queue.get_nowait()
94 | validated_ids = validate_input(user_input)
95 | if validated_ids is not None:
96 | landmark_ids = validated_ids
97 | print("Selected Landmarks: ", ", ".join(map(str, landmark_ids)))
98 | print("To see new landmarks, type their IDs again (comma-separated) and press enter.")
99 | except queue.Empty:
100 | pass
101 |
102 | finally:
103 | cap.release()
104 | cv2.destroyAllWindows()
105 |
106 | if __name__ == '__main__':
107 | main()
108 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | mediapipe
3 | numpy
4 | scipy
5 |
--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | print(cv2.__version__)
3 |
--------------------------------------------------------------------------------