├── LICENSE ├── README.md ├── getMeanIntensity.py ├── selectTrainTestValData.py ├── trainModel.py └── vid2Frames.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Melih Altun 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Eye_Tracking_with_Deep_CNN 2 | A deep convolutional neural network implementation for tracking eye movements in videos 3 | 4 | A tensorflow/keras model is developed here to track eye motions in videos. 5 | The model is trained, validated and tested with Labeled Pupils in the Wild (LPW) dataset: 6 | https://perceptualui.org/research/datasets/LPW/ 7 | 8 | ![image](https://user-images.githubusercontent.com/40482921/234497211-f76d938e-45b3-4c86-a4e5-c013bbd53c55.png) 9 | 10 | The repository consists of 4 Python scripts. Once the dataset is downloaded and the paths in the scripts are modified to point to the dataset, run the scripts in the given order. 11 | 12 | 1) selectTrainTestValData.py randomly selects videos for training, validation and, test sets 13 | 14 | 2) getMeanIntensity.py scans the training data and finds the mean intensity for all the frames in all of the videos 15 | 16 | 3) vid2Frames.py extracts frames from videos, resizes, converts them to grayscale, and saves them in train, validation and test folders 17 | 18 | 4) trainModel.py forms a CNN model, forms train, test and, validation sets and trains the model with selected parameters. Finally, it measures the accuracy of the model using the test set. 19 | 20 | To use the scripts, download the LPW dataset to your local and adjust folder locations in the scripts. 21 | 22 | Utilizing GPU is recommended. A single epoch with CPU may take over an hour to complete. 23 | 24 | Recommended configuration is: Python 3.8, Tensorflow 2.10.0, CUDA 11.2, CUDNN 8.8.1 and Zlib. 25 | 26 | ![eye_tracker_2](https://user-images.githubusercontent.com/40482921/235327664-1a9024f0-cfaa-4776-a058-c03385fd02c8.PNG) 27 | 28 | ![eye_tracker](https://user-images.githubusercontent.com/40482921/235327674-7dff69a1-2a11-44fb-86b2-851c6124a26e.PNG) 29 | -------------------------------------------------------------------------------- /getMeanIntensity.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | # Read training file paths from a text file and store in a list 5 | with open('./train_val_test/train_files.txt', 'r') as f: 6 | train_files = f.readlines() 7 | train_files = [file_path.strip() for file_path in train_files] 8 | 9 | 10 | meanFrameVals = [] 11 | # Loop over all training files 12 | for file_path in train_files: 13 | cap = cv2.VideoCapture(file_path) 14 | frame_count = 0 15 | mean_gray_value = 0 16 | 17 | # Loop over all frames in the video 18 | while cap.isOpened(): 19 | ret, frame = cap.read() 20 | if not ret: 21 | break 22 | 23 | total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 24 | # Convert frame to grayscale 25 | gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 26 | 27 | # Compute mean grayscale value of frame 28 | mean_gray_value += np.mean(gray_frame)/total_frames/255 29 | 30 | # Print mean grayscale value of video 31 | print(f"Mean grayscale value of {file_path}: {mean_gray_value}") 32 | meanFrameVals.append(mean_gray_value) 33 | 34 | # Release video capture object 35 | cap.release() 36 | 37 | meanMeanFrameVal = np.mean(meanFrameVals) 38 | 39 | np.savetxt("meanIntensity.txt", [meanMeanFrameVal]) -------------------------------------------------------------------------------- /selectTrainTestValData.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import random 4 | 5 | folder_path = 'D:/eye_tracking/LPW/' 6 | avi_files = glob.glob(os.path.join(folder_path, "**/*.avi"), recursive=True) 7 | 8 | for i in range(len(avi_files)): 9 | avi_files[i] = avi_files[i].replace('\\', '/') 10 | 11 | #for file_path in avi_files: 12 | # print(file_path) 13 | 14 | N = len(avi_files) 15 | 16 | # Calculate number of files for each set 17 | train_N = int(N * 0.7) 18 | valid_N = int(N * 0.2) 19 | test_N = N - train_N - valid_N 20 | 21 | # Shuffle the list of file names randomly 22 | random.shuffle(avi_files) 23 | 24 | # Split the files into training, validation, and test sets 25 | train_files = avi_files[:train_N] 26 | valid_files = avi_files[train_N:train_N+valid_N] 27 | test_files = avi_files[train_N+valid_N:] 28 | 29 | # Print the number of files in each set 30 | print(f"Number of training files: {len(train_files)}") 31 | print(f"Number of validation files: {len(valid_files)}") 32 | print(f"Number of test files: {len(test_files)}") 33 | 34 | def getTargetList(datasetFiles): 35 | targetFiles = [] 36 | for file in datasetFiles: 37 | target_file = file[:-3] + "txt" 38 | targetFiles.append(target_file) 39 | return targetFiles 40 | 41 | train_labels = getTargetList(train_files) 42 | valid_labels = getTargetList(valid_files) 43 | test_labels = getTargetList(test_files) 44 | 45 | 46 | # Write file paths to a new text file 47 | def writeFileLists(folder, fileName, fileList): 48 | os.makedirs(folder, exist_ok=True) 49 | with open(os.path.join(folder, fileName), 'w') as f: 50 | for file in fileList: 51 | f.write(file + '\n') 52 | 53 | writeFileLists('./train_val_test/', 'train_files.txt', train_files) 54 | writeFileLists('./train_val_test/', 'train_labels.txt', train_labels) 55 | writeFileLists('./train_val_test/', 'test_files.txt', test_files) 56 | writeFileLists('./train_val_test/', 'test_labels.txt', test_labels) 57 | writeFileLists('./train_val_test/', 'valid_files.txt', valid_files) 58 | writeFileLists('./train_val_test/', 'valid_labels.txt', valid_labels) 59 | -------------------------------------------------------------------------------- /trainModel.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | import keras 6 | from keras.models import Model, load_model 7 | from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout 8 | from keras.callbacks import ModelCheckpoint 9 | 10 | physical_devices = tf.config.experimental.list_physical_devices('GPU') 11 | print('Num GPUs Available: ', len(physical_devices)) 12 | if len(physical_devices) > 0: 13 | tf.config.experimental.set_memory_growth(physical_devices[0], True) 14 | 15 | N = 120 # input y size 16 | M = 160 # input x size 17 | batch_sz = 128 18 | num_epochs = 24 19 | 20 | train_folder = 'D:/eye_tracking/LPW/train_frames/' 21 | valid_folder = 'D:/eye_tracking/LPW/valid_frames/' 22 | test_folder = 'D:/eye_tracking/LPW/test_frames/' 23 | 24 | if (os.path.isfile('./meanIntensity.txt')): 25 | meanIntensity = np.loadtxt("meanIntensity.txt") 26 | else: 27 | meanIntensity = np.float64(0.48) 28 | 29 | def get_dataset_files(datasetFolder, M, N): 30 | 31 | df = pd.read_csv(datasetFolder+'targets.csv') 32 | # add a filename column based on Subject Id, Video Id, and Frame Id 33 | df['Filename'] = 'subj_' + df['Subject Id'].astype(str) + '_vid_' + df['Video Id'].astype(str) + '_frame_' + df['Frame Id'].apply(lambda x: f'{x:04d}.png') 34 | 35 | # extract x Value and y value columns into numpy arrays 36 | x_values = df['x Value'].to_numpy()/M 37 | y_values = df['y Value'].to_numpy()/N 38 | filename_list = df['Filename'].tolist() 39 | filename_list_w_path = [f"{datasetFolder}{file}" for file in filename_list] 40 | train_targets = np.concatenate([x_values.reshape(-1, 1), y_values.reshape(-1, 1)], axis=1) 41 | return filename_list_w_path, train_targets 42 | 43 | 44 | train_files, train_targets = get_dataset_files(train_folder, M, N) 45 | test_files, test_targets = get_dataset_files(test_folder, M, N) 46 | valid_files, valid_targets = get_dataset_files(valid_folder, M, N) 47 | 48 | 49 | # function to load and preprocess images 50 | def load_and_preprocess_image(image_path, mean_gray): 51 | # load image from file path 52 | image = tf.io.read_file(image_path) 53 | # decode jpeg encoded image 54 | image = tf.image.decode_jpeg(image, channels=1) 55 | # normalize pixel values to be in the range [0, 1] and subtract mean intensity 56 | image = tf.cast(image, tf.float32) / 255.0 57 | image = tf.subtract(image, mean_gray) 58 | return image 59 | 60 | 61 | train_dataset = tf.data.Dataset.from_tensor_slices((train_files, train_targets)) 62 | train_dataset = train_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y)) 63 | train_dataset = train_dataset.batch(batch_sz) 64 | 65 | test_dataset = tf.data.Dataset.from_tensor_slices((test_files, test_targets)) 66 | test_dataset = test_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y)) 67 | test_dataset = test_dataset.batch(batch_sz) 68 | 69 | valid_dataset = tf.data.Dataset.from_tensor_slices((valid_files, valid_targets)) 70 | valid_dataset = valid_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y)) 71 | valid_dataset = valid_dataset.batch(batch_sz) 72 | 73 | #train_dataset = train_dataset.shuffle(buffer_size=10000) 74 | #train_dataset = train_dataset.shuffle(buffer_size=10000) 75 | 76 | def create_model(input_shape): 77 | input_layer = Input(shape=input_shape) 78 | conv1 = Conv2D(8, (17, 17), activation='relu', padding='same')(input_layer) 79 | pool1 = MaxPooling2D(pool_size=(2,2))(conv1) 80 | conv2 = Conv2D(16, (9, 9), activation='relu', padding='same')(pool1) 81 | conv3 = Conv2D(32, (5, 5), activation='relu', padding='same')(conv2) 82 | conv4 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv3) 83 | flatten = Flatten()(conv4) 84 | dense1 = Dense(128, activation='relu')(flatten) 85 | #dropout1 = Dropout(0.01)(dense1) 86 | dense2 = Dense(64, activation='relu')(dense1) 87 | x_output = Dense(1, activation='linear', name='x_output')(dense2) 88 | y_output = Dense(1, activation='linear', name='y_output')(dense2) 89 | model = Model(inputs=input_layer, outputs=[x_output, y_output]) 90 | #model.compile(optimizer=Adam(learning_rate=0.05), loss='mse') 91 | model.compile(optimizer='sgd', loss='mse') 92 | return model 93 | 94 | # Create the model 95 | input_shape = (M, N, 1) 96 | model = create_model(input_shape) 97 | model.summary() 98 | 99 | # set checkpoints to save after each epoch 100 | checkpoint_filepath = './models/model_checkpoint.h5' 101 | os.makedirs('./models', exist_ok=True) 102 | 103 | model_checkpoint_callback = ModelCheckpoint( 104 | filepath=checkpoint_filepath, 105 | save_weights_only=False, 106 | monitor='val_loss', 107 | mode='min', 108 | save_best_only=True) 109 | 110 | # continue training from last checkpoint if the model was trained earlier 111 | if os.path.isfile('./models/model_checkpoint.h5'): 112 | model = load_model('./models/model_checkpoint.h5') 113 | 114 | # Train the model 115 | model.fit(train_dataset, epochs=num_epochs, batch_size=batch_sz, validation_data=valid_dataset, callbacks=[model_checkpoint_callback]) 116 | 117 | predictions = model.predict(x=test_dataset) 118 | 119 | pred2 = np.transpose(np.squeeze(np.array(predictions))) 120 | test_errors = test_targets - pred2 121 | mse_test_err = np.mean(test_errors**2, axis=0) 122 | 123 | print('Mean Square Test Errors(x, y) = ') 124 | print(mse_test_err) 125 | 126 | # save target vs predicted x, y 127 | latestRes = np.savetxt('res.csv', np.concatenate((test_targets, pred2), axis=1), delimiter=',') 128 | -------------------------------------------------------------------------------- /vid2Frames.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import os 3 | import pandas as pd 4 | import tqdm 5 | 6 | # Read training file paths from a text file and store in a list 7 | 8 | def readStoredFiles(folder, fileName): 9 | with open(os.path.join(folder, fileName), 'r') as f: 10 | files = f.readlines() 11 | files = [file_path.strip() for file_path in files] 12 | return files 13 | 14 | folder = './train_val_test/' 15 | train_files = 'train_files.txt' 16 | valid_files = 'valid_files.txt' 17 | test_files = 'test_files.txt' 18 | 19 | train_targets = 'train_labels.txt' 20 | valid_targets = 'valid_labels.txt' 21 | test_targets = 'test_labels.txt' 22 | 23 | downSample = 4 24 | 25 | def grayscale_and_resize(videoFiles, targetFiles, downSample, outputPath): 26 | os.makedirs(outputPath, exist_ok=True) 27 | 28 | targets_x = [] 29 | targets_y = [] 30 | subjects = [] 31 | vid_nums = [] 32 | frame_nums = [] 33 | for file_path in targetFiles: 34 | file_parts = file_path.split('/') 35 | subject_id = file_parts[-2] 36 | video_id = file_parts[-1].split('.')[0] 37 | with open(file_path, "r") as f: 38 | # Read all lines and extract x and y values 39 | lines = f.readlines() 40 | x_values = [float(line.split()[0]) / downSample for line in lines] 41 | y_values = [float(line.split()[1]) / downSample for line in lines] 42 | targets_x.extend(x_values) 43 | targets_y.extend(y_values) 44 | subjects.extend([subject_id] * len(x_values)) 45 | vid_nums.extend([video_id] * len(x_values)) 46 | frame_nums.extend(range(len(x_values))) 47 | 48 | data = {'Subject Id': subjects, 'Video Id': vid_nums, 'Frame Id': frame_nums, 'x Value': targets_x, 'y Value': targets_y} 49 | df = pd.DataFrame(data) 50 | df.to_csv(outputPath+'targets.csv', index=False) 51 | 52 | for file_path in videoFiles: 53 | cap = cv2.VideoCapture(file_path) 54 | frame_count = 0 55 | file_parts = file_path.split('/') 56 | subject_id = file_parts[-2] 57 | video_id = file_parts[-1].split('.')[0] 58 | 59 | # Loop over all frames in the video 60 | while cap.isOpened(): 61 | ret, frame = cap.read() 62 | if not ret: 63 | break 64 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 65 | hg, wd = frame.shape 66 | frame = cv2.resize(frame, (int(wd / downSample), int(hg / downSample))) 67 | outputFrame = f'subj_{subject_id}_vid_{video_id}_frame_{frame_count:04}.png' 68 | frame_count += 1 69 | 70 | cv2.imwrite(outputPath+outputFrame, frame) 71 | # Release video capture object 72 | cap.release() 73 | 74 | train_file_list = readStoredFiles(folder, train_files) 75 | valid_file_list = readStoredFiles(folder, valid_files) 76 | test_file_list = readStoredFiles(folder, test_files) 77 | 78 | train_targets_list = readStoredFiles(folder, train_targets) 79 | valid_targets_list = readStoredFiles(folder, valid_targets) 80 | test_targets_list = readStoredFiles(folder, test_targets) 81 | 82 | train_output_folder = 'D:/eye_tracking/LPW/train_frames/' 83 | test_output_folder = 'D:/eye_tracking/LPW/test_frames/' 84 | valid_output_folder = 'D:/eye_tracking/LPW/valid_frames/' 85 | 86 | grayscale_and_resize(train_file_list, train_targets_list, downSample, train_output_folder) 87 | grayscale_and_resize(valid_file_list, valid_targets_list, downSample, valid_output_folder) 88 | grayscale_and_resize(test_file_list, test_targets_list, downSample, test_output_folder) 89 | --------------------------------------------------------------------------------