├── LICENSE
├── README.md
├── getMeanIntensity.py
├── selectTrainTestValData.py
├── trainModel.py
└── vid2Frames.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Melih Altun
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Eye_Tracking_with_Deep_CNN
 2 | A deep convolutional neural network implementation for tracking eye movements in videos
 3 | 
 4 | A tensorflow/keras model is developed here to track eye motions in videos. 
 5 | The model is trained, validated and tested with Labeled Pupils in the Wild (LPW) dataset:
 6 | https://perceptualui.org/research/datasets/LPW/
 7 | 
 8 | ![image](https://user-images.githubusercontent.com/40482921/234497211-f76d938e-45b3-4c86-a4e5-c013bbd53c55.png)
 9 | 
10 | The repository consists of 4 Python scripts. Once the dataset is downloaded and the paths in the scripts are modified to point to the dataset, run the scripts in the given order.
11 | 
12 | 1) selectTrainTestValData.py randomly selects videos for the training, validation, and test sets
13 | 
14 | 2) getMeanIntensity.py scans the training data and finds the mean intensity for all the frames in all of the videos
15 | 
16 | 3) vid2Frames.py extracts frames from videos, resizes, converts them to grayscale, and saves them in train, validation and test folders
17 | 
18 | 4) trainModel.py builds a CNN model, forms the train, test, and validation sets, and trains the model with the selected parameters. Finally, it measures the accuracy of the model using the test set.
19 | 
20 | To use the scripts, download the LPW dataset to your local machine and adjust the folder locations in the scripts.
21 | 
22 | Utilizing GPU is recommended. A single epoch with CPU may take over an hour to complete.  
23 | 
24 | Recommended configuration is: Python 3.8, Tensorflow 2.10.0, CUDA 11.2, CUDNN 8.8.1 and Zlib.
25 | 
26 | ![eye_tracker_2](https://user-images.githubusercontent.com/40482921/235327664-1a9024f0-cfaa-4776-a058-c03385fd02c8.PNG)
27 | 
28 | ![eye_tracker](https://user-images.githubusercontent.com/40482921/235327674-7dff69a1-2a11-44fb-86b2-851c6124a26e.PNG)
29 | 


--------------------------------------------------------------------------------
/getMeanIntensity.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | # Read training file paths from a text file and store in a list
 5 | with open('./train_val_test/train_files.txt', 'r') as f:
 6 |     train_files = f.readlines()
 7 |     train_files = [file_path.strip() for file_path in train_files]
 8 | 
 9 | 
10 | meanFrameVals = []
11 | # Loop over all training files
12 | for file_path in train_files:
13 |     cap = cv2.VideoCapture(file_path)
14 |     frame_count = 0
15 |     mean_gray_value = 0
16 | 
17 |     # Loop over all frames in the video
18 |     while cap.isOpened():
19 |         ret, frame = cap.read()
20 |         if not ret:
21 |             break
22 | 
23 |         total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
24 |         # Convert frame to grayscale
25 |         gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
26 | 
27 |         # Compute mean grayscale value of frame
28 |         mean_gray_value += np.mean(gray_frame)/total_frames/255
29 | 
30 |     # Print mean grayscale value of video
31 |     print(f"Mean grayscale value of {file_path}: {mean_gray_value}")
32 |     meanFrameVals.append(mean_gray_value)
33 | 
34 |     # Release video capture object
35 |     cap.release()
36 | 
37 | meanMeanFrameVal = np.mean(meanFrameVals)
38 | 
39 | np.savetxt("meanIntensity.txt", [meanMeanFrameVal])


--------------------------------------------------------------------------------
/selectTrainTestValData.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import glob
 3 | import random
 4 | 
 5 | folder_path = 'D:/eye_tracking/LPW/'
 6 | avi_files = glob.glob(os.path.join(folder_path, "**/*.avi"), recursive=True)
 7 | 
 8 | for i in range(len(avi_files)):
 9 |     avi_files[i] = avi_files[i].replace('\\', '/')
10 | 
11 | #for file_path in avi_files:
12 | #    print(file_path)
13 | 
14 | N = len(avi_files)
15 | 
16 | # Calculate number of files for each set
17 | train_N = int(N * 0.7)
18 | valid_N = int(N * 0.2)
19 | test_N = N - train_N - valid_N
20 | 
21 | # Shuffle the list of file names randomly
22 | random.shuffle(avi_files)
23 | 
24 | # Split the files into training, validation, and test sets
25 | train_files = avi_files[:train_N]
26 | valid_files = avi_files[train_N:train_N+valid_N]
27 | test_files = avi_files[train_N+valid_N:]
28 | 
29 | # Print the number of files in each set
30 | print(f"Number of training files: {len(train_files)}")
31 | print(f"Number of validation files: {len(valid_files)}")
32 | print(f"Number of test files: {len(test_files)}")
33 | 
def getTargetList(datasetFiles):
    """Return the label-file path that corresponds to each video path.

    The last three characters of every path (the ``avi`` extension) are
    replaced with ``txt``; the order of the input list is kept.
    """
    return [video_path[:-3] + "txt" for video_path in datasetFiles]
40 | 
# Build the label-file list that parallels each video list
train_labels, valid_labels, test_labels = map(
    getTargetList, (train_files, valid_files, test_files)
)
44 | 
45 | 
46 | # Write file paths to a new text file
def writeFileLists(folder, fileName, fileList):
    """Write every entry of fileList on its own line to folder/fileName.

    The folder is created when it does not exist yet, and an existing file
    with the same name is overwritten.
    """
    os.makedirs(folder, exist_ok=True)
    list_path = os.path.join(folder, fileName)
    with open(list_path, 'w') as out:
        out.writelines(entry + '\n' for entry in fileList)
52 | 
# Store the video list and the label list of every split under ./train_val_test/
for list_name, entries in (
    ('train_files.txt', train_files),
    ('train_labels.txt', train_labels),
    ('test_files.txt', test_files),
    ('test_labels.txt', test_labels),
    ('valid_files.txt', valid_files),
    ('valid_labels.txt', valid_labels),
):
    writeFileLists('./train_val_test/', list_name, entries)
59 | 


--------------------------------------------------------------------------------
/trainModel.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import numpy as np
  3 | import os
  4 | import tensorflow as tf
  5 | import keras
  6 | from keras.models import Model, load_model
  7 | from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
  8 | from keras.callbacks import ModelCheckpoint
  9 | 
 10 | physical_devices = tf.config.experimental.list_physical_devices('GPU')
 11 | print('Num GPUs Available: ', len(physical_devices))
 12 | if len(physical_devices) > 0:
 13 |     tf.config.experimental.set_memory_growth(physical_devices[0], True)
 14 | 
 15 | N = 120  # input y size
 16 | M = 160  # input x size
 17 | batch_sz = 128
 18 | num_epochs = 24
 19 | 
 20 | train_folder = 'D:/eye_tracking/LPW/train_frames/'
 21 | valid_folder = 'D:/eye_tracking/LPW/valid_frames/'
 22 | test_folder = 'D:/eye_tracking/LPW/test_frames/'
 23 | 
 24 | if (os.path.isfile('./meanIntensity.txt')):
 25 |     meanIntensity = np.loadtxt("meanIntensity.txt")
 26 | else:
 27 |     meanIntensity = np.float64(0.48)
 28 | 
 29 | def get_dataset_files(datasetFolder, M, N):
 30 | 
 31 |     df = pd.read_csv(datasetFolder+'targets.csv')
 32 |     # add a filename column based on Subject Id, Video Id, and Frame Id
 33 |     df['Filename'] = 'subj_' + df['Subject Id'].astype(str) + '_vid_' + df['Video Id'].astype(str) + '_frame_' + df['Frame Id'].apply(lambda x: f'{x:04d}.png')
 34 | 
 35 |     # extract x Value and y value columns into numpy arrays
 36 |     x_values = df['x Value'].to_numpy()/M
 37 |     y_values = df['y Value'].to_numpy()/N
 38 |     filename_list = df['Filename'].tolist()
 39 |     filename_list_w_path = [f"{datasetFolder}{file}" for file in filename_list]
 40 |     train_targets = np.concatenate([x_values.reshape(-1, 1), y_values.reshape(-1, 1)], axis=1)
 41 |     return filename_list_w_path, train_targets
 42 | 
 43 | 
 44 | train_files, train_targets = get_dataset_files(train_folder, M, N)
 45 | test_files, test_targets = get_dataset_files(test_folder, M, N)
 46 | valid_files, valid_targets = get_dataset_files(valid_folder, M, N)
 47 | 
 48 | 
 49 | # function to load and preprocess images
 50 | def load_and_preprocess_image(image_path, mean_gray):
 51 |     # load image from file path
 52 |     image = tf.io.read_file(image_path)
 53 |     # decode jpeg encoded image
 54 |     image = tf.image.decode_jpeg(image, channels=1)
 55 |     # normalize pixel values to be in the range [0, 1] and subtract mean intensity
 56 |     image = tf.cast(image, tf.float32) / 255.0
 57 |     image = tf.subtract(image, mean_gray)
 58 |     return image
 59 | 
 60 | 
 61 | train_dataset = tf.data.Dataset.from_tensor_slices((train_files, train_targets))
 62 | train_dataset = train_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y))
 63 | train_dataset = train_dataset.batch(batch_sz)
 64 | 
 65 | test_dataset = tf.data.Dataset.from_tensor_slices((test_files, test_targets))
 66 | test_dataset = test_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y))
 67 | test_dataset = test_dataset.batch(batch_sz)
 68 | 
 69 | valid_dataset = tf.data.Dataset.from_tensor_slices((valid_files, valid_targets))
 70 | valid_dataset = valid_dataset.map(lambda x, y: (load_and_preprocess_image(x, meanIntensity), y))
 71 | valid_dataset = valid_dataset.batch(batch_sz)
 72 | 
 73 | #train_dataset = train_dataset.shuffle(buffer_size=10000)
 74 | #train_dataset = train_dataset.shuffle(buffer_size=10000)
 75 | 
 76 | def create_model(input_shape):
 77 |     input_layer = Input(shape=input_shape)
 78 |     conv1 = Conv2D(8, (17, 17), activation='relu', padding='same')(input_layer)
 79 |     pool1 = MaxPooling2D(pool_size=(2,2))(conv1)
 80 |     conv2 = Conv2D(16, (9, 9), activation='relu', padding='same')(pool1)
 81 |     conv3 = Conv2D(32, (5, 5), activation='relu', padding='same')(conv2)
 82 |     conv4 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv3)
 83 |     flatten = Flatten()(conv4)
 84 |     dense1 = Dense(128, activation='relu')(flatten)
 85 |     #dropout1 = Dropout(0.01)(dense1)
 86 |     dense2 = Dense(64, activation='relu')(dense1)
 87 |     x_output = Dense(1, activation='linear', name='x_output')(dense2)
 88 |     y_output = Dense(1, activation='linear', name='y_output')(dense2)
 89 |     model = Model(inputs=input_layer, outputs=[x_output, y_output])
 90 |     #model.compile(optimizer=Adam(learning_rate=0.05), loss='mse')
 91 |     model.compile(optimizer='sgd', loss='mse')
 92 |     return model
 93 | 
 94 | # Create the model
 95 | input_shape = (M, N, 1)
 96 | model = create_model(input_shape)
 97 | model.summary()
 98 | 
 99 | # set checkpoints to save after each epoch
100 | checkpoint_filepath = './models/model_checkpoint.h5'
101 | os.makedirs('./models', exist_ok=True)
102 | 
103 | model_checkpoint_callback = ModelCheckpoint(
104 |     filepath=checkpoint_filepath,
105 |     save_weights_only=False,
106 |     monitor='val_loss',
107 |     mode='min',
108 |     save_best_only=True)
109 | 
110 | # continue training from last checkpoint if the model was trained earlier
111 | if os.path.isfile('./models/model_checkpoint.h5'):
112 |     model = load_model('./models/model_checkpoint.h5')
113 | 
114 | # Train the model
115 | model.fit(train_dataset, epochs=num_epochs, batch_size=batch_sz, validation_data=valid_dataset, callbacks=[model_checkpoint_callback])
116 | 
117 | predictions = model.predict(x=test_dataset)
118 | 
119 | pred2 = np.transpose(np.squeeze(np.array(predictions)))
120 | test_errors = test_targets - pred2
121 | mse_test_err = np.mean(test_errors**2, axis=0)
122 | 
123 | print('Mean Square Test Errors(x, y) = ')
124 | print(mse_test_err)
125 | 
126 | # save target vs predicted x, y
127 | latestRes = np.savetxt('res.csv', np.concatenate((test_targets, pred2), axis=1), delimiter=',')
128 | 


--------------------------------------------------------------------------------
/vid2Frames.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import os
 3 | import pandas as pd
 4 | import tqdm
 5 | 
 6 | # Read training file paths from a text file and store in a list
 7 | 
 8 | def readStoredFiles(folder, fileName):
 9 |     with open(os.path.join(folder, fileName), 'r') as f:
10 |         files = f.readlines()
11 |         files = [file_path.strip() for file_path in files]
12 |     return files
13 | 
# Folder that holds the split lists
folder = './train_val_test/'

# Video-list and label-list file names of each split
train_files, train_targets = 'train_files.txt', 'train_labels.txt'
valid_files, valid_targets = 'valid_files.txt', 'valid_labels.txt'
test_files, test_targets = 'test_files.txt', 'test_labels.txt'

# Factor by which frames and label coordinates are scaled down
downSample = 4
24 | 
def _subject_and_video_ids(file_path):
    """Return (parent folder name, file name up to the first dot) of a '/'-separated path."""
    parts = file_path.split('/')
    return parts[-2], parts[-1].split('.')[0]


def _read_scaled_targets(label_path, downSample):
    """Return the (x, y) pairs of one label file, each divided by downSample."""
    pairs = []
    with open(label_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing newline) carry no label
                continue
            pairs.append((float(fields[0]) / downSample, float(fields[1]) / downSample))
    return pairs


def _write_frames(video_path, downSample, outputPath):
    """Save every frame of one video as a down-sampled grayscale PNG in outputPath."""
    subject_id, video_id = _subject_and_video_ids(video_path)
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            hg, wd = gray.shape
            small = cv2.resize(gray, (int(wd / downSample), int(hg / downSample)))
            outputFrame = f'subj_{subject_id}_vid_{video_id}_frame_{frame_count:04}.png'
            cv2.imwrite(os.path.join(outputPath, outputFrame), small)
            frame_count += 1
    finally:
        # Release the capture even if a frame could not be processed
        cap.release()


def grayscale_and_resize(videoFiles, targetFiles, downSample, outputPath):
    """Write the scaled targets and the down-sampled grayscale frames of one split.

    First, every label file is parsed and ``targets.csv`` is written to
    outputPath with the columns ``Subject Id``, ``Video Id``, ``Frame Id``,
    ``x Value`` and ``y Value``; coordinates are divided by downSample and the
    frame id is the index of the label within its file. Then every video is
    decoded and each frame is saved as
    ``subj_<subject>_vid_<video>_frame_<index, 4 digits>.png``.

    Args:
        videoFiles: Paths of the videos, using '/' as separator.
        targetFiles: Paths of the label files, using '/' as separator.
        downSample: Factor by which frame size and coordinates are divided.
        outputPath: Folder that receives the frames and ``targets.csv``.
    """
    os.makedirs(outputPath, exist_ok=True)

    rows = []
    for label_path in targetFiles:
        subject_id, video_id = _subject_and_video_ids(label_path)
        scaled = _read_scaled_targets(label_path, downSample)
        rows.extend(
            (subject_id, video_id, frame_id, x, y)
            for frame_id, (x, y) in enumerate(scaled)
        )

    columns = ['Subject Id', 'Video Id', 'Frame Id', 'x Value', 'y Value']
    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(os.path.join(outputPath, 'targets.csv'), index=False)

    for video_path in videoFiles:
        _write_frames(video_path, downSample, outputPath)
73 | 
# Video lists of the three splits
train_file_list, valid_file_list, test_file_list = (
    readStoredFiles(folder, list_name)
    for list_name in (train_files, valid_files, test_files)
)

# Label lists of the three splits
train_targets_list, valid_targets_list, test_targets_list = (
    readStoredFiles(folder, list_name)
    for list_name in (train_targets, valid_targets, test_targets)
)

train_output_folder = 'D:/eye_tracking/LPW/train_frames/'
test_output_folder = 'D:/eye_tracking/LPW/test_frames/'
valid_output_folder = 'D:/eye_tracking/LPW/valid_frames/'

# Extract frames and targets split by split: train, validation, test
for videos, labels, destination in (
    (train_file_list, train_targets_list, train_output_folder),
    (valid_file_list, valid_targets_list, valid_output_folder),
    (test_file_list, test_targets_list, test_output_folder),
):
    grayscale_and_resize(videos, labels, downSample, destination)
89 | 


--------------------------------------------------------------------------------