├── src ├── 32d.jpg ├── CNN.jpg ├── are.jpg ├── K-fold.jpg ├── result.jpg ├── figmodal.jpg ├── headpose.jpg └── camera coordinate.jpg ├── pre ├── week1.pptx ├── week2.pptx └── ~$week2.pptx ├── essay ├── 1711.09017.pdf ├── 1905.01941v2.pdf ├── zhang_CVPR15.pdf └── Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf ├── two_eye ├── __pycache__ │ ├── ARNet.cpython-37.pyc │ ├── utils.cpython-37.pyc │ └── Dataloader.cpython-37.pyc ├── see_data.py ├── Dataloader.py ├── README.md ├── utils.py ├── ARNet.py └── main.py ├── single_eye_normalized ├── gpu │ ├── __pycache__ │ │ ├── LeNet.cpython-37.pyc │ │ ├── LeNet1.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── LeNet.py │ ├── LeNet1.py │ ├── train1.py │ ├── train.py │ └── utils.py ├── validation │ ├── __pycache__ │ │ ├── LeNet.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── data.txt │ ├── LeNet.py │ ├── train2.py │ ├── train.py │ ├── train_onetime.py │ └── utils.py ├── visualize │ ├── draw_picture.py │ └── draw.ipynb └── train_cpu │ ├── LeNet.py │ ├── LeNet2.py │ ├── train.py │ ├── train_without_headpose.py │ ├── utils.py │ └── Normalized_process.ipynb ├── note └── Apperance-based+gaze+estimation+in+the+wild_LR.md └── README.md /src/32d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/32d.jpg -------------------------------------------------------------------------------- /src/CNN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/CNN.jpg -------------------------------------------------------------------------------- /src/are.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/are.jpg -------------------------------------------------------------------------------- /pre/week1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/pre/week1.pptx -------------------------------------------------------------------------------- /pre/week2.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/pre/week2.pptx -------------------------------------------------------------------------------- /src/K-fold.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/K-fold.jpg -------------------------------------------------------------------------------- /src/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/result.jpg -------------------------------------------------------------------------------- /src/figmodal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/figmodal.jpg -------------------------------------------------------------------------------- /src/headpose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/headpose.jpg -------------------------------------------------------------------------------- /essay/1711.09017.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/1711.09017.pdf -------------------------------------------------------------------------------- /essay/1905.01941v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/1905.01941v2.pdf -------------------------------------------------------------------------------- /essay/zhang_CVPR15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/zhang_CVPR15.pdf -------------------------------------------------------------------------------- /src/camera coordinate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/camera coordinate.jpg -------------------------------------------------------------------------------- /two_eye/__pycache__/ARNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/ARNet.cpython-37.pyc -------------------------------------------------------------------------------- /two_eye/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /two_eye/__pycache__/Dataloader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/Dataloader.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/LeNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/LeNet.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/LeNet1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/LeNet1.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /pre/~$week2.pptx: -------------------------------------------------------------------------------- 1 | Li Qilin Li Qilin -------------------------------------------------------------------------------- /single_eye_normalized/validation/__pycache__/LeNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/validation/__pycache__/LeNet.cpython-37.pyc 
-------------------------------------------------------------------------------- /single_eye_normalized/validation/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/validation/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /essay/Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf -------------------------------------------------------------------------------- /single_eye_normalized/validation/data.txt: -------------------------------------------------------------------------------- 1 | [90.0482177734375,89.9613265991211] 2 | [89.95109558105469,89.93241882324219] 3 | [65.5342788696289,76.80120086669922] 4 | [17.73709487915039,15.879902839660645] 5 | [14.747998237609863,12.863384246826172] 6 | [11.418121337890625,9.741545677185059] 7 | [9.759241104125977,8.560312271118164] 8 | [9.765317916870117,9.141117095947266] 9 | [12.103734016418457,12.338122367858887] 10 | [13.03825855255127,13.487483024597168] 11 | [13.195599555969238,13.720359802246094] 12 | [12.461770057678223,12.860777854919434] 13 | [11.072410583496094,11.032851219177246] 14 | [9.931607246398926,9.157052993774414] 15 | [9.824358940124512,8.315235137939453] 16 | -------------------------------------------------------------------------------- /two_eye/see_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from tqdm import tqdm 3 | import math 4 | from scipy.io import loadmat 5 | 6 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 7 | 8 | def read_eye_data(mat, label): 9 | mat_data = loadmat(mat) 10 | right_info = mat_data['data'][label][0, 0] 11 | gaze = right_info['gaze'][0, 0] 12 | image = right_info['image'][0, 0] 13 | pose = right_info['pose'][0, 0] 14 | return gaze, image, pose 15 | 16 | def calc_angle(gaze1, gaze2): 17 | angle = 0 18 | for i in range(3): 19 | angle += gaze1[i] * gaze2[i] 20 | s1 = math.sqrt(gaze1[1] **2 + gaze1[2]**2 + gaze1[0] ** 2) 21 | s2 = math.sqrt(gaze2[1]**2 + gaze2[2]**2 + gaze2[0] ** 2) 22 | return angle / (s1 * s2) 23 | 24 | def collect_data_from_mat(): 25 | mat_files = glob.glob(path+'/Normalized/**/*.mat', recursive = True) 26 | for matfile in tqdm(mat_files[:1]): 27 | rgaze, rimage, rpose = read_eye_data(matfile, "right") 28 | lgaze, limage, lpose = read_eye_data(matfile, "left") 29 | 30 | for i in range(len(rgaze)): 31 | print("left:", rgaze[i]) 32 | print("right:", lgaze[i]) 33 | collect_data_from_mat() -------------------------------------------------------------------------------- /single_eye_normalized/visualize/draw_picture.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | Y = [90.2514, 90.0982, 89.9913, 48.2418, 18.7832, 16.3784, 13.7179, 11.1366, 9.6803,9.7897, 10.9040, 12.3392, 13.4419, 12.8745, 11.5562, 10.3523, 9.8592, 10.2269, 10.9432, 11.3758, 11.4323,11.5021,11.4895, 10.9601, 9.9650, 8.4208, 8.1934, 7.9030] 4 | Y_3d=[46.3943,15.7942,9.1083,10.8060,14.1114,22.6173,31.8720,28.3000,20.2483,15.2311,10.8004,20.2338,25.6033,35.2207,29.1831, 
16.3451,14.9554,16.2854,26.3623,34.4985,24.2523, 20.6192,18.1405,17.3484,28.0920,35.9993,23.1917,15.3877] 5 | Y1 = [90.0585, 89.9763, 90.1588, 10.8239, 8.8382, 9.0440, 9.9118, 11.0468, 12.0813, 12.3653, 11.5053, 9.9235, 8.7291, 8.5098, 9.1294, 10.2406, 10.9890, 10.7011, 9.6494, 8.5989, 8.4489, 9.3461, 10.0073, 10.2401, 9.9965, 9.0982, 7.9569, 8.4805, 8.5101, 9.0833, 9.4951, 9.7672, 9.1740, 8.5483, 8.7967, 9.5826, 10.3982, 9.9145, 8.9524, 8.5694, 9.1277, 9.8620, 9.4036, 8.6904, 8.4803, 9.0725, 9.4882, 9.2406, 8.9340, 8.6731] 6 | X = [i for i in range(len(Y))] 7 | plt.plot(X, Y) 8 | plt.plot(X,Y_3d) 9 | plt.title("mean angle error of output") 10 | plt.xlabel("epoch") 11 | plt.ylabel("angle err (degree)") 12 | plt.legend(['use of 2D vector', 'use of 3D vector']) 13 | plt.show() -------------------------------------------------------------------------------- /single_eye_normalized/gpu/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x 
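# --- Usage sketch added for illustration; not part of the original file. ---
# Shapes follow the layer definitions above: a batch of 1x36x60 normalized eye
# patches plus a 2-D head-pose angle vector per sample. The pose is concatenated
# with the 500-D fc1 feature (hence fc2's 502 inputs), giving a 2-D (pitch, yaw)
# gaze prediction.
if __name__ == "__main__":
    model = Model()
    eye = torch.randn(4, 1, 36, 60)   # grey-scale eye images, 36x60
    head_pose = torch.randn(4, 2)     # (pitch, yaw) head-pose angles
    gaze = model(eye, head_pose)
    print(gaze.shape)                 # expected: torch.Size([4, 2])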
-------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/LeNet2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(503, 3) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /single_eye_normalized/validation/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /note/Apperance-based+gaze+estimation+in+the+wild_LR.md: -------------------------------------------------------------------------------- 1 | ### Appearance-Based Gaze Estimation in the Wild 2 | 3 | #### 0x00 The MPIIGaze Dataset 4 | 5 | - 15 participants, 213,659 pictures 6 | 7 | - outside of laboratory conditions, i.e during daliy routine 8 | - wilder range of recording location, time, illumination and eye appearance 9 | 10 | how to collect: use of laptop application to let volunteers to look at a fixed place, and take pictures of their eyes. 11 | 12 | use of laptop to collect: laptops are suited for long-term daily recordings but also because they are an important platform for *eye tracking application*. 
13 | 14 | #### 0x01 Calibration settings 15 | 16 | I think this is used in 3d head pose estimation and face aligment process. I don't use that. 17 | 18 | #### 0x02 Method 19 | 20 | The CNN is to learn the mapping from *head poses and eye images* to *gaze directions* in the camera coordinate system. 21 | 22 | i) Face alignment and 3d head pose estimation 23 | 24 | - detect face 25 | - generate 6D landmarks 26 | 27 | ii) Data normalisation 28 | 29 | > first proposed in *Learning-by-Synthesis for Appearance-based 3D Gaze Estimation* 30 | 31 | iii) Multimodal CNNs 32 | 33 | - Process the input 2D head angle `h` and the normalized eye image 𝑒 to get the final 2D line of sight angle vector `g` 34 | 35 | - use LeNet 36 | 37 | - add `h` in the full connect layer. -------------------------------------------------------------------------------- /single_eye_normalized/gpu/LeNet1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is a LeNet vesion to ingore pose information 3 | in the CNN. the corresponding training code is in 4 | train1.py. Same utils.py is used here. 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def initialize_weights(module): 12 | if isinstance(module, nn.Conv2d): 13 | nn.init.constant_(module.bias, 0) 14 | elif isinstance(module, nn.Linear): 15 | nn.init.xavier_uniform_(module.weight) 16 | nn.init.constant_(module.bias, 0) 17 | 18 | class Model(nn.Module): 19 | def __init__(self): 20 | super(Model, self).__init__() 21 | 22 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 23 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 24 | self.fc1 = nn.Linear(3600, 500) 25 | self.fc2 = nn.Linear(500, 2) 26 | 27 | self._initialize_weight() 28 | 29 | def _initialize_weight(self): 30 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 31 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 32 | self.apply(initialize_weights) 33 | 34 | def forward(self, x): 35 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 36 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 37 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 38 | # x = torch.cat([x, y], dim=1) 39 | x = self.fc2(x) 40 | return x -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Why is it neccessary to covert 3D vector into 2D? 3 | What if we train using 3D poses and gazes? 
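(Added note, not in the original docstring: a gaze direction is a unit vector, so the
2D (pitch, yaw) form produced by utils.gaze3D_to_2D carries the same information
without the redundant norm; this script presumably keeps the raw 3D pose/gaze vectors
and the 3-output LeNet2 so that the two representations can be compared.)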
4 | ''' 5 | from utils import * 6 | from LeNet2 import * 7 | import torch 8 | 9 | def batch_process(j, batch, img, pose, gaze): 10 | ''' 11 | :return: a-img, b-pose, c-gaze 12 | ''' 13 | a = torch.randn(batch, 1, 36, 60) 14 | b = torch.randn(batch,3) 15 | c = torch.randn(batch,3) 16 | for i in range(batch): 17 | a[i, 0] = torch.tensor(img[j * batch + i]) 18 | b[i] = torch.tensor(pose[j * batch + i]) 19 | c[i] = torch.tensor(gaze[j * batch + i]) 20 | return a, b, c 21 | 22 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 23 | 24 | ltrain = len(train_gaze) 25 | ltest = len(test_gaze) 26 | print("training dataset size:", len(train_gaze)) 27 | print("test dataset size:", len(test_gaze)) 28 | 29 | 30 | ### training process ### 31 | cuda_gpu = torch.cuda.is_available() 32 | GazeCNN = Model() 33 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 34 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 35 | batch = 512 36 | train_range = int(ltrain / batch) 37 | test_range = int(ltest / batch) 38 | 39 | 40 | for epoch in range(1): 41 | for i in tqdm(range(1)): 42 | img, pose, gaze = batch_process(i, batch, train_image, train_pose, train_gaze) 43 | np.array(train_gaze) 44 | gaze_pred = GazeCNN(img, pose) 45 | loss = criterion(gaze_pred, gaze) 46 | loss.backward() 47 | optimizer.step() 48 | 49 | angle_loss=0 50 | for j in tqdm(range(1)): 51 | timg, tpose, tgaze = batch_process(j, batch, train_image, train_pose, train_gaze) 52 | tgaze_pred = GazeCNN(timg, tpose) 53 | print(mean_angle_loss(tgaze_pred, tgaze)) 54 | 55 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 56 | 57 | -------------------------------------------------------------------------------- /two_eye/Dataloader.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | 6 | 7 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 8 | 9 | def read_eye_data(mat, label): 10 | ''' 11 | read data from each .mat 12 | :param mat: file name 13 | :param label: right/ left 14 | :return: gaze, image, pose 15 | ''' 16 | mat_data = loadmat(mat) 17 | right_info = mat_data['data'][label][0, 0] 18 | gaze = right_info['gaze'][0, 0] 19 | image = right_info['image'][0, 0] 20 | pose = right_info['pose'][0, 0] 21 | return gaze, image, pose 22 | 23 | def collect_data_from_mat(label): 24 | ''' 25 | collect data from annotation part 26 | :return: list of index, image, pose, gaze 27 | ''' 28 | mat_files = glob.glob(path+'/Normalized/**/*.mat', recursive = True) 29 | mat_files.sort() 30 | i = 0 31 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 32 | for matfile in tqdm(mat_files[:2]): 33 | pnum = matfile.split('/')[-2] # pxx 34 | fgaze, fimage, fpose = read_eye_data(matfile, label) 35 | if int(pnum[1:]) < 7: 36 | if train_gaze == []: 37 | train_gaze = fgaze 38 | train_image = fimage 39 | train_pose = fpose 40 | else: 41 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 42 | train_image = np.append(train_image, fimage, axis = 0) 43 | train_pose = np.append(train_pose, fpose, axis = 0) 44 | else: 45 | if test_gaze == []: 46 | test_gaze = fgaze 47 | test_image = fimage 48 | test_pose = fpose 49 | else: 50 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 51 | test_image = np.append(test_image, fimage, axis = 0) 52 | test_pose = np.append(test_pose, fpose, axis = 0) 53 | i += 1 
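    # Added explanatory comments, not in the original source:
    # - the split above is by participant id: p00-p06 go to the training lists,
    #   all later participants to the test lists;
    # - mat_files[:2] only loads the first two .mat files, which looks like a
    #   debugging shortcut rather than the intended full-dataset behaviour.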
54 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose -------------------------------------------------------------------------------- /two_eye/README.md: -------------------------------------------------------------------------------- 1 | ### Literature Review 2 | 3 | #### 0x0. Getting Started 4 | 5 | For the two-eye gaze problem, I read this article: *Appearance-Based Gaze Estimation via Evaluation-Guided Asymmetric Regression* 6 | 7 | #### 0x1. Knowledge 8 | 9 | 1. the gaze directions of the two eyes should be physically consistent 10 | 2. even if we apply the same regression method, the gaze estimation performance on the two eyes can be very different 11 | 12 | Hence we need a new strategy that no longer treats both eyes equally. 13 | 14 | Strategy: **guide the asymmetric gaze regression by evaluating the performance of the regression strategy w.r.t. different eyes.** 15 | 16 | 17 | 18 | #### 0x2. Main Work 19 | 20 | 1. Propose a *multi-stream* AR-Net and E-Net. 21 | 2. Propose a new mechanism of evaluation-guided asymmetric regression. 22 | 3. Design the ARE-Net. 23 | 24 | 25 | 26 | #### 0x3. Two-eye asymmetry 27 | 28 | Previous work: treats the two eyes identically. 29 | 30 | Observation: we cannot expect the same accuracy for the two eyes; either eye may turn out to be the more accurate one. 31 | 32 | Why the asymmetry: head pose, image quality and individuality. 33 | 34 | How to solve it: propose a network that can tell which eye is of higher quality. 35 | 36 | 37 | 38 | #### 0x4. ARE-Net 39 | 40 | - AR-Net: 41 | - it is designed to optimize the two eyes in an asymmetric way 42 | - structure: 43 | - the first two streams extract a 500D deep feature from each eye independently, and the last two streams produce a joint 500D feature at the end 44 | - input the head pose vector (3D for each eye) before the final regression 45 | - Base-CNN: similar to AlexNet 46 | - loss function: weighted angular error 47 | - The weights λl and λr determine whether the accuracy of the left or the right eye should be considered more important 48 | 49 | - E-Net: 50 | - the evaluation network is trained to predict the probability of the left/right eye image being the more effective one for gaze estimation. 51 | 52 | 53 | 54 | 55 | 56 | #### 0x5.
My plan 57 | 58 | - First try to implement AR-Net, and see the mean err 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /two_eye/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 as cv 3 | import torch 4 | 5 | def pose3D_to_2D(pose): 6 | ''' 7 | pose (a, b, c) is rotation (angle) 8 | M = Rodrigues((x,y,z)) 9 | Zv = (the third column of M) 10 | theta = asin(Zv[1]) 11 | phi = atan2(Zv[0], Zv[2]) 12 | ''' 13 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 14 | vec = M[:, 2] 15 | yaw = np.arctan2(vec[0], vec[2]) 16 | pitch = np.arcsin(vec[1]) 17 | return np.array([pitch, yaw]) 18 | 19 | 20 | def gaze3D_to_2D(gaze): 21 | ''' 22 | gaze (x, y, z) is direction 23 | theta = asin(-y) 24 | phi = atan2(-x, -z) 25 | ''' 26 | x, y, z = (gaze[i] for i in range(3)) 27 | pitch = np.arcsin(-y) 28 | yaw = np.arctan2(-x, -z) 29 | return np.stack((pitch, yaw)).T 30 | 31 | 32 | def gaze2D_to_3D(gaze): 33 | ''' 34 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 35 | :return: gaze=(x,y,z) 36 | ''' 37 | pitch = gaze[0] 38 | yaw = gaze[1] 39 | x = -np.cos(pitch) * np.sin(yaw) 40 | y = -np.sin(pitch) 41 | z = -np.cos(pitch) * np.cos(yaw) 42 | norm = np.sqrt(x**2 + y**2 + z**2) 43 | x /= norm 44 | y /= norm 45 | z /= norm # all normalized 46 | return x, y, z 47 | 48 | 49 | def angle_error(pred, truth1, truth2): 50 | ''' 51 | :param pred: 52 | :param truth1: 53 | :param truth2: 54 | :return: 55 | ''' 56 | pred1 = pred[:,:2] # left 57 | pred2 = pred[:2,:] # right 58 | ans1 = mean_angle_loss(pred2, truth2) 59 | ans2 = mean_angle_loss(pred1, truth1) 60 | if ans2 > ans1: 61 | return ans1 62 | else: 63 | return ans2 64 | 65 | 66 | def mean_angle_loss(pred, truth): 67 | ''' 68 | :param pred,truth: type=torch.Tensor 69 | :return: 70 | ''' 71 | pred = pred.detach().numpy() 72 | ans = 0 73 | for i in range(len(pred)): 74 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 75 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 76 | angles = p_x * t_x + p_y * t_y + p_z * t_z 77 | ans += torch.acos(angles) * 180 / np.pi 78 | return ans / len(pred) 79 | 80 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/train1.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet1 import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch,2) 21 | c = torch.randn(batch,2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 29 | 30 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 31 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 32 | 33 | ltrain = len(train_gaze) 34 | ltest = len(test_gaze) 35 | print("training dataset size:", len(train_gaze)) 36 | print("test dataset size:", len(test_gaze)) 37 
| 38 | 39 | ### training process ### 40 | cuda_gpu = torch.cuda.is_available() 41 | print("cuda is", cuda_gpu) 42 | 43 | GazeCNN = Model() 44 | criterion = torch.nn.MSELoss(reduction="mean") 45 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.01) 46 | 47 | batch = 512 48 | train_range = int(ltrain / batch) 49 | test_range = int(ltest / batch) 50 | 51 | loss_list = [] 52 | 53 | for epoch in range(50): 54 | for i in tqdm(range(train_range)): 55 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 56 | gaze_pred_2D = GazeCNN(img) 57 | loss = criterion(gaze_pred_2D, gaze) 58 | loss.backward() 59 | optimizer.step() 60 | 61 | angle_loss=0 62 | for j in tqdm(range(test_range)): 63 | timg, tpose, tgaze = batch_process(j, batch, train_image, train_pose2D, train_gaze2D) 64 | tgaze_pred_2D = GazeCNN(timg) 65 | 66 | 67 | angle_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 68 | 69 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 70 | loss_list.append(angle_loss/test_range) 71 | 72 | print(loss_list) 73 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/train.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch,2) 21 | c = torch.randn(batch,2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 29 | 30 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 31 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 32 | 33 | ltrain = len(train_gaze) 34 | ltest = len(test_gaze) 35 | print("training dataset size:", len(train_gaze)) 36 | print("test dataset size:", len(test_gaze)) 37 | 38 | 39 | ### training process ### 40 | cuda_gpu = torch.cuda.is_available() 41 | GazeCNN = Model() 42 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 43 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 44 | batch = 512 45 | train_range = int(ltrain / batch) 46 | test_range = int(ltest / batch) 47 | 48 | 49 | for epoch in range(30): 50 | for i in tqdm(range(train_range)): 51 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 52 | if cuda_gpu: 53 | GazeCNN = GazeCNN.cuda() 54 | criterion = criterion.cuda() 55 | img = img.cuda() 56 | pose = pose.cuda() 57 | gaze = gaze.cuda() 58 | 59 | gaze_pred_2D = GazeCNN(img, pose) 60 | loss = criterion(gaze_pred_2D, gaze) 61 | loss.backward() 62 | optimizer.step() 63 | 64 | valid_loss=0 65 | for j in tqdm(range(test_range - 1)): 66 | vimg, vpose, vgaze = batch_process(j, batch, test_image, test_pose2D, test_gaze2D) 67 | if cuda_gpu: 68 | GazeCNN = GazeCNN.cpu() 69 | vgaze_pred_2D = GazeCNN(vimg, vpose) 70 | valid_loss += mean_angle_loss(vgaze_pred_2D, vgaze) 71 | 72 | print(valid_loss / (test_range-1)) 
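# Added note, not in the original script: optimizer.zero_grad() is never called
# before loss.backward(), so gradients accumulate across mini-batches. The usual
# PyTorch pattern for each training step would be:
#     optimizer.zero_grad()
#     loss = criterion(gaze_pred_2D, gaze)
#     loss.backward()
#     optimizer.step()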
73 | 74 | -------------------------------------------------------------------------------- /two_eye/ARNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | ############# definition of angle ################# 6 | 7 | def AngularErr(input,target): 8 | input = F.normalize(input) 9 | target = F.normalize(target) 10 | cosineLoss = nn.CosineSimilarity() 11 | return cosineLoss(input, target) 12 | 13 | class Criterion(nn.Module): 14 | def __init__(self): 15 | super(Criterion, self).__init__() 16 | 17 | def forward(self, pred_vec, left_gt, right_gt): 18 | left_pd = pred_vec[:,:2] 19 | right_pd = pred_vec[:,2:] 20 | vall = AngularErr(left_gt, left_pd) 21 | vall = torch.acos(vall) 22 | dvall = torch.div(1, vall) 23 | valr = AngularErr(right_gt, right_pd) 24 | valr = torch.acos(valr) 25 | dvalr = torch.div(1, valr) 26 | sum = torch.add(dvalr, dvall) 27 | weightl = torch.div(dvall, sum) 28 | weightr = torch.div(dvalr, sum) 29 | return torch.add(torch.mul(weightl, vall), torch.mul(weightr, valr)) 30 | 31 | ########### Base-CNN set up ############ 32 | 33 | def initialize_weights(module): 34 | if isinstance(module, nn.Conv2d): 35 | nn.init.constant_(module.bias, 0) 36 | elif isinstance(module, nn.Linear): 37 | nn.init.xavier_uniform_(module.weight) 38 | nn.init.constant_(module.bias, 0) 39 | 40 | class BaseCNN(nn.Module): 41 | def __init__(self): 42 | super(BaseCNN, self).__init__() 43 | 44 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 45 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 46 | self.fc1 = nn.Linear(3600, 500) 47 | # self.fc2 = nn.Linear(503, 3) 48 | 49 | self._initialize_weight() 50 | 51 | def _initialize_weight(self): 52 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 53 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 54 | self.apply(initialize_weights) 55 | 56 | def forward(self, x): 57 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 58 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 59 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 60 | # x = torch.cat([x, y], dim=1) 61 | # x = self.fc2(x) 62 | return x 63 | 64 | ############### AR-NET set up ################ 65 | 66 | class ARNet(nn.Module): 67 | def __init__(self): 68 | super(ARNet, self).__init__() 69 | 70 | self.bCNN1 = BaseCNN() 71 | self.bCNN2 = BaseCNN() 72 | self.fc1 = nn.Linear(1004, 4) 73 | 74 | def forward(self, x1, x2, y1, y2): 75 | ### x1: left image, x2: right image, y: head pose 76 | x1 = self.bCNN1(x1) 77 | x2 = self.bCNN2(x2) 78 | x = torch.cat([x1, x2], dim=1) 79 | x = torch.cat([x, y1], dim=1) 80 | x = torch.cat([x, y2], dim=1) 81 | x = self.fc1(x) 82 | 83 | return x -------------------------------------------------------------------------------- /single_eye_normalized/validation/train2.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch, 2) 21 | c = 
torch.randn(batch, 2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | 29 | if __name__ == "__main__": 30 | 31 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 32 | tk = 10 33 | is_gpu = torch.cuda.is_available() 34 | print("Use of gpu", is_gpu) 35 | for i in range(tk): 36 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(tk, i, raw_gaze, raw_image, raw_pose) 37 | 38 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 39 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 40 | 41 | ltrain = len(t_gaze) 42 | lvaild = len(v_gaze) 43 | print("training dataset size:", len(t_gaze)) 44 | print("test dataset size:", len(v_gaze)) 45 | 46 | 47 | ##### CNN definition ##### 48 | GazeCNN = Model() 49 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 50 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 51 | 52 | if is_gpu: 53 | GazeCNN = GazeCNN.cuda() 54 | criterion = criterion.cuda() 55 | 56 | batch = 512 # 57 | train_range = int(ltrain / batch) 58 | test_range = int(lvaild / batch) 59 | 60 | for epoch in tqdm(range(35)):# 61 | for i in range(train_range): 62 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, t_gaze_2D) 63 | if is_gpu: 64 | img = img.cuda() 65 | pose = pose.cuda() 66 | gaze = gaze.cuda() 67 | gaze_pred_2D = GazeCNN(img, pose) 68 | 69 | loss = criterion(gaze_pred_2D, gaze) 70 | loss.backward() 71 | optimizer.step() 72 | 73 | ## train result 74 | train_loss = 0 75 | for k in tqdm(range(train_range-1)): 76 | img, pose, gaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 77 | GazeCNN = GazeCNN.cpu() 78 | gaze_pred_2D = GazeCNN(img, pose) 79 | train_loss += mean_angle_loss(gaze_pred_2D, gaze) 80 | 81 | ## validation result 82 | valid_loss = 0 83 | for j in tqdm(range(test_range-1)): 84 | img, pose, gaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 85 | GazeCNN = GazeCNN.cpu() 86 | gaze_pred_2D = GazeCNN(img, pose) 87 | valid_loss += mean_angle_loss(gaze_pred_2D, gaze) 88 | 89 | print("train_loss, valid_loss = [{},{}]".format(train_loss/(train_range-1), valid_loss/(test_range-1))) 90 | -------------------------------------------------------------------------------- /two_eye/main.py: -------------------------------------------------------------------------------- 1 | from Dataloader import * 2 | from ARNet import * 3 | from utils import * 4 | import torch 5 | 6 | def get_2D_vector(pose, gaze): 7 | pose2d = [] 8 | gaze2d = [] 9 | for i in np.arange(0, len(pose), 1): 10 | pose2d.append(pose3D_to_2D(pose[i])) 11 | gaze2d.append(gaze3D_to_2D(gaze[i])) 12 | poses = np.array(pose2d) 13 | gazes = np.array(gaze2d) 14 | return poses, gazes 15 | 16 | def batch_process(j, batch, img, pose, gaze): 17 | ''' 18 | :return: a-img, b-pose, c-gaze 19 | ''' 20 | a = torch.randn(batch, 1, 36, 60) 21 | b = torch.randn(batch,2) 22 | c = torch.randn(batch,2) 23 | for i in range(batch): 24 | a[i, 0] = torch.tensor(img[j * batch + i]) 25 | b[i] = torch.tensor(pose[j * batch + i]) 26 | c[i] = torch.tensor(gaze[j * batch + i]) 27 | return a, b, c 28 | 29 | 30 | train_gazel, train_imagel, train_posel, test_gazel, test_imagel, test_posel = collect_data_from_mat("left") 31 | train_gazer, train_imager, train_poser, test_gazer, test_imager, test_poser = collect_data_from_mat("right") 32 | 33 | ###### transfer to 2D vectors ###### 34 | 35 | train_pose2Dl, train_gaze2Dl = 
get_2D_vector(train_posel, train_gazel) 36 | test_pose2Dl, test_gaze2Dl = get_2D_vector(test_posel, test_gazel) 37 | train_pose2Dr, train_gaze2Dr = get_2D_vector(train_poser, train_gazer) 38 | test_pose2Dr, test_gaze2Dr = get_2D_vector(test_poser, test_gazer) 39 | 40 | print("training dataset size:", len(train_gazel)) 41 | print("test dataset size:", len(test_gazel)) 42 | 43 | print("training dataset size:", len(train_gazer)) 44 | print("test dataset size:", len(test_gazer)) 45 | 46 | cuda_gpu = torch.cuda.is_available() 47 | 48 | GazeNet = ARNet() 49 | optimizer = torch.optim.Adam(GazeNet.parameters(), lr=0.0001) 50 | # criterion = Criterion() 51 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 52 | batch = 10 53 | train_range = int(len(train_gaze2Dl) / batch) 54 | test_range = int(len(test_gaze2Dl) / batch) 55 | 56 | for epoch in range(1): 57 | for i in tqdm(range(2)): 58 | imgl, posel, gazel = batch_process(i, batch, train_imagel, train_pose2Dl, train_gaze2Dl) 59 | imgr, poser, gazer = batch_process(i, batch, train_imagel, train_pose2Dl, train_gaze2Dl) 60 | if cuda_gpu: 61 | GazeNet = GazeNet.cuda() 62 | criterion = criterion.cuda() 63 | imgl = imgl.cuda() 64 | posel = posel.cuda() 65 | gazel = gazel.cuda() 66 | imgr = imgr.cuda() 67 | poser = poser.cuda() 68 | gazer = gazer.cuda() 69 | gaze_pred_2D = GazeNet(imgl, imgr, posel, poser) 70 | gaze_trut_2D = torch.cat([gazel, gazer], dim=1) 71 | loss = criterion(gaze_pred_2D, gaze_trut_2D) 72 | 73 | loss.backward(loss.clone().detach()) 74 | optimizer.step() 75 | 76 | valid_loss=0 77 | for j in tqdm(range(test_range - 1)): 78 | vimgl, vposel, vgazel = batch_process(j, batch, test_imagel, test_pose2Dl, test_gaze2Dl) 79 | vimgr, vposer, vgazer = batch_process(j, batch, test_imager, test_pose2Dr, test_gaze2Dr) 80 | if cuda_gpu: 81 | GazeCNN = GazeNet.cpu() 82 | vgaze_pred_2D = GazeNet(vimgl, vimgr, vposel, vposer) 83 | valid_loss += angle_error(vgaze_pred_2D, vgazel, vgazer) 84 | 85 | print(valid_loss/(test_range-1)) 86 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is to examine a model by K-fold validation 3 | calculate and get the mean error for the model 4 | ''' 5 | 6 | from utils import * 7 | from LeNet import * 8 | import torch 9 | 10 | def get_2D_vector(pose, gaze): 11 | pose2d = [] 12 | gaze2d = [] 13 | for i in np.arange(0, len(pose), 1): 14 | pose2d.append(pose3D_to_2D(pose[i])) 15 | gaze2d.append(gaze3D_to_2D(gaze[i])) 16 | poses = np.array(pose2d) 17 | gazes = np.array(gaze2d) 18 | return poses, gazes 19 | 20 | def batch_process(j, batch, img, pose, gaze): 21 | ''' 22 | :return: a-img, b-pose, c-gaze 23 | ''' 24 | a = torch.randn(batch, 1, 36, 60) 25 | b = torch.randn(batch, 2) 26 | c = torch.randn(batch, 2) 27 | for i in range(batch): 28 | a[i, 0] = torch.tensor(img[j * batch + i]) 29 | b[i] = torch.tensor(pose[j * batch + i]) 30 | c[i] = torch.tensor(gaze[j * batch + i]) 31 | return a, b, c 32 | 33 | 34 | if __name__ == "__main__": 35 | 36 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 37 | tk = 5 38 | is_gpu = torch.cuda.is_available() 39 | print("Use of gpu", is_gpu) 40 | for i in range(tk): 41 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(tk, i, raw_gaze, raw_image, raw_pose) 42 | 43 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 44 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 45 | 46 | 
ltrain = len(t_gaze) 47 | lvaild = len(v_gaze) 48 | print("training dataset size:", len(t_gaze)) 49 | print("test dataset size:", len(v_gaze)) 50 | 51 | 52 | ##### CNN definition ##### 53 | GazeCNN = Model() 54 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 55 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 56 | 57 | if is_gpu: 58 | GazeCNN = GazeCNN.cuda() 59 | criterion = criterion.cuda() 60 | 61 | batch = 512 # 62 | train_range = int(ltrain / batch) 63 | test_range = int(lvaild / batch) 64 | 65 | for epoch in tqdm(range(40)):# 66 | for i in range(train_range): 67 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, t_gaze_2D) 68 | if is_gpu: 69 | img = img.cuda() 70 | pose = pose.cuda() 71 | gaze = gaze.cuda() 72 | gaze_pred_2D = GazeCNN(img, pose) 73 | 74 | loss = criterion(gaze_pred_2D, gaze) 75 | loss.backward() 76 | optimizer.step() 77 | 78 | ## train result 79 | train_loss = 0 80 | for k in tqdm(range(train_range)): 81 | img, pose, gaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 82 | GazeCNN = GazeCNN.cpu() 83 | gaze_pred_2D = GazeCNN(img, pose) 84 | train_loss += mean_angle_loss(gaze_pred_2D, gaze) 85 | 86 | ## validation result 87 | valid_loss = 0 88 | for j in tqdm(range(test_range)): 89 | img, pose, gaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 90 | GazeCNN = GazeCNN.cpu() 91 | gaze_pred_2D = GazeCNN(img, pose) 92 | valid_loss += mean_angle_loss(gaze_pred_2D, gaze) 93 | 94 | print("train_loss, valid_loss = [{},{}]".format(train_loss/train_range, valid_loss/test_range)) 95 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/train_without_headpose.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(500, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = self.fc2(x) 34 | return x 35 | 36 | def get_2D_vector(pose, gaze): 37 | pose2d = [] 38 | gaze2d = [] 39 | for i in np.arange(0, len(pose), 1): 40 | pose2d.append(pose3D_to_2D(pose[i])) 41 | gaze2d.append(gaze3D_to_2D(gaze[i])) 42 | poses = np.array(pose2d) 43 | gazes = np.array(gaze2d) 44 | return poses, gazes 45 | 46 | def batch_process(j, batch, img, pose, gaze): 47 | ''' 48 | :return: a-img, b-pose, c-gaze 49 | ''' 50 | a = torch.randn(batch, 1, 36, 60) 51 | b = torch.randn(batch,2) 52 | c = torch.randn(batch,2) 53 | for i in range(batch): 54 | a[i, 0] = torch.tensor(img[j * batch + i]) 55 | b[i] = torch.tensor(pose[j * batch + 
i]) 56 | c[i] = torch.tensor(gaze[j * batch + i]) 57 | return a, b, c 58 | 59 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 60 | 61 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 62 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 63 | 64 | ltrain = len(train_gaze) 65 | ltest = len(test_gaze) 66 | print("training dataset size:", len(train_gaze)) 67 | print("test dataset size:", len(test_gaze)) 68 | 69 | 70 | ### training process ### 71 | GazeCNN = Model() 72 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.001) 73 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 74 | batch = 10 75 | train_range = int(ltrain / batch) 76 | test_range = int(ltest / batch) 77 | 78 | 79 | for epoch in range(10): 80 | for i in tqdm(range(train_range)): 81 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 82 | gaze_pred_2D = GazeCNN(img) 83 | 84 | loss = criterion(gaze_pred_2D, gaze) 85 | loss.retain_grad() 86 | loss.backward() 87 | optimizer.step() 88 | 89 | angle_loss=0 90 | for j in tqdm(range(test_range)): 91 | timg, tpose, tgaze = batch_process(j, batch, test_image, test_pose2D, test_gaze2D) 92 | tgaze_pred_2D = GazeCNN(timg) 93 | angle_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 94 | 95 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 96 | 97 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/train_onetime.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This train_onetime.py is to train for onetime, 3 | which will help to draw a curve on validation loss 4 | and train loss. 5 | ''' 6 | from utils import * 7 | from LeNet import * 8 | import torch 9 | 10 | 11 | def get_2D_vector(pose, gaze): 12 | pose2d = [] 13 | gaze2d = [] 14 | for i in np.arange(0, len(pose), 1): 15 | pose2d.append(pose3D_to_2D(pose[i])) 16 | gaze2d.append(gaze3D_to_2D(gaze[i])) 17 | poses = np.array(pose2d) 18 | gazes = np.array(gaze2d) 19 | return poses, gazes 20 | 21 | def batch_process(j, batch, img, pose, gaze): 22 | ''' 23 | :return: a-img, b-pose, c-gaze 24 | ''' 25 | 26 | a = torch.randn(batch, 1, 36, 60) 27 | b = torch.randn(batch, 2) 28 | c = torch.randn(batch, 2) 29 | for i in range(batch): 30 | a[i, 0] = torch.tensor(img[j * batch + i]) 31 | b[i] = torch.tensor(pose[j * batch + i]) 32 | c[i] = torch.tensor(gaze[j * batch + i]) 33 | return a, b, c 34 | 35 | if __name__ == "__main__": 36 | 37 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 38 | is_gpu = torch.cuda.is_available() 39 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(5, 3, raw_gaze, raw_image, raw_pose) 40 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 41 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 42 | 43 | ltrain = len(t_gaze) 44 | lvaild = len(v_gaze) 45 | print("training dataset size:", ltrain) 46 | print("test dataset size:", lvaild) 47 | 48 | GazeCNN = Model() 49 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 50 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 51 | if is_gpu: 52 | criterion = criterion.cuda() 53 | 54 | batch = 128 55 | train_range = int(ltrain / batch) 56 | test_range = int(lvaild / batch) 57 | 58 | train_loss_list = [] 59 | valid_loss_list = [] 60 | for epoch in range(100): 61 | for i in tqdm(range(train_range)): 62 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, 
t_gaze_2D) 63 | if is_gpu: 64 | GazeCNN = GazeCNN.cuda() 65 | img = img.cuda() 66 | pose = pose.cuda() 67 | gaze = gaze.cuda() 68 | 69 | gaze_pred_2D = GazeCNN(img, pose) 70 | 71 | loss = criterion(gaze_pred_2D, gaze) 72 | loss.backward() 73 | optimizer.step() 74 | 75 | train_loss = 0 76 | for k in tqdm(range(train_range - 1)): 77 | timg, tpose, tgaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 78 | GazeCNN = GazeCNN.cpu() 79 | tgaze_pred_2D = GazeCNN(timg, tpose) 80 | train_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 81 | 82 | train_loss = train_loss / (train_range - 1) 83 | train_loss_list.append(train_loss) 84 | 85 | ## validation result 86 | valid_loss = 0 87 | for j in tqdm(range(test_range - 1)): 88 | vimg, vpose, vgaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 89 | GazeCNN = GazeCNN.cpu() 90 | vgaze_pred_2D = GazeCNN(vimg, vpose) 91 | valid_loss += mean_angle_loss(vgaze_pred_2D, vgaze) 92 | 93 | valid_loss = valid_loss / (test_range - 1) 94 | valid_loss_list.append(valid_loss) 95 | 96 | print("train_loss, valid_loss = [{},{}]".format(train_loss, valid_loss)) 97 | 98 | print("valid loss result:", valid_loss_list) 99 | print("train loss result:", train_loss_list) 100 | 101 | 102 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | 8 | def read_eye_data(mat): 9 | ''' 10 | read each mat file info 11 | ''' 12 | mat_data = loadmat(mat) 13 | right_info = mat_data['data']['right'][0, 0] 14 | gaze = right_info['gaze'][0, 0] 15 | image = right_info['image'][0, 0] 16 | pose = right_info['pose'][0, 0] 17 | return gaze, image, pose 18 | 19 | def collect_data_from_mat(): 20 | ''' 21 | collect data from annotation part 22 | :return: list of index, image, pose, gaze 23 | ''' 24 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 25 | mat_files.sort() 26 | index = list() 27 | # X: image, head_pose 28 | # y: gaze vector 29 | # index: pnum, pday 30 | i = 0 31 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 32 | for matfile in tqdm(mat_files): 33 | pnum = matfile.split('/')[-2] # pxx 34 | pday = matfile.split('/')[-1].split('.')[0] # day0x 35 | index.append(pnum + '/' + pday) 36 | 37 | fgaze, fimage, fpose = read_eye_data(matfile) 38 | if int(pnum[1:]) < 7: 39 | if train_gaze == []: 40 | train_gaze = fgaze 41 | train_image = fimage 42 | train_pose = fpose 43 | else: 44 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 45 | train_image = np.append(train_image, fimage, axis = 0) 46 | train_pose = np.append(train_pose, fpose, axis = 0) 47 | else: 48 | if test_gaze == []: 49 | test_gaze = fgaze 50 | test_image = fimage 51 | test_pose = fpose 52 | else: 53 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 54 | test_image = np.append(test_image, fimage, axis = 0) 55 | test_pose = np.append(test_pose, fpose, axis = 0) 56 | i += 1 57 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose 58 | 59 | def pose3D_to_2D(pose): 60 | ''' 61 | pose (a, b, c) is rotation (angle) 62 | M = Rodrigues((x,y,z)) 63 | Zv = (the third column of M) 64 | theta = asin(Zv[1]) 65 | phi = atan2(Zv[0], Zv[2]) 66 | ''' 67 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 68 | vec = M[:, 2] 69 | yaw = np.arctan2(vec[0], vec[2]) 70 | 
pitch = np.arcsin(vec[1]) 71 | return np.array([pitch, yaw]) 72 | 73 | 74 | def gaze3D_to_2D(gaze): 75 | ''' 76 | gaze (x, y, z) is direction 77 | theta = asin(-y) 78 | phi = atan2(-x, -z) 79 | ''' 80 | x, y, z = (gaze[i] for i in range(3)) 81 | pitch = np.arcsin(-y) 82 | yaw = np.arctan2(-x, -z) 83 | return np.stack((pitch, yaw)).T 84 | 85 | 86 | def gaze2D_to_3D(gaze): 87 | ''' 88 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 89 | :return: gaze=(x,y,z) 90 | ''' 91 | pitch = gaze[0] 92 | yaw = gaze[1] 93 | x = -np.cos(pitch) * np.sin(yaw) 94 | y = -np.sin(pitch) 95 | z = -np.cos(pitch) * np.cos(yaw) 96 | norm = np.sqrt(x**2 + y**2 + z**2) 97 | x /= norm 98 | y /= norm 99 | z /= norm # all normalized 100 | return x, y, z 101 | 102 | 103 | def mean_angle_loss(pred, truth): 104 | ''' 105 | :param pred,truth: type=torch.Tensor 106 | :return: 107 | ''' 108 | pred = pred.detach().numpy() 109 | ans = 0 110 | for i in range(len(pred)): 111 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 112 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 113 | angles = p_x * t_x + p_y * t_y + p_z * t_z 114 | ans += torch.acos(angles) * 180 / np.pi 115 | return ans / len(pred) 116 | 117 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | import math 8 | 9 | def read_eye_data(mat): 10 | ''' 11 | read each mat file info 12 | ''' 13 | mat_data = loadmat(mat) 14 | right_info = mat_data['data']['right'][0, 0] 15 | gaze = right_info['gaze'][0, 0] 16 | image = right_info['image'][0, 0] 17 | pose = right_info['pose'][0, 0] 18 | return gaze, image, pose 19 | 20 | def collect_data_from_mat(): 21 | ''' 22 | collect data from annotation part 23 | :return: list of index, image, pose, gaze 24 | ''' 25 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 26 | mat_files.sort() 27 | index = list() 28 | # X: image, head_pose 29 | # y: gaze vector 30 | # index: pnum, pday 31 | i = 0 32 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 33 | for matfile in tqdm(mat_files): 34 | pnum = matfile.split('/')[-2] # pxx 35 | pday = matfile.split('/')[-1].split('.')[0] # day0x 36 | index.append(pnum + '/' + pday) 37 | 38 | fgaze, fimage, fpose = read_eye_data(matfile) 39 | if int(pnum[1:]) < 7: 40 | if train_gaze == []: 41 | train_gaze = fgaze 42 | train_image = fimage 43 | train_pose = fpose 44 | else: 45 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 46 | train_image = np.append(train_image, fimage, axis = 0) 47 | train_pose = np.append(train_pose, fpose, axis = 0) 48 | else: 49 | if test_gaze == []: 50 | test_gaze = fgaze 51 | test_image = fimage 52 | test_pose = fpose 53 | else: 54 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 55 | test_image = np.append(test_image, fimage, axis = 0) 56 | test_pose = np.append(test_pose, fpose, axis = 0) 57 | i += 1 58 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose 59 | 60 | def pose3D_to_2D(pose): 61 | ''' 62 | pose (a, b, c) is rotation (angle) 63 | M = Rodrigues((x,y,z)) 64 | Zv = (the third column of M) 65 | theta = asin(Zv[1]) 66 | phi = atan2(Zv[0], Zv[2]) 67 | ''' 68 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 69 | vec = M[:, 2] 70 | yaw = np.arctan2(vec[0], vec[2]) 71 | pitch = 
np.arcsin(vec[1]) 72 | return np.array([pitch, yaw]) 73 | 74 | 75 | def gaze3D_to_2D(gaze): 76 | ''' 77 | gaze (x, y, z) is direction 78 | theta = asin(-y) 79 | phi = atan2(-x, -z) 80 | ''' 81 | x, y, z = (gaze[i] for i in range(3)) 82 | pitch = np.arcsin(-y) 83 | yaw = np.arctan2(-x, -z) 84 | return np.stack((pitch, yaw)).T 85 | 86 | 87 | def gaze2D_to_3D(gaze): 88 | ''' 89 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 90 | :return: gaze=(x,y,z) 91 | ''' 92 | pitch = gaze[0] 93 | yaw = gaze[1] 94 | x = -np.cos(pitch) * np.sin(yaw) 95 | y = -np.sin(pitch) 96 | z = -np.cos(pitch) * np.cos(yaw) 97 | norm = np.sqrt(x**2 + y**2 + z**2) 98 | x /= norm 99 | y /= norm 100 | z /= norm 101 | return x, y, z 102 | 103 | 104 | def mean_angle_loss(pred, truth): 105 | ''' 106 | :param pred,truth: type=torch.Tensor 107 | :return: 108 | ''' 109 | pred = pred.detach().numpy() 110 | ans = 0 111 | for i in range(len(pred)): 112 | p_x, p_y, p_z = (pred[i][j] for j in range(3)) 113 | t_x, t_y, t_z = (truth[i][j] for j in range(3)) 114 | # print("p_x={}, p_y={}, p_z={}".format(p_x, p_y, p_z)) 115 | # print("t_x={}, t_y={}, t_z={}".format(t_x, t_y, t_z)) 116 | angles = (p_x * t_x + p_y * t_y + p_z * t_z)/math.sqrt(p_x**2+p_y**2+p_z**2) * math.sqrt(t_x**2+t_y**2+t_z**2) 117 | ans += math.acos(angles) * 180 / np.pi 118 | return ans / len(pred) 119 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | import math 8 | from scipy.io import loadmat 9 | 10 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 11 | 12 | data_dict = dict() 13 | 14 | def read_eye_data(mat): 15 | ''' 16 | read each mat file info 17 | ''' 18 | mat_data = loadmat(mat) 19 | right_info = mat_data['data']['right'][0, 0] 20 | gaze = right_info['gaze'][0, 0] 21 | image = right_info['image'][0, 0] 22 | pose = right_info['pose'][0, 0] 23 | return gaze, image, pose 24 | 25 | def collect_data_from_mat(): 26 | ''' 27 | collect data from annotation part 28 | :param path: path of normalized data 29 | :return: list of index, image, pose, gaze 30 | ''' 31 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 32 | # mat_files.sort() 33 | gaze = list() 34 | image = list() 35 | index = list() 36 | pose = list() 37 | for matfile in tqdm(mat_files): 38 | pnum = matfile.split('/')[-2] # pxx 39 | pday = matfile.split('/')[-1].split('.')[0] # day0x 40 | index.append(pnum + '/' + pday) 41 | 42 | fgaze, fimage, fpose = read_eye_data(matfile) 43 | 44 | if gaze == []: 45 | gaze = fgaze 46 | image = fimage 47 | pose = fpose 48 | else: 49 | gaze = np.append(gaze, fgaze, axis = 0) 50 | image = np.append(image, fimage, axis = 0) 51 | pose = np.append(pose, fpose, axis = 0) 52 | 53 | return gaze, image, pose, index 54 | 55 | 56 | def get_kfold_data(k, i, gaze, image, pose): 57 | ''' 58 | implement k-fold validation 59 | input type = numpy.narray 60 | output type = numoy.narray 61 | ''' 62 | fold_size = gaze.shape[0] // k 63 | start = i * fold_size 64 | if i != k - 1: # Not the final round 65 | end = (i + 1) * fold_size 66 | v_gaze, v_pose, v_image = gaze[start:end], pose[start:end],image[start:end] 67 | t_gaze = np.concatenate((gaze[0:start], gaze[end:]), axis=0) 68 | t_pose = np.concatenate((pose[0:start], pose[end:]), 
axis=0) 69 | t_image = np.concatenate((image[0:start], image[end:]), axis=0) 70 | else: 71 | v_gaze, v_pose, v_image = gaze[start:], pose[start:],image[start:] 72 | t_gaze, t_pose, t_image = gaze[0:start], pose[0:start],image[0:start] 73 | 74 | return t_gaze, t_pose, t_image, v_gaze, v_pose, v_image 75 | 76 | 77 | def pose3D_to_2D(pose): 78 | ''' 79 | pose (a, b, c) is rotation (angle) 80 | M = Rodrigues((x,y,z)) 81 | Zv = (the third column of M) 82 | theta = asin(Zv[1]) 83 | phi = atan2(Zv[0], Zv[2]) 84 | ''' 85 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 86 | vec = M[:, 2] 87 | yaw = np.arctan2(vec[0], vec[2]) 88 | pitch = np.arcsin(vec[1]) 89 | return np.array([pitch, yaw]) 90 | 91 | 92 | def gaze3D_to_2D(gaze): 93 | ''' 94 | gaze (x, y, z) is direction 95 | theta = asin(-y) 96 | phi = atan2(-x, -z) 97 | ''' 98 | x, y, z = (gaze[i] for i in range(3)) 99 | pitch = np.arcsin(-y) 100 | yaw = np.arctan2(-x, -z) 101 | return np.stack((pitch, yaw)).T 102 | 103 | 104 | def gaze2D_to_3D(gaze): 105 | ''' 106 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 107 | :return: gaze=(x,y,z) 108 | ''' 109 | pitch = gaze[0] 110 | yaw = gaze[1] 111 | x = -np.cos(pitch) * np.sin(yaw) 112 | y = -np.sin(pitch) 113 | z = -np.cos(pitch) * np.cos(yaw) 114 | norm = np.sqrt(x**2 + y**2 + z**2) 115 | x /= norm 116 | y /= norm 117 | z /= norm 118 | return x, y, z 119 | 120 | 121 | def mean_angle_loss(pred, truth): 122 | ''' 123 | :param pred,truth: type=torch.Tensor 124 | :return: 125 | ''' 126 | pred = pred.detach().numpy() 127 | ans = 0 128 | for i in range(len(pred)): 129 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 130 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 131 | angles = p_x * t_x + p_y * t_y + p_z * t_z 132 | ans += torch.acos(angles) * 180 / np.pi 133 | return ans / len(pred) 134 | 135 | 136 | # gaze, image, pose, index = collect_data_from_mat() 137 | # t_gaze, t_pose, t_image, v_gaze, v_pose, v_image=get_kfold_data(10,0,gaze,image,pose) 138 | -------------------------------------------------------------------------------- /single_eye_normalized/visualize/draw.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import matplotlib.pyplot as plt\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 3, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": "Text(0.5, 1.0, 'dataset distribution(by person)')" 22 | }, 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | }, 27 | { 28 | "data": { 29 | "text/plain": "
", 30 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAFNCAYAAADRi2EuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3debicZX3/8fdHQEFEFokRBAwCRdEKakSs1qpoRaFC1SL8xEalUq/i1mo16s+6t6h1oT+tLaISEUEWKRQshSJKrWtYlE2LSxCQJQoIisr2/f3xPEcnx3OSScjcMzm8X9d1rnn2+zszyZzPue9nnidVhSRJkkbvXuMuQJIk6Z7C4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLykCZXkqCTvGncdd1eSZUme3k+/KcmRa/HYP0/y0H56rb5eSf4lyVvW0rHmJflOko36+S8m+Yu1cex1QZJvJHnEuOuQJoHBS5oDWv0iv7vtVNXfV9Uq9x+2naq6X1X9YE3rGWjvxUm+PO3YL6+qd97dY/cWA0dV1S/X0vHWNf8IvGPcRUiTwOAlaZ2TZP1x1zCsJPcBFgGfHnctKzPi1/RU4KlJHjTCNqR1gsFLmhBJHp3k/CS3JPkssOHAus2TnJZkeZIb++lt+nXvBv4Q+HA/9PbhfvnhSa5McnOS85L84cDxdk+ytF93XZIPDKzbI8lXktyU5FtJnrKydmZ4Hi9KckWSnyZ587R1b0vy6X56wySf7re7Kck3k8xfyfOpJIcmuRy4fGDZjgNNbJnkrP41/FKSh/TbLei3XX+gli8m+YskDwf+BXhC395N/foVhi6TvCzJ95LckOTUJFsPrKskL09yef9cPpIk/erHAzdV1VXTXqod+iG4m5OckmSL/linJ3nltNft20n+dIbXeup5HZLkx0muSfK6gfX3SrI4yff71/n4gXam9j04yY+AL8z2nvTbb90/7xv61+Fl097X45N8qn/tL0mycGp9Vf0KOA945vTnIN3TGLykCZDk3sC/AUcDWwAnAM8b2ORewCeBhwDbAb8EPgxQVW8G/ht4RT/09op+n28Cu/XH+wxwQpKpMHc4cHhV3R/YATi+r+PBwOnAu/r9XgeclGTeStoZfB67AB8FXgRsDTwA2GaWp70I2BTYtt/u5cAvV9HOfnRBZpdZjvlC4J3AlsCFwDGzbPcbVXVZ3/ZX+/Y2m+F5PQ34B2B/YCvgCuC4aZvtAzwOeFS/3VTI+H3guzM0/efAS/vj3QH8U798CXDQQNu7AlPvy2yeCuwE/DHwhvTn1AGvpHvN/oju/bgR+Mi0ff8IeHhf74zvSb/dccBV/XGeD/x9/7pMeU6/zWZ0PVzTg/llwK4reQ7SPYLBS5oMewAbAB+qqtur6kS64ARAVf20qk6qqlur6hbg3XS/MGdVVZ/u97ujqt4P3AfYuV99O7Bjki2r6udV9bV++UHA56vq81V1V1WdBSwFnj3k83g+cFpVnVtVvwbeAtw1y7a30/1y37Gq7qyq86rq5lUc/x+q6oaVnCt1+kDbb6brxdp2yNpX5oXAJ6rq/P7Yb+yPvWBgm8Oq6qaq+hFwDl3ohS6I3DLDMY+uqour6hd0r9P+SdajCy2/l2SnfrsXAZ+tqttWUt/bq+oXVXURXUA/sF/+cuDNVXVVX/fbgOdnxWHFt/X7/pJZ3pP+NXwi8Iaq+lVVXQgcSRcep3y5/3dzJ90fENND1i39ayHdoxm8pMmwNXB1rXjX+iumJpLcN8m/9kN4NwPnApv1v6hnlOR1SS5L8rN++GxTup4ggIOB3wO+0w8n7dMvfwjwZ/0w0039fk+i65UZ9nlcOTXTh4qfzrLt0cB/Asf1w2TvTbLBKo5/5bDrq+rnwA19TXfX1gy8H/2xf0rXEzXl2oHpW4H79dM3ApusrNb+2BsAW/bDcp8FDkpyL7oQdfQq6pt+rKnn/BDg5IH38jLgTmD+LPvO9p5sDdzQh/7Bdlb2/DecFvA2AW5axfOQ5jyDlzQZrgEePHBeEHRDilNeS9db9fh+ePDJ/fKp7QcDG+nO53o93ZDX5v3w2c+mtq+qy6vqQOCBwHuAE5NsTPdL+Oiq2mzgZ+OqOmymdmZ5Hr/pYUpyX7oelN/R9+y9vap2Af6AbqhuqgdltnZW1f5g2/ejGy79MfCLfvF9B7YdPNF7Vcf9MV2ImTr2xnTP6+pV7AfwbbqQO2utdO/17cBP+vkldL1sewK3VtVXV9HG9GP9uJ++EnjWtPdzw6oarPs3z30l78mPgS2SDAbI7Rju+U95OPCt1dhempMMXtJk+CrdeT6vSrJBkucCuw+s34TuXJub+pOj3zpt/+uAh07b/g5gObB+kr8D7j+1MslB/Xlbd/HbXoi76L559ydJnplkvf5k66ekP5F/hnamOxHYJ8mT+vPW3sEsnzNJnprk9/teu5vpgsfUsOSq2pnNswfafifwtaq6sqqW04WEg/rn9VK6c9umXAds0+83k2OBlyTZLd23FP8e+HpVLRuipm/Q9U4+eNryg5Ls0ofTdwAn9sN09EHrLuD9rLq3C+Atfa/oI4CX0PWYQfelgXfnt18ymJdk39kOMtt7UlVXAl8B/qH/N/Eoul7Tob6p2Z9b+FjgrGG2l+Yyg5c0Afrzd54LvJhueOwFwOcGNvkQsBFdj8jXgDOmHeJwunN3bkzyT3TDRWcA/0s3JPQrVhxS2gu4JMnP+30PqKpf9r9g9wXeRBfargT+lt9+VkxvZ/rzuAQ4lO5k/mvohtmmf5tvyoPogtrNdENgX+K3IWOl7azEZ+hC6Q10v+gPGlj3sv65/BR4BF2QmPIF4BLg2iQ/YZqq+i+687BO6p/XDsABwxTUv7dHTasFuud6FN0Q3YbAq6at/xTdifnDhJsvAd8Dzgb+sarO7JcfTnfO2JlJbqH7t/P4lRxnZe/JgcACut6vk4G39q/LMP4E+GJV/XiVW0pzXFY8pUSStLYlmUf3Tc1HD3sR1SR/DhxSVU9ayTYLgB8CG1TVHWuh1JFI8nXg4Kq6eNy1SOO2zlyEUJLWVf1Q58OG3b4ffvwr4J9HVlRDVbWyXjbpHsWhRkmaIEmeSTfMex3d0KmkOcShRkmSpEbs8ZIkSWrE4CVJktTIOnFy/ZZbblkLFiwYdxmSJEmrdN555/2kqubNtG6dCF4LFixg6dKl4y5DkiRplZJcMds6hxolSZIaMXhJkiQ1YvCSJElqZGTBK8nOSS4c+Lk5yWuSbJHkrCSX94+bj6oGSZKkSTKy4FVV362q3apqN7qb1d5Kd2PVxcDZVbUT3Q1dF4+qBkmSpEnSaqhxT+D7VXUFsC+wpF++BNivUQ2SJElj1Sp4HQAc20/Pr6pr+ulrgfmNapAkSRqrkQevJPcGngOcMH1ddTeKnPFmkUkOSbI0ydLly5ePuEpJkqTRa9Hj9Szg/Kq6rp+/LslWAP3j9TPtVFVHVNXCqlo4b96MF3+VJElap7QIXgfy22FGgFOBRf30I
uCUBjVIkiSN3UiDV5KNgWcAnxtYfBjwjCSXA0/v5yVJkua8kd6rsap+ATxg2rKf0n3LUTNYsPj0pu0tO2zvpu1JknRP5pXrJUmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIj64+7AGlVFiw+vVlbyw7bu1lbkqR7Hnu8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDXiLYMkrTFv5yRJq8ceL0mSpEYMXpIkSY0YvCRJkhoZafBKslmSE5N8J8llSZ6QZIskZyW5vH/cfJQ1SJIkTYpR93gdDpxRVQ8DdgUuAxYDZ1fVTsDZ/bwkSdKcN7LglWRT4MnAxwGq6raqugnYF1jSb7YE2G9UNUiSJE2SUfZ4bQ8sBz6Z5IIkRybZGJhfVdf021wLzB9hDZIkSRNjlMFrfeAxwEer6tHAL5g2rFhVBdRMOyc5JMnSJEuXL18+wjIlSZLaGGXwugq4qqq+3s+fSBfErkuyFUD/eP1MO1fVEVW1sKoWzps3b4RlSpIktTGy4FVV1wJXJtm5X7QncClwKrCoX7YIOGVUNUiSJE2SUd8y6JXAMUnuDfwAeAld2Ds+ycHAFcD+I65BkiRpIow0eFXVhcDCGVbtOcp2JUmSJpFXrpckSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUyKjv1ShJI7dg8elN21t22N5N25M0d9jjJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUyPqjPHiSZcAtwJ3AHVW1MMkWwGeBBcAyYP+qunGUdUiSJE2CFj1eT62q3apqYT+/GDi7qnYCzu7nJUmS5rxxDDXuCyzpp5cA+42hBkmSpOZGHbwKODPJeUkO6ZfNr6pr+ulrgfkjrkGSJGkijPQcL+BJVXV1kgcCZyX5zuDKqqokNdOOfVA7BGC77bYbcZmSJEmjN9Ier6q6un+8HjgZ2B24LslWAP3j9bPse0RVLayqhfPmzRtlmZIkSU2MLHgl2TjJJlPTwB8DFwOnAov6zRYBp4yqBkmSpEkyyqHG+cDJSaba+UxVnZHkm8DxSQ4GrgD2H2ENkiRJE2NkwauqfgDsOsPynwJ7jqpdSZKkSeWV6yVJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjo75lkCTdYyxYfHrT9pYdtnfT9iTdffZ4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktTIKoNXkvsmeUuSj/XzOyXZZ/SlSZIkzS3D9Hh9Evg18IR+/mrgXSOrSJIkaY4aJnjtUFXvBW4HqKpbgYy0KkmSpDlomOB1W5KNgAJIsgNdD5gkSZJWw/pDbPM24Axg2yTHAE8EXjzCmiRJkuakVQavqjozyXnAHnRDjK+uqp+MvDJJkqQ5ZphvNZ4NPL6qTq+q06rqJ0mOaFCbJEnSnDLMOV7bA29I8taBZQtHVI8kSdKcNUzwugnYE5if5N+TbDrimiRJkuakYYJXquqOqvor4CTgy8ADR1uWJEnS3DPMtxr/ZWqiqo5KchFw6OhKkiRJmptm7fFKcv9+8oQkW0z9AD8EXjdsA0nWS3JBktP6+e2TfD3J95J8Nsm979YzkCRJWkesbKjxM/3jecDS/vG8gflhvRq4bGD+PcAHq2pH4Ebg4NU4liRJ0jpr1uBVVfv0j9tX1UP7x6mfhw5z8CTbAHsDR/bzAZ4GnNhvsgTY7+48AUmSpHXFMNfxemKSjfvpg5J8IMl2Qx7/Q8Drgbv6+QcAN1XVHf38VcCDV7NmSZKkddIw32r8KHBrkl2B1wLfB45e1U5J9gGur6rz1qSwJIckWZpk6fLly9fkEJIkSRNlmOB1R1UVsC/w4ar6CLDJEPs9EXhOkmXAcXRDjIcDmyWZ+jblNsDVM+1cVUdU1cKqWjhv3rwhmpMkSZpswwSvW5K8ETgIOD3JvYANVrVTVb2xqrapqgXAAcAXquqFwDnA8/vNFgGnrFHlkiRJ65hhruP1AuD/AAdX1bX9+V3vuxttvgE4Lsm7gAuAj9+NY601Cxaf3rS9ZYft3bS9NdHyNVkXXg9Jku6uVQavqroW+MDA/I+AT61OI1X1ReCL/fQPgN1XZ39JkqS5YJihRkmSJK0FBi9JkqRGVjnUmOTVVXX4qpZJasfz7yRp3TRMj9eiGZa9eC3XIUmSNOfN2uOV5EC6bzNun+TUgVWbADeMujBJkqS5ZmVDjV8BrgG2BN4/sPwW4NujLEqSJGkumjV4VdUVwBXAE5I8BNipqv4ryUbARnQBTJIkSUMa5ibZLwNOBP61X7QN8G+jLEqSJGkuGubk+kPp7rt4M0BVXQ48cJRFSZIkzUXDBK9fV9VtUzP9Da5rdCVJkiTNTcMEry8leROwUZJnACcA/z7asiRJkuaeYYLXYmA5cBHwl8Dngf87yqIkSZLmomFukn0X8DHgY0m2ALapKocaJUmSVtMw32r8YpL796HrPLoA9sHRlyZJkjS3DDPUuGlV3Qw8F/hUVT0e2HO0ZUmSJM09wwSv9ZNsBewPnDbieiRJkuasYYLXO4D/BL5XVd9M8lDg8tGWJUmSNPcMc3L9CXSXkJia/wHwvFEWJUmSNBetMngl2RA4GHgEsOHU8qp66QjrkiRJmnOGGWo8GngQ8EzgS3T3avQG2ZIkSatpmOC1Y1W9BfhFVS0B9gYeP9qyJEmS5p5hgtft/eNNSR4JbIo3yZYkSVptqzzHCzgiyeZ0twk6Fbgf8JaRViVJkjQHDRO8zq6qG4FzgYcCJNl+pFVJkiTNQcMMNZ40w7IT13YhkiRJc92sPV5JHkZ3CYlNkzx3YNX9GbishCRJkoazsqHGnYF9gM2APxlYfgvwslEWJUmSNBfNGryq6hTglCRPqKqvNqxJkiRpThrm5PoLkhzKal65vr/i/bnAffp2Tqyqt/Yn5h8HPAA4D3hRVd22hvVLkiStM0Z55fpfA0+rql2B3YC9kuwBvAf4YFXtCNxIdzsiSZKkOW9kV66vzs/72Q36nwKexm+/FbkE2G+1q5YkSVoHjfTK9UnWS3IhcD1wFvB94KaquqPf5CrgwatXsiRJ0rppmOA1deX6t9Bduf5S4L3DHLyq7qyq3eiGJ3cHHjZsYUkOSbI0ydLly5cPu5skSdLEWuXJ9VV1ZD/5Jfor16+uqropyTnAE4DNkqzf93ptA1w9yz5HAEcALFy4sNakXUmSpEmysguo/s3KdqyqD6xsfZJ5wO196NoIeAbdifXnAM+n
+2bjIuCU1S1akiRpXbSyHq9N+sedgcfRDTNCdzHVbwxx7K2AJUnWoxvSPL6qTktyKXBckncBFwAfX6PKJUmS1jEru4Dq2wGSnAs8pqpu6effBpy+qgNX1beBR8+w/Ad053tJkiTdowxzcv18YPACp7f1yyRJkrQahrly/aeAbyQ5uZ/fDzhqZBVJkiTNUcN8q/HdSf4D+MN+0Uuq6oLRliVJkjT3DNPjRVWdD5w/4lokSZLmtGHO8ZIkSdJaYPCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNWLwkiRJamSoWwZJggWLT2/a3rLD9m7aniRp9OzxkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNjCx4Jdk2yTlJLk1ySZJX98u3SHJWksv7x81HVYMkSdIkGWWP1x3Aa6tqF2AP4NAkuwCLgbOraifg7H5ekiRpzhtZ8Kqqa6rq/H76FuAy4MHAvsCSfrMlwH6jqkGSJGmSNDnHK8kC4NHA14H5VXVNv+paYH6LGiRJksZt/VE3kOR+wEnAa6rq5iS/WVdVlaRm2e8Q4BCA7bbbbtRlStKcsmDx6c3aWnbY3s3aktZ1I+3xSrIBXeg6pqo+1y++LslW/fqtgOtn2reqjqiqhVW1cN68eaMsU5IkqYlRfqsxwMeBy6rqAwOrTgUW9dOLgFNGVYMkSdIkGeVQ4xOBFwEXJbmwX/Ym4DDg+CQHA1cA+4+wBkmSpIkxsuBVVV8GMsvqPUfVriRJ07U85w08702z88r1kiRJjRi8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoZ5b0aJUn3cC1v1eNterQusMdLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDWy/rgLkCRJ7S1YfHqztpYdtneztiadPV6SJEmNGLwkSZIaGVnwSvKJJNcnuXhg2RZJzkpyef+4+ajalyRJmjSj7PE6Cthr2rLFwNlVtRNwdj8vSZJ0jzCy4FVV5wI3TFu8L7Ckn14C7Deq9iVJkiZN63O85lfVNf30tcD8xu1LkiSNzdhOrq+qAmq29UkOSbI0ydLly5c3rEySJGk0Wgev65JsBdA/Xj/bhlV1RFUtrKqF8+bNa1agJEnSqLQOXqcCi/rpRcApjduXJEkam1FeTuJY4KvAzkmuSnIwcBjwjCSXA0/v5yVJku4RRnbLoKo6cJZVe46qTUmSpEnmleslSZIaMXhJkiQ1YvCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRkZ2HS9JkrSiBYtPb9ressP2btremmj5mkzC62GPlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNTKW4JVkryTfTfK9JIvHUYMkSVJrzYNXkvWAjwDPAnYBDkyyS+s6JEmSWhtHj9fuwPeq6gdVdRtwHLDvGOqQJElqahzB68HAlQPzV/XLJEmS5rRUVdsGk+cDe1XVX/TzLwIeX1WvmLbdIcAh/ezOwHebFjq8LYGfjLsIrGMmk1KLdfyuSanFOlY0KXXA5NRiHSualDpgsmqZ7iFVNW+mFeu3rgS4Gth2YH6bftkKquoI4IhWRa2pJEuraqF1TFYdMDm1WMfvmpRarGMy64DJqcU6JrMOmKxaVsc4hhq/CeyUZPsk9wYOAE4dQx2SJElNNe/xqqo7krwC+E9gPeATVXVJ6zokSZJaG8dQI1X1eeDz42h7BCZlONQ6ftek1GIdv2tSarGOFU1KHTA5tVjHiialDpisWobW/OR6SZKkeypvGSRJktSIwWsNJflEkuuTXDzmOrZNck6SS5NckuTVY6pjwyTfSPKtvo63j6OOgXrWS3JBktPGXMeyJBcluTDJ0jHWsVmSE5N8J8llSZ4whhp27l+HqZ+bk7ymdR19LX/d/zu9OMmxSTYcRx19La/u67ik5esx02dYki2SnJXk8v5x8zHV8Wf963FXkmbfWpullvf1/2++neTkJJuNqY539jVcmOTMJFuPo46Bda9NUkm2HHUds9WS5G1Jrh74THl2i1ruLoPXmjsK2GvcRQB3AK+tql2APYBDx3QLpl8DT6uqXYHdgL2S7DGGOqa8GrhsjO0PempV7Tbmrz0fDpxRVQ8DdmUMr01Vfbd/HXYDHgvcCpzcuo4kDwZeBSysqkfSfcnngNZ19LU8EngZ3R09dgX2SbJjo+aP4nc/wxYDZ1fVTsDZ/fw46rgYeC5wboP2V1XLWcAjq+pRwP8CbxxTHe+rqkf1/39OA/5uTHWQZFvgj4EfNahhpbUAH5z6XOnPH594Bq81VFXnAjdMQB3XVNX5/fQtdL9Qm98JoDo/72c36H/GcgJhkm2AvYEjx9H+pEmyKfBk4OMAVXVbVd003qrYE/h+VV0xpvbXBzZKsj5wX+DHY6rj4cDXq+rWqroD+BJd4Bi5WT7D9gWW9NNLgP3GUUdVXVZVzS+aPUstZ/bvDcDX6K49OY46bh6Y3ZgGn68r+T33QeD1LWoYopZ1jsFrDkmyAHg08PUxtb9ekguB64GzqmosdQAfovtQuGtM7Q8q4Mwk5/V3YxiH7YHlwCf74dcjk2w8plqmHAAcO46Gq+pq4B/p/lq/BvhZVZ05jlroenb+MMkDktwXeDYrXmC6tflVdU0/fS0wf4y1TKKXAv8xrsaTvDvJlcALadPjNVMN+wJXV9W3xtH+DF7RD8F+osXQ+Npg8JojktwPOAl4zbS/jJqpqjv7bvBtgN37YZSmkuwDXF9V57VuexZPqqrHAM+iGwZ+8hhqWB94DPDRqno08AvaDCHNqL9w8nOAE8bU/uZ0PTvbA1sDGyc5aBy1VNVlwHuAM4EzgAuBO8dRy3TVfeXdr733kryZ7tSOY8ZVQ1W9uaq27Wt4xaq2X9v6Pw7exJhC3ww+CuxAd3rLNcD7x1vOcAxec0CSDehC1zFV9blx19MPY53DeM6BeyLwnCTLgOOApyX59BjqAH7Tu0JVXU93PtPuYyjjKuCqgR7IE+mC2Lg8Czi/qq4bU/tPB35YVcur6nbgc8AfjKkWqurjVfXYqnoycCPdeUTjcl2SrQD6x+vHWMvESPJiYB/ghTUZ12A6BnjeGNrdge4Plm/1n7HbAOcnedAYaqGqruv/4L8L+Bjj+XxdbQavdVyS0J27c1lVfWCMdcyb+rZPko2AZwDfaV1HVb2xqrapqgV0w1lfqKqx9GYk2TjJJlPTdCe
jNv8WbFVdC1yZZOd+0Z7Apa3rGHAgYxpm7P0I2CPJffv/P3syxi9iJHlg/7gd3fldnxlXLXS3b1vUTy8CThljLRMhyV50py48p6puHWMdOw3M7st4Pl8vqqoHVtWC/jP2KuAx/WdMc1N/JPT+lDF8vq6JsVy5fi5IcizwFGDLJFcBb62qj4+hlCcCLwIu6s+vAnjTGL7dsRWwJMl6dIH++Koa66UcJsB84OTudzvrA5+pqjPGVMsrgWP6Yb4fAC8ZRxF9AH0G8JfjaB+gqr6e5ETgfLqhowsY7xWwT0ryAOB24NBWX3yY6TMMOAw4PsnBwBXA/mOq4wbg/wHzgNOTXFhVzxxTLW8E7gOc1f9f/lpVvXwMdTy7/+PpLrr3ZqQ1zFbHmH7PzfaaPCXJbnRD4ssY4+fK6vDK9ZIkSY041ChJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkjUWSBerVMzgAAAMXSURBVEnW+Lo7SfYb0w3h75Yky5JsOe46JI2HwUvSumo/YK0Er/5m2RNvXalT0uwMXpLWiiSHJTl0YP5tSV6XzvuSXJzkoiQvmGHfFyf58MD8aUme0k//vL858LeSfC3J/CR/QHe/x/cluTDJDv3PGf0Nyf87ycP6/eclOSnJN/ufJw7Ud3SS/wGOnlbPU5J8McmJSb6T5Jj+Kvcr9FglWZjkiwPHW9K3fUWS5yZ5b/+cz+hv7TXl9f3ybyTZcU3rlLTuMXhJWls+y4pXOt+/X/ZcupvY7kp3n8T3TbvVx6psTHe18F2Bc4GXVdVX6G5v87dVtVtVfZ/u6vOvrKrHAq8D/rnf/3Dgg1X1OLr72x05cOxdgKdX1YEztPto4DX9Ng+lu0vEquwAPI0uFH4aOKeqfh/4JbD3wHY/65d/GPjQ3axT0jrEbmtJa0VVXZDkgUm2prvVy41VdWWSvwGOrao76W7C/CXgccC3hzz0bcDU7afOo7vl0AqS3I/uRtcn9B1T0N3mBbqwt8vA8vv32wOcWlW/nKXdb1TVVf3xLwQWAF9eRa3/UVW3J7kIWA+YukXURf3+U44dePzg3axT0jrE4CVpbToBeD7wILrermHdwYo98BsOTN9ev7232Z3M/Ll1L+CmqtptlnV7VNWvBhf2AecXK6np1wPTg+0O1rohK/o1QFXdlWSw7rum1V0zTK9pnZLWIQ41SlqbPgscQBe+TuiX/TfwgiTrJZkHPBn4xrT9lgG7JblXkm2B3Ydo6xZgE4Cquhn4YZI/A+jPK9u13+5MupuE06+bKZytjmXAY/vp563hMV4w8PjVfnpt1ylpAhm8JK01VXUJXRi6uqqu6RefTDes+C3gC8Drq+raabv+D/BD4FLgn4Dzh2juOOBvk1yQZAfghcDBSb4FXALs22/3KmBhkm8nuRR4+Ro/wc7bgcOTLKXrCVsTmyf5NvBq4K9HVKekCZTf9oRLkiRplOzxkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDXy/wGW/mQoMVrYgQAAAABJRU5ErkJggg==\n" 31 | }, 32 | "metadata": { 33 | "needs_background": "light" 34 | }, 35 | "output_type": "display_data" 36 | } 37 | ], 38 | "source": [ 39 | "cols_target = [i for i in range(1, 16)]\n", 40 | "sum = [39,69,39,65,25,38,62,56,47,20,16,19,7,12,7]\n", 41 | "plt.figure(figsize=(10,5))\n", 42 | "plt.bar(range(len(sum)),sum,tick_label=cols_target)\n", 43 | "plt.xlabel(\"volunteer number\")\n", 44 | "plt.ylabel('dataset size')\n", 45 | "plt.title('dataset distribution(by person)')" 46 | ], 47 | "metadata": { 48 | "collapsed": false, 49 | "pycharm": { 50 | "name": "#%%\n" 51 | } 52 | } 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "outputs": [], 58 | "source": [], 59 | "metadata": { 60 | "collapsed": false, 61 | "pycharm": { 62 | "name": "#%%" 63 | } 64 | } 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 2 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython2", 83 | "version": "2.7.6" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Eye Gaze Estimation 2 | 3 | ### 1. Project Overview 4 | 5 | This is a research topic in computer vision to predict **where** a person is looking at given the person’s full face. 6 | 7 | Generally, there are two directions of the task: 8 | 9 | - 3-D gaze vector estimation is to predict the gaze vector, which is usually used in the automotive safety. 
10 | - 2-D gaze position estimation is to predict the horizontal and vertical coordinates on a 2-D screen, which allows using the gaze point to control a cursor for human-machine interaction. 11 | 12 | Given the training dataset, we can address two types of problems: single-eye gaze estimation and two-eye gaze estimation, i.e. predicting the gaze direction of one eye or of both eyes. 13 | 14 | Usability: tracking eye movement provides detailed insights into users' attention. 15 | 16 | Challenges: (a) low sensor quality or unknown/challenging environments, and (b) large variations in eye region appearance. 17 | 18 | ### 2. Related work 19 | 20 | #### 2.1 Gaze Estimation Methods 21 | 22 | There are two widely accepted approaches to estimating gaze direction: **model-based** and **appearance-based**. Model-based methods use 3D eyeball models and estimate the gaze direction from geometric eye features, while appearance-based methods learn generic gaze estimators from large amounts of person- and head-pose-independent training data. 23 | 24 | Model-based methods largely depend on an external light source to detect eye features, so the modelling process can be complex; their accuracy is still lower and their robustness is unclear.[1] Appearance-based gaze estimation methods directly use eye images as input and can therefore potentially work with low-resolution eye images. Since eye images carry a lot of information, this approach needs a larger amount of training data than model-based methods. 25 | 26 | #### 2.2 Dataset collection 27 | 28 | The Eyediap[2] dataset contains 94 video sequences of 16 participants looking at three different targets, so the gaze directions are limited and coarse and cannot train a well-generalised gaze estimator. The UT Multiview[3] dataset covers 50 participants and can be used to synthesise images for new head poses. The problem with both datasets is that they record the gaze images under controlled laboratory conditions. 29 | 30 | The MPIIGaze[1] dataset is used in this task for two reasons: 31 | 32 | - It was recorded outside the lab: an application on each participant's **laptop** captured the images while they went about their daily work at home. 33 | - It took months to record, so it contains a wider range of recording locations and times, illuminations, and eye appearances. 34 | 35 | The MPIIGaze dataset details are shown below: 36 | 37 | - 15 participants, 213,659 pictures 38 | 39 | - outside of laboratory conditions, i.e. during daily routine 40 | - wider range of recording locations, times, illuminations and eye appearances 41 | 42 | How it was collected: a laptop application asked volunteers to look at a fixed on-screen target and took pictures of their eyes. (Laptops are suited for long-term daily recordings and are also an important platform for *eye tracking applications*.) 43 | 44 | #### 2.3 Calibration Settings 45 | 46 | Both model-based and appearance-based methods need to collect person-specific data during a calibration step, yet previous works on gaze estimation did not take person-specific calibration settings into consideration. 47 | 48 | For the MPIIGaze dataset, since the recordings come from different laptops, the screen sizes and resolutions differ, and the camera coordinate systems also vary widely. The MPI team therefore obtained the intrinsic parameters of each laptop camera.
In this way, the influence of participant-specific data can be added to the model: the 3D position of each screen plane was estimated using a mirror-based calibration method. 49 | 50 | To summarize, the MPIIGaze dataset provides face images, the calibration settings of each participant, and 3D gaze vectors of the eyes, which serve as the ground truth for the problem. 51 | 52 | ### 3. Method 53 | 54 | The task is generally divided into two parts: determining the gaze direction of a single eye, and determining the directions of both eyes. Each problem has a distinct method to solve it. 55 | 56 | #### 3.1 Single-eye problem 57 | 58 | ##### 3.1.1 Problem analysis 59 | 60 | For the single-eye problem, the task is to predict a 3D gaze direction for one person, given their face image and head pose information. In the MPIIGaze dataset, the head pose is calculated from the calibration parameters, and the eye is extracted from the face image, so we can focus on the eye image for prediction instead of the whole face. Predicting a gaze direction from a single image can be difficult, because the imaging conditions vary a lot: illumination, eye glasses, image resolution. 61 | 62 | To learn these image attributes properly, we apply deep learning algorithms, which learn the features efficiently. I followed the guidance of [1] and built a multi-modal CNN; the general process is shown in Fig.1. Before training the CNN, we preprocess the data: first detect the face in the raw input image, then use the calibration parameters to derive the 3D head rotation $r$. The normalisation step then adjusts the eye image so that the head appears to point directly at the camera, so every input image lives in the same coordinate system. 63 | 64 | ![](src/CNN.jpg) 65 | 66 |
Fig.1 Workflow of gaze estimation
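Concretely, the head-rotation part of this preprocessing reduces to the `pose3D_to_2D` helper found in the `utils.py` files of this repo: the Rodrigues rotation vector becomes a rotation matrix, and its third column yields the 2D (pitch, yaw) angles. A small usage example (the rotation vector below is made up):

```python
import cv2 as cv
import numpy as np

def pose3D_to_2D(pose):
    # Rodrigues turns the rotation vector into a 3x3 rotation matrix;
    # the third column is the head's z-axis in camera coordinates.
    M, _ = cv.Rodrigues(np.array(pose).astype(np.float32))
    vec = M[:, 2]
    yaw = np.arctan2(vec[0], vec[2])
    pitch = np.arcsin(vec[1])
    return np.array([pitch, yaw])

r = [0.1, -0.2, 0.05]          # hypothetical rotation vector, only to show the call
print(pose3D_to_2D(r))         # -> [pitch, yaw] in radians
```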
67 | 68 | ##### 3.1.2 Head pose Estimation 69 | 70 | The head pose rotation is not given directly in the recordings. It is calculated from the calibration parameters, such as the screen size and the intrinsic parameters of each participant's laptop camera. Since the camera cannot point directly at the screen target, images of planar mirror reflections are used to calculate the head pose vectors. Head pose can be influential when building the model; this is covered in the discussion part. 71 | 72 | ##### 3.1.3 Normalisation 73 | 74 | The purpose of the normalisation process is to adjust the head pose direction. The head poses in the dataset cover a wide range, so the head is not always pointing directly at the camera. Leaving the data in this form would reduce training accuracy, because the angle between the head coordinate system and the camera coordinate system influences the image representation: we need eye images in which the z-axis of the head coordinate system is perpendicular to the camera image plane. After normalisation, we obtain a grey image for each eye and the head pose vector $h$. The transformation process is shown below: 75 | 76 | ![](src/camera%20coordinate.jpg) 77 | 78 |
Fig.2 Normalisation process
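The dataset used here already ships the normalised eye patches (the `Normalized/**/*.mat` files read in `utils.py`), but it helps to see roughly what this step does. Below is a minimal sketch of such a perspective-warp normalisation in the spirit of [1][3]; the camera matrix, normalised focal length, distance and patch size are placeholder assumptions, not values taken from this project:

```python
import cv2 as cv
import numpy as np

def normalize_eye(img, hR, eye_center, cam_orig,
                  focal_norm=960.0, dist_norm=600.0, roi=(60, 36)):
    """Sketch: rotate a virtual camera so its z-axis points at the eye centre,
    rescale to a fixed distance, and warp the image accordingly."""
    dist = np.linalg.norm(eye_center)                    # actual camera-to-eye distance
    forward = eye_center / dist                          # new z-axis: look at the eye
    down = np.cross(forward, hR[:, 0]); down /= np.linalg.norm(down)
    right = np.cross(down, forward); right /= np.linalg.norm(right)
    R = np.vstack([right, down, forward])                # rotation into the virtual camera
    S = np.diag([1.0, 1.0, dist_norm / dist])            # scale to the normalised distance
    cam_norm = np.array([[focal_norm, 0, roi[0] / 2],
                         [0, focal_norm, roi[1] / 2],
                         [0, 0, 1.0]])
    W = cam_norm @ S @ R @ np.linalg.inv(cam_orig)       # image-to-image warp matrix
    patch = cv.warpPerspective(img, W, roi)              # normalised eye patch
    return cv.cvtColor(patch, cv.COLOR_BGR2GRAY), R @ hR # grey image + rotated head pose
```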
79 | 80 | ##### 3.1.4 Multi-modal CNN 81 | 82 | The task of the CNN is to learn the mapping from the input features to the gaze direction. The network architecture is an adaptation of the LeNet framework. The model has two inputs, the normalised eye image and the 2D head pose vector, and it outputs the predicted 2D gaze vector. All 3D vectors therefore need to be converted into 2D vectors; the difference between using 2D and 3D vectors is discussed in part 5. 83 | 84 | ![](src/figmodal.jpg) 85 | 86 |
Fig.3 Multi-modal CNN
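A minimal PyTorch sketch of the multi-modal idea in Fig.3: the eye image passes through a small LeNet-style convolutional stack and the 2D head pose is concatenated just before the regression layer. The layer sizes and the 36×60 patch size are illustrative assumptions; the actual definitions live in `LeNet.py`/`LeNet1.py`:

```python
import torch
import torch.nn as nn

class GazeNet(nn.Module):
    """LeNet-style multi-modal CNN sketch: conv features from the eye image,
    head pose injected before the final regression layer."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5),   # 1x36x60 -> 20x32x56
            nn.MaxPool2d(2),                   # -> 20x16x28
            nn.ReLU(),
            nn.Conv2d(20, 50, kernel_size=5),  # -> 50x12x24
            nn.MaxPool2d(2),                   # -> 50x6x12
            nn.ReLU(),
        )
        self.fc1 = nn.Linear(50 * 6 * 12, 500)
        self.fc2 = nn.Linear(500 + 2, 2)       # +2 for the head-pose angles

    def forward(self, img, pose):
        x = self.features(img).flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.cat([x, pose], dim=1)        # inject (pitch, yaw) of the head
        return self.fc2(x)                     # predicted (pitch, yaw) of the gaze

# img: (N, 1, 36, 60) grey patches, pose: (N, 2) head-pose angles
out = GazeNet()(torch.zeros(4, 1, 36, 60), torch.zeros(4, 2))
print(out.shape)   # torch.Size([4, 2])
```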
87 | 88 | #### 3.2 Two-eye problem 89 | 90 | ##### 3.2.1 Problem analysis 91 | 92 | Two-eye gaze estimation predicts the gaze vectors of the left eye and the right eye from one face image. The MPI team made some assumptions for this setting: 93 | 94 | - the user's head pose can be obtained with existing head trackers; 95 | - the user roughly fixates on the same target with both eyes. 96 | 97 | There has been some work on the two-eye problem. The 2017 paper *MPIIGaze: Real-World Dataset and Deep Appearance-Based Gaze Estimation*[4] proposed a way to define the ground truth for both eyes: the mean gaze vector originating from the centre of the two eyes. Through several trials, the authors reached the rough conclusion that two-eye estimation can improve the predicted result. 98 | 99 | From this conclusion, we know that predicting the vectors of both eyes at the same time can be used to refine the outcome of single-eye prediction. 100 | 101 | ##### 3.2.2 Knowledge 102 | 103 | In previous work on two-eye gaze estimation, the two eyes were treated identically. However, the statistics show that we cannot expect the same accuracy for both eyes; either eye may be the more accurate one. This observation is called two-eye asymmetry, and it is caused by differing head poses, image qualities, and illumination across the face. 104 | 105 | ##### 3.2.3 AR-E Net 106 | 107 | The AR-E net was proposed in *Appearance-Based Gaze Estimation via Evaluation-Guided Asymmetric Regression*[5]. It consists of an AR-Net and an E-Net, which serve different functions. 108 | 109 | ![](src/are.jpg) 110 | 111 |
Fig.4 Architecture for AR-E Net
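The asymmetric idea described for AR-Net below can be sketched as a loss that regresses both eyes at once and weights each eye's angular error by how reliable that eye currently looks. This is only an illustration of the weighting principle, not the exact loss of [5]; the inputs are assumed to be 3D gaze vectors of shape (N, 3):

```python
import torch

def asymmetric_loss(pred_l, pred_r, gt_l, gt_r, eps=1e-7):
    """Weight each eye's angular error so the currently better eye dominates."""
    def angular_error(p, g):
        cos = torch.sum(p * g, dim=1) / (p.norm(dim=1) * g.norm(dim=1) + eps)
        return torch.acos(cos.clamp(-1 + eps, 1 - eps))   # radians, per sample

    err_l = angular_error(pred_l, gt_l)
    err_r = angular_error(pred_r, gt_r)
    # softmax over the negative errors: smaller error -> larger weight
    w = torch.softmax(torch.stack([-err_l, -err_r], dim=1), dim=1)
    return (w[:, 0] * err_l + w[:, 1] * err_r).mean()

# tiny smoke test with random vectors
l, r = torch.randn(8, 3), torch.randn(8, 3)
print(asymmetric_loss(l, r, torch.randn(8, 3), torch.randn(8, 3)))
```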
112 | 113 | The AR-Net (Asymmetric Regression Net) predicts the two gaze vectors for the left and right eyes simultaneously. The salient difference from previous networks is the loss function: AR-Net computes an accuracy term based on the arccosine of each eye's error and uses it to weight the two eyes in the loss. 114 | 115 | The E-Net (Evaluation Net) helps further decide which eye is more reliable. It is appended to the AR-Net and feeds its evaluation back to it. 116 | 117 | ### 4. Experiments and evaluation 118 | 119 | #### 4.1 Unified Measurement 120 | 121 | To compare the results of trainings with different loss functions, gradient-descent variants and other factors that might affect the output, we need a single measurement criterion. The mean degree error, i.e. the angle between the predicted and ground-truth gaze vectors, is used here. 122 | 123 | Given two **normalised** vectors $p_1=(x_1,y_1,z_1)$ and $p_2=(x_2,y_2,z_2)$, their cosine is the dot product: 124 | 125 | $angle = \sum_{i=0}^{2} p_1[i] \, p_2[i]$. Note that the vectors must be normalised here, otherwise the calculated angular errors can be misleading. 126 | 127 | We then convert the angle into degrees: $degree = \arccos(angle) \times 180 / \pi$. 128 | 129 | #### 4.2 Hyper parameters 130 | 131 | For the best single-eye model, the batch size is 512 and the learning rate is set to 0.0001. The loss function is SmoothL1Loss, and the optimizer is adamGrad. For data splitting, I tried random splitting and splitting by person; the latter gave better results. 132 | 133 | #### 4.3 Validation 134 | 135 | For validation I tried different dataset splitting methods, which can be classified as splitting by person and random splitting. Splitting by person holds out the data of one person as validation data and uses the rest for training, so each hypothesis uses a different amount of validation data; it is unclear whether this fluctuation affects the result. 136 | 137 | I implemented **K-fold validation** for the single-eye model (with random splitting), which holds out $1/k$ of the data points as validation data and trains on the rest. For k = 5 the best result was 7.82 (not a large improvement); for k = 3 it was 8.97; for k = 10 it was 9.69. The MPI team reported a mean degree error of 6.3[1] for this model. 138 | 139 | ![](src/K-fold.jpg) 140 | 141 |
Fig.5 K-fold validation outcome
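The degree errors quoted above (and throughout Section 4) follow the measurement of Section 4.1; for a batch of unit 3D gaze vectors it reduces to a few NumPy lines:

```python
import numpy as np

def mean_degree_error(pred, truth):
    """Mean angular error in degrees between two (N, 3) arrays of unit gaze vectors."""
    cos = np.sum(pred * truth, axis=1)     # per-sample dot product
    cos = np.clip(cos, -1.0, 1.0)          # guard against rounding noise
    return np.degrees(np.arccos(cos)).mean()

# two made-up unit vectors, roughly 5 degrees apart
a = np.array([[0.0, 0.0, -1.0]])
b = np.array([[0.0, np.sin(np.radians(5)), -np.cos(np.radians(5))]])
print(mean_degree_error(a, b))             # ~5.0
```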
142 | 143 | #### 4.4 Result 144 | 145 | **Single-eye problem** 146 | 147 | I applied the previously mentioned multi-modal CNN in my work. The train-test curves below show the trend; the model was trained for 100 epochs. Overall, both the training loss and the test loss keep decreasing as training proceeds. The zigzags in the curves are likely a result of mini-batch training and the adam grad optimizer. The best result for the single-eye model is 8.92. 148 | 149 | ![](src/result.jpg) 150 | 151 |
Fig.6 train-test loss curve
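The curves in Fig.6 are plotted from the `train_loss_list` and `valid_loss_list` that `train.py` prints at the end of training. A minimal plotting snippet; the numbers below are placeholders, not real results:

```python
import matplotlib.pyplot as plt

# placeholder values; in practice paste the lists printed by train.py
train_loss_list = [15.2, 12.1, 10.8, 9.9, 9.3]
valid_loss_list = [16.0, 13.0, 11.9, 11.0, 10.6]

plt.plot(train_loss_list, label="train")
plt.plot(valid_loss_list, label="valid")
plt.xlabel("epoch")
plt.ylabel("mean degree error")
plt.legend()
plt.show()
```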
152 | 153 | **Two-eye problem** 154 | 155 | For the two-eye problem, I first tried using the single-eye model to predict the gaze vectors of the two eyes separately, but the two results are not always close. This supports the MPI team's conclusion that we cannot treat the two eyes identically. I then tried combining two streams of the single-eye model (predicting the left and right eyes separately), concatenating their outputs and redefining the loss function in an AR-Net-like form; this did not work well and never converged. Finally, I followed the standard ARE-Net design and implemented the AR-Net (the E-Net failed to run because CUDA always ran out of memory). For now, the best accuracy for the two-eye problem stands at 13.4104. 156 | 157 | 158 | 159 | ### 5. Discussion 160 | 161 | In this part, we discuss several effects that might influence the accuracy of the model. 162 | 163 | #### 5.1 Influence from head pose 164 | 165 | Head pose is needed in both the single-eye and two-eye problems. One may ask: since the data have been normalised before being fed into the network, why is it still necessary to inject the head pose into the prediction? The answer comes from a theoretical view: normalisation makes the eye appear to look directly at the camera, but we are not predicting gaze vectors in that normalised scenario; we are predicting gaze directions for the original photos, in which the head pose does affect the final result. Leaving the head pose out can severely hurt the training result, as shown in the following graph: 166 | 167 | ![](src/headpose.jpg) 168 | 169 |
Fig.7 Comparison of results with and without head pose
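The "without head pose" curve corresponds to a variant in which nothing is concatenated before the regression head, along the lines of what `train_without_headpose.py` trains. A sketch with the same illustrative layer sizes as before:

```python
import torch
import torch.nn as nn

class GazeNetNoPose(nn.Module):
    """Same image branch as the multi-modal sketch, but the head pose is never used."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, 5), nn.MaxPool2d(2), nn.ReLU(),
            nn.Conv2d(20, 50, 5), nn.MaxPool2d(2), nn.ReLU(),
        )
        self.fc = nn.Sequential(nn.Linear(50 * 6 * 12, 500), nn.ReLU(), nn.Linear(500, 2))

    def forward(self, img):                 # no pose argument at all
        return self.fc(self.features(img).flatten(1))

print(GazeNetNoPose()(torch.zeros(4, 1, 36, 60)).shape)   # torch.Size([4, 2])
```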
170 | 171 | #### 5.2 Influence from dimensions of vectors 172 | 173 | To emphasise the necessity of using 2D vectors, I trained with vectors of different dimensions and compared the outcomes. From Fig.8 we can clearly see that the 2D version converges within 10 epochs, while the 3D version never does. However, this only indicates that this particular network structure is suited to 2D vectors rather than 3D ones. 174 | 175 | ![](src/32d.jpg) 176 | 177 |
Fig.8 Comparison of results using 3D and 2D vectors
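The 2D targets used in these runs come from the `gaze3D_to_2D` and `gaze2D_to_3D` helpers in `utils.py`; restated here with a quick round-trip check:

```python
import numpy as np

def gaze3D_to_2D(gaze):
    # (x, y, z) direction -> (pitch, yaw), as in utils.py
    x, y, z = gaze
    return np.array([np.arcsin(-y), np.arctan2(-x, -z)])

def gaze2D_to_3D(gaze):
    # (pitch, yaw) -> unit (x, y, z) direction, as in utils.py
    pitch, yaw = gaze
    v = np.array([-np.cos(pitch) * np.sin(yaw), -np.sin(pitch), -np.cos(pitch) * np.cos(yaw)])
    return v / np.linalg.norm(v)

g = np.array([0.1, -0.2, -0.97])
g = g / np.linalg.norm(g)
print(np.allclose(gaze2D_to_3D(gaze3D_to_2D(g)), g))   # True: the mapping is invertible
```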
178 | 179 | #### 5.3 Determine two-eye ground truth 180 | 181 | For the two-eye estimation problem, the 2017 paper *MPIIGaze: Real-World Dataset and Deep Appearance-Based Gaze Estimation* proposes a way to set the ground truth of both eyes: the mean gaze vector originating from the centre of the two eyes is defined as the ground-truth gaze vector. Nonetheless, this cannot accurately represent the true relationship between the two eyes. Instead, we should collect ground truth for both eyes separately. 182 | 183 | 184 | 185 | ### 6. Conclusion 186 | 187 | The MPIIGaze dataset was collected through long-term observation of various volunteers. Its wide range of recording factors makes it well suited for training deep learning models. To predict the gaze vector of a single eye, we convert both the 3D gaze vectors (ground truth) and the calculated head poses into their 2D form. This reduces computational complexity and also yields better prediction results. The conversion process involves geometric formulas and is implemented with the cv library. The CNN-based model takes little time to train and can still reach high-quality results. 188 | 189 | Two-eye gaze estimation is more of an adjustment to the single-eye prediction result. It combines the two eye images and takes the inter-relationship and discrepancies between the two eyes into consideration. Rather than just outputting the raw result, this problem mainly concerns how the predictions can be fed back into the training process so that the outcome improves after the adjustment. 190 | 191 | 192 | 193 | ### 7. My work 194 | 195 | | Week time | Main task |
Details
| 196 | | --------- | ---------------------------- | ------------------------------------------------------------ | 197 | | 3 | Lead-in to the project | - Search for previous works on gaze estimation and build a basic understanding of the problem;
- Look for available datasets and manageable methods for solving the problem. | 198 | | 4 | EDA | - Determine the method to apply;
- Explore the attributes and characteristics of the dataset, understand the labels.
- Understand the dataset collection process. | 199 | | 5 | Learn multi-modal CNN[1] | - Get familiar with the process of using calibration parameters to calculate head pose rotations and normalising the images;
- Understand the deep learning architecture and clarify the possible problems. | 200 | | 6 | Implement CNN[1] | - Implement the multi-modal CNN with normalised images, based on PyTorch;
- Train the model on the dataset (dataset splitting: random splitting) | 201 | | 7 | Discuss influencing factors | - Consider the influence of vector dimensionality and head pose on the model;
- Code experiments to verify the reasoning;
- Revise the dataset splitting method.
- Switch to a GPU device. 202 | | 8 | Improve the result | - Look into the bad predictions and find possible causes;
- Adjust the hyperparameters and optimizers;
- Use a new evaluation metric (mean degree error). | 203 | | 9-10 | Evaluation | - Carry out the validation process and implement K-fold validation. | 204 | | 11 | Start of two-eye problem | - Run a model to predict the two eyes' vectors separately;
- Implement a self-designed CNN based on the previous architecture;
- Analyze the problems that occurred in the two models. | 205 | | 12 | Learn asymmetry technique[5] | - Learn the architecture of AR-E Net. | 206 | | 13 | Implement AR-Net | - Based on the paper[5], implement AR-Net;
- Tune the structure and improve the result. | 207 | 208 | 209 | 210 | ### 8. Limitations 211 | 212 | 1. The single-eye model has not gone through cross-dataset validation. If applicable, other datasets such as Eyediap[2] could be used to test the model and improve the architecture's generalization ability. 213 | 2. The self-designed CNN did not predict well; further adjustment and revision are needed to make the model work better. 214 | 3. The AR-E Net costs too much memory during training, so in my experiments CUDA ran out of memory and training could not continue. AR-E Net is relatively complex; a simpler but still effective model is needed for the two-eye problem. 215 | 4. This project did not do much to reduce noise in the training images. Poor illumination and coverings such as eye-glasses can reduce the accuracy of the CNN, so preprocessing steps for the dataset are needed in future work. 216 | 217 | 218 | 219 | ### 9. Environment 220 | 221 | | | | | | | | 222 | | ----- | ---- | ------ | -------------------- | ----- | ---------- | 223 | | cgpb0 | 1 | Ubuntu | 2x Xeon Silver 4210 | 256GB | 3.2TB SSD | 224 | 225 | 226 | 227 | ### 10. Directory description 228 | 229 | ``` 230 | +--EyeGaze # root 231 | | +--essay # essays related 232 | | +--note # literature review on essays 233 | | +--pre # presentation slides 234 | | +--single_eye_normalized 235 | | +--gpu # code suitable for cuda 236 | | +--train_cpu # training on cpu device 237 | | +--validation 238 | | +--visualize # draw curves 239 | | +--src 240 | | +--two_eye 241 | ``` 242 | 243 | 244 | 245 | ### Appendix 246 | 247 | [1] Zhang, Xucong, et al. "Appearance-based gaze estimation in the wild." *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition*. 2015. 248 | 249 | [2] Funes Mora, Kenneth Alberto, Florent Monay, and Jean-Marc Odobez. "EYEDIAP: A database for the development and evaluation of gaze estimation algorithms from RGB and RGB-D cameras." *Proceedings of the Symposium on Eye Tracking Research and Applications*. 2014. 250 | 251 | [3] Sugano, Yusuke, Yasuyuki Matsushita, and Yoichi Sato. "Learning-by-synthesis for appearance-based 3D gaze estimation." *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition*. 2014. 252 | 253 | [4] Zhang X, Sugano Y, Fritz M, et al. "MPIIGaze: Real-world dataset and deep appearance-based gaze estimation." *IEEE Transactions on Pattern Analysis and Machine Intelligence*, 2017, 41(1): 162-175. 254 | 255 | [5] Cheng Y, Lu F, Zhang X. "Appearance-based gaze estimation via evaluation-guided asymmetric regression." *Proceedings of the European Conference on Computer Vision (ECCV)*. 2018: 100-115.
256 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/Normalized_process.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Normalized_process.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyMyGsYoHOYrGrmocLFFjuGb", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "ZFOWQGZbV4dZ" 32 | }, 33 | "source": [ 34 | "## Appearance-based Gaze Estimation (Feb 20)\n", 35 | "\n", 36 | "dataset: MPIIGaze https://www.perceptualui.org/research/datasets/MPIIGaze/ \n", 37 | "\n", 38 | "CNN frame: LeNet " 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "qjSsP2_rWEil" 45 | }, 46 | "source": [ 47 | "### 1. Pre-processing data " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "TZtFQOIIWCLY" 54 | }, 55 | "source": [ 56 | "from scipy.io import loadmat\n", 57 | "import numpy as np\n", 58 | "import pandas as pd\n", 59 | "from PIL import Image\n", 60 | "import glob\n", 61 | "from tqdm import tqdm\n", 62 | "from sklearn.model_selection import train_test_split\n", 63 | "from google.colab import drive " 64 | ], 65 | "execution_count": null, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "id": "jhYsyQZy4_KJ" 72 | }, 73 | "source": [ 74 | "To use COLAB, we should first connect to the drive then retreive the data." 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "metadata": { 80 | "colab": { 81 | "base_uri": "https://localhost:8080/" 82 | }, 83 | "id": "7HnveBscWRiH", 84 | "outputId": "599cd832-7391-4d2f-a6e4-1dedf3245caf" 85 | }, 86 | "source": [ 87 | "# Use google.colab to use drive dataset \n", 88 | "import os\n", 89 | "drive.mount('/content/drive')\n", 90 | "path = \"/content/drive/MyDrive/EyeGaze\"\n", 91 | "os.listdir(path)" 92 | ], 93 | "execution_count": null, 94 | "outputs": [ 95 | { 96 | "output_type": "stream", 97 | "text": [ 98 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 99 | ], 100 | "name": "stdout" 101 | }, 102 | { 103 | "output_type": "execute_result", 104 | "data": { 105 | "text/plain": [ 106 | "['Normalized', 'Normalized_process.ipynb']" 107 | ] 108 | }, 109 | "metadata": { 110 | "tags": [] 111 | }, 112 | "execution_count": 4 113 | } 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "0xSyWtfBX-1w", 123 | "outputId": "5bf23c68-7295-433b-cfad-b21900cd2730" 124 | }, 125 | "source": [ 126 | "mat_files = glob.glob(path + '/Normalized/**/*.mat', recursive=True)\n", 127 | "mat_files.sort()\n", 128 | "print(mat_files)" 129 | ], 130 | "execution_count": null, 131 | "outputs": [ 132 | { 133 | "output_type": "stream", 134 | "text": [ 135 | "['/content/drive/MyDrive/EyeGaze/Normalized/p00/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day35.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p00/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day56.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p01/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day60.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day63.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day64.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day65.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day66.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day67.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day68.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day69.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day08.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p03/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day60.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day63.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day64.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day65.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day03.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p04/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day38.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p06/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day60.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p06/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day02.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p08/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day15.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p09/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day01.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p14/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day07.mat']\n" 136 | ], 137 | "name": "stdout" 138 | } 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "7oL7zkFRYf9z" 145 | }, 146 | "source": [ 147 | "from scipy.io import loadmat\n", 148 | "def read_eye_data(mat):\n", 149 | " '''\n", 150 | " read gaze, image and pose info from one mat file \n", 151 | " '''\n", 152 | " mat_data = loadmat(mat)\n", 153 | " right_info = mat_data['data']['right'][0, 0]\n", 154 | " gaze = right_info['gaze'][0, 0]\n", 155 | " image = right_info['image'][0, 0]\n", 156 | " pose = right_info['pose'][0, 0]\n", 157 | " return gaze, image, pose\n", 158 | "\n", 159 | "def collect_data_from_mat(path):\n", 160 | " '''\n", 161 | " collect data from annotation part\n", 162 | " :param path: path of normalized data \n", 163 | " :return: gaze, image, pose arrays and the index list\n", 164 | " '''\n", 165 | " mat_files = glob.glob(path + '/Normalized/**/*.mat', recursive = True)\n", 166 | " mat_files.sort()\n", 167 | " # lists to store the collected data\n", 168 | " gaze = list()\n", 169 | " image = list()\n", 170 | " index = list()\n", 171 | " pose = list()\n", 172 | " # X: image, head_pose \n", 173 | " # y: gaze vector\n", 174 | " # index: pnum, pday\n", 175 | " for matfile in tqdm(mat_files):\n", 176 | " pnum = matfile.split('/')[-2] # pxx\n", 177 | " pday = matfile.split('/')[-1].split('.')[0] # day0x\n", 178 | " index.append(pnum + '/' + pday)\n", 179 | " \n", 180 | " fgaze, fimage, fpose = read_eye_data(matfile)\n", 181 | "\n", 182 | " if gaze == []:\n", 183 | " gaze = fgaze\n", 184 | " image = fimage\n", 185 | " pose = fpose\n", 186 | " else:\n", 187 | " gaze = np.append(gaze, fgaze, axis = 0)\n", 188 | " image = np.append(image, fimage, axis = 0)\n", 189 | " pose = np.append(pose, fpose, axis = 0)\n", 190 | "\n", 191 | " return gaze, image, pose, index " 192 | ], 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "metadata": { 199 | "id": "qZ3oEjqY40B8", 200 | "colab": { 201 | "base_uri": "https://localhost:8080/" 202 | }, 203 | "outputId": "32c60ac0-5867-416a-885d-02212d5090eb" 204 | }, 205 | "source": [ 206 | "gaze, image, pose, index = collect_data_from_mat(path)" 207 | ], 208 | "execution_count": null, 209 | "outputs": [ 210 | { 211 | "output_type": "stream", 212 | "text": [ 213 | " 0%| | 0/521 [00:00" 298 | ] 299 | }, 300 | "metadata": { 301 | "tags": [] 302 | }, 303 | "execution_count": 9 304 | } 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "vU4hXiBtfS2r" 311 | }, 312 | "source": [ 313 | "gaze (x, y, z) is the normalized 3D **vector** of the eye's gaze direction. The x-axis is parallel to the line that connects the centers of the two eyes.\n", 314 | "To simplify our CNN model, there is no need to use the full 3D vector, so the authors just use two crucial angles (theta, phi) to denote the gaze direction. \n", 315 | "\n", 316 | "pose (a, b, c) represents the rotation (**angle**) of the head. When dealing with 3D transformations, a rotation matrix is usually used, but a rotation only has three degrees of freedom, so it is more concise to express it as a rotation vector. 
Therefore we need to convert between the rotation vector and the rotation matrix." 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": { 322 | "id": "dalKmHvrQw45" 323 | }, 324 | "source": [ 325 | "### 2. Dimension transformation\n", 326 | "In this part, we convert the pose and gaze vectors from 3D to 2D angles (theta, phi). The suggested code is given on the dataset website." 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "metadata": { 332 | "id": "lvNfmin0GZA-" 333 | }, 334 | "source": [ 335 | "import cv2 as cv\n", 336 | "def pose3D_to_2D(pose):\n", 337 | " '''\n", 338 | " pose (a, b, c) is rotation (angle)\n", 339 | " M = Rodrigues((x,y,z))\n", 340 | " Zv = (the third column of M)\n", 341 | " theta = asin(Zv[1])\n", 342 | " phi = atan2(Zv[0], Zv[2])\n", 343 | " '''\n", 344 | " M, _ = cv.Rodrigues(np.array(pose).astype(np.float32))\n", 345 | " vec = M[:, 2]\n", 346 | " phi = np.arctan2(vec[0], vec[2])\n", 347 | " theta = np.arcsin(vec[1])\n", 348 | " return np.array([theta, phi])\n", 349 | "\n", 350 | "def gaze3D_to_2D(gaze):\n", 351 | " '''\n", 352 | " gaze (x, y, z) is direction\n", 353 | " theta = asin(-y)\n", 354 | " phi = atan2(-x, -z)\n", 355 | " '''\n", 356 | " x, y, z = (gaze[i] for i in range(3))\n", 357 | " theta = np.arcsin(-y)\n", 358 | " phi = np.arctan2(-x, -z)\n", 359 | " return np.stack((theta, phi)).T" 360 | ], 361 | "execution_count": null, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "TjF79qRzn2t2" 368 | }, 369 | "source": [ 370 | "pose2d = []\n", 371 | "gaze2d = []\n", 372 | "for i in np.arange(0, len(gaze), 1):\n", 373 | " pose2d.append(pose3D_to_2D(pose[i]))\n", 374 | " gaze2d.append(gaze3D_to_2D(gaze[i]))\n", 375 | "\n", 376 | "poses = np.array(pose2d)\n", 377 | "gazes = np.array(gaze2d)" 378 | ], 379 | "execution_count": null, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "xBsMSwNdyk7m" 386 | }, 387 | "source": [ 388 | "img_train, img_test, pose_train, pose_test, gaze_train, gaze_test = train_test_split(\n", 389 | " image, poses, gazes, test_size = 0.33, random_state = 0 # keep this order consistent with the names unpacked above\n", 390 | ")" 391 | ], 392 | "execution_count": null, 393 | "outputs": [] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "metadata": { 398 | "colab": { 399 | "base_uri": "https://localhost:8080/" 400 | }, 401 | "id": "xYoWc3iQ8ILi", 402 | "outputId": "f62ded32-3d6c-4bc3-fa77-d93853c68ba8" 403 | }, 404 | "source": [ 405 | "print(img_train[1])\n", 406 | "print(img_test.shape)\n", 407 | "print(pose_train[1])\n", 408 | "print(\"training data size:\",end=\"\")\n", 409 | "print(pose_train.shape[0])\n", 410 | "print(\"test data size:\", end=\"\")\n", 411 | "print(pose_test.shape[0])\n" 412 | ], 413 | "execution_count": null, 414 | "outputs": [ 415 | { 416 | "output_type": "stream", 417 | "text": [ 418 | "[[255 255 254 ... 150 150 125]\n", 419 | " [254 254 254 ... 125 95 95]\n", 420 | " [254 254 254 ... 67 95 95]\n", 421 | " ...\n", 422 | " [250 250 250 ... 246 250 250]\n", 423 | " [250 250 246 ... 242 246 246]\n", 424 | " [252 252 246 ... 246 250 250]]\n", 425 | "(70508, 36, 60)\n", 426 | "[-0.30859049 0.16778534]\n", 427 | "training data size:143150\n", 428 | "test data size:70508\n" 429 | ], 430 | "name": "stdout" 431 | } 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": { 437 | "id": "u9XCCUpV6BZf" 438 | }, 439 | "source": [ 440 | "### 3. 
Multi-modal CNN" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": { 446 | "id": "Skgn_0IMtOfA" 447 | }, 448 | "source": [ 449 | "Use the LeNet architecture: two convolution + max-pooling stages followed by two fully connected layers, with the 2D head pose concatenated to the fc1 features before the final regression layer." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "metadata": { 455 | "id": "9jRZnuXStATC" 456 | }, 457 | "source": [ 458 | "import torch\n", 459 | "import torch.nn as nn\n", 460 | "import torch.nn.functional as F\n", 461 | "\n", 462 | "\n", 463 | "def initialize_weights(module):\n", 464 | " if isinstance(module, nn.Conv2d):\n", 465 | " nn.init.constant_(module.bias, 0)\n", 466 | " elif isinstance(module, nn.Linear):\n", 467 | " nn.init.xavier_uniform_(module.weight)\n", 468 | " nn.init.constant_(module.bias, 0)\n", 469 | "\n", 470 | "class Model(nn.Module):\n", 471 | " def __init__(self):\n", 472 | " super(Model, self).__init__()\n", 473 | "\n", 474 | " self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0)\n", 475 | " self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0)\n", 476 | " self.fc1 = nn.Linear(3600, 500) # 50 channels x 6 x 12 after two conv+pool stages\n", 477 | " self.fc2 = nn.Linear(502, 2) # 500 features + 2 head-pose angles -> (theta, phi)\n", 478 | "\n", 479 | " self._initialize_weight()\n", 480 | "\n", 481 | " def _initialize_weight(self):\n", 482 | " nn.init.normal_(self.conv1.weight, mean=0, std=0.1)\n", 483 | " nn.init.normal_(self.conv2.weight, mean=0, std=0.01)\n", 484 | " self.apply(initialize_weights)\n", 485 | "\n", 486 | " def forward(self, x, y):\n", 487 | " x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)\n", 488 | " x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)\n", 489 | " x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) # flatten, then fc1\n", 490 | " x = torch.cat([x, y], dim=1) # append head pose\n", 491 | " x = self.fc2(x)\n", 492 | " return x" 493 | ], 494 | "execution_count": null, 495 | "outputs": [] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "metadata": { 500 | "colab": { 501 | "base_uri": "https://localhost:8080/" 502 | }, 503 | "id": "w1AbDx-Y_xTk", 504 | "outputId": "e2df1a5c-f22d-4ea9-a84c-85d3306eddac" 505 | }, 506 | "source": [ 507 | "GazeCNN = Model()\n", 508 | "\n", 509 | "optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001)\n", 510 | "criterion = torch.nn.SmoothL1Loss(reduction='mean')\n", 511 | "\n", 512 | "def batch_training(img, gaze, pose, j, bt):\n", 513 | " a = torch.randn(batch,1,36, 60)\n", 514 | " b = torch.randn(batch,2)\n", 515 | " c = torch.randn(batch,2)\n", 516 | " for i in range(batch):\n", 517 | " a[i, 0] = torch.tensor(img_train[j * bt + i])\n", 518 | " b[i] = torch.tensor(pose_train[j * bt + i])\n", 519 | " c[i] = torch.tensor(gaze_train[j * bt + i])\n", 520 | " return a, c, b # (img, gaze, pose), matching the unpacking at the call sites\n", 521 | "\n", 522 | "def batch_test(img, gaze, pose, j, bt):\n", 523 | " a = torch.randn(batch,1,36, 60)\n", 524 | " b = torch.randn(batch,2)\n", 525 | " c = torch.randn(batch,2)\n", 526 | " for i in range(batch):\n", 527 | " a[i, 0] = torch.tensor(img_test[j * bt + i])\n", 528 | " b[i] = torch.tensor(pose_test[j * bt + i])\n", 529 | " c[i] = torch.tensor(gaze_test[j * bt + i])\n", 530 | " return a, c, b # (img, gaze, pose), matching the unpacking at the call sites\n", 531 | "\n", 532 | "\n", 533 | "for epoch in range(3):\n", 534 | " batch = 10\n", 535 | " for i in tqdm(range(14315)):\n", 536 | " # training data \n", 537 | " img = torch.randn(batch, 1, 36, 60)\n", 538 | " gaze = torch.randn(batch, 2)\n", 539 | " pose = torch.randn(batch, 2)\n", 540 | " img, gaze, pose = batch_training(img, gaze, pose, i, batch)\n", 541 | "\n", 542 | " gaze_pred = GazeCNN(img, pose)\n", 543 | " loss = criterion(gaze_pred, gaze)\n", 544 | " optimizer.zero_grad(); loss.backward() # clear accumulated gradients before backprop\n", 545 | " optimizer.step()\n", 546 | "\n", 547 | 
" timg, tgaze, tpose = batch_test(img, gaze, pose, 0, batch)\n", 548 | " gaze_pred = GazeCNN(timg, tpose)\n", 549 | " loss = criterion(gaze_pred, tgaze)\n", 550 | " print(\"epoch \", epoch, \", test loss:\", loss)\n", 551 | "\n", 552 | " " 553 | ], 554 | "execution_count": null, 555 | "outputs": [ 556 | { 557 | "output_type": "stream", 558 | "text": [ 559 | "100%|██████████| 14315/14315 [11:55<00:00, 20.00it/s]\n", 560 | " 0%| | 3/14315 [00:00<11:09, 21.37it/s]" 561 | ], 562 | "name": "stderr" 563 | }, 564 | { 565 | "output_type": "stream", 566 | "text": [ 567 | "epoch 0 , test loss: tensor(0.0090, grad_fn=)\n" 568 | ], 569 | "name": "stdout" 570 | }, 571 | { 572 | "output_type": "stream", 573 | "text": [ 574 | "100%|██████████| 14315/14315 [11:29<00:00, 20.76it/s]\n", 575 | " 0%| | 2/14315 [00:00<12:19, 19.36it/s]" 576 | ], 577 | "name": "stderr" 578 | }, 579 | { 580 | "output_type": "stream", 581 | "text": [ 582 | "epoch 1 , test loss: tensor(0.0084, grad_fn=)\n" 583 | ], 584 | "name": "stdout" 585 | }, 586 | { 587 | "output_type": "stream", 588 | "text": [ 589 | "100%|██████████| 14315/14315 [11:27<00:00, 20.82it/s]" 590 | ], 591 | "name": "stderr" 592 | }, 593 | { 594 | "output_type": "stream", 595 | "text": [ 596 | "epoch 2 , test loss: tensor(0.0075, grad_fn=)\n" 597 | ], 598 | "name": "stdout" 599 | }, 600 | { 601 | "output_type": "stream", 602 | "text": [ 603 | "\n" 604 | ], 605 | "name": "stderr" 606 | } 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": { 612 | "id": "sPoFkEORJOrt" 613 | }, 614 | "source": [ 615 | "epoch 0 , test loss: tensor(0.0096, grad_fn=)\n", 616 | "epoch 1 , test loss: tensor(0.0109, grad_fn=)\n", 617 | "epoch 2 , test loss: tensor(0.0088, grad_fn=)\n", 618 | "epoch 3 , test loss: tensor(0.0078, grad_fn=)\n", 619 | "epoch 4 , test loss: tensor(0.0082, grad_fn=)\n", 620 | "\n", 621 | "> Indented block\n", 622 | "\n" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": { 628 | "id": "83TYES7yWWiQ" 629 | }, 630 | "source": [ 631 | "100%|██████████| 14315/14315 [11:25<00:00, 20.89it/s]\n", 632 | " 0%| | 3/14315 [00:00<10:48, 22.07it/s]epoch 0 , test loss: tensor(0.0099, grad_fn=)\n", 633 | "100%|██████████| 14315/14315 [10:59<00:00, 21.72it/s]\n", 634 | " 0%| | 3/14315 [00:00<11:02, 21.60it/s]epoch 1 , test loss: tensor(0.0086, grad_fn=)\n", 635 | "100%|██████████| 14315/14315 [10:59<00:00, 21.69it/s]\n", 636 | " 0%| | 3/14315 [00:00<11:02, 21.59it/s]epoch 2 , test loss: tensor(0.0082, grad_fn=)\n", 637 | "100%|██████████| 14315/14315 [11:05<00:00, 21.50it/s]\n", 638 | " 0%| | 3/14315 [00:00<11:36, 20.56it/s]epoch 3 , test loss: tensor(0.0077, grad_fn=)\n", 639 | "100%|██████████| 14315/14315 [11:02<00:00, 21.62it/s]\n", 640 | "epoch 4 , test loss: tensor(0.0080, grad_fn=)" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "metadata": { 646 | "colab": { 647 | "base_uri": "https://localhost:8080/" 648 | }, 649 | "id": "nwEw-taW56MT", 650 | "outputId": "10271c1d-942d-44e8-f72c-95090dcdcdf8" 651 | }, 652 | "source": [ 653 | "test_loss = 0\n", 654 | "for i in tqdm(range(3000)):\n", 655 | " timg, tgaze, tpose = batch_test(img, gaze, pose, i, batch)\n", 656 | " gaze_pred = GazeCNN(timg, tpose)\n", 657 | " test_loss += criterion(gaze_pred, tgaze)\n", 658 | "\n", 659 | "print(test_loss / 3000)" 660 | ], 661 | "execution_count": null, 662 | "outputs": [ 663 | { 664 | "output_type": "stream", 665 | "text": [ 666 | "100%|██████████| 3000/3000 [01:02<00:00, 48.18it/s]" 667 | ], 668 | "name": "stderr" 669 | }, 670 | { 671 
| "output_type": "stream", 672 | "text": [ 673 | "tensor(0.0081, grad_fn=)\n" 674 | ], 675 | "name": "stdout" 676 | }, 677 | { 678 | "output_type": "stream", 679 | "text": [ 680 | "\n" 681 | ], 682 | "name": "stderr" 683 | } 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "v1QDfOjzjKmA" 690 | }, 691 | "source": [ 692 | "probLems:\n", 693 | "1. server crashed when processing test datasets\n", 694 | "\n", 695 | "2. measurement on loss/ accuracy -- how to judge my model\n" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": { 701 | "id": "Syp3TkADvPLG" 702 | }, 703 | "source": [ 704 | "### References\n", 705 | "1. https://www.mpi-inf.mpg.de/de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/appearance-based-gaze-estimation-in-the-wild\n", 706 | "\n", 707 | "2. Y. Sugano, Y. Matsushita, and Y. Sato. Learning-by-synthesis for appearance-based 3d gaze estimation. In Computer Vision and Pattern Recognition (CVPR), 2014 IEEE Conference on, pages 1821–1828. IEEE, 2014.\n", 708 | "\n", 709 | "3. \n" 710 | ] 711 | } 712 | ] 713 | } --------------------------------------------------------------------------------