├── src ├── 32d.jpg ├── CNN.jpg ├── are.jpg ├── K-fold.jpg ├── result.jpg ├── figmodal.jpg ├── headpose.jpg └── camera coordinate.jpg ├── pre ├── week1.pptx ├── week2.pptx └── ~$week2.pptx ├── essay ├── 1711.09017.pdf ├── 1905.01941v2.pdf ├── zhang_CVPR15.pdf └── Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf ├── two_eye ├── __pycache__ │ ├── ARNet.cpython-37.pyc │ ├── utils.cpython-37.pyc │ └── Dataloader.cpython-37.pyc ├── see_data.py ├── Dataloader.py ├── README.md ├── utils.py ├── ARNet.py └── main.py ├── single_eye_normalized ├── gpu │ ├── __pycache__ │ │ ├── LeNet.cpython-37.pyc │ │ ├── LeNet1.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── LeNet.py │ ├── LeNet1.py │ ├── train1.py │ ├── train.py │ └── utils.py ├── validation │ ├── __pycache__ │ │ ├── LeNet.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── data.txt │ ├── LeNet.py │ ├── train2.py │ ├── train.py │ ├── train_onetime.py │ └── utils.py ├── visualize │ ├── draw_picture.py │ └── draw.ipynb └── train_cpu │ ├── LeNet.py │ ├── LeNet2.py │ ├── train.py │ ├── train_without_headpose.py │ ├── utils.py │ └── Normalized_process.ipynb ├── note └── Apperance-based+gaze+estimation+in+the+wild_LR.md └── README.md /src/32d.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/32d.jpg -------------------------------------------------------------------------------- /src/CNN.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/CNN.jpg -------------------------------------------------------------------------------- /src/are.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/are.jpg -------------------------------------------------------------------------------- /pre/week1.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/pre/week1.pptx -------------------------------------------------------------------------------- /pre/week2.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/pre/week2.pptx -------------------------------------------------------------------------------- /src/K-fold.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/K-fold.jpg -------------------------------------------------------------------------------- /src/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/result.jpg -------------------------------------------------------------------------------- /src/figmodal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/figmodal.jpg -------------------------------------------------------------------------------- /src/headpose.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/headpose.jpg -------------------------------------------------------------------------------- /essay/1711.09017.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/1711.09017.pdf -------------------------------------------------------------------------------- /essay/1905.01941v2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/1905.01941v2.pdf -------------------------------------------------------------------------------- /essay/zhang_CVPR15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/zhang_CVPR15.pdf -------------------------------------------------------------------------------- /src/camera coordinate.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/src/camera coordinate.jpg -------------------------------------------------------------------------------- /two_eye/__pycache__/ARNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/ARNet.cpython-37.pyc -------------------------------------------------------------------------------- /two_eye/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /two_eye/__pycache__/Dataloader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/two_eye/__pycache__/Dataloader.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/LeNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/LeNet.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/LeNet1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/LeNet1.cpython-37.pyc -------------------------------------------------------------------------------- /single_eye_normalized/gpu/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/gpu/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /pre/~$week2.pptx: -------------------------------------------------------------------------------- 1 | Li Qilin Li Qilin -------------------------------------------------------------------------------- /single_eye_normalized/validation/__pycache__/LeNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/validation/__pycache__/LeNet.cpython-37.pyc 
-------------------------------------------------------------------------------- /single_eye_normalized/validation/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/single_eye_normalized/validation/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /essay/Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SiciliaLeco/EyeGaze/HEAD/essay/Sugano_Learning-by-Synthesis_for_Appearance-based_2014_CVPR_paper.pdf -------------------------------------------------------------------------------- /single_eye_normalized/validation/data.txt: -------------------------------------------------------------------------------- 1 | [90.0482177734375,89.9613265991211] 2 | [89.95109558105469,89.93241882324219] 3 | [65.5342788696289,76.80120086669922] 4 | [17.73709487915039,15.879902839660645] 5 | [14.747998237609863,12.863384246826172] 6 | [11.418121337890625,9.741545677185059] 7 | [9.759241104125977,8.560312271118164] 8 | [9.765317916870117,9.141117095947266] 9 | [12.103734016418457,12.338122367858887] 10 | [13.03825855255127,13.487483024597168] 11 | [13.195599555969238,13.720359802246094] 12 | [12.461770057678223,12.860777854919434] 13 | [11.072410583496094,11.032851219177246] 14 | [9.931607246398926,9.157052993774414] 15 | [9.824358940124512,8.315235137939453] 16 | -------------------------------------------------------------------------------- /two_eye/see_data.py: -------------------------------------------------------------------------------- 1 | import glob 2 | from tqdm import tqdm 3 | import math 4 | from scipy.io import loadmat 5 | 6 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 7 | 8 | def read_eye_data(mat, label): 9 | mat_data = loadmat(mat) 10 | right_info = mat_data['data'][label][0, 0] 11 | gaze = right_info['gaze'][0, 0] 12 | image = right_info['image'][0, 0] 13 | pose = right_info['pose'][0, 0] 14 | return gaze, image, pose 15 | 16 | def calc_angle(gaze1, gaze2): 17 | angle = 0 18 | for i in range(3): 19 | angle += gaze1[i] * gaze2[i] 20 | s1 = math.sqrt(gaze1[1] **2 + gaze1[2]**2 + gaze1[0] ** 2) 21 | s2 = math.sqrt(gaze2[1]**2 + gaze2[2]**2 + gaze2[0] ** 2) 22 | return angle / (s1 * s2) 23 | 24 | def collect_data_from_mat(): 25 | mat_files = glob.glob(path+'/Normalized/**/*.mat', recursive = True) 26 | for matfile in tqdm(mat_files[:1]): 27 | rgaze, rimage, rpose = read_eye_data(matfile, "right") 28 | lgaze, limage, lpose = read_eye_data(matfile, "left") 29 | 30 | for i in range(len(rgaze)): 31 | print("left:", rgaze[i]) 32 | print("right:", lgaze[i]) 33 | collect_data_from_mat() -------------------------------------------------------------------------------- /single_eye_normalized/visualize/draw_picture.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | Y = [90.2514, 90.0982, 89.9913, 48.2418, 18.7832, 16.3784, 13.7179, 11.1366, 9.6803,9.7897, 10.9040, 12.3392, 13.4419, 12.8745, 11.5562, 10.3523, 9.8592, 10.2269, 10.9432, 11.3758, 11.4323,11.5021,11.4895, 10.9601, 9.9650, 8.4208, 8.1934, 7.9030] 4 | Y_3d=[46.3943,15.7942,9.1083,10.8060,14.1114,22.6173,31.8720,28.3000,20.2483,15.2311,10.8004,20.2338,25.6033,35.2207,29.1831, 
16.3451,14.9554,16.2854,26.3623,34.4985,24.2523, 20.6192,18.1405,17.3484,28.0920,35.9993,23.1917,15.3877] 5 | Y1 = [90.0585, 89.9763, 90.1588, 10.8239, 8.8382, 9.0440, 9.9118, 11.0468, 12.0813, 12.3653, 11.5053, 9.9235, 8.7291, 8.5098, 9.1294, 10.2406, 10.9890, 10.7011, 9.6494, 8.5989, 8.4489, 9.3461, 10.0073, 10.2401, 9.9965, 9.0982, 7.9569, 8.4805, 8.5101, 9.0833, 9.4951, 9.7672, 9.1740, 8.5483, 8.7967, 9.5826, 10.3982, 9.9145, 8.9524, 8.5694, 9.1277, 9.8620, 9.4036, 8.6904, 8.4803, 9.0725, 9.4882, 9.2406, 8.9340, 8.6731] 6 | X = [i for i in range(len(Y))] 7 | plt.plot(X, Y) 8 | plt.plot(X,Y_3d) 9 | plt.title("mean angle error of output") 10 | plt.xlabel("epoch") 11 | plt.ylabel("angle err (degree)") 12 | plt.legend(['use of 2D vector', 'use of 3D vector']) 13 | plt.show() -------------------------------------------------------------------------------- /single_eye_normalized/gpu/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x 
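# --- Usage sketch added for illustration; not part of the original file. ---
# Shapes follow the layer definitions above: a batch of 1x36x60 normalized eye
# patches plus a 2-D head-pose angle vector per sample. The pose is concatenated
# with the 500-D fc1 feature (hence fc2's 502 inputs), giving a 2-D (pitch, yaw)
# gaze prediction.
if __name__ == "__main__":
    model = Model()
    eye = torch.randn(4, 1, 36, 60)   # grey-scale eye images, 36x60
    head_pose = torch.randn(4, 2)     # (pitch, yaw) head-pose angles
    gaze = model(eye, head_pose)
    print(gaze.shape)                 # expected: torch.Size([4, 2])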
-------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/LeNet2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(503, 3) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /single_eye_normalized/validation/LeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(502, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x, y): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = torch.cat([x, y], dim=1) 34 | x = self.fc2(x) 35 | return x -------------------------------------------------------------------------------- /note/Apperance-based+gaze+estimation+in+the+wild_LR.md: -------------------------------------------------------------------------------- 1 | ### Appearance-Based Gaze Estimation in the Wild 2 | 3 | #### 0x00 The MPIIGaze Dataset 4 | 5 | - 15 participants, 213,659 pictures 6 | 7 | - outside of laboratory conditions, i.e during daliy routine 8 | - wilder range of recording location, time, illumination and eye appearance 9 | 10 | how to collect: use of laptop application to let volunteers to look at a fixed place, and take pictures of their eyes. 11 | 12 | use of laptop to collect: laptops are suited for long-term daily recordings but also because they are an important platform for *eye tracking application*. 
13 | 14 | #### 0x01 Calibration settings 15 | 16 | I think this is used in 3d head pose estimation and face aligment process. I don't use that. 17 | 18 | #### 0x02 Method 19 | 20 | The CNN is to learn the mapping from *head poses and eye images* to *gaze directions* in the camera coordinate system. 21 | 22 | i) Face alignment and 3d head pose estimation 23 | 24 | - detect face 25 | - generate 6D landmarks 26 | 27 | ii) Data normalisation 28 | 29 | > first proposed in *Learning-by-Synthesis for Appearance-based 3D Gaze Estimation* 30 | 31 | iii) Multimodal CNNs 32 | 33 | - Process the input 2D head angle `h` and the normalized eye image 𝑒 to get the final 2D line of sight angle vector `g` 34 | 35 | - use LeNet 36 | 37 | - add `h` in the full connect layer. -------------------------------------------------------------------------------- /single_eye_normalized/gpu/LeNet1.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is a LeNet vesion to ingore pose information 3 | in the CNN. the corresponding training code is in 4 | train1.py. Same utils.py is used here. 5 | ''' 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | 11 | def initialize_weights(module): 12 | if isinstance(module, nn.Conv2d): 13 | nn.init.constant_(module.bias, 0) 14 | elif isinstance(module, nn.Linear): 15 | nn.init.xavier_uniform_(module.weight) 16 | nn.init.constant_(module.bias, 0) 17 | 18 | class Model(nn.Module): 19 | def __init__(self): 20 | super(Model, self).__init__() 21 | 22 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 23 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 24 | self.fc1 = nn.Linear(3600, 500) 25 | self.fc2 = nn.Linear(500, 2) 26 | 27 | self._initialize_weight() 28 | 29 | def _initialize_weight(self): 30 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 31 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 32 | self.apply(initialize_weights) 33 | 34 | def forward(self, x): 35 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 36 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 37 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 38 | # x = torch.cat([x, y], dim=1) 39 | x = self.fc2(x) 40 | return x -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Why is it neccessary to covert 3D vector into 2D? 3 | What if we train using 3D poses and gazes? 
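(Added note, not in the original docstring: a gaze direction is a unit vector, so the
2D (pitch, yaw) form produced by utils.gaze3D_to_2D carries the same information
without the redundant norm; this script presumably keeps the raw 3D pose/gaze vectors
and the 3-output LeNet2 so that the two representations can be compared.)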
4 | ''' 5 | from utils import * 6 | from LeNet2 import * 7 | import torch 8 | 9 | def batch_process(j, batch, img, pose, gaze): 10 | ''' 11 | :return: a-img, b-pose, c-gaze 12 | ''' 13 | a = torch.randn(batch, 1, 36, 60) 14 | b = torch.randn(batch,3) 15 | c = torch.randn(batch,3) 16 | for i in range(batch): 17 | a[i, 0] = torch.tensor(img[j * batch + i]) 18 | b[i] = torch.tensor(pose[j * batch + i]) 19 | c[i] = torch.tensor(gaze[j * batch + i]) 20 | return a, b, c 21 | 22 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 23 | 24 | ltrain = len(train_gaze) 25 | ltest = len(test_gaze) 26 | print("training dataset size:", len(train_gaze)) 27 | print("test dataset size:", len(test_gaze)) 28 | 29 | 30 | ### training process ### 31 | cuda_gpu = torch.cuda.is_available() 32 | GazeCNN = Model() 33 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 34 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 35 | batch = 512 36 | train_range = int(ltrain / batch) 37 | test_range = int(ltest / batch) 38 | 39 | 40 | for epoch in range(1): 41 | for i in tqdm(range(1)): 42 | img, pose, gaze = batch_process(i, batch, train_image, train_pose, train_gaze) 43 | np.array(train_gaze) 44 | gaze_pred = GazeCNN(img, pose) 45 | loss = criterion(gaze_pred, gaze) 46 | loss.backward() 47 | optimizer.step() 48 | 49 | angle_loss=0 50 | for j in tqdm(range(1)): 51 | timg, tpose, tgaze = batch_process(j, batch, train_image, train_pose, train_gaze) 52 | tgaze_pred = GazeCNN(timg, tpose) 53 | print(mean_angle_loss(tgaze_pred, tgaze)) 54 | 55 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 56 | 57 | -------------------------------------------------------------------------------- /two_eye/Dataloader.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | 6 | 7 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 8 | 9 | def read_eye_data(mat, label): 10 | ''' 11 | read data from each .mat 12 | :param mat: file name 13 | :param label: right/ left 14 | :return: gaze, image, pose 15 | ''' 16 | mat_data = loadmat(mat) 17 | right_info = mat_data['data'][label][0, 0] 18 | gaze = right_info['gaze'][0, 0] 19 | image = right_info['image'][0, 0] 20 | pose = right_info['pose'][0, 0] 21 | return gaze, image, pose 22 | 23 | def collect_data_from_mat(label): 24 | ''' 25 | collect data from annotation part 26 | :return: list of index, image, pose, gaze 27 | ''' 28 | mat_files = glob.glob(path+'/Normalized/**/*.mat', recursive = True) 29 | mat_files.sort() 30 | i = 0 31 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 32 | for matfile in tqdm(mat_files[:2]): 33 | pnum = matfile.split('/')[-2] # pxx 34 | fgaze, fimage, fpose = read_eye_data(matfile, label) 35 | if int(pnum[1:]) < 7: 36 | if train_gaze == []: 37 | train_gaze = fgaze 38 | train_image = fimage 39 | train_pose = fpose 40 | else: 41 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 42 | train_image = np.append(train_image, fimage, axis = 0) 43 | train_pose = np.append(train_pose, fpose, axis = 0) 44 | else: 45 | if test_gaze == []: 46 | test_gaze = fgaze 47 | test_image = fimage 48 | test_pose = fpose 49 | else: 50 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 51 | test_image = np.append(test_image, fimage, axis = 0) 52 | test_pose = np.append(test_pose, fpose, axis = 0) 53 | i += 1 
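    # Added explanatory comments, not in the original source:
    # - the split above is by participant id: p00-p06 go to the training lists,
    #   all later participants to the test lists;
    # - mat_files[:2] only loads the first two .mat files, which looks like a
    #   debugging shortcut rather than the intended full-dataset behaviour.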
54 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose -------------------------------------------------------------------------------- /two_eye/README.md: -------------------------------------------------------------------------------- 1 | ### Literature Review 2 | 3 | #### 0x0. Getting Started 4 | 5 | For the two-eye gaze problem, I read this article: *Appearance-Based Gaze Estimation via Evaluation-Guided Asymmetric Regression* 6 | 7 | #### 0x1. Knowledge 8 | 9 | 1. the gaze directions of the two eyes should be physically consistent 10 | 2. even if we apply the same regression method, the gaze estimation performance on the two eyes can be very different 11 | 12 | Hence we need a new strategy that no longer treats both eyes equally. 13 | 14 | Strategy: **guide the asymmetric gaze regression by evaluating the performance of the regression strategy w.r.t. different eyes.** 15 | 16 | 17 | 18 | #### 0x2. Main Work 19 | 20 | 1. Propose a *multi-stream* AR-Net and E-Net. 21 | 2. Propose a new mechanism of evaluation-guided asymmetric regression. 22 | 3. Design the ARE-Net. 23 | 24 | 25 | 26 | #### 0x3. Two-eye asymmetry 27 | 28 | Previous work: treats the two eyes identically. 29 | 30 | Observation: we cannot expect the same accuracy for the two eyes; either eye may turn out to be the more accurate one. 31 | 32 | Why the asymmetry: head pose, image quality and individuality. 33 | 34 | How to solve it: propose a network that can tell which eye is of higher quality. 35 | 36 | 37 | 38 | #### 0x4. ARE-Net 39 | 40 | - AR-Net: 41 | - it is designed to optimize the two eyes in an asymmetric way 42 | - structure: 43 | - the first two streams extract a 500D deep feature from each eye independently, and the last two streams produce a joint 500D feature at the end 44 | - input the head pose vector (3D for each eye) before the final regression 45 | - Base-CNN: similar to AlexNet 46 | - loss function: weighted angular error 47 | - The weights λl and λr determine whether the accuracy of the left or the right eye should be considered more important 48 | 49 | - E-Net: 50 | - the evaluation network is trained to predict the probability of the left/right eye image being the more effective one for gaze estimation. 51 | 52 | 53 | 54 | 55 | 56 | #### 0x5.
My plan 57 | 58 | - First try to implement AR-Net, and see the mean err 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /two_eye/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 as cv 3 | import torch 4 | 5 | def pose3D_to_2D(pose): 6 | ''' 7 | pose (a, b, c) is rotation (angle) 8 | M = Rodrigues((x,y,z)) 9 | Zv = (the third column of M) 10 | theta = asin(Zv[1]) 11 | phi = atan2(Zv[0], Zv[2]) 12 | ''' 13 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 14 | vec = M[:, 2] 15 | yaw = np.arctan2(vec[0], vec[2]) 16 | pitch = np.arcsin(vec[1]) 17 | return np.array([pitch, yaw]) 18 | 19 | 20 | def gaze3D_to_2D(gaze): 21 | ''' 22 | gaze (x, y, z) is direction 23 | theta = asin(-y) 24 | phi = atan2(-x, -z) 25 | ''' 26 | x, y, z = (gaze[i] for i in range(3)) 27 | pitch = np.arcsin(-y) 28 | yaw = np.arctan2(-x, -z) 29 | return np.stack((pitch, yaw)).T 30 | 31 | 32 | def gaze2D_to_3D(gaze): 33 | ''' 34 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 35 | :return: gaze=(x,y,z) 36 | ''' 37 | pitch = gaze[0] 38 | yaw = gaze[1] 39 | x = -np.cos(pitch) * np.sin(yaw) 40 | y = -np.sin(pitch) 41 | z = -np.cos(pitch) * np.cos(yaw) 42 | norm = np.sqrt(x**2 + y**2 + z**2) 43 | x /= norm 44 | y /= norm 45 | z /= norm # all normalized 46 | return x, y, z 47 | 48 | 49 | def angle_error(pred, truth1, truth2): 50 | ''' 51 | :param pred: 52 | :param truth1: 53 | :param truth2: 54 | :return: 55 | ''' 56 | pred1 = pred[:,:2] # left 57 | pred2 = pred[:2,:] # right 58 | ans1 = mean_angle_loss(pred2, truth2) 59 | ans2 = mean_angle_loss(pred1, truth1) 60 | if ans2 > ans1: 61 | return ans1 62 | else: 63 | return ans2 64 | 65 | 66 | def mean_angle_loss(pred, truth): 67 | ''' 68 | :param pred,truth: type=torch.Tensor 69 | :return: 70 | ''' 71 | pred = pred.detach().numpy() 72 | ans = 0 73 | for i in range(len(pred)): 74 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 75 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 76 | angles = p_x * t_x + p_y * t_y + p_z * t_z 77 | ans += torch.acos(angles) * 180 / np.pi 78 | return ans / len(pred) 79 | 80 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/train1.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet1 import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch,2) 21 | c = torch.randn(batch,2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 29 | 30 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 31 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 32 | 33 | ltrain = len(train_gaze) 34 | ltest = len(test_gaze) 35 | print("training dataset size:", len(train_gaze)) 36 | print("test dataset size:", len(test_gaze)) 37 
| 38 | 39 | ### training process ### 40 | cuda_gpu = torch.cuda.is_available() 41 | print("cuda is", cuda_gpu) 42 | 43 | GazeCNN = Model() 44 | criterion = torch.nn.MSELoss(reduction="mean") 45 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.01) 46 | 47 | batch = 512 48 | train_range = int(ltrain / batch) 49 | test_range = int(ltest / batch) 50 | 51 | loss_list = [] 52 | 53 | for epoch in range(50): 54 | for i in tqdm(range(train_range)): 55 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 56 | gaze_pred_2D = GazeCNN(img) 57 | loss = criterion(gaze_pred_2D, gaze) 58 | loss.backward() 59 | optimizer.step() 60 | 61 | angle_loss=0 62 | for j in tqdm(range(test_range)): 63 | timg, tpose, tgaze = batch_process(j, batch, train_image, train_pose2D, train_gaze2D) 64 | tgaze_pred_2D = GazeCNN(timg) 65 | 66 | 67 | angle_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 68 | 69 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 70 | loss_list.append(angle_loss/test_range) 71 | 72 | print(loss_list) 73 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/train.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch,2) 21 | c = torch.randn(batch,2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 29 | 30 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 31 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 32 | 33 | ltrain = len(train_gaze) 34 | ltest = len(test_gaze) 35 | print("training dataset size:", len(train_gaze)) 36 | print("test dataset size:", len(test_gaze)) 37 | 38 | 39 | ### training process ### 40 | cuda_gpu = torch.cuda.is_available() 41 | GazeCNN = Model() 42 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 43 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 44 | batch = 512 45 | train_range = int(ltrain / batch) 46 | test_range = int(ltest / batch) 47 | 48 | 49 | for epoch in range(30): 50 | for i in tqdm(range(train_range)): 51 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 52 | if cuda_gpu: 53 | GazeCNN = GazeCNN.cuda() 54 | criterion = criterion.cuda() 55 | img = img.cuda() 56 | pose = pose.cuda() 57 | gaze = gaze.cuda() 58 | 59 | gaze_pred_2D = GazeCNN(img, pose) 60 | loss = criterion(gaze_pred_2D, gaze) 61 | loss.backward() 62 | optimizer.step() 63 | 64 | valid_loss=0 65 | for j in tqdm(range(test_range - 1)): 66 | vimg, vpose, vgaze = batch_process(j, batch, test_image, test_pose2D, test_gaze2D) 67 | if cuda_gpu: 68 | GazeCNN = GazeCNN.cpu() 69 | vgaze_pred_2D = GazeCNN(vimg, vpose) 70 | valid_loss += mean_angle_loss(vgaze_pred_2D, vgaze) 71 | 72 | print(valid_loss / (test_range-1)) 
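# Added note, not in the original script: optimizer.zero_grad() is never called
# before loss.backward(), so gradients accumulate across mini-batches. The usual
# PyTorch pattern for each training step would be:
#     optimizer.zero_grad()
#     loss = criterion(gaze_pred_2D, gaze)
#     loss.backward()
#     optimizer.step()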
73 | 74 | -------------------------------------------------------------------------------- /two_eye/ARNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | ############# definition of angle ################# 6 | 7 | def AngularErr(input,target): 8 | input = F.normalize(input) 9 | target = F.normalize(target) 10 | cosineLoss = nn.CosineSimilarity() 11 | return cosineLoss(input, target) 12 | 13 | class Criterion(nn.Module): 14 | def __init__(self): 15 | super(Criterion, self).__init__() 16 | 17 | def forward(self, pred_vec, left_gt, right_gt): 18 | left_pd = pred_vec[:,:2] 19 | right_pd = pred_vec[:,2:] 20 | vall = AngularErr(left_gt, left_pd) 21 | vall = torch.acos(vall) 22 | dvall = torch.div(1, vall) 23 | valr = AngularErr(right_gt, right_pd) 24 | valr = torch.acos(valr) 25 | dvalr = torch.div(1, valr) 26 | sum = torch.add(dvalr, dvall) 27 | weightl = torch.div(dvall, sum) 28 | weightr = torch.div(dvalr, sum) 29 | return torch.add(torch.mul(weightl, vall), torch.mul(weightr, valr)) 30 | 31 | ########### Base-CNN set up ############ 32 | 33 | def initialize_weights(module): 34 | if isinstance(module, nn.Conv2d): 35 | nn.init.constant_(module.bias, 0) 36 | elif isinstance(module, nn.Linear): 37 | nn.init.xavier_uniform_(module.weight) 38 | nn.init.constant_(module.bias, 0) 39 | 40 | class BaseCNN(nn.Module): 41 | def __init__(self): 42 | super(BaseCNN, self).__init__() 43 | 44 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 45 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 46 | self.fc1 = nn.Linear(3600, 500) 47 | # self.fc2 = nn.Linear(503, 3) 48 | 49 | self._initialize_weight() 50 | 51 | def _initialize_weight(self): 52 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 53 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 54 | self.apply(initialize_weights) 55 | 56 | def forward(self, x): 57 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 58 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 59 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 60 | # x = torch.cat([x, y], dim=1) 61 | # x = self.fc2(x) 62 | return x 63 | 64 | ############### AR-NET set up ################ 65 | 66 | class ARNet(nn.Module): 67 | def __init__(self): 68 | super(ARNet, self).__init__() 69 | 70 | self.bCNN1 = BaseCNN() 71 | self.bCNN2 = BaseCNN() 72 | self.fc1 = nn.Linear(1004, 4) 73 | 74 | def forward(self, x1, x2, y1, y2): 75 | ### x1: left image, x2: right image, y: head pose 76 | x1 = self.bCNN1(x1) 77 | x2 = self.bCNN2(x2) 78 | x = torch.cat([x1, x2], dim=1) 79 | x = torch.cat([x, y1], dim=1) 80 | x = torch.cat([x, y2], dim=1) 81 | x = self.fc1(x) 82 | 83 | return x -------------------------------------------------------------------------------- /single_eye_normalized/validation/train2.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | from LeNet import * 3 | import torch 4 | 5 | def get_2D_vector(pose, gaze): 6 | pose2d = [] 7 | gaze2d = [] 8 | for i in np.arange(0, len(pose), 1): 9 | pose2d.append(pose3D_to_2D(pose[i])) 10 | gaze2d.append(gaze3D_to_2D(gaze[i])) 11 | poses = np.array(pose2d) 12 | gazes = np.array(gaze2d) 13 | return poses, gazes 14 | 15 | def batch_process(j, batch, img, pose, gaze): 16 | ''' 17 | :return: a-img, b-pose, c-gaze 18 | ''' 19 | a = torch.randn(batch, 1, 36, 60) 20 | b = torch.randn(batch, 2) 21 | c = 
torch.randn(batch, 2) 22 | for i in range(batch): 23 | a[i, 0] = torch.tensor(img[j * batch + i]) 24 | b[i] = torch.tensor(pose[j * batch + i]) 25 | c[i] = torch.tensor(gaze[j * batch + i]) 26 | return a, b, c 27 | 28 | 29 | if __name__ == "__main__": 30 | 31 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 32 | tk = 10 33 | is_gpu = torch.cuda.is_available() 34 | print("Use of gpu", is_gpu) 35 | for i in range(tk): 36 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(tk, i, raw_gaze, raw_image, raw_pose) 37 | 38 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 39 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 40 | 41 | ltrain = len(t_gaze) 42 | lvaild = len(v_gaze) 43 | print("training dataset size:", len(t_gaze)) 44 | print("test dataset size:", len(v_gaze)) 45 | 46 | 47 | ##### CNN definition ##### 48 | GazeCNN = Model() 49 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 50 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 51 | 52 | if is_gpu: 53 | GazeCNN = GazeCNN.cuda() 54 | criterion = criterion.cuda() 55 | 56 | batch = 512 # 57 | train_range = int(ltrain / batch) 58 | test_range = int(lvaild / batch) 59 | 60 | for epoch in tqdm(range(35)):# 61 | for i in range(train_range): 62 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, t_gaze_2D) 63 | if is_gpu: 64 | img = img.cuda() 65 | pose = pose.cuda() 66 | gaze = gaze.cuda() 67 | gaze_pred_2D = GazeCNN(img, pose) 68 | 69 | loss = criterion(gaze_pred_2D, gaze) 70 | loss.backward() 71 | optimizer.step() 72 | 73 | ## train result 74 | train_loss = 0 75 | for k in tqdm(range(train_range-1)): 76 | img, pose, gaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 77 | GazeCNN = GazeCNN.cpu() 78 | gaze_pred_2D = GazeCNN(img, pose) 79 | train_loss += mean_angle_loss(gaze_pred_2D, gaze) 80 | 81 | ## validation result 82 | valid_loss = 0 83 | for j in tqdm(range(test_range-1)): 84 | img, pose, gaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 85 | GazeCNN = GazeCNN.cpu() 86 | gaze_pred_2D = GazeCNN(img, pose) 87 | valid_loss += mean_angle_loss(gaze_pred_2D, gaze) 88 | 89 | print("train_loss, valid_loss = [{},{}]".format(train_loss/(train_range-1), valid_loss/(test_range-1))) 90 | -------------------------------------------------------------------------------- /two_eye/main.py: -------------------------------------------------------------------------------- 1 | from Dataloader import * 2 | from ARNet import * 3 | from utils import * 4 | import torch 5 | 6 | def get_2D_vector(pose, gaze): 7 | pose2d = [] 8 | gaze2d = [] 9 | for i in np.arange(0, len(pose), 1): 10 | pose2d.append(pose3D_to_2D(pose[i])) 11 | gaze2d.append(gaze3D_to_2D(gaze[i])) 12 | poses = np.array(pose2d) 13 | gazes = np.array(gaze2d) 14 | return poses, gazes 15 | 16 | def batch_process(j, batch, img, pose, gaze): 17 | ''' 18 | :return: a-img, b-pose, c-gaze 19 | ''' 20 | a = torch.randn(batch, 1, 36, 60) 21 | b = torch.randn(batch,2) 22 | c = torch.randn(batch,2) 23 | for i in range(batch): 24 | a[i, 0] = torch.tensor(img[j * batch + i]) 25 | b[i] = torch.tensor(pose[j * batch + i]) 26 | c[i] = torch.tensor(gaze[j * batch + i]) 27 | return a, b, c 28 | 29 | 30 | train_gazel, train_imagel, train_posel, test_gazel, test_imagel, test_posel = collect_data_from_mat("left") 31 | train_gazer, train_imager, train_poser, test_gazer, test_imager, test_poser = collect_data_from_mat("right") 32 | 33 | ###### transfer to 2D vectors ###### 34 | 35 | train_pose2Dl, train_gaze2Dl = 
get_2D_vector(train_posel, train_gazel) 36 | test_pose2Dl, test_gaze2Dl = get_2D_vector(test_posel, test_gazel) 37 | train_pose2Dr, train_gaze2Dr = get_2D_vector(train_poser, train_gazer) 38 | test_pose2Dr, test_gaze2Dr = get_2D_vector(test_poser, test_gazer) 39 | 40 | print("training dataset size:", len(train_gazel)) 41 | print("test dataset size:", len(test_gazel)) 42 | 43 | print("training dataset size:", len(train_gazer)) 44 | print("test dataset size:", len(test_gazer)) 45 | 46 | cuda_gpu = torch.cuda.is_available() 47 | 48 | GazeNet = ARNet() 49 | optimizer = torch.optim.Adam(GazeNet.parameters(), lr=0.0001) 50 | # criterion = Criterion() 51 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 52 | batch = 10 53 | train_range = int(len(train_gaze2Dl) / batch) 54 | test_range = int(len(test_gaze2Dl) / batch) 55 | 56 | for epoch in range(1): 57 | for i in tqdm(range(2)): 58 | imgl, posel, gazel = batch_process(i, batch, train_imagel, train_pose2Dl, train_gaze2Dl) 59 | imgr, poser, gazer = batch_process(i, batch, train_imagel, train_pose2Dl, train_gaze2Dl) 60 | if cuda_gpu: 61 | GazeNet = GazeNet.cuda() 62 | criterion = criterion.cuda() 63 | imgl = imgl.cuda() 64 | posel = posel.cuda() 65 | gazel = gazel.cuda() 66 | imgr = imgr.cuda() 67 | poser = poser.cuda() 68 | gazer = gazer.cuda() 69 | gaze_pred_2D = GazeNet(imgl, imgr, posel, poser) 70 | gaze_trut_2D = torch.cat([gazel, gazer], dim=1) 71 | loss = criterion(gaze_pred_2D, gaze_trut_2D) 72 | 73 | loss.backward(loss.clone().detach()) 74 | optimizer.step() 75 | 76 | valid_loss=0 77 | for j in tqdm(range(test_range - 1)): 78 | vimgl, vposel, vgazel = batch_process(j, batch, test_imagel, test_pose2Dl, test_gaze2Dl) 79 | vimgr, vposer, vgazer = batch_process(j, batch, test_imager, test_pose2Dr, test_gaze2Dr) 80 | if cuda_gpu: 81 | GazeCNN = GazeNet.cpu() 82 | vgaze_pred_2D = GazeNet(vimgl, vimgr, vposel, vposer) 83 | valid_loss += angle_error(vgaze_pred_2D, vgazel, vgazer) 84 | 85 | print(valid_loss/(test_range-1)) 86 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/train.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This is to examine a model by K-fold validation 3 | calculate and get the mean error for the model 4 | ''' 5 | 6 | from utils import * 7 | from LeNet import * 8 | import torch 9 | 10 | def get_2D_vector(pose, gaze): 11 | pose2d = [] 12 | gaze2d = [] 13 | for i in np.arange(0, len(pose), 1): 14 | pose2d.append(pose3D_to_2D(pose[i])) 15 | gaze2d.append(gaze3D_to_2D(gaze[i])) 16 | poses = np.array(pose2d) 17 | gazes = np.array(gaze2d) 18 | return poses, gazes 19 | 20 | def batch_process(j, batch, img, pose, gaze): 21 | ''' 22 | :return: a-img, b-pose, c-gaze 23 | ''' 24 | a = torch.randn(batch, 1, 36, 60) 25 | b = torch.randn(batch, 2) 26 | c = torch.randn(batch, 2) 27 | for i in range(batch): 28 | a[i, 0] = torch.tensor(img[j * batch + i]) 29 | b[i] = torch.tensor(pose[j * batch + i]) 30 | c[i] = torch.tensor(gaze[j * batch + i]) 31 | return a, b, c 32 | 33 | 34 | if __name__ == "__main__": 35 | 36 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 37 | tk = 5 38 | is_gpu = torch.cuda.is_available() 39 | print("Use of gpu", is_gpu) 40 | for i in range(tk): 41 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(tk, i, raw_gaze, raw_image, raw_pose) 42 | 43 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 44 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 45 | 46 | 
ltrain = len(t_gaze) 47 | lvaild = len(v_gaze) 48 | print("training dataset size:", len(t_gaze)) 49 | print("test dataset size:", len(v_gaze)) 50 | 51 | 52 | ##### CNN definition ##### 53 | GazeCNN = Model() 54 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 55 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 56 | 57 | if is_gpu: 58 | GazeCNN = GazeCNN.cuda() 59 | criterion = criterion.cuda() 60 | 61 | batch = 512 # 62 | train_range = int(ltrain / batch) 63 | test_range = int(lvaild / batch) 64 | 65 | for epoch in tqdm(range(40)):# 66 | for i in range(train_range): 67 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, t_gaze_2D) 68 | if is_gpu: 69 | img = img.cuda() 70 | pose = pose.cuda() 71 | gaze = gaze.cuda() 72 | gaze_pred_2D = GazeCNN(img, pose) 73 | 74 | loss = criterion(gaze_pred_2D, gaze) 75 | loss.backward() 76 | optimizer.step() 77 | 78 | ## train result 79 | train_loss = 0 80 | for k in tqdm(range(train_range)): 81 | img, pose, gaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 82 | GazeCNN = GazeCNN.cpu() 83 | gaze_pred_2D = GazeCNN(img, pose) 84 | train_loss += mean_angle_loss(gaze_pred_2D, gaze) 85 | 86 | ## validation result 87 | valid_loss = 0 88 | for j in tqdm(range(test_range)): 89 | img, pose, gaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 90 | GazeCNN = GazeCNN.cpu() 91 | gaze_pred_2D = GazeCNN(img, pose) 92 | valid_loss += mean_angle_loss(gaze_pred_2D, gaze) 93 | 94 | print("train_loss, valid_loss = [{},{}]".format(train_loss/train_range, valid_loss/test_range)) 95 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/train_without_headpose.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | def initialize_weights(module): 7 | if isinstance(module, nn.Conv2d): 8 | nn.init.constant_(module.bias, 0) 9 | elif isinstance(module, nn.Linear): 10 | nn.init.xavier_uniform_(module.weight) 11 | nn.init.constant_(module.bias, 0) 12 | 13 | class Model(nn.Module): 14 | def __init__(self): 15 | super(Model, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0) 18 | self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0) 19 | self.fc1 = nn.Linear(3600, 500) 20 | self.fc2 = nn.Linear(500, 2) 21 | 22 | self._initialize_weight() 23 | 24 | def _initialize_weight(self): 25 | nn.init.normal_(self.conv1.weight, mean=0, std=0.1) 26 | nn.init.normal_(self.conv2.weight, mean=0, std=0.01) 27 | self.apply(initialize_weights) 28 | 29 | def forward(self, x): 30 | x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2) 31 | x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2) 32 | x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) #flatten 33 | x = self.fc2(x) 34 | return x 35 | 36 | def get_2D_vector(pose, gaze): 37 | pose2d = [] 38 | gaze2d = [] 39 | for i in np.arange(0, len(pose), 1): 40 | pose2d.append(pose3D_to_2D(pose[i])) 41 | gaze2d.append(gaze3D_to_2D(gaze[i])) 42 | poses = np.array(pose2d) 43 | gazes = np.array(gaze2d) 44 | return poses, gazes 45 | 46 | def batch_process(j, batch, img, pose, gaze): 47 | ''' 48 | :return: a-img, b-pose, c-gaze 49 | ''' 50 | a = torch.randn(batch, 1, 36, 60) 51 | b = torch.randn(batch,2) 52 | c = torch.randn(batch,2) 53 | for i in range(batch): 54 | a[i, 0] = torch.tensor(img[j * batch + i]) 55 | b[i] = torch.tensor(pose[j * batch + 
i]) 56 | c[i] = torch.tensor(gaze[j * batch + i]) 57 | return a, b, c 58 | 59 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose = collect_data_from_mat() 60 | 61 | train_pose2D, train_gaze2D = get_2D_vector(train_pose, train_gaze) 62 | test_pose2D, test_gaze2D = get_2D_vector(test_pose, test_gaze) 63 | 64 | ltrain = len(train_gaze) 65 | ltest = len(test_gaze) 66 | print("training dataset size:", len(train_gaze)) 67 | print("test dataset size:", len(test_gaze)) 68 | 69 | 70 | ### training process ### 71 | GazeCNN = Model() 72 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.001) 73 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 74 | batch = 10 75 | train_range = int(ltrain / batch) 76 | test_range = int(ltest / batch) 77 | 78 | 79 | for epoch in range(10): 80 | for i in tqdm(range(train_range)): 81 | img, pose, gaze = batch_process(i, batch, train_image, train_pose2D, train_gaze2D) 82 | gaze_pred_2D = GazeCNN(img) 83 | 84 | loss = criterion(gaze_pred_2D, gaze) 85 | loss.retain_grad() 86 | loss.backward() 87 | optimizer.step() 88 | 89 | angle_loss=0 90 | for j in tqdm(range(test_range)): 91 | timg, tpose, tgaze = batch_process(j, batch, test_image, test_pose2D, test_gaze2D) 92 | tgaze_pred_2D = GazeCNN(timg) 93 | angle_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 94 | 95 | print("epoch", epoch, "average loss on test dataset:", angle_loss / test_range) 96 | 97 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/train_onetime.py: -------------------------------------------------------------------------------- 1 | ''' 2 | This train_onetime.py is to train for onetime, 3 | which will help to draw a curve on validation loss 4 | and train loss. 5 | ''' 6 | from utils import * 7 | from LeNet import * 8 | import torch 9 | 10 | 11 | def get_2D_vector(pose, gaze): 12 | pose2d = [] 13 | gaze2d = [] 14 | for i in np.arange(0, len(pose), 1): 15 | pose2d.append(pose3D_to_2D(pose[i])) 16 | gaze2d.append(gaze3D_to_2D(gaze[i])) 17 | poses = np.array(pose2d) 18 | gazes = np.array(gaze2d) 19 | return poses, gazes 20 | 21 | def batch_process(j, batch, img, pose, gaze): 22 | ''' 23 | :return: a-img, b-pose, c-gaze 24 | ''' 25 | 26 | a = torch.randn(batch, 1, 36, 60) 27 | b = torch.randn(batch, 2) 28 | c = torch.randn(batch, 2) 29 | for i in range(batch): 30 | a[i, 0] = torch.tensor(img[j * batch + i]) 31 | b[i] = torch.tensor(pose[j * batch + i]) 32 | c[i] = torch.tensor(gaze[j * batch + i]) 33 | return a, b, c 34 | 35 | if __name__ == "__main__": 36 | 37 | raw_gaze, raw_image, raw_pose, raw_index = collect_data_from_mat() 38 | is_gpu = torch.cuda.is_available() 39 | t_gaze, t_pose, t_image, v_gaze, v_pose, v_image = get_kfold_data(5, 3, raw_gaze, raw_image, raw_pose) 40 | t_pose_2D, t_gaze_2D = get_2D_vector(t_pose, t_gaze) 41 | v_pose_2D, v_gaze_2D = get_2D_vector(v_pose, v_gaze) 42 | 43 | ltrain = len(t_gaze) 44 | lvaild = len(v_gaze) 45 | print("training dataset size:", ltrain) 46 | print("test dataset size:", lvaild) 47 | 48 | GazeCNN = Model() 49 | optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001) 50 | criterion = torch.nn.SmoothL1Loss(reduction="mean") 51 | if is_gpu: 52 | criterion = criterion.cuda() 53 | 54 | batch = 128 55 | train_range = int(ltrain / batch) 56 | test_range = int(lvaild / batch) 57 | 58 | train_loss_list = [] 59 | valid_loss_list = [] 60 | for epoch in range(100): 61 | for i in tqdm(range(train_range)): 62 | img, pose, gaze = batch_process(i, batch, t_image, t_pose_2D, 
t_gaze_2D) 63 | if is_gpu: 64 | GazeCNN = GazeCNN.cuda() 65 | img = img.cuda() 66 | pose = pose.cuda() 67 | gaze = gaze.cuda() 68 | 69 | gaze_pred_2D = GazeCNN(img, pose) 70 | 71 | loss = criterion(gaze_pred_2D, gaze) 72 | loss.backward() 73 | optimizer.step() 74 | 75 | train_loss = 0 76 | for k in tqdm(range(train_range - 1)): 77 | timg, tpose, tgaze = batch_process(k, batch, t_image, t_pose_2D, t_gaze_2D) 78 | GazeCNN = GazeCNN.cpu() 79 | tgaze_pred_2D = GazeCNN(timg, tpose) 80 | train_loss += mean_angle_loss(tgaze_pred_2D, tgaze) 81 | 82 | train_loss = train_loss / (train_range - 1) 83 | train_loss_list.append(train_loss) 84 | 85 | ## validation result 86 | valid_loss = 0 87 | for j in tqdm(range(test_range - 1)): 88 | vimg, vpose, vgaze = batch_process(j, batch, v_image, v_pose_2D, v_gaze_2D) 89 | GazeCNN = GazeCNN.cpu() 90 | vgaze_pred_2D = GazeCNN(vimg, vpose) 91 | valid_loss += mean_angle_loss(vgaze_pred_2D, vgaze) 92 | 93 | valid_loss = valid_loss / (test_range - 1) 94 | valid_loss_list.append(valid_loss) 95 | 96 | print("train_loss, valid_loss = [{},{}]".format(train_loss, valid_loss)) 97 | 98 | print("valid loss result:", valid_loss_list) 99 | print("train loss result:", train_loss_list) 100 | 101 | 102 | -------------------------------------------------------------------------------- /single_eye_normalized/gpu/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | 8 | def read_eye_data(mat): 9 | ''' 10 | read each mat file info 11 | ''' 12 | mat_data = loadmat(mat) 13 | right_info = mat_data['data']['right'][0, 0] 14 | gaze = right_info['gaze'][0, 0] 15 | image = right_info['image'][0, 0] 16 | pose = right_info['pose'][0, 0] 17 | return gaze, image, pose 18 | 19 | def collect_data_from_mat(): 20 | ''' 21 | collect data from annotation part 22 | :return: list of index, image, pose, gaze 23 | ''' 24 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 25 | mat_files.sort() 26 | index = list() 27 | # X: image, head_pose 28 | # y: gaze vector 29 | # index: pnum, pday 30 | i = 0 31 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 32 | for matfile in tqdm(mat_files): 33 | pnum = matfile.split('/')[-2] # pxx 34 | pday = matfile.split('/')[-1].split('.')[0] # day0x 35 | index.append(pnum + '/' + pday) 36 | 37 | fgaze, fimage, fpose = read_eye_data(matfile) 38 | if int(pnum[1:]) < 7: 39 | if train_gaze == []: 40 | train_gaze = fgaze 41 | train_image = fimage 42 | train_pose = fpose 43 | else: 44 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 45 | train_image = np.append(train_image, fimage, axis = 0) 46 | train_pose = np.append(train_pose, fpose, axis = 0) 47 | else: 48 | if test_gaze == []: 49 | test_gaze = fgaze 50 | test_image = fimage 51 | test_pose = fpose 52 | else: 53 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 54 | test_image = np.append(test_image, fimage, axis = 0) 55 | test_pose = np.append(test_pose, fpose, axis = 0) 56 | i += 1 57 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose 58 | 59 | def pose3D_to_2D(pose): 60 | ''' 61 | pose (a, b, c) is rotation (angle) 62 | M = Rodrigues((x,y,z)) 63 | Zv = (the third column of M) 64 | theta = asin(Zv[1]) 65 | phi = atan2(Zv[0], Zv[2]) 66 | ''' 67 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 68 | vec = M[:, 2] 69 | yaw = np.arctan2(vec[0], vec[2]) 70 | 
pitch = np.arcsin(vec[1]) 71 | return np.array([pitch, yaw]) 72 | 73 | 74 | def gaze3D_to_2D(gaze): 75 | ''' 76 | gaze (x, y, z) is direction 77 | theta = asin(-y) 78 | phi = atan2(-x, -z) 79 | ''' 80 | x, y, z = (gaze[i] for i in range(3)) 81 | pitch = np.arcsin(-y) 82 | yaw = np.arctan2(-x, -z) 83 | return np.stack((pitch, yaw)).T 84 | 85 | 86 | def gaze2D_to_3D(gaze): 87 | ''' 88 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 89 | :return: gaze=(x,y,z) 90 | ''' 91 | pitch = gaze[0] 92 | yaw = gaze[1] 93 | x = -np.cos(pitch) * np.sin(yaw) 94 | y = -np.sin(pitch) 95 | z = -np.cos(pitch) * np.cos(yaw) 96 | norm = np.sqrt(x**2 + y**2 + z**2) 97 | x /= norm 98 | y /= norm 99 | z /= norm # all normalized 100 | return x, y, z 101 | 102 | 103 | def mean_angle_loss(pred, truth): 104 | ''' 105 | :param pred,truth: type=torch.Tensor 106 | :return: 107 | ''' 108 | pred = pred.detach().numpy() 109 | ans = 0 110 | for i in range(len(pred)): 111 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 112 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 113 | angles = p_x * t_x + p_y * t_y + p_z * t_z 114 | ans += torch.acos(angles) * 180 / np.pi 115 | return ans / len(pred) 116 | 117 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | import math 8 | 9 | def read_eye_data(mat): 10 | ''' 11 | read each mat file info 12 | ''' 13 | mat_data = loadmat(mat) 14 | right_info = mat_data['data']['right'][0, 0] 15 | gaze = right_info['gaze'][0, 0] 16 | image = right_info['image'][0, 0] 17 | pose = right_info['pose'][0, 0] 18 | return gaze, image, pose 19 | 20 | def collect_data_from_mat(): 21 | ''' 22 | collect data from annotation part 23 | :return: list of index, image, pose, gaze 24 | ''' 25 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 26 | mat_files.sort() 27 | index = list() 28 | # X: image, head_pose 29 | # y: gaze vector 30 | # index: pnum, pday 31 | i = 0 32 | train_gaze, train_image, train_pose, test_gaze, test_image, test_pose=[],[],[],[],[],[] 33 | for matfile in tqdm(mat_files): 34 | pnum = matfile.split('/')[-2] # pxx 35 | pday = matfile.split('/')[-1].split('.')[0] # day0x 36 | index.append(pnum + '/' + pday) 37 | 38 | fgaze, fimage, fpose = read_eye_data(matfile) 39 | if int(pnum[1:]) < 7: 40 | if train_gaze == []: 41 | train_gaze = fgaze 42 | train_image = fimage 43 | train_pose = fpose 44 | else: 45 | train_gaze = np.append(train_gaze, fgaze, axis = 0) 46 | train_image = np.append(train_image, fimage, axis = 0) 47 | train_pose = np.append(train_pose, fpose, axis = 0) 48 | else: 49 | if test_gaze == []: 50 | test_gaze = fgaze 51 | test_image = fimage 52 | test_pose = fpose 53 | else: 54 | test_gaze = np.append(test_gaze, fgaze, axis = 0) 55 | test_image = np.append(test_image, fimage, axis = 0) 56 | test_pose = np.append(test_pose, fpose, axis = 0) 57 | i += 1 58 | return train_gaze, train_image, train_pose, test_gaze, test_image, test_pose 59 | 60 | def pose3D_to_2D(pose): 61 | ''' 62 | pose (a, b, c) is rotation (angle) 63 | M = Rodrigues((x,y,z)) 64 | Zv = (the third column of M) 65 | theta = asin(Zv[1]) 66 | phi = atan2(Zv[0], Zv[2]) 67 | ''' 68 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 69 | vec = M[:, 2] 70 | yaw = np.arctan2(vec[0], vec[2]) 71 | pitch = 
np.arcsin(vec[1]) 72 | return np.array([pitch, yaw]) 73 | 74 | 75 | def gaze3D_to_2D(gaze): 76 | ''' 77 | gaze (x, y, z) is direction 78 | theta = asin(-y) 79 | phi = atan2(-x, -z) 80 | ''' 81 | x, y, z = (gaze[i] for i in range(3)) 82 | pitch = np.arcsin(-y) 83 | yaw = np.arctan2(-x, -z) 84 | return np.stack((pitch, yaw)).T 85 | 86 | 87 | def gaze2D_to_3D(gaze): 88 | ''' 89 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 90 | :return: gaze=(x,y,z) 91 | ''' 92 | pitch = gaze[0] 93 | yaw = gaze[1] 94 | x = -np.cos(pitch) * np.sin(yaw) 95 | y = -np.sin(pitch) 96 | z = -np.cos(pitch) * np.cos(yaw) 97 | norm = np.sqrt(x**2 + y**2 + z**2) 98 | x /= norm 99 | y /= norm 100 | z /= norm 101 | return x, y, z 102 | 103 | 104 | def mean_angle_loss(pred, truth): 105 | ''' 106 | :param pred,truth: type=torch.Tensor 107 | :return: 108 | ''' 109 | pred = pred.detach().numpy() 110 | ans = 0 111 | for i in range(len(pred)): 112 | p_x, p_y, p_z = (pred[i][j] for j in range(3)) 113 | t_x, t_y, t_z = (truth[i][j] for j in range(3)) 114 | # print("p_x={}, p_y={}, p_z={}".format(p_x, p_y, p_z)) 115 | # print("t_x={}, t_y={}, t_z={}".format(t_x, t_y, t_z)) 116 | angles = (p_x * t_x + p_y * t_y + p_z * t_z)/math.sqrt(p_x**2+p_y**2+p_z**2) * math.sqrt(t_x**2+t_y**2+t_z**2) 117 | ans += math.acos(angles) * 180 / np.pi 118 | return ans / len(pred) 119 | -------------------------------------------------------------------------------- /single_eye_normalized/validation/utils.py: -------------------------------------------------------------------------------- 1 | from scipy.io import loadmat 2 | import glob 3 | from tqdm import tqdm 4 | import numpy as np 5 | import cv2 as cv 6 | import torch 7 | import math 8 | from scipy.io import loadmat 9 | 10 | path = "/Users/liqilin/PycharmProjects/untitled/EyeGaze/single_eye_normalized" 11 | 12 | data_dict = dict() 13 | 14 | def read_eye_data(mat): 15 | ''' 16 | read each mat file info 17 | ''' 18 | mat_data = loadmat(mat) 19 | right_info = mat_data['data']['right'][0, 0] 20 | gaze = right_info['gaze'][0, 0] 21 | image = right_info['image'][0, 0] 22 | pose = right_info['pose'][0, 0] 23 | return gaze, image, pose 24 | 25 | def collect_data_from_mat(): 26 | ''' 27 | collect data from annotation part 28 | :param path: path of normalized data 29 | :return: list of index, image, pose, gaze 30 | ''' 31 | mat_files = glob.glob('Normalized/**/*.mat', recursive = True) 32 | # mat_files.sort() 33 | gaze = list() 34 | image = list() 35 | index = list() 36 | pose = list() 37 | for matfile in tqdm(mat_files): 38 | pnum = matfile.split('/')[-2] # pxx 39 | pday = matfile.split('/')[-1].split('.')[0] # day0x 40 | index.append(pnum + '/' + pday) 41 | 42 | fgaze, fimage, fpose = read_eye_data(matfile) 43 | 44 | if gaze == []: 45 | gaze = fgaze 46 | image = fimage 47 | pose = fpose 48 | else: 49 | gaze = np.append(gaze, fgaze, axis = 0) 50 | image = np.append(image, fimage, axis = 0) 51 | pose = np.append(pose, fpose, axis = 0) 52 | 53 | return gaze, image, pose, index 54 | 55 | 56 | def get_kfold_data(k, i, gaze, image, pose): 57 | ''' 58 | implement k-fold validation 59 | input type = numpy.narray 60 | output type = numoy.narray 61 | ''' 62 | fold_size = gaze.shape[0] // k 63 | start = i * fold_size 64 | if i != k - 1: # Not the final round 65 | end = (i + 1) * fold_size 66 | v_gaze, v_pose, v_image = gaze[start:end], pose[start:end],image[start:end] 67 | t_gaze = np.concatenate((gaze[0:start], gaze[end:]), axis=0) 68 | t_pose = np.concatenate((pose[0:start], pose[end:]), 
axis=0) 69 | t_image = np.concatenate((image[0:start], image[end:]), axis=0) 70 | else: 71 | v_gaze, v_pose, v_image = gaze[start:], pose[start:],image[start:] 72 | t_gaze, t_pose, t_image = gaze[0:start], pose[0:start],image[0:start] 73 | 74 | return t_gaze, t_pose, t_image, v_gaze, v_pose, v_image 75 | 76 | 77 | def pose3D_to_2D(pose): 78 | ''' 79 | pose (a, b, c) is rotation (angle) 80 | M = Rodrigues((x,y,z)) 81 | Zv = (the third column of M) 82 | theta = asin(Zv[1]) 83 | phi = atan2(Zv[0], Zv[2]) 84 | ''' 85 | M, _ = cv.Rodrigues(np.array(pose).astype(np.float32)) 86 | vec = M[:, 2] 87 | yaw = np.arctan2(vec[0], vec[2]) 88 | pitch = np.arcsin(vec[1]) 89 | return np.array([pitch, yaw]) 90 | 91 | 92 | def gaze3D_to_2D(gaze): 93 | ''' 94 | gaze (x, y, z) is direction 95 | theta = asin(-y) 96 | phi = atan2(-x, -z) 97 | ''' 98 | x, y, z = (gaze[i] for i in range(3)) 99 | pitch = np.arcsin(-y) 100 | yaw = np.arctan2(-x, -z) 101 | return np.stack((pitch, yaw)).T 102 | 103 | 104 | def gaze2D_to_3D(gaze): 105 | ''' 106 | :param gaze: gaze (yaw, pitch) is the rotation angle, type=(list) 107 | :return: gaze=(x,y,z) 108 | ''' 109 | pitch = gaze[0] 110 | yaw = gaze[1] 111 | x = -np.cos(pitch) * np.sin(yaw) 112 | y = -np.sin(pitch) 113 | z = -np.cos(pitch) * np.cos(yaw) 114 | norm = np.sqrt(x**2 + y**2 + z**2) 115 | x /= norm 116 | y /= norm 117 | z /= norm 118 | return x, y, z 119 | 120 | 121 | def mean_angle_loss(pred, truth): 122 | ''' 123 | :param pred,truth: type=torch.Tensor 124 | :return: 125 | ''' 126 | pred = pred.detach().numpy() 127 | ans = 0 128 | for i in range(len(pred)): 129 | p_x, p_y, p_z = gaze2D_to_3D(pred[i]) 130 | t_x, t_y, t_z = gaze2D_to_3D(truth[i]) 131 | angles = p_x * t_x + p_y * t_y + p_z * t_z 132 | ans += torch.acos(angles) * 180 / np.pi 133 | return ans / len(pred) 134 | 135 | 136 | # gaze, image, pose, index = collect_data_from_mat() 137 | # t_gaze, t_pose, t_image, v_gaze, v_pose, v_image=get_kfold_data(10,0,gaze,image,pose) 138 | -------------------------------------------------------------------------------- /single_eye_normalized/visualize/draw.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import matplotlib.pyplot as plt\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 3, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/plain": "Text(0.5, 1.0, 'dataset distribution(by person)')" 22 | }, 23 | "execution_count": 3, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | }, 27 | { 28 | "data": { 29 | "text/plain": "
", 30 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAl4AAAFNCAYAAADRi2EuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3debicZX3/8fdHQEFEFokRBAwCRdEKakSs1qpoRaFC1SL8xEalUq/i1mo16s+6t6h1oT+tLaISEUEWKRQshSJKrWtYlE2LSxCQJQoIisr2/f3xPEcnx3OSScjcMzm8X9d1rnn2+zszyZzPue9nnidVhSRJkkbvXuMuQJIk6Z7C4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLykCZXkqCTvGncdd1eSZUme3k+/KcmRa/HYP0/y0H56rb5eSf4lyVvW0rHmJflOko36+S8m+Yu1cex1QZJvJHnEuOuQJoHBS5oDWv0iv7vtVNXfV9Uq9x+2naq6X1X9YE3rGWjvxUm+PO3YL6+qd97dY/cWA0dV1S/X0vHWNf8IvGPcRUiTwOAlaZ2TZP1x1zCsJPcBFgGfHnctKzPi1/RU4KlJHjTCNqR1gsFLmhBJHp3k/CS3JPkssOHAus2TnJZkeZIb++lt+nXvBv4Q+HA/9PbhfvnhSa5McnOS85L84cDxdk+ytF93XZIPDKzbI8lXktyU5FtJnrKydmZ4Hi9KckWSnyZ587R1b0vy6X56wySf7re7Kck3k8xfyfOpJIcmuRy4fGDZjgNNbJnkrP41/FKSh/TbLei3XX+gli8m+YskDwf+BXhC395N/foVhi6TvCzJ95LckOTUJFsPrKskL09yef9cPpIk/erHAzdV1VXTXqod+iG4m5OckmSL/linJ3nltNft20n+dIbXeup5HZLkx0muSfK6gfX3SrI4yff71/n4gXam9j04yY+AL8z2nvTbb90/7xv61+Fl097X45N8qn/tL0mycGp9Vf0KOA945vTnIN3TGLykCZDk3sC/AUcDWwAnAM8b2ORewCeBhwDbAb8EPgxQVW8G/ht4RT/09op+n28Cu/XH+wxwQpKpMHc4cHhV3R/YATi+r+PBwOnAu/r9XgeclGTeStoZfB67AB8FXgRsDTwA2GaWp70I2BTYtt/u5cAvV9HOfnRBZpdZjvlC4J3AlsCFwDGzbPcbVXVZ3/ZX+/Y2m+F5PQ34B2B/YCvgCuC4aZvtAzwOeFS/3VTI+H3guzM0/efAS/vj3QH8U798CXDQQNu7AlPvy2yeCuwE/DHwhvTn1AGvpHvN/oju/bgR+Mi0ff8IeHhf74zvSb/dccBV/XGeD/x9/7pMeU6/zWZ0PVzTg/llwK4reQ7SPYLBS5oMewAbAB+qqtur6kS64ARAVf20qk6qqlur6hbg3XS/MGdVVZ/u97ujqt4P3AfYuV99O7Bjki2r6udV9bV++UHA56vq81V1V1WdBSwFnj3k83g+cFpVnVtVvwbeAtw1y7a30/1y37Gq7qyq86rq5lUc/x+q6oaVnCt1+kDbb6brxdp2yNpX5oXAJ6rq/P7Yb+yPvWBgm8Oq6qaq+hFwDl3ohS6I3DLDMY+uqour6hd0r9P+SdajCy2/l2SnfrsXAZ+tqttWUt/bq+oXVXURXUA/sF/+cuDNVXVVX/fbgOdnxWHFt/X7/pJZ3pP+NXwi8Iaq+lVVXQgcSRcep3y5/3dzJ90fENND1i39ayHdoxm8pMmwNXB1rXjX+iumJpLcN8m/9kN4NwPnApv1v6hnlOR1SS5L8rN++GxTup4ggIOB3wO+0w8n7dMvfwjwZ/0w0039fk+i65UZ9nlcOTXTh4qfzrLt0cB/Asf1w2TvTbLBKo5/5bDrq+rnwA19TXfX1gy8H/2xf0rXEzXl2oHpW4H79dM3ApusrNb+2BsAW/bDcp8FDkpyL7oQdfQq6pt+rKnn/BDg5IH38jLgTmD+LPvO9p5sDdzQh/7Bdlb2/DecFvA2AW5axfOQ5jyDlzQZrgEePHBeEHRDilNeS9db9fh+ePDJ/fKp7QcDG+nO53o93ZDX5v3w2c+mtq+qy6vqQOCBwHuAE5NsTPdL+Oiq2mzgZ+OqOmymdmZ5Hr/pYUpyX7oelN/R9+y9vap2Af6AbqhuqgdltnZW1f5g2/ejGy79MfCLfvF9B7YdPNF7Vcf9MV2ImTr2xnTP6+pV7AfwbbqQO2utdO/17cBP+vkldL1sewK3VtVXV9HG9GP9uJ++EnjWtPdzw6oarPs3z30l78mPgS2SDAbI7Rju+U95OPCt1dhempMMXtJk+CrdeT6vSrJBkucCuw+s34TuXJub+pOj3zpt/+uAh07b/g5gObB+kr8D7j+1MslB/Xlbd/HbXoi76L559ydJnplkvf5k66ekP5F/hnamOxHYJ8mT+vPW3sEsnzNJnprk9/teu5vpgsfUsOSq2pnNswfafifwtaq6sqqW04WEg/rn9VK6c9umXAds0+83k2OBlyTZLd23FP8e+HpVLRuipm/Q9U4+eNryg5Ls0ofTdwAn9sN09EHrLuD9rLq3C+Atfa/oI4CX0PWYQfelgXfnt18ymJdk39kOMtt7UlVXAl8B/qH/N/Eoul7Tob6p2Z9b+FjgrGG2l+Yyg5c0Afrzd54LvJhueOwFwOcGNvkQsBFdj8jXgDOmHeJwunN3bkzyT3TDRWcA/0s3JPQrVhxS2gu4JMnP+30PqKpf9r9g9wXeRBfargT+lt9+VkxvZ/rzuAQ4lO5k/mvohtmmf5tvyoPogtrNdENgX+K3IWOl7azEZ+hC6Q10v+gPGlj3sv65/BR4BF2QmPIF4BLg2iQ/YZqq+i+687BO6p/XDsABwxTUv7dHTasFuud6FN0Q3YbAq6at/xTdifnDhJsvAd8Dzgb+sarO7JcfTnfO2JlJbqH7t/P4lRxnZe/JgcACut6vk4G39q/LMP4E+GJV/XiVW0pzXFY8pUSStLYlmUf3Tc1HD3sR1SR/DhxSVU9ayTYLgB8CG1TVHWuh1JFI8nXg4Kq6eNy1SOO2zlyEUJLWVf1Q58OG3b4ffvwr4J9HVlRDVbWyXjbpHsWhRkmaIEmeSTfMex3d0KmkOcShRkmSpEbs8ZIkSWrE4CVJktTIOnFy/ZZbblkLFiwYdxmSJEmrdN555/2kqubNtG6dCF4LFixg6dKl4y5DkiRplZJcMds6hxolSZIaMXhJkiQ1YvCSJElqZGTBK8nOSS4c+Lk5yWuSbJHkrCSX94+bj6oGSZKkSTKy4FVV362q3apqN7qb1d5Kd2PVxcDZVbUT3Q1dF4+qBkmSpEnSaqhxT+D7VXUFsC+wpF++BNivUQ2SJElj1Sp4HQAc20/Pr6pr+ulrgfmNapAkSRqrkQevJPcGngOcMH1ddTeKnPFmkUkOSbI0ydLly5ePuEpJkqTRa9Hj9Szg/Kq6rp+/LslWAP3j9TPtVFVHVNXCqlo4b96MF3+VJElap7QIXgfy22FGgFOBRf30I
uCUBjVIkiSN3UiDV5KNgWcAnxtYfBjwjCSXA0/v5yVJkua8kd6rsap+ATxg2rKf0n3LUTNYsPj0pu0tO2zvpu1JknRP5pXrJUmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIj64+7AGlVFiw+vVlbyw7bu1lbkqR7Hnu8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDXiLYMkrTFv5yRJq8ceL0mSpEYMXpIkSY0YvCRJkhoZafBKslmSE5N8J8llSZ6QZIskZyW5vH/cfJQ1SJIkTYpR93gdDpxRVQ8DdgUuAxYDZ1fVTsDZ/bwkSdKcN7LglWRT4MnAxwGq6raqugnYF1jSb7YE2G9UNUiSJE2SUfZ4bQ8sBz6Z5IIkRybZGJhfVdf021wLzB9hDZIkSRNjlMFrfeAxwEer6tHAL5g2rFhVBdRMOyc5JMnSJEuXL18+wjIlSZLaGGXwugq4qqq+3s+fSBfErkuyFUD/eP1MO1fVEVW1sKoWzps3b4RlSpIktTGy4FVV1wJXJtm5X7QncClwKrCoX7YIOGVUNUiSJE2SUd8y6JXAMUnuDfwAeAld2Ds+ycHAFcD+I65BkiRpIow0eFXVhcDCGVbtOcp2JUmSJpFXrpckSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUyKjv1ShJI7dg8elN21t22N5N25M0d9jjJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUyPqjPHiSZcAtwJ3AHVW1MMkWwGeBBcAyYP+qunGUdUiSJE2CFj1eT62q3apqYT+/GDi7qnYCzu7nJUmS5rxxDDXuCyzpp5cA+42hBkmSpOZGHbwKODPJeUkO6ZfNr6pr+ulrgfkjrkGSJGkijPQcL+BJVXV1kgcCZyX5zuDKqqokNdOOfVA7BGC77bYbcZmSJEmjN9Ier6q6un+8HjgZ2B24LslWAP3j9bPse0RVLayqhfPmzRtlmZIkSU2MLHgl2TjJJlPTwB8DFwOnAov6zRYBp4yqBkmSpEkyyqHG+cDJSaba+UxVnZHkm8DxSQ4GrgD2H2ENkiRJE2NkwauqfgDsOsPynwJ7jqpdSZKkSeWV6yVJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjo75lkCTdYyxYfHrT9pYdtnfT9iTdffZ4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktTIKoNXkvsmeUuSj/XzOyXZZ/SlSZIkzS3D9Hh9Evg18IR+/mrgXSOrSJIkaY4aJnjtUFXvBW4HqKpbgYy0KkmSpDlomOB1W5KNgAJIsgNdD5gkSZJWw/pDbPM24Axg2yTHAE8EXjzCmiRJkuakVQavqjozyXnAHnRDjK+uqp+MvDJJkqQ5ZphvNZ4NPL6qTq+q06rqJ0mOaFCbJEnSnDLMOV7bA29I8taBZQtHVI8kSdKcNUzwugnYE5if5N+TbDrimiRJkuakYYJXquqOqvor4CTgy8ADR1uWJEnS3DPMtxr/ZWqiqo5KchFw6OhKkiRJmptm7fFKcv9+8oQkW0z9AD8EXjdsA0nWS3JBktP6+e2TfD3J95J8Nsm979YzkCRJWkesbKjxM/3jecDS/vG8gflhvRq4bGD+PcAHq2pH4Ebg4NU4liRJ0jpr1uBVVfv0j9tX1UP7x6mfhw5z8CTbAHsDR/bzAZ4GnNhvsgTY7+48AUmSpHXFMNfxemKSjfvpg5J8IMl2Qx7/Q8Drgbv6+QcAN1XVHf38VcCDV7NmSZKkddIw32r8KHBrkl2B1wLfB45e1U5J9gGur6rz1qSwJIckWZpk6fLly9fkEJIkSRNlmOB1R1UVsC/w4ar6CLDJEPs9EXhOkmXAcXRDjIcDmyWZ+jblNsDVM+1cVUdU1cKqWjhv3rwhmpMkSZpswwSvW5K8ETgIOD3JvYANVrVTVb2xqrapqgXAAcAXquqFwDnA8/vNFgGnrFHlkiRJ65hhruP1AuD/AAdX1bX9+V3vuxttvgE4Lsm7gAuAj9+NY601Cxaf3rS9ZYft3bS9NdHyNVkXXg9Jku6uVQavqroW+MDA/I+AT61OI1X1ReCL/fQPgN1XZ39JkqS5YJihRkmSJK0FBi9JkqRGVjnUmOTVVXX4qpZJasfz7yRp3TRMj9eiGZa9eC3XIUmSNOfN2uOV5EC6bzNun+TUgVWbADeMujBJkqS5ZmVDjV8BrgG2BN4/sPwW4NujLEqSJGkumjV4VdUVwBXAE5I8BNipqv4ryUbARnQBTJIkSUMa5ibZLwNOBP61X7QN8G+jLEqSJGkuGubk+kPp7rt4M0BVXQ48cJRFSZIkzUXDBK9fV9VtUzP9Da5rdCVJkiTNTcMEry8leROwUZJnACcA/z7asiRJkuaeYYLXYmA5cBHwl8Dngf87yqIkSZLmomFukn0X8DHgY0m2ALapKocaJUmSVtMw32r8YpL796HrPLoA9sHRlyZJkjS3DDPUuGlV3Qw8F/hUVT0e2HO0ZUmSJM09wwSv9ZNsBewPnDbieiRJkuasYYLXO4D/BL5XVd9M8lDg8tGWJUmSNPcMc3L9CXSXkJia/wHwvFEWJUmSNBetMngl2RA4GHgEsOHU8qp66QjrkiRJmnOGGWo8GngQ8EzgS3T3avQG2ZIkSatpmOC1Y1W9BfhFVS0B9gYeP9qyJEmS5p5hgtft/eNNSR4JbIo3yZYkSVptqzzHCzgiyeZ0twk6Fbgf8JaRViVJkjQHDRO8zq6qG4FzgYcCJNl+pFVJkiTNQcMMNZ40w7IT13YhkiRJc92sPV5JHkZ3CYlNkzx3YNX9GbishCRJkoazsqHGnYF9gM2APxlYfgvwslEWJUmSNBfNGryq6hTglCRPqKqvNqxJkiRpThrm5PoLkhzKal65vr/i/bnAffp2Tqyqt/Yn5h8HPAA4D3hRVd22hvVLkiStM0Z55fpfA0+rql2B3YC9kuwBvAf4YFXtCNxIdzsiSZKkOW9kV66vzs/72Q36nwKexm+/FbkE2G+1q5YkSVoHjfTK9UnWS3IhcD1wFvB94KaquqPf5CrgwatXsiRJ0rppmOA1deX6t9Bduf5S4L3DHLyq7qyq3eiGJ3cHHjZsYUkOSbI0ydLly5cPu5skSdLEWuXJ9VV1ZD/5Jfor16+uqropyTnAE4DNkqzf93ptA1w9yz5HAEcALFy4sNakXUmSpEmysguo/s3KdqyqD6xsfZJ5wO196NoIeAbdifXnAM+n
+2bjIuCU1S1akiRpXbSyHq9N+sedgcfRDTNCdzHVbwxx7K2AJUnWoxvSPL6qTktyKXBckncBFwAfX6PKJUmS1jEru4Dq2wGSnAs8pqpu6effBpy+qgNX1beBR8+w/Ad053tJkiTdowxzcv18YPACp7f1yyRJkrQahrly/aeAbyQ5uZ/fDzhqZBVJkiTNUcN8q/HdSf4D+MN+0Uuq6oLRliVJkjT3DNPjRVWdD5w/4lokSZLmtGHO8ZIkSdJaYPCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNWLwkiRJamSoWwZJggWLT2/a3rLD9m7aniRp9OzxkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNjCx4Jdk2yTlJLk1ySZJX98u3SHJWksv7x81HVYMkSdIkGWWP1x3Aa6tqF2AP4NAkuwCLgbOraifg7H5ekiRpzhtZ8Kqqa6rq/H76FuAy4MHAvsCSfrMlwH6jqkGSJGmSNDnHK8kC4NHA14H5VXVNv+paYH6LGiRJksZt/VE3kOR+wEnAa6rq5iS/WVdVlaRm2e8Q4BCA7bbbbtRlStKcsmDx6c3aWnbY3s3aktZ1I+3xSrIBXeg6pqo+1y++LslW/fqtgOtn2reqjqiqhVW1cN68eaMsU5IkqYlRfqsxwMeBy6rqAwOrTgUW9dOLgFNGVYMkSdIkGeVQ4xOBFwEXJbmwX/Ym4DDg+CQHA1cA+4+wBkmSpIkxsuBVVV8GMsvqPUfVriRJ07U85w08702z88r1kiRJjRi8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoZ5b0aJUn3cC1v1eNterQusMdLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNWLwkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDWy/rgLkCRJ7S1YfHqztpYdtneztiadPV6SJEmNGLwkSZIaGVnwSvKJJNcnuXhg2RZJzkpyef+4+ajalyRJmjSj7PE6Cthr2rLFwNlVtRNwdj8vSZJ0jzCy4FVV5wI3TFu8L7Ckn14C7Deq9iVJkiZN63O85lfVNf30tcD8xu1LkiSNzdhOrq+qAmq29UkOSbI0ydLly5c3rEySJGk0Wgev65JsBdA/Xj/bhlV1RFUtrKqF8+bNa1agJEnSqLQOXqcCi/rpRcApjduXJEkam1FeTuJY4KvAzkmuSnIwcBjwjCSXA0/v5yVJku4RRnbLoKo6cJZVe46qTUmSpEnmleslSZIaMXhJkiQ1YvCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRkZ2HS9JkrSiBYtPb9ressP2btremmj5mkzC62GPlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDVi8JIkSWrE4CVJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkSZIaMXhJkiQ1YvCSJElqxOAlSZLUiMFLkiSpEYOXJElSIwYvSZKkRgxekiRJjRi8JEmSGjF4SZIkNTKW4JVkryTfTfK9JIvHUYMkSVJrzYNXkvWAjwDPAnYBDkyyS+s6JEmSWhtHj9fuwPeq6gdVdRtwHLDvGOqQJElqahzB68HAlQPzV/XLJEmS5rRUVdsGk+cDe1XVX/TzLwIeX1WvmLbdIcAh/ezOwHebFjq8LYGfjLsIrGMmk1KLdfyuSanFOlY0KXXA5NRiHSualDpgsmqZ7iFVNW+mFeu3rgS4Gth2YH6bftkKquoI4IhWRa2pJEuraqF1TFYdMDm1WMfvmpRarGMy64DJqcU6JrMOmKxaVsc4hhq/CeyUZPsk9wYOAE4dQx2SJElNNe/xqqo7krwC+E9gPeATVXVJ6zokSZJaG8dQI1X1eeDz42h7BCZlONQ6ftek1GIdv2tSarGOFU1KHTA5tVjHiialDpisWobW/OR6SZKkeypvGSRJktSIwWsNJflEkuuTXDzmOrZNck6SS5NckuTVY6pjwyTfSPKtvo63j6OOgXrWS3JBktPGXMeyJBcluTDJ0jHWsVmSE5N8J8llSZ4whhp27l+HqZ+bk7ymdR19LX/d/zu9OMmxSTYcRx19La/u67ik5esx02dYki2SnJXk8v5x8zHV8Wf963FXkmbfWpullvf1/2++neTkJJuNqY539jVcmOTMJFuPo46Bda9NUkm2HHUds9WS5G1Jrh74THl2i1ruLoPXmjsK2GvcRQB3AK+tql2APYBDx3QLpl8DT6uqXYHdgL2S7DGGOqa8GrhsjO0PempV7Tbmrz0fDpxRVQ8DdmUMr01Vfbd/HXYDHgvcCpzcuo4kDwZeBSysqkfSfcnngNZ19LU8EngZ3R09dgX2SbJjo+aP4nc/wxYDZ1fVTsDZ/fw46rgYeC5wboP2V1XLWcAjq+pRwP8CbxxTHe+rqkf1/39OA/5uTHWQZFvgj4EfNahhpbUAH5z6XOnPH594Bq81VFXnAjdMQB3XVNX5/fQtdL9Qm98JoDo/72c36H/GcgJhkm2AvYEjx9H+pEmyKfBk4OMAVXVbVd003qrYE/h+VV0xpvbXBzZKsj5wX+DHY6rj4cDXq+rWqroD+BJd4Bi5WT7D9gWW9NNLgP3GUUdVXVZVzS+aPUstZ/bvDcDX6K49OY46bh6Y3ZgGn68r+T33QeD1LWoYopZ1jsFrDkmyAHg08PUxtb9ekguB64GzqmosdQAfovtQuGtM7Q8q4Mwk5/V3YxiH7YHlwCf74dcjk2w8plqmHAAcO46Gq+pq4B/p/lq/BvhZVZ05jlroenb+MMkDktwXeDYrXmC6tflVdU0/fS0wf4y1TKKXAv8xrsaTvDvJlcALadPjNVMN+wJXV9W3xtH+DF7RD8F+osXQ+Npg8JojktwPOAl4zbS/jJqpqjv7bvBtgN37YZSmkuwDXF9V57VuexZPqqrHAM+iGwZ+8hhqWB94DPDRqno08AvaDCHNqL9w8nOAE8bU/uZ0PTvbA1sDGyc5aBy1VNVlwHuAM4EzgAuBO8dRy3TVfeXdr733kryZ7tSOY8ZVQ1W9uaq27Wt4xaq2X9v6Pw7exJhC3ww+CuxAd3rLNcD7x1vOcAxec0CSDehC1zFV9blx19MPY53DeM6BeyLwnCTLgOOApyX59BjqAH7Tu0JVXU93PtPuYyjjKuCqgR7IE+mC2Lg8Czi/qq4bU/tPB35YVcur6nbgc8AfjKkWqurjVfXYqnoycCPdeUTjcl2SrQD6x+vHWMvESPJiYB/ghTUZ12A6BnjeGNrdge4Plm/1n7HbAOcnedAYaqGqruv/4L8L+Bjj+XxdbQavdVyS0J27c1lVfWCMdcyb+rZPko2AZwDfaV1HVb2xqrapqgV0w1lfqKqx9GYk2TjJJlPTdCe
jNv8WbFVdC1yZZOd+0Z7Apa3rGHAgYxpm7P0I2CPJffv/P3syxi9iJHlg/7gd3fldnxlXLXS3b1vUTy8CThljLRMhyV50py48p6puHWMdOw3M7st4Pl8vqqoHVtWC/jP2KuAx/WdMc1N/JPT+lDF8vq6JsVy5fi5IcizwFGDLJFcBb62qj4+hlCcCLwIu6s+vAnjTGL7dsRWwJMl6dIH++Koa66UcJsB84OTudzvrA5+pqjPGVMsrgWP6Yb4fAC8ZRxF9AH0G8JfjaB+gqr6e5ETgfLqhowsY7xWwT0ryAOB24NBWX3yY6TMMOAw4PsnBwBXA/mOq4wbg/wHzgNOTXFhVzxxTLW8E7gOc1f9f/lpVvXwMdTy7/+PpLrr3ZqQ1zFbHmH7PzfaaPCXJbnRD4ssY4+fK6vDK9ZIkSY041ChJktSIwUuSJKkRg5ckSVIjBi9JkqRGDF6SJEmNGLwkjUWSBerVMzgAAAMXSURBVEnW+Lo7SfYb0w3h75Yky5JsOe46JI2HwUvSumo/YK0Er/5m2RNvXalT0uwMXpLWiiSHJTl0YP5tSV6XzvuSXJzkoiQvmGHfFyf58MD8aUme0k//vL858LeSfC3J/CR/QHe/x/cluTDJDv3PGf0Nyf87ycP6/eclOSnJN/ufJw7Ud3SS/wGOnlbPU5J8McmJSb6T5Jj+Kvcr9FglWZjkiwPHW9K3fUWS5yZ5b/+cz+hv7TXl9f3ybyTZcU3rlLTuMXhJWls+y4pXOt+/X/ZcupvY7kp3n8T3TbvVx6psTHe18F2Bc4GXVdVX6G5v87dVtVtVfZ/u6vOvrKrHAq8D/rnf/3Dgg1X1OLr72x05cOxdgKdX1YEztPto4DX9Ng+lu0vEquwAPI0uFH4aOKeqfh/4JbD3wHY/65d/GPjQ3axT0jrEbmtJa0VVXZDkgUm2prvVy41VdWWSvwGOrao76W7C/CXgccC3hzz0bcDU7afOo7vl0AqS3I/uRtcn9B1T0N3mBbqwt8vA8vv32wOcWlW/nKXdb1TVVf3xLwQWAF9eRa3/UVW3J7kIWA+YukXURf3+U44dePzg3axT0jrE4CVpbToBeD7wILrermHdwYo98BsOTN9ev7232Z3M/Ll1L+CmqtptlnV7VNWvBhf2AecXK6np1wPTg+0O1rohK/o1QFXdlWSw7rum1V0zTK9pnZLWIQ41SlqbPgscQBe+TuiX/TfwgiTrJZkHPBn4xrT9lgG7JblXkm2B3Ydo6xZgE4Cquhn4YZI/A+jPK9u13+5MupuE06+bKZytjmXAY/vp563hMV4w8PjVfnpt1ylpAhm8JK01VXUJXRi6uqqu6RefTDes+C3gC8Drq+raabv+D/BD4FLgn4Dzh2juOOBvk1yQZAfghcDBSb4FXALs22/3KmBhkm8nuRR4+Ro/wc7bgcOTLKXrCVsTmyf5NvBq4K9HVKekCZTf9oRLkiRplOzxkiRJasTgJUmS1IjBS5IkqRGDlyRJUiMGL0mSpEYMXpIkSY0YvCRJkhoxeEmSJDXy/wGW/mQoMVrYgQAAAABJRU5ErkJggg==\n" 31 | }, 32 | "metadata": { 33 | "needs_background": "light" 34 | }, 35 | "output_type": "display_data" 36 | } 37 | ], 38 | "source": [ 39 | "cols_target = [i for i in range(1, 16)]\n", 40 | "sum = [39,69,39,65,25,38,62,56,47,20,16,19,7,12,7]\n", 41 | "plt.figure(figsize=(10,5))\n", 42 | "plt.bar(range(len(sum)),sum,tick_label=cols_target)\n", 43 | "plt.xlabel(\"volunteer number\")\n", 44 | "plt.ylabel('dataset size')\n", 45 | "plt.title('dataset distribution(by person)')" 46 | ], 47 | "metadata": { 48 | "collapsed": false, 49 | "pycharm": { 50 | "name": "#%%\n" 51 | } 52 | } 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 3, 57 | "outputs": [], 58 | "source": [], 59 | "metadata": { 60 | "collapsed": false, 61 | "pycharm": { 62 | "name": "#%%" 63 | } 64 | } 65 | } 66 | ], 67 | "metadata": { 68 | "kernelspec": { 69 | "display_name": "Python 3", 70 | "language": "python", 71 | "name": "python3" 72 | }, 73 | "language_info": { 74 | "codemirror_mode": { 75 | "name": "ipython", 76 | "version": 2 77 | }, 78 | "file_extension": ".py", 79 | "mimetype": "text/x-python", 80 | "name": "python", 81 | "nbconvert_exporter": "python", 82 | "pygments_lexer": "ipython2", 83 | "version": "2.7.6" 84 | } 85 | }, 86 | "nbformat": 4, 87 | "nbformat_minor": 0 88 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Eye Gaze Estimation 2 | 3 | ### 1. Project Overview 4 | 5 | This is a research topic in computer vision to predict **where** a person is looking at given the person’s full face. 6 | 7 | Generally, there are two directions of the task: 8 | 9 | - 3-D gaze vector estimation is to predict the gaze vector, which is usually used in the automotive safety. 
10 | - 2-D gaze position estimation is to predict the horizontal and vertical coordinates on a 2-D screen, which allows using the gaze point to control a cursor for human-machine interaction. 11 | 12 | Given the training dataset, we can address two types of problems: single-eye gaze estimation and two-eye gaze estimation, i.e. predicting the gaze direction of one eye or of both eyes. 13 | 14 | Usability: tracking eye movement provides detailed insights into users' attention. 15 | 16 | Challenges: (a) low sensor quality or unknown/challenging environments, and (b) large variations in eye region appearance. 17 | 18 | ### 2. Related work 19 | 20 | #### 2.1 Gaze Estimation Methods 21 | 22 | There are two widely accepted approaches to estimating gaze direction: **model-based** and **appearance-based**. Model-based methods use 3D eyeball models and estimate the gaze direction from geometric eye features, while appearance-based methods learn generic gaze estimators from large amounts of person- and head-pose-independent training data. 23 | 24 | Model-based methods largely depend on an external light source to detect eye features, so the modelling process can be complex; their accuracy is still lower and their robustness is unclear.[1] Appearance-based gaze estimation methods directly use eye images as input and can therefore potentially work with low-resolution eye images. Since eye images carry a lot of information, this approach needs a larger amount of training data than model-based methods. 25 | 26 | #### 2.2 Dataset collection 27 | 28 | The Eyediap[2] dataset contains 94 video sequences of 16 participants looking at three different targets, so the gaze directions are limited and coarse and cannot train a well-generalised gaze estimator. The UT Multiview[3] dataset covers 50 participants and can be used to synthesise images for new head poses. The problem with both datasets is that they record the gaze images under controlled laboratory conditions. 29 | 30 | The MPIIGaze[1] dataset is used in this task for two reasons: 31 | 32 | - It was recorded outside the lab: an application on each participant's **laptop** captured the images while they went about their daily work at home. 33 | - It took months to record, so it contains a wider range of recording locations and times, illuminations, and eye appearances. 34 | 35 | The MPIIGaze dataset details are shown below: 36 | 37 | - 15 participants, 213,659 pictures 38 | 39 | - outside of laboratory conditions, i.e. during daily routine 40 | - wider range of recording locations, times, illuminations and eye appearances 41 | 42 | How it was collected: a laptop application asked volunteers to look at a fixed on-screen target and took pictures of their eyes. (Laptops are suited for long-term daily recordings and are also an important platform for *eye tracking applications*.) 43 | 44 | #### 2.3 Calibration Settings 45 | 46 | Both model-based and appearance-based methods need to collect person-specific data during a calibration step, yet previous works on gaze estimation did not take person-specific calibration settings into consideration. 47 | 48 | For the MPIIGaze dataset, since the recordings come from different laptops, the screen sizes and resolutions differ, and the camera coordinate systems also vary widely. The MPI team therefore obtained the intrinsic parameters of each laptop camera.
In this way, the influence of participant-specific data can be added to the model: the 3D position of each screen plane was estimated using a mirror-based calibration method. 49 | 50 | To summarize, the MPIIGaze dataset provides face images, the calibration settings of each participant, and 3D gaze vectors of the eyes, which serve as the ground truth for the problem. 51 | 52 | ### 3. Method 53 | 54 | The task is generally divided into two parts: determining the gaze direction of a single eye, and determining the directions of both eyes. Each problem has a distinct method to solve it. 55 | 56 | #### 3.1 Single-eye problem 57 | 58 | ##### 3.1.1 Problem analysis 59 | 60 | For the single-eye problem, the task is to predict a 3D gaze direction for one person, given their face image and head pose information. In the MPIIGaze dataset, the head pose is calculated from the calibration parameters, and the eye is extracted from the face image, so we can focus on the eye image for prediction instead of the whole face. Predicting a gaze direction from a single image can be difficult, because the imaging conditions vary a lot: illumination, eye glasses, image resolution. 61 | 62 | To learn these image attributes properly, we apply deep learning algorithms, which learn the features efficiently. I followed the guidance of [1] and built a multi-modal CNN; the general process is shown in Fig.1. Before training the CNN, we preprocess the data: first detect the face in the raw input image, then use the calibration parameters to derive the 3D head rotation $r$. The normalisation step then adjusts the eye image so that the head appears to point directly at the camera, so every input image lives in the same coordinate system. 63 | 64 | ![](src/CNN.jpg) 65 | 66 |
Fig.1 Workflow of gaze estimation
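Concretely, the head-rotation part of this preprocessing reduces to the `pose3D_to_2D` helper found in the `utils.py` files of this repo: the Rodrigues rotation vector becomes a rotation matrix, and its third column yields the 2D (pitch, yaw) angles. A small usage example (the rotation vector below is made up):

```python
import cv2 as cv
import numpy as np

def pose3D_to_2D(pose):
    # Rodrigues turns the rotation vector into a 3x3 rotation matrix;
    # the third column is the head's z-axis in camera coordinates.
    M, _ = cv.Rodrigues(np.array(pose).astype(np.float32))
    vec = M[:, 2]
    yaw = np.arctan2(vec[0], vec[2])
    pitch = np.arcsin(vec[1])
    return np.array([pitch, yaw])

r = [0.1, -0.2, 0.05]          # hypothetical rotation vector, only to show the call
print(pose3D_to_2D(r))         # -> [pitch, yaw] in radians
```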
67 | 68 | ##### 3.1.2 Head pose Estimation 69 | 70 | The head pose rotation is not given directly in the recordings. It is calculated from the calibration parameters, such as the screen size and the intrinsic parameters of each participant's laptop camera. Since the camera cannot point directly at the screen target, images of planar mirror reflections are used to calculate the head pose vectors. Head pose can be influential when building the model; this is covered in the discussion part. 71 | 72 | ##### 3.1.3 Normalisation 73 | 74 | The purpose of the normalisation process is to adjust the head pose direction. The head poses in the dataset cover a wide range, so the head is not always pointing directly at the camera. Leaving the data in this form would reduce training accuracy, because the angle between the head coordinate system and the camera coordinate system influences the image representation: we need eye images in which the z-axis of the head coordinate system is perpendicular to the camera image plane. After normalisation, we obtain a grey image for each eye and the head pose vector $h$. The transformation process is shown below: 75 | 76 | ![](src/camera%20coordinate.jpg) 77 | 78 |
Fig.2 Normalisation process
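The dataset used here already ships the normalised eye patches (the `Normalized/**/*.mat` files read in `utils.py`), but it helps to see roughly what this step does. Below is a minimal sketch of such a perspective-warp normalisation in the spirit of [1][3]; the camera matrix, normalised focal length, distance and patch size are placeholder assumptions, not values taken from this project:

```python
import cv2 as cv
import numpy as np

def normalize_eye(img, hR, eye_center, cam_orig,
                  focal_norm=960.0, dist_norm=600.0, roi=(60, 36)):
    """Sketch: rotate a virtual camera so its z-axis points at the eye centre,
    rescale to a fixed distance, and warp the image accordingly."""
    dist = np.linalg.norm(eye_center)                    # actual camera-to-eye distance
    forward = eye_center / dist                          # new z-axis: look at the eye
    down = np.cross(forward, hR[:, 0]); down /= np.linalg.norm(down)
    right = np.cross(down, forward); right /= np.linalg.norm(right)
    R = np.vstack([right, down, forward])                # rotation into the virtual camera
    S = np.diag([1.0, 1.0, dist_norm / dist])            # scale to the normalised distance
    cam_norm = np.array([[focal_norm, 0, roi[0] / 2],
                         [0, focal_norm, roi[1] / 2],
                         [0, 0, 1.0]])
    W = cam_norm @ S @ R @ np.linalg.inv(cam_orig)       # image-to-image warp matrix
    patch = cv.warpPerspective(img, W, roi)              # normalised eye patch
    return cv.cvtColor(patch, cv.COLOR_BGR2GRAY), R @ hR # grey image + rotated head pose
```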
79 | 80 | ##### 3.1.4 Multi-modal CNN 81 | 82 | The task of the CNN is to learn the mapping from the input features to the gaze direction. The network architecture is an adaptation of the LeNet framework. The model has two inputs, the normalised eye image and the 2D head pose vector, and it outputs the predicted 2D gaze vector. All 3D vectors therefore need to be converted into 2D vectors; the difference between using 2D and 3D vectors is discussed in part 5. 83 | 84 | ![](src/figmodal.jpg) 85 | 86 |
Fig.3 Multi-modal CNN
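A minimal PyTorch sketch of the multi-modal idea in Fig.3: the eye image passes through a small LeNet-style convolutional stack and the 2D head pose is concatenated just before the regression layer. The layer sizes and the 36×60 patch size are illustrative assumptions; the actual definitions live in `LeNet.py`/`LeNet1.py`:

```python
import torch
import torch.nn as nn

class GazeNet(nn.Module):
    """LeNet-style multi-modal CNN sketch: conv features from the eye image,
    head pose injected before the final regression layer."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, kernel_size=5),   # 1x36x60 -> 20x32x56
            nn.MaxPool2d(2),                   # -> 20x16x28
            nn.ReLU(),
            nn.Conv2d(20, 50, kernel_size=5),  # -> 50x12x24
            nn.MaxPool2d(2),                   # -> 50x6x12
            nn.ReLU(),
        )
        self.fc1 = nn.Linear(50 * 6 * 12, 500)
        self.fc2 = nn.Linear(500 + 2, 2)       # +2 for the head-pose angles

    def forward(self, img, pose):
        x = self.features(img).flatten(1)
        x = torch.relu(self.fc1(x))
        x = torch.cat([x, pose], dim=1)        # inject (pitch, yaw) of the head
        return self.fc2(x)                     # predicted (pitch, yaw) of the gaze

# img: (N, 1, 36, 60) grey patches, pose: (N, 2) head-pose angles
out = GazeNet()(torch.zeros(4, 1, 36, 60), torch.zeros(4, 2))
print(out.shape)   # torch.Size([4, 2])
```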
87 | 88 | #### 3.2 Two-eye problem 89 | 90 | ##### 3.2.1 Problem analysis 91 | 92 | Two-eye gaze estimation predicts the gaze vectors of the left eye and the right eye from one face image. The MPI team made some assumptions for this setting: 93 | 94 | - the user's head pose can be obtained with existing head trackers; 95 | - the user roughly fixates on the same target with both eyes. 96 | 97 | There has been some work on the two-eye problem. The 2017 paper *MPIIGaze: Real-World Dataset and Deep Appearance-Based Gaze Estimation*[4] proposed a way to define the ground truth for both eyes: the mean gaze vector originating from the centre of the two eyes. Through several trials, the authors reached the rough conclusion that two-eye estimation can improve the predicted result. 98 | 99 | From this conclusion, we know that predicting the vectors of both eyes at the same time can be used to refine the outcome of single-eye prediction. 100 | 101 | ##### 3.2.2 Knowledge 102 | 103 | In previous work on two-eye gaze estimation, the two eyes were treated identically. However, the statistics show that we cannot expect the same accuracy for both eyes; either eye may be the more accurate one. This observation is called two-eye asymmetry, and it is caused by differing head poses, image qualities, and illumination across the face. 104 | 105 | ##### 3.2.3 AR-E Net 106 | 107 | The AR-E net was proposed in *Appearance-Based Gaze Estimation via Evaluation-Guided Asymmetric Regression*[5]. It consists of an AR-Net and an E-Net, which serve different functions. 108 | 109 | ![](src/are.jpg) 110 | 111 |
Fig.4 Architecture for AR-E Net
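The asymmetric idea described for AR-Net below can be sketched as a loss that regresses both eyes at once and weights each eye's angular error by how reliable that eye currently looks. This is only an illustration of the weighting principle, not the exact loss of [5]; the inputs are assumed to be 3D gaze vectors of shape (N, 3):

```python
import torch

def asymmetric_loss(pred_l, pred_r, gt_l, gt_r, eps=1e-7):
    """Weight each eye's angular error so the currently better eye dominates."""
    def angular_error(p, g):
        cos = torch.sum(p * g, dim=1) / (p.norm(dim=1) * g.norm(dim=1) + eps)
        return torch.acos(cos.clamp(-1 + eps, 1 - eps))   # radians, per sample

    err_l = angular_error(pred_l, gt_l)
    err_r = angular_error(pred_r, gt_r)
    # softmax over the negative errors: smaller error -> larger weight
    w = torch.softmax(torch.stack([-err_l, -err_r], dim=1), dim=1)
    return (w[:, 0] * err_l + w[:, 1] * err_r).mean()

# tiny smoke test with random vectors
l, r = torch.randn(8, 3), torch.randn(8, 3)
print(asymmetric_loss(l, r, torch.randn(8, 3), torch.randn(8, 3)))
```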
112 | 113 | The AR-Net (Asymmetric Regression Net) predicts the two gaze vectors for the left and right eyes simultaneously. The salient difference from previous networks is the loss function: AR-Net computes an accuracy term based on the arccosine of each eye's error and uses it to weight the two eyes in the loss. 114 | 115 | The E-Net (Evaluation Net) helps further decide which eye is more reliable. It is appended to the AR-Net and feeds its evaluation back to it. 116 | 117 | ### 4. Experiments and evaluation 118 | 119 | #### 4.1 Unified Measurement 120 | 121 | To compare the results of trainings with different loss functions, gradient-descent variants and other factors that might affect the output, we need a single measurement criterion. The mean degree error, i.e. the angle between the predicted and ground-truth gaze vectors, is used here. 122 | 123 | Given two **normalised** vectors $p_1=(x_1,y_1,z_1)$ and $p_2=(x_2,y_2,z_2)$, their cosine is the dot product: 124 | 125 | $angle = \sum_{i=0}^{2} p_1[i] \, p_2[i]$. Note that the vectors must be normalised here, otherwise the calculated angular errors can be misleading. 126 | 127 | We then convert the angle into degrees: $degree = \arccos(angle) \times 180 / \pi$. 128 | 129 | #### 4.2 Hyper parameters 130 | 131 | For the best single-eye model, the batch size is 512 and the learning rate is set to 0.0001. The loss function is SmoothL1Loss, and the optimizer is adamGrad. For data splitting, I tried random splitting and splitting by person; the latter gave better results. 132 | 133 | #### 4.3 Validation 134 | 135 | For validation I tried different dataset splitting methods, which can be classified as splitting by person and random splitting. Splitting by person holds out the data of one person as validation data and uses the rest for training, so each hypothesis uses a different amount of validation data; it is unclear whether this fluctuation affects the result. 136 | 137 | I implemented **K-fold validation** for the single-eye model (with random splitting), which holds out $1/k$ of the data points as validation data and trains on the rest. For k = 5 the best result was 7.82 (not a large improvement); for k = 3 it was 8.97; for k = 10 it was 9.69. The MPI team reported a mean degree error of 6.3[1] for this model. 138 | 139 | ![](src/K-fold.jpg) 140 | 141 |
Fig.5 K-fold validation outcome
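The degree errors quoted above (and throughout Section 4) follow the measurement of Section 4.1; for a batch of unit 3D gaze vectors it reduces to a few NumPy lines:

```python
import numpy as np

def mean_degree_error(pred, truth):
    """Mean angular error in degrees between two (N, 3) arrays of unit gaze vectors."""
    cos = np.sum(pred * truth, axis=1)     # per-sample dot product
    cos = np.clip(cos, -1.0, 1.0)          # guard against rounding noise
    return np.degrees(np.arccos(cos)).mean()

# two made-up unit vectors, roughly 5 degrees apart
a = np.array([[0.0, 0.0, -1.0]])
b = np.array([[0.0, np.sin(np.radians(5)), -np.cos(np.radians(5))]])
print(mean_degree_error(a, b))             # ~5.0
```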
142 | 143 | #### 4.4 Result 144 | 145 | **Single-eye problem** 146 | 147 | I applied the previously mentioned multi-modal CNN in my work. The train-test curves below show the trend; the model was trained for 100 epochs. Overall, both the training loss and the test loss keep decreasing as training proceeds. The zigzags in the curves are likely a result of mini-batch training and the adam grad optimizer. The best result for the single-eye model is 8.92. 148 | 149 | ![](src/result.jpg) 150 | 151 |
Fig.6 train-test loss curve
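The curves in Fig.6 are plotted from the `train_loss_list` and `valid_loss_list` that `train.py` prints at the end of training. A minimal plotting snippet; the numbers below are placeholders, not real results:

```python
import matplotlib.pyplot as plt

# placeholder values; in practice paste the lists printed by train.py
train_loss_list = [15.2, 12.1, 10.8, 9.9, 9.3]
valid_loss_list = [16.0, 13.0, 11.9, 11.0, 10.6]

plt.plot(train_loss_list, label="train")
plt.plot(valid_loss_list, label="valid")
plt.xlabel("epoch")
plt.ylabel("mean degree error")
plt.legend()
plt.show()
```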
152 | 153 | **Two-eye problem** 154 | 155 | For the two-eye problem, I first tried using the single-eye model to predict the gaze vectors of the two eyes separately, but the two results are not always close. This supports the MPI team's conclusion that we cannot treat the two eyes identically. I then tried combining two streams of the single-eye model (predicting the left and right eyes separately), concatenating their outputs and redefining the loss function in an AR-Net-like form; this did not work well and never converged. Finally, I followed the standard ARE-Net design and implemented the AR-Net (the E-Net failed to run because CUDA always ran out of memory). For now, the best accuracy for the two-eye problem stands at 13.4104. 156 | 157 | 158 | 159 | ### 5. Discussion 160 | 161 | In this part, we discuss several effects that might influence the accuracy of the model. 162 | 163 | #### 5.1 Influence from head pose 164 | 165 | Head pose is needed in both the single-eye and two-eye problems. One may ask: since the data have been normalised before being fed into the network, why is it still necessary to inject the head pose into the prediction? The answer comes from a theoretical view: normalisation makes the eye appear to look directly at the camera, but we are not predicting gaze vectors in that normalised scenario; we are predicting gaze directions for the original photos, in which the head pose does affect the final result. Leaving the head pose out can severely hurt the training result, as shown in the following graph: 166 | 167 | ![](src/headpose.jpg) 168 | 169 |
Fig.7 Comparison of results with and without head pose
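The "without head pose" curve corresponds to a variant in which nothing is concatenated before the regression head, along the lines of what `train_without_headpose.py` trains. A sketch with the same illustrative layer sizes as before:

```python
import torch
import torch.nn as nn

class GazeNetNoPose(nn.Module):
    """Same image branch as the multi-modal sketch, but the head pose is never used."""
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 20, 5), nn.MaxPool2d(2), nn.ReLU(),
            nn.Conv2d(20, 50, 5), nn.MaxPool2d(2), nn.ReLU(),
        )
        self.fc = nn.Sequential(nn.Linear(50 * 6 * 12, 500), nn.ReLU(), nn.Linear(500, 2))

    def forward(self, img):                 # no pose argument at all
        return self.fc(self.features(img).flatten(1))

print(GazeNetNoPose()(torch.zeros(4, 1, 36, 60)).shape)   # torch.Size([4, 2])
```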
170 | 171 | #### 5.2 Influence from dimensions of vectors 172 | 173 | To emphasise the necessity of using 2D vectors, I trained with vectors of different dimensions and compared the outcomes. From Fig.8 we can clearly see that the 2D version converges within 10 epochs, while the 3D version never does. However, this only indicates that this particular network structure is suited to 2D vectors rather than 3D ones. 174 | 175 | ![](src/32d.jpg) 176 | 177 |
Fig.8 Comparison of results using 3D and 2D vectors
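The 2D targets used in these runs come from the `gaze3D_to_2D` and `gaze2D_to_3D` helpers in `utils.py`; restated here with a quick round-trip check:

```python
import numpy as np

def gaze3D_to_2D(gaze):
    # (x, y, z) direction -> (pitch, yaw), as in utils.py
    x, y, z = gaze
    return np.array([np.arcsin(-y), np.arctan2(-x, -z)])

def gaze2D_to_3D(gaze):
    # (pitch, yaw) -> unit (x, y, z) direction, as in utils.py
    pitch, yaw = gaze
    v = np.array([-np.cos(pitch) * np.sin(yaw), -np.sin(pitch), -np.cos(pitch) * np.cos(yaw)])
    return v / np.linalg.norm(v)

g = np.array([0.1, -0.2, -0.97])
g = g / np.linalg.norm(g)
print(np.allclose(gaze2D_to_3D(gaze3D_to_2D(g)), g))   # True: the mapping is invertible
```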
178 | 179 | #### 5.3 Determine two-eye ground truth 180 | 181 | For the two-eye estimation problem, the 2017 paper *MPIIGaze: Real-World Dataset and Deep Appearance-Based Gaze Estimation* proposes a way to set the ground truth of both eyes: the mean gaze vector originating from the centre of the two eyes is defined as the ground-truth gaze vector. Nonetheless, this cannot accurately represent the true relationship between the two eyes. Instead, we should collect ground truth for both eyes separately. 182 | 183 | 184 | 185 | ### 6. Conclusion 186 | 187 | The MPIIGaze dataset was collected through long-term observation of various volunteers. Its wide range of recording factors makes it well suited for training deep learning models. To predict the gaze vector of a single eye, we convert both the 3D gaze vectors (ground truth) and the calculated head poses into their 2D form. This reduces computational complexity and also yields better prediction results. The conversion process involves geometric formulas and is implemented with the cv library. The CNN-based model takes little time to train and can still reach high-quality results. 188 | 189 | Two-eye gaze estimation is more of an adjustment to the single-eye prediction result. It combines the two eye images and takes the inter-relationship and discrepancies between the two eyes into consideration. Rather than just outputting the raw result, this problem mainly concerns how the predictions can be fed back into the training process so that the outcome improves after the adjustment. 190 | 191 | 192 | 193 | ### 7. My work 194 | 195 | | Week time | Main task |
Details
| 196 | | --------- | ---------------------------- | ------------------------------------------------------------ | 197 | | 3 | Lead-in to the project | - Search for previous works on gaze estimation and build a basic understanding of the problem;
- Look for available datasets and manageable methods for solving the problem. | 198 | | 4 | EDA | - Determine the method to apply;
- Explore the attributes and characteristics of the dataset, understand the labels.
- Understand the dataset collection process. | 199 | | 5 | Learn multi-modal CNN[1] | - Get familiar with the process of using calibration parameters to calculate head pose rotations and normalising the images;
- Understand the deep learning architecture and clarify the possible problems. | 200 | | 6 | Implement CNN[1] | - Implement the multi-modal CNN with normalised images, based on PyTorch;
- Train the model on the dataset (dataset splitting: random splitting) | 201 | | 7 | Discuss influencing factors | - Consider the influence of vector dimensionality and head pose on the model;
- Code experiments to verify the reasoning;
- Revise the dataset splitting method.
- Switch to a GPU device. 202 | | 8 | Improve the result | - Look into the bad predictions and find possible causes;
- Adjust the hyperparameters and optimizers;
- Use a new evaluation metric (mean degree error). | 203 | | 9-10 | Evaluation | - Carry out the validation process and implement K-fold validation. | 204 | | 11 | Start of two-eye problem | - Run a model to predict the two eyes' vectors separately;
- Implement a self-designed CNN based on the previous architecture;
- Analyze the problems that occurred in the two models. | 205 | | 12 | Learn asymmetry technique[5] | - Learn the architecture of AR-E Net. | 206 | | 13 | Implement AR-Net | - Based on the paper[5], implement AR-Net;
- Tune the structure and improve the result. | 207 | 208 | 209 | 210 | ### 8. Limitations 211 | 212 | 1. The single-eye model has not gone through cross-dataset validation. If applicable, other datasets such as Eyediap[2] could be used to test the model and improve the architecture's generalization ability. 213 | 2. The self-designed CNN did not predict well; further adjustment and revision are needed to make the model work better. 214 | 3. The AR-E Net costs too much memory during training, so in my experiments CUDA ran out of memory and training could not continue. AR-E Net is relatively complex; a simpler but still effective model is needed for the two-eye problem. 215 | 4. This project did not do much to reduce noise in the training images. Poor illumination and coverings such as eye-glasses can reduce the accuracy of the CNN, so preprocessing steps for the dataset are needed in future work. 216 | 217 | 218 | 219 | ### 9. Environment 220 | 221 | | | | | | | | 222 | | ----- | ---- | ------ | -------------------- | ----- | ---------- | 223 | | cgpb0 | 1 | Ubuntu | 2x Xeon Silver 4210 | 256GB | 3.2TB SSD | 224 | 225 | 226 | 227 | ### 10. Directory description 228 | 229 | ``` 230 | +--EyeGaze # root 231 | | +--essay # essays related 232 | | +--note # literature review on essays 233 | | +--pre # presentation slides 234 | | +--single_eye_normalized 235 | | +--gpu # code suitable for cuda 236 | | +--train_cpu # training on cpu device 237 | | +--validation 238 | | +--visualize # draw curves 239 | | +--src 240 | | +--two_eye 241 | ``` 242 | 243 | 244 | 245 | ### Appendix 246 | 247 | [1] Zhang, Xucong, et al. "Appearance-based gaze estimation in the wild." *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition*. 2015. 248 | 249 | [2] Funes Mora, Kenneth Alberto, Florent Monay, and Jean-Marc Odobez. "EYEDIAP: A database for the development and evaluation of gaze estimation algorithms from RGB and RGB-D cameras." *Proceedings of the Symposium on Eye Tracking Research and Applications*. 2014. 250 | 251 | [3] Sugano, Yusuke, Yasuyuki Matsushita, and Yoichi Sato. "Learning-by-synthesis for appearance-based 3D gaze estimation." *Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition*. 2014. 252 | 253 | [4] Zhang X, Sugano Y, Fritz M, et al. "MPIIGaze: Real-world dataset and deep appearance-based gaze estimation." *IEEE Transactions on Pattern Analysis and Machine Intelligence*, 2017, 41(1): 162-175. 254 | 255 | [5] Cheng Y, Lu F, Zhang X. "Appearance-based gaze estimation via evaluation-guided asymmetric regression." *Proceedings of the European Conference on Computer Vision (ECCV)*. 2018: 100-115.
256 | -------------------------------------------------------------------------------- /single_eye_normalized/train_cpu/Normalized_process.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "Normalized_process.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "authorship_tag": "ABX9TyMyGsYoHOYrGrmocLFFjuGb", 10 | "include_colab_link": true 11 | }, 12 | "kernelspec": { 13 | "name": "python3", 14 | "display_name": "Python 3" 15 | } 16 | }, 17 | "cells": [ 18 | { 19 | "cell_type": "markdown", 20 | "metadata": { 21 | "id": "view-in-github", 22 | "colab_type": "text" 23 | }, 24 | "source": [ 25 | "\"Open" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": { 31 | "id": "ZFOWQGZbV4dZ" 32 | }, 33 | "source": [ 34 | "## Appearance-based Gaze Estimation (Feb 20)\n", 35 | "\n", 36 | "dataset: MPIIGaze https://www.perceptualui.org/research/datasets/MPIIGaze/ \n", 37 | "\n", 38 | "CNN frame: LeNet " 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": { 44 | "id": "qjSsP2_rWEil" 45 | }, 46 | "source": [ 47 | "### 1. Pre-processing data " 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "TZtFQOIIWCLY" 54 | }, 55 | "source": [ 56 | "from scipy.io import loadmat\n", 57 | "import numpy as np\n", 58 | "import pandas as pd\n", 59 | "from PIL import Image\n", 60 | "import glob\n", 61 | "from tqdm import tqdm\n", 62 | "from sklearn.model_selection import train_test_split\n", 63 | "from google.colab import drive " 64 | ], 65 | "execution_count": null, 66 | "outputs": [] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": { 71 | "id": "jhYsyQZy4_KJ" 72 | }, 73 | "source": [ 74 | "To use COLAB, we should first connect to the drive then retreive the data." 
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "metadata": { 80 | "colab": { 81 | "base_uri": "https://localhost:8080/" 82 | }, 83 | "id": "7HnveBscWRiH", 84 | "outputId": "599cd832-7391-4d2f-a6e4-1dedf3245caf" 85 | }, 86 | "source": [ 87 | "# Use google.colab to use drive dataset \n", 88 | "import os\n", 89 | "drive.mount('/content/drive')\n", 90 | "path = \"/content/drive/MyDrive/EyeGaze\"\n", 91 | "os.listdir(path)" 92 | ], 93 | "execution_count": null, 94 | "outputs": [ 95 | { 96 | "output_type": "stream", 97 | "text": [ 98 | "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" 99 | ], 100 | "name": "stdout" 101 | }, 102 | { 103 | "output_type": "execute_result", 104 | "data": { 105 | "text/plain": [ 106 | "['Normalized', 'Normalized_process.ipynb']" 107 | ] 108 | }, 109 | "metadata": { 110 | "tags": [] 111 | }, 112 | "execution_count": 4 113 | } 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "colab": { 120 | "base_uri": "https://localhost:8080/" 121 | }, 122 | "id": "0xSyWtfBX-1w", 123 | "outputId": "5bf23c68-7295-433b-cfad-b21900cd2730" 124 | }, 125 | "source": [ 126 | "mat_files = glob.glob(path + '/Normalized/**/*.mat', recursive=True)\n", 127 | "mat_files.sort()\n", 128 | "print(mat_files)" 129 | ], 130 | "execution_count": null, 131 | "outputs": [ 132 | { 133 | "output_type": "stream", 134 | "text": [ 135 | "['/content/drive/MyDrive/EyeGaze/Normalized/p00/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day35.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p00/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p00/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day56.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p01/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day60.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day63.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day64.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day65.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day66.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day67.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day68.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p01/day69.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p02/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day08.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p03/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day60.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day63.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day64.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p03/day65.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day03.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p04/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p04/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p05/day38.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p06/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day57.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day58.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day59.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day60.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p06/day61.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p06/day62.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day48.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day49.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day50.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day51.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day52.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day53.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day54.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day55.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p07/day56.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day02.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p08/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day21.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day22.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day23.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day24.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day25.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day26.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day27.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day28.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day29.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day30.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day31.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day32.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day33.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day34.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day35.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day36.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day37.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day38.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day39.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day40.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day41.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day42.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day43.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day44.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day45.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day46.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p08/day47.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day15.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p09/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p09/day20.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p10/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day13.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day14.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day15.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day16.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day17.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day18.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p11/day19.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p12/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day01.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day07.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day08.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day09.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day10.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day11.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p13/day12.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day01.mat', 
'/content/drive/MyDrive/EyeGaze/Normalized/p14/day02.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day03.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day04.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day05.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day06.mat', '/content/drive/MyDrive/EyeGaze/Normalized/p14/day07.mat']\n" 136 | ], 137 | "name": "stdout" 138 | } 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "7oL7zkFRYf9z" 145 | }, 146 | "source": [ 147 | "from scipy.io import loadmat\n", 148 | "def read_eye_data(mat):\n", 149 | " '''\n", 150 | " read gaze, image and pose info from one mat file \n", 151 | " '''\n", 152 | " mat_data = loadmat(mat)\n", 153 | " right_info = mat_data['data']['right'][0, 0]\n", 154 | " gaze = right_info['gaze'][0, 0]\n", 155 | " image = right_info['image'][0, 0]\n", 156 | " pose = right_info['pose'][0, 0]\n", 157 | " return gaze, image, pose\n", 158 | "\n", 159 | "def collect_data_from_mat(path):\n", 160 | " '''\n", 161 | " collect data from annotation part\n", 162 | " :param path: path of normalized data \n", 163 | " :return: gaze, image, pose arrays and the index list\n", 164 | " '''\n", 165 | " mat_files = glob.glob(path + '/Normalized/**/*.mat', recursive = True)\n", 166 | " mat_files.sort()\n", 167 | " # lists to store the collected data\n", 168 | " gaze = list()\n", 169 | " image = list()\n", 170 | " index = list()\n", 171 | " pose = list()\n", 172 | " # X: image, head_pose \n", 173 | " # y: gaze vector\n", 174 | " # index: pnum, pday\n", 175 | " for matfile in tqdm(mat_files):\n", 176 | " pnum = matfile.split('/')[-2] # pxx\n", 177 | " pday = matfile.split('/')[-1].split('.')[0] # day0x\n", 178 | " index.append(pnum + '/' + pday)\n", 179 | " \n", 180 | " fgaze, fimage, fpose = read_eye_data(matfile)\n", 181 | "\n", 182 | " if gaze == []:\n", 183 | " gaze = fgaze\n", 184 | " image = fimage\n", 185 | " pose = fpose\n", 186 | " else:\n", 187 | " gaze = np.append(gaze, fgaze, axis = 0)\n", 188 | " image = np.append(image, fimage, axis = 0)\n", 189 | " pose = np.append(pose, fpose, axis = 0)\n", 190 | "\n", 191 | " return gaze, image, pose, index " 192 | ], 193 | "execution_count": null, 194 | "outputs": [] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "metadata": { 199 | "id": "qZ3oEjqY40B8", 200 | "colab": { 201 | "base_uri": "https://localhost:8080/" 202 | }, 203 | "outputId": "32c60ac0-5867-416a-885d-02212d5090eb" 204 | }, 205 | "source": [ 206 | "gaze, image, pose, index = collect_data_from_mat(path)" 207 | ], 208 | "execution_count": null, 209 | "outputs": [ 210 | { 211 | "output_type": "stream", 212 | "text": [ 213 | " 0%| | 0/521 [00:00" 298 | ] 299 | }, 300 | "metadata": { 301 | "tags": [] 302 | }, 303 | "execution_count": 9 304 | } 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "vU4hXiBtfS2r" 311 | }, 312 | "source": [ 313 | "gaze (x, y, z) is the normalized 3D **vector** of the eye's gaze direction. The x-axis is parallel to the line that connects the centers of the two eyes.\n", 314 | "To simplify our CNN model, there is no need to use the full 3D vector, so the authors just use two crucial angles (theta, phi) to denote the gaze direction. \n", 315 | "\n", 316 | "pose (a, b, c) represents the rotation (**angle**) of the head. When dealing with 3D transformations, a rotation matrix is usually used, but a rotation only has three degrees of freedom, so it is more concise to express it as a rotation vector. 
Therefore we need to convert between the rotation vector and the rotation matrix." 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": { 322 | "id": "dalKmHvrQw45" 323 | }, 324 | "source": [ 325 | "### 2. Dimension transformation\n", 326 | "In this part, we convert the pose and gaze vectors from 3D to 2D angles (theta, phi). The suggested code is given on the dataset website." 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "metadata": { 332 | "id": "lvNfmin0GZA-" 333 | }, 334 | "source": [ 335 | "import cv2 as cv\n", 336 | "def pose3D_to_2D(pose):\n", 337 | " '''\n", 338 | " pose (a, b, c) is rotation (angle)\n", 339 | " M = Rodrigues((x,y,z))\n", 340 | " Zv = (the third column of M)\n", 341 | " theta = asin(Zv[1])\n", 342 | " phi = atan2(Zv[0], Zv[2])\n", 343 | " '''\n", 344 | " M, _ = cv.Rodrigues(np.array(pose).astype(np.float32))\n", 345 | " vec = M[:, 2]\n", 346 | " phi = np.arctan2(vec[0], vec[2])\n", 347 | " theta = np.arcsin(vec[1])\n", 348 | " return np.array([theta, phi])\n", 349 | "\n", 350 | "def gaze3D_to_2D(gaze):\n", 351 | " '''\n", 352 | " gaze (x, y, z) is direction\n", 353 | " theta = asin(-y)\n", 354 | " phi = atan2(-x, -z)\n", 355 | " '''\n", 356 | " x, y, z = (gaze[i] for i in range(3))\n", 357 | " theta = np.arcsin(-y)\n", 358 | " phi = np.arctan2(-x, -z)\n", 359 | " return np.stack((theta, phi)).T" 360 | ], 361 | "execution_count": null, 362 | "outputs": [] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "metadata": { 367 | "id": "TjF79qRzn2t2" 368 | }, 369 | "source": [ 370 | "pose2d = []\n", 371 | "gaze2d = []\n", 372 | "for i in np.arange(0, len(gaze), 1):\n", 373 | " pose2d.append(pose3D_to_2D(pose[i]))\n", 374 | " gaze2d.append(gaze3D_to_2D(gaze[i]))\n", 375 | "\n", 376 | "poses = np.array(pose2d)\n", 377 | "gazes = np.array(gaze2d)" 378 | ], 379 | "execution_count": null, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "xBsMSwNdyk7m" 386 | }, 387 | "source": [ 388 | "img_train, img_test, pose_train, pose_test, gaze_train, gaze_test = train_test_split(\n", 389 | " image, poses, gazes, test_size = 0.33, random_state = 0 # keep this order consistent with the names unpacked above\n", 390 | ")" 391 | ], 392 | "execution_count": null, 393 | "outputs": [] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "metadata": { 398 | "colab": { 399 | "base_uri": "https://localhost:8080/" 400 | }, 401 | "id": "xYoWc3iQ8ILi", 402 | "outputId": "f62ded32-3d6c-4bc3-fa77-d93853c68ba8" 403 | }, 404 | "source": [ 405 | "print(img_train[1])\n", 406 | "print(img_test.shape)\n", 407 | "print(pose_train[1])\n", 408 | "print(\"training data size:\",end=\"\")\n", 409 | "print(pose_train.shape[0])\n", 410 | "print(\"test data size:\", end=\"\")\n", 411 | "print(pose_test.shape[0])\n" 412 | ], 413 | "execution_count": null, 414 | "outputs": [ 415 | { 416 | "output_type": "stream", 417 | "text": [ 418 | "[[255 255 254 ... 150 150 125]\n", 419 | " [254 254 254 ... 125 95 95]\n", 420 | " [254 254 254 ... 67 95 95]\n", 421 | " ...\n", 422 | " [250 250 250 ... 246 250 250]\n", 423 | " [250 250 246 ... 242 246 246]\n", 424 | " [252 252 246 ... 246 250 250]]\n", 425 | "(70508, 36, 60)\n", 426 | "[-0.30859049 0.16778534]\n", 427 | "training data size:143150\n", 428 | "test data size:70508\n" 429 | ], 430 | "name": "stdout" 431 | } 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": { 437 | "id": "u9XCCUpV6BZf" 438 | }, 439 | "source": [ 440 | "### 3. 
Multi-modal CNN" 441 | ] 442 | }, 443 | { 444 | "cell_type": "markdown", 445 | "metadata": { 446 | "id": "Skgn_0IMtOfA" 447 | }, 448 | "source": [ 449 | "Use the LeNet architecture: two convolution + max-pooling stages followed by two fully connected layers, with the 2D head pose concatenated to the fc1 features before the final regression layer." 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "metadata": { 455 | "id": "9jRZnuXStATC" 456 | }, 457 | "source": [ 458 | "import torch\n", 459 | "import torch.nn as nn\n", 460 | "import torch.nn.functional as F\n", 461 | "\n", 462 | "\n", 463 | "def initialize_weights(module):\n", 464 | " if isinstance(module, nn.Conv2d):\n", 465 | " nn.init.constant_(module.bias, 0)\n", 466 | " elif isinstance(module, nn.Linear):\n", 467 | " nn.init.xavier_uniform_(module.weight)\n", 468 | " nn.init.constant_(module.bias, 0)\n", 469 | "\n", 470 | "class Model(nn.Module):\n", 471 | " def __init__(self):\n", 472 | " super(Model, self).__init__()\n", 473 | "\n", 474 | " self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=0)\n", 475 | " self.conv2 = nn.Conv2d(20, 50, kernel_size=5, stride=1, padding=0)\n", 476 | " self.fc1 = nn.Linear(3600, 500) # 50 channels x 6 x 12 after two conv+pool stages\n", 477 | " self.fc2 = nn.Linear(502, 2) # 500 features + 2 head-pose angles -> (theta, phi)\n", 478 | "\n", 479 | " self._initialize_weight()\n", 480 | "\n", 481 | " def _initialize_weight(self):\n", 482 | " nn.init.normal_(self.conv1.weight, mean=0, std=0.1)\n", 483 | " nn.init.normal_(self.conv2.weight, mean=0, std=0.01)\n", 484 | " self.apply(initialize_weights)\n", 485 | "\n", 486 | " def forward(self, x, y):\n", 487 | " x = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)\n", 488 | " x = F.max_pool2d(self.conv2(x), kernel_size=2, stride=2)\n", 489 | " x = F.relu(self.fc1(x.view(x.size(0), -1)), inplace=True) # flatten, then fc1\n", 490 | " x = torch.cat([x, y], dim=1) # append head pose\n", 491 | " x = self.fc2(x)\n", 492 | " return x" 493 | ], 494 | "execution_count": null, 495 | "outputs": [] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "metadata": { 500 | "colab": { 501 | "base_uri": "https://localhost:8080/" 502 | }, 503 | "id": "w1AbDx-Y_xTk", 504 | "outputId": "e2df1a5c-f22d-4ea9-a84c-85d3306eddac" 505 | }, 506 | "source": [ 507 | "GazeCNN = Model()\n", 508 | "\n", 509 | "optimizer = torch.optim.Adam(GazeCNN.parameters(), lr=0.0001)\n", 510 | "criterion = torch.nn.SmoothL1Loss(reduction='mean')\n", 511 | "\n", 512 | "def batch_training(img, gaze, pose, j, bt):\n", 513 | " a = torch.randn(batch,1,36, 60)\n", 514 | " b = torch.randn(batch,2)\n", 515 | " c = torch.randn(batch,2)\n", 516 | " for i in range(batch):\n", 517 | " a[i, 0] = torch.tensor(img_train[j * bt + i])\n", 518 | " b[i] = torch.tensor(pose_train[j * bt + i])\n", 519 | " c[i] = torch.tensor(gaze_train[j * bt + i])\n", 520 | " return a, c, b # (img, gaze, pose), matching the unpacking at the call sites\n", 521 | "\n", 522 | "def batch_test(img, gaze, pose, j, bt):\n", 523 | " a = torch.randn(batch,1,36, 60)\n", 524 | " b = torch.randn(batch,2)\n", 525 | " c = torch.randn(batch,2)\n", 526 | " for i in range(batch):\n", 527 | " a[i, 0] = torch.tensor(img_test[j * bt + i])\n", 528 | " b[i] = torch.tensor(pose_test[j * bt + i])\n", 529 | " c[i] = torch.tensor(gaze_test[j * bt + i])\n", 530 | " return a, c, b # (img, gaze, pose), matching the unpacking at the call sites\n", 531 | "\n", 532 | "\n", 533 | "for epoch in range(3):\n", 534 | " batch = 10\n", 535 | " for i in tqdm(range(14315)):\n", 536 | " # training data \n", 537 | " img = torch.randn(batch, 1, 36, 60)\n", 538 | " gaze = torch.randn(batch, 2)\n", 539 | " pose = torch.randn(batch, 2)\n", 540 | " img, gaze, pose = batch_training(img, gaze, pose, i, batch)\n", 541 | "\n", 542 | " gaze_pred = GazeCNN(img, pose)\n", 543 | " loss = criterion(gaze_pred, gaze)\n", 544 | " optimizer.zero_grad(); loss.backward() # clear accumulated gradients before backprop\n", 545 | " optimizer.step()\n", 546 | "\n", 547 | 
" timg, tgaze, tpose = batch_test(img, gaze, pose, 0, batch)\n", 548 | " gaze_pred = GazeCNN(timg, tpose)\n", 549 | " loss = criterion(gaze_pred, tgaze)\n", 550 | " print(\"epoch \", epoch, \", test loss:\", loss)\n", 551 | "\n", 552 | " " 553 | ], 554 | "execution_count": null, 555 | "outputs": [ 556 | { 557 | "output_type": "stream", 558 | "text": [ 559 | "100%|██████████| 14315/14315 [11:55<00:00, 20.00it/s]\n", 560 | " 0%| | 3/14315 [00:00<11:09, 21.37it/s]" 561 | ], 562 | "name": "stderr" 563 | }, 564 | { 565 | "output_type": "stream", 566 | "text": [ 567 | "epoch 0 , test loss: tensor(0.0090, grad_fn=)\n" 568 | ], 569 | "name": "stdout" 570 | }, 571 | { 572 | "output_type": "stream", 573 | "text": [ 574 | "100%|██████████| 14315/14315 [11:29<00:00, 20.76it/s]\n", 575 | " 0%| | 2/14315 [00:00<12:19, 19.36it/s]" 576 | ], 577 | "name": "stderr" 578 | }, 579 | { 580 | "output_type": "stream", 581 | "text": [ 582 | "epoch 1 , test loss: tensor(0.0084, grad_fn=)\n" 583 | ], 584 | "name": "stdout" 585 | }, 586 | { 587 | "output_type": "stream", 588 | "text": [ 589 | "100%|██████████| 14315/14315 [11:27<00:00, 20.82it/s]" 590 | ], 591 | "name": "stderr" 592 | }, 593 | { 594 | "output_type": "stream", 595 | "text": [ 596 | "epoch 2 , test loss: tensor(0.0075, grad_fn=)\n" 597 | ], 598 | "name": "stdout" 599 | }, 600 | { 601 | "output_type": "stream", 602 | "text": [ 603 | "\n" 604 | ], 605 | "name": "stderr" 606 | } 607 | ] 608 | }, 609 | { 610 | "cell_type": "markdown", 611 | "metadata": { 612 | "id": "sPoFkEORJOrt" 613 | }, 614 | "source": [ 615 | "epoch 0 , test loss: tensor(0.0096, grad_fn=)\n", 616 | "epoch 1 , test loss: tensor(0.0109, grad_fn=)\n", 617 | "epoch 2 , test loss: tensor(0.0088, grad_fn=)\n", 618 | "epoch 3 , test loss: tensor(0.0078, grad_fn=)\n", 619 | "epoch 4 , test loss: tensor(0.0082, grad_fn=)\n", 620 | "\n", 621 | "> Indented block\n", 622 | "\n" 623 | ] 624 | }, 625 | { 626 | "cell_type": "markdown", 627 | "metadata": { 628 | "id": "83TYES7yWWiQ" 629 | }, 630 | "source": [ 631 | "100%|██████████| 14315/14315 [11:25<00:00, 20.89it/s]\n", 632 | " 0%| | 3/14315 [00:00<10:48, 22.07it/s]epoch 0 , test loss: tensor(0.0099, grad_fn=)\n", 633 | "100%|██████████| 14315/14315 [10:59<00:00, 21.72it/s]\n", 634 | " 0%| | 3/14315 [00:00<11:02, 21.60it/s]epoch 1 , test loss: tensor(0.0086, grad_fn=)\n", 635 | "100%|██████████| 14315/14315 [10:59<00:00, 21.69it/s]\n", 636 | " 0%| | 3/14315 [00:00<11:02, 21.59it/s]epoch 2 , test loss: tensor(0.0082, grad_fn=)\n", 637 | "100%|██████████| 14315/14315 [11:05<00:00, 21.50it/s]\n", 638 | " 0%| | 3/14315 [00:00<11:36, 20.56it/s]epoch 3 , test loss: tensor(0.0077, grad_fn=)\n", 639 | "100%|██████████| 14315/14315 [11:02<00:00, 21.62it/s]\n", 640 | "epoch 4 , test loss: tensor(0.0080, grad_fn=)" 641 | ] 642 | }, 643 | { 644 | "cell_type": "code", 645 | "metadata": { 646 | "colab": { 647 | "base_uri": "https://localhost:8080/" 648 | }, 649 | "id": "nwEw-taW56MT", 650 | "outputId": "10271c1d-942d-44e8-f72c-95090dcdcdf8" 651 | }, 652 | "source": [ 653 | "test_loss = 0\n", 654 | "for i in tqdm(range(3000)):\n", 655 | " timg, tgaze, tpose = batch_test(img, gaze, pose, i, batch)\n", 656 | " gaze_pred = GazeCNN(timg, tpose)\n", 657 | " test_loss += criterion(gaze_pred, tgaze)\n", 658 | "\n", 659 | "print(test_loss / 3000)" 660 | ], 661 | "execution_count": null, 662 | "outputs": [ 663 | { 664 | "output_type": "stream", 665 | "text": [ 666 | "100%|██████████| 3000/3000 [01:02<00:00, 48.18it/s]" 667 | ], 668 | "name": "stderr" 669 | }, 670 | { 671 
| "output_type": "stream", 672 | "text": [ 673 | "tensor(0.0081, grad_fn=)\n" 674 | ], 675 | "name": "stdout" 676 | }, 677 | { 678 | "output_type": "stream", 679 | "text": [ 680 | "\n" 681 | ], 682 | "name": "stderr" 683 | } 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": { 689 | "id": "v1QDfOjzjKmA" 690 | }, 691 | "source": [ 692 | "probLems:\n", 693 | "1. server crashed when processing test datasets\n", 694 | "\n", 695 | "2. measurement on loss/ accuracy -- how to judge my model\n" 696 | ] 697 | }, 698 | { 699 | "cell_type": "markdown", 700 | "metadata": { 701 | "id": "Syp3TkADvPLG" 702 | }, 703 | "source": [ 704 | "### References\n", 705 | "1. https://www.mpi-inf.mpg.de/de/departments/computer-vision-and-machine-learning/research/gaze-based-human-computer-interaction/appearance-based-gaze-estimation-in-the-wild\n", 706 | "\n", 707 | "2. Y. Sugano, Y. Matsushita, and Y. Sato. Learning-by-synthesis for appearance-based 3d gaze estimation. In Computer Vision and Pattern Recognition (CVPR), 2014 IEEE Conference on, pages 1821–1828. IEEE, 2014.\n", 708 | "\n", 709 | "3. \n" 710 | ] 711 | } 712 | ] 713 | } --------------------------------------------------------------------------------