├── utils ├── __init__.py ├── model_utils.py ├── img_utils.py ├── utils.py └── infer_engine.py ├── dataset ├── __init__.py └── gfie.py ├── gfiemodel ├── __init__.py ├── gfienet.py ├── gfiemodule.py └── resnet.py ├── config ├── __init__.py ├── cad120evaluation.yaml ├── gfiebenchmark.yaml └── default.py ├── download_cad120.sh ├── download_gfie.sh ├── tester.py ├── main.py ├── README.md ├── inference.py ├── gfie.yaml ├── trainer.py └── LICENSE /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gfiemodel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /config/__init__.py: -------------------------------------------------------------------------------- 1 | from .default import _C as cfg -------------------------------------------------------------------------------- /download_cad120.sh: -------------------------------------------------------------------------------- 1 | mkdir CAD120_dataset 2 | cd CAD120_dataset 3 | 4 | # download the rgb file 5 | wget -c -t 10 https://airobot.nankai.edu.cn/rgb.zip 6 | 7 | 7z x rgb.zip 8 | 9 | # download the depth file 10 | wget -c -t 10 https://airobot.nankai.edu.cn/depth.zip 11 | 12 | 7z x depth.zip 13 | 14 | # download the aux file 15 | wget -c -t 10 https://airobot.nankai.edu.cn/cad120_annotation_CameraKinect.zip 16 | 17 | 7z x cad120_annotation_CameraKinect.zip 18 | 19 | -------------------------------------------------------------------------------- /config/cad120evaluation.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | 3 | root_dir: "/home/hzx/Dataset/CAD120" 4 | rgb: "rgb" 5 | depth: "depth" 6 | camerapara: "CameraKinect.npy" 7 | 8 | test: "annotation.txt" 9 | 10 | 11 | test_batch_size: 64 12 | 13 | TRAIN: 14 | lr: 1e-4 15 | weightDecay: 1e-4 16 | 17 | resume: False 18 | resume_add: "" 19 | 20 | store: "/extend/hzx/Model_para/GFIEpub_model/" 21 | 22 | save_intervel: 3 23 | 24 | end_epoch: 18 25 | 26 | OTHER: 27 | 28 | lossrec_every: 100 29 | 30 | evalrec_every: 1000 31 | 32 | cpkt: "/extend/hzx/Model_para/GFIEpub_model/gfiemodel_11epoch.pth.tar" -------------------------------------------------------------------------------- /config/gfiebenchmark.yaml: -------------------------------------------------------------------------------- 1 | DATASET: 2 | 3 | root_dir: "/home/hzx/Dataset/GFIE_dataset" 4 | rgb: "rgb" 5 | depth: "depth" 6 | camerapara: "CameraKinect.npy" 7 | 8 | train: "train_annotation.txt" 9 | valid: "valid_annotation.txt" 10 | test: "test_annotation.txt" 11 | 12 | 13 | train_batch_size: 32 14 | 15 | MODEL: 16 | backboneptpath: "/extend/hzx/Model_para/ptbackbone.pt" 17 | 18 | TRAIN: 19 | lr: 1e-4 20 | weightDecay: 1e-4 21 | 22 | resume: False 23 | resume_add: "" 24 | 25 | store: "/extend/hzx/Model_para/GFIEpub_model/" 26 | 27 | save_intervel: 1 28 | 29 | end_epoch: 20 30 | 31 | OTHER: 32 | 33 | lossrec_every: 100 34 | 35 | evalrec_every: 1000 36 | 37 | cpkt: "/extend/hzx/Model_para/GFIEpub_model/gfiemodel_11epoch.pth.tar" -------------------------------------------------------------------------------- /download_gfie.sh: -------------------------------------------------------------------------------- 1 | root_url="https://airobot.nankai.edu.cn/" 2 | 3 | mkdir GFIE_dataset 4 | cd GFIE_dataset 5 | 6 | #Download the rgb file of GFIE 7 | 8 | for i in $(seq 1 32) 9 | do 10 | file_id=`echo $i | awk '{printf("%03d\n",$0)}'` 11 | wget -c -t 10 $root_url"rgb"$file_id".zip"; 12 | done 13 | 14 | 15 | for i in $(seq 1 32) 16 | do 17 | file_id=`echo $i | awk '{printf("%03d\n",$0)}'` 18 | org_file_name="rgb"$file_id".zip" 19 | replace_file_name="rgb.zip."$file_id 20 | echo "replace the zip file name: "$org_file_name" "$replace_file_name 21 | mv $org_file_name $replace_file_name 22 | done 23 | 24 | 7z x rgb.zip.001 25 | 26 | rm -rf rgb.zip.* 27 | 28 | # Download the depth file of GFIE 29 | 30 | for i in $(seq 1 35) 31 | do 32 | file_id=`echo $i | awk '{printf("%03d\n",$0)}'` 33 | wget -c -t 10 $root_url"depth"$file_id".zip"; 34 | done 35 | 36 | for i in $(seq 1 35) 37 | do 38 | file_id=`echo $i | awk '{printf("%03d\n",$0)}'` 39 | org_file_name="depth"$file_id".zip" 40 | replace_file_name="depth.zip."$file_id 41 | echo "replace the zip file name: "$org_file_name" "$replace_file_name 42 | mv $org_file_name $replace_file_name 43 | done 44 | 45 | 7z x depth.zip.001 46 | 47 | rm -rf depth.zip.* 48 | 49 | 7z x depth.zip 50 | 51 | # download the aux file 52 | wget -c -t 10 https://airobot.nankai.edu.cn/annotation_CameraKinect.zip 53 | 54 | 7z x annotation_CameraKinect.zip 55 | 56 | -------------------------------------------------------------------------------- /gfiemodel/gfienet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from gfiemodel.gfiemodule import PSFoVModule,EGDModule,GGHModule 4 | 5 | class GFIENet(nn.Module): 6 | 7 | def __init__(self,pretrained=False): 8 | 9 | super(GFIENet,self).__init__() 10 | 11 | self.psfov_module=PSFoVModule() 12 | self.egd_module=EGDModule() 13 | self.ggh_module=GGHModule(pretrained) 14 | 15 | 16 | def forward(self,simg,himg,headloc,matrix_T): 17 | """ 18 | Args: 19 | simg: scene image 20 | himg: cropped head image 21 | headloc: mask representing the position of the head in scene image 22 | matrix_T: matrix_T: unprojected coordiantes represent by matrix T 23 | Returns: 24 | pred_heatmap: A heatmap representing a 2D gaze target 25 | """ 26 | 27 | 28 | # Estimate gaze direction 29 | pred_gazevector=self.egd_module(himg) 30 | 31 | # Perceive Stero FoV 32 | Stereo_Fov=self.psfov_module(matrix_T,pred_gazevector,simg.shape) 33 | 34 | # Generate gaze heatmap 35 | pred_heatmap=self.ggh_module(simg,Stereo_Fov,headloc) 36 | 37 | return {"pred_heatmap":pred_heatmap, 38 | "pred_gazevector":pred_gazevector, 39 | "Stereo_Fov":Stereo_Fov} 40 | 41 | if __name__ == '__main__': 42 | 43 | rgbimg=torch.randn((4,3,224,224)) 44 | 45 | faceimg=torch.randn((4,3,224,224)) 46 | 47 | head_location=torch.randn((4,1,224,224)) 48 | 49 | gaze_vs=torch.randn((4,224,224,3)) 50 | 51 | print(rgbimg.shape) 52 | 53 | testnet=GFIENet() 54 | 55 | output=testnet(rgbimg,faceimg,head_location,gaze_vs) 56 | 57 | print(output["pred_gazevector"].shape) 58 | 59 | -------------------------------------------------------------------------------- /config/default.py: -------------------------------------------------------------------------------- 1 | from yacs.config import CfgNode as CN 2 | 3 | # ----------------------------------------------------------------------------- 4 | # Default Config definition 5 | # ----------------------------------------------------------------------------- 6 | 7 | _C=CN() 8 | 9 | # ----------------------------------------------------------------------------- 10 | # Dataset Path 11 | # ----------------------------------------------------------------------------- 12 | 13 | _C.DATASET=CN() 14 | 15 | # dataset dir 16 | _C.DATASET.root_dir = "" 17 | _C.DATASET.rgb ="" 18 | _C.DATASET.depth ="" 19 | _C.DATASET.camerapara ="" 20 | 21 | _C.DATASET.train = "" 22 | _C.DATASET.valid="" 23 | _C.DATASET.test="" 24 | 25 | 26 | # dataset loader 27 | _C.DATASET.load_workers=24 28 | _C.DATASET.train_batch_size=64 29 | _C.DATASET.valid_batch_size=64 30 | _C.DATASET.test_batch_size=64 31 | 32 | 33 | # ----------------------------------------------------------------------------- 34 | # Model 35 | # ----------------------------------------------------------------------------- 36 | _C.MODEL=CN() 37 | 38 | # pre-trained parameters 39 | _C.MODEL.backboneptpath="" 40 | 41 | 42 | # ----------------------------------------------------------------------------- 43 | # Training 44 | # ----------------------------------------------------------------------------- 45 | # model optimizer and criterion type and relative parameters 46 | _C.TRAIN=CN() 47 | 48 | 49 | _C.TRAIN.criterion="mixed" 50 | _C.TRAIN.optimizer="adam" 51 | 52 | _C.TRAIN.lr=3.5e-4 53 | _C.TRAIN.weightDecay=1e-4 54 | 55 | 56 | _C.TRAIN.start_epoch=0 57 | _C.TRAIN.end_epoch=200 58 | 59 | # model save interval and address 60 | _C.TRAIN.store='' 61 | _C.TRAIN.save_intervel=1 62 | 63 | # model resume 64 | _C.TRAIN.resume=False 65 | _C.TRAIN.resume_add='' 66 | 67 | 68 | # input and output resolution 69 | _C.TRAIN.input_size=224 70 | _C.TRAIN.output_size=64 71 | 72 | 73 | # ----------------------------------------------------------------------------- 74 | # Other Default 75 | # ----------------------------------------------------------------------------- 76 | _C.OTHER=CN() 77 | 78 | _C.OTHER.seed=235 79 | # if gpu is used 80 | _C.OTHER.device='cpu' 81 | 82 | _C.OTHER.cpkt="" 83 | 84 | # log for tensorboardx 85 | _C.OTHER.logdir='./logs' 86 | 87 | _C.OTHER.global_step=0 88 | 89 | _C.OTHER.lossrec_every=10 90 | 91 | _C.OTHER.evalrec_every=600 92 | 93 | -------------------------------------------------------------------------------- /tester.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from utils.utils import AverageMeter,MovingAverageMeter,euclid_dist,visualized,cosine_sim 5 | from tqdm import tqdm 6 | 7 | class Tester(object): 8 | 9 | def __init__(self,model,criterion,testloader,opt,writer=None): 10 | 11 | self.model=model 12 | self.criterion=criterion 13 | 14 | self.testloader=testloader 15 | 16 | self.test_dist=AverageMeter() 17 | self.test_cosine=AverageMeter() 18 | 19 | self.device=torch.device(opt.OTHER.device) 20 | 21 | self.opt=opt 22 | self.writer=writer 23 | 24 | @torch.no_grad() 25 | def test(self,epoch,opt): 26 | 27 | self.model.eval() 28 | 29 | self.test_dist.reset() 30 | self.test_cosine.reset() 31 | 32 | loader_capacity=len(self.testloader) 33 | pbar=tqdm(total=loader_capacity) 34 | 35 | for i,data in enumerate(self.testloader,0): 36 | 37 | 38 | x_simg, x_himg, x_hc = data["simg"], data["himg"], data["headloc"] 39 | 40 | x_matrixT=data["matrixT"] 41 | 42 | gaze_vector=data["gaze_vector"] 43 | gaze_target2d = data["gaze_target2d"] 44 | 45 | x_simg=x_simg.to(self.device) 46 | x_himg=x_himg.to(self.device) 47 | x_hc=x_hc.to(self.device) 48 | x_matrixT=x_matrixT.to(self.device) 49 | 50 | inputs_size=x_simg.size(0) 51 | 52 | outs=self.model(x_simg, x_himg, x_hc,x_matrixT) 53 | 54 | 55 | pred_heatmap=outs['pred_heatmap'] 56 | pred_heatmap=pred_heatmap.squeeze(1) 57 | pred_heatmap=pred_heatmap.data.cpu().numpy() 58 | 59 | pred_gazevector=outs['pred_gazevector'] 60 | pred_gazevector=pred_gazevector.data.cpu().numpy() 61 | 62 | 63 | gaze_vector=gaze_vector.numpy() 64 | 65 | 66 | distval = euclid_dist(pred_heatmap, gaze_target2d) 67 | cosineval=cosine_sim(pred_gazevector,gaze_vector) 68 | 69 | self.test_dist.update(distval,inputs_size) 70 | self.test_cosine.update(cosineval,inputs_size) 71 | 72 | pbar.set_postfix(dist=self.test_dist.avg, 73 | cosine=self.test_cosine.avg) 74 | pbar.update(1) 75 | 76 | pbar.close() 77 | 78 | if self.writer is not None: 79 | 80 | self.writer.add_scalar("Eval dist", self.test_dist.avg, global_step=opt.OTHER.global_step) 81 | self.writer.add_scalar("Eval cosine", self.test_cosine.avg, global_step=opt.OTHER.global_step) 82 | 83 | return self.test_dist.avg,self.test_cosine.avg -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import os 5 | 6 | from gfiemodel.gfienet import GFIENet 7 | 8 | def weights_init(m): 9 | classname=m.__class__.__name__ 10 | if classname.find('Conv')!=-1 : 11 | nn.init.kaiming_normal_(m.weight.data) 12 | elif classname.find('Linear')!=-1: 13 | nn.init.xavier_normal_(m.weight) 14 | nn.init.constant_(m.bias,0) 15 | elif classname.find('BatchNorm') != -1: 16 | m.weight.data.fill_(1.) 17 | m.bias.data.zero_() 18 | 19 | def init_model(opt): 20 | 21 | 22 | # encoder in GGH module pretrained on ImageNet 23 | model=GFIENet(pretrained=True) 24 | 25 | # backbone in EGD module pretrained on Gaze360 26 | model.egd_module.backbone.load_state_dict(torch.load(opt.MODEL.backboneptpath)) 27 | 28 | model.egd_module.fc.apply(weights_init) 29 | model.psfov_module.apply(weights_init) 30 | model.ggh_module.decoder.apply(weights_init) 31 | 32 | model=model.to(opt.OTHER.device) 33 | return model 34 | 35 | 36 | def setup_model(model,opt): 37 | 38 | 39 | if opt.TRAIN.criterion=="mixed": 40 | criterion=[nn.MSELoss(reduction='none'),nn.CosineSimilarity()] 41 | else: 42 | raise NotImplemented 43 | 44 | 45 | if opt.TRAIN.optimizer=="adam": 46 | 47 | optimizer=optim.Adam(model.parameters(), 48 | lr=opt.TRAIN.lr, 49 | weight_decay=opt.TRAIN.weightDecay) 50 | else: 51 | raise NotImplemented 52 | 53 | return criterion,optimizer 54 | 55 | 56 | def save_checkpoint(model,optimizer,best_error,best_flag,epoch,opt): 57 | 58 | cur_state={ 59 | 'epoch':epoch+1, 60 | 'state_dict':model.state_dict(), 61 | 'best_dist_err':best_error[0], 62 | 'best_cosine_err':best_error[1], 63 | 'optimizer':optimizer.state_dict() 64 | } 65 | 66 | epochnum=str(epoch) 67 | 68 | if best_flag: 69 | filename='gfiemodel'+'_'+'best.pth.tar' 70 | else: 71 | filename='gfiemodel'+'_'+epochnum+'epoch.pth.tar' 72 | 73 | torch.save(cur_state,os.path.join(opt.TRAIN.store,filename)) 74 | 75 | 76 | def resume_checkpoint(model,optimizer,opt): 77 | 78 | checkpoint=torch.load(opt.TRAIN.resume_add) 79 | opt.TRAIN.start_epoch=checkpoint['epoch'] 80 | best_dist_error=checkpoint['best_dist_err'] 81 | best_cosine_error=checkpoint['best_cosine_err'] 82 | model.load_state_dict(checkpoint['state_dict']) 83 | optimizer.load_state_dict(checkpoint['optimizer']) 84 | 85 | print("=> Loading checkpoint '{}' (epoch {})".format(opt.TRAIN.resume,opt.TRAIN.start_epoch)) 86 | 87 | return model, optimizer, best_dist_error,best_cosine_error, opt 88 | 89 | def init_checkpoint(model,opt): 90 | 91 | 92 | checkpoint=torch.load(opt.TRAIN.initmodel_add) 93 | 94 | model.load_state_dict(checkpoint['state_dict']) 95 | 96 | print("=> Loading init checkpoint ".format(opt.TRAIN.initmodel)) 97 | 98 | return model -------------------------------------------------------------------------------- /utils/img_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | 5 | def to_numpy(tensor): 6 | ''' tensor to numpy ''' 7 | if torch.is_tensor(tensor): 8 | return tensor.cpu().numpy() 9 | elif type(tensor).__module__ != 'numpy': 10 | raise ValueError("Cannot convert {} to numpy array" 11 | .format(type(tensor))) 12 | return tensor 13 | 14 | 15 | def to_torch(ndarray): 16 | if type(ndarray).__module__ == 'numpy': 17 | return torch.from_numpy(ndarray) 18 | elif not torch.is_tensor(ndarray): 19 | raise ValueError("Cannot convert {} to torch tensor" 20 | .format(type(ndarray))) 21 | return ndarray 22 | 23 | 24 | def get_head_box_channel(x_min, y_min, x_max, y_max, width, height, resolution, coordconv=False): 25 | head_box = np.array([x_min/width, y_min/height, x_max/width, y_max/height])*resolution 26 | head_box = head_box.astype(int) 27 | head_box = np.clip(head_box, 0, resolution-1) 28 | if coordconv: 29 | unit = np.array(range(0,resolution), dtype=np.float32) 30 | head_channel = [] 31 | for i in unit: 32 | head_channel.append([unit+i]) 33 | head_channel = np.squeeze(np.array(head_channel)) / float(np.max(head_channel)) 34 | head_channel[head_box[1]:head_box[3],head_box[0]:head_box[2]] = 0 35 | else: 36 | head_channel = np.zeros((resolution,resolution), dtype=np.float32) 37 | head_channel[head_box[1]:head_box[3],head_box[0]:head_box[2]] = 1 38 | head_channel = torch.from_numpy(head_channel) 39 | return head_channel 40 | 41 | def expand_head_box(hbbox,img_shape,k=0.1): 42 | 43 | x_min,y_min,x_max,y_max=hbbox 44 | width,height=img_shape 45 | 46 | x_min -= k * abs(x_max - x_min) 47 | y_min -= k * abs(y_max - y_min) 48 | x_max += k * abs(x_max - x_min) 49 | y_max += k * abs(y_max - y_min) 50 | 51 | return [x_min,y_min,x_max,y_max] 52 | 53 | def draw_labelmap(img, pt, sigma, type='Gaussian'): 54 | # Draw a 2D gaussian 55 | # Adopted from https://github.com/anewell/pose-hg-train/blob/master/src/pypose/draw.py 56 | img = to_numpy(img) 57 | 58 | # Check that any part of the gaussian is in-bounds 59 | ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)] 60 | br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)] 61 | if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or 62 | br[0] < 0 or br[1] < 0): 63 | # If not, just return the image as is 64 | return to_torch(img) 65 | 66 | # Generate gaussian 67 | size = 6 * sigma + 1 68 | x = np.arange(0, size, 1, float) 69 | y = x[:, np.newaxis] 70 | x0 = y0 = size // 2 71 | # The gaussian is not normalized, we want the center value to equal 1 72 | if type == 'Gaussian': 73 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 74 | elif type == 'Cauchy': 75 | g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma ** 2) ** 1.5) 76 | 77 | # Usable gaussian range 78 | g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] 79 | g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] 80 | # Image range 81 | img_x = max(0, ul[0]), min(br[0], img.shape[1]) 82 | img_y = max(0, ul[1]), min(br[1], img.shape[0]) 83 | 84 | img[img_y[0]:img_y[1], img_x[0]:img_x[1]] += g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 85 | if np.max(img)!=0: 86 | img = img/np.max(img) # normalize heatmap so it has max value of 1 87 | return to_torch(img) 88 | 89 | 90 | def multi_hot_targets(gaze_pts, out_res): 91 | w, h = out_res 92 | target_map = np.zeros((h, w)) 93 | for p in gaze_pts: 94 | if p[0] >= 0: 95 | x, y = map(int,[p[0]*w.float(), p[1]*h.float()]) 96 | x = min(x, w-1) 97 | y = min(y, h-1) 98 | target_map[y, x] = 1 99 | return target_map 100 | 101 | 102 | def argmax_pts(heatmap): 103 | 104 | idx=np.unravel_index(heatmap.argmax(),heatmap.shape) 105 | pred_y,pred_x=map(float,idx) 106 | 107 | return pred_x,pred_y 108 | 109 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.backends.cudnn as cudnn 3 | import argparse 4 | import os 5 | import shutil 6 | import sys 7 | import random 8 | import time 9 | import numpy as np 10 | from datetime import datetime 11 | 12 | from config import cfg 13 | from dataset.gfie import GFIELoader 14 | from utils.model_utils import init_model,setup_model,save_checkpoint,resume_checkpoint,init_checkpoint 15 | 16 | from trainer import Trainer 17 | from tester import Tester 18 | from tensorboardX import SummaryWriter 19 | 20 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 21 | 22 | 23 | def train_engine(opt): 24 | 25 | best_dist_error=sys.maxsize 26 | best_cosine_error=sys.maxsize 27 | # init gaze model 28 | gazemodel=init_model(opt) 29 | 30 | # set criterion and optimizer for gaze model 31 | criterion,optimizer=setup_model(gazemodel,opt) 32 | 33 | writer=False 34 | # create log dir for tensorboardx 35 | if writer is not None: 36 | opt.OTHER.logdir=os.path.join(opt.OTHER.logdir, 37 | datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) 38 | 39 | if os.path.exists(opt.OTHER.logdir): 40 | shutil.rmtree(opt.OTHER.logdir) 41 | os.makedirs(opt.OTHER.logdir) 42 | writer = SummaryWriter(opt.OTHER.logdir) 43 | 44 | 45 | 46 | # set random seed for reduce the randomness 47 | random.seed(opt.OTHER.seed) 48 | np.random.seed(opt.OTHER.seed) 49 | torch.manual_seed(opt.OTHER.seed) 50 | 51 | # reduce the randomness 52 | cudnn.benchmark = False 53 | cudnn.deterministic=True 54 | 55 | # resume the training or initmodel 56 | if opt.TRAIN.resume==True: 57 | 58 | if os.path.isfile(opt.TRAIN.resume_add): 59 | gazemodel, optimizer, best_dist_error,best_cosine_error, opt = resume_checkpoint(gazemodel, optimizer, opt) 60 | 61 | else: 62 | raise Exception("No such checkpoint file") 63 | 64 | 65 | dataloader=GFIELoader(opt) 66 | train_loader=dataloader.train_loader 67 | val_loader=dataloader.val_loader 68 | test_loader=dataloader.test_loader 69 | 70 | # init trainer and validator for gazemodel 71 | trainer=Trainer(gazemodel,criterion,optimizer,train_loader,val_loader,opt,writer=writer) 72 | 73 | tester=Tester(gazemodel,criterion,test_loader,opt,writer=writer) 74 | 75 | trainer.get_best_error(best_dist_error,best_cosine_error) 76 | 77 | optimizer.zero_grad() 78 | 79 | 80 | print("Total epoch:{}".format(opt.TRAIN.end_epoch)) 81 | 82 | 83 | for epoch in range(opt.TRAIN.start_epoch,opt.TRAIN.end_epoch): 84 | 85 | 86 | print("Epoch number:{} | Learning rate:{}\n".format(epoch,optimizer.param_groups[0]["lr"])) 87 | 88 | trainer.train(epoch, opt) 89 | 90 | # save the parameters of model 91 | if epoch%opt.TRAIN.save_intervel==0: 92 | 93 | valid_error=[trainer.eval_dist.avg,trainer.eval_cosine.avg] 94 | 95 | save_checkpoint(gazemodel,optimizer,valid_error,False,epoch,opt) 96 | 97 | # save the parameters of model with the best performance on valid dataset 98 | if trainer.best_flag: 99 | valid_error = [trainer.eval_dist.avg, trainer.eval_cosine.avg] 100 | 101 | save_checkpoint(gazemodel, optimizer, valid_error,trainer.best_flag, epoch, opt) 102 | 103 | 104 | time.sleep(0.03) 105 | 106 | dist_error,gaze_error=tester.test(epoch,opt) 107 | print("current error| L2 dist: {:.2f}/Gaze cosine {:.2f}".format(dist_error,gaze_error)) 108 | 109 | 110 | if __name__ == '__main__': 111 | 112 | parser = argparse.ArgumentParser( 113 | description="GFIE benchmark Model" 114 | ) 115 | 116 | parser.add_argument( 117 | "--cfg", 118 | default="config/gfiebenchmark.yaml", 119 | metavar="FILE", 120 | help="path to config file", 121 | type=str, 122 | ) 123 | parser.add_argument( 124 | "--gpu", 125 | action="store_true", 126 | default=True, 127 | help="choose if use gpus" 128 | ) 129 | parser.add_argument( 130 | "opts", 131 | help="Modify config options using the command-line", 132 | default=None, 133 | nargs=argparse.REMAINDER, 134 | ) 135 | 136 | args = parser.parse_args() 137 | 138 | cfg.merge_from_file(args.cfg) 139 | cfg.merge_from_list(args.opts) 140 | 141 | cfg.OTHER.device='cuda:0' if (torch.cuda.is_available() and args.gpu) else 'cpu' 142 | print("The model running on {}".format(cfg.OTHER.device)) 143 | 144 | train_engine(cfg) -------------------------------------------------------------------------------- /gfiemodel/gfiemodule.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from gfiemodel.resnet import resnet50 5 | 6 | 7 | class Encoder(nn.Module): 8 | ''' Encoder in the Module for Generating GazeHeatmap ''' 9 | 10 | def __init__(self,pretrained=False): 11 | 12 | super(Encoder,self).__init__() 13 | 14 | org_resnet=resnet50(pretrained) 15 | 16 | self.conv1=nn.Conv2d(6,64,kernel_size=7,stride=2,padding=3,bias=False) 17 | self.bn1=org_resnet.bn1 18 | self.relu=org_resnet.relu 19 | self.maxpool=org_resnet.maxpool 20 | self.layer1=org_resnet.layer1 21 | self.layer2=org_resnet.layer2 22 | self.layer3=org_resnet.layer3 23 | self.layer4=org_resnet.layer4 24 | 25 | def forward(self,x): 26 | 27 | x=self.conv1(x) 28 | x=self.bn1(x) 29 | x=self.relu(x) 30 | x=self.maxpool(x) 31 | x=self.layer1(x) 32 | x=self.layer2(x) 33 | x=self.layer3(x) 34 | x=self.layer4(x) 35 | 36 | return x 37 | 38 | class Decoder(nn.Module): 39 | ''' Decoder in the Module for Generating GazeHeatmap ''' 40 | 41 | 42 | def __init__(self): 43 | super(Decoder,self).__init__() 44 | 45 | self.relu=nn.ReLU(inplace=True) 46 | 47 | self.compress_conv1 = nn.Conv2d(2048, 1024, kernel_size=1, stride=1, padding=0, bias=False) 48 | self.compress_bn1 = nn.BatchNorm2d(1024) 49 | self.compress_conv2 = nn.Conv2d(1024, 512, kernel_size=1, stride=1, padding=0, bias=False) 50 | self.compress_bn2 = nn.BatchNorm2d(512) 51 | 52 | self.deconv1 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2) 53 | self.deconv_bn1 = nn.BatchNorm2d(256) 54 | self.deconv2 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2) 55 | self.deconv_bn2 = nn.BatchNorm2d(128) 56 | self.deconv3 = nn.ConvTranspose2d(128, 1, kernel_size=4, stride=2) 57 | self.deconv_bn3 = nn.BatchNorm2d(1) 58 | self.conv4 = nn.Conv2d(1, 1, kernel_size=1, stride=1) 59 | 60 | def forward(self,x): 61 | 62 | x = self.compress_conv1(x) 63 | x = self.compress_bn1(x) 64 | x = self.relu(x) 65 | x = self.compress_conv2(x) 66 | x = self.compress_bn2(x) 67 | x = self.relu(x) 68 | 69 | x = self.deconv1(x) 70 | x = self.deconv_bn1(x) 71 | x = self.relu(x) 72 | x = self.deconv2(x) 73 | x = self.deconv_bn2(x) 74 | x = self.relu(x) 75 | x = self.deconv3(x) 76 | x = self.deconv_bn3(x) 77 | x = self.relu(x) 78 | x = self.conv4(x) 79 | 80 | return x 81 | 82 | 83 | 84 | class EGDModule(nn.Module): 85 | ''' The Module for Estimating Gaze Direction ''' 86 | 87 | def __init__(self): 88 | 89 | super(EGDModule,self).__init__() 90 | 91 | org_resnet50= resnet50(pretrained=False) 92 | 93 | # backbone in the module 94 | self.backbone=nn.Sequential(*list(org_resnet50.children())[:-1]) 95 | 96 | self.fc=nn.Sequential(nn.Linear(2048,256), 97 | nn.ReLU(), 98 | nn.Linear(256,3)) 99 | 100 | def forward(self,himg): 101 | 102 | """ 103 | Args: 104 | himg: cropped head image 105 | Returns: 106 | gazevector: normalized gaze direction predicted by the module. 107 | """ 108 | headfeat=self.backbone(himg) 109 | headfeat=torch.flatten(headfeat,1) 110 | gazevector=self.fc(headfeat) 111 | gazevector=F.normalize(gazevector,p=2,dim=1) 112 | 113 | return gazevector 114 | 115 | class PSFoVModule(nn.Module): 116 | ''' The Module for Perceiving Stero FoV ''' 117 | 118 | def __init__(self): 119 | 120 | super(PSFoVModule, self).__init__() 121 | 122 | self.relu=nn.ReLU() 123 | self.alpha=3 124 | 125 | def forward(self,matrix_T,gazevector,simg_shape): 126 | """ 127 | Args: 128 | matrix_T: unprojected coordiantes represent by matrix T 129 | gazevector: normalized gaze direction predicted by the EGD module. 130 | simg_shape: the shape of the simg 131 | Returns: 132 | SFoVheatmap: F and F' in paper 133 | Notes: 134 | for convenience, depthmap->matrix_T (Eq. 3) in paper is implemented in the data processing. 135 | """ 136 | bs=matrix_T.shape[0] 137 | h,w=simg_shape[2:] 138 | 139 | gazevector=gazevector.unsqueeze(2) 140 | 141 | matrix_T=matrix_T.reshape([bs,-1,3]) 142 | 143 | F=torch.matmul(matrix_T,gazevector) 144 | F=F.reshape([bs,1,h,w]) 145 | 146 | F=self.relu(F) 147 | F_alpha=torch.pow(F,self.alpha) 148 | 149 | SFoVheatmap=[F,F_alpha] 150 | 151 | return SFoVheatmap 152 | 153 | class GGHModule(nn.Module): 154 | ''' The Module for Generating GazeHeatmap ''' 155 | 156 | def __init__(self,pretrained=False): 157 | 158 | super(GGHModule,self).__init__() 159 | 160 | self.encoder=Encoder(pretrained=pretrained) 161 | self.decoder=Decoder() 162 | 163 | def forward(self,simg,SFoVheatmap,headloc): 164 | """ 165 | Args: 166 | simg: scene image 167 | SFoVheatmap: Stereo FoV heatmap generated by the Module for Perceiving Stero FoV 168 | headloc: mask representing the position of the head in scene image 169 | Returns: 170 | gazeheatmap: A heatmap representing a 2D gaze target 171 | """ 172 | 173 | input=torch.cat([simg,headloc]+SFoVheatmap,dim=1) 174 | 175 | global_feat=self.encoder(input) 176 | 177 | gazeheatmap=self.decoder(global_feat) 178 | 179 | return gazeheatmap 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GFIE: A Dataset and Baseline for Gaze-Following from 2D to 3D in Indoor Environments 2 | 3 | This official repository is the pytorch implementation of our CVPR 2023 work: GFIE: A Dataset and Baseline for Gaze-Following from 2D to 3D in Indoor Environments 4 | 5 | [![](https://shields.io/badge/homepage-website-pink?logo=appveyor&style=for-the-badge)](https://sites.google.com/view/gfie) 6 | 7 | ## Prerequisite 8 | 1. Clone our repo 9 | ```bash 10 | git clone https://github.com/nkuhzx/GFIE 11 | ``` 12 | 13 | 2. (optional) Use the `.yaml` file to re-create the environment 14 | ```bash 15 | conda env create -f gfie.yaml 16 | conda activate gfie.yaml 17 | ``` 18 | 19 | 3. Set the available GPU in [`inference.py`](inference.py) and [`main.py`](inference.py) 20 | ```bash 21 | os.environ['CUDA_VISIBLE_DEVICES'] = ID 22 | ``` 23 | `ID` is available gpu id. 24 | 25 | ## Dataset Preparation 26 | 27 | ### Download from Google Drive 28 | 1. Please download the [GFIE dataset](https://drive.google.com/drive/folders/1AKA1jCVdrMmLIXqTeNNCFo5VnUrAcplq?usp=sharing) and [CAD120 dataset](https://drive.google.com/drive/folders/1PNe5AYHd2pdMJin4YzO1ntsdpoZH7pQb?usp=sharing) from the Google Drive. 29 | 2. Unzip rgb.zip and depth.zip into corresponding folders. 30 | 31 | ### Download via shell script 32 | 33 | 1. If the Google Drive is not available, you can run a script to download the GFIE dataset 34 | 2. run the `download_gfie.sh/ download_cad120.sh` script 35 | 36 | ```bash 37 | sh download_gfie.sh 38 | sh download_cad120.sh 39 | ``` 40 | 41 | 3. The file structure should be as follows: 42 | 43 | ```bash 44 | ├── GFIE_dataset/ 45 | | ├── rgb/ 46 | | | ├── train/ 47 | | | | ├── scene1/ # scene id 48 | | | | | └── '*.jpg' # n frame jpg` 49 | | | | └── ... 50 | | | ├── valid/ 51 | | | | └── ... 52 | | | ├── test/ 53 | | | | └── ... 54 | | ├── depth/ 55 | | | ├── train/ 56 | | | | ├── scene1/ # scene id 57 | | | | | └── '*.npy' # n frame npy` 58 | | | ├── valid/ 59 | | | | └── ... 60 | | | ├── test/ 61 | | | | └── ... 62 | | ├── CameraKinect.npy 63 | | ├── train_annotation.txt 64 | | ├── valid_annotation.txt 65 | | └── test_annotation.txt 66 | ├── CAD120_dataset/ 67 | | ├── rgb/ 68 | | | ├── D1S1A001/ 69 | | | | └── 'RGB_*.png' # n frame png` 70 | | | └── ... 71 | | ├── depth/ 72 | | | ├── D1S1A001/ 73 | | | | └── 'Depth_*.png' # n frame png` 74 | | | └── ... 75 | | ├── CameraKinect.npy 76 | └── └── annotation.txt 77 | ``` 78 | `Note: The decompressed file is about 350 GB, please check the capacity of your hard disk to ensrure that the dataset can be stored.` 79 | 80 | 4. Then you need to modify the address of the configuration ([`cad120evaluation.yaml`](config/cad120evaluation.yaml) | [`gfiebenchmark.yaml`](config/gfiebenchmark.yaml)) 81 | 82 | [`gfiebenchmark.yaml`](config/gfiebenchmark.yaml) 83 | ```bash 84 | DATASET: 85 | root_dir: "YOUR_PATH/GFIE_dataset" 86 | ``` 87 | 88 | [`cad120evaluation.yaml`](config/cad120evaluation.yaml) 89 | ```bash 90 | DATASET: 91 | root_dir: "YOUR_PATH/CAD120_dataset" 92 | ``` 93 | 94 | `YOUR_PATH` is the root path of `GFIE_dataset` and `CAD120_dataset`. 95 | 96 | ## Getting start 97 | 98 | ### Training 99 | 100 | After all the prerequisites are met, you can train the GFIE baseline method we proposed in the paper. 101 | 102 | 1. Set the path `STORE_PATH` to save the model file in the [`gfiebenchmark.yaml`](config/gfiebenchmark.yaml) 103 | 104 | ```bash 105 | TRAIN: 106 | store: "STORE_PATH" 107 | ``` 108 | 109 | 2. Download the [pre-trained model weights](https://drive.google.com/file/d/1eXWy4-bg5BQeCHbyH6R1dbWceGCNKPe4/view?usp=sharing) to `PATH`, and then set the path of pre-trained weights in [`gfiebenchmark.yaml`](config/gfiebenchmark.yaml) 110 | 111 | ```bash 112 | MODEL: 113 | backboneptpath: "PATH/ptbackbone.pt" 114 | ``` 115 | 116 | 3. Then run the training procedure 117 | ```bash 118 | python main.py 119 | ``` 120 | 121 | ### Evaluation 122 | 123 | 1. Set the absolute path of the model weight `cpkt_PATH` in the [`cad120evaluation.yaml`](config/cad120evaluation.yaml) and [`gfiebenchmark.yaml`](config/gfiebenchmark.yaml) 124 | 125 | ```bash 126 | OTHER: 127 | cpkt: "cpkt_PATH" 128 | ``` 129 | 130 | 2. Run the inference program and the evaluation results will be displayed in the termainal. 131 | 132 | ```bash 133 | # evaluation on GFIE dataset 134 | python inference.py --mode gfie 135 | 136 | # evaluation on CAD120 dataset 137 | python inference.py --mode cad120 138 | ``` 139 | 140 | ### Model weights 141 | 142 | We also provide the model weights for evaluation. 143 | 144 | [`gfiemodel.pt.tar`](https://drive.google.com/file/d/1VVpAC1z5sQA0niuA92nmQIRRnin7TvIH/view?usp=sharing) 145 | 146 | ## Citation 147 | If you fine our dataset/code useful for your research, please cite our paper 148 | ``` 149 | @InProceedings{Hu_2023_CVPR, 150 | author = {Hu, Zhengxi and Yang, Yuxue and Zhai, Xiaolin and Yang, Dingye and Zhou, Bohan and Liu, Jingtai}, 151 | title = {GFIE: A Dataset and Baseline for Gaze-Following From 2D to 3D in Indoor Environments}, 152 | booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, 153 | month = {June}, 154 | year = {2023}, 155 | pages = {8907-8916} 156 | } 157 | ``` 158 | 159 | ## Acknowledgements 160 | 161 | We would like to thank Eunji Chong for [her work publised on CVPR 2020](https://github.com/ejcgt/attention-target-detection) and others that have contributed to gaze-following. 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | from torch.utils.data import DataLoader 10 | from utils.utils import AverageMeter,auc 11 | from tqdm import tqdm 12 | from config import cfg 13 | from utils.infer_engine import GFIETestDataset,collate_fn,model_init,strategy3dGazeFollowing 14 | from utils.infer_engine import CAD120TestDataset 15 | 16 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 17 | 18 | @ torch.no_grad() 19 | def inference(cfg,mode="gfie"): 20 | 21 | # init the model 22 | device=cfg.OTHER.device 23 | 24 | gazemodel=model_init(device,cfg.OTHER.cpkt) 25 | 26 | if mode=="gfie": 27 | test_dataset=GFIETestDataset(cfg) 28 | elif mode=="cad120": 29 | test_dataset=CAD120TestDataset(cfg) 30 | else: 31 | raise NotImplemented 32 | 33 | test_loader = DataLoader(test_dataset, 34 | batch_size=cfg.DATASET.test_batch_size, 35 | num_workers=cfg.DATASET.load_workers, 36 | shuffle=False, 37 | collate_fn=collate_fn) 38 | 39 | eval_L2dist_counter=AverageMeter() 40 | eval_3Ddist_counter=AverageMeter() 41 | eval_AngleError_counter=AverageMeter() 42 | eval_AUC_counter=AverageMeter() 43 | 44 | pbar=tqdm(total=len(test_loader)) 45 | for i,data in enumerate(test_loader,0): 46 | 47 | x_simg, x_himg, x_hc = data["simg"], data["himg"], data["headloc"] 48 | 49 | x_matrixT = data["matrixT"] 50 | 51 | x_simg = x_simg.to(device) 52 | x_himg = x_himg.to(device) 53 | x_hc = x_hc.to(device) 54 | x_matrixT = x_matrixT.to(device) 55 | 56 | bs = x_simg.size(0) 57 | 58 | i_depmap=data["depthmap"] 59 | i_eye3d=data["eye3d"] 60 | i_img_size=[np.array([i_depmap[i].shape[1],i_depmap[i].shape[0]])[np.newaxis,:] for i in range(i_depmap.shape[0])] 61 | i_img_size=np.concatenate(i_img_size,axis=0) 62 | 63 | # predict gaze heatmap and gaze vector 64 | outs = gazemodel(x_simg, x_himg, x_hc, x_matrixT) 65 | 66 | pred_heatmap = outs['pred_heatmap'] 67 | pred_heatmap = pred_heatmap.squeeze(1) 68 | pred_heatmap = pred_heatmap.data.cpu().numpy() 69 | 70 | 71 | pred_gazevector = outs['pred_gazevector'] 72 | pred_gazevector = pred_gazevector.data.cpu().numpy() 73 | 74 | 75 | gaze_vector = data["gt_gaze_vector"] 76 | gaze_target2d = data["gt_gaze_target2d"] 77 | gaze_target3d = data["gt_gaze_target3d"] 78 | 79 | # strategy for 3d gaze-following and evaluation 80 | 81 | pred_gazevector_list=[] 82 | pred_gazetarget2d_list=[] 83 | pred_gazetarget3d_list=[] 84 | for b_idx in range(bs): 85 | 86 | cur_depmap=i_depmap[b_idx] 87 | cur_pred_gazeheatmap=pred_heatmap[b_idx] 88 | cur_pred_gazevector=pred_gazevector[b_idx] 89 | cur_eye_3d=i_eye3d[b_idx] 90 | 91 | pred_result=strategy3dGazeFollowing(cur_depmap,cur_pred_gazeheatmap,cur_pred_gazevector,cur_eye_3d,test_dataset.camerapara) 92 | 93 | pred_gazevector_list.append(pred_result["pred_gazevector"]) 94 | pred_gazetarget2d_list.append(pred_result["pred_gazetarget_2d"]) 95 | pred_gazetarget3d_list.append(pred_result["pred_gazetarget_3d"]) 96 | 97 | 98 | pred_gazetarget3d =np.concatenate(pred_gazetarget3d_list,axis=0) 99 | pred_gazetarget2d =np.concatenate(pred_gazetarget2d_list,axis=0) 100 | pred_gazevector=np.concatenate(pred_gazevector_list,axis=0) 101 | 102 | # evaluation 103 | eval_batch_3Ddist=np.sum(np.linalg.norm(pred_gazetarget3d-gaze_target3d,axis=1))/bs 104 | eval_batch_l2dist=np.sum(np.linalg.norm(pred_gazetarget2d-gaze_target2d,axis=1))/bs 105 | 106 | eval_batch_cosine_similarity=np.sum(pred_gazevector*gaze_vector,axis=1) 107 | eval_batch_angle_error=np.arccos(eval_batch_cosine_similarity) 108 | eval_batch_angle_error=np.sum(np.rad2deg(eval_batch_angle_error))/bs 109 | 110 | eval_batch_auc=auc(gaze_target2d,pred_heatmap,i_img_size) 111 | eval_AUC_counter.update(eval_batch_auc,bs) 112 | eval_L2dist_counter.update(eval_batch_l2dist,bs) 113 | eval_3Ddist_counter.update(eval_batch_3Ddist,bs) 114 | eval_AngleError_counter.update(eval_batch_angle_error,bs) 115 | 116 | 117 | pbar.set_postfix(eval_3D_dist=eval_3Ddist_counter.avg, 118 | eval_L2_dist=eval_L2dist_counter.avg, 119 | eval_Angle_error=eval_AngleError_counter.avg, 120 | eval_AUC=eval_AUC_counter.avg) 121 | 122 | pbar.update(1) 123 | 124 | pbar.close() 125 | 126 | 127 | 128 | 129 | if __name__ == '__main__': 130 | 131 | parser = argparse.ArgumentParser( 132 | description="GFIE benchmark Model" 133 | ) 134 | 135 | parser.add_argument( 136 | "--cfg", 137 | default="config/default.yaml", 138 | metavar="FILE", 139 | help="path to config file", 140 | type=str, 141 | ) 142 | parser.add_argument( 143 | "--gpu", 144 | action="store_true", 145 | default=True, 146 | help="choose if use gpus" 147 | ) 148 | 149 | parser.add_argument( 150 | "--mode", 151 | default="gfie", 152 | help="choose a dataset to evaluate", 153 | type=str, 154 | ) 155 | 156 | args=parser.parse_args() 157 | 158 | if args.mode=="cad120": 159 | args.cfg="config/cad120evaluation.yaml" 160 | elif args.mode=="gfie": 161 | args.cfg="config/gfiebenchmark.yaml" 162 | else: 163 | raise NotImplementedError("Please select the correct dataset for evalution (gfie or cad120)") 164 | 165 | cfg.merge_from_file(args.cfg) 166 | 167 | cfg.OTHER.device='cuda:0' if (torch.cuda.is_available() and args.gpu) else 'cpu' 168 | print("The model running on {}".format(cfg.OTHER.device)) 169 | 170 | inference(cfg,mode=args.mode) 171 | 172 | -------------------------------------------------------------------------------- /gfie.yaml: -------------------------------------------------------------------------------- 1 | name: gfie 2 | channels: 3 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch 4 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free 5 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main 6 | - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge 7 | - defaults 8 | dependencies: 9 | - _libgcc_mutex=0.1=conda_forge 10 | - _openmp_mutex=4.5=1_gnu 11 | - _tflow_select=2.3.0=eigen 12 | - absl-py=0.11.0=py38h578d9bd_0 13 | - aiohttp=3.7.3=py38h497a2fe_1 14 | - astunparse=1.6.3=py_0 15 | - async-timeout=3.0.1=py_1000 16 | - attrs=20.3.0=pyhd3deb0d_0 17 | - blas=1.0=mkl 18 | - blinker=1.4=py_1 19 | - brotlipy=0.7.0=py38h8df0ef7_1001 20 | - c-ares=1.17.1=h36c2ea0_0 21 | - ca-certificates=2022.3.29=h06a4308_1 22 | - cachetools=4.1.1=py_0 23 | - certifi=2021.10.8=py38h06a4308_2 24 | - cffi=1.14.4=py38ha312104_0 25 | - chardet=3.0.4=py38h924ce5b_1008 26 | - click=7.1.2=pyh9f0ad1d_0 27 | - cryptography=3.3.1=py38h2b97feb_1 28 | - cudatoolkit=11.0.3=h15472ef_6 29 | - cycler=0.10.0=py_2 30 | - dbus=1.13.18=hb2f20db_0 31 | - dill=0.3.3=pyhd8ed1ab_0 32 | - expat=2.2.10=he6710b0_2 33 | - fontconfig=2.13.1=he4413a7_1000 34 | - freetype=2.10.4=h7ca028e_0 35 | - gast=0.3.3=py_0 36 | - gettext=0.19.8.1=hf34092f_1004 37 | - giflib=5.2.1=h7b6447c_0 38 | - glib=2.58.3=py38h73cb85d_1004 39 | - google-auth=1.24.0=pyhd3deb0d_0 40 | - google-auth-oauthlib=0.4.1=py_2 41 | - google-pasta=0.2.0=pyh8c360ce_0 42 | - grpcio=1.34.0=py38hdd6454d_1 43 | - gst-plugins-base=1.14.5=h0935bb2_2 44 | - gstreamer=1.14.5=h36ae1b5_2 45 | - h5py=2.10.0=py38hd6299e0_1 46 | - hdf5=1.10.6=nompi_h7c3c948_1111 47 | - icu=58.2=hf484d3e_1000 48 | - idna=2.10=pyh9f0ad1d_0 49 | - importlib-metadata=3.4.0=py38h578d9bd_0 50 | - intel-openmp=2020.2=254 51 | - jpeg=9b=0 52 | - keras-preprocessing=1.1.0=py_0 53 | - kiwisolver=1.3.1=py38h1fd1430_1 54 | - krb5=1.17.2=h926e7f8_0 55 | - lcms2=2.12=h3be6417_0 56 | - ld_impl_linux-64=2.35.1=hea4e1c9_1 57 | - libcurl=7.71.1=hcdd3856_8 58 | - libedit=3.1.20191231=he28a2e2_2 59 | - libev=4.33=h516909a_1 60 | - libffi=3.2.1=he1b5a44_1007 61 | - libgcc-ng=9.3.0=h5dbcf3e_17 62 | - libgfortran-ng=7.5.0=hae1eefd_17 63 | - libgfortran4=7.5.0=hae1eefd_17 64 | - libgomp=9.3.0=h5dbcf3e_17 65 | - libiconv=1.16=h516909a_0 66 | - libnghttp2=1.41.0=h8cfc5f6_2 67 | - libpng=1.6.37=h21135ba_2 68 | - libprotobuf=3.14.0=h780b84a_0 69 | - libssh2=1.9.0=hab1572f_5 70 | - libstdcxx-ng=9.3.0=h2ae2ef3_17 71 | - libtiff=4.1.0=h2733197_1 72 | - libuuid=2.32.1=h7f98852_1000 73 | - libuv=1.40.0=h7f98852_0 74 | - libwebp=1.2.0=h89dd481_0 75 | - libxcb=1.13=h7f98852_1003 76 | - libxml2=2.9.10=hb55368b_3 77 | - lz4-c=1.9.3=h9c3ff4c_0 78 | - markdown=3.3.3=pyh9f0ad1d_0 79 | - matplotlib=3.3.3=py38h578d9bd_0 80 | - matplotlib-base=3.3.3=py38h5c7f4ab_0 81 | - mkl=2020.2=256 82 | - mkl-service=2.3.0=py38h1e0a361_2 83 | - mkl_fft=1.2.0=py38hab2c0dc_1 84 | - mkl_random=1.2.0=py38hc5bc63f_1 85 | - multidict=5.1.0=py38h497a2fe_1 86 | - ncurses=6.2=h58526e2_4 87 | - networkx=2.6.3=pyhd3eb1b0_0 88 | - ninja=1.10.2=h4bd325d_0 89 | - numpy=1.19.2=py38h54aff64_0 90 | - numpy-base=1.19.2=py38hfa32c7d_0 91 | - oauthlib=3.0.1=py_0 92 | - olefile=0.46=pyh9f0ad1d_1 93 | - openssl=1.1.1n=h7f8727e_0 94 | - opt_einsum=3.3.0=py_0 95 | - pandas=1.2.0=py38h51da96c_0 96 | - pcre=8.44=he1b5a44_0 97 | - pillow=9.0.1=py38h22f2fdc_0 98 | - pip=20.3.3=pyhd8ed1ab_0 99 | - protobuf=3.14.0=py38h709712a_1 100 | - pthread-stubs=0.4=h36c2ea0_1001 101 | - pyasn1=0.4.8=py_0 102 | - pyasn1-modules=0.2.7=py_0 103 | - pycparser=2.20=pyh9f0ad1d_2 104 | - pyjwt=2.0.0=pyhd8ed1ab_0 105 | - pyopenssl=20.0.1=pyhd8ed1ab_0 106 | - pyparsing=2.4.7=pyh9f0ad1d_0 107 | - pyqt=5.9.2=py38h05f1152_4 108 | - pysocks=1.7.1=py38h578d9bd_3 109 | - python=3.8.0=h357f687_5 110 | - python-dateutil=2.8.1=py_0 111 | - python_abi=3.8=1_cp38 112 | - pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 113 | - pytz=2020.5=pyhd8ed1ab_0 114 | - pyyaml=5.3.1=py38h497a2fe_2 115 | - qt=5.9.7=h5867ecd_1 116 | - readline=8.0=he28a2e2_2 117 | - requests=2.25.1=pyhd3deb0d_0 118 | - requests-oauthlib=1.3.0=pyh9f0ad1d_0 119 | - rsa=4.7=pyhd3deb0d_0 120 | - scipy=1.5.2=py38h0b6359f_0 121 | - setuptools=49.6.0=py38h578d9bd_3 122 | - sip=4.19.13=py38he6710b0_0 123 | - six=1.15.0=pyh9f0ad1d_0 124 | - sqlite=3.34.0=h74cdb3f_0 125 | - tensorboard=2.4.0=pyhd8ed1ab_0 126 | - tensorboard-plugin-wit=1.7.0=pyh9f0ad1d_0 127 | - tensorflow=2.3.0=eigen_py38h71ff20e_0 128 | - tensorflow-base=2.3.0=eigen_py38hb57a387_0 129 | - tensorflow-estimator=2.3.0=pyheb71bc4_0 130 | - termcolor=1.1.0=py_2 131 | - tk=8.6.10=h21135ba_1 132 | - torchvision=0.8.2=py38_cu110 133 | - tornado=6.1=py38h497a2fe_1 134 | - tqdm=4.56.0=pyhd8ed1ab_0 135 | - typing-extensions=3.7.4.3=0 136 | - typing_extensions=3.7.4.3=py_0 137 | - urllib3=1.26.2=pyhd8ed1ab_0 138 | - werkzeug=1.0.1=pyh9f0ad1d_0 139 | - wheel=0.36.2=pyhd3deb0d_0 140 | - wrapt=1.12.1=py38h497a2fe_3 141 | - xorg-libxau=1.0.9=h7f98852_0 142 | - xorg-libxdmcp=1.1.3=h7f98852_0 143 | - xz=5.2.5=h516909a_1 144 | - yacs=0.1.6=py_0 145 | - yaml=0.2.5=h516909a_0 146 | - yarl=1.6.3=py38h497a2fe_1 147 | - zipp=3.4.0=py_0 148 | - zlib=1.2.11=h516909a_1010 149 | - zstd=1.4.8=ha95c52a_1 150 | - pip: 151 | - addict==2.4.0 152 | - argon2-cffi==20.1.0 153 | - async-generator==1.10 154 | - backcall==0.2.0 155 | - bleach==3.3.0 156 | - decorator==4.4.2 157 | - defusedxml==0.7.0 158 | - entrypoints==0.3 159 | - ipykernel==5.5.0 160 | - ipython==7.21.0 161 | - ipython-genutils==0.2.0 162 | - ipywidgets==7.6.3 163 | - jedi==0.18.0 164 | - jinja2==2.11.3 165 | - joblib==1.0.1 166 | - jsonschema==3.2.0 167 | - jupyter-client==6.1.11 168 | - jupyter-core==4.7.1 169 | - jupyterlab-pygments==0.1.2 170 | - jupyterlab-widgets==1.0.0 171 | - markupsafe==1.1.1 172 | - memory-profiler==0.61.0 173 | - mistune==0.8.4 174 | - nbclient==0.5.3 175 | - nbconvert==6.0.7 176 | - nbformat==5.1.2 177 | - nest-asyncio==1.5.1 178 | - notebook==6.2.0 179 | - opencv-python==4.5.5.64 180 | - packaging==20.9 181 | - pandocfilters==1.4.3 182 | - parso==0.8.1 183 | - pexpect==4.8.0 184 | - pickleshare==0.7.5 185 | - plyfile==0.7.3 186 | - prometheus-client==0.9.0 187 | - prompt-toolkit==3.0.16 188 | - psutil==5.9.4 189 | - ptyprocess==0.7.0 190 | - pygments==2.8.0 191 | - pyrsistent==0.17.3 192 | - pyzmq==22.0.3 193 | - scikit-learn==0.24.1 194 | - send2trash==1.5.0 195 | - sklearn==0.0 196 | - tensorboardx==2.1 197 | - terminado==0.9.2 198 | - testpath==0.4.4 199 | - threadpoolctl==2.1.0 200 | - timm==0.4.12 201 | - traitlets==5.0.5 202 | - wcwidth==0.2.5 203 | - webencodings==0.5.1 204 | - widgetsnbextension==3.5.1 205 | prefix: /home/hzx/.conda/envs/hzx 206 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from collections import deque 4 | from sklearn.metrics import average_precision_score 5 | from PIL import Image 6 | from sklearn.metrics import roc_auc_score 7 | import time 8 | 9 | class AverageMeter(): 10 | 11 | def __init__(self): 12 | 13 | self.reset() 14 | 15 | def reset(self): 16 | 17 | self.count=0 18 | self.newval=0 19 | self.sum=0 20 | self.avg=0 21 | 22 | def update(self,newval,n=1): 23 | 24 | self.newval=newval 25 | self.sum+=newval*n 26 | self.count+=n 27 | self.avg=self.sum/self.count 28 | 29 | class MovingAverageMeter(): 30 | 31 | def __init__(self,max_len=30): 32 | 33 | self.max_len=max_len 34 | 35 | self.reset() 36 | 37 | def reset(self): 38 | 39 | self.dq=deque(maxlen=self.max_len) 40 | self.count=0 41 | self.avg=0 42 | self.sum=0 43 | 44 | 45 | def update(self,newval): 46 | 47 | self.dq.append(newval) 48 | self.count=len(self.dq) 49 | self.sum=np.array(self.dq).sum() 50 | self.avg=self.sum/float(self.count) 51 | 52 | def argmax_pts(heatmap): 53 | 54 | idx=np.unravel_index(heatmap.argmax(),heatmap.shape) 55 | pred_y,pred_x=map(float,idx) 56 | 57 | return pred_x,pred_y 58 | 59 | 60 | # # Metric functions 61 | def euclid_dist(pred,target): 62 | 63 | batch_dist=0. 64 | batch_size=pred.shape[0] 65 | pred_H,pred_W=pred.shape[1:] 66 | 67 | for b_idx in range(batch_size): 68 | 69 | pred_x,pred_y=argmax_pts(pred[b_idx]) 70 | norm_p=np.array([pred_x,pred_y])/np.array([pred_W,pred_H]) 71 | 72 | 73 | sample_target=target[b_idx] 74 | sample_target=sample_target.view(-1,2).numpy() 75 | 76 | sample_dist=np.linalg.norm(sample_target-norm_p) 77 | 78 | batch_dist+=sample_dist 79 | 80 | euclid_dist=batch_dist/float(batch_size) 81 | 82 | return euclid_dist 83 | 84 | def auc(gt_gaze,pred_heatmap,imsize): 85 | batch_size=len(gt_gaze) 86 | auc_score_list=[] 87 | for b_idx in range(batch_size): 88 | 89 | multi_hot=multi_hot_targets(gt_gaze[b_idx],imsize[b_idx]) 90 | scaled_heatmap=Image.fromarray(pred_heatmap[b_idx]).resize(size=(imsize[b_idx][0],imsize[b_idx][1]), 91 | resample=0) 92 | 93 | scaled_heatmap=np.array(scaled_heatmap) 94 | sample_auc_score=roc_auc_score(np.reshape(multi_hot,multi_hot.size), 95 | np.reshape(scaled_heatmap,scaled_heatmap.size)) 96 | 97 | auc_score_list.append(sample_auc_score) 98 | 99 | auc_score=sum(auc_score_list)/len(auc_score_list) 100 | return auc_score 101 | 102 | 103 | 104 | def ap(label,pred): 105 | return average_precision_score(label,pred) 106 | 107 | def multi_hot_targets(gaze_pts,out_res): 108 | w,h= out_res 109 | target_map=np.zeros((h,w)) 110 | # for p in gaze_pts: 111 | if gaze_pts[0]>=0: 112 | x,y=map(int,[gaze_pts[0]*float(w),gaze_pts[1]*float(h)]) 113 | x=min(x,w-1) 114 | y=min(y,h-1) 115 | target_map[y,x]=1 116 | return target_map 117 | 118 | def draw_labelmap(img, pt, sigma, type='Gaussian'): 119 | # Draw a 2D gaussian 120 | # Adopted from https://github.com/anewell/pose-hg-train/blob/master/src/pypose/draw.py 121 | # img = to_numpy(img) 122 | 123 | # Check that any part of the gaussian is in-bounds 124 | ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)] 125 | br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)] 126 | if (ul[0] >= img.shape[1] or ul[1] >= img.shape[0] or 127 | br[0] < 0 or br[1] < 0): 128 | # If not, just return the image as is 129 | return img 130 | 131 | # Generate gaussian 132 | size = 6 * sigma + 1 133 | x = np.arange(0, size, 1, float) 134 | y = x[:, np.newaxis] 135 | x0 = y0 = size // 2 136 | # The gaussian is not normalized, we want the center value to equal 1 137 | if type == 'Gaussian': 138 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 139 | elif type == 'Cauchy': 140 | g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma ** 2) ** 1.5) 141 | 142 | # Usable gaussian range 143 | g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0] 144 | g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1] 145 | # Image range 146 | img_x = max(0, ul[0]), min(br[0], img.shape[1]) 147 | img_y = max(0, ul[1]), min(br[1], img.shape[0]) 148 | 149 | img[img_y[0]:img_y[1], img_x[0]:img_x[1]] += g[g_y[0]:g_y[1], g_x[0]:g_x[1]] 150 | img = img/np.max(img) # normalize heatmap so it has max value of 1 151 | return img 152 | 153 | def cosine_sim(pred_gv,target_gv): 154 | 155 | cosine=pred_gv*target_gv 156 | # print(np.linalg.norm(pred_gv,axis=1),np.linalg.norm(target_gv,axis=1)) 157 | cosine=np.sum(cosine,axis=1)/(np.linalg.norm(pred_gv,axis=1)*np.linalg.norm(target_gv,axis=1)) 158 | cosine[cosine<1e-6]=1e-6 159 | cosine=np.sum(cosine)/cosine.shape[0] 160 | 161 | # if cosine is np.nan: 162 | # print(cosine) 163 | # raise NotImplemented 164 | 165 | return cosine 166 | 167 | 168 | def unnorm(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): 169 | std = np.array(std).reshape(1, 1, 3) 170 | mean = np.array(mean).reshape(1, 1, 3) 171 | return img * std + mean 172 | 173 | def visualized(rgb_image,face_image,pred_heatmap,gt_heatmap,gt_gaze): 174 | 175 | batch_size=pred_heatmap.shape[0] 176 | 177 | output_shape=pred_heatmap.shape[1:] 178 | H,W=rgb_image.shape[2:] 179 | 180 | 181 | 182 | 183 | gt_mean_dist,bs_dist=euclid_dist(pred_heatmap, gt_gaze, type='retained') 184 | 185 | 186 | gt_gaze=gt_gaze.numpy() 187 | 188 | 189 | 190 | for index in range(batch_size): 191 | 192 | print("index, avg_dist",index,bs_dist[index]) 193 | 194 | figure, ax = plt.subplots(2, 4) 195 | figure.set_size_inches(20, 8) 196 | 197 | # scene image 198 | cur_img=rgb_image[index] 199 | cur_img=cur_img.swapaxes(0,1).swapaxes(1,2) 200 | cur_img = unnorm(cur_img) * 255 201 | cur_img = np.clip(cur_img, 0, 255) 202 | cur_img = cur_img.astype(np.uint8) 203 | 204 | # face image 205 | cur_face=face_image[index] 206 | cur_face=cur_face.swapaxes(0,1).swapaxes(1,2) 207 | cur_face=unnorm(cur_face)*255 208 | cur_face = np.clip(cur_face, 0, 255) 209 | cur_face = cur_face.astype(np.uint8) 210 | 211 | # ground truth gaze 212 | 213 | cur_gaze=gt_gaze[index] 214 | 215 | cur_gaze=cur_gaze[cur_gaze!=[-1,-1]] 216 | 217 | 218 | cur_gaze=cur_gaze.reshape([-1,2]) 219 | cur_gaze[:,1]=cur_gaze[:,1]*H 220 | cur_gaze[:,0]=cur_gaze[:,0]*W 221 | 222 | 223 | ax[0][0].scatter(cur_gaze[:,0],cur_gaze[:,1],s=1,c='white') 224 | ax[0][0].imshow(cur_img) 225 | 226 | 227 | ax[0][1].imshow(cur_face) 228 | 229 | ax[1][0].imshow(pred_heatmap[index],cmap='jet') 230 | 231 | ax[1][1].imshow(gt_heatmap[index],cmap='jet') 232 | 233 | # ax[0][2].imshow(pred_heatmap[index]+xz_heatmap[index]*0.5) 234 | 235 | 236 | plt.show() 237 | 238 | 239 | -------------------------------------------------------------------------------- /trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from utils.utils import AverageMeter,MovingAverageMeter,euclid_dist,cosine_sim 3 | from tqdm import tqdm 4 | 5 | 6 | class Trainer(object): 7 | def __init__(self,model,criterion,optimizer,trainloader,valloader,opt,writer=None): 8 | 9 | self.model=model 10 | self.criterion=criterion 11 | self.optimizer=optimizer 12 | 13 | self.trainloader=trainloader 14 | self.valloader=valloader 15 | 16 | # for train 17 | self.losses=MovingAverageMeter() 18 | self.l2loss=MovingAverageMeter() 19 | self.vecloss=MovingAverageMeter() 20 | 21 | self.train_dist=MovingAverageMeter() 22 | 23 | # for eval 24 | self.eval_dist=AverageMeter() 25 | self.eval_cosine=AverageMeter() 26 | 27 | self.best_error=None 28 | self.best_flag=False 29 | 30 | self.device=torch.device(opt.OTHER.device) 31 | 32 | self.opt=opt 33 | self.writer=writer 34 | 35 | 36 | def get_best_error(self,bs_dist,bs_cosine): 37 | 38 | self.best_dist=bs_dist 39 | self.best_cosine=bs_cosine 40 | 41 | 42 | def train(self,epoch,opt): 43 | 44 | self.model.train() 45 | 46 | # reset loss value 47 | self.losses.reset() 48 | self.l2loss.reset() 49 | self.vecloss.reset() 50 | 51 | self.train_dist.reset() 52 | 53 | self.eval_dist.reset() 54 | self.eval_cosine.reset() 55 | 56 | loader_capacity=len(self.trainloader) 57 | pbar=tqdm(total=loader_capacity) 58 | for i, data in enumerate(self.trainloader,0): 59 | 60 | self.optimizer.zero_grad() 61 | 62 | opt.OTHER.global_step=opt.OTHER.global_step+1 63 | 64 | x_simg, x_himg, x_hc = data["simg"], data["himg"], data["headloc"] 65 | 66 | x_matrixT=data["matrixT"] 67 | 68 | gaze_heatmap = data["gaze_heatmap"] 69 | gaze_vector=data["gaze_vector"] 70 | gaze_target2d = data["gaze_target2d"] 71 | 72 | x_simg=x_simg.to(self.device) 73 | x_himg=x_himg.to(self.device) 74 | x_hc=x_hc.to(self.device) 75 | x_matrixT=x_matrixT.to(self.device) 76 | 77 | y_gaze_heatmap = gaze_heatmap.to(self.device) 78 | y_gaze_vector=gaze_vector.to(self.device) 79 | 80 | bs=x_simg.size(0) 81 | 82 | outs=self.model(x_simg, x_himg, x_hc,x_matrixT) 83 | 84 | pred_gheatmap=outs['pred_heatmap'] 85 | pred_gheatmap=pred_gheatmap.squeeze() 86 | 87 | pred_gvec=outs["pred_gazevector"] 88 | pred_gvec=pred_gvec.squeeze() 89 | 90 | 91 | # gaze heatmap loss 92 | l2_loss=self.criterion[0](pred_gheatmap,y_gaze_heatmap) 93 | l2_loss=torch.mean(l2_loss,dim=1) 94 | l2_loss = torch.mean(l2_loss, dim=1) 95 | l2_loss=torch.sum(l2_loss)/bs 96 | 97 | # gaze vector loss 98 | vec_loss=1-self.criterion[1](pred_gvec,y_gaze_vector) 99 | vec_loss=torch.sum(vec_loss)/bs 100 | 101 | total_loss= l2_loss*10000 + 10 * vec_loss 102 | total_loss.backward() 103 | self.optimizer.step() 104 | 105 | # record the loss 106 | self.losses.update(total_loss.item()) 107 | self.l2loss.update(l2_loss.item()) 108 | self.vecloss.update(vec_loss.item()) 109 | 110 | # for tensorboardx writer 111 | if i%opt.OTHER.lossrec_every==0 : 112 | 113 | self.writer.add_scalar("Train TotalLoss", total_loss.item(), global_step=opt.OTHER.global_step) 114 | self.writer.add_scalar("Train L2Loss", l2_loss.item()*10000, global_step=opt.OTHER.global_step) 115 | self.writer.add_scalar("Train CosLoss", vec_loss.item()*10, global_step=opt.OTHER.global_step) 116 | 117 | pred_gheatmap = pred_gheatmap.squeeze(1) 118 | pred_gheatmap = pred_gheatmap.data.cpu().numpy() 119 | distrain_avg = euclid_dist(pred_gheatmap, gaze_target2d) 120 | self.train_dist.update(distrain_avg) 121 | 122 | # eval in train procedure on valid dataset 123 | if (i%opt.OTHER.evalrec_every==0 and i>0) or i==(loader_capacity-1): 124 | 125 | self.valid() 126 | 127 | # record L2 distance between predicted 2d gaze target adn GT 128 | self.writer.add_scalar("Eval dist", self.eval_dist.avg, global_step=opt.OTHER.global_step) 129 | # record the similarity between predicted gaze vectors and GT 130 | self.writer.add_scalar("Eval cosine", self.eval_cosine.avg, global_step=opt.OTHER.global_step) 131 | 132 | self.best_flag=False 133 | if i==(loader_capacity-1): 134 | if self.best_dist>self.eval_dist.avg: 135 | self.best_dist=self.eval_dist.avg 136 | self.best_flag=True 137 | 138 | if self.best_cosine>self.eval_cosine.avg: 139 | self.best_cosine=self.eval_cosine.avg 140 | self.best_flag=True 141 | 142 | 143 | # for tqdm show 144 | pbar.set_description("Epoch: [{0}]".format(epoch)) 145 | pbar.set_postfix(eval_dist=self.eval_dist.avg, 146 | eval_cos=self.eval_cosine.avg, 147 | train_dist=self.train_dist.avg, 148 | totalloss=self.losses.avg, 149 | l2loss=self.l2loss.avg, 150 | vecloss=self.vecloss.avg, 151 | learning_rate=self.optimizer.param_groups[0]["lr"]) 152 | 153 | pbar.update(1) 154 | 155 | pbar.close() 156 | 157 | @torch.no_grad() 158 | def valid(self): 159 | 160 | self.model.eval() 161 | 162 | self.eval_dist.reset() 163 | self.eval_cosine.reset() 164 | 165 | for i,data in enumerate(self.valloader,0): 166 | 167 | x_simg, x_himg, x_hc = data["simg"], data["himg"], data["headloc"] 168 | 169 | x_matrixT=data["matrixT"] 170 | 171 | gaze_vector=data["gaze_vector"] 172 | gaze_target2d = data["gaze_target2d"] 173 | 174 | x_simg=x_simg.to(self.device) 175 | x_himg=x_himg.to(self.device) 176 | x_hc=x_hc.to(self.device) 177 | x_matrixT=x_matrixT.to(self.device) 178 | 179 | bs=x_simg.size(0) 180 | outs=self.model(x_simg, x_himg, x_hc,x_matrixT) 181 | 182 | pred_heatmap = outs['pred_heatmap'] 183 | pred_heatmap = pred_heatmap.squeeze(1) 184 | pred_heatmap = pred_heatmap.data.cpu().numpy() 185 | 186 | pred_gazevector=outs['pred_gazevector'] 187 | pred_gazevector=pred_gazevector.data.cpu().numpy() 188 | gaze_vector=gaze_vector.numpy() 189 | 190 | distval = euclid_dist(pred_heatmap, gaze_target2d) 191 | cosineval=cosine_sim(pred_gazevector,gaze_vector) 192 | 193 | # eval L2 distance between predicted 2d gaze target adn GT 194 | self.eval_dist.update(distval,bs) 195 | 196 | # eval the similarity between predicted gaze vectors and GT 197 | self.eval_cosine.update(cosineval,bs) 198 | 199 | self.model.train() 200 | -------------------------------------------------------------------------------- /gfiemodel/resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import Tensor 3 | import torch.nn as nn 4 | import torch.utils.model_zoo as model_zoo 5 | from typing import Type, Any, Callable, Union, List, Optional 6 | 7 | try: 8 | from torch.hub import load_state_dict_from_url 9 | except ImportError: 10 | from torch.utils.model_zoo import load_url as load_state_dict_from_url 11 | 12 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 13 | 'resnet152', 'resnext50_32x4d', 'resnext101_32x8d', 14 | ] 15 | 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | 'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth', 24 | 'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth', 25 | } 26 | 27 | 28 | def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d: 29 | """3x3 convolution with padding""" 30 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 31 | padding=dilation, groups=groups, bias=False, dilation=dilation) 32 | 33 | 34 | def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: 35 | """1x1 convolution""" 36 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 37 | 38 | 39 | class BasicBlock(nn.Module): 40 | expansion: int = 1 41 | 42 | def __init__( 43 | self, 44 | inplanes: int, 45 | planes: int, 46 | stride: int = 1, 47 | downsample: Optional[nn.Module] = None, 48 | groups: int = 1, 49 | base_width: int = 64, 50 | dilation: int = 1, 51 | norm_layer: Optional[Callable[..., nn.Module]] = None 52 | ) -> None: 53 | super(BasicBlock, self).__init__() 54 | if norm_layer is None: 55 | norm_layer = nn.BatchNorm2d 56 | if groups != 1 or base_width != 64: 57 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 58 | if dilation > 1: 59 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 60 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 61 | self.conv1 = conv3x3(inplanes, planes, stride) 62 | self.bn1 = norm_layer(planes) 63 | self.relu = nn.ReLU(inplace=True) 64 | self.conv2 = conv3x3(planes, planes) 65 | self.bn2 = norm_layer(planes) 66 | self.downsample = downsample 67 | self.stride = stride 68 | 69 | def forward(self, x: Tensor) -> Tensor: 70 | identity = x 71 | 72 | out = self.conv1(x) 73 | out = self.bn1(out) 74 | out = self.relu(out) 75 | 76 | out = self.conv2(out) 77 | out = self.bn2(out) 78 | 79 | if self.downsample is not None: 80 | identity = self.downsample(x) 81 | 82 | out += identity 83 | out = self.relu(out) 84 | 85 | return out 86 | 87 | 88 | class Bottleneck(nn.Module): 89 | # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2) 90 | # while original implementation places the stride at the first 1x1 convolution(self.conv1) 91 | # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385. 92 | # This variant is also known as ResNet V1.5 and improves accuracy according to 93 | # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch. 94 | 95 | expansion: int = 4 96 | 97 | def __init__( 98 | self, 99 | inplanes: int, 100 | planes: int, 101 | stride: int = 1, 102 | downsample: Optional[nn.Module] = None, 103 | groups: int = 1, 104 | base_width: int = 64, 105 | dilation: int = 1, 106 | norm_layer: Optional[Callable[..., nn.Module]] = None 107 | ) -> None: 108 | super(Bottleneck, self).__init__() 109 | if norm_layer is None: 110 | norm_layer = nn.BatchNorm2d 111 | width = int(planes * (base_width / 64.)) * groups 112 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 113 | self.conv1 = conv1x1(inplanes, width) 114 | self.bn1 = norm_layer(width) 115 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 116 | self.bn2 = norm_layer(width) 117 | self.conv3 = conv1x1(width, planes * self.expansion) 118 | self.bn3 = norm_layer(planes * self.expansion) 119 | self.relu = nn.ReLU(inplace=True) 120 | self.downsample = downsample 121 | self.stride = stride 122 | 123 | def forward(self, x: Tensor) -> Tensor: 124 | identity = x 125 | 126 | out = self.conv1(x) 127 | out = self.bn1(out) 128 | out = self.relu(out) 129 | 130 | out = self.conv2(out) 131 | out = self.bn2(out) 132 | out = self.relu(out) 133 | 134 | out = self.conv3(out) 135 | out = self.bn3(out) 136 | 137 | if self.downsample is not None: 138 | identity = self.downsample(x) 139 | 140 | out += identity 141 | out = self.relu(out) 142 | 143 | return out 144 | 145 | 146 | class ResNet(nn.Module): 147 | 148 | def __init__( 149 | self, 150 | block: Type[Union[BasicBlock, Bottleneck]], 151 | layers: List[int], 152 | num_classes: int = 1000, 153 | zero_init_residual: bool = False, 154 | groups: int = 1, 155 | width_per_group: int = 64, 156 | replace_stride_with_dilation: Optional[List[bool]] = None, 157 | norm_layer: Optional[Callable[..., nn.Module]] = None 158 | ) -> None: 159 | super(ResNet, self).__init__() 160 | if norm_layer is None: 161 | norm_layer = nn.BatchNorm2d 162 | self._norm_layer = norm_layer 163 | 164 | self.inplanes = 64 165 | self.dilation = 1 166 | if replace_stride_with_dilation is None: 167 | # each element in the tuple indicates if we should replace 168 | # the 2x2 stride with a dilated convolution instead 169 | replace_stride_with_dilation = [False, False, False] 170 | if len(replace_stride_with_dilation) != 3: 171 | raise ValueError("replace_stride_with_dilation should be None " 172 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 173 | self.groups = groups 174 | self.base_width = width_per_group 175 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 176 | bias=False) 177 | self.bn1 = norm_layer(self.inplanes) 178 | self.relu = nn.ReLU(inplace=True) 179 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 180 | self.layer1 = self._make_layer(block, 64, layers[0]) 181 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 182 | dilate=replace_stride_with_dilation[0]) 183 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 184 | dilate=replace_stride_with_dilation[1]) 185 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 186 | dilate=replace_stride_with_dilation[2]) 187 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 188 | self.fc = nn.Linear(512 * block.expansion, num_classes) 189 | 190 | for m in self.modules(): 191 | if isinstance(m, nn.Conv2d): 192 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 193 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 194 | nn.init.constant_(m.weight, 1) 195 | nn.init.constant_(m.bias, 0) 196 | 197 | # Zero-initialize the last BN in each residual branch, 198 | # so that the residual branch starts with zeros, and each residual block behaves like an identity. 199 | # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 200 | if zero_init_residual: 201 | for m in self.modules(): 202 | if isinstance(m, Bottleneck): 203 | nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] 204 | elif isinstance(m, BasicBlock): 205 | nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type] 206 | 207 | def _make_layer(self, block: Type[Union[BasicBlock, Bottleneck]], planes: int, blocks: int, 208 | stride: int = 1, dilate: bool = False) -> nn.Sequential: 209 | norm_layer = self._norm_layer 210 | downsample = None 211 | previous_dilation = self.dilation 212 | if dilate: 213 | self.dilation *= stride 214 | stride = 1 215 | if stride != 1 or self.inplanes != planes * block.expansion: 216 | downsample = nn.Sequential( 217 | conv1x1(self.inplanes, planes * block.expansion, stride), 218 | norm_layer(planes * block.expansion), 219 | ) 220 | 221 | layers = [] 222 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 223 | self.base_width, previous_dilation, norm_layer)) 224 | self.inplanes = planes * block.expansion 225 | for _ in range(1, blocks): 226 | layers.append(block(self.inplanes, planes, groups=self.groups, 227 | base_width=self.base_width, dilation=self.dilation, 228 | norm_layer=norm_layer)) 229 | 230 | return nn.Sequential(*layers) 231 | 232 | def _forward_impl(self, x: Tensor) -> Tensor: 233 | # See note [TorchScript super()] 234 | x = self.conv1(x) 235 | x = self.bn1(x) 236 | x = self.relu(x) 237 | x = self.maxpool(x) 238 | 239 | x = self.layer1(x) 240 | x = self.layer2(x) 241 | x = self.layer3(x) 242 | x = self.layer4(x) 243 | 244 | x = self.avgpool(x) 245 | x = torch.flatten(x, 1) 246 | x = self.fc(x) 247 | 248 | return x 249 | 250 | def forward(self, x: Tensor) -> Tensor: 251 | return self._forward_impl(x) 252 | 253 | 254 | def _resnet( 255 | arch: str, 256 | block: Type[Union[BasicBlock, Bottleneck]], 257 | layers: List[int], 258 | pretrained: bool, 259 | progress: bool, 260 | **kwargs: Any 261 | ) -> ResNet: 262 | model = ResNet(block, layers, **kwargs) 263 | if pretrained: 264 | state_dict = load_state_dict_from_url(model_urls[arch], 265 | progress=progress) 266 | model.load_state_dict(state_dict) 267 | return model 268 | 269 | 270 | def resnet18(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 271 | r"""ResNet-18 model from 272 | `"Deep Residual Learning for Image Recognition" `_. 273 | Args: 274 | pretrained (bool): If True, returns a model pre-trained on ImageNet 275 | progress (bool): If True, displays a progress bar of the download to stderr 276 | """ 277 | return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, progress, 278 | **kwargs) 279 | 280 | 281 | def resnet34(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 282 | r"""ResNet-34 model from 283 | `"Deep Residual Learning for Image Recognition" `_. 284 | Args: 285 | pretrained (bool): If True, returns a model pre-trained on ImageNet 286 | progress (bool): If True, displays a progress bar of the download to stderr 287 | """ 288 | return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, progress, 289 | **kwargs) 290 | 291 | 292 | def resnet50(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 293 | r"""ResNet-50 model from 294 | `"Deep Residual Learning for Image Recognition" `_. 295 | Args: 296 | pretrained (bool): If True, returns a model pre-trained on ImageNet 297 | progress (bool): If True, displays a progress bar of the download to stderr 298 | """ 299 | return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, progress, 300 | **kwargs) 301 | 302 | 303 | def resnet101(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 304 | r"""ResNet-101 model from 305 | `"Deep Residual Learning for Image Recognition" `_. 306 | Args: 307 | pretrained (bool): If True, returns a model pre-trained on ImageNet 308 | progress (bool): If True, displays a progress bar of the download to stderr 309 | """ 310 | return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, progress, 311 | **kwargs) 312 | 313 | 314 | def resnet152(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 315 | r"""ResNet-152 model from 316 | `"Deep Residual Learning for Image Recognition" `_. 317 | Args: 318 | pretrained (bool): If True, returns a model pre-trained on ImageNet 319 | progress (bool): If True, displays a progress bar of the download to stderr 320 | """ 321 | return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, progress, 322 | **kwargs) 323 | 324 | 325 | def resnext50_32x4d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 326 | r"""ResNeXt-50 32x4d model from 327 | `"Aggregated Residual Transformation for Deep Neural Networks" `_. 328 | Args: 329 | pretrained (bool): If True, returns a model pre-trained on ImageNet 330 | progress (bool): If True, displays a progress bar of the download to stderr 331 | """ 332 | kwargs['groups'] = 32 333 | kwargs['width_per_group'] = 4 334 | return _resnet('resnext50_32x4d', Bottleneck, [3, 4, 6, 3], 335 | pretrained, progress, **kwargs) 336 | 337 | 338 | def resnext101_32x8d(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> ResNet: 339 | r"""ResNeXt-101 32x8d model from 340 | `"Aggregated Residual Transformation for Deep Neural Networks" `_. 341 | Args: 342 | pretrained (bool): If True, returns a model pre-trained on ImageNet 343 | progress (bool): If True, displays a progress bar of the download to stderr 344 | """ 345 | kwargs['groups'] = 32 346 | kwargs['width_per_group'] = 8 347 | return _resnet('resnext101_32x8d', Bottleneck, [3, 4, 23, 3], 348 | pretrained, progress, **kwargs) 349 | 350 | # if __name__ == '__main__': 351 | # net=resnet50(pretrained=True) 352 | # 353 | # print(net) 354 | 355 | 356 | 357 | -------------------------------------------------------------------------------- /dataset/gfie.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import os 4 | 5 | from PIL import Image 6 | 7 | import torch 8 | 9 | from torch.utils.data.dataset import Dataset 10 | from torch.utils.data import DataLoader 11 | from torchvision import transforms 12 | import torchvision.transforms.functional as TF 13 | 14 | from utils import img_utils 15 | import matplotlib.pyplot as plt 16 | 17 | class GFIELoader(object): 18 | 19 | def __init__(self,opt): 20 | 21 | self.train_gaze = GFIEDataset( 'train', opt, show=False) 22 | self.val_gaze = GFIEDataset( 'valid', opt, show=False) 23 | self.test_gaze=GFIEDataset( 'test', opt, show=False) 24 | 25 | 26 | self.train_loader=DataLoader(self.train_gaze, 27 | batch_size=opt.DATASET.train_batch_size, 28 | num_workers=opt.DATASET.load_workers, 29 | shuffle=True, 30 | collate_fn=collate_fn) 31 | 32 | 33 | self.val_loader=DataLoader(self.val_gaze, 34 | batch_size=opt.DATASET.test_batch_size, 35 | num_workers=opt.DATASET.load_workers, 36 | shuffle=False, 37 | collate_fn=collate_fn) 38 | 39 | self.test_loader=DataLoader(self.test_gaze, 40 | batch_size=opt.DATASET.test_batch_size, 41 | num_workers=opt.DATASET.load_workers, 42 | shuffle=False, 43 | collate_fn=collate_fn) 44 | 45 | 46 | class GFIEDataset(Dataset): 47 | 48 | def __init__(self,dstype,opt,show=False): 49 | 50 | 51 | rgb_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.rgb) 52 | depth_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.depth) 53 | 54 | camerapara=np.load(os.path.join(opt.DATASET.root_dir,opt.DATASET.camerapara)) 55 | 56 | if dstype=="train": 57 | annofile_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.train) 58 | elif dstype=="valid": 59 | annofile_path = os.path.join(opt.DATASET.root_dir, opt.DATASET.valid) 60 | elif dstype=="test": 61 | annofile_path = os.path.join(opt.DATASET.root_dir, opt.DATASET.test) 62 | else: 63 | raise NotImplemented 64 | 65 | df=pd.read_csv(annofile_path) 66 | 67 | self.X_train = df[['scene_id', 'frame_id', "h_x_min","h_y_min","h_x_max","h_y_max",'eye_u','eye_v','eye_X','eye_Y','eye_Z']] 68 | 69 | self.Y_train = df[['gaze_u', 'gaze_v', 'gaze_X', 'gaze_Y', 'gaze_Z']] 70 | 71 | self.length=len(df) 72 | 73 | self.rgb_path=rgb_path 74 | self.depth_path=depth_path 75 | self.camerapara=camerapara 76 | 77 | self.input_size=opt.TRAIN.input_size 78 | self.output_size=opt.TRAIN.output_size 79 | 80 | transform_list = [] 81 | transform_list.append(transforms.Resize((self.input_size, self.input_size))) 82 | transform_list.append(transforms.ToTensor()) 83 | transform_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) 84 | 85 | self.transform = transforms.Compose(transform_list) 86 | 87 | self.dstype=dstype 88 | 89 | self.imshow=show 90 | 91 | 92 | def __getitem__(self, index): 93 | 94 | scene_id,frame_index,h_x_min,h_y_min,h_x_max,h_y_max,eye_u,eye_v,eye_X,eye_Y,eye_Z=self.X_train.iloc[index] 95 | scene_id=str(int(scene_id)) 96 | frame_index=int(frame_index) 97 | 98 | gaze_u, gaze_v,gaze_X, gaze_Y, gaze_Z = self.Y_train.iloc[index] 99 | 100 | rgb_path=os.path.join(self.rgb_path,self.dstype,"scene{}".format(scene_id),"{:04}.jpg".format(frame_index)) 101 | depth_path=os.path.join(self.depth_path,self.dstype,"scene{}".format(scene_id),"{:04}.npy".format(frame_index)) 102 | 103 | # load the rgb image 104 | img = Image.open(rgb_path) 105 | img = img.convert('RGB') 106 | width, height = img.size 107 | org_width, org_height = width, height 108 | 109 | # load the depth image 110 | depthimg=np.load(depth_path) 111 | # replace the invalid value with 0 112 | depthimg[np.isnan(depthimg)]=0 113 | depthimg=depthimg.astype(np.float32) 114 | depthimg=Image.fromarray(depthimg) 115 | 116 | # expand face bbox a bit 117 | k=0.1 118 | h_x_min -= k * abs(h_x_max - h_x_min) 119 | h_y_min -= k * abs(h_y_max - h_y_min) 120 | h_x_max += k * abs(h_x_max - h_x_min) 121 | h_y_max += k * abs(h_y_max - h_y_min) 122 | 123 | x_min, y_min, x_max, y_max=map(float,[h_x_min,h_y_min,h_x_max,h_y_max]) 124 | 125 | # Data augmentation for training procedure 126 | offset_x, offset_y = 0, 0 127 | flip_flag = False 128 | if self.dstype=="train": 129 | 130 | # Jitter (expansion-only) bounding box size 131 | if np.random.random_sample() <= 0.5: 132 | k = np.random.random_sample() * 0.2 133 | x_min -= k * abs(x_max - x_min) 134 | y_min -= k * abs(y_max - y_min) 135 | x_max += k * abs(x_max - x_min) 136 | y_max += k * abs(y_max - y_min) 137 | 138 | # Random crop 139 | if np.random.random_sample() <= 0.5: 140 | # calculate the minimum valid range of the crop that doesn't exclude the face and the gaze target 141 | crop_x_min = np.min([gaze_u , x_min, x_max]) 142 | crop_y_min = np.min([gaze_v , y_min, y_max]) 143 | crop_x_max = np.max([gaze_u , x_min, x_max]) 144 | crop_y_max = np.max([gaze_v , y_min, y_max]) 145 | 146 | # randomly select a top left corner 147 | if crop_x_min >= 0: 148 | crop_x_min = np.random.uniform(0, crop_x_min) 149 | if crop_y_min >= 0: 150 | crop_y_min = np.random.uniform(0, crop_y_min) 151 | 152 | # find the range of valid crop width and height starting from the (crop_x_min, crop_y_min) 153 | crop_width_min = crop_x_max - crop_x_min 154 | crop_height_min = crop_y_max - crop_y_min 155 | crop_width_max = width - crop_x_min 156 | crop_height_max = height - crop_y_min 157 | # randomly select a width and a height 158 | crop_width = np.random.uniform(crop_width_min, crop_width_max) 159 | crop_height = np.random.uniform(crop_height_min, crop_height_max) 160 | 161 | # crop scene img 162 | img = TF.crop(img, crop_y_min, crop_x_min, crop_height, crop_width) 163 | 164 | # crop depth img 165 | depthimg=TF.crop(depthimg, crop_y_min, crop_x_min, crop_height, crop_width) 166 | 167 | # record the crop's (x, y) offset 168 | offset_x, offset_y = crop_x_min, crop_y_min 169 | 170 | # convert coordinates into the cropped frame 171 | x_min, y_min, x_max, y_max = x_min - offset_x, y_min - offset_y, x_max - offset_x, y_max - offset_y 172 | 173 | gaze_u, gaze_v = (gaze_u - offset_x) / float(crop_width), \ 174 | (gaze_v - offset_y) / float(crop_height) 175 | 176 | eye_u, eye_v = (eye_u - offset_x) / float(crop_width), \ 177 | (eye_v - offset_y) / float(crop_height) 178 | 179 | width, height = crop_width, crop_height 180 | 181 | else: 182 | gaze_u, gaze_v = (gaze_u - offset_x) / float(width), \ 183 | (gaze_v - offset_y) / float(height) 184 | 185 | eye_u, eye_v = (eye_u - offset_x) / float(width), \ 186 | (eye_v - offset_y) / float(height) 187 | 188 | # Random flip 189 | if np.random.random_sample() <= 0.5: 190 | flip_flag=True 191 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 192 | depthimg = depthimg.transpose(Image.FLIP_LEFT_RIGHT) 193 | x_max_2 = width - x_min 194 | x_min_2 = width - x_max 195 | x_max = x_max_2 196 | x_min = x_min_2 197 | gaze_u= 1 - gaze_u 198 | eye_u=1 -eye_u 199 | 200 | # Random change the brightness, contrast and saturation of the scene images 201 | if np.random.random_sample() <= 0.5: 202 | img = TF.adjust_brightness(img, brightness_factor=np.random.uniform(0.5, 1.5)) 203 | img = TF.adjust_contrast(img, contrast_factor=np.random.uniform(0.5, 1.5)) 204 | img = TF.adjust_saturation(img, saturation_factor=np.random.uniform(0, 1.5)) 205 | 206 | else: 207 | 208 | gaze_u, gaze_v = gaze_u / float(width), \ 209 | gaze_v / float(height) 210 | 211 | eye_u, eye_v = eye_u / float(width), \ 212 | eye_v / float(height) 213 | 214 | # represent the head location with mask 215 | head_channel = img_utils.get_head_box_channel(x_min, y_min, x_max, y_max, width, height, 216 | resolution=self.input_size, coordconv=False).unsqueeze(0) 217 | 218 | # the final image size in train/valid/test 219 | final_width,final_height=img.size 220 | 221 | # crop the face 222 | headimg = img.crop((int(x_min), int(y_min), int(x_max), int(y_max))) 223 | 224 | # set for display 225 | if self.imshow: 226 | img_show=img 227 | depthimg_show=depthimg 228 | 229 | # resize scene/face image and convert them to tensor 230 | if self.transform is not None: 231 | 232 | img=self.transform(img) 233 | headimg=self.transform(headimg) 234 | 235 | # Generate the matrix_T 236 | depthmap=depthimg.resize((self.input_size,self.input_size),Image.BICUBIC) 237 | depthmap=np.array(depthmap) 238 | 239 | # scale proportionally 240 | scale_width,scale_height=final_width/self.input_size,final_height/self.input_size 241 | 242 | # construct empty matrix 243 | matrix_T_DW = np.linspace(0, self.input_size - 1, self.input_size) 244 | matrix_T_DH = np.linspace(0, self.input_size - 1, self.input_size) 245 | [matrix_T_xx, matrix_T_yy] = np.meshgrid(matrix_T_DW, matrix_T_DH) 246 | 247 | # construct matrix_T according to Eq 3. in paper 248 | fx,fy,cx,cy=self.camerapara 249 | if flip_flag: 250 | cx= org_width - cx 251 | matrix_T_X = (matrix_T_xx * scale_width + (org_width - final_width - offset_x) - cx) * depthmap / fx 252 | 253 | else: 254 | matrix_T_X = (matrix_T_xx * scale_width + offset_x - cx) * depthmap / fx 255 | 256 | matrix_T_Y = (matrix_T_yy * scale_height + offset_y - cy) * depthmap / fy 257 | matrix_T_Z = depthmap 258 | 259 | matrix_T = np.dstack((matrix_T_X, matrix_T_Y, matrix_T_Z)) 260 | matrix_T = matrix_T.reshape([-1, 3]) 261 | matrix_T = matrix_T.reshape([self.input_size, self.input_size, 3]) 262 | 263 | if flip_flag: 264 | matrix_T = matrix_T - np.array([-eye_X, eye_Y, eye_Z]) 265 | else: 266 | matrix_T = matrix_T - np.array([eye_X, eye_Y, eye_Z]) 267 | 268 | norm_value = np.linalg.norm(matrix_T, axis=2, keepdims=True) 269 | norm_value[norm_value <= 0] = 1 270 | 271 | matrix_T = matrix_T / norm_value 272 | 273 | # convert it to tensor 274 | matrix_T=torch.from_numpy(matrix_T).float() 275 | 276 | 277 | # generate the gaze vector label 278 | gaze_vector = np.array([gaze_X - eye_X, gaze_Y - eye_Y, gaze_Z - eye_Z]) 279 | 280 | if flip_flag: 281 | gaze_vector[0]=-gaze_vector[0] 282 | 283 | norm_gaze_vector = 1.0 if np.linalg.norm (gaze_vector) <= 0.0 else np.linalg.norm (gaze_vector) 284 | gaze_vector=gaze_vector/norm_gaze_vector 285 | gaze_vector=torch.from_numpy(gaze_vector) 286 | 287 | # generate the heat map label 288 | gaze_heatmap = torch.zeros(self.output_size, self.output_size) # set the size of the output 289 | 290 | gaze_heatmap = img_utils.draw_labelmap(gaze_heatmap, [gaze_u * self.output_size, gaze_v * self.output_size], 291 | 3,type='Gaussian') 292 | 293 | 294 | # auxilary info 295 | gaze_target2d=torch.from_numpy(np.array([gaze_u,gaze_v])) 296 | matrix_T_heatmap = np.dot(matrix_T, gaze_vector) 297 | 298 | # display 299 | if self.imshow: 300 | 301 | def unnorm(img, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): 302 | std = np.array(std).reshape(3, 1, 1) 303 | mean = np.array(mean).reshape(3, 1, 1) 304 | return img * std + mean 305 | 306 | figure,ax=plt.subplots(2,3) 307 | figure.set_size_inches(15 ,8) 308 | 309 | simgshow=unnorm(img.numpy()) * 255 310 | simgshow=np.clip(simgshow,0,255) 311 | simgshow=simgshow.astype(np.uint8) 312 | 313 | himgshow=unnorm(headimg.numpy()) * 255 314 | himgshow=np.clip(himgshow,0,255) 315 | himgshow=himgshow.astype(np.uint8) 316 | 317 | eyes_outpix=[eye_u*self.input_size,eye_v*self.input_size] 318 | 319 | gaze_outpix = [gaze_u * self.input_size, gaze_v * self.input_size] 320 | 321 | # display scene image 322 | ax[0][0].imshow(np.transpose(simgshow, (1, 2, 0))) 323 | # display gaze target and eyes in scene image 324 | ax[0][0].scatter(eyes_outpix[0],eyes_outpix[1]) 325 | ax[0][0].scatter(gaze_outpix[0],gaze_outpix[1]) 326 | 327 | # display depth map 328 | ax[0][1].imshow(depthmap,cmap='gray') 329 | # display head image 330 | ax[1][0].imshow(np.transpose(himgshow, (1, 2, 0))) 331 | # display expected stero FoV heatmap 332 | ax[1][1].imshow(matrix_T_heatmap, cmap='jet') 333 | 334 | 335 | plt.show() 336 | 337 | all_data={} 338 | all_data['simg'] = img 339 | all_data["himg"] = headimg 340 | all_data["headloc"] = head_channel 341 | all_data["matrixT"]=matrix_T 342 | 343 | # Y_label 344 | all_data["gaze_heatmap"] = gaze_heatmap 345 | all_data["gaze_vector"] = gaze_vector 346 | all_data["gaze_target2d"] = gaze_target2d 347 | 348 | return all_data 349 | 350 | 351 | def __len__(self): 352 | 353 | return self.length 354 | 355 | def collate_fn(batch): 356 | 357 | batch_data={} 358 | 359 | batch_data["simg"]=[] 360 | batch_data["himg"]=[] 361 | batch_data["headloc"]=[] 362 | batch_data["matrixT"]=[] 363 | 364 | 365 | batch_data["gaze_heatmap"]=[] 366 | batch_data["gaze_vector"]=[] 367 | batch_data["gaze_target2d"]=[] 368 | 369 | 370 | for data in batch: 371 | batch_data["simg"].append(data["simg"]) 372 | batch_data["himg"].append(data["himg"]) 373 | batch_data["headloc"].append(data["headloc"]) 374 | batch_data["matrixT"].append(data["matrixT"]) 375 | 376 | batch_data["gaze_heatmap"].append(data["gaze_heatmap"]) 377 | batch_data["gaze_vector"].append(data["gaze_vector"]) 378 | batch_data["gaze_target2d"].append(data["gaze_target2d"]) 379 | 380 | 381 | # train data 382 | batch_data["simg"]=torch.stack(batch_data["simg"],0) 383 | batch_data["himg"]=torch.stack(batch_data["himg"],0) 384 | batch_data["headloc"]=torch.stack(batch_data["headloc"],0) 385 | batch_data["matrixT"]=torch.stack(batch_data["matrixT"],0) 386 | 387 | 388 | # label data 389 | batch_data["gaze_heatmap"]=torch.stack(batch_data["gaze_heatmap"],0) 390 | batch_data["gaze_vector"] = torch.stack(batch_data["gaze_vector"], 0) 391 | batch_data["gaze_target2d"] = torch.stack(batch_data["gaze_target2d"], 0) 392 | 393 | return batch_data 394 | -------------------------------------------------------------------------------- /utils/infer_engine.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | import pandas as pd 6 | 7 | from PIL import Image 8 | from torch.utils.data.dataset import Dataset 9 | from torchvision.transforms import transforms 10 | 11 | from utils import img_utils 12 | from gfiemodel.gfienet import GFIENet 13 | 14 | class GFIETestDataset(Dataset): 15 | 16 | def __init__(self,opt): 17 | super(GFIETestDataset,self).__init__() 18 | 19 | rgb_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.rgb) 20 | depth_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.depth) 21 | 22 | camerapara=np.load(os.path.join(opt.DATASET.root_dir,opt.DATASET.camerapara)) 23 | 24 | annofile_path = os.path.join(opt.DATASET.root_dir, opt.DATASET.test) 25 | 26 | 27 | self.input_size=opt.TRAIN.input_size 28 | 29 | df=pd.read_csv(annofile_path) 30 | 31 | self.X_train = df[['scene_id', 'frame_id', "h_x_min","h_y_min","h_x_max","h_y_max",'eye_u','eye_v','eye_X','eye_Y','eye_Z']] 32 | 33 | self.Y_train = df[['gaze_u', 'gaze_v', 'gaze_X', 'gaze_Y', 'gaze_Z']] 34 | 35 | self.length=len(df) 36 | 37 | self.rgb_path=rgb_path 38 | self.depth_path=depth_path 39 | self.camerapara=camerapara 40 | 41 | self.input_size=opt.TRAIN.input_size 42 | self.output_size=opt.TRAIN.output_size 43 | 44 | transform_list = [] 45 | transform_list.append(transforms.Resize((opt.TRAIN.input_size, opt.TRAIN.input_size))) 46 | transform_list.append(transforms.ToTensor()) 47 | transform_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) 48 | self.transform = transforms.Compose(transform_list) 49 | 50 | def __getitem__(self, index): 51 | scene_id, frame_index, h_x_min, h_y_min, h_x_max, h_y_max, eye_u, eye_v, eye_X, eye_Y, eye_Z = self.X_train.iloc[index] 52 | 53 | scene_id = str(int(scene_id)) 54 | frame_index = int(frame_index) 55 | 56 | gaze_u, gaze_v,gaze_X, gaze_Y, gaze_Z = self.Y_train.iloc[index] 57 | 58 | rgb_path=os.path.join(self.rgb_path,"test","scene{}".format(scene_id),"{:04}.jpg".format(frame_index)) 59 | depth_path=os.path.join(self.depth_path,"test","scene{}".format(scene_id),"{:04}.npy".format(frame_index)) 60 | 61 | head_bbox=[h_x_min, h_y_min, h_x_max, h_y_max] 62 | 63 | eye_3d=np.array([eye_X,eye_Y,eye_Z]) 64 | gaze_target_2d=np.array([gaze_u,gaze_v]) 65 | gaze_target_3d=np.array([gaze_X,gaze_Y,gaze_Z]) 66 | 67 | # format input 68 | format_input=self.format_model_input(rgb_path,depth_path,head_bbox,self.camerapara,eye_3d) 69 | 70 | # generate GT 71 | groundtruth=self.getGroundTruth(eye_3d,gaze_target_2d,gaze_target_3d) 72 | 73 | all_data={} 74 | all_data['simg'] = format_input["simg"] 75 | all_data["himg"] = format_input["himg"] 76 | all_data["headloc"] = format_input["headloc"] 77 | all_data["matrixT"]=format_input["matrixT"] 78 | all_data['depthmap'] = format_input["depthmap"] 79 | all_data["eye3d"]=eye_3d 80 | 81 | 82 | all_data["gt_gaze_vector"]=groundtruth["gaze_vector"] 83 | all_data["gt_gaze_target2d"]=groundtruth["gaze_target2d"] 84 | all_data["gt_gaze_target3d"]=groundtruth["gaze_target3d"] 85 | 86 | return all_data 87 | 88 | def __len__(self): 89 | return self.length 90 | 91 | def format_model_input(self,rgb_path,depth_path,head_bbox,campara,eye_coord): 92 | 93 | 94 | # load the rgb image and depth map 95 | rgbimg=Image.open(rgb_path) 96 | rgbimg = rgbimg.convert('RGB') 97 | 98 | depthimg = np.load(depth_path) #Image.open(depth_path) 99 | depthimg[np.isnan(depthimg)]=0 100 | depthimg=depthimg.astype(np.float32) 101 | depthimg=Image.fromarray(depthimg) 102 | 103 | width, height = rgbimg.size 104 | self.img_para=[width,height] 105 | 106 | # expand the head bounding box (in pixel coordinate ) 107 | x_min, y_min, x_max, y_max=map(float,img_utils.expand_head_box(head_bbox 108 | ,[width,height])) 109 | 110 | # crop the head 111 | head = rgbimg.crop((int(x_min), int(y_min), int(x_max), int(y_max))) 112 | 113 | # represent the head location with mask 114 | head_loc = img_utils.get_head_box_channel(x_min, y_min, x_max, y_max, width, height, 115 | resolution=self.input_size, coordconv=False).unsqueeze(0) 116 | 117 | # to tensor 118 | rgbimg=self.transform(rgbimg) 119 | headimg=self.transform(head) 120 | 121 | # generate the matrix_T 122 | depthmap=depthimg.resize((self.input_size,self.input_size),Image.BICUBIC) 123 | depthmap=np.array(depthmap) 124 | matrix_T=self.getMatrixT(depthmap,campara,eye_coord) 125 | 126 | # reserved for strategy for 3D gaze-following 127 | depthvalue=depthimg.copy() 128 | depthvalue=np.array(depthvalue) 129 | 130 | format_input={} 131 | format_input['simg']=rgbimg 132 | format_input['himg']=headimg 133 | format_input['headloc']=head_loc 134 | format_input['matrixT']=matrix_T 135 | format_input['depthmap']=depthvalue 136 | 137 | return format_input 138 | 139 | def getGroundTruth(self,eye3d,gt2d,gt3d): 140 | 141 | img_W, img_H = self.img_para 142 | 143 | gaze_vector=gt3d-eye3d 144 | norm_gaze_vector = 1.0 if np.linalg.norm(gaze_vector) <= 0.0 else np.linalg.norm(gaze_vector) 145 | gaze_vector=gaze_vector/norm_gaze_vector 146 | 147 | ground_truth={} 148 | ground_truth["gaze_vector"]=gaze_vector 149 | ground_truth["gaze_target2d"]=gt2d/np.array([img_W,img_H]) 150 | ground_truth["gaze_target3d"]=gt3d 151 | 152 | return ground_truth 153 | 154 | def getMatrixT(self,dmap,camera_p,eye_3d): 155 | 156 | img_W,img_H=self.img_para 157 | 158 | fx, fy,cx, cy = camera_p 159 | 160 | # construct empty matrix 161 | matrix_T_DW = np.linspace(0, self.input_size - 1, self.input_size) 162 | matrix_T_DH = np.linspace(0, self.input_size - 1, self.input_size) 163 | [matrix_T_xx, matrix_T_yy] = np.meshgrid(matrix_T_DW, matrix_T_DH) 164 | 165 | scale_width, scale_height = img_W / self.input_size, img_H / self.input_size 166 | 167 | matrix_T_X = (matrix_T_xx*scale_width - cx) * dmap /fx 168 | matrix_T_Y = (matrix_T_yy*scale_height - cy) * dmap /fy 169 | matrix_T_Z = dmap 170 | 171 | matrix_T = np.dstack((matrix_T_X, matrix_T_Y, matrix_T_Z)) 172 | matrix_T = matrix_T.reshape([-1, 3]) 173 | matrix_T = matrix_T.reshape([self.input_size, self.input_size, 3]) 174 | 175 | matrix_T = matrix_T- eye_3d 176 | 177 | norm_value = np.linalg.norm(matrix_T, axis=2, keepdims=True) 178 | norm_value[norm_value <= 0] = 1 179 | 180 | matrix_T = matrix_T / norm_value 181 | 182 | matrix_T=torch.from_numpy(matrix_T).float() 183 | 184 | return matrix_T 185 | 186 | 187 | class CAD120TestDataset(Dataset): 188 | 189 | def __init__(self,opt): 190 | 191 | super(CAD120TestDataset,self).__init__() 192 | 193 | rgb_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.rgb) 194 | depth_path=os.path.join(opt.DATASET.root_dir,opt.DATASET.depth) 195 | 196 | camerapara=np.load(os.path.join(opt.DATASET.root_dir,opt.DATASET.camerapara)) 197 | 198 | annofile_path = os.path.join(opt.DATASET.root_dir, opt.DATASET.test) 199 | 200 | self.input_size=opt.TRAIN.input_size 201 | 202 | df=pd.read_csv(annofile_path) 203 | 204 | self.X_train = df[['dataset_id', 'frame_index', "x_initial","y_initial","w","h", 'eye_x', 'eye_y', 'eye_X', 'eye_Y','eye_Z']] 205 | 206 | self.Y_train = df[['gaze_x', 'gaze_y', 'gaze_X', 'gaze_Y', 'gaze_Z']] 207 | 208 | self.length=len(df) 209 | 210 | self.rgb_path=rgb_path 211 | self.depth_path=depth_path 212 | self.camerapara=camerapara 213 | 214 | self.input_size=opt.TRAIN.input_size 215 | self.output_size=opt.TRAIN.output_size 216 | 217 | transform_list = [] 218 | transform_list.append(transforms.Resize((opt.TRAIN.input_size, opt.TRAIN.input_size))) 219 | transform_list.append(transforms.ToTensor()) 220 | transform_list.append(transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])) 221 | self.transform = transforms.Compose(transform_list) 222 | 223 | def __getitem__(self, index): 224 | scene_id, frame_index, h_x_min, h_y_min, h_w, h_h, eye_u, eye_v, eye_X, eye_Y, eye_Z = self.X_train.iloc[index] 225 | 226 | frame_index = int(frame_index) 227 | 228 | gaze_u, gaze_v,gaze_X, gaze_Y, gaze_Z = self.Y_train.iloc[index] 229 | 230 | rgb_path=os.path.join(self.rgb_path,"{}".format(scene_id),"RGB_{}.png".format(frame_index)) 231 | depth_path=os.path.join(self.depth_path,"{}".format(scene_id),"Depth_{}.png".format(frame_index)) 232 | 233 | head_bbox=[h_x_min, h_y_min, h_w, h_h] 234 | 235 | eye_3d=np.array([eye_X,eye_Y,eye_Z]) 236 | gaze_target_2d=np.array([gaze_u,gaze_v]) 237 | gaze_target_3d=np.array([gaze_X,gaze_Y,gaze_Z]) 238 | 239 | # format input 240 | format_input=self.format_model_input(rgb_path,depth_path,head_bbox,self.camerapara,eye_3d,index) 241 | 242 | # generate GT 243 | groundtruth=self.getGroundTruth(eye_3d,gaze_target_2d,gaze_target_3d) 244 | 245 | 246 | all_data={} 247 | all_data['simg'] = format_input["simg"] 248 | all_data["himg"] = format_input["himg"] 249 | all_data["headloc"] = format_input["headloc"] 250 | all_data["matrixT"]=format_input["matrixT"] 251 | all_data['depthmap'] = format_input["depthmap"] 252 | all_data["eye3d"]=eye_3d 253 | 254 | all_data["gt_gaze_vector"]=groundtruth["gaze_vector"] 255 | all_data["gt_gaze_target2d"]=groundtruth["gaze_target2d"] 256 | all_data["gt_gaze_target3d"]=groundtruth["gaze_target3d"] 257 | 258 | return all_data 259 | 260 | def __len__(self): 261 | return self.length 262 | 263 | def format_model_input(self,rgb_path,depth_path,head_bbox,campara,eye_coord,index=None): 264 | 265 | rgbimg=Image.open(rgb_path) 266 | rgbimg = rgbimg.convert('RGB') 267 | 268 | depthimg= Image.open(depth_path) 269 | depthimg= np.array(depthimg).astype(np.float) 270 | depthimg= depthimg/1000. 271 | 272 | depthimg=Image.fromarray(depthimg) 273 | 274 | width, height = rgbimg.size 275 | self.img_para=[width,height] 276 | 277 | # convert to image coordinate system 278 | head_bbox=np.array(head_bbox) 279 | head_bbox[2:]=head_bbox[2:]+head_bbox[0:2] 280 | head_bbox=head_bbox*np.array([width,height,width,height]) 281 | 282 | # expand the head bounding box (in pixel coordinate ) 283 | x_min, y_min, x_max, y_max=map(float,img_utils.expand_head_box(head_bbox 284 | ,[width,height])) 285 | 286 | # crop the head 287 | head = rgbimg.crop((int(x_min), int(y_min), int(x_max), int(y_max))) 288 | 289 | # represent the head location with mask 290 | head_loc = img_utils.get_head_box_channel(x_min, y_min, x_max, y_max, width, height, 291 | resolution=self.input_size, coordconv=False).unsqueeze(0) 292 | 293 | # to tensor 294 | rgbimg=self.transform(rgbimg) 295 | headimg=self.transform(head) 296 | 297 | # generate the matrix_T 298 | depthmap=depthimg.resize((self.input_size,self.input_size),Image.NEAREST) 299 | depthmap=np.array(depthmap) 300 | 301 | matrix_T=self.getMatrixT(depthmap,campara,eye_coord,index) 302 | 303 | # reserved for strategy for 3D gaze-following 304 | depthvalue=depthimg.copy() 305 | depthvalue=np.array(depthvalue) 306 | 307 | format_input={} 308 | format_input['simg']=rgbimg 309 | format_input['himg']=headimg 310 | format_input['headloc']=head_loc 311 | format_input['matrixT']=matrix_T 312 | format_input['depthmap']=depthvalue 313 | 314 | return format_input 315 | 316 | def getGroundTruth(self,eye3d,gt2d,gt3d): 317 | 318 | gaze_vector=gt3d-eye3d 319 | norm_gaze_vector = 1.0 if np.linalg.norm(gaze_vector) <= 0.0 else np.linalg.norm(gaze_vector) 320 | gaze_vector=gaze_vector/norm_gaze_vector 321 | 322 | ground_truth={} 323 | ground_truth["gaze_vector"]=gaze_vector 324 | ground_truth["gaze_target2d"]=gt2d 325 | ground_truth["gaze_target3d"]=gt3d 326 | 327 | return ground_truth 328 | 329 | def getMatrixT(self,dmap,camera_p,eye_3d,index=None): 330 | 331 | img_W,img_H=self.img_para 332 | fx, fy,cx, cy = camera_p 333 | 334 | # construct empty matrix 335 | matrix_T_DW = np.linspace(0, self.input_size - 1, self.input_size) 336 | matrix_T_DH = np.linspace(0, self.input_size - 1, self.input_size) 337 | [matrix_T_xx, matrix_T_yy] = np.meshgrid(matrix_T_DW, matrix_T_DH) 338 | 339 | scale_width, scale_height = img_W / self.input_size, img_H / self.input_size 340 | 341 | matrix_T_X = (matrix_T_xx*scale_width - cx) * dmap /fx 342 | matrix_T_Y = (matrix_T_yy*scale_height - cy) * dmap /fy 343 | matrix_T_Z = dmap 344 | 345 | matrix_T = np.dstack((matrix_T_X, matrix_T_Y, matrix_T_Z)) 346 | 347 | matrix_T = matrix_T- eye_3d 348 | 349 | norm_value = np.linalg.norm(matrix_T, axis=2, keepdims=True) 350 | norm_value[norm_value <= 0] = 1 351 | 352 | matrix_T = matrix_T / norm_value 353 | matrix_T[dmap==0]=np.zeros_like(matrix_T[dmap==0]) 354 | matrix_T=torch.from_numpy(matrix_T).float() 355 | 356 | return matrix_T 357 | 358 | def collate_fn(batch): 359 | 360 | batch_data={} 361 | 362 | batch_data["simg"]=[] 363 | batch_data["himg"]=[] 364 | batch_data["headloc"]=[] 365 | batch_data["matrixT"]=[] 366 | 367 | # for inference 368 | batch_data["depthmap"] = [] 369 | batch_data["eye3d"]=[] 370 | 371 | batch_data["gt_gaze_vector"]=[] 372 | batch_data["gt_gaze_target2d"]=[] 373 | batch_data["gt_gaze_target3d"]=[] 374 | 375 | 376 | for data in batch: 377 | batch_data["simg"].append(data["simg"]) 378 | batch_data["himg"].append(data["himg"]) 379 | batch_data["headloc"].append(data["headloc"]) 380 | batch_data["matrixT"].append(data["matrixT"]) 381 | 382 | batch_data["depthmap"].append(data["depthmap"]) 383 | batch_data["eye3d"].append(data["eye3d"]) 384 | 385 | batch_data["gt_gaze_vector"].append(data["gt_gaze_vector"]) 386 | batch_data["gt_gaze_target2d"].append(data["gt_gaze_target2d"]) 387 | batch_data["gt_gaze_target3d"].append(data["gt_gaze_target3d"]) 388 | 389 | 390 | # train data 391 | batch_data["simg"]=torch.stack(batch_data["simg"],0) 392 | batch_data["himg"]=torch.stack(batch_data["himg"],0) 393 | batch_data["headloc"]=torch.stack(batch_data["headloc"],0) 394 | batch_data["matrixT"]=torch.stack(batch_data["matrixT"],0) 395 | 396 | # aux data 397 | batch_data["depthmap"]=np.stack(batch_data["depthmap"],0) 398 | batch_data["eye3d"]=np.stack(batch_data["eye3d"],0) 399 | 400 | # label data 401 | batch_data["gt_gaze_vector"]=np.stack(batch_data["gt_gaze_vector"],0) 402 | batch_data["gt_gaze_target2d"] = np.stack(batch_data["gt_gaze_target2d"], 0) 403 | batch_data["gt_gaze_target3d"] = np.stack(batch_data["gt_gaze_target3d"], 0) 404 | 405 | return batch_data 406 | 407 | 408 | def model_init(device,cpkt): 409 | cudnn.deterministic = True 410 | 411 | model = GFIENet(pretrained=False) 412 | 413 | model = model.to(device) 414 | model.eval() 415 | 416 | checkpoint = torch.load(cpkt) 417 | model.load_state_dict(checkpoint["state_dict"]) 418 | 419 | return model 420 | 421 | def strategy3dGazeFollowing(depthmap,pred_gh,pred_gv,eye_3d,campara,ratio=0.1): 422 | 423 | img_H,img_W=depthmap.shape 424 | 425 | # get the center of 2d proposal area 426 | output_h, output_w = pred_gh.shape 427 | 428 | pred_center = list(img_utils.argmax_pts(pred_gh)) 429 | pred_gazetarget_2d=np.array([pred_center[0]/output_w,pred_center[1]/output_h]) 430 | 431 | pred_center[0] = pred_center[0] * img_W / output_w 432 | pred_center[1] = pred_center[1] * img_H / output_h 433 | 434 | # get the proposal rectangle area 435 | pu_min = pred_center[0] - img_W * ratio / 2 436 | pu_max = pred_center[0] + img_W * ratio / 2 437 | 438 | pv_min = pred_center[1] - img_H * ratio / 2 439 | pv_max = pred_center[1] + img_H * ratio / 2 440 | 441 | if pu_min < 0: 442 | pu_min, pu_max = 0, img_W * ratio 443 | elif pu_max > img_W: 444 | pu_max, pu_min = img_W, img_W - img_W * ratio 445 | 446 | if pv_min < 0: 447 | pv_min, pv_max = 0, img_H * ratio 448 | elif pv_max > img_H: 449 | pv_max, pv_min = img_H, img_H - img_H * ratio 450 | 451 | pu_min, pu_max, pv_min, pv_max = map(int, [pu_min, pu_max, pv_min, pv_max]) 452 | 453 | # unproject to 3d proposal area 454 | range_depthmap = depthmap[pv_min:pv_max, pu_min:pu_max] 455 | fx, fy ,cx, cy = campara 456 | 457 | range_space_DW = np.linspace(pu_min, pu_max - 1, pu_max - pu_min) 458 | range_space_DH = np.linspace(pv_min, pv_max - 1, pv_max - pv_min) 459 | [range_space_xx, range_space_yy] = np.meshgrid(range_space_DW, range_space_DH) 460 | 461 | 462 | 463 | range_space_X = (range_space_xx - cx) * range_depthmap / fx 464 | range_space_Y = (range_space_yy - cy) * range_depthmap / fy 465 | range_space_Z = range_depthmap 466 | 467 | proposal_3d = np.dstack([range_space_X, range_space_Y, range_space_Z]) 468 | 469 | matrix_T = proposal_3d-eye_3d 470 | 471 | norm_value = np.linalg.norm(matrix_T, axis=2, keepdims=True) 472 | norm_value[norm_value <= 0] = 1 473 | matrix_T = matrix_T / norm_value 474 | 475 | # filter out the invalid depth 476 | matrix_T[range_depthmap == 0] = 0 477 | 478 | # find the 479 | gaze_vector_similar_set = np.dot(matrix_T, pred_gv) 480 | 481 | max_index_u, max_index_v = img_utils.argmax_pts(gaze_vector_similar_set) 482 | 483 | pred_gazetarget_3d=proposal_3d[int(max_index_v),int(max_index_u)] 484 | 485 | pred_gazevector=matrix_T[int(max_index_v),int(max_index_u)] 486 | 487 | pred_gazetarget_3d=np.array(pred_gazetarget_3d).reshape(-1,3) 488 | pred_gazetarget_2d=np.array(pred_gazetarget_2d).reshape(-1,2) 489 | pred_gazevector=pred_gazevector.reshape(-1,3) 490 | 491 | return {"pred_gazetarget_3d":pred_gazetarget_3d, 492 | "pred_gazetarget_2d":pred_gazetarget_2d, 493 | "pred_gazevector":pred_gazevector} 494 | 495 | 496 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things. 28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 633 | 634 | 635 | Copyright (C) 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see . 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | Copyright (C) 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | . 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | . 675 | --------------------------------------------------------------------------------