├── lib ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── attention.cpython-36.pyc.140688647036976 │ ├── models.py │ ├── feature_extractor.py │ ├── bitrap_np.py │ ├── SGNet.py │ └── SGNet_CVAE.py ├── dataloaders │ ├── __init__.py │ ├── datasets.py │ ├── trajectron.py │ ├── ethucy_data_layer.py │ ├── pie_data_layer.py │ ├── jaad_data_layer.py │ └── JAAD_origin.py ├── losses │ ├── __init__.py │ ├── cvae.py │ └── rmse.py └── utils │ ├── __init__.py │ ├── data_utils.py │ ├── hevi_train_utils.py │ ├── ethucy_train_utils.py │ ├── jaadpie_train_utils_cvae.py │ ├── eval_utils.py │ └── ethucy_train_utils_cvae.py ├── configs ├── ethucy │ ├── __init__.py │ ├── ethucy.py │ └── ETH_UCY.json ├── jaad │ ├── __init__.py │ └── jaad.py ├── pie │ ├── __init__.py │ └── pie.py ├── __init__.py └── base_configs.py ├── .gitmodules ├── .gitignore ├── tools ├── ethucy │ ├── eval_deterministic.py │ ├── eval_cvae.py │ ├── train_deterministic.py │ └── train_cvae.py ├── pie │ ├── eval_cvae.py │ └── train_cvae.py └── jaad │ ├── eval_cvae.py │ └── train_cvae.py ├── SGNet_env.yml └── README.md /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * -------------------------------------------------------------------------------- /configs/ethucy/__init__.py: -------------------------------------------------------------------------------- 1 | from .ethucy import * -------------------------------------------------------------------------------- /configs/jaad/__init__.py: -------------------------------------------------------------------------------- 1 | from .jaad import * 2 | -------------------------------------------------------------------------------- /configs/pie/__init__.py: -------------------------------------------------------------------------------- 1 | from .pie import * 2 | -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_configs import * 2 | -------------------------------------------------------------------------------- /lib/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import build_dataset 2 | -------------------------------------------------------------------------------- /lib/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .rmse import * 2 | from .cvae import * 3 | 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import * 2 | from .data_utils import * 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Trajectron-plus-plus"] 2 | path = Trajectron-plus-plus 3 | url = https://github.com/StanfordASL/Trajectron-plus-plus.git 4 | -------------------------------------------------------------------------------- /lib/models/__pycache__/attention.cpython-36.pyc.140688647036976: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChuhuaW/SGNet.pytorch/HEAD/lib/models/__pycache__/attention.cpython-36.pyc.140688647036976 
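The `__init__.py` files above simply re-export each package's public names, which is how the entry points under `tools/` consume them. A minimal sketch of that wiring, assuming the repository root is on `PYTHONPATH`; this snippet is illustrative commentary, not a file from the repository:

```python
# Illustrative sketch: how configs/, lib/models/ and lib/utils are wired together,
# mirroring the imports used by tools/jaad/train_cvae.py.
from configs.jaad import parse_sgnet_args   # re-exported by configs/jaad/__init__.py
from lib.models import build_model          # re-exported by lib/models/__init__.py -> models.py
import lib.utils as utl                     # pulls in eval_utils and data_utils

args = parse_sgnet_args()                   # argparse namespace: dataset, model, lr, ...
utl.set_seed(int(args.seed))                # defined in lib/utils/data_utils.py
model = build_model(args)                   # picks SGNet or SGNet_CVAE via args.model
train_gen = utl.build_data_loader(args, 'train')
```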
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.swp 3 | *.pth 4 | *.txt 5 | *.pyc 6 | *.err 7 | *.script 8 | 9 | data/* 10 | ======= 11 | data/PIE 12 | data/JAAD 13 | data/HEVI_dataset 14 | data/ETHUCY -------------------------------------------------------------------------------- /lib/models/models.py: -------------------------------------------------------------------------------- 1 | from .SGNet import SGNet 2 | from .SGNet_CVAE import SGNet_CVAE 3 | 4 | _META_ARCHITECTURES = { 5 | 'SGNet':SGNet, 6 | 'SGNet_CVAE':SGNet_CVAE, 7 | } 8 | 9 | 10 | def build_model(args): 11 | meta_arch = _META_ARCHITECTURES[args.model] 12 | return meta_arch(args) 13 | -------------------------------------------------------------------------------- /lib/dataloaders/datasets.py: -------------------------------------------------------------------------------- 1 | from .jaad_data_layer import JAADDataLayer 2 | from .pie_data_layer import PIEDataLayer 3 | from .ethucy_data_layer import ETHUCYDataLayer 4 | 5 | def build_dataset(args, phase): 6 | print(args.dataset) 7 | if args.dataset in ['JAAD']: 8 | data_layer = JAADDataLayer 9 | elif args.dataset in ['PIE']: 10 | data_layer = PIEDataLayer 11 | elif args.dataset in ['ETH', 'HOTEL','UNIV', 'ZARA1', 'ZARA2']: 12 | data_layer = ETHUCYDataLayer 13 | return data_layer(args, phase) -------------------------------------------------------------------------------- /lib/losses/cvae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def cvae_multi(pred_traj, target, first_history_index = 0): 4 | ''' 5 | CVAE loss use best-of-many 6 | ''' 7 | K = pred_traj.shape[3] 8 | 9 | target = target.unsqueeze(3).repeat(1, 1, 1, K, 1) 10 | total_loss = [] 11 | for enc_step in range(first_history_index, pred_traj.size(1)): 12 | traj_rmse = torch.sqrt(torch.sum((pred_traj[:,enc_step,:,:,:] - target[:,enc_step,:,:,:])**2, dim=-1)).sum(dim=1) 13 | best_idx = torch.argmin(traj_rmse, dim=1) 14 | loss_traj = traj_rmse[range(len(best_idx)), best_idx].mean() 15 | total_loss.append(loss_traj) 16 | 17 | return sum(total_loss)/len(total_loss) 18 | -------------------------------------------------------------------------------- /configs/base_configs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | __all__ = ['parse_base_args'] 4 | 5 | def parse_base_args(): 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--checkpoint', default='', type=str) 8 | parser.add_argument('--start_epoch', default=1, type=int) 9 | parser.add_argument('--gpu', default='0', type=str) 10 | parser.add_argument('--num_workers', default=8, type=int) 11 | parser.add_argument('--epochs', default=50, type=int) 12 | parser.add_argument('--batch_size', default=128, type=int) 13 | parser.add_argument('--weight_decay', default=5e-04, type=float) 14 | parser.add_argument('--seed', default=1, type=int) 15 | parser.add_argument('--phases', default=['train', 'test'], type=list) 16 | parser.add_argument('--shuffle', default=True, type=bool) 17 | return parser 18 | -------------------------------------------------------------------------------- /lib/losses/rmse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class rmse_loss(nn.Module): 6 | ''' 7 | Params: 8 | x_pred: 
(batch_size, enc_steps, dec_steps, pred_dim) 9 | x_true: (batch_size, enc_steps, dec_steps, pred_dim) 10 | Returns: 11 | rmse: scalar, rmse = \sum_{i=1:batch_size}() 12 | ''' 13 | def __init__(self): 14 | super(rmse_loss, self).__init__() 15 | 16 | def forward(self, x_pred, x_true): 17 | L2_diff = torch.sqrt(torch.sum((x_pred - x_true)**2, dim=3)) 18 | # sum over prediction time steps 19 | L2_all_pred = torch.sum(L2_diff, dim=2) 20 | # mean of each frames predictions 21 | L2_mean_pred = torch.mean(L2_all_pred, dim=1) 22 | # sum of all batches 23 | L2_mean_pred = torch.mean(L2_mean_pred, dim=0) 24 | return L2_mean_pred -------------------------------------------------------------------------------- /configs/ethucy/ethucy.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='ETH', type=str) 8 | parser.add_argument('--lr', default=5e-04, type=float) # ETH 0.0005,HOTEL 0.0001, UNIV 0.0001, ZARA1 0.0001, ZARA2 0.0001 9 | parser.add_argument('--eth_root', default='data/ETHUCY', type=str) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--hidden_size', default=512, type=int) 12 | parser.add_argument('--enc_steps', default=8, type=int) 13 | parser.add_argument('--dec_steps', default=12, type=int) 14 | parser.add_argument('--dropout', default=0.5, type=float) 15 | parser.add_argument('--nu', default=0.0, type=float) 16 | parser.add_argument('--sigma', default=1.0, type=float) 17 | parser.add_argument('--ETH_CONFIG', default='./configs/ethucy/ETH_UCY.json', type=str) 18 | parser.add_argument('--augment', default=False, type=bool) 19 | parser.add_argument('--DEC_WITH_Z', default=True, type=bool) 20 | parser.add_argument('--LATENT_DIM', default=32, type=int) 21 | parser.add_argument('--pred_dim', default=2, type=int) 22 | parser.add_argument('--input_dim', default=6, type=int) 23 | parser.add_argument('--K', default=20, type=int) 24 | 25 | return parser.parse_args() -------------------------------------------------------------------------------- /configs/pie/pie.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='PIE', type=str) 8 | parser.add_argument('--lr', default=5e-04, type=float) 9 | parser.add_argument('--data_root', default='data/PIE', type=str) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--bbox_type', default='cxcywh', type=str) 12 | parser.add_argument('--normalize', default='zero-one', type=str) 13 | parser.add_argument('--hidden_size', default=512, type=int) 14 | parser.add_argument('--enc_steps', default=15, type=int) 15 | parser.add_argument('--dec_steps', default=45, type=int) 16 | parser.add_argument('--dropout', default=0.0, type=float) 17 | parser.add_argument('--nu', default=0.0, type=float) 18 | parser.add_argument('--sigma', default=1.5, type=float) 19 | parser.add_argument('--FPS', default=30, type=int) 20 | parser.add_argument('--min_bbox', default=[0,0,0,0], type=list) 21 | parser.add_argument('--max_bbox', default=[1920, 1080, 1920, 1080], type=list) 22 | parser.add_argument('--K', default=20, type=int) 23 | parser.add_argument('--DEC_WITH_Z', 
default=True, type=bool) 24 | parser.add_argument('--LATENT_DIM', default=32, type=int) 25 | parser.add_argument('--pred_dim', default=4, type=int) 26 | parser.add_argument('--input_dim', default=4, type=int) 27 | 28 | return parser.parse_args() -------------------------------------------------------------------------------- /configs/jaad/jaad.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='JAAD', type=str) 8 | parser.add_argument('--data_root', default='data/JAAD', type=str) 9 | parser.add_argument('--lr', default=5e-04, type=float) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--bbox_type', default='cxcywh', type=str) 12 | parser.add_argument('--normalize', default='zero-one', type=str) 13 | parser.add_argument('--hidden_size', default=512, type=int) 14 | parser.add_argument('--enc_steps', default=15, type=int) 15 | parser.add_argument('--dec_steps', default=45, type=int) 16 | parser.add_argument('--dropout', default=0.0, type=float) 17 | parser.add_argument('--nu', default=0.0, type=float) 18 | parser.add_argument('--sigma', default=1.5, type=float) 19 | parser.add_argument('--FPS', default=30, type=int) 20 | parser.add_argument('--min_bbox', default=[0,0,0,0], type=list) 21 | parser.add_argument('--max_bbox', default=[1920, 1080, 1920, 1080], type=list) 22 | parser.add_argument('--K', default=20, type=int) 23 | parser.add_argument('--DEC_WITH_Z', default=True, type=bool) 24 | parser.add_argument('--LATENT_DIM', default=32, type=int) 25 | parser.add_argument('--pred_dim', default=4, type=int) 26 | parser.add_argument('--input_dim', default=4, type=int) 27 | 28 | 29 | 30 | return parser.parse_args() 31 | -------------------------------------------------------------------------------- /tools/ethucy/eval_deterministic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | 14 | from configs.ethucy import parse_sgd_args as parse_args 15 | import lib.utils as utl 16 | from lib.models import build_model 17 | from lib.losses import rmse_loss 18 | from lib.utils.ethucy_train_utils import train, val, test 19 | 20 | 21 | def main(args): 22 | this_dir = osp.dirname(__file__) 23 | model_name = args.model 24 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name, str(args.dropout), str(args.seed)) 25 | if not osp.isdir(save_dir): 26 | os.makedirs(save_dir) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | utl.set_seed(int(args.seed)) 31 | model = build_model(args) 32 | model = model.to(device) 33 | if osp.isfile(args.checkpoint): 34 | 35 | checkpoint = torch.load(args.checkpoint, map_location=device) 36 | model.load_state_dict(checkpoint['model_state_dict']) 37 | del checkpoint 38 | 39 | 40 | criterion = rmse_loss().to(device) 41 | 42 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 43 | print("Number of test samples:", test_gen.__len__()) 44 | 45 | 46 | # test 47 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = 
test(model, test_gen, criterion, device) 48 | 49 | if __name__ == '__main__': 50 | main(parse_args()) 51 | -------------------------------------------------------------------------------- /tools/pie/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.pie import parse_sgd_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | model = build_model(args) 31 | 32 | 33 | if osp.isfile(args.checkpoint): 34 | checkpoint = torch.load(args.checkpoint, map_location=device) 35 | model.load_state_dict(checkpoint['model_state_dict'],strict=False) 36 | model = nn.DataParallel(model) 37 | model = model.to(device) 38 | criterion = rmse_loss().to(device) 39 | test_gen = utl.build_data_loader(args, 'test') 40 | print("Number of test samples:", test_gen.__len__()) 41 | 42 | # test 43 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 44 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f; FMSE: %4f; FIOU: %4f\n" % (MSE_05, MSE_10, MSE_15, FMSE, FIOU)) 45 | print("CFMSE: %4f; CMSE: %4f; \n" % (CFMSE, CMSE)) 46 | 47 | if __name__ == '__main__': 48 | main(parse_args()) 49 | -------------------------------------------------------------------------------- /tools/jaad/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.jaad import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | model = build_model(args) 31 | 32 | 33 | if osp.isfile(args.checkpoint): 34 | checkpoint = torch.load(args.checkpoint, map_location=device) 35 | model.load_state_dict(checkpoint['model_state_dict'],strict=False) 36 | model = nn.DataParallel(model) 37 | model = model.to(device) 38 | criterion = rmse_loss().to(device) 39 | test_gen = utl.build_data_loader(args, 'test') 40 | print("Number of test samples:", test_gen.__len__()) 41 | 42 | # test 43 | test_loss, 
MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 44 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f; FMSE: %4f; FIOU: %4f\n" % (MSE_05, MSE_10, MSE_15, FMSE, FIOU)) 45 | print("CFMSE: %4f; CMSE: %4f; \n" % (CFMSE, CMSE)) 46 | 47 | if __name__ == '__main__': 48 | main(parse_args()) 49 | -------------------------------------------------------------------------------- /tools/ethucy/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.ethucy import parse_sgd_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.ethucy_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name,str(args.dropout), str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | model = build_model(args) 30 | 31 | model = nn.DataParallel(model) 32 | model = model.to(device) 33 | if osp.isfile(args.checkpoint): 34 | 35 | checkpoint = torch.load(args.checkpoint, map_location=device) 36 | model.load_state_dict(checkpoint['model_state_dict'], strict=False) 37 | del checkpoint 38 | 39 | criterion = rmse_loss().to(device) 40 | 41 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 42 | print("Number of test samples:", test_gen.__len__()) 43 | 44 | 45 | 46 | 47 | 48 | # test 49 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 50 | print("Test Loss: {:.4f}".format(test_loss)) 51 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 52 | 53 | 54 | if __name__ == '__main__': 55 | main(parse_args()) 56 | -------------------------------------------------------------------------------- /lib/models/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision import datasets, transforms, models 4 | import torch.nn.functional as F 5 | 6 | 7 | class JAADFeatureExtractor(nn.Module): 8 | 9 | def __init__(self, args): 10 | super(JAADFeatureExtractor, self).__init__() 11 | self.embbed_size = args.hidden_size 12 | self.box_embed = nn.Sequential(nn.Linear(4, self.embbed_size), 13 | nn.ReLU()) 14 | def forward(self, inputs): 15 | box_input = inputs 16 | embedded_box_input= self.box_embed(box_input) 17 | 18 | return embedded_box_input 19 | 20 | class ETHUCYFeatureExtractor(nn.Module): 21 | 22 | def __init__(self, args): 23 | super(ETHUCYFeatureExtractor, self).__init__() 24 | self.embbed_size = args.hidden_size 25 | self.embed = nn.Sequential(nn.Linear(6, self.embbed_size), 26 | nn.ReLU()) 27 | 28 | 29 | def forward(self, inputs): 30 | box_input = inputs 31 | 32 | embedded_box_input= self.embed(box_input) 33 | 34 | return embedded_box_input 35 | 36 | class PIEFeatureExtractor(nn.Module): 37 | 38 | def __init__(self, args): 39 | super(PIEFeatureExtractor, 
self).__init__() 40 | 41 | self.embbed_size = args.hidden_size 42 | self.box_embed = nn.Sequential(nn.Linear(4, self.embbed_size), 43 | nn.ReLU()) 44 | def forward(self, inputs): 45 | box_input = inputs 46 | embedded_box_input= self.box_embed(box_input) 47 | return embedded_box_input 48 | 49 | _FEATURE_EXTRACTORS = { 50 | 'PIE': PIEFeatureExtractor, 51 | 'JAAD': JAADFeatureExtractor, 52 | 'ETH': ETHUCYFeatureExtractor, 53 | 'HOTEL': ETHUCYFeatureExtractor, 54 | 'UNIV': ETHUCYFeatureExtractor, 55 | 'ZARA1': ETHUCYFeatureExtractor, 56 | 'ZARA2': ETHUCYFeatureExtractor, 57 | } 58 | 59 | def build_feature_extractor(args): 60 | func = _FEATURE_EXTRACTORS[args.dataset] 61 | return func(args) 62 | -------------------------------------------------------------------------------- /tools/pie/train_cvae.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import torch 4 | from torch import nn, optim 5 | 6 | import lib.utils as utl 7 | from configs.pie import parse_sgnet_args as parse_args 8 | from lib.models import build_model 9 | from lib.losses import rmse_loss 10 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 11 | 12 | def main(args): 13 | this_dir = osp.dirname(__file__) 14 | model_name = args.model 15 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 16 | if not osp.isdir(save_dir): 17 | os.makedirs(save_dir) 18 | 19 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 20 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 21 | utl.set_seed(int(args.seed)) 22 | 23 | 24 | model = build_model(args) 25 | model = nn.DataParallel(model) 26 | model = model.to(device) 27 | 28 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 29 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 30 | min_lr=1e-10, verbose=1) 31 | if osp.isfile(args.checkpoint): 32 | checkpoint = torch.load(args.checkpoint, map_location=device) 33 | model.load_state_dict(checkpoint['model_state_dict']) 34 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 35 | args.start_epoch += checkpoint['epoch'] 36 | 37 | criterion = rmse_loss().to(device) 38 | 39 | train_gen = utl.build_data_loader(args, 'train') 40 | val_gen = utl.build_data_loader(args, 'val') 41 | test_gen = utl.build_data_loader(args, 'test') 42 | print("Number of validation samples:", val_gen.__len__()) 43 | print("Number of test samples:", test_gen.__len__()) 44 | 45 | 46 | 47 | # train 48 | min_loss = 1e6 49 | min_MSE_15 = 10e5 50 | best_model = None 51 | best_model_metric = None 52 | 53 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 54 | print("Number of training samples:", len(train_gen)) 55 | 56 | # train 57 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 58 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}'.format( 59 | epoch, train_goal_loss, train_cvae_loss, train_KLD_loss)) 60 | 61 | 62 | # val 63 | val_loss = val(model, val_gen, criterion, device) 64 | lr_scheduler.step(val_loss) 65 | 66 | 67 | # test 68 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 69 | print("Test Loss: {:.4f}".format(test_loss)) 70 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f\n" % (MSE_05, MSE_10, MSE_15)) 71 | 72 | if __name__ == '__main__': 73 | main(parse_args()) 74 | 
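The script above resumes training by reading `model_state_dict`, `optimizer_state_dict` and `epoch` from `args.checkpoint`, but the matching save step is not among the files shown here. A hedged sketch of a save call that would produce a compatible checkpoint (the helper name and file naming are assumptions, not the repository's code):

```python
# Sketch only: writes the keys that train_cvae.py / eval_cvae.py read back.
import os.path as osp
import torch

def save_checkpoint(save_dir, epoch, model, optimizer):
    # 'model' is whatever main() built (here an nn.DataParallel-wrapped SGNet_CVAE);
    # the resume path above loads this dict back into the same wrapper.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, osp.join(save_dir, 'epoch_{:03d}.pth'.format(epoch)))  # file name is an assumption
```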
-------------------------------------------------------------------------------- /tools/ethucy/train_deterministic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | 14 | from configs.ethucy import parse_sgnet_args as parse_args 15 | import lib.utils as utl 16 | from lib.models import build_model 17 | from lib.losses import rmse_loss 18 | from lib.utils.ethucy_train_utils import train, val, test 19 | 20 | 21 | def main(args): 22 | this_dir = osp.dirname(__file__) 23 | model_name = args.model 24 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset, model_name, str(args.seed)) 25 | if not osp.isdir(save_dir): 26 | os.makedirs(save_dir) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | utl.set_seed(int(args.seed)) 31 | model = build_model(args) 32 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 33 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 34 | min_lr=1e-10, verbose=1) 35 | model = model.to(device) 36 | if osp.isfile(args.checkpoint): 37 | checkpoint = torch.load(args.checkpoint, map_location=device) 38 | model.load_state_dict(checkpoint['model_state_dict']) 39 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 40 | args.start_epoch += checkpoint['epoch'] 41 | del checkpoint 42 | 43 | 44 | criterion = rmse_loss().to(device) 45 | 46 | train_gen = utl.build_data_loader(args, 'train', batch_size = 1) 47 | val_gen = utl.build_data_loader(args, 'val', batch_size = 1) 48 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 49 | print("Number of validation samples:", val_gen.__len__()) 50 | print("Number of test samples:", test_gen.__len__()) 51 | # train 52 | min_loss = 1e6 53 | min_ADE_08 = 10e5 54 | min_FDE_08 = 10e5 55 | min_ADE_12 = 10e5 56 | min_FDE_12 = 10e5 57 | best_model = None 58 | best_model_metric = None 59 | 60 | 61 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 62 | 63 | train_goal_loss, train_dec_loss, total_train_loss = train(model, train_gen, criterion, optimizer, device) 64 | 65 | print('Train Epoch: {} \t Goal loss: {:.4f}\t Decoder loss: {:.4f}\t Total: {:.4f}'.format( 66 | epoch, train_goal_loss, train_dec_loss, total_train_loss)) 67 | 68 | 69 | 70 | # val 71 | val_loss = val(model, val_gen, criterion, device) 72 | # lr_scheduler.step(val_loss) 73 | 74 | 75 | # test 76 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 77 | 78 | 79 | if __name__ == '__main__': 80 | main(parse_args()) 81 | -------------------------------------------------------------------------------- /tools/jaad/train_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.jaad import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae 
import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | 31 | model = build_model(args) 32 | model = nn.DataParallel(model) 33 | model = model.to(device) 34 | 35 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 36 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 37 | min_lr=1e-10, verbose=1) 38 | if osp.isfile(args.checkpoint): 39 | checkpoint = torch.load(args.checkpoint, map_location=device) 40 | model.load_state_dict(checkpoint['model_state_dict']) 41 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 42 | args.start_epoch += checkpoint['epoch'] 43 | 44 | criterion = rmse_loss().to(device) 45 | 46 | train_gen = utl.build_data_loader(args, 'train') 47 | val_gen = utl.build_data_loader(args, 'val') 48 | test_gen = utl.build_data_loader(args, 'test') 49 | print("Number of validation samples:", val_gen.__len__()) 50 | print("Number of test samples:", test_gen.__len__()) 51 | 52 | 53 | 54 | # train 55 | min_loss = 1e6 56 | min_MSE_15 = 10e5 57 | best_model = None 58 | best_model_metric = None 59 | 60 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 61 | print("Number of training samples:", len(train_gen)) 62 | 63 | # train 64 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 65 | # print('Train Epoch: ', epoch, 'Goal loss: ', train_goal_loss, 'Decoder loss: ', train_dec_loss, 'CVAE loss: ', train_cvae_loss, \ 66 | # 'KLD loss: ', train_KLD_loss, 'Total: ', total_train_loss) 67 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}'.format( 68 | epoch, train_goal_loss, train_cvae_loss, train_KLD_loss)) 69 | 70 | 71 | # val 72 | val_loss = val(model, val_gen, criterion, device) 73 | lr_scheduler.step(val_loss) 74 | 75 | 76 | # test 77 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 78 | print("Test Loss: {:.4f}".format(test_loss)) 79 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f\n" % (MSE_05, MSE_10, MSE_15)) 80 | 81 | 82 | 83 | if __name__ == '__main__': 84 | main(parse_args()) 85 | -------------------------------------------------------------------------------- /tools/ethucy/train_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.ethucy import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.ethucy_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name,str(args.dropout), str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = 
torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | model = build_model(args) 30 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 31 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 32 | min_lr=1e-10, verbose=1) 33 | model = nn.DataParallel(model) 34 | model = model.to(device) 35 | if osp.isfile(args.checkpoint): 36 | 37 | checkpoint = torch.load(args.checkpoint, map_location=device) 38 | model.load_state_dict(checkpoint['model_state_dict']) 39 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 40 | args.start_epoch += checkpoint['epoch'] 41 | del checkpoint 42 | 43 | criterion = rmse_loss().to(device) 44 | 45 | train_gen = utl.build_data_loader(args, 'train', batch_size = 1) 46 | val_gen = utl.build_data_loader(args, 'val', batch_size = 1) 47 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 48 | print("Number of validation samples:", val_gen.__len__()) 49 | print("Number of test samples:", test_gen.__len__()) 50 | 51 | 52 | 53 | # train 54 | min_loss = 1e6 55 | min_ADE_08 = 10e5 56 | min_FDE_08 = 10e5 57 | min_ADE_12 = 10e5 58 | min_FDE_12 = 10e5 59 | best_model = None 60 | best_model_metric = None 61 | 62 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 63 | print("Number of training samples:", len(train_gen)) 64 | 65 | # train 66 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 67 | # print('Train Epoch: ', epoch, 'Goal loss: ', train_goal_loss, 'Decoder loss: ', train_dec_loss, 'CVAE loss: ', train_cvae_loss, \ 68 | # 'KLD loss: ', train_KLD_loss, 'Total: ', total_train_loss) 69 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}\t Total: {:.4f}'.format( 70 | epoch,train_goal_loss, train_cvae_loss, train_KLD_loss, train_goal_loss + train_cvae_loss + train_KLD_loss )) 71 | 72 | 73 | # val 74 | val_loss = val(model, val_gen, criterion, device) 75 | lr_scheduler.step(val_loss) 76 | 77 | 78 | # test 79 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 80 | print("Test Loss: {:.4f}".format(test_loss)) 81 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main(parse_args()) 86 | -------------------------------------------------------------------------------- /configs/ethucy/ETH_UCY.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "grad_clip": 1.0, 4 | "learning_rate_style": "exp", 5 | "learning_rate": 0.001, 6 | "min_learning_rate": 1e-05, 7 | "learning_decay_rate": 0.9999, 8 | "prediction_horizon": 12, 9 | "minimum_history_length": 7, 10 | "maximum_history_length": 7, 11 | "map_encoder": { 12 | "PEDESTRIAN": { 13 | "heading_state_index": 6, 14 | "patch_size": [ 15 | 50, 16 | 10, 17 | 50, 18 | 90 19 | ], 20 | "map_channels": 3, 21 | "hidden_channels": [ 22 | 10, 23 | 20, 24 | 10, 25 | 1 26 | ], 27 | "output_size": 32, 28 | "masks": [ 29 | 5, 30 | 5, 31 | 5, 32 | 5 33 | ], 34 | "strides": [ 35 | 1, 36 | 1, 37 | 1, 38 | 1 39 | ], 40 | "dropout": 0.5 41 | } 42 | }, 43 | "k": 1, 44 | "k_eval": 25, 45 | "kl_min": 0.07, 46 | "kl_weight": 100.0, 47 | "kl_weight_start": 0, 48 | "kl_decay_rate": 0.99995, 49 | "kl_crossover": 400, 50 | "kl_sigmoid_divisor": 4, 51 | "rnn_kwargs": { 52 | "dropout_keep_prob": 0.75 53 | }, 54 | "MLP_dropout_keep_prob": 0.9, 55 | 
"enc_rnn_dim_edge": 32, 56 | "enc_rnn_dim_edge_influence": 32, 57 | "enc_rnn_dim_history": 32, 58 | "enc_rnn_dim_future": 32, 59 | "dec_rnn_dim": 128, 60 | "q_z_xy_MLP_dims": null, 61 | "p_z_x_MLP_dims": 32, 62 | "GMM_components": 1, 63 | "log_p_yt_xz_max": 6, 64 | "N": 1, 65 | "K": 25, 66 | "tau_init": 2.0, 67 | "tau_final": 0.05, 68 | "tau_decay_rate": 0.997, 69 | "use_z_logit_clipping": true, 70 | "z_logit_clip_start": 0.05, 71 | "z_logit_clip_final": 5.0, 72 | "z_logit_clip_crossover": 300, 73 | "z_logit_clip_divisor": 5, 74 | "dynamic": { 75 | "PEDESTRIAN": { 76 | "name": "SingleIntegrator", 77 | "distribution": true, 78 | "limits": {} 79 | } 80 | }, 81 | "state": { 82 | "PEDESTRIAN": { 83 | "position": [ 84 | "x", 85 | "y" 86 | ], 87 | "velocity": [ 88 | "x", 89 | "y" 90 | ], 91 | "acceleration": [ 92 | "x", 93 | "y" 94 | ] 95 | } 96 | }, 97 | "pred_state": { 98 | "PEDESTRIAN": { 99 | "position": [ 100 | "x", 101 | "y" 102 | ] 103 | } 104 | }, 105 | "log_histograms": false, 106 | "dynamic_edges": "yes", 107 | "edge_state_combine_method": "sum", 108 | "edge_influence_combine_method": "attention", 109 | "edge_addition_filter": [ 110 | 0.25, 111 | 0.5, 112 | 0.75, 113 | 1.0 114 | ], 115 | "edge_removal_filter": [ 116 | 1.0, 117 | 0.0 118 | ], 119 | "offline_scene_graph": "yes", 120 | "incl_robot_node": false, 121 | "node_freq_mult_train": false, 122 | "node_freq_mult_eval": false, 123 | "scene_freq_mult_train": false, 124 | "scene_freq_mult_eval": false, 125 | "scene_freq_mult_viz": false, 126 | "edge_encoding": true, 127 | "use_map_encoding": false, 128 | "augment": true, 129 | "override_attention_radius": [] 130 | } -------------------------------------------------------------------------------- /lib/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pickle as pkl 4 | import os 5 | import copy 6 | import torch 7 | import torch.utils.data as data 8 | from lib.dataloaders import build_dataset 9 | 10 | def set_seed(seed): 11 | random.seed(seed) 12 | os.environ['PYTHONHASHSEED'] = str(seed) 13 | np.random.seed(seed) 14 | torch.manual_seed(seed) 15 | if torch.cuda.is_available(): 16 | torch.cuda.manual_seed(seed) 17 | torch.cuda.manual_seed_all(seed) 18 | torch.backends.cudnn.benchmark = False 19 | torch.backends.cudnn.deterministic = True 20 | 21 | 22 | 23 | def build_data_loader(args, phase='train',batch_size=None): 24 | data_loaders = data.DataLoader( 25 | dataset=build_dataset(args, phase), 26 | batch_size=args.batch_size if batch_size is None else batch_size, 27 | shuffle=phase=='train', 28 | num_workers=args.num_workers, 29 | collate_fn=my_collate_fn if batch_size is not None else None) 30 | 31 | return data_loaders 32 | 33 | def my_collate_fn(batch): 34 | return batch[0] 35 | 36 | def cxcywh_to_x1y1x2y2(boxes): 37 | ''' 38 | Params: 39 | boxes:(Cx, Cy, w, h) 40 | Returns: 41 | (x1, y1, x2, y2 or tlbr 42 | ''' 43 | new_boxes = np.zeros_like(boxes) 44 | new_boxes[...,0] = boxes[...,0] - boxes[...,2]/2 45 | new_boxes[...,1] = boxes[...,1] - boxes[...,3]/2 46 | new_boxes[...,2] = boxes[...,0] + boxes[...,2]/2 47 | new_boxes[...,3] = boxes[...,1] + boxes[...,3]/2 48 | return new_boxes 49 | 50 | 51 | def bbox_normalize(bbox,W=1280,H=640): 52 | ''' 53 | normalize bbox value to [0,1] 54 | :Params: 55 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to W or H 56 | :Return: 57 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to 1 58 | ''' 59 | new_bbox = copy.deepcopy(bbox) 
60 | new_bbox[:,0] /= W 61 | new_bbox[:,1] /= H 62 | new_bbox[:,2] /= W 63 | new_bbox[:,3] /= H 64 | 65 | return new_bbox 66 | 67 | def bbox_denormalize(bbox,W=1280,H=640): 68 | ''' 69 | normalize bbox value to [0,1] 70 | :Params: 71 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to 1 72 | :Return: 73 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to W or H 74 | ''' 75 | new_bbox = copy.deepcopy(bbox) 76 | new_bbox[..., 0] *= W 77 | new_bbox[..., 1] *= H 78 | new_bbox[..., 2] *= W 79 | new_bbox[..., 3] *= H 80 | 81 | return new_bbox 82 | 83 | 84 | # FLow loading code adapted from: 85 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 86 | 87 | def load_flow(flow_folder): 88 | ''' 89 | Given video key, load the corresponding flow file 90 | ''' 91 | flow_files = sorted(glob.glob(flow_folder + '*.flo')) 92 | flows = [] 93 | for file in flow_files: 94 | flow = read_flo(file) 95 | flows.append(flow) 96 | return flows 97 | 98 | TAG_FLOAT = 202021.25 99 | 100 | def read_flo(file): 101 | assert type(file) is str, "file is not str %r" % str(file) 102 | assert os.path.isfile(file) is True, "file does not exist %r" % str(file) 103 | assert file[-4:] == '.flo', "file ending is not .flo %r" % file[-4:] 104 | f = open(file,'rb') 105 | flo_number = np.fromfile(f, np.float32, count=1)[0] 106 | assert flo_number == TAG_FLOAT, 'Flow number %r incorrect. Invalid .flo file' % flo_number 107 | w = int(np.fromfile(f, np.int32, count=1)) 108 | h = int(np.fromfile(f, np.int32, count=1)) 109 | #if error try: data = np.fromfile(f, np.float32, count=2*w[0]*h[0]) 110 | data = np.fromfile(f, np.float32, count=2*w*h) 111 | # Reshape data into 3D array (columns, rows, bands) 112 | flow = np.resize(data, (int(h), int(w), 2)) 113 | f.close() 114 | 115 | return flow 116 | 117 | -------------------------------------------------------------------------------- /SGNet_env.yml: -------------------------------------------------------------------------------- 1 | name: SGNet 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - backcall=0.2.0=py_0 8 | - blas=1.0=mkl 9 | - bzip2=1.0.8=h7b6447c_0 10 | - ca-certificates=2020.12.8=h06a4308_0 11 | - cairo=1.14.12=h8948797_3 12 | - certifi=2020.12.5=py36h06a4308_0 13 | - cudatoolkit=11.0.221=h6bb024c_0 14 | - cycler=0.10.0=py36_0 15 | - dataclasses=0.7=py36_0 16 | - dbus=1.13.18=hb2f20db_0 17 | - decorator=4.4.2=py_0 18 | - dill=0.3.3=pyhd3eb1b0_0 19 | - expat=2.2.10=he6710b0_2 20 | - ffmpeg=4.0=hcdf2ecd_0 21 | - fontconfig=2.13.0=h9420a91_0 22 | - freeglut=3.0.0=hf484d3e_5 23 | - freetype=2.10.4=h5ab3b9f_0 24 | - glib=2.66.1=h92f7085_0 25 | - graphite2=1.3.14=h23475e2_0 26 | - gst-plugins-base=1.14.0=h8213a91_2 27 | - gstreamer=1.14.0=h28cd5cc_2 28 | - harfbuzz=1.8.8=hffaf4a1_0 29 | - hdf5=1.10.2=hba1933b_1 30 | - icu=58.2=he6710b0_3 31 | - intel-openmp=2020.2=254 32 | - ipykernel=5.3.4=py36h5ca1d4c_0 33 | - ipython=7.16.1=py36h5ca1d4c_0 34 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 35 | - jasper=2.0.14=h07fcdf6_1 36 | # - jedi=0.18.0=py36h06a4308_0 37 | - joblib=1.0.0=pyhd3eb1b0_0 38 | - jpeg=9b=h024ee3a_2 39 | - jupyter_client=6.1.7=py_0 40 | - jupyter_core=4.7.0=py36h06a4308_0 41 | - kiwisolver=1.3.0=py36h2531618_0 42 | - lcms2=2.11=h396b838_0 43 | - ld_impl_linux-64=2.33.1=h53a641e_7 44 | - libedit=3.1.20191231=h14c3975_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=9.1.0=hdf63c60_0 47 | - libgfortran-ng=7.3.0=hdf63c60_0 48 | - libglu=9.0.0=hf484d3e_1 49 | - 
libopencv=3.4.2=hb342d67_1 50 | - libopus=1.3.1=h7b6447c_0 51 | - libpng=1.6.37=hbc83047_0 52 | - libsodium=1.0.18=h7b6447c_0 53 | - libstdcxx-ng=9.1.0=hdf63c60_0 54 | - libtiff=4.1.0=h2733197_1 55 | - libuuid=1.0.3=h1bed415_2 56 | - libuv=1.40.0=h7b6447c_0 57 | - libvpx=1.7.0=h439df22_0 58 | - libxcb=1.14=h7b6447c_0 59 | - libxml2=2.9.10=hb55368b_3 60 | - lz4-c=1.9.2=heb0550a_3 61 | - matplotlib=3.3.2=h06a4308_0 62 | - matplotlib-base=3.3.2=py36h817c723_0 63 | - mkl=2020.2=256 64 | - mkl-service=2.3.0=py36he8ac12f_0 65 | - mkl_fft=1.2.0=py36h23d657b_0 66 | - mkl_random=1.1.1=py36h0573a6f_0 67 | - ncurses=6.2=he6710b0_1 68 | - ninja=1.10.2=py36hff7bd54_0 69 | - numpy=1.19.2=py36h54aff64_0 70 | - numpy-base=1.19.2=py36hfa32c7d_0 71 | - olefile=0.46=py36_0 72 | - opencv=3.4.2=py36h6fd60c2_1 73 | - openssl=1.1.1i=h27cfd23_0 74 | - pandas=1.1.5=py36ha9443f7_0 75 | - parso=0.7.0=py_0 76 | - pcre=8.44=he6710b0_0 77 | - pexpect=4.8.0=pyhd3eb1b0_3 78 | - pickleshare=0.7.5=pyhd3eb1b0_1003 79 | - pillow=8.0.1=py36he98fc37_0 80 | - pip=20.3.3=py36h06a4308_0 81 | - pixman=0.40.0=h7b6447c_0 82 | - prompt-toolkit=3.0.8=py_0 83 | - ptyprocess=0.7.0=pyhd3eb1b0_2 84 | - py-opencv=3.4.2=py36hb342d67_1 85 | - pygments=2.7.4=pyhd3eb1b0_0 86 | - pyparsing=2.4.7=py_0 87 | - pyqt=5.9.2=py36h05f1152_2 88 | - python=3.6.12=hcff3b4d_2 89 | - python-dateutil=2.8.1=py_0 90 | - pytorch=1.7.1=py3.6_cuda11.0.221_cudnn8.0.5_0 91 | - pytz=2020.5=pyhd3eb1b0_0 92 | - pyzmq=20.0.0=py36h2531618_1 93 | - qt=5.9.7=h5867ecd_1 94 | - readline=8.0=h7b6447c_0 95 | - scikit-learn=0.23.2=py36h0573a6f_0 96 | - scipy=1.5.2=py36h0b6359f_0 97 | - setuptools=51.0.0=py36h06a4308_2 98 | - sip=4.19.8=py36hf484d3e_0 99 | - six=1.15.0=py36h06a4308_0 100 | - sqlite=3.33.0=h62c20be_0 101 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 102 | - tk=8.6.10=hbc83047_0 103 | - torchaudio=0.7.2=py36 104 | - torchvision=0.8.2=py36_cu110 105 | - tornado=6.1=py36h27cfd23_0 106 | - tqdm=4.54.1=pyhd3eb1b0_0 107 | - traitlets=4.3.3=py36_0 108 | - typing_extensions=3.7.4.3=py_0 109 | - wcwidth=0.2.5=py_0 110 | - wheel=0.36.2=pyhd3eb1b0_0 111 | - xz=5.2.5=h7b6447c_0 112 | - zeromq=4.3.3=he6710b0_3 113 | - zlib=1.2.11=h7b6447c_3 114 | - zstd=1.4.5=h9ceee32_0 115 | - pip: 116 | - ncls==0.0.51 117 | - orjson==3.4.7 118 | 119 | -------------------------------------------------------------------------------- /lib/dataloaders/trajectron.py: -------------------------------------------------------------------------------- 1 | from torch.utils import data 2 | import numpy as np 3 | import random 4 | import torch 5 | from copy import deepcopy 6 | 7 | class NodeTypeDataset(data.Dataset): 8 | def __init__(self, env, node_type, state, pred_state, node_freq_mult, 9 | scene_freq_mult, hyperparams, augment=False, **kwargs): 10 | self.env = env 11 | self.state = state 12 | self.pred_state = pred_state 13 | self.hyperparams = hyperparams 14 | self.max_ht = self.hyperparams['maximum_history_length'] 15 | self.max_ft = kwargs['min_future_timesteps'] 16 | 17 | self.augment = augment 18 | 19 | self.node_type = node_type 20 | self.edge_types = [edge_type for edge_type in env.get_edge_types() if edge_type[0] is node_type] 21 | self.index = self.index_env(node_freq_mult, scene_freq_mult, **kwargs) 22 | self.len = len(self.index) 23 | 24 | # print(self.edge_types) 25 | 26 | def index_env(self, node_freq_mult, scene_freq_mult, **kwargs): 27 | index = list() 28 | for scene in self.env.scenes: 29 | present_node_dict = scene.present_nodes(np.arange(0, scene.timesteps), type=self.node_type, **kwargs) 
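            # present_node_dict maps each timestep of the scene to the nodes of this type
            # observed at that time; every (scene, timestep, node) triple is pre-processed
            # by get_node_timestep_data and appended to the index below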
30 | for t, nodes in present_node_dict.items(): 31 | for node in nodes: 32 | valid = True 33 | data = [(scene, t, node)] *\ 34 | (scene.frequency_multiplier if scene_freq_mult else 1) *\ 35 | (node.frequency_multiplier if node_freq_mult else 1) 36 | (scene, t, node) = data[0] 37 | if self.augment: 38 | scene = scene.augment() 39 | node = scene.get_node_by_id(node.id) 40 | first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep = get_node_timestep_data(self.env, scene, t, node, self.state, self.pred_state,\ 41 | self.edge_types, self.max_ht, self.max_ft, self.hyperparams) 42 | 43 | all_t = torch.cat((x_t[:,:2], y_t),dim=0) 44 | if valid: 45 | index += [ (first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep)] 46 | else: 47 | pass 48 | return index 49 | 50 | def __len__(self): 51 | return self.len 52 | 53 | def __getitem__(self, i): 54 | (first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep) = self.index[i] 55 | return first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep 56 | 57 | 58 | def get_node_timestep_data(env, scene, t, node, state, pred_state, 59 | edge_types, max_ht, max_ft, hyperparams, 60 | scene_graph=None): 61 | """ 62 | Pre-processes the data for a single batch element: node state over time for a specific time in a specific scene 63 | as well as the neighbour data for it. 64 | 65 | :param env: Environment 66 | :param scene: Scene 67 | :param t: Timestep in scene 68 | :param node: Node 69 | :param state: Specification of the node state 70 | :param pred_state: Specification of the prediction state 71 | :param edge_types: List of all Edge Types for which neighbours are pre-processed 72 | :param max_ht: Maximum history timesteps 73 | :param max_ft: Maximum future timesteps (prediction horizon) 74 | :param hyperparams: Model hyperparameters 75 | :param scene_graph: If scene graph was already computed for this scene and time you can pass it here 76 | :return: Batch Element 77 | """ 78 | 79 | # Node 80 | timestep_range_x = np.array([t - max_ht, t]) 81 | timestep_range_y = np.array([t + 1, t + max_ft]) 82 | 83 | x = node.get(timestep_range_x, state[node.type]) 84 | y = node.get(timestep_range_y, pred_state[node.type]) 85 | first_history_index = (max_ht - node.history_points_at(t)).clip(0) 86 | x_st_t = deepcopy(x) 87 | x_st_t = x_st_t - x[-1] 88 | y_st_t = y 89 | 90 | 91 | x_t = torch.tensor(x, dtype=torch.float) 92 | y_t = torch.tensor(y, dtype=torch.float) 93 | 94 | x_st_t = torch.tensor(x_st_t, dtype=torch.float) 95 | y_st_t = torch.tensor(y_st_t, dtype=torch.float) 96 | 97 | return (first_history_index, x_t, y_t, x_st_t, y_st_t, scene.name, t) 98 | -------------------------------------------------------------------------------- /lib/utils/hevi_train_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_hevi 14 | 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 
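    # goal and decoder RMSE are accumulated per batch, weighted by batch size, and
    # normalised by the dataset length once the epoch finishes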
18 | 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | input_traj, input_flow, target_traj = data 25 | batch_size = input_traj.shape[0] 26 | #print(batch_size) 27 | input_traj = input_traj.to('cuda', non_blocking=True) 28 | input_flow = input_flow.to('cuda', non_blocking=True) 29 | target_traj = target_traj.to('cuda', non_blocking=True) 30 | 31 | all_goal_traj, all_dec_traj = model([input_traj,input_flow]) 32 | goal_loss = criterion(all_goal_traj, target_traj) 33 | dec_loss = criterion(all_dec_traj, target_traj) 34 | 35 | train_loss = goal_loss + dec_loss 36 | 37 | total_goal_loss += goal_loss.item()* batch_size 38 | total_dec_loss += dec_loss.item()* batch_size 39 | 40 | 41 | # optimize 42 | optimizer.zero_grad() 43 | train_loss.backward() 44 | optimizer.step() 45 | 46 | total_goal_loss /= len(train_gen.dataset) 47 | total_dec_loss /= len(train_gen.dataset) 48 | 49 | 50 | return total_goal_loss, total_dec_loss, total_goal_loss + total_dec_loss 51 | 52 | 53 | def test(model, test_gen, criterion, device): 54 | total_goal_loss = 0 55 | total_dec_loss = 0 56 | ADE_15 = 0 57 | ADE_05 = 0 58 | ADE_10 = 0 59 | FDE = 0 60 | FIOU = 0 61 | CADE = 0 62 | CFDE = 0 63 | model.eval() 64 | loader = tqdm(test_gen, total=len(test_gen)) 65 | with torch.set_grad_enabled(False): 66 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 67 | 68 | input_traj, input_flow, target_traj = data 69 | batch_size = input_traj.shape[0] 70 | input_traj = input_traj.to('cuda', non_blocking=True) 71 | input_flow = input_flow.to('cuda', non_blocking=True) 72 | target_traj = target_traj.to('cuda', non_blocking=True) 73 | 74 | all_goal_traj, all_dec_traj = model([input_traj,input_flow]) 75 | 76 | 77 | goal_loss = criterion(all_goal_traj, target_traj) 78 | dec_loss = criterion(all_dec_traj, target_traj) 79 | 80 | test_loss = goal_loss + dec_loss 81 | 82 | total_goal_loss += goal_loss.item()* batch_size 83 | total_dec_loss += dec_loss.item()* batch_size 84 | 85 | all_dec_traj_np = all_dec_traj.to('cpu').numpy() 86 | input_traj_np = input_traj.to('cpu').numpy() 87 | target_traj_np = target_traj.to('cpu').numpy() 88 | 89 | # Decoder 90 | batch_ADE_15, batch_ADE_05, batch_ADE_10, batch_FDE, batch_CADE, batch_CFDE, batch_FIOU =\ 91 | eval_hevi(input_traj_np, target_traj_np, all_dec_traj_np) 92 | 93 | ADE_15 += batch_ADE_15 94 | ADE_05 += batch_ADE_05 95 | ADE_10 += batch_ADE_10 96 | FDE += batch_FDE 97 | CADE += batch_CADE 98 | CFDE += batch_CFDE 99 | FIOU += batch_FIOU 100 | 101 | 102 | 103 | ADE_15 /= len(test_gen.dataset) 104 | ADE_05 /= len(test_gen.dataset) 105 | ADE_10 /= len(test_gen.dataset) 106 | FDE /= len(test_gen.dataset) 107 | FIOU /= len(test_gen.dataset) 108 | 109 | CADE /= len(test_gen.dataset) 110 | CFDE /= len(test_gen.dataset) 111 | 112 | test_loss = total_goal_loss/len(test_gen.dataset) + total_dec_loss/len(test_gen.dataset) 113 | 114 | print("ADE_05: %4f; ADE_10: %4f; ADE_15: %4f; FDE: %4f; FIOU: %4f\n" % (ADE_05, ADE_10, ADE_15, FDE, FIOU)) 115 | print("CFDE: %4f; CADE: %4f; \n" % (CFDE, CADE)) 116 | return test_loss, ADE_15, ADE_05, ADE_10, FDE, FIOU, CADE, CFDE 117 | -------------------------------------------------------------------------------- /lib/models/bitrap_np.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Defined classes: 3 | class BiTraPNP() 4 | Some utilities are cited from 
Trajectron++ 5 | ''' 6 | import sys 7 | import numpy as np 8 | import copy 9 | from collections import defaultdict 10 | import torch 11 | from torch import nn, optim 12 | from torch.nn import functional as F 13 | import torch.nn.utils.rnn as rnn 14 | from torch.distributions import Normal 15 | 16 | def reconstructed_probability(x): 17 | recon_dist = Normal(0, 1) 18 | p = recon_dist.log_prob(x).exp().mean(dim=-1) # [batch_size, K] 19 | return p 20 | 21 | class BiTraPNP(nn.Module): 22 | def __init__(self, args): 23 | super(BiTraPNP, self).__init__() 24 | self.args = copy.deepcopy(args) 25 | self.param_scheduler = None 26 | self.input_dim = self.args.input_dim 27 | self.pred_dim = self.args.pred_dim 28 | self.hidden_size = self.args.hidden_size 29 | self.nu = args.nu 30 | self.sigma = args.sigma 31 | self.node_future_encoder_h = nn.Sequential(nn.Linear(self.input_dim, self.hidden_size//2),nn.ReLU()) 32 | self.gt_goal_encoder = nn.GRU(input_size=self.pred_dim, 33 | hidden_size=self.hidden_size//2, 34 | bidirectional=True, 35 | batch_first=True) 36 | self.p_z_x = nn.Sequential(nn.Linear(self.hidden_size, 37 | 128), 38 | nn.ReLU(), 39 | nn.Linear(128, 64), 40 | nn.ReLU(), 41 | nn.Linear(64, self.args.LATENT_DIM*2)) 42 | # posterior 43 | self.q_z_xy = nn.Sequential(nn.Linear(self.hidden_size + self.hidden_size, 44 | 128), 45 | nn.ReLU(), 46 | nn.Linear(128, 64), 47 | nn.ReLU(), 48 | nn.Linear(64, self.args.LATENT_DIM*2)) 49 | 50 | 51 | 52 | def gaussian_latent_net(self, enc_h, cur_state, K, target=None, z_mode=None): 53 | # get mu, sigma 54 | # 1. sample z from piror 55 | z_mu_logvar_p = self.p_z_x(enc_h) 56 | z_mu_p = z_mu_logvar_p[:, :self.args.LATENT_DIM] 57 | z_logvar_p = z_mu_logvar_p[:, self.args.LATENT_DIM:] 58 | if target is not None: 59 | # 2. sample z from posterior, for training only 60 | initial_h = self.node_future_encoder_h(cur_state) 61 | initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=initial_h.device)], dim=0) 62 | self.gt_goal_encoder.flatten_parameters() 63 | _, target_h = self.gt_goal_encoder(target, initial_h) 64 | target_h = target_h.permute(1,0,2) 65 | target_h = target_h.reshape(-1, target_h.shape[1] * target_h.shape[2]) 66 | 67 | z_mu_logvar_q = self.q_z_xy(torch.cat([enc_h, target_h], dim=-1)) 68 | z_mu_q = z_mu_logvar_q[:, :self.args.LATENT_DIM] 69 | z_logvar_q = z_mu_logvar_q[:, self.args.LATENT_DIM:] 70 | Z_mu = z_mu_q 71 | Z_logvar = z_logvar_q 72 | 73 | # 3. compute KL(q_z_xy||p_z_x) 74 | KLD = 0.5 * ((z_logvar_q.exp()/z_logvar_p.exp()) + \ 75 | (z_mu_p - z_mu_q).pow(2)/z_logvar_p.exp() - \ 76 | 1 + \ 77 | (z_logvar_p - z_logvar_q)) 78 | KLD = KLD.sum(dim=-1).mean() 79 | KLD = torch.clamp(KLD, min=0.001) 80 | 81 | else: 82 | Z_mu = z_mu_p 83 | Z_logvar = z_logvar_p 84 | KLD = torch.as_tensor(0.0, device=Z_logvar.device) 85 | 86 | # 4. 
Draw sample 87 | with torch.set_grad_enabled(False): 88 | K_samples = torch.normal(self.nu, self.sigma, size = (enc_h.shape[0], K, self.args.LATENT_DIM)).cuda() 89 | 90 | probability = reconstructed_probability(K_samples) 91 | Z_std = torch.exp(0.5 * Z_logvar) 92 | Z = Z_mu.unsqueeze(1).repeat(1, K, 1) + K_samples * Z_std.unsqueeze(1).repeat(1, K, 1) 93 | if z_mode: 94 | Z = torch.cat((Z_mu.unsqueeze(1), Z), dim=1) 95 | 96 | 97 | return Z, KLD, probability 98 | 99 | 100 | def forward(self, h_x, last_input, K, target_y=None): 101 | ''' 102 | Params: 103 | 104 | ''' 105 | Z, KLD, probability = self.gaussian_latent_net(h_x, last_input, K, target_y, z_mode=False) 106 | enc_h_and_z = torch.cat([h_x.unsqueeze(1).repeat(1, Z.shape[1], 1), Z], dim=-1) 107 | dec_h = enc_h_and_z if self.args.DEC_WITH_Z else h_x 108 | return dec_h, KLD, probability 109 | -------------------------------------------------------------------------------- /lib/utils/ethucy_train_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_ethucy 14 | 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 18 | count = 0 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | first_history_index = data['first_history_index'] 25 | assert torch.unique(first_history_index).shape[0] == 1 26 | batch_size = data['input_x'].shape[0] 27 | count += batch_size 28 | 29 | input_traj = data['input_x'].to(device) 30 | input_bbox_st = data['input_x_st'].to(device) 31 | target_traj = data['target_y'].to(device) 32 | # target_bbox_st = data['target_y_st'].to(device) 33 | 34 | all_goal_traj, all_dec_traj = model(input_traj, first_history_index[0]) 35 | 36 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 37 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 38 | 39 | train_loss = goal_loss + dec_loss 40 | 41 | total_goal_loss += goal_loss.item()* batch_size 42 | total_dec_loss += dec_loss.item()* batch_size 43 | 44 | 45 | # optimize 46 | optimizer.zero_grad() 47 | train_loss.backward() 48 | optimizer.step() 49 | 50 | total_goal_loss /= count 51 | total_dec_loss /= count 52 | 53 | 54 | return total_goal_loss, total_dec_loss, total_goal_loss + total_dec_loss 55 | 56 | def val(model, val_gen, criterion, device): 57 | total_goal_loss = 0 58 | total_dec_loss = 0 59 | count = 0 60 | model.eval() 61 | loader = tqdm(val_gen, total=len(val_gen)) 62 | with torch.set_grad_enabled(False): 63 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 64 | first_history_index = data['first_history_index'] 65 | assert torch.unique(first_history_index).shape[0] == 1 66 | batch_size = data['input_x'].shape[0] 67 | count += batch_size 68 | 69 | input_traj = data['input_x'].to(device) 70 | input_bbox_st = data['input_x_st'].to(device) 71 | target_traj = data['target_y'].to(device) 72 | # target_bbox_st = data['target_y_st'].to(device) 73 | 74 | all_goal_traj, all_dec_traj = 
model(input_traj, first_history_index[0]) 75 | 76 | 77 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 78 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 79 | 80 | total_goal_loss += goal_loss.item()* batch_size 81 | total_dec_loss += dec_loss.item()* batch_size 82 | 83 | val_loss = total_goal_loss/count + total_dec_loss/count 84 | return val_loss 85 | 86 | def test(model, test_gen, criterion, device): 87 | total_goal_loss = 0 88 | total_dec_loss = 0 89 | ADE_08 = 0 90 | ADE_12 = 0 91 | FDE_08 = 0 92 | FDE_12 = 0 93 | count = 0 94 | model.eval() 95 | loader = tqdm(test_gen, total=len(test_gen)) 96 | with torch.set_grad_enabled(False): 97 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 98 | 99 | first_history_index = data['first_history_index'] 100 | assert torch.unique(first_history_index).shape[0] == 1 101 | batch_size = data['input_x'].shape[0] 102 | count += batch_size 103 | 104 | input_traj = data['input_x'].to(device) 105 | input_bbox_st = data['input_x_st'].to(device) 106 | target_traj = data['target_y'].to(device) 107 | # target_bbox_st = data['target_y_st'].to(device) 108 | 109 | all_goal_traj, all_dec_traj = model(input_traj, first_history_index[0]) 110 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 111 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 112 | 113 | train_loss = goal_loss + dec_loss 114 | 115 | total_goal_loss += goal_loss.item()* batch_size 116 | total_dec_loss += dec_loss.item()* batch_size 117 | 118 | all_dec_traj_np = all_dec_traj.to('cpu').numpy() 119 | input_traj_np = input_traj.to('cpu').numpy() 120 | target_traj_np = target_traj.to('cpu').numpy() 121 | 122 | # Decoder 123 | batch_ADE_08, batch_FDE_08, batch_ADE_12, batch_FDE_12 =\ 124 | eval_ethucy(input_traj_np, target_traj_np, all_dec_traj_np) 125 | 126 | ADE_08 += batch_ADE_08 127 | ADE_12 += batch_ADE_12 128 | FDE_08 += batch_FDE_08 129 | FDE_12 += batch_FDE_12 130 | 131 | ADE_08 /= count 132 | ADE_12 /= count 133 | FDE_08 /= count 134 | FDE_12 /= count 135 | 136 | 137 | test_loss = total_goal_loss/count + total_dec_loss/count 138 | 139 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 140 | return test_loss, ADE_08, FDE_08, ADE_12, FDE_12 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytorch Implementation for Stepwise Goal-Driven Networks for Trajectory Prediction (RA-L/ICRA2022) 2 | 3 | 4 | 5 | ## Installation 6 | 7 | # Cloning 8 | 9 | We use part of the dataloader in Trajectron++, so we include [Trajectron++](https://github.com/StanfordASL/Trajectron-plus-plus) as a submodule. 10 | ``` 11 | git clone --recurse-submodules git@github.com:ChuhuaW/SGNet.pytorch.git 12 | ``` 13 | 14 | # Environment 15 | 16 | * Install conda environment from yml file 17 | 18 | ``` 19 | conda env create --file SGNet_env.yml 20 | ``` 21 | 22 | # Data 23 | 24 | * JAAD and PIE 25 | JAAD and PIE can be downloaded from https://github.com/ykotseruba/JAAD and https://github.com/aras62/PIE, respectively. 
Creating symlinks from the dataset path to ```./data``` 26 | 27 | ``` 28 | ln -s path/to/dataset/ ./data/ 29 | ``` 30 | 31 | * ETH/UCY 32 | We follow [Trajectron++](https://github.com/StanfordASL/Trajectron-plus-plus) to preprocess data splits for the ETH and UCY datasets in this repository. Please refer to their repository for instruction. After the data is generated, please create symlinks from the dataset path to ```./data``` 33 | 34 | ``` 35 | ln -s path/to/dataset/ ./data/ 36 | ``` 37 | 38 | 39 | ## Training 40 | 41 | ### Stochastic prediction 42 | 43 | * Training on JAAD dataset: 44 | ``` 45 | cd SGDNet.Pytorch 46 | python tools/jaad/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet_CVAE 47 | ``` 48 | 49 | * Training on PIE dataset: 50 | ``` 51 | cd SGDNet.Pytorch 52 | python tools/pie/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet_CVAE 53 | ``` 54 | 55 | * Training on ETH/UCY dataset: 56 | ``` 57 | cd SGDNet.Pytorch 58 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet_CVAE 59 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet_CVAE 60 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet_CVAE 61 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet_CVAE 62 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet_CVAE 63 | ``` 64 | 65 | ### Deterministic prediction 66 | 67 | * Training on JAAD dataset: 68 | ``` 69 | cd SGDNet.Pytorch 70 | python tools/jaad/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet 71 | ``` 72 | 73 | * Training on PIE dataset: 74 | ``` 75 | cd SGDNet.Pytorch 76 | python tools/pie/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet 77 | ``` 78 | 79 | * Training on ETH/UCY dataset: 80 | ``` 81 | cd SGDNet.Pytorch 82 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet 83 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet 84 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet 85 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet 86 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet 87 | ``` 88 | 89 | ## Evaluation 90 | 91 | ### Stochastic prediction 92 | 93 | * Evaluating on JAAD dataset: 94 | ``` 95 | cd SGDNet.Pytorch 96 | python tools/jaad/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet_CVAE --checkpoint path/to/checkpoint 97 | ``` 98 | 99 | * Evaluating on PIE dataset: 100 | ``` 101 | cd SGDNet.Pytorch 102 | python tools/pie/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet_CVAE --checkpoint path/to/checkpoint 103 | ``` 104 | 105 | * Evaluating on ETH/UCY dataset: 106 | ``` 107 | cd SGDNet.Pytorch 108 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet_CVAE --checkpoint path/to/checkpoint 109 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet_CVAE --checkpoint path/to/checkpoint 110 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet_CVAE --checkpoint path/to/checkpoint 111 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 
--model SGNet_CVAE --checkpoint path/to/checkpoint 112 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet_CVAE --checkpoint path/to/checkpoint 113 | ``` 114 | 115 | ### Deterministic prediction 116 | 117 | * Evaluating on ETH/UCY dataset: 118 | [ETH/UCY checkpoints](https://drive.google.com/drive/folders/1FCudihx-dmns-lh61uOcOD5uIWaKdKh8?usp=sharing) 119 | 120 | ``` 121 | cd SGDNet.Pytorch 122 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet --checkpoint path/to/checkpoint 123 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet --checkpoint path/to/checkpoint 124 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet --checkpoint path/to/checkpoint 125 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet --checkpoint path/to/checkpoint 126 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet --checkpoint path/to/checkpoint 127 | ``` 128 | 129 | [JAAD/PIE checkpoints](https://drive.google.com/drive/folders/1SskmNtf9FMn4azAxIfKXcYUgAEuVKNgR?usp=sharing) 130 | 131 | ## Citation 132 | 133 | ``` 134 | @ARTICLE{9691856, 135 | author={Wang, Chuhua and Wang, Yuchen and Xu, Mingze and Crandall, David J.}, 136 | journal={IEEE Robotics and Automation Letters}, 137 | title={Stepwise Goal-Driven Networks for Trajectory Prediction}, 138 | year={2022}} 139 | ``` 140 | ```diff 141 | - Rank 3rd on nuScences prediction task at 6th AI Driving Olympics, ICRA 2021 142 | ``` 143 | The source code and pretrained models will be made availble. Stay tuned. 144 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-ethucy)](https://paperswithcode.com/sota/trajectory-prediction-on-ethucy?p=stepwise-goal-driven-networks-for-trajectory) 145 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-jaad)](https://paperswithcode.com/sota/trajectory-prediction-on-jaad?p=stepwise-goal-driven-networks-for-trajectory) 146 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-pie)](https://paperswithcode.com/sota/trajectory-prediction-on-pie?p=stepwise-goal-driven-networks-for-trajectory) 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /lib/dataloaders/ethucy_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/ETH_UCY.py 2 | 3 | import os 4 | import sys 5 | sys.path.append('./Trajectron-plus-plus') 6 | sys.path.append('./Trajectron-plus-plus/trajectron') 7 | from .trajectron import NodeTypeDataset 8 | import numpy as np 9 | import torch 10 | from torch.utils import data 11 | import dill 12 | import json 13 | import random 14 | 15 | def chunks(lst, n): 16 | for i in range(0, len(lst), n): 17 | yield lst[i:i + n] 18 | 19 | 20 | class ETHUCYDataLayer(data.Dataset): 21 | 22 | def __init__(self, args, split): 23 | self.args = args 24 | self.split = split 25 | self.batch_size = args.batch_size 26 | 27 | conf_json = open(args.ETH_CONFIG, 'r') 28 | 
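        # args.ETH_CONFIG points to the Trajectron++-style hyperparameter JSON for the
        # ETH/UCY split; a few of its fields (history lengths, state / pred_state
        # definitions) are overridden just below to match enc_steps.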
hyperparams = json.load(conf_json) 29 | 30 | hyperparams['minimum_history_length'] = self.args.enc_steps-1 if self.split == 'test' else 1 31 | hyperparams['maximum_history_length'] = self.args.enc_steps-1 32 | 33 | hyperparams['state'] = {'PEDESTRIAN':{'position':['x','y'], 'velocity':['x','y'], 'acceleration':['x','y']}} 34 | hyperparams['pred_state'] = {'PEDESTRIAN':{'position':['x','y']}} 35 | 36 | args.data_root = args.dataset.lower() 37 | 38 | # File can be generated by using srcipts from Trajectron++ (https://github.com/StanfordASL/Trajectron-plus-plus) 39 | if split == 'train': 40 | f = open(os.path.join(args.eth_root, args.data_root, 'train', args.data_root+'_train.pkl'), 'rb') 41 | elif split == 'val': 42 | f = open(os.path.join(args.eth_root, args.data_root, 'val', args.data_root+'_val.pkl'), 'rb') 43 | elif split == 'test': 44 | f = open(os.path.join(args.eth_root, args.data_root, 'test', args.data_root+'_test.pkl'), 'rb') 45 | else: 46 | raise ValueError() 47 | 48 | train_env = dill.load(f, encoding='latin1') 49 | 50 | node_type=train_env.NodeType[0] 51 | train_env.attention_radius[(node_type, node_type)] = 3.0 #10.0 52 | augment = False 53 | if split=='train': 54 | min_history_timesteps = 1 55 | augment = True if self.args.augment else False 56 | else: 57 | min_history_timesteps = 7 58 | self.dataset = NodeTypeDataset(train_env, 59 | node_type, 60 | hyperparams['state'], 61 | hyperparams['pred_state'], 62 | scene_freq_mult=hyperparams['scene_freq_mult_train'], 63 | node_freq_mult=hyperparams['node_freq_mult_train'], 64 | hyperparams=hyperparams, 65 | augment=augment, 66 | min_history_timesteps=min_history_timesteps, 67 | min_future_timesteps=hyperparams['prediction_horizon'], 68 | return_robot=False) 69 | 70 | self.len_dict = {} 71 | for index in range(len(self.dataset)): 72 | first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name,timestep = self.dataset.__getitem__(index) 73 | if first_history_index not in self.len_dict: 74 | self.len_dict[first_history_index] = [] 75 | self.len_dict[first_history_index].append(index) 76 | self.shuffle_dataset() 77 | 78 | def shuffle_dataset(self): 79 | self._init_inputs() 80 | 81 | def _init_inputs(self): 82 | ''' 83 | shuffle the data based on its length 84 | ''' 85 | self.inputs = [] 86 | for length in self.len_dict: 87 | indices = self.len_dict[length] 88 | random.shuffle(indices) 89 | self.inputs.extend(list(chunks(self.len_dict[length], self.batch_size))) 90 | 91 | def __len__(self): 92 | return len(self.inputs) 93 | 94 | def __getitem__(self, index): 95 | indices = self.inputs[index] 96 | 97 | ret = { 98 | 'input_x': [], 99 | 'input_x_st': [], 100 | 'target_y': [], 101 | 'target_y_st': [], 102 | 'first_history_index':[], 103 | 'scene_name': [], 104 | 'timestep': [], 105 | } 106 | 107 | for idx in indices: 108 | this_ret = self.getitem_one(idx) 109 | ret['input_x'].append(this_ret['input_x']) 110 | ret['input_x_st'].append(this_ret['input_x_st']) 111 | ret['target_y'].append(torch.as_tensor(this_ret['target_y']).type(torch.FloatTensor)) 112 | ret['first_history_index'].append(torch.as_tensor(this_ret['first_history_index']).type(torch.LongTensor)) 113 | ret['scene_name'].append(this_ret['scene_name']) 114 | ret['timestep'].append(this_ret['timestep']) 115 | 116 | 117 | ret['input_x'] = torch.stack(ret['input_x']) 118 | ret['input_x_st'] = torch.stack(ret['input_x_st']) 119 | ret['target_y'] = torch.stack(ret['target_y']) 120 | 121 | ret['first_history_index'] = torch.stack(ret['first_history_index']) 122 | # to locate image 123 | 
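        # scene_name and timestep stay plain Python lists (strings / ints); only the
        # tensor entries above are stacked into batch tensors.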
ret['scene_name'] = ret['scene_name'] 124 | ret['timestep'] = ret['timestep'] 125 | 126 | return ret 127 | 128 | def getitem_one(self, index): 129 | first_history_index, x_t, y_t, x_st_t, y_st_t, scene_name, timestep = self.dataset.__getitem__(index) 130 | ret = {} 131 | all_t = torch.cat((x_t[:,:2], y_t),dim=0) 132 | y_t = self.get_target(all_t, 0, self.args.enc_steps, self.args.enc_steps, self.args.dec_steps) 133 | ret['first_history_index'] = first_history_index 134 | ret['input_x'] = x_t 135 | ret['input_x_st'] = x_st_t 136 | ret['target_y'] = y_t 137 | ret['target_y_st'] = y_st_t 138 | ret['scene_name'] = scene_name 139 | ret['timestep'] = timestep 140 | return ret 141 | 142 | def get_target(self, session, start, end, observe_length, predict_length): 143 | ''' 144 | Prepare the target for loss 145 | 146 | ''' 147 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 148 | for i, target_start in enumerate(range(start, end)): 149 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 150 | target_start = target_start + 1 151 | try: 152 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 153 | session[target_start-1:target_start,:]) 154 | except: 155 | print("segment start: ", start) 156 | print("sample start: ", target_start) 157 | print("segment end: ", end) 158 | print(session.shape) 159 | raise ValueError() 160 | return target 161 | 162 | 163 | -------------------------------------------------------------------------------- /lib/utils/jaadpie_train_utils_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_jaad_pie, eval_jaad_pie_cvae 14 | from lib.losses import cvae, cvae_multi 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 
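    # The loop below accumulates three loss terms per batch:
    #   goal_loss : criterion between the stepwise goal trajectories and the
    #               ground-truth future displacements,
    #   cvae_loss : cvae_multi over the K CVAE trajectory hypotheses,
    #   KLD       : the KL term returned by the model, averaged over the batch;
    # their sum (train_loss) is what gets back-propagated.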
18 | total_goal_loss = 0 19 | total_cvae_loss = 0 20 | total_KLD_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | batch_size = data['input_x'].shape[0] 25 | input_traj = data['input_x'].to(device) 26 | target_traj = data['target_y'].to(device) 27 | 28 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=target_traj) 29 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 30 | goal_loss = criterion(all_goal_traj, target_traj) 31 | 32 | train_loss = goal_loss + cvae_loss + KLD_loss.mean() 33 | 34 | total_goal_loss += goal_loss.item()* batch_size 35 | total_cvae_loss += cvae_loss.item()* batch_size 36 | total_KLD_loss += KLD_loss.mean()* batch_size 37 | 38 | # optimize 39 | optimizer.zero_grad() 40 | train_loss.backward() 41 | optimizer.step() 42 | 43 | total_goal_loss /= len(train_gen.dataset) 44 | total_cvae_loss/=len(train_gen.dataset) 45 | total_KLD_loss/=len(train_gen.dataset) 46 | 47 | return total_goal_loss, total_cvae_loss, total_KLD_loss 48 | 49 | def val(model, val_gen, criterion, device): 50 | total_goal_loss = 0 51 | total_cvae_loss = 0 52 | total_KLD_loss = 0 53 | model.eval() 54 | loader = tqdm(val_gen, total=len(val_gen)) 55 | with torch.set_grad_enabled(False): 56 | for batch_idx, data in enumerate(loader): 57 | batch_size = data['input_x'].shape[0] 58 | input_traj = data['input_x'].to(device) 59 | target_traj = data['target_y'].to(device) 60 | 61 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=None,training=False) 62 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 63 | 64 | 65 | goal_loss = criterion(all_goal_traj, target_traj) 66 | 67 | 68 | total_goal_loss += goal_loss.item()* batch_size 69 | total_cvae_loss += cvae_loss.item()* batch_size 70 | total_KLD_loss += KLD_loss.mean()* batch_size 71 | 72 | val_loss = total_goal_loss/len(val_gen.dataset)\ 73 | + total_cvae_loss/len(val_gen.dataset) + total_KLD_loss/len(val_gen.dataset) 74 | return val_loss 75 | 76 | def test(model, test_gen, criterion, device): 77 | total_goal_loss = 0 78 | total_cvae_loss = 0 79 | total_KLD_loss = 0 80 | MSE_15 = 0 81 | MSE_05 = 0 82 | MSE_10 = 0 83 | FMSE = 0 84 | FIOU = 0 85 | CMSE = 0 86 | CFMSE = 0 87 | model.eval() 88 | loader = tqdm(test_gen, total=len(test_gen)) 89 | with torch.set_grad_enabled(False): 90 | for batch_idx, data in enumerate(loader): 91 | batch_size = data['input_x'].shape[0] 92 | input_traj = data['input_x'].to(device) 93 | target_traj = data['target_y'].to(device) 94 | 95 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=None, training=False) 96 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 97 | 98 | 99 | goal_loss = criterion(all_goal_traj, target_traj) 100 | 101 | test_loss = goal_loss + cvae_loss 102 | 103 | total_goal_loss += goal_loss.item()* batch_size 104 | total_cvae_loss += cvae_loss.item()* batch_size 105 | total_KLD_loss += KLD_loss.mean()* batch_size 106 | input_traj_np = input_traj.to('cpu').numpy() 107 | target_traj_np = target_traj.to('cpu').numpy() 108 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 109 | batch_MSE_15, batch_MSE_05, batch_MSE_10, batch_FMSE, batch_CMSE, batch_CFMSE, batch_FIOU =\ 110 | eval_jaad_pie_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 111 | MSE_15 += batch_MSE_15 112 | MSE_05 += batch_MSE_05 113 | MSE_10 += batch_MSE_10 114 | FMSE += batch_FMSE 115 | CMSE += 
batch_CMSE 116 | CFMSE += batch_CFMSE 117 | FIOU += batch_FIOU 118 | 119 | 120 | 121 | MSE_15 /= len(test_gen.dataset) 122 | MSE_05 /= len(test_gen.dataset) 123 | MSE_10 /= len(test_gen.dataset) 124 | FMSE /= len(test_gen.dataset) 125 | FIOU /= len(test_gen.dataset) 126 | 127 | CMSE /= len(test_gen.dataset) 128 | CFMSE /= len(test_gen.dataset) 129 | 130 | 131 | test_loss = total_goal_loss/len(test_gen.dataset) \ 132 | + total_cvae_loss/len(test_gen.dataset) + total_KLD_loss/len(test_gen.dataset) 133 | return test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE 134 | 135 | 136 | def weights_init(m): 137 | if isinstance(m, nn.Linear): 138 | m.weight.data.normal_(0.0, 0.001) 139 | elif isinstance(m, nn.Conv1d): 140 | nn.init.normal_(m.weight.data) 141 | if m.bias is not None: 142 | nn.init.normal_(m.bias.data) 143 | elif isinstance(m, nn.Conv2d): 144 | nn.init.xavier_normal_(m.weight.data) 145 | if m.bias is not None: 146 | nn.init.normal_(m.bias.data) 147 | elif isinstance(m, nn.Conv3d): 148 | nn.init.xavier_normal_(m.weight.data) 149 | if m.bias is not None: 150 | nn.init.normal_(m.bias.data) 151 | elif isinstance(m, nn.ConvTranspose1d): 152 | nn.init.normal_(m.weight.data) 153 | if m.bias is not None: 154 | nn.init.normal_(m.bias.data) 155 | elif isinstance(m, nn.ConvTranspose2d): 156 | nn.init.xavier_normal_(m.weight.data) 157 | if m.bias is not None: 158 | nn.init.normal_(m.bias.data) 159 | elif isinstance(m, nn.ConvTranspose3d): 160 | nn.init.xavier_normal_(m.weight.data) 161 | if m.bias is not None: 162 | nn.init.normal_(m.bias.data) 163 | elif isinstance(m, nn.BatchNorm1d): 164 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 165 | nn.init.constant_(m.bias.data, 0) 166 | elif isinstance(m, nn.BatchNorm2d): 167 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 168 | nn.init.constant_(m.bias.data, 0) 169 | elif isinstance(m, nn.BatchNorm3d): 170 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 171 | nn.init.constant_(m.bias.data, 0) 172 | elif isinstance(m, nn.LSTM): 173 | for param in m.parameters(): 174 | if len(param.shape) >= 2: 175 | nn.init.orthogonal_(param.data) 176 | else: 177 | nn.init.normal_(param.data) 178 | elif isinstance(m, nn.LSTMCell): 179 | for param in m.parameters(): 180 | if len(param.shape) >= 2: 181 | nn.init.orthogonal_(param.data) 182 | else: 183 | nn.init.normal_(param.data) 184 | elif isinstance(m, nn.GRU): 185 | for param in m.parameters(): 186 | if len(param.shape) >= 2: 187 | nn.init.orthogonal_(param.data) 188 | else: 189 | nn.init.normal_(param.data) 190 | elif isinstance(m, nn.GRUCell): 191 | for param in m.parameters(): 192 | if len(param.shape) >= 2: 193 | nn.init.orthogonal_(param.data) 194 | else: 195 | nn.init.normal_(param.data) 196 | -------------------------------------------------------------------------------- /lib/models/SGNet.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from .feature_extractor import build_feature_extractor 5 | import torch.nn.functional as F 6 | class SGNet(nn.Module): 7 | def __init__(self, args): 8 | super(SGNet, self).__init__() 9 | 10 | self.hidden_size = args.hidden_size 11 | self.enc_steps = args.enc_steps 12 | self.dec_steps = args.dec_steps 13 | self.dataset = args.dataset 14 | self.dropout = args.dropout 15 | self.feature_extractor = build_feature_extractor(args) 16 | if self.dataset in ['JAAD','PIE']: 17 | self.pred_dim = 4 18 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 19 | self.pred_dim), 20 | 
nn.Tanh()) 21 | self.flow_enc_cell = nn.GRUCell(self.hidden_size*2, self.hidden_size) 22 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 23 | self.pred_dim = 2 24 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 25 | self.pred_dim)) 26 | 27 | self.enc_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 28 | 1), 29 | nn.ReLU(inplace=True)) 30 | self.dec_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 31 | 1), 32 | nn.ReLU(inplace=True)) 33 | 34 | self.enc_to_goal_hidden = nn.Sequential(nn.Linear(self.hidden_size, 35 | self.hidden_size//4), 36 | nn.ReLU(inplace=True)) 37 | self.enc_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size, 38 | self.hidden_size), 39 | nn.ReLU(inplace=True)) 40 | 41 | 42 | self.goal_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size//4, 43 | self.hidden_size//4), 44 | nn.ReLU(inplace=True)) 45 | self.dec_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size, 46 | self.hidden_size), 47 | nn.ReLU(inplace=True)) 48 | self.goal_hidden_to_traj = nn.Sequential(nn.Linear(self.hidden_size//4, 49 | self.hidden_size), 50 | nn.ReLU(inplace=True)) 51 | self.goal_to_enc = nn.Sequential(nn.Linear(self.hidden_size//4, 52 | self.hidden_size//4), 53 | nn.ReLU(inplace=True)) 54 | self.goal_to_dec = nn.Sequential(nn.Linear(self.hidden_size//4, 55 | self.hidden_size//4), 56 | nn.ReLU(inplace=True)) 57 | self.enc_drop = nn.Dropout(self.dropout) 58 | self.goal_drop = nn.Dropout(self.dropout) 59 | self.dec_drop = nn.Dropout(self.dropout) 60 | 61 | self.traj_enc_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 62 | self.goal_cell = nn.GRUCell(self.hidden_size//4, self.hidden_size//4) 63 | self.dec_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 64 | 65 | def SGE(self, goal_hidden): 66 | goal_input = goal_hidden.new_zeros((goal_hidden.size(0), self.hidden_size//4)) 67 | goal_traj = goal_hidden.new_zeros(goal_hidden.size(0), self.dec_steps, self.pred_dim) 68 | goal_list = [] 69 | for dec_step in range(self.dec_steps): 70 | goal_hidden = self.goal_cell(self.goal_drop(goal_input), goal_hidden) 71 | goal_input = self.goal_hidden_to_input(goal_hidden) 72 | goal_list.append(goal_hidden) 73 | goal_traj_hidden = self.goal_hidden_to_traj(goal_hidden) 74 | # regress goal traj for loss 75 | goal_traj[:,dec_step,:] = self.regressor(goal_traj_hidden) 76 | # get goal for decoder and encoder 77 | goal_for_dec = [self.goal_to_dec(goal) for goal in goal_list] 78 | goal_for_enc = torch.stack([self.goal_to_enc(goal) for goal in goal_list],dim = 1) 79 | enc_attn= self.enc_goal_attn(torch.tanh(goal_for_enc)).squeeze(-1) 80 | enc_attn = F.softmax(enc_attn, dim =1).unsqueeze(1) 81 | goal_for_enc = torch.bmm(enc_attn, goal_for_enc).squeeze(1) 82 | return goal_for_dec, goal_for_enc, goal_traj 83 | 84 | def decoder(self, dec_hidden, goal_for_dec): 85 | # initial trajectory tensor 86 | dec_traj = dec_hidden.new_zeros(dec_hidden.size(0), self.dec_steps, self.pred_dim) 87 | for dec_step in range(self.dec_steps): 88 | goal_dec_input = dec_hidden.new_zeros(dec_hidden.size(0), self.dec_steps, self.hidden_size//4) 89 | goal_dec_input_temp = torch.stack(goal_for_dec[dec_step:],dim=1) 90 | goal_dec_input[:,dec_step:,:] = goal_dec_input_temp 91 | dec_attn= self.dec_goal_attn(torch.tanh(goal_dec_input)).squeeze(-1) 92 | dec_attn = F.softmax(dec_attn, dim =1).unsqueeze(1) 93 | goal_dec_input = torch.bmm(dec_attn,goal_dec_input).squeeze(1)#.view(goal_hidden.size(0), self.dec_steps, self.hidden_size//4).sum(1) 94 | 
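            # dec_attn soft-weights the stepwise goals from the current decoding step
            # onward (earlier positions stay zero-padded); the bmm collapses them into a
            # single hidden_size//4 goal summary, which is then concatenated with a
            # projection of dec_hidden to form the next GRU-cell input.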
95 | 96 | dec_dec_input = self.dec_hidden_to_input(dec_hidden) 97 | dec_input = self.dec_drop(torch.cat((goal_dec_input,dec_dec_input),dim = -1)) 98 | dec_hidden = self.dec_cell(dec_input, dec_hidden) 99 | # regress dec traj for loss 100 | dec_traj[:,dec_step,:] = self.regressor(dec_hidden) 101 | return dec_traj 102 | 103 | def encoder(self, traj_input, flow_input=None, start_index = 0): 104 | # initial output tensor 105 | all_goal_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 106 | all_dec_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 107 | # initial encoder goal with zeros 108 | goal_for_enc = traj_input.new_zeros((traj_input.size(0), self.hidden_size//4)) 109 | # initial encoder hidden with zeros 110 | traj_enc_hidden = traj_input.new_zeros((traj_input.size(0), self.hidden_size)) 111 | for enc_step in range(start_index, self.enc_steps): 112 | 113 | traj_enc_hidden = self.traj_enc_cell(self.enc_drop(torch.cat((traj_input[:,enc_step,:], goal_for_enc), 1)), traj_enc_hidden) 114 | if self.dataset in ['JAAD','PIE', 'ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 115 | enc_hidden = traj_enc_hidden 116 | # generate hidden states for goal and decoder 117 | goal_hidden = self.enc_to_goal_hidden(enc_hidden) 118 | dec_hidden = self.enc_to_dec_hidden(enc_hidden) 119 | 120 | goal_for_dec, goal_for_enc, goal_traj = self.SGE(goal_hidden) 121 | dec_traj = self.decoder(dec_hidden, goal_for_dec) 122 | 123 | # output 124 | all_goal_traj[:,enc_step,:,:] = goal_traj 125 | all_dec_traj[:,enc_step,:,:] = dec_traj 126 | 127 | return all_goal_traj, all_dec_traj 128 | 129 | 130 | def forward(self, inputs, start_index = 0): 131 | if self.dataset in ['JAAD','PIE']: 132 | traj_input = self.feature_extractor(inputs) 133 | all_goal_traj, all_dec_traj = self.encoder(traj_input) 134 | return all_goal_traj, all_dec_traj 135 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 136 | traj_input_temp = self.feature_extractor(inputs[:,start_index:,:]) 137 | traj_input = traj_input_temp.new_zeros((inputs.size(0), inputs.size(1), traj_input_temp.size(-1))) 138 | traj_input[:,start_index:,:] = traj_input_temp 139 | all_goal_traj, all_dec_traj = self.encoder(traj_input, None, start_index) 140 | return all_goal_traj, all_dec_traj -------------------------------------------------------------------------------- /lib/models/SGNet_CVAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .feature_extractor import build_feature_extractor 4 | from .bitrap_np import BiTraPNP 5 | import torch.nn.functional as F 6 | 7 | class SGNet_CVAE(nn.Module): 8 | def __init__(self, args): 9 | super(SGNet_CVAE, self).__init__() 10 | self.cvae = BiTraPNP(args) 11 | self.hidden_size = args.hidden_size # GRU hidden size 12 | self.enc_steps = args.enc_steps # observation step 13 | self.dec_steps = args.dec_steps # prediction step 14 | self.dataset = args.dataset 15 | self.dropout = args.dropout 16 | self.feature_extractor = build_feature_extractor(args) 17 | self.pred_dim = args.pred_dim 18 | self.K = args.K 19 | self.map = False 20 | if self.dataset in ['JAAD','PIE']: 21 | # the predict shift is in pixel 22 | self.pred_dim = 4 23 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 24 | self.pred_dim), 25 | nn.Tanh()) 26 | self.flow_enc_cell = nn.GRUCell(self.hidden_size*2, self.hidden_size) 27 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 28 
| self.pred_dim = 2 29 | # the predict shift is in meter 30 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 31 | self.pred_dim)) 32 | self.enc_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 33 | 1), 34 | nn.ReLU(inplace=True)) 35 | self.dec_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 36 | 1), 37 | nn.ReLU(inplace=True)) 38 | 39 | self.enc_to_goal_hidden = nn.Sequential(nn.Linear(self.hidden_size, 40 | self.hidden_size//4), 41 | nn.ReLU(inplace=True)) 42 | self.goal_hidden_to_traj = nn.Sequential(nn.Linear(self.hidden_size//4, 43 | self.hidden_size), 44 | nn.ReLU(inplace=True)) 45 | self.cvae_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size + args.LATENT_DIM, 46 | self.hidden_size), 47 | nn.ReLU(inplace=True)) 48 | self.enc_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size, 49 | self.hidden_size), 50 | nn.ReLU(inplace=True)) 51 | 52 | self.goal_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size//4, 53 | self.hidden_size//4), 54 | nn.ReLU(inplace=True)) 55 | self.dec_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size, 56 | self.hidden_size), 57 | nn.ReLU(inplace=True)) 58 | self.goal_to_enc = nn.Sequential(nn.Linear(self.hidden_size//4, 59 | self.hidden_size//4), 60 | nn.ReLU(inplace=True)) 61 | self.goal_to_dec = nn.Sequential(nn.Linear(self.hidden_size//4, 62 | self.hidden_size//4), 63 | nn.ReLU(inplace=True)) 64 | self.enc_drop = nn.Dropout(self.dropout) 65 | self.goal_drop = nn.Dropout(self.dropout) 66 | self.dec_drop = nn.Dropout(self.dropout) 67 | self.traj_enc_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 68 | self.goal_cell = nn.GRUCell(self.hidden_size//4, self.hidden_size//4) 69 | self.dec_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 70 | 71 | def SGE(self, goal_hidden): 72 | # initial goal input with zero 73 | goal_input = goal_hidden.new_zeros((goal_hidden.size(0), self.hidden_size//4)) 74 | # initial trajectory tensor 75 | goal_traj = goal_hidden.new_zeros(goal_hidden.size(0), self.dec_steps, self.pred_dim) 76 | goal_list = [] 77 | for dec_step in range(self.dec_steps): 78 | goal_hidden = self.goal_cell(self.goal_drop(goal_input), goal_hidden) 79 | # next step input is generate by hidden 80 | goal_input = self.goal_hidden_to_input(goal_hidden) 81 | goal_list.append(goal_hidden) 82 | # regress goal traj for loss 83 | goal_traj_hidden = self.goal_hidden_to_traj(goal_hidden) 84 | goal_traj[:,dec_step,:] = self.regressor(goal_traj_hidden) 85 | # get goal for decoder and encoder 86 | goal_for_dec = [self.goal_to_dec(goal) for goal in goal_list] 87 | goal_for_enc = torch.stack([self.goal_to_enc(goal) for goal in goal_list],dim = 1) 88 | enc_attn= self.enc_goal_attn(torch.tanh(goal_for_enc)).squeeze(-1) 89 | enc_attn = F.softmax(enc_attn, dim =1).unsqueeze(1) 90 | goal_for_enc = torch.bmm(enc_attn, goal_for_enc).squeeze(1) 91 | return goal_for_dec, goal_for_enc, goal_traj 92 | 93 | def cvae_decoder(self, dec_hidden, goal_for_dec): 94 | batch_size = dec_hidden.size(0) 95 | 96 | K = dec_hidden.shape[1] 97 | dec_hidden = dec_hidden.view(-1, dec_hidden.shape[-1]) 98 | dec_traj = dec_hidden.new_zeros(batch_size, self.dec_steps, K, self.pred_dim) 99 | for dec_step in range(self.dec_steps): 100 | # incremental goal for each time step 101 | goal_dec_input = dec_hidden.new_zeros(batch_size, self.dec_steps, self.hidden_size//4) 102 | goal_dec_input_temp = torch.stack(goal_for_dec[dec_step:],dim=1) 103 | goal_dec_input[:,dec_step:,:] = goal_dec_input_temp 104 | dec_attn= 
self.dec_goal_attn(torch.tanh(goal_dec_input)).squeeze(-1) 105 | dec_attn = F.softmax(dec_attn, dim =1).unsqueeze(1) 106 | goal_dec_input = torch.bmm(dec_attn,goal_dec_input).squeeze(1) 107 | goal_dec_input = goal_dec_input.unsqueeze(1).repeat(1, K, 1).view(-1, goal_dec_input.shape[-1]) 108 | dec_dec_input = self.dec_hidden_to_input(dec_hidden) 109 | dec_input = self.dec_drop(torch.cat((goal_dec_input,dec_dec_input),dim = -1)) 110 | dec_hidden = self.dec_cell(dec_input, dec_hidden) 111 | # regress dec traj for loss 112 | batch_traj = self.regressor(dec_hidden) 113 | batch_traj = batch_traj.view(-1, K, batch_traj.shape[-1]) 114 | dec_traj[:,dec_step,:,:] = batch_traj 115 | return dec_traj 116 | 117 | def encoder(self, raw_inputs, raw_targets, traj_input, flow_input=None, start_index = 0): 118 | # initial output tensor 119 | all_goal_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 120 | all_cvae_dec_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.K, self.pred_dim) 121 | # initial encoder goal with zeros 122 | goal_for_enc = traj_input.new_zeros((traj_input.size(0), self.hidden_size//4)) 123 | # initial encoder hidden with zeros 124 | traj_enc_hidden = traj_input.new_zeros((traj_input.size(0), self.hidden_size)) 125 | total_probabilities = traj_input.new_zeros((traj_input.size(0), self.enc_steps, self.K)) 126 | total_KLD = 0 127 | for enc_step in range(start_index, self.enc_steps): 128 | traj_enc_hidden = self.traj_enc_cell(self.enc_drop(torch.cat((traj_input[:,enc_step,:], goal_for_enc), 1)), traj_enc_hidden) 129 | enc_hidden = traj_enc_hidden 130 | goal_hidden = self.enc_to_goal_hidden(enc_hidden) 131 | goal_for_dec, goal_for_enc, goal_traj = self.SGE(goal_hidden) 132 | all_goal_traj[:,enc_step,:,:] = goal_traj 133 | dec_hidden = self.enc_to_dec_hidden(enc_hidden) 134 | if self.training: 135 | cvae_hidden, KLD, probability = self.cvae(dec_hidden, raw_inputs[:,enc_step,:], self.K, raw_targets[:,enc_step,:,:]) 136 | else: 137 | cvae_hidden, KLD, probability = self.cvae(dec_hidden, raw_inputs[:,enc_step,:], self.K) 138 | total_probabilities[:,enc_step,:] = probability 139 | total_KLD += KLD 140 | cvae_dec_hidden= self.cvae_to_dec_hidden(cvae_hidden) 141 | if self.map: 142 | map_input = flow_input 143 | cvae_dec_hidden = (cvae_dec_hidden + map_input.unsqueeze(1))/2 144 | all_cvae_dec_traj[:,enc_step,:,:,:] = self.cvae_decoder(cvae_dec_hidden, goal_for_dec) 145 | return all_goal_traj, all_cvae_dec_traj, total_KLD, total_probabilities 146 | 147 | def forward(self, inputs, map_mask=None, targets = None, start_index = 0, training=True): 148 | self.training = training 149 | if torch.is_tensor(start_index): 150 | start_index = start_index[0].item() 151 | if self.dataset in ['JAAD','PIE']: 152 | traj_input = self.feature_extractor(inputs) 153 | all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities = self.encoder(inputs, targets, traj_input) 154 | return all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities 155 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 156 | traj_input_temp = self.feature_extractor(inputs[:,start_index:,:]) 157 | traj_input = traj_input_temp.new_zeros((inputs.size(0), inputs.size(1), traj_input_temp.size(-1))) 158 | traj_input[:,start_index:,:] = traj_input_temp 159 | all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities = self.encoder(inputs, targets, traj_input, None, start_index) 160 | return all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities 
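
# A minimal, self-contained sketch (not repository API) of how the stochastic output
# produced above can be scored with a best-of-K criterion. It only assumes the
# decoder's tensor layout, (batch, enc_steps, dec_steps, K, pred_dim), and plain
# tensor methods; the function name and arguments are hypothetical.
def best_of_k_ade(cvae_dec_traj, target_traj):
    """cvae_dec_traj: (B, T_enc, T_dec, K, D); target_traj: (B, T_enc, T_dec, D)."""
    # distance of every hypothesis to the ground truth at each decoding step
    dist = ((cvae_dec_traj - target_traj.unsqueeze(3)) ** 2).sum(dim=-1).sqrt()  # (B, T_enc, T_dec, K)
    ade_per_k = dist.mean(dim=2)            # average over decoding steps -> (B, T_enc, K)
    return ade_per_k.min(dim=-1)[0].mean()  # keep the closest of the K samples, then average
# (The eval_* helpers in lib/utils/eval_utils.py compute dataset-specific variants of
# this min-over-K scoring in NumPy.)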
-------------------------------------------------------------------------------- /lib/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from .data_utils import bbox_denormalize, cxcywh_to_x1y1x2y2 5 | from nuscenes.prediction import convert_local_coords_to_global 6 | def compute_IOU(bbox_true, bbox_pred, format='xywh'): 7 | ''' 8 | compute IOU 9 | [cx, cy, w, h] or [x1, y1, x2, y2] 10 | ''' 11 | if format == 'xywh': 12 | xmin = np.max([bbox_true[0] - bbox_true[2]/2, bbox_pred[0] - bbox_pred[2]/2]) 13 | xmax = np.min([bbox_true[0] + bbox_true[2]/2, bbox_pred[0] + bbox_pred[2]/2]) 14 | ymin = np.max([bbox_true[1] - bbox_true[3]/2, bbox_pred[1] - bbox_pred[3]/2]) 15 | ymax = np.min([bbox_true[1] + bbox_true[3]/2, bbox_pred[1] + bbox_pred[3]/2]) 16 | w_true = bbox_true[2] 17 | h_true = bbox_true[3] 18 | w_pred = bbox_pred[2] 19 | h_pred = bbox_pred[3] 20 | elif format == 'x1y1x2y2': 21 | xmin = np.max([bbox_true[0], bbox_pred[0]]) 22 | xmax = np.min([bbox_true[2], bbox_pred[2]]) 23 | ymin = np.max([bbox_true[1], bbox_pred[1]]) 24 | ymax = np.min([bbox_true[3], bbox_pred[3]]) 25 | w_true = bbox_true[2] - bbox_true[0] 26 | h_true = bbox_true[3] - bbox_true[1] 27 | w_pred = bbox_pred[2] - bbox_pred[0] 28 | h_pred = bbox_pred[3] - bbox_pred[1] 29 | else: 30 | raise NameError("Unknown format {}".format(format)) 31 | w_inter = np.max([0, xmax - xmin]) 32 | h_inter = np.max([0, ymax - ymin]) 33 | intersection = w_inter * h_inter 34 | union = (w_true * h_true + w_pred * h_pred) - intersection 35 | 36 | return intersection/union 37 | 38 | def eval_jaad_pie(input_traj_np, target_traj_np, all_dec_traj_np): 39 | MSE_15=0 40 | MSE_05=0 41 | MSE_10=0 42 | FMSE=0 43 | CMSE=0 44 | CFMSE=0 45 | FIOU=0 46 | for batch_index in range(all_dec_traj_np.shape[0]): 47 | input_traj = np.expand_dims(input_traj_np[batch_index], axis=1) 48 | 49 | target_traj = input_traj + target_traj_np[batch_index] 50 | all_dec_traj = input_traj + all_dec_traj_np[batch_index] 51 | 52 | all_dec_traj = bbox_denormalize(all_dec_traj, W=1920, H=1080) 53 | target_traj = bbox_denormalize(target_traj, W=1920, H=1080) 54 | 55 | all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(all_dec_traj) 56 | target_traj_xyxy = cxcywh_to_x1y1x2y2(target_traj) 57 | 58 | 59 | MSE_15 += np.square(target_traj_xyxy[-1,0:45,:] - all_dec_traj_xyxy[-1,0:45,:]).mean(axis=None) 60 | MSE_05 += np.square(target_traj_xyxy[-1,0:15,:] - all_dec_traj_xyxy[-1,0:15,:]).mean(axis=None) 61 | MSE_10 += np.square(target_traj_xyxy[-1,0:30,:] - all_dec_traj_xyxy[-1,0:30,:]).mean(axis=None) 62 | 63 | FMSE +=np.square(target_traj_xyxy[-1,44,:] - all_dec_traj_xyxy[-1,44,:]).mean(axis=None) 64 | 65 | 66 | CMSE += np.square(target_traj[-1,0:45,:2] - all_dec_traj[-1,0:45,:2]).mean(axis=None) 67 | CFMSE += np.square(target_traj[-1,44,:2] - all_dec_traj[-1,44,:2]).mean(axis=None) 68 | tmp_FIOU = [] 69 | for i in range(target_traj_xyxy.shape[0]): 70 | tmp_FIOU.append(compute_IOU(target_traj_xyxy[i,44,:], all_dec_traj_xyxy[i,44,:], format='x1y1x2y2')) 71 | FIOU += np.mean(tmp_FIOU) 72 | return MSE_15, MSE_05, MSE_10, FMSE, CMSE, CFMSE, FIOU 73 | 74 | 75 | def eval_jaad_pie_cvae(input_traj, target_traj, cvae_all_dec_traj): 76 | MSE_15=0 77 | MSE_05=0 78 | MSE_10=0 79 | FMSE=0 80 | CMSE=0 81 | CFMSE=0 82 | FIOU=0 83 | K = cvae_all_dec_traj.shape[2] 84 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 85 | #import pdb; pdb.set_trace() 86 | input_traj = 
np.tile(input_traj[:,-1,:][:,None, None,:], (1, 1, K, 1)) 87 | #import pdb; pdb.set_trace() 88 | tiled_target_traj += input_traj 89 | cvae_all_dec_traj += input_traj 90 | 91 | tiled_target_traj = bbox_denormalize(tiled_target_traj, W=1920, H=1080) 92 | cvae_all_dec_traj = bbox_denormalize(cvae_all_dec_traj, W=1920, H=1080) 93 | 94 | tiled_target_traj_xyxy = cxcywh_to_x1y1x2y2(tiled_target_traj) 95 | cvae_all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(cvae_all_dec_traj) 96 | 97 | MSE_05 = np.square(cvae_all_dec_traj_xyxy[:,:15,:,:] - tiled_target_traj_xyxy[:,:15,:,:]).mean(axis=(1, 3)).min(axis=-1).sum() 98 | #import pdb; pdb.set_trace() 99 | MSE_10 = np.square(cvae_all_dec_traj_xyxy[:,:30,:,:] - tiled_target_traj_xyxy[:,:30,:,:]).mean(axis=(1, 3)).min(axis=-1).sum() 100 | MSE_15 = np.square(cvae_all_dec_traj_xyxy - tiled_target_traj_xyxy).mean(axis=(1, 3)).min(axis=-1).sum() 101 | FMSE = np.square(cvae_all_dec_traj_xyxy[:,-1,:,:] - tiled_target_traj_xyxy[:,-1,:,:]).mean(axis=-1).min(axis=-1).sum() 102 | CMSE = np.square(cvae_all_dec_traj[:,:,:,:2] - tiled_target_traj[:,:,:,:2]).mean(axis=(1, 3)).min(axis=-1).sum() 103 | CFMSE = np.square(cvae_all_dec_traj[:,-1,:,:2] - tiled_target_traj[:,-1,:,:2]).mean(axis=-1).min(axis=-1).sum() 104 | return MSE_15, MSE_05, MSE_10, FMSE, CMSE, CFMSE, FIOU 105 | 106 | def eval_hevi(input_traj_np, target_traj_np, all_dec_traj_np): 107 | ADE_15=0 108 | ADE_05=0 109 | ADE_10=0 110 | FDE=0 111 | CADE=0 112 | CFDE=0 113 | FIOU=0 114 | for batch_index in range(all_dec_traj_np.shape[0]): 115 | input_traj = np.expand_dims(input_traj_np[batch_index], axis=1) 116 | target_traj = input_traj + target_traj_np[batch_index] 117 | all_dec_traj = input_traj + all_dec_traj_np[batch_index] 118 | 119 | target_traj = bbox_denormalize(target_traj, W=1280, H=640) 120 | all_dec_traj = bbox_denormalize(all_dec_traj, W=1280, H=640) 121 | 122 | target_traj_xyxy = cxcywh_to_x1y1x2y2(target_traj) 123 | all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(all_dec_traj) 124 | 125 | 126 | ADE_15 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,:,:2] - all_dec_traj_xyxy[:,:,:2]) ** 2, axis=-1))) 127 | 128 | ADE_05 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,0:5,:2] - all_dec_traj_xyxy[:,0:5,:2]) ** 2, axis=-1))) 129 | ADE_10 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,0:10,:2] - all_dec_traj_xyxy[:,0:10,:2]) ** 2, axis=-1))) 130 | FDE += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,-1,:2] - all_dec_traj_xyxy[:,-1,:2]) ** 2, axis=-1))) 131 | 132 | 133 | CADE += np.mean(np.sqrt(np.sum((target_traj[:,:,:2] - all_dec_traj[:,:,:2]) ** 2, axis=-1))) 134 | CFDE += np.mean(np.sqrt(np.sum((target_traj[:,-1,:2] - all_dec_traj[:,-1,:2]) ** 2, axis=-1))) 135 | tmp_FIOU = [] 136 | for i in range(target_traj_xyxy.shape[0]): 137 | tmp_FIOU.append(compute_IOU(target_traj_xyxy[i,-1,:], all_dec_traj_xyxy[i,-1,:], format='x1y1x2y2')) 138 | FIOU += np.mean(tmp_FIOU) 139 | return ADE_15, ADE_05, ADE_10, FDE, CADE, CFDE, FIOU 140 | 141 | def eval_ethucy(input_traj_np, target_traj_np, all_dec_traj_np): 142 | ADE_08=0 143 | ADE_12=0 144 | FDE_08=0 145 | FDE_12=0 146 | for batch in range(all_dec_traj_np.shape[0]): 147 | input_traj = np.expand_dims(input_traj_np[batch], axis=1) 148 | target_traj = input_traj[...,:2] + target_traj_np[batch] 149 | all_dec_traj = input_traj[...,:2] + all_dec_traj_np[batch] 150 | 151 | ADE_08 += np.mean(np.sqrt(np.sum((target_traj[-1,:8,:] - all_dec_traj[-1,:8,:]) ** 2, axis=-1))) 152 | ADE_12 += np.mean(np.sqrt(np.sum((target_traj[-1,:,:] - all_dec_traj[-1,:,:]) ** 2, axis=-1))) 153 | 154 | FDE_08 += 
np.mean(np.sqrt(np.sum((target_traj[-1,7,:] - all_dec_traj[-1,7,:]) ** 2, axis=-1))) 155 | FDE_12 += np.mean(np.sqrt(np.sum((target_traj[-1,-1,:] - all_dec_traj[-1,-1,:]) ** 2, axis=-1))) 156 | return ADE_08, FDE_08, ADE_12, FDE_12 157 | 158 | 159 | def eval_ethucy_cvae(input_traj, target_traj, cvae_all_traj): 160 | result = {'ADE_08':0, 'ADE_12':0, 'FDE_08':0, 'FDE_12':0} 161 | 162 | K = cvae_all_traj.shape[2] 163 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 164 | #import pdb; pdb.set_trace() 165 | input_traj = np.tile(input_traj[:,-1,:][:,None, None,:], (1, 1, K, 1)) 166 | 167 | result['ADE_08'] = np.linalg.norm(cvae_all_traj[:,:8,:,:] - tiled_target_traj[:,:8,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 168 | result['ADE_12'] = np.linalg.norm(cvae_all_traj[:,:12,:,:] - tiled_target_traj[:,:12,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 169 | result['FDE_08'] = np.linalg.norm(cvae_all_traj[:,7,:,:] - tiled_target_traj[:,7,:,:], axis=-1).min(axis=1).sum() 170 | result['FDE_12'] = np.linalg.norm(cvae_all_traj[:,11,:,:] - tiled_target_traj[:,11,:,:], axis=-1).min(axis=1).sum() 171 | 172 | 173 | return result 174 | 175 | def eval_nuscenes_local(starting_translation, starting_rotation, target_traj, cvae_all_traj): 176 | result = {'ADE_12':0, 'FDE_12':0} 177 | 178 | 179 | K = cvae_all_traj.shape[2] 180 | B = cvae_all_traj.shape[0] 181 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 182 | 183 | cvae_all_traj_global = np.zeros(cvae_all_traj.shape) 184 | for k in range(K): 185 | for b in range(B): 186 | cvae_all_traj_global[b,:,k,:] = convert_local_coords_to_global(cvae_all_traj[b,:,k,:],starting_translation[b] ,starting_rotation[b]) 187 | result['ADE_12'] = np.linalg.norm(cvae_all_traj_global[:,:12,:,:] - tiled_target_traj[:,:12,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 188 | result['FDE_12'] = np.linalg.norm(cvae_all_traj_global[:,11,:,:] - tiled_target_traj[:,11,:,:], axis=-1).min(axis=1).sum() 189 | 190 | 191 | return result 192 | 193 | 194 | 195 | def eval_nuscenes_api(starting_translation, starting_rotation, target_traj, cvae_all_traj, total_probabilities, tokens): 196 | result = {'ADE_12':0, 'FDE_12':0} 197 | 198 | 199 | K = cvae_all_traj.shape[2] 200 | B = cvae_all_traj.shape[0] 201 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 202 | preds5 = [] 203 | cvae_all_traj_global = np.zeros(cvae_all_traj.shape) 204 | for k in range(K): 205 | for b in range(B): 206 | cvae_all_traj_global[b,:,k,:] = convert_local_coords_to_global(cvae_all_traj[b,:,k,:],starting_translation[b] ,starting_rotation[b]) 207 | 208 | cvae_all_traj_global = np.transpose(cvae_all_traj_global, (0,2,1,3)) 209 | 210 | tiled_target_traj = np.transpose(tiled_target_traj, (0,2,1,3)) 211 | for i, token in enumerate(tokens): 212 | 213 | instance_token, sample_token = token.split("_") 214 | prediction = Prediction(instance=instance_token, sample=sample_token, prediction=cvae_all_traj_global[i], 215 | probabilities=total_probabilities[i]).serialize() 216 | preds5.append(prediction) 217 | 218 | return preds5 -------------------------------------------------------------------------------- /lib/dataloaders/pie_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/PIE.py 2 | 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils import data 7 | from .PIE_origin import PIE 8 | 9 | 
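# Each sample yielded by this layer is a dict:
#   'input_x'  : the observed bounding boxes, converted and normalized according to
#                args.bbox_type / args.normalize,
#   'target_y' : for every observed step, the box displacements over the next
#                dec_steps frames relative to that step (see get_target below),
#   'cur_image_file' / 'timestep' : used to locate the corresponding frame.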
class PIEDataLayer(data.Dataset): 10 | def __init__(self, args, split): 11 | self.split = split 12 | self.root = args.data_root 13 | self.args = args 14 | # NOTE: add downsample function 15 | self.downsample_step = int(30/self.args.FPS) 16 | traj_data_opts = {'fstride': 1, 17 | 'sample_type': 'all', 18 | 'height_rng': [0, float('inf')], 19 | 'squarify_ratio': 0, 20 | 'data_split_type': 'default', # kfold, random, default 21 | 'seq_type': 'trajectory', 22 | 'min_track_size': 61, 23 | 'random_params': {'ratios': None, 24 | 'val_data': True, 25 | 'regen_data': True}, 26 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 27 | 28 | traj_model_opts = {'normalize_bbox': True, 29 | 'track_overlap': 0.5, 30 | 'observe_length': 15, 31 | 'predict_length': self.args.dec_steps, 32 | 'enc_input_type': ['bbox'], 33 | 'dec_input_type': [], 34 | 'prediction_type': ['bbox'] 35 | } 36 | imdb = PIE(data_path=self.root) 37 | 38 | traj_model_opts['enc_input_type'].extend(['obd_speed', 'heading_angle']) 39 | traj_model_opts['prediction_type'].extend(['obd_speed', 'heading_angle']) 40 | beh_seq = imdb.generate_data_trajectory_sequence(self.split, **traj_data_opts) 41 | self.data = self.get_traj_data(beh_seq, **traj_model_opts) 42 | 43 | def __getitem__(self, index): 44 | obs_bbox = torch.FloatTensor(self.data['obs_bbox'][index]) 45 | pred_bbox = torch.FloatTensor(self.data['pred_bbox'][index]) 46 | cur_image_file = self.data['obs_image'][index][-1] 47 | 48 | 49 | ret = {'input_x':obs_bbox, 50 | 'target_y':pred_bbox, 'cur_image_file':cur_image_file} 51 | 52 | ret['timestep'] = int(cur_image_file.split('/')[-1].split('.')[0]) 53 | 54 | return ret 55 | 56 | def __len__(self): 57 | return len(self.data[list(self.data.keys())[0]]) 58 | 59 | def get_traj_tracks(self, dataset, data_types, observe_length, predict_length, overlap, normalize): 60 | """ 61 | Generates tracks by sampling from pedestrian sequences 62 | :param dataset: The raw data passed to the method 63 | :param data_types: Specification of types of data for encoder and decoder. Data types depend on datasets. e.g. 64 | JAAD has 'bbox', 'ceneter' and PIE in addition has 'obd_speed', 'heading_angle', etc. 65 | :param observe_length: The length of the observation (i.e. time steps of the encoder) 66 | :param predict_length: The length of the prediction (i.e. time steps of the decoder) 67 | :param overlap: How much the sampled tracks should overlap. A value between [0,1) should be selected 68 | :param normalize: Whether to normalize center/bounding box coordinates, i.e. convert to velocities. NOTE: when 69 | the tracks are normalized, observation length becomes 1 step shorter, i.e. first step is removed. 
70 | :return: A dictinary containing sampled tracks for each data modality 71 | """ 72 | # Calculates the overlap in terms of number of frames 73 | seq_length = observe_length + predict_length 74 | overlap_stride = observe_length if overlap == 0 else \ 75 | int((1 - overlap) * observe_length) 76 | overlap_stride = 1 if overlap_stride < 1 else overlap_stride 77 | 78 | # Check the validity of keys selected by user as data type 79 | d = {} 80 | for dt in data_types: 81 | try: 82 | d[dt] = dataset[dt] 83 | except:# KeyError: 84 | raise KeyError('Wrong data type is selected %s' % dt) 85 | 86 | d['image'] = dataset['image'] 87 | d['pid'] = dataset['pid'] 88 | d['resolution'] = dataset['resolution'] 89 | d['flow'] = [] 90 | num_trks = len(d['image']) 91 | # Sample tracks from sequneces 92 | for k in d.keys(): 93 | tracks = [] 94 | for track in d[k]: 95 | for i in range(0, len(track) - seq_length + 1, overlap_stride): 96 | tracks.append(track[i:i + seq_length]) 97 | d[k] = tracks 98 | # Normalize tracks using FOL paper method, 99 | d['bbox'] = self.convert_normalize_bboxes(d['bbox'], d['resolution'], 100 | self.args.normalize, self.args.bbox_type) 101 | return d 102 | 103 | def convert_normalize_bboxes(self, all_bboxes, all_resolutions, normalize, bbox_type): 104 | '''input box type is x1y1x2y2 in original resolution''' 105 | for i in range(len(all_bboxes)): 106 | if len(all_bboxes[i]) == 0: 107 | continue 108 | bbox = np.array(all_bboxes[i]) 109 | # NOTE ltrb to cxcywh 110 | if bbox_type == 'cxcywh': 111 | bbox[..., [2, 3]] = bbox[..., [2, 3]] - bbox[..., [0, 1]] 112 | bbox[..., [0, 1]] += bbox[..., [2, 3]]/2 113 | # NOTE Normalize bbox 114 | if normalize == 'zero-one': 115 | # W, H = all_resolutions[i][0] 116 | _min = np.array(self.args.min_bbox)[None, :] 117 | _max = np.array(self.args.max_bbox)[None, :] 118 | bbox = (bbox - _min) / (_max - _min) 119 | elif normalize == 'plus-minus-one': 120 | # W, H = all_resolutions[i][0] 121 | _min = np.array(self.args.min_bbox)[None, :] 122 | _max = np.array(self.args.max_bbox)[None, :] 123 | bbox = (2 * (bbox - _min) / (_max - _min)) - 1 124 | elif normalize == 'none': 125 | pass 126 | else: 127 | raise ValueError(normalize) 128 | all_bboxes[i] = bbox 129 | return all_bboxes 130 | 131 | def get_data_helper(self, data, data_type): 132 | """ 133 | A helper function for data generation that combines different data types into a single representation 134 | :param data: A dictionary of different data types 135 | :param data_type: The data types defined for encoder and decoder input/output 136 | :return: A unified data representation as a list 137 | """ 138 | if not data_type: 139 | return [] 140 | d = [] 141 | for dt in data_type: 142 | if dt == 'image': 143 | continue 144 | d.append(np.array(data[dt])) 145 | 146 | # Concatenate different data points into a single representation 147 | if len(d) > 1: 148 | return np.concatenate(d, axis=2) 149 | elif len(d) == 1: 150 | return d[0] 151 | else: 152 | return d 153 | 154 | def get_traj_data(self, data, **model_opts): 155 | """ 156 | Main data generation function for training/testing 157 | :param data: The raw data 158 | :param model_opts: Control parameters for data generation characteristics (see below for default values) 159 | :return: A dictionary containing training and testing data 160 | """ 161 | 162 | opts = { 163 | 'normalize_bbox': True, 164 | 'track_overlap': 0.5, 165 | 'observe_length': self.args.enc_steps, 166 | 'predict_length': self.args.dec_steps, 167 | 'enc_input_type': ['bbox'], 168 | 
'dec_input_type': [], 169 | 'prediction_type': ['bbox'] 170 | } 171 | for key, value in model_opts.items(): 172 | assert key in opts.keys(), 'wrong data parameter %s' % key 173 | opts[key] = value 174 | 175 | observe_length = opts['observe_length'] 176 | predict_length = opts['predict_length'] 177 | data_types = set(opts['enc_input_type'] + opts['dec_input_type'] + opts['prediction_type']) 178 | data_tracks = self.get_traj_tracks(data, data_types, observe_length, 179 | opts['predict_length'], opts['track_overlap'], 180 | opts['normalize_bbox']) 181 | obs_slices = {} 182 | pred_slices = {} 183 | # Generate observation/prediction sequences from the tracks 184 | for k in data_tracks.keys(): 185 | obs_slices[k] = [] 186 | pred_slices[k] = [] 187 | # NOTE: Add downsample function 188 | down = self.downsample_step 189 | obs_slices[k].extend([d[down-1:observe_length:down] for d in data_tracks[k]]) 190 | if k == 'bbox': 191 | start = down-1 # 0 192 | end = start + observe_length # 0 + 15 = 15 193 | target_list = [] # 15 * 45 * 4 194 | 195 | for d in data_tracks[k]: 196 | target = self.get_target(d,start,end,observe_length,predict_length) 197 | target_list.append(target) 198 | pred_slices[k].extend(target_list) 199 | ret = {'obs_image': obs_slices['image'], 200 | 'obs_pid': obs_slices['pid'], 201 | 'obs_resolution': obs_slices['resolution'], 202 | 'pred_image': pred_slices['image'], 203 | 'pred_pid': pred_slices['pid'], 204 | 'pred_resolution': pred_slices['resolution'], 205 | 'obs_bbox': np.array(obs_slices['bbox']), #enc_input, 206 | 'pred_bbox': np.array(pred_slices['bbox']), #pred_target, 207 | } 208 | 209 | return ret 210 | 211 | def get_path(self, 212 | file_name='', 213 | save_folder='models', 214 | dataset='pie', 215 | model_type='trajectory', 216 | save_root_folder='data/'): 217 | """ 218 | A path generator method for saving model and config data. It create directories if needed. 219 | :param file_name: The actual save file name , e.g. 'model.h5' 220 | :param save_folder: The name of folder containing the saved files 221 | :param dataset: The name of the dataset used 222 | :param save_root_folder: The root folder 223 | :return: The full path for the model name and the path to the final folder 224 | """ 225 | save_path = os.path.join(save_root_folder, dataset, model_type, save_folder) 226 | if not os.path.exists(save_path): 227 | os.makedirs(save_path) 228 | return os.path.join(save_path, file_name), save_path 229 | 230 | def get_target(self, session, start, end, observe_length, predict_length): 231 | ''' 232 | Given the input session and the start and end time of the input clip, find the target 233 | TARGET FOR PREDICTION IS THE CHANGES IN THE FUTURE!! 234 | Params: 235 | session: the input time sequence of a car, can be bbox or ego_motion with shape (time, :) 236 | start: start frame id 237 | end: end frame id 238 | Returns: 239 | target: Target tensor with shape (self.args.segment_len, dec_steps, :) 240 | The target is the change of the values. e.g. target of yaw is \delta{\theta}_{t0,tn} 241 | ''' 242 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 243 | for i, target_start in enumerate(range(start, end)): 244 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 245 | # i, target_start = (0,0) (1,1) (2,2) ...... 
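                # i.e. with start=0, the targets for observed step i are the box /
                # ego-motion displacements of frames [i+1, ..., i+predict_length]
                # measured relative to frame i.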
246 | target_start = target_start + 1 247 | try: 248 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 249 | session[target_start-1:target_start,:]) 250 | except: 251 | print("segment start: ", start) 252 | print("sample start: ", target_start) 253 | print("segment end: ", end) 254 | print(session.shape) 255 | raise ValueError() 256 | return target -------------------------------------------------------------------------------- /lib/dataloaders/jaad_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/JAAD.py 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils import data 6 | from .JAAD_origin import JAAD 7 | from copy import deepcopy 8 | 9 | class JAADDataLayer(data.Dataset): 10 | def __init__(self, args, split): 11 | self.split = split 12 | self.root = args.data_root 13 | self.args = args 14 | data_opts = {'fstride': 1, 15 | 'sample_type': 'all', 16 | 'height_rng': [0, float('inf')], 17 | 'squarify_ratio': 0, 18 | 'data_split_type': 'default', # kfold, random, default 19 | 'seq_type': 'trajectory', 20 | 'min_track_size': 61, 21 | 'random_params': {'ratios': None, 22 | 'val_data': True, 23 | 'regen_data': True}, 24 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 25 | traj_model_opts = {'normalize_bbox': True, 26 | 'track_overlap': 0.5, 27 | 'observe_length': 15, 28 | 'predict_length': self.args.dec_steps, 29 | 'enc_input_type': ['bbox'], 30 | 'dec_input_type': [], #['intention_prob', 'obd_speed'], 31 | 'prediction_type': ['bbox'] 32 | } 33 | self.downsample_step = int(30/self.args.FPS) 34 | imdb = JAAD(data_path=self.root) 35 | #imdb.generate_database() 36 | beh_seq = imdb.generate_data_trajectory_sequence(self.split, **data_opts) 37 | self.data = self.get_data(beh_seq, **traj_model_opts) 38 | def __getitem__(self, index): 39 | obs_bbox = torch.FloatTensor(self.data['obs_bbox'][index]) 40 | pred_bbox = torch.FloatTensor(self.data['pred_bbox'][index]) 41 | gt_mean = torch.FloatTensor(self.data['gt_mean'][index]) 42 | gt_std = torch.FloatTensor(self.data['gt_std'][index]) 43 | cur_image_file = self.data['obs_image'][index][-1] 44 | ret = {'input_x':obs_bbox, 45 | 'target_y':pred_bbox, 'cur_image_file':cur_image_file, 'gt_mean':gt_mean, 'gt_std':gt_std} 46 | ret['timestep'] = int(cur_image_file.split('/')[-1].split('.')[0]) 47 | 48 | return ret 49 | 50 | def __len__(self): 51 | return len(self.data[list(self.data.keys())[0]]) 52 | 53 | def get_tracks(self, dataset, data_types, observe_length, predict_length, overlap, normalize): 54 | """ 55 | Generates tracks by sampling from pedestrian sequences 56 | :param dataset: The raw data passed to the method 57 | :param data_types: Specification of types of data for encoder and decoder. Data types depend on datasets. e.g. 58 | JAAD has 'bbox', 'ceneter' and PIE in addition has 'obd_speed', 'heading_angle', etc. 59 | :param observe_length: The length of the observation (i.e. time steps of the encoder) 60 | :param predict_length: The length of the prediction (i.e. time steps of the decoder) 61 | :param overlap: How much the sampled tracks should overlap. A value between [0,1) should be selected 62 | :param normalize: Whether to normalize center/bounding box coordinates, i.e. convert to velocities. NOTE: when 63 | the tracks are normalized, observation length becomes 1 step shorter, i.e. first step is removed. 
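            (In this adapted loader the boxes are instead min-max normalized later in
            convert_normalize_bboxes using the configured min_bbox/max_bbox bounds, so
            the track length is not shortened here.)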
64 | :return: A dictinary containing sampled tracks for each data modality 65 | """ 66 | # Calculates the overlap in terms of number of frames 67 | seq_length = observe_length + predict_length 68 | overlap_stride = observe_length if overlap == 0 else \ 69 | int((1 - overlap) * observe_length) 70 | overlap_stride = 1 if overlap_stride < 1 else overlap_stride 71 | 72 | # Check the validity of keys selected by user as data type 73 | d = {} 74 | for dt in data_types: 75 | try: 76 | d[dt] = dataset[dt] 77 | except: 78 | raise KeyError('Wrong data type is selected %s' % dt) 79 | d['image'] = dataset['image'] 80 | d['pid'] = dataset['pid'] 81 | d['resolution'] = dataset['resolution'] 82 | d['flow'] = [] 83 | 84 | # Sample tracks from sequneces 85 | for k in d.keys(): 86 | tracks = [] 87 | for track in d[k]: 88 | tracks.extend([track[i:i + seq_length] for i in 89 | range(0, len(track) - seq_length + 1, overlap_stride)]) 90 | d[k] = tracks 91 | 92 | # Normalize tracks using FOL paper method, 93 | d['bbox'] = self.convert_normalize_bboxes(d['bbox'], d['resolution'], 94 | self.args.normalize, self.args.bbox_type) 95 | return d 96 | 97 | def convert_normalize_bboxes(self, all_bboxes, all_resolutions, normalize, bbox_type): 98 | '''input box type is x1y1x2y2 in original resolution''' 99 | for i in range(len(all_bboxes)): 100 | if len(all_bboxes[i]) == 0: 101 | continue 102 | bbox = np.array(all_bboxes[i]) 103 | # NOTE ltrb to cxcywh 104 | if bbox_type == 'cxcywh': 105 | bbox[..., [2, 3]] = bbox[..., [2, 3]] - bbox[..., [0, 1]] 106 | bbox[..., [0, 1]] += bbox[..., [2, 3]]/2 107 | # NOTE Normalize bbox 108 | if normalize == 'zero-one': 109 | # W, H = all_resolutions[i][0] 110 | _min = np.array(self.args.min_bbox)[None, :] 111 | _max = np.array(self.args.max_bbox)[None, :] 112 | bbox = (bbox - _min) / (_max - _min) 113 | elif normalize == 'plus-minus-one': 114 | # W, H = all_resolutions[i][0] 115 | _min = np.array(self.args.min_bbox)[None, :] 116 | _max = np.array(self.args.max_bbox)[None, :] 117 | bbox = (2 * (bbox - _min) / (_max - _min)) - 1 118 | elif normalize == 'none': 119 | pass 120 | else: 121 | raise ValueError(normalize) 122 | all_bboxes[i] = bbox 123 | return all_bboxes 124 | 125 | def get_data_helper(self, data, data_type): 126 | """ 127 | A helper function for data generation that combines different data types into a single representation 128 | :param data: A dictionary of different data types 129 | :param data_type: The data types defined for encoder and decoder input/output 130 | :return: A unified data representation as a list 131 | """ 132 | if not data_type: 133 | return [] 134 | d = [] 135 | for dt in data_type: 136 | if dt == 'image': 137 | continue 138 | d.append(np.array(data[dt])) 139 | 140 | # Concatenate different data points into a single representation 141 | if len(d) > 1: 142 | return np.concatenate(d, axis=2) 143 | elif len(d) == 1: 144 | return d[0] 145 | else: 146 | return d 147 | 148 | def get_data(self, data, **model_opts): 149 | """ 150 | Main data generation function for training/testing 151 | :param data: The raw data 152 | :param model_opts: Control parameters for data generation characteristics (see below for default values) 153 | :return: A dictionary containing training and testing data 154 | """ 155 | 156 | opts = { 157 | 'normalize_bbox': True, 158 | 'track_overlap': 0.5, 159 | 'observe_length': 15, 160 | 'predict_length': self.args.dec_steps, 161 | 'enc_input_type': ['bbox'], 162 | 'dec_input_type': [], 163 | 'prediction_type': ['bbox'] 164 | } 165 | for key, 
value in model_opts.items(): 166 | assert key in opts.keys(), 'wrong data parameter %s' % key 167 | opts[key] = value 168 | 169 | observe_length = opts['observe_length'] 170 | predict_length = opts['predict_length'] 171 | data_types = set(opts['enc_input_type'] + opts['dec_input_type'] + opts['prediction_type']) 172 | data_tracks = self.get_tracks(data, data_types, observe_length, 173 | opts['predict_length'], opts['track_overlap'], 174 | opts['normalize_bbox']) 175 | 176 | obs_slices = {} 177 | pred_slices = {} 178 | obs_slices['gt_mean'] = [] 179 | obs_slices['gt_std'] = [] 180 | # Generate observation/prediction sequences from the tracks 181 | for k in data_tracks.keys(): 182 | 183 | obs_slices[k] = [] 184 | pred_slices[k] = [] 185 | # NOTE: Add downsample function 186 | down = self.downsample_step 187 | if k == 'bbox': 188 | start = down-1 189 | end = start + observe_length 190 | mean_list = [] 191 | std_list = [] 192 | observe_list = [] 193 | target_list = [] 194 | for sample in data_tracks[k]: 195 | target = self.get_target(sample,start,end,observe_length,predict_length) 196 | target_list.append(target) 197 | observe = sample[down-1:observe_length:down] 198 | observe_list.append(observe) 199 | mean_np = np.zeros((observe_length, 4)) 200 | std_np = np.zeros((observe_length, 4)) 201 | for obs in range(1,observe_length+1): 202 | whole_seq = sample[down-1:(obs + predict_length):down] 203 | mean_np[obs-1] = deepcopy(whole_seq).mean(axis=0) 204 | std_np[obs-1] = deepcopy(whole_seq).mean(axis=0) 205 | mean_list.append(mean_np) 206 | std_list.append(std_np) 207 | obs_slices[k].extend(observe_list) 208 | obs_slices['gt_mean'].extend(mean_list) 209 | obs_slices['gt_std'].extend(std_list) 210 | pred_slices[k].extend(target_list) 211 | 212 | else: 213 | obs_slices[k].extend([sample[down-1:observe_length:down] for sample in data_tracks[k]]) 214 | ret = {'obs_image': obs_slices['image'], 215 | 'obs_pid': obs_slices['pid'], 216 | 'obs_resolution': obs_slices['resolution'], 217 | 'gt_mean': obs_slices['gt_mean'], 218 | 'gt_std': obs_slices['gt_std'], 219 | 'pred_image': pred_slices['image'], 220 | 'pred_pid': pred_slices['pid'], 221 | 'pred_resolution': pred_slices['resolution'], 222 | 'obs_bbox': np.array(obs_slices['bbox']), 223 | 'flow_input': obs_slices['flow'], 224 | 'pred_bbox': np.array(pred_slices['bbox']), 225 | 'model_opts': opts, 226 | } 227 | 228 | return ret 229 | 230 | 231 | def get_target(self, session, start, end, observe_length, predict_length): 232 | ''' 233 | Given the input session and the start and end time of the input clip, find the target 234 | TARGET FOR PREDICTION IS THE CHANGES IN THE FUTURE!! 235 | Params: 236 | session: the input time sequence of a car, can be bbox or ego_motion with shape (time, :) 237 | start: start frame id 238 | end: end frame id 239 | Returns: 240 | target: Target tensor with shape (self.args.segment_len, dec_steps, :) 241 | The target is the change of the values. e.g. 
target of yaw is \delta{\theta}_{t0,tn} 242 | ''' 243 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 244 | for i, target_start in enumerate(range(start, end)): 245 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 246 | target_start = target_start + 1 247 | try: 248 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 249 | session[target_start-1:target_start,:]) 250 | except: 251 | print("segment start: ", start) 252 | print("sample start: ", target_start) 253 | print("segment end: ", end) 254 | print(session.shape) 255 | raise ValueError() 256 | return target -------------------------------------------------------------------------------- /lib/utils/ethucy_train_utils_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_ethucy, eval_ethucy_cvae 14 | from lib.losses import cvae, cvae_multi 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 18 | count = 0 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | total_cvae_loss = 0 22 | total_KLD_loss = 0 23 | loader = tqdm(train_gen, total=len(train_gen)) 24 | with torch.set_grad_enabled(True): 25 | for batch_idx, data in enumerate(loader): 26 | # if batch_idx > 1: 27 | # break 28 | first_history_index = data['first_history_index'] 29 | assert torch.unique(first_history_index).shape[0] == 1 30 | batch_size = data['input_x'].shape[0] 31 | count += batch_size 32 | 33 | input_traj = data['input_x'].to(device) 34 | input_traj_st = data['input_x_st'].to(device) 35 | target_traj = data['target_y'].to(device) 36 | 37 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = target_traj, start_index = first_history_index, training = False) 38 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj, first_history_index[0]) 39 | #import pdb; pdb.set_trace() 40 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 41 | train_loss = goal_loss + cvae_loss + KLD_loss.mean() 42 | 43 | total_goal_loss += goal_loss.item()* batch_size 44 | total_cvae_loss += cvae_loss.item()* batch_size 45 | total_KLD_loss += KLD_loss.mean()* batch_size 46 | 47 | # optimize 48 | optimizer.zero_grad() 49 | train_loss.backward() 50 | optimizer.step() 51 | 52 | total_goal_loss /= count 53 | total_cvae_loss/= count 54 | total_KLD_loss/= count 55 | 56 | return total_goal_loss, total_cvae_loss, total_KLD_loss 57 | 58 | def val(model, val_gen, criterion, device): 59 | total_goal_loss = 0 60 | total_cvae_loss = 0 61 | total_KLD_loss = 0 62 | count = 0 63 | model.eval() 64 | loader = tqdm(val_gen, total=len(val_gen)) 65 | with torch.set_grad_enabled(False): 66 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 67 | # if batch_idx > 1: 68 | # break 69 | first_history_index = data['first_history_index'] 70 | assert torch.unique(first_history_index).shape[0] == 1 71 | batch_size = data['input_x'].shape[0] 72 | count += batch_size 73 | 74 | input_traj = data['input_x'].to(device) 75 | input_traj_st = data['input_x_st'].to(device) 76 | target_traj = 
data['target_y'].to(device) 77 | 78 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = None, start_index = first_history_index, training = False) 79 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 80 | 81 | 82 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 83 | 84 | total_goal_loss += goal_loss.item()* batch_size 85 | total_cvae_loss += cvae_loss.item()* batch_size 86 | total_KLD_loss += KLD_loss.mean()* batch_size 87 | 88 | val_loss = total_goal_loss/count \ 89 | + total_cvae_loss/count+ total_KLD_loss/ count 90 | #import pdb;pdb.set_trace() 91 | return val_loss 92 | 93 | def test(model, test_gen, criterion, device): 94 | total_goal_loss = 0 95 | total_cvae_loss = 0 96 | total_KLD_loss = 0 97 | ADE_08 = 0 98 | ADE_12 = 0 99 | FDE_08 = 0 100 | FDE_12 = 0 101 | count = 0 102 | model.eval() 103 | loader = tqdm(test_gen, total=len(test_gen)) 104 | with torch.set_grad_enabled(False): 105 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 106 | # if batch_idx > 1: 107 | # break 108 | 109 | first_history_index = data['first_history_index'] 110 | assert torch.unique(first_history_index).shape[0] == 1 111 | batch_size = data['input_x'].shape[0] 112 | count += batch_size 113 | 114 | input_traj = data['input_x'].to(device) 115 | input_traj_st = data['input_x_st'].to(device) 116 | target_traj = data['target_y'].to(device) 117 | 118 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = None, start_index = first_history_index, training = False) 119 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 120 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 121 | 122 | 123 | 124 | total_goal_loss += goal_loss.item()* batch_size 125 | total_cvae_loss += cvae_loss.item()* batch_size 126 | total_KLD_loss += KLD_loss.mean()* batch_size 127 | 128 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 129 | all_goal_traj_np = all_goal_traj.to('cpu').numpy() 130 | input_traj_np = input_traj.to('cpu').numpy() 131 | target_traj_np = target_traj.to('cpu').numpy() 132 | 133 | batch_results =\ 134 | eval_ethucy_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 135 | ADE_08 += batch_results['ADE_08'] 136 | ADE_12 += batch_results['ADE_12'] 137 | FDE_08 += batch_results['FDE_08'] 138 | FDE_12 += batch_results['FDE_12'] 139 | 140 | 141 | 142 | ADE_08 /= count 143 | ADE_12 /= count 144 | FDE_08 /= count 145 | FDE_12 /= count 146 | 147 | 148 | test_loss = total_goal_loss/count + total_cvae_loss/count + total_KLD_loss/count 149 | # print("Test Loss %4f\n" % (test_loss)) 150 | # print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 151 | return test_loss, ADE_08, FDE_08, ADE_12, FDE_12 152 | 153 | def evaluate(model, test_gen, criterion, device): 154 | total_goal_loss = 0 155 | total_cvae_loss = 0 156 | total_KLD_loss = 0 157 | ADE_08 = 0 158 | ADE_12 = 0 159 | FDE_08 = 0 160 | FDE_12 = 0 161 | count = 0 162 | all_file_name = [] 163 | model.eval() 164 | loader = tqdm(test_gen, total=len(test_gen)) 165 | with torch.set_grad_enabled(False): 166 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 167 | first_history_index = data['first_history_index'] 168 | assert torch.unique(first_history_index).shape[0] == 1 169 | batch_size = data['input_x'].shape[0] 170 | count += batch_size 171 | 172 | 
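                # The batch carries the observed trajectory ('input_x'), its standardized
                # version ('input_x_st') and the ground-truth future ('target_y'); ADE/FDE
                # at 8 and 12 steps are computed by eval_ethucy_cvae from the K CVAE samples
                # predicted at the last observed step (the [:, -1, ...] slices below).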
input_traj = data['input_x'].to(device) 173 | input_traj_st = data['input_x_st'].to(device) 174 | target_traj = data['target_y'].to(device) 175 | scene_name = data['scene_name'] 176 | timestep = data['timestep'] 177 | current_img = timestep 178 | #import pdb; pdb.set_trace() 179 | # filename = datapath + '/test/biwi_eth.txt' 180 | # data = pd.read_csv(filename, sep='\t', index_col=False, header=None) 181 | # data.columns = ['frame_id', 'track_id', 'pos_x', 'pos_y'] 182 | # frame_id_min = data['frame_id'].min() 183 | # filename path = os.path.join(datapath, dataset ,str((current_img[1][0]+int(frame_id_min)//10)*10).zfill(5) + '.png') 184 | 185 | all_goal_traj, cvae_dec_traj, KLD_loss = model(input_traj, target_traj, first_history_index, False) 186 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 187 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 188 | total_goal_loss += goal_loss.item()* batch_size 189 | total_cvae_loss += cvae_loss.item()* batch_size 190 | total_KLD_loss += KLD_loss.mean()* batch_size 191 | 192 | cvae_dec_traj_np = cvae_dec_traj.to('cpu').numpy() 193 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 194 | 195 | all_goal_traj_np = all_goal_traj.to('cpu').numpy() 196 | input_traj_np = input_traj.to('cpu').numpy() 197 | target_traj_np = target_traj.to('cpu').numpy() 198 | #import pdb;pdb.set_trace() 199 | # Decoder 200 | # batch_MSE_15, batch_MSE_05, batch_MSE_10, batch_FMSE, batch_CMSE, batch_CFMSE, batch_FIOU =\ 201 | # eval_jaad_pie(input_traj_np, target_traj_np, all_dec_traj_np) 202 | batch_results =\ 203 | eval_ethucy_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 204 | ADE_08 += batch_results['ADE_08'] 205 | ADE_12 += batch_results['ADE_12'] 206 | FDE_08 += batch_results['FDE_08'] 207 | FDE_12 += batch_results['FDE_12'] 208 | 209 | if batch_idx == 0: 210 | all_input = input_traj_np 211 | all_target = target_traj_np 212 | all_prediction = cvae_dec_traj_np 213 | else: 214 | all_input = np.vstack((all_input,input_traj_np)) 215 | all_target = np.vstack((all_target,target_traj_np)) 216 | all_prediction = np.vstack((all_prediction,cvae_dec_traj_np)) 217 | all_file_name.extend(current_img) 218 | 219 | 220 | 221 | 222 | ADE_08 /= count 223 | ADE_12 /= count 224 | FDE_08 /= count 225 | FDE_12 /= count 226 | 227 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 228 | 229 | return all_input,all_target,all_prediction,all_file_name 230 | 231 | def weights_init(m): 232 | if isinstance(m, nn.Linear): 233 | m.weight.data.normal_(0.0, 0.001) 234 | elif isinstance(m, nn.Conv1d): 235 | nn.init.normal_(m.weight.data) 236 | if m.bias is not None: 237 | nn.init.normal_(m.bias.data) 238 | elif isinstance(m, nn.Conv2d): 239 | nn.init.xavier_normal_(m.weight.data) 240 | if m.bias is not None: 241 | nn.init.normal_(m.bias.data) 242 | elif isinstance(m, nn.Conv3d): 243 | nn.init.xavier_normal_(m.weight.data) 244 | if m.bias is not None: 245 | nn.init.normal_(m.bias.data) 246 | elif isinstance(m, nn.ConvTranspose1d): 247 | nn.init.normal_(m.weight.data) 248 | if m.bias is not None: 249 | nn.init.normal_(m.bias.data) 250 | elif isinstance(m, nn.ConvTranspose2d): 251 | nn.init.xavier_normal_(m.weight.data) 252 | if m.bias is not None: 253 | nn.init.normal_(m.bias.data) 254 | elif isinstance(m, nn.ConvTranspose3d): 255 | nn.init.xavier_normal_(m.weight.data) 256 | if m.bias is not None: 257 | nn.init.normal_(m.bias.data) 258 | elif isinstance(m, 
nn.BatchNorm1d): 259 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 260 | nn.init.constant_(m.bias.data, 0) 261 | elif isinstance(m, nn.BatchNorm2d): 262 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 263 | nn.init.constant_(m.bias.data, 0) 264 | elif isinstance(m, nn.BatchNorm3d): 265 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 266 | nn.init.constant_(m.bias.data, 0) 267 | elif isinstance(m, nn.LSTM): 268 | for param in m.parameters(): 269 | if len(param.shape) >= 2: 270 | nn.init.orthogonal_(param.data) 271 | else: 272 | nn.init.normal_(param.data) 273 | elif isinstance(m, nn.LSTMCell): 274 | for param in m.parameters(): 275 | if len(param.shape) >= 2: 276 | nn.init.orthogonal_(param.data) 277 | else: 278 | nn.init.normal_(param.data) 279 | elif isinstance(m, nn.GRU): 280 | for param in m.parameters(): 281 | if len(param.shape) >= 2: 282 | nn.init.orthogonal_(param.data) 283 | else: 284 | nn.init.normal_(param.data) 285 | elif isinstance(m, nn.GRUCell): 286 | for param in m.parameters(): 287 | if len(param.shape) >= 2: 288 | nn.init.orthogonal_(param.data) 289 | else: 290 | nn.init.normal_(param.data) 291 | -------------------------------------------------------------------------------- /lib/dataloaders/JAAD_origin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for the JAAD dataset: 3 | 4 | A. Rasouli, I. Kotseruba, and J. K. Tsotsos,“Are they going to cross? 5 | a benchmark dataset and baseline for pedestrian crosswalk behavior,” In Proc. 6 | ICCV Workshop, 2017, pp. 206–213. 7 | 8 | A. Rasouli, I. Kotseruba, and J. K. Tsotsos, “Agreeing to cross: How drivers 9 | and pedestrians communicate,” In Proc. Intelligent Vehicles Symposium (IV), 10 | 2017, pp. 264–269. 11 | 12 | I. Kotseruba, A. Rasouli, and J. K. Tsotsos, “Joint attention in autonomous 13 | driving (jaad),” arXiv:1609.04741, 2016. 14 | 15 | MIT License 16 | 17 | Copyright (c) 2018 I. Kotseruba 18 | 19 | Permission is hereby granted, free of charge, to any person obtaining a copy 20 | of this software and associated documentation files (the "Software"), to deal 21 | in the Software without restriction, including without limitation the rights 22 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 23 | copies of the Software, and to permit persons to whom the Software is 24 | furnished to do so, subject to the following conditions: 25 | 26 | The above copyright notice and this permission notice shall be included in all 27 | copies or substantial portions of the Software. 28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 | SOFTWARE. 
36 | 37 | """ 38 | import sys 39 | import pickle 40 | import cv2 41 | 42 | import numpy as np 43 | import xml.etree.ElementTree as ET 44 | 45 | from os.path import join, abspath, exists 46 | from os import listdir, makedirs 47 | from sklearn.model_selection import train_test_split, KFold 48 | 49 | 50 | class JAAD(object): 51 | def __init__(self, data_path='', regen_pkl=False): 52 | """ 53 | Constructor of the jaad class 54 | :param data_path: Path to the folder of the dataset 55 | :param regen_pkl: Whether to regenerate the database 56 | """ 57 | self._year = '2016' 58 | self._name = 'JAAD' 59 | self._regen_pkl = regen_pkl 60 | self._image_ext = '.png' 61 | 62 | # Paths 63 | self._jaad_path = data_path if data_path else self._get_default_path() 64 | assert exists(self._jaad_path), \ 65 | 'Jaad path does not exist: {}'.format(self._jaad_path) 66 | self._data_split_ids_path = join(self._jaad_path, 'split_ids') 67 | self._annotation_path = join(self._jaad_path, 'annotations') 68 | self._annotation_vehicle_path = join(self._jaad_path, 'annotations_vehicle') 69 | self._annotation_traffic_path = join(self._jaad_path, 'annotations_traffic') 70 | self._annotation_attributes_path = join(self._jaad_path, 'annotations_attributes') 71 | self._annotation_appearance_path = join(self._jaad_path, 'annotations_appearance') 72 | self._clips_path = join(self._jaad_path, 'JAAD_clips') 73 | self._images_path = join(self._jaad_path, 'images') 74 | 75 | # Path generators 76 | @property 77 | def cache_path(self): 78 | """ 79 | Generate a path to save cache files 80 | :return: Cache file folder path 81 | """ 82 | cache_path = abspath(join(self._jaad_path, 'data_cache')) 83 | if not exists(cache_path): 84 | makedirs(cache_path) 85 | return cache_path 86 | 87 | def _get_default_path(self): 88 | """ 89 | Return the default path where jaad_raw files are expected to be placed. 
90 | :return: the default path to the dataset folder 91 | """ 92 | return 'dataset/jaad' 93 | 94 | def _get_video_ids_split(self, image_set, subset='default'): 95 | """ 96 | Returns a list of video ids for a given data split 97 | :param image_set: Data split, train, test, val 98 | :return: The list of video ids 99 | """ 100 | vid_ids = [] 101 | sets = [image_set] if image_set != 'all' else ['train', 'test', 'val'] 102 | for s in sets: 103 | vid_id_file = join(self._data_split_ids_path, subset, s + '.txt') 104 | with open(vid_id_file, 'rt') as fid: 105 | vid_ids.extend([x.strip() for x in fid.readlines()]) 106 | return vid_ids 107 | 108 | def _get_video_ids(self): 109 | """ 110 | Returns a list of all video ids 111 | :return: The list of video ids 112 | """ 113 | return [vid.split('.')[0] for vid in listdir(self._annotation_path)] 114 | 115 | def _get_image_path(self, vid, fid): 116 | """ 117 | Generates the image path given ids 118 | :param vid: Video id 119 | :param fid: Frame id 120 | :return: Return the path to the given image 121 | """ 122 | return join(self._images_path, vid, 123 | '{:05d}.png'.format(fid)) 124 | 125 | # Visual helpers 126 | def update_progress(self, progress): 127 | """ 128 | Creates a progress bar 129 | :param progress: The progress thus far 130 | """ 131 | barLength = 20 132 | status = "" 133 | if isinstance(progress, int): 134 | progress = float(progress) 135 | 136 | block = int(round(barLength * progress)) 137 | text = "\r[{}] {:0.2f}% {}".format("#" * block + "-" * (barLength - block), progress * 100, status) 138 | sys.stdout.write(text) 139 | sys.stdout.flush() 140 | 141 | def _print_dict(self, dic): 142 | """ 143 | Prints a dictionary, one key-value pair per line 144 | :param dic: Dictionary 145 | """ 146 | for k, v in dic.items(): 147 | print('%s: %s' % (str(k), str(v))) 148 | 149 | # Image processing helpers 150 | def _squarify(self, bbox, ratio, img_width): 151 | """ 152 | Changes is the ratio of bounding boxes to a fixed ratio 153 | :param bbox: Bounding box 154 | :param ratio: Ratio to be changed to 155 | :param img_width: Image width 156 | :return: Squarified boduning box 157 | """ 158 | width = abs(bbox[0] - bbox[2]) 159 | height = abs(bbox[1] - bbox[3]) 160 | width_change = height * ratio - width 161 | 162 | bbox[0] = bbox[0] - width_change / 2 163 | bbox[2] = bbox[2] + width_change / 2 164 | if bbox[0] < 0: 165 | bbox[0] = 0 166 | 167 | # check whether the new bounding box goes beyond image boarders 168 | # If this is the case, the bounding box is shifted back 169 | if bbox[2] > img_width: 170 | bbox[0] = bbox[0] - bbox[2] + img_width 171 | bbox[2] = img_width 172 | return bbox 173 | 174 | def extract_and_save_images(self): 175 | """ 176 | Extract images from clips and save on drive 177 | """ 178 | 179 | videos = [f.split('.')[0] for f in sorted(listdir(self._clips_path))] 180 | 181 | for vid in videos: 182 | path_to_file = join(self._annotation_path, vid + '.xml') 183 | print(vid) 184 | tree = ET.parse(path_to_file) 185 | num_frames = int(tree.find("./meta/task/size").text) 186 | 187 | video_clip_path = join(self._clips_path, vid + '.mp4') 188 | 189 | save_images_path = join(self._images_path, vid) 190 | if not exists(save_images_path): 191 | makedirs(save_images_path) 192 | 193 | vidcap = cv2.VideoCapture(video_clip_path) 194 | success, image = vidcap.read() 195 | frame_num = 0 196 | img_count = 0 197 | if not success: 198 | print('Failed to open the video {}'.format(vid)) 199 | while success: 200 | self.update_progress(img_count / num_frames) 201 | 
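                # Each decoded frame is saved as <video_id>/{frame:05d}.png; frames that
                # already exist on disk are skipped, so an interrupted extraction can be
                # re-run without rewriting images.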
img_count += 1 202 | if not exists(join(save_images_path, "{:05d}.png").format(frame_num)): 203 | cv2.imwrite(join(save_images_path, "{:05d}.png").format(frame_num), image) 204 | else: 205 | print('path %s already exists') 206 | success, image = vidcap.read() 207 | frame_num += 1 208 | if num_frames != img_count: 209 | print('num images don\'t match {}/{}'.format(num_frames, img_count)) 210 | print('\n') 211 | 212 | # Annotation processing helpers 213 | def _map_text_to_scalar(self, label_type, value): 214 | """ 215 | Maps a text label in XML file to scalars 216 | :param label_type: The label type 217 | :param value: The text to be mapped 218 | :return: The scalar value 219 | """ 220 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2}, 221 | 'action': {'standing': 0, 'walking': 1}, 222 | 'nod': {'__undefined__': 0, 'nodding': 1}, 223 | 'look': {'not-looking': 0, 'looking': 1}, 224 | 'hand_gesture': {'__undefined__': 0, 'greet': 1, 'yield': 2, 225 | 'rightofway': 3, 'other': 4}, 226 | 'reaction': {'__undefined__': 0, 'clear_path': 1, 'speed_up': 2, 227 | 'slow_down': 3}, 228 | 'cross': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1}, 229 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3}, 230 | 'designated': {'ND': 0, 'D': 1}, 231 | 'gender': {'n/a': 0, 'female': 1, 'male': 2}, 232 | 'intersection': {'no': 0, 'yes': 1}, 233 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2}, 234 | 'traffic_direction': {'OW': 0, 'TW': 1}, 235 | 'signalized': {'n/a': 0, 'NS': 1, 'S': 2}, 236 | 'vehicle': {'stopped': 0, 'moving_slow': 1, 'moving_fast': 2, 237 | 'decelerating': 3, 'accelerating': 4}, 238 | 'road_type': {'street': 0, 'parking_lot': 1, 'garage': 2}, 239 | 'traffic_light': {'n/a': 0, 'red': 1, 'green': 2}} 240 | 241 | return map_dic[label_type][value] 242 | 243 | def _map_scalar_to_text(self, label_type, value): 244 | """ 245 | Maps a scalar value to a text label 246 | :param label_type: The label type 247 | :param value: The scalar to be mapped 248 | :return: The text label 249 | """ 250 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'}, 251 | 'action': {0: 'standing', 1: 'walking'}, 252 | 'nod': {0: '__undefined__', 1: 'nodding'}, 253 | 'look': {0: 'not-looking', 1: 'looking'}, 254 | 'hand_gesture': {0: '__undefined__', 1: 'greet', 255 | 2: 'yield', 3: 'rightofway', 256 | 4: 'other'}, 257 | 'reaction': {0: '__undefined__', 1: 'clear_path', 258 | 2: 'speed_up', 3: 'slow_down'}, 259 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'}, 260 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'}, 261 | 'designated': {0: 'ND', 1: 'D'}, 262 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'}, 263 | 'intersection': {0: 'no', 1: 'yes'}, 264 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'}, 265 | 'traffic_direction': {0: 'OW', 1: 'TW'}, 266 | 'signalized': {0: 'n/a', 1: 'NS', 2: 'S'}, 267 | 'vehicle': {0: 'stopped', 1: 'moving_slow', 2: 'moving_fast', 268 | 3: 'decelerating', 4: 'accelerating'}, 269 | 'road_type': {0: 'street', 1: 'parking_lot', 2: 'garage'}, 270 | 'traffic_light': {0: 'n/a', 1: 'red', 2: 'green'}} 271 | 272 | return map_dic[label_type][value] 273 | 274 | def _get_annotations(self, vid): 275 | """ 276 | Generates a dictinary of annotations by parsing the video XML file 277 | :param vid: The id of video to parse 278 | :return: A dictionary of annotations 279 | """ 280 | path_to_file = join(self._annotation_path, vid + '.xml') 281 | tree = ET.parse(path_to_file) 282 | ped_annt = 'ped_annotations' 283 | 284 | annotations = {} 285 | 
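        # Assemble a per-video dictionary: frame count and resolution at the top level,
        # and one entry per track under 'ped_annotations' with its frame ids, bounding
        # boxes and occlusion flags (plus behavior labels when the old_id contains
        # 'pedestrian').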
annotations['num_frames'] = int(tree.find("./meta/task/size").text) 286 | annotations['width'] = int(tree.find("./meta/task/original_size/width").text) 287 | annotations['height'] = int(tree.find("./meta/task/original_size/height").text) 288 | annotations[ped_annt] = {} 289 | 290 | ped_tracks = tree.findall("./track") 291 | 292 | for t in ped_tracks: 293 | boxes = t.findall('./box') 294 | new_id = boxes[0].find('./attribute[@name=\"id\"]').text 295 | old_id = boxes[0].find('./attribute[@name=\"old_id\"]').text 296 | annotations[ped_annt][new_id] = {'old_id': old_id, 'frames': [], 297 | 'bbox': [], 'occlusion': []} 298 | if 'pedestrian' in old_id: 299 | annotations['ped_annotations'][new_id]['behavior'] = {'cross': [], 300 | 'reaction': [], 301 | 'hand_gesture': [], 302 | 'look': [], 303 | 'action': [], 304 | 'nod': []} 305 | else: 306 | annotations[ped_annt][new_id]['behavior'] = {} 307 | 308 | for b in boxes: 309 | annotations[ped_annt][new_id]['bbox'].append( 310 | [float(b.get('xtl')), float(b.get('ytl')), 311 | float(b.get('xbr')), float(b.get('ybr'))]) 312 | occ = self._map_text_to_scalar('occlusion', 313 | b.find('./attribute[@name=\"occlusion\"]').text) 314 | annotations[ped_annt][new_id]['occlusion'].append(occ) 315 | annotations[ped_annt][new_id]['frames'].append(int(b.get('frame'))) 316 | for beh in annotations['ped_annotations'][new_id]['behavior'].keys(): 317 | annotations[ped_annt][new_id]['behavior'][beh].append( 318 | self._map_text_to_scalar(beh, 319 | b.find('./attribute[@name=\"' + beh + '\"]').text)) 320 | 321 | return annotations 322 | 323 | def _get_ped_attributes(self, vid): 324 | """ 325 | Generates a dictinary of attributes by parsing the video XML file 326 | :param vid: The id of video to parse 327 | :return: A dictionary of attributes 328 | """ 329 | path_to_file = join(self._annotation_attributes_path, vid + '_attributes.xml') 330 | tree = ET.parse(path_to_file) 331 | 332 | attributes = {} 333 | pedestrians = tree.findall("./pedestrian") 334 | for p in pedestrians: 335 | new_id = p.get('id') 336 | old_id = p.get('old_id') 337 | attributes[new_id] = {'old_id': old_id} 338 | for k, v in p.items(): 339 | if 'id' in k: 340 | continue 341 | try: 342 | attributes[new_id][k] = int(v) 343 | except ValueError: 344 | attributes[new_id][k] = self._map_text_to_scalar(k, v) 345 | 346 | return attributes 347 | 348 | def _get_ped_appearance(self, vid): 349 | """ 350 | Generates a dictinary of appearance annotations by parsing the video XML file 351 | :param vid: The id of video to parse. The labels are as follows: 352 | - pose_front, pose_back... - coarse pose of the pedestrian relative to the camera 353 | - clothes_below_knee - long clothing 354 | - clothes_upper_light, clothes_lower_dark... 
- coarse clothing color above/below waist 355 | - backpack - presence of a backpack (worn on the back, not held in hand) 356 | - bag_hand, bag_elbow, bag_shoulder - whether bag(s) are held in a hand, on a bent elbow or worn on a shoulder 357 | - bag_left_side, bag_right_side - whether bag(s) appear on the left/right side of the pedestrian body 358 | - cap,hood - headwear 359 | - umbrella,phone,baby,object - various things carried by the pedestrians 360 | - stroller/cart - objects being pushed by the pedestrian 361 | - bicycle/motorcycle - for pedestrians riding or walking these vehicles 362 | :return: A dictionary of appearance annotations 363 | """ 364 | labels = ['pose_front', 'pose_back', 'pose_left', 'pose_right', 365 | 'clothes_below_knee', 'clothes_upper_light', 'clothes_upper_dark', 'clothes_lower_light', 366 | 'clothes_lower_dark', 'backpack', 'bag_hand', 'bag_elbow', 367 | 'bag_shoulder', 'bag_left_side', 'bag_right_side', 'cap', 368 | 'hood', 'sunglasses', 'umbrella', 'phone', 369 | 'baby', 'object', 'stroller_cart', 'bicycle_motorcycle'] 370 | path_to_file = join(self._annotation_appearance_path , vid + '_appearance.xml') 371 | tree = ET.parse(path_to_file) 372 | annotations = {} 373 | ped_tracks = tree.findall("./track") 374 | for t in ped_tracks: 375 | boxes = t.findall('./box') 376 | new_id = t.get("id") 377 | annotations[new_id] = dict(zip(labels, [[] for _ in range(len(labels))])) 378 | annotations[new_id]['frames'] = [] 379 | for b in boxes: 380 | annotations[new_id]['frames'].append(int(b.get('frame'))) 381 | for l in labels: 382 | annotations[new_id][l].append(b.get(l)) 383 | return annotations 384 | 385 | def _get_traffic_attributes(self, vid): 386 | """ 387 | Generates a dictinary of vehicle attributes by parsing the video XML file 388 | :param vid: The id of video to parse 389 | :return: A dictionary of vehicle attributes 390 | """ 391 | path_to_file = join(self._annotation_traffic_path, vid + '_traffic.xml') 392 | tree = ET.parse(path_to_file) 393 | road_type = tree.find("./road_type").text 394 | traffic_attributes = {'road_type': self._map_text_to_scalar('road_type', road_type)} 395 | frames = tree.findall("./frame") 396 | for f in frames: 397 | traffic_attributes[int(f.get('id'))] = {'ped_crossing': f.get('ped_crossing'), 398 | 'ped_sign': f.get('ped_sign'), 399 | 'stop_sign': f.get('stop_sign'), 400 | 'traffic_light': self._map_text_to_scalar('traffic_light', 401 | f.get('traffic_light'))} 402 | 403 | return traffic_attributes 404 | 405 | def _get_vehicle_attributes(self, vid): 406 | """ 407 | Generates a dictinary of vehicle attributes by parsing the video XML file 408 | :param vid: The id of video to parse 409 | :return: A dictionary of vehicle attributes 410 | """ 411 | path_to_file = join(self._annotation_vehicle_path, vid + '_vehicle.xml') 412 | tree = ET.parse(path_to_file) 413 | 414 | veh_attributes = {} 415 | frames = tree.findall("./frame") 416 | for f in frames: 417 | veh_attributes[int(f.get('id'))] = self._map_text_to_scalar('vehicle', f.get('action')) 418 | 419 | return veh_attributes 420 | 421 | def generate_database(self): 422 | """ 423 | Generate a database of jaad dataset by integrating all annotations 424 | Dictionary structure: 425 | 'vid_id'(str): { 426 | 'num_frames': int 427 | 'width': int 428 | 'height': int 429 | 'ped_annotations'(str): { 430 | 'ped_id'(str): { 431 | 'old_id': str 432 | 'frames: list(int) 433 | 'occlusion': list(int) 434 | 'bbox': list([x1, y1, x2, y2]) 435 | 'behavior'(str): { 436 | 'action': list(int) 437 | 'reaction': 
list(int) 438 | 'nod': list(int) 439 | 'hand_gesture': list(int) 440 | 'cross': list(int) 441 | 'look': list(int) 442 | 'appearance'(str): { 443 | 'pose_front':list(int) 444 | 'pose_back':list(int) 445 | 'pose_left':list(int) 446 | 'pose_right':list(int) 447 | 'clothes_below_knee':list(int) 448 | 'clothes_upper_light':list(int) 449 | 'clothes_upper_dark':list(int) 450 | 'clothes_lower_light':list(int) 451 | 'clothes_lower_dark':list(int) 452 | 'backpack':list(int) 453 | 'bag_hand':list(int) 454 | 'bag_elbow':list(int) 455 | 'bag_shoulder':list(int) 456 | 'bag_left_side':list(int) 457 | 'bag_right_side':list(int) 458 | 'cap':list(int) 459 | 'hood':list(int) 460 | 'sunglasses':list(int) 461 | 'umbrella':list(int) 462 | 'phone':list(int) 463 | 'baby':list(int) 464 | 'object':list(int) 465 | 'stroller_cart':list(int) 466 | 'bicycle_motorcycle':list(int) 467 | 'attributes'(str): { 468 | 'age': int 469 | 'old_id': str 470 | 'num_lanes': int 471 | 'crossing': int 472 | 'gender': int 473 | 'crossing_point': int 474 | 'decision_point': int 475 | 'intersection': int 476 | 'designated': int 477 | 'signalized': int 478 | 'traffic_direction': int 479 | 'group_size': int 480 | 'motion_direction': int 481 | 'vehicle_annotations'(str): { 482 | frames(int):{ 483 | action: int 484 | 'traffic_annotations'(str): { 485 | road_type: int 486 | frames(int):{ 487 | ped_crossing: int 488 | ped_sign: int 489 | stop_sign: int 490 | traffic_light: int 491 | 492 | :return: A database dictionary 493 | """ 494 | print('---------------------------------------------------------') 495 | print("Generating database for jaad") 496 | 497 | # Generates a list of behavioral xml file names for videos 498 | cache_file = join(self.cache_path, 'jaad_database.pkl') 499 | if exists(cache_file) and not self._regen_pkl: 500 | with open(cache_file, 'rb') as fid: 501 | try: 502 | database = pickle.load(fid) 503 | except: 504 | database = pickle.load(fid, encoding='bytes') 505 | print('jaad database loaded from {}'.format(cache_file)) 506 | return database 507 | 508 | video_ids = sorted(self._get_video_ids()) 509 | database = {} 510 | for vid in video_ids: 511 | #print('Getting annotations for %s' % vid) 512 | vid_annotations = self._get_annotations(vid) 513 | vid_attributes = self._get_ped_attributes(vid) 514 | vid_appearance = self._get_ped_appearance(vid) 515 | vid_veh_annotations = self._get_vehicle_attributes(vid) 516 | vid_traffic_annotations = self._get_traffic_attributes(vid) 517 | 518 | # Combining all annotations 519 | vid_annotations['vehicle_annotations'] = vid_veh_annotations 520 | vid_annotations['traffic_annotations'] = vid_traffic_annotations 521 | for ped in vid_annotations['ped_annotations']: 522 | try: 523 | vid_annotations['ped_annotations'][ped]['attributes'] = vid_attributes[ped] 524 | except KeyError: 525 | vid_annotations['ped_annotations'][ped]['attributes'] = {} 526 | try: 527 | vid_annotations['ped_annotations'][ped]['appearance'] = vid_appearance[ped] 528 | except KeyError: 529 | vid_annotations['ped_annotations'][ped]['appearance'] = {} 530 | 531 | database[vid] = vid_annotations 532 | 533 | with open(cache_file, 'wb') as fid: 534 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL) 535 | print('The database is written to {}'.format(cache_file)) 536 | 537 | return database 538 | 539 | def get_data_stats(self): 540 | """ 541 | Generates statistics for jaad dataset 542 | """ 543 | annotations = self.generate_database() 544 | 545 | videos_count = len(annotations.keys()) 546 | ped_box_beh_count = 0 547 | 
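        # Counters are split by id pattern, mirroring the loop below: ids containing 'b'
        # are pedestrians with behavior annotations, ids containing 'p' are 'people'
        # tracks, and the remaining ids are pedestrians without behavior tags.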
ped_beh_count = 0 548 | ped_count = 0 549 | ped_box_count = 0 550 | people_count = 0 551 | people_box_count = 0 552 | total_frames = 0 553 | 554 | for vid in annotations: 555 | total_frames += annotations[vid]['num_frames'] 556 | for ped in annotations[vid]['ped_annotations']: 557 | 558 | if 'b' in ped: 559 | ped_beh_count += 1 560 | ped_box_beh_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 561 | elif 'p' in ped: 562 | people_count += 1 563 | people_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 564 | else: 565 | ped_count += 1 566 | ped_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 567 | 568 | print('---------------------------------------------------------') 569 | print("Number of videos: %d" % videos_count) 570 | print("Number of frames: %d" % total_frames) 571 | print("Number of pedestrians with behavior tag: %d" % ped_beh_count) 572 | print("Number of pedestrians with no behavior tag: %d" % ped_count) 573 | print("Number of people: %d" % people_count) 574 | print("Total number of pedestrians: %d" % (ped_count + ped_beh_count + people_count)) 575 | 576 | print("Number of pedestrian bounding boxes with behavior tag: %d" % ped_box_beh_count) 577 | print("Number of pedestrian bounding boxes with no behavior tag: %d" % ped_box_count) 578 | print("Number of people bounding boxes: %d" % people_box_count) 579 | print("Total number of pedestrian bounding boxes: %d" % (ped_box_beh_count + ped_box_count)) 580 | 581 | def balance_samples_count(self, seq_data, label_type, random_seed=42): 582 | """ 583 | Balances the number of positive and negative samples by randomly sampling 584 | from the more represented samples. Only works for binary classes. 585 | :param seq_data: The sequence data to be balanced. 586 | :param label_type: The lable type based on which the balancing takes place. 587 | The label values must be binary, i.e. only 0, 1. 588 | :param random_seed: The seed for random number generator. 589 | :return: Balanced data sequence. 
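        For illustration: with 40 positive and 100 negative samples, 60 randomly chosen
        negative indices are dropped from every list-valued field, leaving 40 samples of
        each class.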
590 | """ 591 | for lbl in seq_data[label_type]: 592 | for i in lbl: 593 | if i[0] not in [0, 1]: 594 | raise Exception("The label values used for balancing must be" 595 | " either 0 or 1") 596 | 597 | # balances the number of positive and negative samples 598 | print('---------------------------------------------------------') 599 | print("Balancing the number of positive and negative intention samples") 600 | 601 | gt_labels = [gt[0] for gt in seq_data[label_type]] 602 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) 603 | num_neg_samples = len(gt_labels) - num_pos_samples 604 | 605 | new_seq_data = {} 606 | # finds the indices of the samples with larger quantity 607 | if num_neg_samples == num_pos_samples: 608 | print('Positive and negative samples are already balanced') 609 | return seq_data 610 | else: 611 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples)) 612 | if num_neg_samples > num_pos_samples: 613 | rm_index = np.where(np.array(gt_labels) == 0)[0] 614 | else: 615 | rm_index = np.where(np.array(gt_labels) == 1)[0] 616 | 617 | # Calculate the difference of sample counts 618 | dif_samples = abs(num_neg_samples - num_pos_samples) 619 | # shuffle the indices 620 | np.random.seed(random_seed) 621 | np.random.shuffle(rm_index) 622 | # reduce the number of indices to the difference 623 | rm_index = rm_index[0:dif_samples] 624 | # update the data 625 | for k in seq_data: 626 | seq_data_k = seq_data[k] 627 | if not isinstance(seq_data[k], list): 628 | new_seq_data[k] = seq_data[k] 629 | else: 630 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] 631 | 632 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]] 633 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) 634 | print('Balanced:\t Positive: %d \t Negative: %d\n' 635 | % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples)) 636 | return new_seq_data 637 | 638 | # Pedestrian id helpers 639 | def _get_pedestrian_ids(self, sample_type='all'): 640 | """ 641 | Get all pedestrian ids 642 | :return: A list of pedestrian ids 643 | """ 644 | annotations = self.generate_database() 645 | pids = [] 646 | for vid in sorted(annotations): 647 | if sample_type == 'beh': 648 | pids.extend([p for p in annotations[vid]['ped_annotations'].keys() if 'b' in p]) 649 | else: 650 | pids.extend(annotations[vid]['ped_annotations'].keys()) 651 | return pids 652 | 653 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False, sample_type='all'): 654 | """ 655 | Generates and save a database of activities for all pedestriasns 656 | :param image_set: The data split to return 657 | :param ratios: The ratios to split the data. There should be 2 ratios (or 3 if val_data is true) 658 | and they should sum to 1. e.g. 
[0.4, 0.6], [0.3, 0.5, 0.2] 659 | :param val_data: Whether to generate validation data 660 | :param regen_data: Whether to overwrite the existing data 661 | :return: The random sample split 662 | """ 663 | 664 | assert image_set in ['train', 'test', 'val'] 665 | cache_file = join(self.cache_path, "random_samples.pkl") 666 | if exists(cache_file) and not regen_data: 667 | print("Random sample currently exists.\n Loading from %s" % cache_file) 668 | with open(cache_file, 'rb') as fid: 669 | try: 670 | rand_samples = pickle.load(fid) 671 | except: 672 | rand_samples = pickle.load(fid, encoding='bytes') 673 | assert image_set in rand_samples, "%s does not exist in random samples\n" \ 674 | "Please try again by setting regen_data = True" % image_set 675 | if val_data: 676 | assert len(rand_samples['ratios']) == 3, "The existing random samples " \ 677 | "does not have validation data.\n" \ 678 | "Please try again by setting regen_data = True" 679 | if ratios is not None: 680 | assert ratios == rand_samples['ratios'], "Specified ratios {} does not match the ones in existing file {}.\n\ 681 | Perform one of the following options:\ 682 | 1- Set ratios to None\ 683 | 2- Set ratios to the same values \ 684 | 3- Regenerate data".format(ratios, rand_samples['ratios']) 685 | 686 | print('The ratios are {}'.format(rand_samples['ratios'])) 687 | print("Number of %s tracks %d" % (image_set, len(rand_samples[image_set]))) 688 | return rand_samples[image_set] 689 | 690 | if ratios is None: 691 | if val_data: 692 | ratios = [0.5, 0.4, 0.1] 693 | else: 694 | ratios = [0.5, 0.5] 695 | 696 | assert sum(ratios) > 0.999999, "Ratios {} do not sum to 1".format(ratios) 697 | if val_data: 698 | assert len(ratios) == 3, "To generate validation data three ratios should be selected" 699 | else: 700 | assert len(ratios) == 2, "With no validation only two ratios should be selected" 701 | 702 | print("################ Generating Random training/testing data ################") 703 | ped_ids = self._get_pedestrian_ids(sample_type) 704 | print("Total number of tracks %d" % len(ped_ids)) 705 | print('The ratios are {}'.format(ratios)) 706 | sample_split = {'ratios': ratios} 707 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0]) 708 | print("Number of train tracks %d" % len(train_samples)) 709 | 710 | if val_data: 711 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:])) 712 | print("Number of val tracks %d" % len(val_samples)) 713 | sample_split['val'] = val_samples 714 | 715 | print("Number of test tracks %d" % len(test_samples)) 716 | sample_split['train'] = train_samples 717 | sample_split['test'] = test_samples 718 | 719 | cache_file = join(self.cache_path, "random_samples.pkl") 720 | with open(cache_file, 'wb') as fid: 721 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL) 722 | print('jaad {} samples written to {}'.format('random', cache_file)) 723 | return sample_split[image_set] 724 | 725 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1, sample_type='all'): 726 | """ 727 | Generate kfold pedestrian ids 728 | :param image_set: Image set split 729 | :param num_folds: Number of folds 730 | :param fold: The given fold 731 | :return: List of pedestrian ids for the given fold 732 | """ 733 | assert image_set in ['train', 'test'], "For K-fold data split, image-set should be either \"train\" or \"test\"" 734 | assert fold <= num_folds, "Fold number should be smaller than number of folds" 735 | print("################ 
Generating %d fold data ################" % num_folds) 736 | cache_file = join(self.cache_path, "%d_fold_samples.pkl" % num_folds) 737 | 738 | if exists(cache_file): 739 | print("Loading %d-fold data from %s" % (num_folds, cache_file)) 740 | with open(cache_file, 'rb') as fid: 741 | try: 742 | fold_idx = pickle.load(fid) 743 | except: 744 | fold_idx = pickle.load(fid, encoding='bytes') 745 | else: 746 | ped_ids = self._get_pedestrian_ids(sample_type) 747 | kf = KFold(n_splits=num_folds, shuffle=True) 748 | fold_idx = {'pid': ped_ids} 749 | count = 1 750 | for train_index, test_index in kf.split(ped_ids): 751 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()} 752 | count += 1 753 | with open(cache_file, 'wb') as fid: 754 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL) 755 | print('jaad {}-fold samples written to {}'.format(num_folds, cache_file)) 756 | print("Number of %s tracks %d" % (image_set, len(fold_idx[fold][image_set]))) 757 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]] 758 | return kfold_ids 759 | 760 | # Pedestrian detection generators 761 | def get_detection_data(self, image_set, method, occlusion_type=None, file_path='data/', **params): 762 | """ 763 | Generates data for pedestrian detection algorithms 764 | :param image_set: Split set name 765 | :param method: Detection algorithm: frcnn, retinanet, yolo3, ssd 766 | :param occlusion_type: The types of occlusion: None: only unoccluded samples 767 | part: Unoccluded and partially occluded samples 768 | full: All samples 769 | :param file_path: Where to save the script file 770 | :return: Pedestrian samples 771 | """ 772 | squarify_ratio = params['squarify_ratio'] 773 | frame_stride = params['fstride'] 774 | height_rng = params['height_rng'] 775 | if not exists(file_path): 776 | makedirs(file_path) 777 | if height_rng is None: 778 | height_rng = [0, float('inf')] 779 | 780 | annotations = self.generate_database() 781 | video_ids, _pids = self._get_data_ids(image_set, params) 782 | 783 | ped_samples = {} 784 | unique_samples = [] 785 | total_sample_count = 0 786 | for vid in video_ids: 787 | img_width = annotations[vid]['width'] 788 | img_height = annotations[vid]['height'] 789 | num_frames = annotations[vid]['num_frames'] 790 | for i in range(0,num_frames,frame_stride): 791 | ped_samples[join(self._jaad_path, 'images', vid, '{:05d}.png'.format(i))] = [] 792 | for pid in annotations[vid]['ped_annotations']: 793 | if params['data_split_type'] != 'default' and pid not in _pids: 794 | continue 795 | difficult = 0 796 | if 'p' in pid: 797 | difficult = -1 798 | if image_set in ['train', 'val']: 799 | continue 800 | imgs = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in \ 801 | annotations[vid]['ped_annotations'][pid]['frames']] 802 | boxes = annotations[vid]['ped_annotations'][pid]['bbox'] 803 | occlusion = annotations[vid]['ped_annotations'][pid]['occlusion'] 804 | for i, b in enumerate(boxes): 805 | if imgs[i] not in ped_samples: 806 | continue 807 | bbox_height = abs(b[0] - b[2]) 808 | if height_rng[0] <= bbox_height <= height_rng[1]: 809 | if (occlusion_type == None and occlusion[i] == 0) or \ 810 | (occlusion_type == 'part' and occlusion[i] < 2) or \ 811 | (occlusion_type == 'full'): 812 | if squarify_ratio: 813 | b = self._squarify(b, squarify_ratio, img_width) 814 | ped_samples[imgs[i]].append( 815 | {'width': img_width, 816 | 'height': img_height, 817 | 'tag': pid, 818 | 'box': b, 819 | 'seg_area': (b[2] - b[0] 
+ 1) * (b[3] - b[1] + 1), 820 | 'occlusion': occlusion[i], 821 | 'difficult': difficult}) 822 | if pid not in unique_samples: 823 | unique_samples.append(pid) 824 | total_sample_count += 1 825 | print('Number of unique pedestrians %d ' % len(unique_samples)) 826 | print('Number of samples %d ' % total_sample_count) 827 | if method == 'frcnn': 828 | return self._get_data_frcnn(ped_samples) 829 | elif method == 'retinanet': 830 | return self._generate_csv_data_retinanet(image_set, file_path, ped_samples) 831 | elif method == 'yolo3': 832 | return self._generate_csv_data_yolo3(image_set, file_path, ped_samples) 833 | elif method == 'ssd': 834 | return self._generate_csv_data_ssd(image_set, file_path, ped_samples) 835 | 836 | def _get_data_frcnn(self, ped_samples): 837 | """ 838 | Data generation for Faster-rcnn algorithm 839 | :param ped_samples: Dictionary of all samples 840 | """ 841 | classes_count = {} 842 | class_mapping = {} 843 | all_imgs = {} 844 | class_name = 'pedestrian' 845 | classes_count['bg'] = 0 846 | class_mapping['bg'] = 1 847 | classes_count[class_name] = 0 848 | class_mapping[class_name] = 0 849 | 850 | for img, samples in sorted(ped_samples.items()): 851 | if not samples: 852 | continue 853 | all_imgs[img] = {'filepath': img, 'width': samples[0]['width'], 854 | 'height': samples[0]['height'], 'bboxes': []} 855 | for s in samples: 856 | box = s['box'] 857 | all_imgs[img]['bboxes'].append({'class': class_name, 'x1': box[0], 858 | 'x2': box[2], 'y1': box[1], 'y2': box[3]}) 859 | print('Data generated for Faster-rcnn') 860 | all_data = [] 861 | for key in all_imgs: 862 | all_data.append(all_imgs[key]) 863 | return all_data, classes_count, class_mapping 864 | 865 | def _generate_csv_data_retinanet(self, image_set, file_path, ped_samples): 866 | """ 867 | Data generation for Retinanet algorithm 868 | :param image_set: Data split 869 | :param file_path: Path to save the data 870 | :param ped_samples: Dictionary of all samples 871 | """ 872 | class_name = 'pedestrian' 873 | data_save_path = file_path + 'retinanet_' + image_set + '.csv' 874 | with open(data_save_path, "wt") as f: 875 | for img, samples in sorted(ped_samples.items()): 876 | if not samples: 877 | f.write('%s,,,,,\n' % (img)) 878 | for s in samples: 879 | box = s['box'] 880 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], class_name)) 881 | print('Data generated for Retinanet') 882 | 883 | map_path = file_path + '_mapping.csv' 884 | with open(map_path, "w") as f: 885 | f.write('%s,0\n' % (class_name)) 886 | return data_save_path, map_path 887 | 888 | def _generate_csv_data_yolo3(self, image_set, file_path, ped_samples): 889 | """ 890 | Data generation for YOLO3 algorithm 891 | :param image_set: Data split 892 | :param file_path: Path to save the data 893 | :param ped_samples: Dictionary of all samples 894 | """ 895 | class_name = 'pedestrian' 896 | all_imgs = {} 897 | data_save_path = file_path + 'yolo3_' + image_set + '.txt' 898 | with open(data_save_path, "wt") as f: 899 | for img, samples in sorted(ped_samples.items()): 900 | if not samples: 901 | continue 902 | f.write('%s ' % (img)) 903 | for s in samples: 904 | box = s['box'] 905 | f.write('%.0f,%.0f,%.0f,%.0f,%.0f ' % (box[0], box[1], box[2], box[3], 0)) 906 | f.write('\n') 907 | print('Data generated for YOLO3') 908 | map_path = file_path + 'mapping_yolo3' 909 | with open(map_path, "wt") as f: 910 | f.write('%s,0\n' % (class_name)) 911 | return data_save_path, map_path 912 | 913 | def _generate_csv_data_ssd(self, image_set, 
file_path, ped_samples): 914 | """ 915 | Data generation for SSD algorithm 916 | :param image_set: Data split 917 | :param file_path: Path to save the data 918 | :param ped_samples: Dictionary of all samples 919 | """ 920 | data_save_path = file_path + 'ssd_' + image_set + '.csv' 921 | with open(data_save_path, "wt") as f: 922 | for img, samples in sorted(ped_samples.items()): 923 | if not samples: 924 | continue 925 | for s in samples: 926 | box = s['box'] 927 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], 1)) 928 | print('Data generated for SSD') 929 | return data_save_path 930 | 931 | # Trajectory data generation 932 | def _get_data_ids(self, image_set, params): 933 | """ 934 | A helper function to generate set id and ped ids (if needed) for processing 935 | :param image_set: Image-set to generate data 936 | :param params: Data generation params 937 | :return: Set and pedestrian ids 938 | """ 939 | _pids = None 940 | 941 | if params['data_split_type'] == 'default': 942 | return self._get_video_ids_split(image_set, params['subset']), _pids 943 | 944 | video_ids = self._get_video_ids_split('all', params['subset']) 945 | if params['data_split_type'] == 'random': 946 | params['random_params']['sample_type'] = params['sample_type'] 947 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params']) 948 | elif params['data_split_type'] == 'kfold': 949 | params['kfold_params']['sample_type'] = params['sample_type'] 950 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params']) 951 | 952 | return video_ids, _pids 953 | 954 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion): 955 | """ 956 | Checks whether the bounding boxes are within a given height limit. If not, it 957 | will adjust the length of data sequences accordingly 958 | :param height_rng: Height limit [lower, higher] 959 | :param frame_ids: List of frame ids 960 | :param boxes: List of bounding boxes 961 | :param images: List of images 962 | :param occlusion: List of occlusions 963 | :return: The adjusted data sequences 964 | """ 965 | imgs, box, frames, occ = [], [], [], [] 966 | for i, b in enumerate(boxes): 967 | bbox_height = abs(b[0] - b[2]) 968 | if height_rng[0] <= bbox_height <= height_rng[1]: 969 | box.append(b) 970 | imgs.append(images[i]) 971 | frames.append(frame_ids[i]) 972 | occ.append(occlusion[i]) 973 | return imgs, box, frames, occ 974 | 975 | def _get_center(self, box): 976 | """ 977 | Calculates the center coordinate of a bounding box 978 | :param box: Bounding box coordinates 979 | :return: The center coordinate 980 | """ 981 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2] 982 | 983 | def generate_data_trajectory_sequence(self, image_set, **opts): 984 | """ 985 | Generates pedestrian tracks 986 | :param image_set: the split set to produce for. Options are train, test, val. 987 | :param opts: 988 | 'fstride': Frequency of sampling from the data. 989 | 'sample_type': Whether to use 'all' pedestrian annotations or the ones 990 | with 'beh'avior only. 991 | 'subset': The subset of data annotations to use. Options are: 'default': Includes high resolution and 992 | high visibility videos 993 | 'high_visibility': Only videos with high 994 | visibility (include low 995 | resolution videos) 996 | 'all': Uses all videos 997 | 'height_rng': The height range of pedestrians to use. 998 | 'squarify_ratio': The width/height ratio of bounding boxes. A value between (0,1]. 0 the original 999 | ratio is used. 
1000 | 'data_split_type': How to split the data. Options: 'default', predefined sets, 'random', randomly split the data, 1001 | and 'kfold', k-fold data split (NOTE: only train/test splits). 1002 | 'seq_type': Sequence type to generate. Options: 'trajectory', generates tracks, 'crossing', generates 1003 | tracks up to 'crossing_point', 'intention' generates tracks similar to human experiments 1004 | 'min_track_size': Minimum allowable track length. 1005 | 'random_params': Parameters for random data split generation. (see _get_random_pedestrian_ids) 1006 | 'kfold_params': Parameters for kfold split generation. (see _get_kfold_pedestrian_ids) 1007 | :return: Sequence data 1008 | """ 1009 | params = {'fstride': 1, 1010 | 'sample_type': 'all', # 'beh' 1011 | 'subset': 'default', 1012 | 'height_rng': [0, float('inf')], 1013 | 'squarify_ratio': 0, 1014 | 'data_split_type': 'default', # kfold, random, default 1015 | 'seq_type': 'intention', 1016 | 'min_track_size': 15, 1017 | 'random_params': {'ratios': None, 1018 | 'val_data': True, 1019 | 'regen_data': False}, 1020 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 1021 | assert all(k in params for k in opts.keys()), "Wrong option(s). "\ 1022 | "Choose one of the following: {}".format(list(params.keys())) 1023 | params.update(opts) 1024 | 1025 | print('---------------------------------------------------------') 1026 | print("Generating action sequence data") 1027 | self._print_dict(params) 1028 | 1029 | annot_database = self.generate_database() 1030 | if params['seq_type'] == 'trajectory': 1031 | sequence = self._get_trajectories(image_set, annot_database, **params) 1032 | elif params['seq_type'] == 'crossing': 1033 | sequence = self._get_crossing(image_set, annot_database, **params) 1034 | elif params['seq_type'] == 'intention': 1035 | sequence = self._get_intention(image_set, annot_database, **params) 1036 | 1037 | return sequence 1038 | 1039 | def _get_trajectories(self, image_set, annotations, **params): 1040 | """ 1041 | Generates trajectory data. 
1042 | :param params: Parameters for generating trajectories 1043 | :param annotations: The annotations database 1044 | :return: A dictionary of trajectories 1045 | """ 1046 | 1047 | print('---------------------------------------------------------') 1048 | print("Generating trajectory data") 1049 | 1050 | num_pedestrians = 0 1051 | seq_stride = params['fstride'] 1052 | sq_ratio = params['squarify_ratio'] 1053 | height_rng = params['height_rng'] 1054 | 1055 | image_seq, pids_seq = [], [] 1056 | box_seq, center_seq, occ_seq = [], [], [] 1057 | intent_seq = [] 1058 | vehicle_seq = [] 1059 | resolution_seq = [] 1060 | video_ids, _pids = self._get_data_ids(image_set, params) 1061 | 1062 | for vid in sorted(video_ids): 1063 | img_width = annotations[vid]['width'] 1064 | img_height = annotations[vid]['height'] 1065 | pid_annots = annotations[vid]['ped_annotations'] 1066 | vid_annots = annotations[vid]['vehicle_annotations'] 1067 | 1068 | for pid in sorted(annotations[vid]['ped_annotations']): 1069 | if params['data_split_type'] != 'default' and pid not in _pids: 1070 | continue 1071 | if 'p' in pid: 1072 | continue 1073 | if params['sample_type'] == 'beh' and 'b' not in pid: 1074 | continue 1075 | num_pedestrians += 1 1076 | frame_ids = pid_annots[pid]['frames'] 1077 | images = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in 1078 | pid_annots[pid]['frames']] 1079 | boxes = pid_annots[pid]['bbox'] 1080 | occlusions = pid_annots[pid]['occlusion'] 1081 | 1082 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1083 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1084 | frame_ids, boxes, 1085 | images, occlusions) 1086 | 1087 | if len(boxes) / seq_stride < params['min_track_size']: 1088 | continue 1089 | 1090 | if sq_ratio: 1091 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1092 | 1093 | ped_ids = [[pid]] * len(boxes) 1094 | 1095 | if params['sample_type'] == 'all': 1096 | intent = [[0]] * len(boxes) 1097 | else: 1098 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1099 | intent = [[0]] * len(boxes) 1100 | else: 1101 | intent = [[1]] * len(boxes) 1102 | center = [self._get_center(b) for b in boxes] 1103 | 1104 | occ_seq.append(occlusions[::seq_stride]) 1105 | image_seq.append(images[::seq_stride]) 1106 | box_seq.append(boxes[::seq_stride]) 1107 | center_seq.append(center[::seq_stride]) 1108 | intent_seq.append(intent[::seq_stride]) 1109 | pids_seq.append(ped_ids[::seq_stride]) 1110 | vehicle_seq.append([[vid_annots[i]] 1111 | for i in frame_ids][::seq_stride]) 1112 | resolutions = [[img_width, img_height]] * len(boxes) 1113 | resolution_seq.append(resolutions[::seq_stride]) 1114 | 1115 | print('Split: %s' % image_set) 1116 | print('Number of pedestrians: %d ' % num_pedestrians) 1117 | print('Total number of used pedestrians: %d ' % len(image_seq)) 1118 | 1119 | return {'image': image_seq, 1120 | 'resolution': resolution_seq, 1121 | 'pid': pids_seq, 1122 | 'bbox': box_seq, 1123 | 'center': center_seq, 1124 | 'occlusion': occ_seq, 1125 | 'intent': intent_seq, 1126 | 'vehicle_act': vehicle_seq} 1127 | 1128 | def _get_crossing(self, image_set, annotations, **params): 1129 | """ 1130 | Generates crossing data. 
1131 | :param image_set: Data split to use 1132 | :param annotations: Annotations database 1133 | :param params: Parameters to generate data (see generade_database) 1134 | :return: A dictionary of trajectories 1135 | """ 1136 | 1137 | print('---------------------------------------------------------') 1138 | print("Generating crossing data") 1139 | 1140 | num_pedestrians = 0 1141 | seq_stride = params['fstride'] 1142 | sq_ratio = params['squarify_ratio'] 1143 | height_rng = params['height_rng'] 1144 | image_seq, pids_seq = [], [] 1145 | box_seq, center_seq, occ_seq = [], [], [] 1146 | intent_seq = [] 1147 | vehicle_seq = [] 1148 | activities = [] 1149 | 1150 | video_ids, _pids = self._get_data_ids(image_set, params) 1151 | 1152 | for vid in sorted(video_ids): 1153 | img_width = annotations[vid]['width'] 1154 | img_height = annotations[vid]['height'] 1155 | pid_annots = annotations[vid]['ped_annotations'] 1156 | vid_annots = annotations[vid]['vehicle_annotations'] 1157 | for pid in sorted(pid_annots): 1158 | if params['data_split_type'] != 'default' and pid not in _pids: 1159 | continue 1160 | if 'p' in pid: 1161 | continue 1162 | if params['sample_type'] == 'beh' and 'b' not in pid: 1163 | continue 1164 | num_pedestrians += 1 1165 | 1166 | frame_ids = pid_annots[pid]['frames'] 1167 | 1168 | if 'b' in pid: 1169 | event_frame = pid_annots[pid]['attributes']['crossing_point'] 1170 | else: 1171 | event_frame = -1 1172 | 1173 | if event_frame == -1: 1174 | end_idx = -3 1175 | else: 1176 | end_idx = frame_ids.index(event_frame) 1177 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1178 | frame_ids = frame_ids[: end_idx + 1] 1179 | images = [self._get_image_path(vid, f) for f in frame_ids] 1180 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1181 | 1182 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1183 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1184 | frame_ids, boxes, 1185 | images, occlusions) 1186 | 1187 | if len(boxes) / seq_stride < params['min_track_size']: 1188 | continue 1189 | 1190 | if sq_ratio: 1191 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1192 | 1193 | image_seq.append(images[::seq_stride]) 1194 | box_seq.append(boxes[::seq_stride]) 1195 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1196 | occ_seq.append(occlusions[::seq_stride]) 1197 | 1198 | ped_ids = [[pid]] * len(boxes) 1199 | pids_seq.append(ped_ids[::seq_stride]) 1200 | 1201 | if 'b' not in pid: 1202 | intent = [[0]] * len(boxes) 1203 | acts = [[0]] * len(boxes) 1204 | else: 1205 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1206 | intent = [[0]] * len(boxes) 1207 | else: 1208 | intent = [[1]] * len(boxes) 1209 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes) 1210 | 1211 | intent_seq.append(intent[::seq_stride]) 1212 | activities.append(acts[::seq_stride]) 1213 | vehicle_seq.append([[vid_annots[i]] 1214 | for i in frame_ids][::seq_stride]) 1215 | 1216 | print('Split: %s' % image_set) 1217 | print('Number of pedestrians: %d ' % num_pedestrians) 1218 | print('Total number of samples: %d ' % len(image_seq)) 1219 | 1220 | return {'image': image_seq, 1221 | 'pid': pids_seq, 1222 | 'bbox': box_seq, 1223 | 'center': center_seq, 1224 | 'occlusion': occ_seq, 1225 | 'vehicle_act': vehicle_seq, 1226 | 'intent': intent_seq, 1227 | 'activities': activities, 1228 | 'image_dimension': (img_width, img_height)} 1229 | 1230 | def _get_intention(self, image_set, annotations, **params): 1231 
| """ 1232 | Generates intention data. 1233 | :param image_set: Data split to use 1234 | :param annotations: Annotations database 1235 | :param params: Parameters to generate data (see generade_database) 1236 | :return: A dictionary of trajectories 1237 | """ 1238 | print('---------------------------------------------------------') 1239 | print("Generating intention data") 1240 | 1241 | num_pedestrians = 0 1242 | seq_stride = params['fstride'] 1243 | sq_ratio = params['squarify_ratio'] 1244 | height_rng = params['height_rng'] 1245 | image_seq, pids_seq = [], [] 1246 | box_seq, center_seq, occ_seq = [], [], [] 1247 | intent_seq = [] 1248 | video_ids, _pids = self._get_data_ids(image_set, params) 1249 | 1250 | for vid in sorted(video_ids): 1251 | img_width = annotations[vid]['width'] 1252 | pid_annots = annotations[vid]['ped_annotations'] 1253 | for pid in sorted(pid_annots): 1254 | if params['data_split_type'] != 'default' and pid not in _pids: 1255 | continue 1256 | if 'p' in pid: 1257 | continue 1258 | if params['sample_type'] == 'beh' and 'b' not in pid: 1259 | continue 1260 | num_pedestrians += 1 1261 | frame_ids = pid_annots[pid]['frames'] 1262 | 1263 | if params['sample_type'] == 'beh': 1264 | event_frame = pid_annots[pid]['attributes']['decision_point'] 1265 | else: 1266 | event_frame = -1 1267 | 1268 | if event_frame == -1: 1269 | end_idx = -3 1270 | else: 1271 | end_idx = frame_ids.index(event_frame) 1272 | 1273 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1274 | frame_ids = frame_ids[: end_idx + 1] 1275 | images = [self._get_image_path(vid, f) for f in frame_ids] 1276 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1277 | 1278 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1279 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1280 | frame_ids, boxes, 1281 | images, occlusions) 1282 | if len(boxes) / seq_stride < params['min_track_size']: 1283 | continue 1284 | 1285 | if sq_ratio: 1286 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1287 | 1288 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1289 | image_seq.append(images[::seq_stride]) 1290 | box_seq.append(boxes[::seq_stride]) 1291 | occ_seq.append(occlusions[::seq_stride]) 1292 | ped_ids = [[pid]] * len(boxes) 1293 | pids_seq.append(ped_ids[::seq_stride]) 1294 | 1295 | if params['sample_type'] == 'all': 1296 | intent = [[0]] * len(boxes) 1297 | else: 1298 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1299 | intent = [[0]] * len(boxes) 1300 | else: 1301 | intent = [[1]] * len(boxes) 1302 | intent_seq.append(intent[::seq_stride]) 1303 | 1304 | print('Split: %s' % image_set) 1305 | print('Number of pedestrians: %d ' % num_pedestrians) 1306 | print('Total number of samples: %d ' % len(image_seq)) 1307 | 1308 | return {'image': image_seq, 1309 | 'pid': pids_seq, 1310 | 'bbox': box_seq, 1311 | 'center': center_seq, 1312 | 'occlusion': occ_seq, 1313 | 'intent': intent_seq} --------------------------------------------------------------------------------