├── lib ├── models │ ├── __init__.py │ ├── __pycache__ │ │ └── attention.cpython-36.pyc.140688647036976 │ ├── models.py │ ├── feature_extractor.py │ ├── bitrap_np.py │ ├── SGNet.py │ └── SGNet_CVAE.py ├── dataloaders │ ├── __init__.py │ ├── datasets.py │ ├── trajectron.py │ ├── ethucy_data_layer.py │ ├── pie_data_layer.py │ ├── jaad_data_layer.py │ └── JAAD_origin.py ├── losses │ ├── __init__.py │ ├── cvae.py │ └── rmse.py └── utils │ ├── __init__.py │ ├── data_utils.py │ ├── hevi_train_utils.py │ ├── ethucy_train_utils.py │ ├── jaadpie_train_utils_cvae.py │ ├── eval_utils.py │ └── ethucy_train_utils_cvae.py ├── configs ├── ethucy │ ├── __init__.py │ ├── ethucy.py │ └── ETH_UCY.json ├── jaad │ ├── __init__.py │ └── jaad.py ├── pie │ ├── __init__.py │ └── pie.py ├── __init__.py └── base_configs.py ├── .gitmodules ├── .gitignore ├── tools ├── ethucy │ ├── eval_deterministic.py │ ├── eval_cvae.py │ ├── train_deterministic.py │ └── train_cvae.py ├── pie │ ├── eval_cvae.py │ └── train_cvae.py └── jaad │ ├── eval_cvae.py │ └── train_cvae.py ├── SGNet_env.yml └── README.md /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import * -------------------------------------------------------------------------------- /configs/ethucy/__init__.py: -------------------------------------------------------------------------------- 1 | from .ethucy import * -------------------------------------------------------------------------------- /configs/jaad/__init__.py: -------------------------------------------------------------------------------- 1 | from .jaad import * 2 | -------------------------------------------------------------------------------- /configs/pie/__init__.py: -------------------------------------------------------------------------------- 1 | from .pie import * 2 | -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_configs import * 2 | -------------------------------------------------------------------------------- /lib/dataloaders/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets import build_dataset 2 | -------------------------------------------------------------------------------- /lib/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .rmse import * 2 | from .cvae import * 3 | 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .eval_utils import * 2 | from .data_utils import * 3 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "Trajectron-plus-plus"] 2 | path = Trajectron-plus-plus 3 | url = https://github.com/StanfordASL/Trajectron-plus-plus.git 4 | -------------------------------------------------------------------------------- /lib/models/__pycache__/attention.cpython-36.pyc.140688647036976: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChuhuaW/SGNet.pytorch/HEAD/lib/models/__pycache__/attention.cpython-36.pyc.140688647036976 
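The `__init__.py` files above simply re-export each package's public names, which is how the entry points under `tools/` consume them. A minimal sketch of that wiring, assuming the repository root is on `PYTHONPATH`; this snippet is illustrative commentary, not a file from the repository:

```python
# Illustrative sketch: how configs/, lib/models/ and lib/utils are wired together,
# mirroring the imports used by tools/jaad/train_cvae.py.
from configs.jaad import parse_sgnet_args   # re-exported by configs/jaad/__init__.py
from lib.models import build_model          # re-exported by lib/models/__init__.py -> models.py
import lib.utils as utl                     # pulls in eval_utils and data_utils

args = parse_sgnet_args()                   # argparse namespace: dataset, model, lr, ...
utl.set_seed(int(args.seed))                # defined in lib/utils/data_utils.py
model = build_model(args)                   # picks SGNet or SGNet_CVAE via args.model
train_gen = utl.build_data_loader(args, 'train')
```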
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.swp 3 | *.pth 4 | *.txt 5 | *.pyc 6 | *.err 7 | *.script 8 | 9 | data/* 10 | ======= 11 | data/PIE 12 | data/JAAD 13 | data/HEVI_dataset 14 | data/ETHUCY -------------------------------------------------------------------------------- /lib/models/models.py: -------------------------------------------------------------------------------- 1 | from .SGNet import SGNet 2 | from .SGNet_CVAE import SGNet_CVAE 3 | 4 | _META_ARCHITECTURES = { 5 | 'SGNet':SGNet, 6 | 'SGNet_CVAE':SGNet_CVAE, 7 | } 8 | 9 | 10 | def build_model(args): 11 | meta_arch = _META_ARCHITECTURES[args.model] 12 | return meta_arch(args) 13 | -------------------------------------------------------------------------------- /lib/dataloaders/datasets.py: -------------------------------------------------------------------------------- 1 | from .jaad_data_layer import JAADDataLayer 2 | from .pie_data_layer import PIEDataLayer 3 | from .ethucy_data_layer import ETHUCYDataLayer 4 | 5 | def build_dataset(args, phase): 6 | print(args.dataset) 7 | if args.dataset in ['JAAD']: 8 | data_layer = JAADDataLayer 9 | elif args.dataset in ['PIE']: 10 | data_layer = PIEDataLayer 11 | elif args.dataset in ['ETH', 'HOTEL','UNIV', 'ZARA1', 'ZARA2']: 12 | data_layer = ETHUCYDataLayer 13 | return data_layer(args, phase) -------------------------------------------------------------------------------- /lib/losses/cvae.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def cvae_multi(pred_traj, target, first_history_index = 0): 4 | ''' 5 | CVAE loss use best-of-many 6 | ''' 7 | K = pred_traj.shape[3] 8 | 9 | target = target.unsqueeze(3).repeat(1, 1, 1, K, 1) 10 | total_loss = [] 11 | for enc_step in range(first_history_index, pred_traj.size(1)): 12 | traj_rmse = torch.sqrt(torch.sum((pred_traj[:,enc_step,:,:,:] - target[:,enc_step,:,:,:])**2, dim=-1)).sum(dim=1) 13 | best_idx = torch.argmin(traj_rmse, dim=1) 14 | loss_traj = traj_rmse[range(len(best_idx)), best_idx].mean() 15 | total_loss.append(loss_traj) 16 | 17 | return sum(total_loss)/len(total_loss) 18 | -------------------------------------------------------------------------------- /configs/base_configs.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | __all__ = ['parse_base_args'] 4 | 5 | def parse_base_args(): 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument('--checkpoint', default='', type=str) 8 | parser.add_argument('--start_epoch', default=1, type=int) 9 | parser.add_argument('--gpu', default='0', type=str) 10 | parser.add_argument('--num_workers', default=8, type=int) 11 | parser.add_argument('--epochs', default=50, type=int) 12 | parser.add_argument('--batch_size', default=128, type=int) 13 | parser.add_argument('--weight_decay', default=5e-04, type=float) 14 | parser.add_argument('--seed', default=1, type=int) 15 | parser.add_argument('--phases', default=['train', 'test'], type=list) 16 | parser.add_argument('--shuffle', default=True, type=bool) 17 | return parser 18 | -------------------------------------------------------------------------------- /lib/losses/rmse.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class rmse_loss(nn.Module): 6 | ''' 7 | Params: 8 | x_pred: 
(batch_size, enc_steps, dec_steps, pred_dim) 9 | x_true: (batch_size, enc_steps, dec_steps, pred_dim) 10 | Returns: 11 | rmse: scalar, rmse = \sum_{i=1:batch_size}() 12 | ''' 13 | def __init__(self): 14 | super(rmse_loss, self).__init__() 15 | 16 | def forward(self, x_pred, x_true): 17 | L2_diff = torch.sqrt(torch.sum((x_pred - x_true)**2, dim=3)) 18 | # sum over prediction time steps 19 | L2_all_pred = torch.sum(L2_diff, dim=2) 20 | # mean of each frames predictions 21 | L2_mean_pred = torch.mean(L2_all_pred, dim=1) 22 | # sum of all batches 23 | L2_mean_pred = torch.mean(L2_mean_pred, dim=0) 24 | return L2_mean_pred -------------------------------------------------------------------------------- /configs/ethucy/ethucy.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='ETH', type=str) 8 | parser.add_argument('--lr', default=5e-04, type=float) # ETH 0.0005,HOTEL 0.0001, UNIV 0.0001, ZARA1 0.0001, ZARA2 0.0001 9 | parser.add_argument('--eth_root', default='data/ETHUCY', type=str) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--hidden_size', default=512, type=int) 12 | parser.add_argument('--enc_steps', default=8, type=int) 13 | parser.add_argument('--dec_steps', default=12, type=int) 14 | parser.add_argument('--dropout', default=0.5, type=float) 15 | parser.add_argument('--nu', default=0.0, type=float) 16 | parser.add_argument('--sigma', default=1.0, type=float) 17 | parser.add_argument('--ETH_CONFIG', default='./configs/ethucy/ETH_UCY.json', type=str) 18 | parser.add_argument('--augment', default=False, type=bool) 19 | parser.add_argument('--DEC_WITH_Z', default=True, type=bool) 20 | parser.add_argument('--LATENT_DIM', default=32, type=int) 21 | parser.add_argument('--pred_dim', default=2, type=int) 22 | parser.add_argument('--input_dim', default=6, type=int) 23 | parser.add_argument('--K', default=20, type=int) 24 | 25 | return parser.parse_args() -------------------------------------------------------------------------------- /configs/pie/pie.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='PIE', type=str) 8 | parser.add_argument('--lr', default=5e-04, type=float) 9 | parser.add_argument('--data_root', default='data/PIE', type=str) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--bbox_type', default='cxcywh', type=str) 12 | parser.add_argument('--normalize', default='zero-one', type=str) 13 | parser.add_argument('--hidden_size', default=512, type=int) 14 | parser.add_argument('--enc_steps', default=15, type=int) 15 | parser.add_argument('--dec_steps', default=45, type=int) 16 | parser.add_argument('--dropout', default=0.0, type=float) 17 | parser.add_argument('--nu', default=0.0, type=float) 18 | parser.add_argument('--sigma', default=1.5, type=float) 19 | parser.add_argument('--FPS', default=30, type=int) 20 | parser.add_argument('--min_bbox', default=[0,0,0,0], type=list) 21 | parser.add_argument('--max_bbox', default=[1920, 1080, 1920, 1080], type=list) 22 | parser.add_argument('--K', default=20, type=int) 23 | parser.add_argument('--DEC_WITH_Z', 
default=True, type=bool) 24 | parser.add_argument('--LATENT_DIM', default=32, type=int) 25 | parser.add_argument('--pred_dim', default=4, type=int) 26 | parser.add_argument('--input_dim', default=4, type=int) 27 | 28 | return parser.parse_args() -------------------------------------------------------------------------------- /configs/jaad/jaad.py: -------------------------------------------------------------------------------- 1 | from configs import parse_base_args 2 | 3 | __all__ = ['parse_sgnet_args'] 4 | 5 | def parse_sgnet_args(): 6 | parser = parse_base_args() 7 | parser.add_argument('--dataset', default='JAAD', type=str) 8 | parser.add_argument('--data_root', default='data/JAAD', type=str) 9 | parser.add_argument('--lr', default=5e-04, type=float) 10 | parser.add_argument('--model', default='SGNet_CVAE', type=str) 11 | parser.add_argument('--bbox_type', default='cxcywh', type=str) 12 | parser.add_argument('--normalize', default='zero-one', type=str) 13 | parser.add_argument('--hidden_size', default=512, type=int) 14 | parser.add_argument('--enc_steps', default=15, type=int) 15 | parser.add_argument('--dec_steps', default=45, type=int) 16 | parser.add_argument('--dropout', default=0.0, type=float) 17 | parser.add_argument('--nu', default=0.0, type=float) 18 | parser.add_argument('--sigma', default=1.5, type=float) 19 | parser.add_argument('--FPS', default=30, type=int) 20 | parser.add_argument('--min_bbox', default=[0,0,0,0], type=list) 21 | parser.add_argument('--max_bbox', default=[1920, 1080, 1920, 1080], type=list) 22 | parser.add_argument('--K', default=20, type=int) 23 | parser.add_argument('--DEC_WITH_Z', default=True, type=bool) 24 | parser.add_argument('--LATENT_DIM', default=32, type=int) 25 | parser.add_argument('--pred_dim', default=4, type=int) 26 | parser.add_argument('--input_dim', default=4, type=int) 27 | 28 | 29 | 30 | return parser.parse_args() 31 | -------------------------------------------------------------------------------- /tools/ethucy/eval_deterministic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | 14 | from configs.ethucy import parse_sgd_args as parse_args 15 | import lib.utils as utl 16 | from lib.models import build_model 17 | from lib.losses import rmse_loss 18 | from lib.utils.ethucy_train_utils import train, val, test 19 | 20 | 21 | def main(args): 22 | this_dir = osp.dirname(__file__) 23 | model_name = args.model 24 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name, str(args.dropout), str(args.seed)) 25 | if not osp.isdir(save_dir): 26 | os.makedirs(save_dir) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | utl.set_seed(int(args.seed)) 31 | model = build_model(args) 32 | model = model.to(device) 33 | if osp.isfile(args.checkpoint): 34 | 35 | checkpoint = torch.load(args.checkpoint, map_location=device) 36 | model.load_state_dict(checkpoint['model_state_dict']) 37 | del checkpoint 38 | 39 | 40 | criterion = rmse_loss().to(device) 41 | 42 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 43 | print("Number of test samples:", test_gen.__len__()) 44 | 45 | 46 | # test 47 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = 
test(model, test_gen, criterion, device) 48 | 49 | if __name__ == '__main__': 50 | main(parse_args()) 51 | -------------------------------------------------------------------------------- /tools/pie/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.pie import parse_sgd_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | model = build_model(args) 31 | 32 | 33 | if osp.isfile(args.checkpoint): 34 | checkpoint = torch.load(args.checkpoint, map_location=device) 35 | model.load_state_dict(checkpoint['model_state_dict'],strict=False) 36 | model = nn.DataParallel(model) 37 | model = model.to(device) 38 | criterion = rmse_loss().to(device) 39 | test_gen = utl.build_data_loader(args, 'test') 40 | print("Number of test samples:", test_gen.__len__()) 41 | 42 | # test 43 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 44 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f; FMSE: %4f; FIOU: %4f\n" % (MSE_05, MSE_10, MSE_15, FMSE, FIOU)) 45 | print("CFMSE: %4f; CMSE: %4f; \n" % (CFMSE, CMSE)) 46 | 47 | if __name__ == '__main__': 48 | main(parse_args()) 49 | -------------------------------------------------------------------------------- /tools/jaad/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.jaad import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | model = build_model(args) 31 | 32 | 33 | if osp.isfile(args.checkpoint): 34 | checkpoint = torch.load(args.checkpoint, map_location=device) 35 | model.load_state_dict(checkpoint['model_state_dict'],strict=False) 36 | model = nn.DataParallel(model) 37 | model = model.to(device) 38 | criterion = rmse_loss().to(device) 39 | test_gen = utl.build_data_loader(args, 'test') 40 | print("Number of test samples:", test_gen.__len__()) 41 | 42 | # test 43 | test_loss, 
MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 44 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f; FMSE: %4f; FIOU: %4f\n" % (MSE_05, MSE_10, MSE_15, FMSE, FIOU)) 45 | print("CFMSE: %4f; CMSE: %4f; \n" % (CFMSE, CMSE)) 46 | 47 | if __name__ == '__main__': 48 | main(parse_args()) 49 | -------------------------------------------------------------------------------- /tools/ethucy/eval_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.ethucy import parse_sgd_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.ethucy_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name,str(args.dropout), str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | model = build_model(args) 30 | 31 | model = nn.DataParallel(model) 32 | model = model.to(device) 33 | if osp.isfile(args.checkpoint): 34 | 35 | checkpoint = torch.load(args.checkpoint, map_location=device) 36 | model.load_state_dict(checkpoint['model_state_dict'], strict=False) 37 | del checkpoint 38 | 39 | criterion = rmse_loss().to(device) 40 | 41 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 42 | print("Number of test samples:", test_gen.__len__()) 43 | 44 | 45 | 46 | 47 | 48 | # test 49 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 50 | print("Test Loss: {:.4f}".format(test_loss)) 51 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 52 | 53 | 54 | if __name__ == '__main__': 55 | main(parse_args()) 56 | -------------------------------------------------------------------------------- /lib/models/feature_extractor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torchvision import datasets, transforms, models 4 | import torch.nn.functional as F 5 | 6 | 7 | class JAADFeatureExtractor(nn.Module): 8 | 9 | def __init__(self, args): 10 | super(JAADFeatureExtractor, self).__init__() 11 | self.embbed_size = args.hidden_size 12 | self.box_embed = nn.Sequential(nn.Linear(4, self.embbed_size), 13 | nn.ReLU()) 14 | def forward(self, inputs): 15 | box_input = inputs 16 | embedded_box_input= self.box_embed(box_input) 17 | 18 | return embedded_box_input 19 | 20 | class ETHUCYFeatureExtractor(nn.Module): 21 | 22 | def __init__(self, args): 23 | super(ETHUCYFeatureExtractor, self).__init__() 24 | self.embbed_size = args.hidden_size 25 | self.embed = nn.Sequential(nn.Linear(6, self.embbed_size), 26 | nn.ReLU()) 27 | 28 | 29 | def forward(self, inputs): 30 | box_input = inputs 31 | 32 | embedded_box_input= self.embed(box_input) 33 | 34 | return embedded_box_input 35 | 36 | class PIEFeatureExtractor(nn.Module): 37 | 38 | def __init__(self, args): 39 | super(PIEFeatureExtractor, 
self).__init__() 40 | 41 | self.embbed_size = args.hidden_size 42 | self.box_embed = nn.Sequential(nn.Linear(4, self.embbed_size), 43 | nn.ReLU()) 44 | def forward(self, inputs): 45 | box_input = inputs 46 | embedded_box_input= self.box_embed(box_input) 47 | return embedded_box_input 48 | 49 | _FEATURE_EXTRACTORS = { 50 | 'PIE': PIEFeatureExtractor, 51 | 'JAAD': JAADFeatureExtractor, 52 | 'ETH': ETHUCYFeatureExtractor, 53 | 'HOTEL': ETHUCYFeatureExtractor, 54 | 'UNIV': ETHUCYFeatureExtractor, 55 | 'ZARA1': ETHUCYFeatureExtractor, 56 | 'ZARA2': ETHUCYFeatureExtractor, 57 | } 58 | 59 | def build_feature_extractor(args): 60 | func = _FEATURE_EXTRACTORS[args.dataset] 61 | return func(args) 62 | -------------------------------------------------------------------------------- /tools/pie/train_cvae.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import torch 4 | from torch import nn, optim 5 | 6 | import lib.utils as utl 7 | from configs.pie import parse_sgnet_args as parse_args 8 | from lib.models import build_model 9 | from lib.losses import rmse_loss 10 | from lib.utils.jaadpie_train_utils_cvae import train, val, test 11 | 12 | def main(args): 13 | this_dir = osp.dirname(__file__) 14 | model_name = args.model 15 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 16 | if not osp.isdir(save_dir): 17 | os.makedirs(save_dir) 18 | 19 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 20 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 21 | utl.set_seed(int(args.seed)) 22 | 23 | 24 | model = build_model(args) 25 | model = nn.DataParallel(model) 26 | model = model.to(device) 27 | 28 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 29 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 30 | min_lr=1e-10, verbose=1) 31 | if osp.isfile(args.checkpoint): 32 | checkpoint = torch.load(args.checkpoint, map_location=device) 33 | model.load_state_dict(checkpoint['model_state_dict']) 34 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 35 | args.start_epoch += checkpoint['epoch'] 36 | 37 | criterion = rmse_loss().to(device) 38 | 39 | train_gen = utl.build_data_loader(args, 'train') 40 | val_gen = utl.build_data_loader(args, 'val') 41 | test_gen = utl.build_data_loader(args, 'test') 42 | print("Number of validation samples:", val_gen.__len__()) 43 | print("Number of test samples:", test_gen.__len__()) 44 | 45 | 46 | 47 | # train 48 | min_loss = 1e6 49 | min_MSE_15 = 10e5 50 | best_model = None 51 | best_model_metric = None 52 | 53 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 54 | print("Number of training samples:", len(train_gen)) 55 | 56 | # train 57 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 58 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}'.format( 59 | epoch, train_goal_loss, train_cvae_loss, train_KLD_loss)) 60 | 61 | 62 | # val 63 | val_loss = val(model, val_gen, criterion, device) 64 | lr_scheduler.step(val_loss) 65 | 66 | 67 | # test 68 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 69 | print("Test Loss: {:.4f}".format(test_loss)) 70 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f\n" % (MSE_05, MSE_10, MSE_15)) 71 | 72 | if __name__ == '__main__': 73 | main(parse_args()) 74 | 
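The script above resumes training by reading `model_state_dict`, `optimizer_state_dict` and `epoch` from `args.checkpoint`, but the matching save step is not among the files shown here. A hedged sketch of a save call that would produce a compatible checkpoint (the helper name and file naming are assumptions, not the repository's code):

```python
# Sketch only: writes the keys that train_cvae.py / eval_cvae.py read back.
import os.path as osp
import torch

def save_checkpoint(save_dir, epoch, model, optimizer):
    # 'model' is whatever main() built (here an nn.DataParallel-wrapped SGNet_CVAE);
    # the resume path above loads this dict back into the same wrapper.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }, osp.join(save_dir, 'epoch_{:03d}.pth'.format(epoch)))  # file name is an assumption
```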
-------------------------------------------------------------------------------- /tools/ethucy/train_deterministic.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | 14 | from configs.ethucy import parse_sgnet_args as parse_args 15 | import lib.utils as utl 16 | from lib.models import build_model 17 | from lib.losses import rmse_loss 18 | from lib.utils.ethucy_train_utils import train, val, test 19 | 20 | 21 | def main(args): 22 | this_dir = osp.dirname(__file__) 23 | model_name = args.model 24 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset, model_name, str(args.seed)) 25 | if not osp.isdir(save_dir): 26 | os.makedirs(save_dir) 27 | 28 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | utl.set_seed(int(args.seed)) 31 | model = build_model(args) 32 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 33 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 34 | min_lr=1e-10, verbose=1) 35 | model = model.to(device) 36 | if osp.isfile(args.checkpoint): 37 | checkpoint = torch.load(args.checkpoint, map_location=device) 38 | model.load_state_dict(checkpoint['model_state_dict']) 39 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 40 | args.start_epoch += checkpoint['epoch'] 41 | del checkpoint 42 | 43 | 44 | criterion = rmse_loss().to(device) 45 | 46 | train_gen = utl.build_data_loader(args, 'train', batch_size = 1) 47 | val_gen = utl.build_data_loader(args, 'val', batch_size = 1) 48 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 49 | print("Number of validation samples:", val_gen.__len__()) 50 | print("Number of test samples:", test_gen.__len__()) 51 | # train 52 | min_loss = 1e6 53 | min_ADE_08 = 10e5 54 | min_FDE_08 = 10e5 55 | min_ADE_12 = 10e5 56 | min_FDE_12 = 10e5 57 | best_model = None 58 | best_model_metric = None 59 | 60 | 61 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 62 | 63 | train_goal_loss, train_dec_loss, total_train_loss = train(model, train_gen, criterion, optimizer, device) 64 | 65 | print('Train Epoch: {} \t Goal loss: {:.4f}\t Decoder loss: {:.4f}\t Total: {:.4f}'.format( 66 | epoch, train_goal_loss, train_dec_loss, total_train_loss)) 67 | 68 | 69 | 70 | # val 71 | val_loss = val(model, val_gen, criterion, device) 72 | # lr_scheduler.step(val_loss) 73 | 74 | 75 | # test 76 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 77 | 78 | 79 | if __name__ == '__main__': 80 | main(parse_args()) 81 | -------------------------------------------------------------------------------- /tools/jaad/train_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.jaad import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.jaadpie_train_utils_cvae 
import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', model_name, str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | 30 | 31 | model = build_model(args) 32 | model = nn.DataParallel(model) 33 | model = model.to(device) 34 | 35 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 36 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 37 | min_lr=1e-10, verbose=1) 38 | if osp.isfile(args.checkpoint): 39 | checkpoint = torch.load(args.checkpoint, map_location=device) 40 | model.load_state_dict(checkpoint['model_state_dict']) 41 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 42 | args.start_epoch += checkpoint['epoch'] 43 | 44 | criterion = rmse_loss().to(device) 45 | 46 | train_gen = utl.build_data_loader(args, 'train') 47 | val_gen = utl.build_data_loader(args, 'val') 48 | test_gen = utl.build_data_loader(args, 'test') 49 | print("Number of validation samples:", val_gen.__len__()) 50 | print("Number of test samples:", test_gen.__len__()) 51 | 52 | 53 | 54 | # train 55 | min_loss = 1e6 56 | min_MSE_15 = 10e5 57 | best_model = None 58 | best_model_metric = None 59 | 60 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 61 | print("Number of training samples:", len(train_gen)) 62 | 63 | # train 64 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 65 | # print('Train Epoch: ', epoch, 'Goal loss: ', train_goal_loss, 'Decoder loss: ', train_dec_loss, 'CVAE loss: ', train_cvae_loss, \ 66 | # 'KLD loss: ', train_KLD_loss, 'Total: ', total_train_loss) 67 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}'.format( 68 | epoch, train_goal_loss, train_cvae_loss, train_KLD_loss)) 69 | 70 | 71 | # val 72 | val_loss = val(model, val_gen, criterion, device) 73 | lr_scheduler.step(val_loss) 74 | 75 | 76 | # test 77 | test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE = test(model, test_gen, criterion, device) 78 | print("Test Loss: {:.4f}".format(test_loss)) 79 | print("MSE_05: %4f; MSE_10: %4f; MSE_15: %4f\n" % (MSE_05, MSE_10, MSE_15)) 80 | 81 | 82 | 83 | if __name__ == '__main__': 84 | main(parse_args()) 85 | -------------------------------------------------------------------------------- /tools/ethucy/train_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | import lib.utils as utl 14 | from configs.ethucy import parse_sgnet_args as parse_args 15 | from lib.models import build_model 16 | from lib.losses import rmse_loss 17 | from lib.utils.ethucy_train_utils_cvae import train, val, test 18 | 19 | def main(args): 20 | this_dir = osp.dirname(__file__) 21 | model_name = args.model 22 | save_dir = osp.join(this_dir, 'checkpoints', args.dataset,model_name,str(args.dropout), str(args.seed)) 23 | if not osp.isdir(save_dir): 24 | os.makedirs(save_dir) 25 | 26 | os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu 27 | device = 
torch.device('cuda' if torch.cuda.is_available() else 'cpu') 28 | utl.set_seed(int(args.seed)) 29 | model = build_model(args) 30 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 31 | lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.2, patience=5, 32 | min_lr=1e-10, verbose=1) 33 | model = nn.DataParallel(model) 34 | model = model.to(device) 35 | if osp.isfile(args.checkpoint): 36 | 37 | checkpoint = torch.load(args.checkpoint, map_location=device) 38 | model.load_state_dict(checkpoint['model_state_dict']) 39 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 40 | args.start_epoch += checkpoint['epoch'] 41 | del checkpoint 42 | 43 | criterion = rmse_loss().to(device) 44 | 45 | train_gen = utl.build_data_loader(args, 'train', batch_size = 1) 46 | val_gen = utl.build_data_loader(args, 'val', batch_size = 1) 47 | test_gen = utl.build_data_loader(args, 'test', batch_size = 1) 48 | print("Number of validation samples:", val_gen.__len__()) 49 | print("Number of test samples:", test_gen.__len__()) 50 | 51 | 52 | 53 | # train 54 | min_loss = 1e6 55 | min_ADE_08 = 10e5 56 | min_FDE_08 = 10e5 57 | min_ADE_12 = 10e5 58 | min_FDE_12 = 10e5 59 | best_model = None 60 | best_model_metric = None 61 | 62 | for epoch in range(args.start_epoch, args.epochs+args.start_epoch): 63 | print("Number of training samples:", len(train_gen)) 64 | 65 | # train 66 | train_goal_loss, train_cvae_loss, train_KLD_loss = train(model, train_gen, criterion, optimizer, device) 67 | # print('Train Epoch: ', epoch, 'Goal loss: ', train_goal_loss, 'Decoder loss: ', train_dec_loss, 'CVAE loss: ', train_cvae_loss, \ 68 | # 'KLD loss: ', train_KLD_loss, 'Total: ', total_train_loss) 69 | print('Train Epoch: {} \t Goal loss: {:.4f}\t CVAE loss: {:.4f}\t KLD loss: {:.4f}\t Total: {:.4f}'.format( 70 | epoch,train_goal_loss, train_cvae_loss, train_KLD_loss, train_goal_loss + train_cvae_loss + train_KLD_loss )) 71 | 72 | 73 | # val 74 | val_loss = val(model, val_gen, criterion, device) 75 | lr_scheduler.step(val_loss) 76 | 77 | 78 | # test 79 | test_loss, ADE_08, FDE_08, ADE_12, FDE_12 = test(model, test_gen, criterion, device) 80 | print("Test Loss: {:.4f}".format(test_loss)) 81 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 82 | 83 | 84 | if __name__ == '__main__': 85 | main(parse_args()) 86 | -------------------------------------------------------------------------------- /configs/ethucy/ETH_UCY.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "grad_clip": 1.0, 4 | "learning_rate_style": "exp", 5 | "learning_rate": 0.001, 6 | "min_learning_rate": 1e-05, 7 | "learning_decay_rate": 0.9999, 8 | "prediction_horizon": 12, 9 | "minimum_history_length": 7, 10 | "maximum_history_length": 7, 11 | "map_encoder": { 12 | "PEDESTRIAN": { 13 | "heading_state_index": 6, 14 | "patch_size": [ 15 | 50, 16 | 10, 17 | 50, 18 | 90 19 | ], 20 | "map_channels": 3, 21 | "hidden_channels": [ 22 | 10, 23 | 20, 24 | 10, 25 | 1 26 | ], 27 | "output_size": 32, 28 | "masks": [ 29 | 5, 30 | 5, 31 | 5, 32 | 5 33 | ], 34 | "strides": [ 35 | 1, 36 | 1, 37 | 1, 38 | 1 39 | ], 40 | "dropout": 0.5 41 | } 42 | }, 43 | "k": 1, 44 | "k_eval": 25, 45 | "kl_min": 0.07, 46 | "kl_weight": 100.0, 47 | "kl_weight_start": 0, 48 | "kl_decay_rate": 0.99995, 49 | "kl_crossover": 400, 50 | "kl_sigmoid_divisor": 4, 51 | "rnn_kwargs": { 52 | "dropout_keep_prob": 0.75 53 | }, 54 | "MLP_dropout_keep_prob": 0.9, 55 | 
"enc_rnn_dim_edge": 32, 56 | "enc_rnn_dim_edge_influence": 32, 57 | "enc_rnn_dim_history": 32, 58 | "enc_rnn_dim_future": 32, 59 | "dec_rnn_dim": 128, 60 | "q_z_xy_MLP_dims": null, 61 | "p_z_x_MLP_dims": 32, 62 | "GMM_components": 1, 63 | "log_p_yt_xz_max": 6, 64 | "N": 1, 65 | "K": 25, 66 | "tau_init": 2.0, 67 | "tau_final": 0.05, 68 | "tau_decay_rate": 0.997, 69 | "use_z_logit_clipping": true, 70 | "z_logit_clip_start": 0.05, 71 | "z_logit_clip_final": 5.0, 72 | "z_logit_clip_crossover": 300, 73 | "z_logit_clip_divisor": 5, 74 | "dynamic": { 75 | "PEDESTRIAN": { 76 | "name": "SingleIntegrator", 77 | "distribution": true, 78 | "limits": {} 79 | } 80 | }, 81 | "state": { 82 | "PEDESTRIAN": { 83 | "position": [ 84 | "x", 85 | "y" 86 | ], 87 | "velocity": [ 88 | "x", 89 | "y" 90 | ], 91 | "acceleration": [ 92 | "x", 93 | "y" 94 | ] 95 | } 96 | }, 97 | "pred_state": { 98 | "PEDESTRIAN": { 99 | "position": [ 100 | "x", 101 | "y" 102 | ] 103 | } 104 | }, 105 | "log_histograms": false, 106 | "dynamic_edges": "yes", 107 | "edge_state_combine_method": "sum", 108 | "edge_influence_combine_method": "attention", 109 | "edge_addition_filter": [ 110 | 0.25, 111 | 0.5, 112 | 0.75, 113 | 1.0 114 | ], 115 | "edge_removal_filter": [ 116 | 1.0, 117 | 0.0 118 | ], 119 | "offline_scene_graph": "yes", 120 | "incl_robot_node": false, 121 | "node_freq_mult_train": false, 122 | "node_freq_mult_eval": false, 123 | "scene_freq_mult_train": false, 124 | "scene_freq_mult_eval": false, 125 | "scene_freq_mult_viz": false, 126 | "edge_encoding": true, 127 | "use_map_encoding": false, 128 | "augment": true, 129 | "override_attention_radius": [] 130 | } -------------------------------------------------------------------------------- /lib/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pickle as pkl 4 | import os 5 | import copy 6 | import torch 7 | import torch.utils.data as data 8 | from lib.dataloaders import build_dataset 9 | 10 | def set_seed(seed): 11 | random.seed(seed) 12 | os.environ['PYTHONHASHSEED'] = str(seed) 13 | np.random.seed(seed) 14 | torch.manual_seed(seed) 15 | if torch.cuda.is_available(): 16 | torch.cuda.manual_seed(seed) 17 | torch.cuda.manual_seed_all(seed) 18 | torch.backends.cudnn.benchmark = False 19 | torch.backends.cudnn.deterministic = True 20 | 21 | 22 | 23 | def build_data_loader(args, phase='train',batch_size=None): 24 | data_loaders = data.DataLoader( 25 | dataset=build_dataset(args, phase), 26 | batch_size=args.batch_size if batch_size is None else batch_size, 27 | shuffle=phase=='train', 28 | num_workers=args.num_workers, 29 | collate_fn=my_collate_fn if batch_size is not None else None) 30 | 31 | return data_loaders 32 | 33 | def my_collate_fn(batch): 34 | return batch[0] 35 | 36 | def cxcywh_to_x1y1x2y2(boxes): 37 | ''' 38 | Params: 39 | boxes:(Cx, Cy, w, h) 40 | Returns: 41 | (x1, y1, x2, y2 or tlbr 42 | ''' 43 | new_boxes = np.zeros_like(boxes) 44 | new_boxes[...,0] = boxes[...,0] - boxes[...,2]/2 45 | new_boxes[...,1] = boxes[...,1] - boxes[...,3]/2 46 | new_boxes[...,2] = boxes[...,0] + boxes[...,2]/2 47 | new_boxes[...,3] = boxes[...,1] + boxes[...,3]/2 48 | return new_boxes 49 | 50 | 51 | def bbox_normalize(bbox,W=1280,H=640): 52 | ''' 53 | normalize bbox value to [0,1] 54 | :Params: 55 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to W or H 56 | :Return: 57 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to 1 58 | ''' 59 | new_bbox = copy.deepcopy(bbox) 
60 | new_bbox[:,0] /= W 61 | new_bbox[:,1] /= H 62 | new_bbox[:,2] /= W 63 | new_bbox[:,3] /= H 64 | 65 | return new_bbox 66 | 67 | def bbox_denormalize(bbox,W=1280,H=640): 68 | ''' 69 | normalize bbox value to [0,1] 70 | :Params: 71 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to 1 72 | :Return: 73 | bbox: [cx, cy, w, h] with size (times, 4), value from 0 to W or H 74 | ''' 75 | new_bbox = copy.deepcopy(bbox) 76 | new_bbox[..., 0] *= W 77 | new_bbox[..., 1] *= H 78 | new_bbox[..., 2] *= W 79 | new_bbox[..., 3] *= H 80 | 81 | return new_bbox 82 | 83 | 84 | # FLow loading code adapted from: 85 | # http://stackoverflow.com/questions/28013200/reading-middlebury-flow-files-with-python-bytes-array-numpy 86 | 87 | def load_flow(flow_folder): 88 | ''' 89 | Given video key, load the corresponding flow file 90 | ''' 91 | flow_files = sorted(glob.glob(flow_folder + '*.flo')) 92 | flows = [] 93 | for file in flow_files: 94 | flow = read_flo(file) 95 | flows.append(flow) 96 | return flows 97 | 98 | TAG_FLOAT = 202021.25 99 | 100 | def read_flo(file): 101 | assert type(file) is str, "file is not str %r" % str(file) 102 | assert os.path.isfile(file) is True, "file does not exist %r" % str(file) 103 | assert file[-4:] == '.flo', "file ending is not .flo %r" % file[-4:] 104 | f = open(file,'rb') 105 | flo_number = np.fromfile(f, np.float32, count=1)[0] 106 | assert flo_number == TAG_FLOAT, 'Flow number %r incorrect. Invalid .flo file' % flo_number 107 | w = int(np.fromfile(f, np.int32, count=1)) 108 | h = int(np.fromfile(f, np.int32, count=1)) 109 | #if error try: data = np.fromfile(f, np.float32, count=2*w[0]*h[0]) 110 | data = np.fromfile(f, np.float32, count=2*w*h) 111 | # Reshape data into 3D array (columns, rows, bands) 112 | flow = np.resize(data, (int(h), int(w), 2)) 113 | f.close() 114 | 115 | return flow 116 | 117 | -------------------------------------------------------------------------------- /SGNet_env.yml: -------------------------------------------------------------------------------- 1 | name: SGNet 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - backcall=0.2.0=py_0 8 | - blas=1.0=mkl 9 | - bzip2=1.0.8=h7b6447c_0 10 | - ca-certificates=2020.12.8=h06a4308_0 11 | - cairo=1.14.12=h8948797_3 12 | - certifi=2020.12.5=py36h06a4308_0 13 | - cudatoolkit=11.0.221=h6bb024c_0 14 | - cycler=0.10.0=py36_0 15 | - dataclasses=0.7=py36_0 16 | - dbus=1.13.18=hb2f20db_0 17 | - decorator=4.4.2=py_0 18 | - dill=0.3.3=pyhd3eb1b0_0 19 | - expat=2.2.10=he6710b0_2 20 | - ffmpeg=4.0=hcdf2ecd_0 21 | - fontconfig=2.13.0=h9420a91_0 22 | - freeglut=3.0.0=hf484d3e_5 23 | - freetype=2.10.4=h5ab3b9f_0 24 | - glib=2.66.1=h92f7085_0 25 | - graphite2=1.3.14=h23475e2_0 26 | - gst-plugins-base=1.14.0=h8213a91_2 27 | - gstreamer=1.14.0=h28cd5cc_2 28 | - harfbuzz=1.8.8=hffaf4a1_0 29 | - hdf5=1.10.2=hba1933b_1 30 | - icu=58.2=he6710b0_3 31 | - intel-openmp=2020.2=254 32 | - ipykernel=5.3.4=py36h5ca1d4c_0 33 | - ipython=7.16.1=py36h5ca1d4c_0 34 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 35 | - jasper=2.0.14=h07fcdf6_1 36 | # - jedi=0.18.0=py36h06a4308_0 37 | - joblib=1.0.0=pyhd3eb1b0_0 38 | - jpeg=9b=h024ee3a_2 39 | - jupyter_client=6.1.7=py_0 40 | - jupyter_core=4.7.0=py36h06a4308_0 41 | - kiwisolver=1.3.0=py36h2531618_0 42 | - lcms2=2.11=h396b838_0 43 | - ld_impl_linux-64=2.33.1=h53a641e_7 44 | - libedit=3.1.20191231=h14c3975_1 45 | - libffi=3.3=he6710b0_2 46 | - libgcc-ng=9.1.0=hdf63c60_0 47 | - libgfortran-ng=7.3.0=hdf63c60_0 48 | - libglu=9.0.0=hf484d3e_1 49 | - 
libopencv=3.4.2=hb342d67_1 50 | - libopus=1.3.1=h7b6447c_0 51 | - libpng=1.6.37=hbc83047_0 52 | - libsodium=1.0.18=h7b6447c_0 53 | - libstdcxx-ng=9.1.0=hdf63c60_0 54 | - libtiff=4.1.0=h2733197_1 55 | - libuuid=1.0.3=h1bed415_2 56 | - libuv=1.40.0=h7b6447c_0 57 | - libvpx=1.7.0=h439df22_0 58 | - libxcb=1.14=h7b6447c_0 59 | - libxml2=2.9.10=hb55368b_3 60 | - lz4-c=1.9.2=heb0550a_3 61 | - matplotlib=3.3.2=h06a4308_0 62 | - matplotlib-base=3.3.2=py36h817c723_0 63 | - mkl=2020.2=256 64 | - mkl-service=2.3.0=py36he8ac12f_0 65 | - mkl_fft=1.2.0=py36h23d657b_0 66 | - mkl_random=1.1.1=py36h0573a6f_0 67 | - ncurses=6.2=he6710b0_1 68 | - ninja=1.10.2=py36hff7bd54_0 69 | - numpy=1.19.2=py36h54aff64_0 70 | - numpy-base=1.19.2=py36hfa32c7d_0 71 | - olefile=0.46=py36_0 72 | - opencv=3.4.2=py36h6fd60c2_1 73 | - openssl=1.1.1i=h27cfd23_0 74 | - pandas=1.1.5=py36ha9443f7_0 75 | - parso=0.7.0=py_0 76 | - pcre=8.44=he6710b0_0 77 | - pexpect=4.8.0=pyhd3eb1b0_3 78 | - pickleshare=0.7.5=pyhd3eb1b0_1003 79 | - pillow=8.0.1=py36he98fc37_0 80 | - pip=20.3.3=py36h06a4308_0 81 | - pixman=0.40.0=h7b6447c_0 82 | - prompt-toolkit=3.0.8=py_0 83 | - ptyprocess=0.7.0=pyhd3eb1b0_2 84 | - py-opencv=3.4.2=py36hb342d67_1 85 | - pygments=2.7.4=pyhd3eb1b0_0 86 | - pyparsing=2.4.7=py_0 87 | - pyqt=5.9.2=py36h05f1152_2 88 | - python=3.6.12=hcff3b4d_2 89 | - python-dateutil=2.8.1=py_0 90 | - pytorch=1.7.1=py3.6_cuda11.0.221_cudnn8.0.5_0 91 | - pytz=2020.5=pyhd3eb1b0_0 92 | - pyzmq=20.0.0=py36h2531618_1 93 | - qt=5.9.7=h5867ecd_1 94 | - readline=8.0=h7b6447c_0 95 | - scikit-learn=0.23.2=py36h0573a6f_0 96 | - scipy=1.5.2=py36h0b6359f_0 97 | - setuptools=51.0.0=py36h06a4308_2 98 | - sip=4.19.8=py36hf484d3e_0 99 | - six=1.15.0=py36h06a4308_0 100 | - sqlite=3.33.0=h62c20be_0 101 | - threadpoolctl=2.1.0=pyh5ca1d4c_0 102 | - tk=8.6.10=hbc83047_0 103 | - torchaudio=0.7.2=py36 104 | - torchvision=0.8.2=py36_cu110 105 | - tornado=6.1=py36h27cfd23_0 106 | - tqdm=4.54.1=pyhd3eb1b0_0 107 | - traitlets=4.3.3=py36_0 108 | - typing_extensions=3.7.4.3=py_0 109 | - wcwidth=0.2.5=py_0 110 | - wheel=0.36.2=pyhd3eb1b0_0 111 | - xz=5.2.5=h7b6447c_0 112 | - zeromq=4.3.3=he6710b0_3 113 | - zlib=1.2.11=h7b6447c_3 114 | - zstd=1.4.5=h9ceee32_0 115 | - pip: 116 | - ncls==0.0.51 117 | - orjson==3.4.7 118 | 119 | -------------------------------------------------------------------------------- /lib/dataloaders/trajectron.py: -------------------------------------------------------------------------------- 1 | from torch.utils import data 2 | import numpy as np 3 | import random 4 | import torch 5 | from copy import deepcopy 6 | 7 | class NodeTypeDataset(data.Dataset): 8 | def __init__(self, env, node_type, state, pred_state, node_freq_mult, 9 | scene_freq_mult, hyperparams, augment=False, **kwargs): 10 | self.env = env 11 | self.state = state 12 | self.pred_state = pred_state 13 | self.hyperparams = hyperparams 14 | self.max_ht = self.hyperparams['maximum_history_length'] 15 | self.max_ft = kwargs['min_future_timesteps'] 16 | 17 | self.augment = augment 18 | 19 | self.node_type = node_type 20 | self.edge_types = [edge_type for edge_type in env.get_edge_types() if edge_type[0] is node_type] 21 | self.index = self.index_env(node_freq_mult, scene_freq_mult, **kwargs) 22 | self.len = len(self.index) 23 | 24 | # print(self.edge_types) 25 | 26 | def index_env(self, node_freq_mult, scene_freq_mult, **kwargs): 27 | index = list() 28 | for scene in self.env.scenes: 29 | present_node_dict = scene.present_nodes(np.arange(0, scene.timesteps), type=self.node_type, **kwargs) 
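            # present_node_dict maps each timestep of the scene to the nodes of this type
            # observed at that time; every (scene, timestep, node) triple is pre-processed
            # by get_node_timestep_data and appended to the index below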
30 | for t, nodes in present_node_dict.items(): 31 | for node in nodes: 32 | valid = True 33 | data = [(scene, t, node)] *\ 34 | (scene.frequency_multiplier if scene_freq_mult else 1) *\ 35 | (node.frequency_multiplier if node_freq_mult else 1) 36 | (scene, t, node) = data[0] 37 | if self.augment: 38 | scene = scene.augment() 39 | node = scene.get_node_by_id(node.id) 40 | first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep = get_node_timestep_data(self.env, scene, t, node, self.state, self.pred_state,\ 41 | self.edge_types, self.max_ht, self.max_ft, self.hyperparams) 42 | 43 | all_t = torch.cat((x_t[:,:2], y_t),dim=0) 44 | if valid: 45 | index += [ (first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep)] 46 | else: 47 | pass 48 | return index 49 | 50 | def __len__(self): 51 | return self.len 52 | 53 | def __getitem__(self, i): 54 | (first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep) = self.index[i] 55 | return first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name, timestep 56 | 57 | 58 | def get_node_timestep_data(env, scene, t, node, state, pred_state, 59 | edge_types, max_ht, max_ft, hyperparams, 60 | scene_graph=None): 61 | """ 62 | Pre-processes the data for a single batch element: node state over time for a specific time in a specific scene 63 | as well as the neighbour data for it. 64 | 65 | :param env: Environment 66 | :param scene: Scene 67 | :param t: Timestep in scene 68 | :param node: Node 69 | :param state: Specification of the node state 70 | :param pred_state: Specification of the prediction state 71 | :param edge_types: List of all Edge Types for which neighbours are pre-processed 72 | :param max_ht: Maximum history timesteps 73 | :param max_ft: Maximum future timesteps (prediction horizon) 74 | :param hyperparams: Model hyperparameters 75 | :param scene_graph: If scene graph was already computed for this scene and time you can pass it here 76 | :return: Batch Element 77 | """ 78 | 79 | # Node 80 | timestep_range_x = np.array([t - max_ht, t]) 81 | timestep_range_y = np.array([t + 1, t + max_ft]) 82 | 83 | x = node.get(timestep_range_x, state[node.type]) 84 | y = node.get(timestep_range_y, pred_state[node.type]) 85 | first_history_index = (max_ht - node.history_points_at(t)).clip(0) 86 | x_st_t = deepcopy(x) 87 | x_st_t = x_st_t - x[-1] 88 | y_st_t = y 89 | 90 | 91 | x_t = torch.tensor(x, dtype=torch.float) 92 | y_t = torch.tensor(y, dtype=torch.float) 93 | 94 | x_st_t = torch.tensor(x_st_t, dtype=torch.float) 95 | y_st_t = torch.tensor(y_st_t, dtype=torch.float) 96 | 97 | return (first_history_index, x_t, y_t, x_st_t, y_st_t, scene.name, t) 98 | -------------------------------------------------------------------------------- /lib/utils/hevi_train_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_hevi 14 | 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 
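    # goal and decoder RMSE are accumulated per batch, weighted by batch size, and
    # normalised by the dataset length once the epoch finishes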
18 | 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | input_traj, input_flow, target_traj = data 25 | batch_size = input_traj.shape[0] 26 | #print(batch_size) 27 | input_traj = input_traj.to('cuda', non_blocking=True) 28 | input_flow = input_flow.to('cuda', non_blocking=True) 29 | target_traj = target_traj.to('cuda', non_blocking=True) 30 | 31 | all_goal_traj, all_dec_traj = model([input_traj,input_flow]) 32 | goal_loss = criterion(all_goal_traj, target_traj) 33 | dec_loss = criterion(all_dec_traj, target_traj) 34 | 35 | train_loss = goal_loss + dec_loss 36 | 37 | total_goal_loss += goal_loss.item()* batch_size 38 | total_dec_loss += dec_loss.item()* batch_size 39 | 40 | 41 | # optimize 42 | optimizer.zero_grad() 43 | train_loss.backward() 44 | optimizer.step() 45 | 46 | total_goal_loss /= len(train_gen.dataset) 47 | total_dec_loss /= len(train_gen.dataset) 48 | 49 | 50 | return total_goal_loss, total_dec_loss, total_goal_loss + total_dec_loss 51 | 52 | 53 | def test(model, test_gen, criterion, device): 54 | total_goal_loss = 0 55 | total_dec_loss = 0 56 | ADE_15 = 0 57 | ADE_05 = 0 58 | ADE_10 = 0 59 | FDE = 0 60 | FIOU = 0 61 | CADE = 0 62 | CFDE = 0 63 | model.eval() 64 | loader = tqdm(test_gen, total=len(test_gen)) 65 | with torch.set_grad_enabled(False): 66 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 67 | 68 | input_traj, input_flow, target_traj = data 69 | batch_size = input_traj.shape[0] 70 | input_traj = input_traj.to('cuda', non_blocking=True) 71 | input_flow = input_flow.to('cuda', non_blocking=True) 72 | target_traj = target_traj.to('cuda', non_blocking=True) 73 | 74 | all_goal_traj, all_dec_traj = model([input_traj,input_flow]) 75 | 76 | 77 | goal_loss = criterion(all_goal_traj, target_traj) 78 | dec_loss = criterion(all_dec_traj, target_traj) 79 | 80 | test_loss = goal_loss + dec_loss 81 | 82 | total_goal_loss += goal_loss.item()* batch_size 83 | total_dec_loss += dec_loss.item()* batch_size 84 | 85 | all_dec_traj_np = all_dec_traj.to('cpu').numpy() 86 | input_traj_np = input_traj.to('cpu').numpy() 87 | target_traj_np = target_traj.to('cpu').numpy() 88 | 89 | # Decoder 90 | batch_ADE_15, batch_ADE_05, batch_ADE_10, batch_FDE, batch_CADE, batch_CFDE, batch_FIOU =\ 91 | eval_hevi(input_traj_np, target_traj_np, all_dec_traj_np) 92 | 93 | ADE_15 += batch_ADE_15 94 | ADE_05 += batch_ADE_05 95 | ADE_10 += batch_ADE_10 96 | FDE += batch_FDE 97 | CADE += batch_CADE 98 | CFDE += batch_CFDE 99 | FIOU += batch_FIOU 100 | 101 | 102 | 103 | ADE_15 /= len(test_gen.dataset) 104 | ADE_05 /= len(test_gen.dataset) 105 | ADE_10 /= len(test_gen.dataset) 106 | FDE /= len(test_gen.dataset) 107 | FIOU /= len(test_gen.dataset) 108 | 109 | CADE /= len(test_gen.dataset) 110 | CFDE /= len(test_gen.dataset) 111 | 112 | test_loss = total_goal_loss/len(test_gen.dataset) + total_dec_loss/len(test_gen.dataset) 113 | 114 | print("ADE_05: %4f; ADE_10: %4f; ADE_15: %4f; FDE: %4f; FIOU: %4f\n" % (ADE_05, ADE_10, ADE_15, FDE, FIOU)) 115 | print("CFDE: %4f; CADE: %4f; \n" % (CFDE, CADE)) 116 | return test_loss, ADE_15, ADE_05, ADE_10, FDE, FIOU, CADE, CFDE 117 | -------------------------------------------------------------------------------- /lib/models/bitrap_np.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Defined classes: 3 | class BiTraPNP() 4 | Some utilities are cited from 
Trajectron++ 5 | ''' 6 | import sys 7 | import numpy as np 8 | import copy 9 | from collections import defaultdict 10 | import torch 11 | from torch import nn, optim 12 | from torch.nn import functional as F 13 | import torch.nn.utils.rnn as rnn 14 | from torch.distributions import Normal 15 | 16 | def reconstructed_probability(x): 17 | recon_dist = Normal(0, 1) 18 | p = recon_dist.log_prob(x).exp().mean(dim=-1) # [batch_size, K] 19 | return p 20 | 21 | class BiTraPNP(nn.Module): 22 | def __init__(self, args): 23 | super(BiTraPNP, self).__init__() 24 | self.args = copy.deepcopy(args) 25 | self.param_scheduler = None 26 | self.input_dim = self.args.input_dim 27 | self.pred_dim = self.args.pred_dim 28 | self.hidden_size = self.args.hidden_size 29 | self.nu = args.nu 30 | self.sigma = args.sigma 31 | self.node_future_encoder_h = nn.Sequential(nn.Linear(self.input_dim, self.hidden_size//2),nn.ReLU()) 32 | self.gt_goal_encoder = nn.GRU(input_size=self.pred_dim, 33 | hidden_size=self.hidden_size//2, 34 | bidirectional=True, 35 | batch_first=True) 36 | self.p_z_x = nn.Sequential(nn.Linear(self.hidden_size, 37 | 128), 38 | nn.ReLU(), 39 | nn.Linear(128, 64), 40 | nn.ReLU(), 41 | nn.Linear(64, self.args.LATENT_DIM*2)) 42 | # posterior 43 | self.q_z_xy = nn.Sequential(nn.Linear(self.hidden_size + self.hidden_size, 44 | 128), 45 | nn.ReLU(), 46 | nn.Linear(128, 64), 47 | nn.ReLU(), 48 | nn.Linear(64, self.args.LATENT_DIM*2)) 49 | 50 | 51 | 52 | def gaussian_latent_net(self, enc_h, cur_state, K, target=None, z_mode=None): 53 | # get mu, sigma 54 | # 1. sample z from piror 55 | z_mu_logvar_p = self.p_z_x(enc_h) 56 | z_mu_p = z_mu_logvar_p[:, :self.args.LATENT_DIM] 57 | z_logvar_p = z_mu_logvar_p[:, self.args.LATENT_DIM:] 58 | if target is not None: 59 | # 2. sample z from posterior, for training only 60 | initial_h = self.node_future_encoder_h(cur_state) 61 | initial_h = torch.stack([initial_h, torch.zeros_like(initial_h, device=initial_h.device)], dim=0) 62 | self.gt_goal_encoder.flatten_parameters() 63 | _, target_h = self.gt_goal_encoder(target, initial_h) 64 | target_h = target_h.permute(1,0,2) 65 | target_h = target_h.reshape(-1, target_h.shape[1] * target_h.shape[2]) 66 | 67 | z_mu_logvar_q = self.q_z_xy(torch.cat([enc_h, target_h], dim=-1)) 68 | z_mu_q = z_mu_logvar_q[:, :self.args.LATENT_DIM] 69 | z_logvar_q = z_mu_logvar_q[:, self.args.LATENT_DIM:] 70 | Z_mu = z_mu_q 71 | Z_logvar = z_logvar_q 72 | 73 | # 3. compute KL(q_z_xy||p_z_x) 74 | KLD = 0.5 * ((z_logvar_q.exp()/z_logvar_p.exp()) + \ 75 | (z_mu_p - z_mu_q).pow(2)/z_logvar_p.exp() - \ 76 | 1 + \ 77 | (z_logvar_p - z_logvar_q)) 78 | KLD = KLD.sum(dim=-1).mean() 79 | KLD = torch.clamp(KLD, min=0.001) 80 | 81 | else: 82 | Z_mu = z_mu_p 83 | Z_logvar = z_logvar_p 84 | KLD = torch.as_tensor(0.0, device=Z_logvar.device) 85 | 86 | # 4. 
Draw sample 87 | with torch.set_grad_enabled(False): 88 | K_samples = torch.normal(self.nu, self.sigma, size = (enc_h.shape[0], K, self.args.LATENT_DIM)).cuda() 89 | 90 | probability = reconstructed_probability(K_samples) 91 | Z_std = torch.exp(0.5 * Z_logvar) 92 | Z = Z_mu.unsqueeze(1).repeat(1, K, 1) + K_samples * Z_std.unsqueeze(1).repeat(1, K, 1) 93 | if z_mode: 94 | Z = torch.cat((Z_mu.unsqueeze(1), Z), dim=1) 95 | 96 | 97 | return Z, KLD, probability 98 | 99 | 100 | def forward(self, h_x, last_input, K, target_y=None): 101 | ''' 102 | Params: 103 | 104 | ''' 105 | Z, KLD, probability = self.gaussian_latent_net(h_x, last_input, K, target_y, z_mode=False) 106 | enc_h_and_z = torch.cat([h_x.unsqueeze(1).repeat(1, Z.shape[1], 1), Z], dim=-1) 107 | dec_h = enc_h_and_z if self.args.DEC_WITH_Z else h_x 108 | return dec_h, KLD, probability 109 | -------------------------------------------------------------------------------- /lib/utils/ethucy_train_utils.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_ethucy 14 | 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 18 | count = 0 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | first_history_index = data['first_history_index'] 25 | assert torch.unique(first_history_index).shape[0] == 1 26 | batch_size = data['input_x'].shape[0] 27 | count += batch_size 28 | 29 | input_traj = data['input_x'].to(device) 30 | input_bbox_st = data['input_x_st'].to(device) 31 | target_traj = data['target_y'].to(device) 32 | # target_bbox_st = data['target_y_st'].to(device) 33 | 34 | all_goal_traj, all_dec_traj = model(input_traj, first_history_index[0]) 35 | 36 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 37 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 38 | 39 | train_loss = goal_loss + dec_loss 40 | 41 | total_goal_loss += goal_loss.item()* batch_size 42 | total_dec_loss += dec_loss.item()* batch_size 43 | 44 | 45 | # optimize 46 | optimizer.zero_grad() 47 | train_loss.backward() 48 | optimizer.step() 49 | 50 | total_goal_loss /= count 51 | total_dec_loss /= count 52 | 53 | 54 | return total_goal_loss, total_dec_loss, total_goal_loss + total_dec_loss 55 | 56 | def val(model, val_gen, criterion, device): 57 | total_goal_loss = 0 58 | total_dec_loss = 0 59 | count = 0 60 | model.eval() 61 | loader = tqdm(val_gen, total=len(val_gen)) 62 | with torch.set_grad_enabled(False): 63 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 64 | first_history_index = data['first_history_index'] 65 | assert torch.unique(first_history_index).shape[0] == 1 66 | batch_size = data['input_x'].shape[0] 67 | count += batch_size 68 | 69 | input_traj = data['input_x'].to(device) 70 | input_bbox_st = data['input_x_st'].to(device) 71 | target_traj = data['target_y'].to(device) 72 | # target_bbox_st = data['target_y_st'].to(device) 73 | 74 | all_goal_traj, all_dec_traj = 
model(input_traj, first_history_index[0]) 75 | 76 | 77 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 78 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 79 | 80 | total_goal_loss += goal_loss.item()* batch_size 81 | total_dec_loss += dec_loss.item()* batch_size 82 | 83 | val_loss = total_goal_loss/count + total_dec_loss/count 84 | return val_loss 85 | 86 | def test(model, test_gen, criterion, device): 87 | total_goal_loss = 0 88 | total_dec_loss = 0 89 | ADE_08 = 0 90 | ADE_12 = 0 91 | FDE_08 = 0 92 | FDE_12 = 0 93 | count = 0 94 | model.eval() 95 | loader = tqdm(test_gen, total=len(test_gen)) 96 | with torch.set_grad_enabled(False): 97 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 98 | 99 | first_history_index = data['first_history_index'] 100 | assert torch.unique(first_history_index).shape[0] == 1 101 | batch_size = data['input_x'].shape[0] 102 | count += batch_size 103 | 104 | input_traj = data['input_x'].to(device) 105 | input_bbox_st = data['input_x_st'].to(device) 106 | target_traj = data['target_y'].to(device) 107 | # target_bbox_st = data['target_y_st'].to(device) 108 | 109 | all_goal_traj, all_dec_traj = model(input_traj, first_history_index[0]) 110 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 111 | dec_loss = criterion(all_dec_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 112 | 113 | train_loss = goal_loss + dec_loss 114 | 115 | total_goal_loss += goal_loss.item()* batch_size 116 | total_dec_loss += dec_loss.item()* batch_size 117 | 118 | all_dec_traj_np = all_dec_traj.to('cpu').numpy() 119 | input_traj_np = input_traj.to('cpu').numpy() 120 | target_traj_np = target_traj.to('cpu').numpy() 121 | 122 | # Decoder 123 | batch_ADE_08, batch_FDE_08, batch_ADE_12, batch_FDE_12 =\ 124 | eval_ethucy(input_traj_np, target_traj_np, all_dec_traj_np) 125 | 126 | ADE_08 += batch_ADE_08 127 | ADE_12 += batch_ADE_12 128 | FDE_08 += batch_FDE_08 129 | FDE_12 += batch_FDE_12 130 | 131 | ADE_08 /= count 132 | ADE_12 /= count 133 | FDE_08 /= count 134 | FDE_12 /= count 135 | 136 | 137 | test_loss = total_goal_loss/count + total_dec_loss/count 138 | 139 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 140 | return test_loss, ADE_08, FDE_08, ADE_12, FDE_12 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pytorch Implementation for Stepwise Goal-Driven Networks for Trajectory Prediction (RA-L/ICRA2022) 2 | 3 | 4 | 5 | ## Installation 6 | 7 | # Cloning 8 | 9 | We use part of the dataloader in Trajectron++, so we include [Trajectron++](https://github.com/StanfordASL/Trajectron-plus-plus) as a submodule. 10 | ``` 11 | git clone --recurse-submodules git@github.com:ChuhuaW/SGNet.pytorch.git 12 | ``` 13 | 14 | # Environment 15 | 16 | * Install conda environment from yml file 17 | 18 | ``` 19 | conda env create --file SGNet_env.yml 20 | ``` 21 | 22 | # Data 23 | 24 | * JAAD and PIE 25 | JAAD and PIE can be downloaded from https://github.com/ykotseruba/JAAD and https://github.com/aras62/PIE, respectively. 
Creating symlinks from the dataset path to ```./data``` 26 | 27 | ``` 28 | ln -s path/to/dataset/ ./data/ 29 | ``` 30 | 31 | * ETH/UCY 32 | We follow [Trajectron++](https://github.com/StanfordASL/Trajectron-plus-plus) to preprocess data splits for the ETH and UCY datasets in this repository. Please refer to their repository for instruction. After the data is generated, please create symlinks from the dataset path to ```./data``` 33 | 34 | ``` 35 | ln -s path/to/dataset/ ./data/ 36 | ``` 37 | 38 | 39 | ## Training 40 | 41 | ### Stochastic prediction 42 | 43 | * Training on JAAD dataset: 44 | ``` 45 | cd SGDNet.Pytorch 46 | python tools/jaad/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet_CVAE 47 | ``` 48 | 49 | * Training on PIE dataset: 50 | ``` 51 | cd SGDNet.Pytorch 52 | python tools/pie/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet_CVAE 53 | ``` 54 | 55 | * Training on ETH/UCY dataset: 56 | ``` 57 | cd SGDNet.Pytorch 58 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet_CVAE 59 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet_CVAE 60 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet_CVAE 61 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet_CVAE 62 | python tools/ethucy/train_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet_CVAE 63 | ``` 64 | 65 | ### Deterministic prediction 66 | 67 | * Training on JAAD dataset: 68 | ``` 69 | cd SGDNet.Pytorch 70 | python tools/jaad/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet 71 | ``` 72 | 73 | * Training on PIE dataset: 74 | ``` 75 | cd SGDNet.Pytorch 76 | python tools/pie/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet 77 | ``` 78 | 79 | * Training on ETH/UCY dataset: 80 | ``` 81 | cd SGDNet.Pytorch 82 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet 83 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet 84 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet 85 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet 86 | python tools/ethucy/train_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet 87 | ``` 88 | 89 | ## Evaluation 90 | 91 | ### Stochastic prediction 92 | 93 | * Evaluating on JAAD dataset: 94 | ``` 95 | cd SGDNet.Pytorch 96 | python tools/jaad/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset JAAD --model SGNet_CVAE --checkpoint path/to/checkpoint 97 | ``` 98 | 99 | * Evaluating on PIE dataset: 100 | ``` 101 | cd SGDNet.Pytorch 102 | python tools/pie/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset PIE --model SGNet_CVAE --checkpoint path/to/checkpoint 103 | ``` 104 | 105 | * Evaluating on ETH/UCY dataset: 106 | ``` 107 | cd SGDNet.Pytorch 108 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet_CVAE --checkpoint path/to/checkpoint 109 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet_CVAE --checkpoint path/to/checkpoint 110 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet_CVAE --checkpoint path/to/checkpoint 111 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 
--model SGNet_CVAE --checkpoint path/to/checkpoint 112 | python tools/ethucy/eval_cvae.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet_CVAE --checkpoint path/to/checkpoint 113 | ``` 114 | 115 | ### Deterministic prediction 116 | 117 | * Evaluating on ETH/UCY dataset: 118 | [ETH/UCY checkpoints](https://drive.google.com/drive/folders/1FCudihx-dmns-lh61uOcOD5uIWaKdKh8?usp=sharing) 119 | 120 | ``` 121 | cd SGDNet.Pytorch 122 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ETH --model SGNet --checkpoint path/to/checkpoint 123 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset HOTEL --model SGNet --checkpoint path/to/checkpoint 124 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset UNIV --model SGNet --checkpoint path/to/checkpoint 125 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA1 --model SGNet --checkpoint path/to/checkpoint 126 | python tools/ethucy/eval_deterministic.py --gpu $CUDA_VISIBLE_DEVICES --dataset ZARA2 --model SGNet --checkpoint path/to/checkpoint 127 | ``` 128 | 129 | [JAAD/PIE checkpoints](https://drive.google.com/drive/folders/1SskmNtf9FMn4azAxIfKXcYUgAEuVKNgR?usp=sharing) 130 | 131 | ## Citation 132 | 133 | ``` 134 | @ARTICLE{9691856, 135 | author={Wang, Chuhua and Wang, Yuchen and Xu, Mingze and Crandall, David J.}, 136 | journal={IEEE Robotics and Automation Letters}, 137 | title={Stepwise Goal-Driven Networks for Trajectory Prediction}, 138 | year={2022}} 139 | ``` 140 | ```diff 141 | - Rank 3rd on nuScences prediction task at 6th AI Driving Olympics, ICRA 2021 142 | ``` 143 | The source code and pretrained models will be made availble. Stay tuned. 144 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-ethucy)](https://paperswithcode.com/sota/trajectory-prediction-on-ethucy?p=stepwise-goal-driven-networks-for-trajectory) 145 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-jaad)](https://paperswithcode.com/sota/trajectory-prediction-on-jaad?p=stepwise-goal-driven-networks-for-trajectory) 146 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/stepwise-goal-driven-networks-for-trajectory/trajectory-prediction-on-pie)](https://paperswithcode.com/sota/trajectory-prediction-on-pie?p=stepwise-goal-driven-networks-for-trajectory) 147 | 148 | 149 | 150 | -------------------------------------------------------------------------------- /lib/dataloaders/ethucy_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/ETH_UCY.py 2 | 3 | import os 4 | import sys 5 | sys.path.append('./Trajectron-plus-plus') 6 | sys.path.append('./Trajectron-plus-plus/trajectron') 7 | from .trajectron import NodeTypeDataset 8 | import numpy as np 9 | import torch 10 | from torch.utils import data 11 | import dill 12 | import json 13 | import random 14 | 15 | def chunks(lst, n): 16 | for i in range(0, len(lst), n): 17 | yield lst[i:i + n] 18 | 19 | 20 | class ETHUCYDataLayer(data.Dataset): 21 | 22 | def __init__(self, args, split): 23 | self.args = args 24 | self.split = split 25 | self.batch_size = args.batch_size 26 | 27 | conf_json = open(args.ETH_CONFIG, 'r') 28 | 
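        # args.ETH_CONFIG points to the Trajectron++-style hyperparameter JSON for the
        # ETH/UCY split; a few of its fields (history lengths, state / pred_state
        # definitions) are overridden just below to match enc_steps.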
hyperparams = json.load(conf_json) 29 | 30 | hyperparams['minimum_history_length'] = self.args.enc_steps-1 if self.split == 'test' else 1 31 | hyperparams['maximum_history_length'] = self.args.enc_steps-1 32 | 33 | hyperparams['state'] = {'PEDESTRIAN':{'position':['x','y'], 'velocity':['x','y'], 'acceleration':['x','y']}} 34 | hyperparams['pred_state'] = {'PEDESTRIAN':{'position':['x','y']}} 35 | 36 | args.data_root = args.dataset.lower() 37 | 38 | # File can be generated by using srcipts from Trajectron++ (https://github.com/StanfordASL/Trajectron-plus-plus) 39 | if split == 'train': 40 | f = open(os.path.join(args.eth_root, args.data_root, 'train', args.data_root+'_train.pkl'), 'rb') 41 | elif split == 'val': 42 | f = open(os.path.join(args.eth_root, args.data_root, 'val', args.data_root+'_val.pkl'), 'rb') 43 | elif split == 'test': 44 | f = open(os.path.join(args.eth_root, args.data_root, 'test', args.data_root+'_test.pkl'), 'rb') 45 | else: 46 | raise ValueError() 47 | 48 | train_env = dill.load(f, encoding='latin1') 49 | 50 | node_type=train_env.NodeType[0] 51 | train_env.attention_radius[(node_type, node_type)] = 3.0 #10.0 52 | augment = False 53 | if split=='train': 54 | min_history_timesteps = 1 55 | augment = True if self.args.augment else False 56 | else: 57 | min_history_timesteps = 7 58 | self.dataset = NodeTypeDataset(train_env, 59 | node_type, 60 | hyperparams['state'], 61 | hyperparams['pred_state'], 62 | scene_freq_mult=hyperparams['scene_freq_mult_train'], 63 | node_freq_mult=hyperparams['node_freq_mult_train'], 64 | hyperparams=hyperparams, 65 | augment=augment, 66 | min_history_timesteps=min_history_timesteps, 67 | min_future_timesteps=hyperparams['prediction_horizon'], 68 | return_robot=False) 69 | 70 | self.len_dict = {} 71 | for index in range(len(self.dataset)): 72 | first_history_index, x_t, y_t, x_st_t, y_st_t,scene_name,timestep = self.dataset.__getitem__(index) 73 | if first_history_index not in self.len_dict: 74 | self.len_dict[first_history_index] = [] 75 | self.len_dict[first_history_index].append(index) 76 | self.shuffle_dataset() 77 | 78 | def shuffle_dataset(self): 79 | self._init_inputs() 80 | 81 | def _init_inputs(self): 82 | ''' 83 | shuffle the data based on its length 84 | ''' 85 | self.inputs = [] 86 | for length in self.len_dict: 87 | indices = self.len_dict[length] 88 | random.shuffle(indices) 89 | self.inputs.extend(list(chunks(self.len_dict[length], self.batch_size))) 90 | 91 | def __len__(self): 92 | return len(self.inputs) 93 | 94 | def __getitem__(self, index): 95 | indices = self.inputs[index] 96 | 97 | ret = { 98 | 'input_x': [], 99 | 'input_x_st': [], 100 | 'target_y': [], 101 | 'target_y_st': [], 102 | 'first_history_index':[], 103 | 'scene_name': [], 104 | 'timestep': [], 105 | } 106 | 107 | for idx in indices: 108 | this_ret = self.getitem_one(idx) 109 | ret['input_x'].append(this_ret['input_x']) 110 | ret['input_x_st'].append(this_ret['input_x_st']) 111 | ret['target_y'].append(torch.as_tensor(this_ret['target_y']).type(torch.FloatTensor)) 112 | ret['first_history_index'].append(torch.as_tensor(this_ret['first_history_index']).type(torch.LongTensor)) 113 | ret['scene_name'].append(this_ret['scene_name']) 114 | ret['timestep'].append(this_ret['timestep']) 115 | 116 | 117 | ret['input_x'] = torch.stack(ret['input_x']) 118 | ret['input_x_st'] = torch.stack(ret['input_x_st']) 119 | ret['target_y'] = torch.stack(ret['target_y']) 120 | 121 | ret['first_history_index'] = torch.stack(ret['first_history_index']) 122 | # to locate image 123 | 
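        # scene_name and timestep stay plain Python lists (strings / ints); only the
        # tensor entries above are stacked into batch tensors.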
ret['scene_name'] = ret['scene_name'] 124 | ret['timestep'] = ret['timestep'] 125 | 126 | return ret 127 | 128 | def getitem_one(self, index): 129 | first_history_index, x_t, y_t, x_st_t, y_st_t, scene_name, timestep = self.dataset.__getitem__(index) 130 | ret = {} 131 | all_t = torch.cat((x_t[:,:2], y_t),dim=0) 132 | y_t = self.get_target(all_t, 0, self.args.enc_steps, self.args.enc_steps, self.args.dec_steps) 133 | ret['first_history_index'] = first_history_index 134 | ret['input_x'] = x_t 135 | ret['input_x_st'] = x_st_t 136 | ret['target_y'] = y_t 137 | ret['target_y_st'] = y_st_t 138 | ret['scene_name'] = scene_name 139 | ret['timestep'] = timestep 140 | return ret 141 | 142 | def get_target(self, session, start, end, observe_length, predict_length): 143 | ''' 144 | Prepare the target for loss 145 | 146 | ''' 147 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 148 | for i, target_start in enumerate(range(start, end)): 149 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 150 | target_start = target_start + 1 151 | try: 152 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 153 | session[target_start-1:target_start,:]) 154 | except: 155 | print("segment start: ", start) 156 | print("sample start: ", target_start) 157 | print("segment end: ", end) 158 | print(session.shape) 159 | raise ValueError() 160 | return target 161 | 162 | 163 | -------------------------------------------------------------------------------- /lib/utils/jaadpie_train_utils_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_jaad_pie, eval_jaad_pie_cvae 14 | from lib.losses import cvae, cvae_multi 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 
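    # The loop below accumulates three loss terms per batch:
    #   goal_loss : criterion between the stepwise goal trajectories and the
    #               ground-truth future displacements,
    #   cvae_loss : cvae_multi over the K CVAE trajectory hypotheses,
    #   KLD       : the KL term returned by the model, averaged over the batch;
    # their sum (train_loss) is what gets back-propagated.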
18 | total_goal_loss = 0 19 | total_cvae_loss = 0 20 | total_KLD_loss = 0 21 | loader = tqdm(train_gen, total=len(train_gen)) 22 | with torch.set_grad_enabled(True): 23 | for batch_idx, data in enumerate(loader): 24 | batch_size = data['input_x'].shape[0] 25 | input_traj = data['input_x'].to(device) 26 | target_traj = data['target_y'].to(device) 27 | 28 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=target_traj) 29 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 30 | goal_loss = criterion(all_goal_traj, target_traj) 31 | 32 | train_loss = goal_loss + cvae_loss + KLD_loss.mean() 33 | 34 | total_goal_loss += goal_loss.item()* batch_size 35 | total_cvae_loss += cvae_loss.item()* batch_size 36 | total_KLD_loss += KLD_loss.mean()* batch_size 37 | 38 | # optimize 39 | optimizer.zero_grad() 40 | train_loss.backward() 41 | optimizer.step() 42 | 43 | total_goal_loss /= len(train_gen.dataset) 44 | total_cvae_loss/=len(train_gen.dataset) 45 | total_KLD_loss/=len(train_gen.dataset) 46 | 47 | return total_goal_loss, total_cvae_loss, total_KLD_loss 48 | 49 | def val(model, val_gen, criterion, device): 50 | total_goal_loss = 0 51 | total_cvae_loss = 0 52 | total_KLD_loss = 0 53 | model.eval() 54 | loader = tqdm(val_gen, total=len(val_gen)) 55 | with torch.set_grad_enabled(False): 56 | for batch_idx, data in enumerate(loader): 57 | batch_size = data['input_x'].shape[0] 58 | input_traj = data['input_x'].to(device) 59 | target_traj = data['target_y'].to(device) 60 | 61 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=None,training=False) 62 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 63 | 64 | 65 | goal_loss = criterion(all_goal_traj, target_traj) 66 | 67 | 68 | total_goal_loss += goal_loss.item()* batch_size 69 | total_cvae_loss += cvae_loss.item()* batch_size 70 | total_KLD_loss += KLD_loss.mean()* batch_size 71 | 72 | val_loss = total_goal_loss/len(val_gen.dataset)\ 73 | + total_cvae_loss/len(val_gen.dataset) + total_KLD_loss/len(val_gen.dataset) 74 | return val_loss 75 | 76 | def test(model, test_gen, criterion, device): 77 | total_goal_loss = 0 78 | total_cvae_loss = 0 79 | total_KLD_loss = 0 80 | MSE_15 = 0 81 | MSE_05 = 0 82 | MSE_10 = 0 83 | FMSE = 0 84 | FIOU = 0 85 | CMSE = 0 86 | CFMSE = 0 87 | model.eval() 88 | loader = tqdm(test_gen, total=len(test_gen)) 89 | with torch.set_grad_enabled(False): 90 | for batch_idx, data in enumerate(loader): 91 | batch_size = data['input_x'].shape[0] 92 | input_traj = data['input_x'].to(device) 93 | target_traj = data['target_y'].to(device) 94 | 95 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(inputs=input_traj, map_mask=None, targets=None, training=False) 96 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 97 | 98 | 99 | goal_loss = criterion(all_goal_traj, target_traj) 100 | 101 | test_loss = goal_loss + cvae_loss 102 | 103 | total_goal_loss += goal_loss.item()* batch_size 104 | total_cvae_loss += cvae_loss.item()* batch_size 105 | total_KLD_loss += KLD_loss.mean()* batch_size 106 | input_traj_np = input_traj.to('cpu').numpy() 107 | target_traj_np = target_traj.to('cpu').numpy() 108 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 109 | batch_MSE_15, batch_MSE_05, batch_MSE_10, batch_FMSE, batch_CMSE, batch_CFMSE, batch_FIOU =\ 110 | eval_jaad_pie_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 111 | MSE_15 += batch_MSE_15 112 | MSE_05 += batch_MSE_05 113 | MSE_10 += batch_MSE_10 114 | FMSE += batch_FMSE 115 | CMSE += 
batch_CMSE 116 | CFMSE += batch_CFMSE 117 | FIOU += batch_FIOU 118 | 119 | 120 | 121 | MSE_15 /= len(test_gen.dataset) 122 | MSE_05 /= len(test_gen.dataset) 123 | MSE_10 /= len(test_gen.dataset) 124 | FMSE /= len(test_gen.dataset) 125 | FIOU /= len(test_gen.dataset) 126 | 127 | CMSE /= len(test_gen.dataset) 128 | CFMSE /= len(test_gen.dataset) 129 | 130 | 131 | test_loss = total_goal_loss/len(test_gen.dataset) \ 132 | + total_cvae_loss/len(test_gen.dataset) + total_KLD_loss/len(test_gen.dataset) 133 | return test_loss, MSE_15, MSE_05, MSE_10, FMSE, FIOU, CMSE, CFMSE 134 | 135 | 136 | def weights_init(m): 137 | if isinstance(m, nn.Linear): 138 | m.weight.data.normal_(0.0, 0.001) 139 | elif isinstance(m, nn.Conv1d): 140 | nn.init.normal_(m.weight.data) 141 | if m.bias is not None: 142 | nn.init.normal_(m.bias.data) 143 | elif isinstance(m, nn.Conv2d): 144 | nn.init.xavier_normal_(m.weight.data) 145 | if m.bias is not None: 146 | nn.init.normal_(m.bias.data) 147 | elif isinstance(m, nn.Conv3d): 148 | nn.init.xavier_normal_(m.weight.data) 149 | if m.bias is not None: 150 | nn.init.normal_(m.bias.data) 151 | elif isinstance(m, nn.ConvTranspose1d): 152 | nn.init.normal_(m.weight.data) 153 | if m.bias is not None: 154 | nn.init.normal_(m.bias.data) 155 | elif isinstance(m, nn.ConvTranspose2d): 156 | nn.init.xavier_normal_(m.weight.data) 157 | if m.bias is not None: 158 | nn.init.normal_(m.bias.data) 159 | elif isinstance(m, nn.ConvTranspose3d): 160 | nn.init.xavier_normal_(m.weight.data) 161 | if m.bias is not None: 162 | nn.init.normal_(m.bias.data) 163 | elif isinstance(m, nn.BatchNorm1d): 164 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 165 | nn.init.constant_(m.bias.data, 0) 166 | elif isinstance(m, nn.BatchNorm2d): 167 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 168 | nn.init.constant_(m.bias.data, 0) 169 | elif isinstance(m, nn.BatchNorm3d): 170 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 171 | nn.init.constant_(m.bias.data, 0) 172 | elif isinstance(m, nn.LSTM): 173 | for param in m.parameters(): 174 | if len(param.shape) >= 2: 175 | nn.init.orthogonal_(param.data) 176 | else: 177 | nn.init.normal_(param.data) 178 | elif isinstance(m, nn.LSTMCell): 179 | for param in m.parameters(): 180 | if len(param.shape) >= 2: 181 | nn.init.orthogonal_(param.data) 182 | else: 183 | nn.init.normal_(param.data) 184 | elif isinstance(m, nn.GRU): 185 | for param in m.parameters(): 186 | if len(param.shape) >= 2: 187 | nn.init.orthogonal_(param.data) 188 | else: 189 | nn.init.normal_(param.data) 190 | elif isinstance(m, nn.GRUCell): 191 | for param in m.parameters(): 192 | if len(param.shape) >= 2: 193 | nn.init.orthogonal_(param.data) 194 | else: 195 | nn.init.normal_(param.data) 196 | -------------------------------------------------------------------------------- /lib/models/SGNet.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from .feature_extractor import build_feature_extractor 5 | import torch.nn.functional as F 6 | class SGNet(nn.Module): 7 | def __init__(self, args): 8 | super(SGNet, self).__init__() 9 | 10 | self.hidden_size = args.hidden_size 11 | self.enc_steps = args.enc_steps 12 | self.dec_steps = args.dec_steps 13 | self.dataset = args.dataset 14 | self.dropout = args.dropout 15 | self.feature_extractor = build_feature_extractor(args) 16 | if self.dataset in ['JAAD','PIE']: 17 | self.pred_dim = 4 18 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 19 | self.pred_dim), 20 | 
nn.Tanh()) 21 | self.flow_enc_cell = nn.GRUCell(self.hidden_size*2, self.hidden_size) 22 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 23 | self.pred_dim = 2 24 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 25 | self.pred_dim)) 26 | 27 | self.enc_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 28 | 1), 29 | nn.ReLU(inplace=True)) 30 | self.dec_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 31 | 1), 32 | nn.ReLU(inplace=True)) 33 | 34 | self.enc_to_goal_hidden = nn.Sequential(nn.Linear(self.hidden_size, 35 | self.hidden_size//4), 36 | nn.ReLU(inplace=True)) 37 | self.enc_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size, 38 | self.hidden_size), 39 | nn.ReLU(inplace=True)) 40 | 41 | 42 | self.goal_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size//4, 43 | self.hidden_size//4), 44 | nn.ReLU(inplace=True)) 45 | self.dec_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size, 46 | self.hidden_size), 47 | nn.ReLU(inplace=True)) 48 | self.goal_hidden_to_traj = nn.Sequential(nn.Linear(self.hidden_size//4, 49 | self.hidden_size), 50 | nn.ReLU(inplace=True)) 51 | self.goal_to_enc = nn.Sequential(nn.Linear(self.hidden_size//4, 52 | self.hidden_size//4), 53 | nn.ReLU(inplace=True)) 54 | self.goal_to_dec = nn.Sequential(nn.Linear(self.hidden_size//4, 55 | self.hidden_size//4), 56 | nn.ReLU(inplace=True)) 57 | self.enc_drop = nn.Dropout(self.dropout) 58 | self.goal_drop = nn.Dropout(self.dropout) 59 | self.dec_drop = nn.Dropout(self.dropout) 60 | 61 | self.traj_enc_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 62 | self.goal_cell = nn.GRUCell(self.hidden_size//4, self.hidden_size//4) 63 | self.dec_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 64 | 65 | def SGE(self, goal_hidden): 66 | goal_input = goal_hidden.new_zeros((goal_hidden.size(0), self.hidden_size//4)) 67 | goal_traj = goal_hidden.new_zeros(goal_hidden.size(0), self.dec_steps, self.pred_dim) 68 | goal_list = [] 69 | for dec_step in range(self.dec_steps): 70 | goal_hidden = self.goal_cell(self.goal_drop(goal_input), goal_hidden) 71 | goal_input = self.goal_hidden_to_input(goal_hidden) 72 | goal_list.append(goal_hidden) 73 | goal_traj_hidden = self.goal_hidden_to_traj(goal_hidden) 74 | # regress goal traj for loss 75 | goal_traj[:,dec_step,:] = self.regressor(goal_traj_hidden) 76 | # get goal for decoder and encoder 77 | goal_for_dec = [self.goal_to_dec(goal) for goal in goal_list] 78 | goal_for_enc = torch.stack([self.goal_to_enc(goal) for goal in goal_list],dim = 1) 79 | enc_attn= self.enc_goal_attn(torch.tanh(goal_for_enc)).squeeze(-1) 80 | enc_attn = F.softmax(enc_attn, dim =1).unsqueeze(1) 81 | goal_for_enc = torch.bmm(enc_attn, goal_for_enc).squeeze(1) 82 | return goal_for_dec, goal_for_enc, goal_traj 83 | 84 | def decoder(self, dec_hidden, goal_for_dec): 85 | # initial trajectory tensor 86 | dec_traj = dec_hidden.new_zeros(dec_hidden.size(0), self.dec_steps, self.pred_dim) 87 | for dec_step in range(self.dec_steps): 88 | goal_dec_input = dec_hidden.new_zeros(dec_hidden.size(0), self.dec_steps, self.hidden_size//4) 89 | goal_dec_input_temp = torch.stack(goal_for_dec[dec_step:],dim=1) 90 | goal_dec_input[:,dec_step:,:] = goal_dec_input_temp 91 | dec_attn= self.dec_goal_attn(torch.tanh(goal_dec_input)).squeeze(-1) 92 | dec_attn = F.softmax(dec_attn, dim =1).unsqueeze(1) 93 | goal_dec_input = torch.bmm(dec_attn,goal_dec_input).squeeze(1)#.view(goal_hidden.size(0), self.dec_steps, self.hidden_size//4).sum(1) 94 | 
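            # dec_attn soft-weights the stepwise goals from the current decoding step
            # onward (earlier positions stay zero-padded); the bmm collapses them into a
            # single hidden_size//4 goal summary, which is then concatenated with a
            # projection of dec_hidden to form the next GRU-cell input.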
95 | 96 | dec_dec_input = self.dec_hidden_to_input(dec_hidden) 97 | dec_input = self.dec_drop(torch.cat((goal_dec_input,dec_dec_input),dim = -1)) 98 | dec_hidden = self.dec_cell(dec_input, dec_hidden) 99 | # regress dec traj for loss 100 | dec_traj[:,dec_step,:] = self.regressor(dec_hidden) 101 | return dec_traj 102 | 103 | def encoder(self, traj_input, flow_input=None, start_index = 0): 104 | # initial output tensor 105 | all_goal_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 106 | all_dec_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 107 | # initial encoder goal with zeros 108 | goal_for_enc = traj_input.new_zeros((traj_input.size(0), self.hidden_size//4)) 109 | # initial encoder hidden with zeros 110 | traj_enc_hidden = traj_input.new_zeros((traj_input.size(0), self.hidden_size)) 111 | for enc_step in range(start_index, self.enc_steps): 112 | 113 | traj_enc_hidden = self.traj_enc_cell(self.enc_drop(torch.cat((traj_input[:,enc_step,:], goal_for_enc), 1)), traj_enc_hidden) 114 | if self.dataset in ['JAAD','PIE', 'ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 115 | enc_hidden = traj_enc_hidden 116 | # generate hidden states for goal and decoder 117 | goal_hidden = self.enc_to_goal_hidden(enc_hidden) 118 | dec_hidden = self.enc_to_dec_hidden(enc_hidden) 119 | 120 | goal_for_dec, goal_for_enc, goal_traj = self.SGE(goal_hidden) 121 | dec_traj = self.decoder(dec_hidden, goal_for_dec) 122 | 123 | # output 124 | all_goal_traj[:,enc_step,:,:] = goal_traj 125 | all_dec_traj[:,enc_step,:,:] = dec_traj 126 | 127 | return all_goal_traj, all_dec_traj 128 | 129 | 130 | def forward(self, inputs, start_index = 0): 131 | if self.dataset in ['JAAD','PIE']: 132 | traj_input = self.feature_extractor(inputs) 133 | all_goal_traj, all_dec_traj = self.encoder(traj_input) 134 | return all_goal_traj, all_dec_traj 135 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 136 | traj_input_temp = self.feature_extractor(inputs[:,start_index:,:]) 137 | traj_input = traj_input_temp.new_zeros((inputs.size(0), inputs.size(1), traj_input_temp.size(-1))) 138 | traj_input[:,start_index:,:] = traj_input_temp 139 | all_goal_traj, all_dec_traj = self.encoder(traj_input, None, start_index) 140 | return all_goal_traj, all_dec_traj -------------------------------------------------------------------------------- /lib/models/SGNet_CVAE.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .feature_extractor import build_feature_extractor 4 | from .bitrap_np import BiTraPNP 5 | import torch.nn.functional as F 6 | 7 | class SGNet_CVAE(nn.Module): 8 | def __init__(self, args): 9 | super(SGNet_CVAE, self).__init__() 10 | self.cvae = BiTraPNP(args) 11 | self.hidden_size = args.hidden_size # GRU hidden size 12 | self.enc_steps = args.enc_steps # observation step 13 | self.dec_steps = args.dec_steps # prediction step 14 | self.dataset = args.dataset 15 | self.dropout = args.dropout 16 | self.feature_extractor = build_feature_extractor(args) 17 | self.pred_dim = args.pred_dim 18 | self.K = args.K 19 | self.map = False 20 | if self.dataset in ['JAAD','PIE']: 21 | # the predict shift is in pixel 22 | self.pred_dim = 4 23 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 24 | self.pred_dim), 25 | nn.Tanh()) 26 | self.flow_enc_cell = nn.GRUCell(self.hidden_size*2, self.hidden_size) 27 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 28 
| self.pred_dim = 2 29 | # the predict shift is in meter 30 | self.regressor = nn.Sequential(nn.Linear(self.hidden_size, 31 | self.pred_dim)) 32 | self.enc_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 33 | 1), 34 | nn.ReLU(inplace=True)) 35 | self.dec_goal_attn = nn.Sequential(nn.Linear(self.hidden_size//4, 36 | 1), 37 | nn.ReLU(inplace=True)) 38 | 39 | self.enc_to_goal_hidden = nn.Sequential(nn.Linear(self.hidden_size, 40 | self.hidden_size//4), 41 | nn.ReLU(inplace=True)) 42 | self.goal_hidden_to_traj = nn.Sequential(nn.Linear(self.hidden_size//4, 43 | self.hidden_size), 44 | nn.ReLU(inplace=True)) 45 | self.cvae_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size + args.LATENT_DIM, 46 | self.hidden_size), 47 | nn.ReLU(inplace=True)) 48 | self.enc_to_dec_hidden = nn.Sequential(nn.Linear(self.hidden_size, 49 | self.hidden_size), 50 | nn.ReLU(inplace=True)) 51 | 52 | self.goal_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size//4, 53 | self.hidden_size//4), 54 | nn.ReLU(inplace=True)) 55 | self.dec_hidden_to_input = nn.Sequential(nn.Linear(self.hidden_size, 56 | self.hidden_size), 57 | nn.ReLU(inplace=True)) 58 | self.goal_to_enc = nn.Sequential(nn.Linear(self.hidden_size//4, 59 | self.hidden_size//4), 60 | nn.ReLU(inplace=True)) 61 | self.goal_to_dec = nn.Sequential(nn.Linear(self.hidden_size//4, 62 | self.hidden_size//4), 63 | nn.ReLU(inplace=True)) 64 | self.enc_drop = nn.Dropout(self.dropout) 65 | self.goal_drop = nn.Dropout(self.dropout) 66 | self.dec_drop = nn.Dropout(self.dropout) 67 | self.traj_enc_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 68 | self.goal_cell = nn.GRUCell(self.hidden_size//4, self.hidden_size//4) 69 | self.dec_cell = nn.GRUCell(self.hidden_size + self.hidden_size//4, self.hidden_size) 70 | 71 | def SGE(self, goal_hidden): 72 | # initial goal input with zero 73 | goal_input = goal_hidden.new_zeros((goal_hidden.size(0), self.hidden_size//4)) 74 | # initial trajectory tensor 75 | goal_traj = goal_hidden.new_zeros(goal_hidden.size(0), self.dec_steps, self.pred_dim) 76 | goal_list = [] 77 | for dec_step in range(self.dec_steps): 78 | goal_hidden = self.goal_cell(self.goal_drop(goal_input), goal_hidden) 79 | # next step input is generate by hidden 80 | goal_input = self.goal_hidden_to_input(goal_hidden) 81 | goal_list.append(goal_hidden) 82 | # regress goal traj for loss 83 | goal_traj_hidden = self.goal_hidden_to_traj(goal_hidden) 84 | goal_traj[:,dec_step,:] = self.regressor(goal_traj_hidden) 85 | # get goal for decoder and encoder 86 | goal_for_dec = [self.goal_to_dec(goal) for goal in goal_list] 87 | goal_for_enc = torch.stack([self.goal_to_enc(goal) for goal in goal_list],dim = 1) 88 | enc_attn= self.enc_goal_attn(torch.tanh(goal_for_enc)).squeeze(-1) 89 | enc_attn = F.softmax(enc_attn, dim =1).unsqueeze(1) 90 | goal_for_enc = torch.bmm(enc_attn, goal_for_enc).squeeze(1) 91 | return goal_for_dec, goal_for_enc, goal_traj 92 | 93 | def cvae_decoder(self, dec_hidden, goal_for_dec): 94 | batch_size = dec_hidden.size(0) 95 | 96 | K = dec_hidden.shape[1] 97 | dec_hidden = dec_hidden.view(-1, dec_hidden.shape[-1]) 98 | dec_traj = dec_hidden.new_zeros(batch_size, self.dec_steps, K, self.pred_dim) 99 | for dec_step in range(self.dec_steps): 100 | # incremental goal for each time step 101 | goal_dec_input = dec_hidden.new_zeros(batch_size, self.dec_steps, self.hidden_size//4) 102 | goal_dec_input_temp = torch.stack(goal_for_dec[dec_step:],dim=1) 103 | goal_dec_input[:,dec_step:,:] = goal_dec_input_temp 104 | dec_attn= 
self.dec_goal_attn(torch.tanh(goal_dec_input)).squeeze(-1) 105 | dec_attn = F.softmax(dec_attn, dim =1).unsqueeze(1) 106 | goal_dec_input = torch.bmm(dec_attn,goal_dec_input).squeeze(1) 107 | goal_dec_input = goal_dec_input.unsqueeze(1).repeat(1, K, 1).view(-1, goal_dec_input.shape[-1]) 108 | dec_dec_input = self.dec_hidden_to_input(dec_hidden) 109 | dec_input = self.dec_drop(torch.cat((goal_dec_input,dec_dec_input),dim = -1)) 110 | dec_hidden = self.dec_cell(dec_input, dec_hidden) 111 | # regress dec traj for loss 112 | batch_traj = self.regressor(dec_hidden) 113 | batch_traj = batch_traj.view(-1, K, batch_traj.shape[-1]) 114 | dec_traj[:,dec_step,:,:] = batch_traj 115 | return dec_traj 116 | 117 | def encoder(self, raw_inputs, raw_targets, traj_input, flow_input=None, start_index = 0): 118 | # initial output tensor 119 | all_goal_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.pred_dim) 120 | all_cvae_dec_traj = traj_input.new_zeros(traj_input.size(0), self.enc_steps, self.dec_steps, self.K, self.pred_dim) 121 | # initial encoder goal with zeros 122 | goal_for_enc = traj_input.new_zeros((traj_input.size(0), self.hidden_size//4)) 123 | # initial encoder hidden with zeros 124 | traj_enc_hidden = traj_input.new_zeros((traj_input.size(0), self.hidden_size)) 125 | total_probabilities = traj_input.new_zeros((traj_input.size(0), self.enc_steps, self.K)) 126 | total_KLD = 0 127 | for enc_step in range(start_index, self.enc_steps): 128 | traj_enc_hidden = self.traj_enc_cell(self.enc_drop(torch.cat((traj_input[:,enc_step,:], goal_for_enc), 1)), traj_enc_hidden) 129 | enc_hidden = traj_enc_hidden 130 | goal_hidden = self.enc_to_goal_hidden(enc_hidden) 131 | goal_for_dec, goal_for_enc, goal_traj = self.SGE(goal_hidden) 132 | all_goal_traj[:,enc_step,:,:] = goal_traj 133 | dec_hidden = self.enc_to_dec_hidden(enc_hidden) 134 | if self.training: 135 | cvae_hidden, KLD, probability = self.cvae(dec_hidden, raw_inputs[:,enc_step,:], self.K, raw_targets[:,enc_step,:,:]) 136 | else: 137 | cvae_hidden, KLD, probability = self.cvae(dec_hidden, raw_inputs[:,enc_step,:], self.K) 138 | total_probabilities[:,enc_step,:] = probability 139 | total_KLD += KLD 140 | cvae_dec_hidden= self.cvae_to_dec_hidden(cvae_hidden) 141 | if self.map: 142 | map_input = flow_input 143 | cvae_dec_hidden = (cvae_dec_hidden + map_input.unsqueeze(1))/2 144 | all_cvae_dec_traj[:,enc_step,:,:,:] = self.cvae_decoder(cvae_dec_hidden, goal_for_dec) 145 | return all_goal_traj, all_cvae_dec_traj, total_KLD, total_probabilities 146 | 147 | def forward(self, inputs, map_mask=None, targets = None, start_index = 0, training=True): 148 | self.training = training 149 | if torch.is_tensor(start_index): 150 | start_index = start_index[0].item() 151 | if self.dataset in ['JAAD','PIE']: 152 | traj_input = self.feature_extractor(inputs) 153 | all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities = self.encoder(inputs, targets, traj_input) 154 | return all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities 155 | elif self.dataset in ['ETH', 'HOTEL','UNIV','ZARA1', 'ZARA2']: 156 | traj_input_temp = self.feature_extractor(inputs[:,start_index:,:]) 157 | traj_input = traj_input_temp.new_zeros((inputs.size(0), inputs.size(1), traj_input_temp.size(-1))) 158 | traj_input[:,start_index:,:] = traj_input_temp 159 | all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities = self.encoder(inputs, targets, traj_input, None, start_index) 160 | return all_goal_traj, all_cvae_dec_traj, KLD, total_probabilities 
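
# A minimal, self-contained sketch (not repository API) of how the stochastic output
# produced above can be scored with a best-of-K criterion. It only assumes the
# decoder's tensor layout, (batch, enc_steps, dec_steps, K, pred_dim), and plain
# tensor methods; the function name and arguments are hypothetical.
def best_of_k_ade(cvae_dec_traj, target_traj):
    """cvae_dec_traj: (B, T_enc, T_dec, K, D); target_traj: (B, T_enc, T_dec, D)."""
    # distance of every hypothesis to the ground truth at each decoding step
    dist = ((cvae_dec_traj - target_traj.unsqueeze(3)) ** 2).sum(dim=-1).sqrt()  # (B, T_enc, T_dec, K)
    ade_per_k = dist.mean(dim=2)            # average over decoding steps -> (B, T_enc, K)
    return ade_per_k.min(dim=-1)[0].mean()  # keep the closest of the K samples, then average
# (The eval_* helpers in lib/utils/eval_utils.py compute dataset-specific variants of
# this min-over-K scoring in NumPy.)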
-------------------------------------------------------------------------------- /lib/utils/eval_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from .data_utils import bbox_denormalize, cxcywh_to_x1y1x2y2 5 | from nuscenes.prediction import convert_local_coords_to_global 6 | def compute_IOU(bbox_true, bbox_pred, format='xywh'): 7 | ''' 8 | compute IOU 9 | [cx, cy, w, h] or [x1, y1, x2, y2] 10 | ''' 11 | if format == 'xywh': 12 | xmin = np.max([bbox_true[0] - bbox_true[2]/2, bbox_pred[0] - bbox_pred[2]/2]) 13 | xmax = np.min([bbox_true[0] + bbox_true[2]/2, bbox_pred[0] + bbox_pred[2]/2]) 14 | ymin = np.max([bbox_true[1] - bbox_true[3]/2, bbox_pred[1] - bbox_pred[3]/2]) 15 | ymax = np.min([bbox_true[1] + bbox_true[3]/2, bbox_pred[1] + bbox_pred[3]/2]) 16 | w_true = bbox_true[2] 17 | h_true = bbox_true[3] 18 | w_pred = bbox_pred[2] 19 | h_pred = bbox_pred[3] 20 | elif format == 'x1y1x2y2': 21 | xmin = np.max([bbox_true[0], bbox_pred[0]]) 22 | xmax = np.min([bbox_true[2], bbox_pred[2]]) 23 | ymin = np.max([bbox_true[1], bbox_pred[1]]) 24 | ymax = np.min([bbox_true[3], bbox_pred[3]]) 25 | w_true = bbox_true[2] - bbox_true[0] 26 | h_true = bbox_true[3] - bbox_true[1] 27 | w_pred = bbox_pred[2] - bbox_pred[0] 28 | h_pred = bbox_pred[3] - bbox_pred[1] 29 | else: 30 | raise NameError("Unknown format {}".format(format)) 31 | w_inter = np.max([0, xmax - xmin]) 32 | h_inter = np.max([0, ymax - ymin]) 33 | intersection = w_inter * h_inter 34 | union = (w_true * h_true + w_pred * h_pred) - intersection 35 | 36 | return intersection/union 37 | 38 | def eval_jaad_pie(input_traj_np, target_traj_np, all_dec_traj_np): 39 | MSE_15=0 40 | MSE_05=0 41 | MSE_10=0 42 | FMSE=0 43 | CMSE=0 44 | CFMSE=0 45 | FIOU=0 46 | for batch_index in range(all_dec_traj_np.shape[0]): 47 | input_traj = np.expand_dims(input_traj_np[batch_index], axis=1) 48 | 49 | target_traj = input_traj + target_traj_np[batch_index] 50 | all_dec_traj = input_traj + all_dec_traj_np[batch_index] 51 | 52 | all_dec_traj = bbox_denormalize(all_dec_traj, W=1920, H=1080) 53 | target_traj = bbox_denormalize(target_traj, W=1920, H=1080) 54 | 55 | all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(all_dec_traj) 56 | target_traj_xyxy = cxcywh_to_x1y1x2y2(target_traj) 57 | 58 | 59 | MSE_15 += np.square(target_traj_xyxy[-1,0:45,:] - all_dec_traj_xyxy[-1,0:45,:]).mean(axis=None) 60 | MSE_05 += np.square(target_traj_xyxy[-1,0:15,:] - all_dec_traj_xyxy[-1,0:15,:]).mean(axis=None) 61 | MSE_10 += np.square(target_traj_xyxy[-1,0:30,:] - all_dec_traj_xyxy[-1,0:30,:]).mean(axis=None) 62 | 63 | FMSE +=np.square(target_traj_xyxy[-1,44,:] - all_dec_traj_xyxy[-1,44,:]).mean(axis=None) 64 | 65 | 66 | CMSE += np.square(target_traj[-1,0:45,:2] - all_dec_traj[-1,0:45,:2]).mean(axis=None) 67 | CFMSE += np.square(target_traj[-1,44,:2] - all_dec_traj[-1,44,:2]).mean(axis=None) 68 | tmp_FIOU = [] 69 | for i in range(target_traj_xyxy.shape[0]): 70 | tmp_FIOU.append(compute_IOU(target_traj_xyxy[i,44,:], all_dec_traj_xyxy[i,44,:], format='x1y1x2y2')) 71 | FIOU += np.mean(tmp_FIOU) 72 | return MSE_15, MSE_05, MSE_10, FMSE, CMSE, CFMSE, FIOU 73 | 74 | 75 | def eval_jaad_pie_cvae(input_traj, target_traj, cvae_all_dec_traj): 76 | MSE_15=0 77 | MSE_05=0 78 | MSE_10=0 79 | FMSE=0 80 | CMSE=0 81 | CFMSE=0 82 | FIOU=0 83 | K = cvae_all_dec_traj.shape[2] 84 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 85 | #import pdb; pdb.set_trace() 86 | input_traj = 
np.tile(input_traj[:,-1,:][:,None, None,:], (1, 1, K, 1)) 87 | #import pdb; pdb.set_trace() 88 | tiled_target_traj += input_traj 89 | cvae_all_dec_traj += input_traj 90 | 91 | tiled_target_traj = bbox_denormalize(tiled_target_traj, W=1920, H=1080) 92 | cvae_all_dec_traj = bbox_denormalize(cvae_all_dec_traj, W=1920, H=1080) 93 | 94 | tiled_target_traj_xyxy = cxcywh_to_x1y1x2y2(tiled_target_traj) 95 | cvae_all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(cvae_all_dec_traj) 96 | 97 | MSE_05 = np.square(cvae_all_dec_traj_xyxy[:,:15,:,:] - tiled_target_traj_xyxy[:,:15,:,:]).mean(axis=(1, 3)).min(axis=-1).sum() 98 | #import pdb; pdb.set_trace() 99 | MSE_10 = np.square(cvae_all_dec_traj_xyxy[:,:30,:,:] - tiled_target_traj_xyxy[:,:30,:,:]).mean(axis=(1, 3)).min(axis=-1).sum() 100 | MSE_15 = np.square(cvae_all_dec_traj_xyxy - tiled_target_traj_xyxy).mean(axis=(1, 3)).min(axis=-1).sum() 101 | FMSE = np.square(cvae_all_dec_traj_xyxy[:,-1,:,:] - tiled_target_traj_xyxy[:,-1,:,:]).mean(axis=-1).min(axis=-1).sum() 102 | CMSE = np.square(cvae_all_dec_traj[:,:,:,:2] - tiled_target_traj[:,:,:,:2]).mean(axis=(1, 3)).min(axis=-1).sum() 103 | CFMSE = np.square(cvae_all_dec_traj[:,-1,:,:2] - tiled_target_traj[:,-1,:,:2]).mean(axis=-1).min(axis=-1).sum() 104 | return MSE_15, MSE_05, MSE_10, FMSE, CMSE, CFMSE, FIOU 105 | 106 | def eval_hevi(input_traj_np, target_traj_np, all_dec_traj_np): 107 | ADE_15=0 108 | ADE_05=0 109 | ADE_10=0 110 | FDE=0 111 | CADE=0 112 | CFDE=0 113 | FIOU=0 114 | for batch_index in range(all_dec_traj_np.shape[0]): 115 | input_traj = np.expand_dims(input_traj_np[batch_index], axis=1) 116 | target_traj = input_traj + target_traj_np[batch_index] 117 | all_dec_traj = input_traj + all_dec_traj_np[batch_index] 118 | 119 | target_traj = bbox_denormalize(target_traj, W=1280, H=640) 120 | all_dec_traj = bbox_denormalize(all_dec_traj, W=1280, H=640) 121 | 122 | target_traj_xyxy = cxcywh_to_x1y1x2y2(target_traj) 123 | all_dec_traj_xyxy = cxcywh_to_x1y1x2y2(all_dec_traj) 124 | 125 | 126 | ADE_15 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,:,:2] - all_dec_traj_xyxy[:,:,:2]) ** 2, axis=-1))) 127 | 128 | ADE_05 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,0:5,:2] - all_dec_traj_xyxy[:,0:5,:2]) ** 2, axis=-1))) 129 | ADE_10 += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,0:10,:2] - all_dec_traj_xyxy[:,0:10,:2]) ** 2, axis=-1))) 130 | FDE += np.mean(np.sqrt(np.sum((target_traj_xyxy[:,-1,:2] - all_dec_traj_xyxy[:,-1,:2]) ** 2, axis=-1))) 131 | 132 | 133 | CADE += np.mean(np.sqrt(np.sum((target_traj[:,:,:2] - all_dec_traj[:,:,:2]) ** 2, axis=-1))) 134 | CFDE += np.mean(np.sqrt(np.sum((target_traj[:,-1,:2] - all_dec_traj[:,-1,:2]) ** 2, axis=-1))) 135 | tmp_FIOU = [] 136 | for i in range(target_traj_xyxy.shape[0]): 137 | tmp_FIOU.append(compute_IOU(target_traj_xyxy[i,-1,:], all_dec_traj_xyxy[i,-1,:], format='x1y1x2y2')) 138 | FIOU += np.mean(tmp_FIOU) 139 | return ADE_15, ADE_05, ADE_10, FDE, CADE, CFDE, FIOU 140 | 141 | def eval_ethucy(input_traj_np, target_traj_np, all_dec_traj_np): 142 | ADE_08=0 143 | ADE_12=0 144 | FDE_08=0 145 | FDE_12=0 146 | for batch in range(all_dec_traj_np.shape[0]): 147 | input_traj = np.expand_dims(input_traj_np[batch], axis=1) 148 | target_traj = input_traj[...,:2] + target_traj_np[batch] 149 | all_dec_traj = input_traj[...,:2] + all_dec_traj_np[batch] 150 | 151 | ADE_08 += np.mean(np.sqrt(np.sum((target_traj[-1,:8,:] - all_dec_traj[-1,:8,:]) ** 2, axis=-1))) 152 | ADE_12 += np.mean(np.sqrt(np.sum((target_traj[-1,:,:] - all_dec_traj[-1,:,:]) ** 2, axis=-1))) 153 | 154 | FDE_08 += 
np.mean(np.sqrt(np.sum((target_traj[-1,7,:] - all_dec_traj[-1,7,:]) ** 2, axis=-1))) 155 | FDE_12 += np.mean(np.sqrt(np.sum((target_traj[-1,-1,:] - all_dec_traj[-1,-1,:]) ** 2, axis=-1))) 156 | return ADE_08, FDE_08, ADE_12, FDE_12 157 | 158 | 159 | def eval_ethucy_cvae(input_traj, target_traj, cvae_all_traj): 160 | result = {'ADE_08':0, 'ADE_12':0, 'FDE_08':0, 'FDE_12':0} 161 | 162 | K = cvae_all_traj.shape[2] 163 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 164 | #import pdb; pdb.set_trace() 165 | input_traj = np.tile(input_traj[:,-1,:][:,None, None,:], (1, 1, K, 1)) 166 | 167 | result['ADE_08'] = np.linalg.norm(cvae_all_traj[:,:8,:,:] - tiled_target_traj[:,:8,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 168 | result['ADE_12'] = np.linalg.norm(cvae_all_traj[:,:12,:,:] - tiled_target_traj[:,:12,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 169 | result['FDE_08'] = np.linalg.norm(cvae_all_traj[:,7,:,:] - tiled_target_traj[:,7,:,:], axis=-1).min(axis=1).sum() 170 | result['FDE_12'] = np.linalg.norm(cvae_all_traj[:,11,:,:] - tiled_target_traj[:,11,:,:], axis=-1).min(axis=1).sum() 171 | 172 | 173 | return result 174 | 175 | def eval_nuscenes_local(starting_translation, starting_rotation, target_traj, cvae_all_traj): 176 | result = {'ADE_12':0, 'FDE_12':0} 177 | 178 | 179 | K = cvae_all_traj.shape[2] 180 | B = cvae_all_traj.shape[0] 181 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 182 | 183 | cvae_all_traj_global = np.zeros(cvae_all_traj.shape) 184 | for k in range(K): 185 | for b in range(B): 186 | cvae_all_traj_global[b,:,k,:] = convert_local_coords_to_global(cvae_all_traj[b,:,k,:],starting_translation[b] ,starting_rotation[b]) 187 | result['ADE_12'] = np.linalg.norm(cvae_all_traj_global[:,:12,:,:] - tiled_target_traj[:,:12,:,:], axis=-1).mean(axis=1).min(axis=1).sum() 188 | result['FDE_12'] = np.linalg.norm(cvae_all_traj_global[:,11,:,:] - tiled_target_traj[:,11,:,:], axis=-1).min(axis=1).sum() 189 | 190 | 191 | return result 192 | 193 | 194 | 195 | def eval_nuscenes_api(starting_translation, starting_rotation, target_traj, cvae_all_traj, total_probabilities, tokens): 196 | result = {'ADE_12':0, 'FDE_12':0} 197 | 198 | 199 | K = cvae_all_traj.shape[2] 200 | B = cvae_all_traj.shape[0] 201 | tiled_target_traj = np.tile(target_traj[:, :, None, :], (1, 1, K, 1)) 202 | preds5 = [] 203 | cvae_all_traj_global = np.zeros(cvae_all_traj.shape) 204 | for k in range(K): 205 | for b in range(B): 206 | cvae_all_traj_global[b,:,k,:] = convert_local_coords_to_global(cvae_all_traj[b,:,k,:],starting_translation[b] ,starting_rotation[b]) 207 | 208 | cvae_all_traj_global = np.transpose(cvae_all_traj_global, (0,2,1,3)) 209 | 210 | tiled_target_traj = np.transpose(tiled_target_traj, (0,2,1,3)) 211 | for i, token in enumerate(tokens): 212 | 213 | instance_token, sample_token = token.split("_") 214 | prediction = Prediction(instance=instance_token, sample=sample_token, prediction=cvae_all_traj_global[i], 215 | probabilities=total_probabilities[i]).serialize() 216 | preds5.append(prediction) 217 | 218 | return preds5 -------------------------------------------------------------------------------- /lib/dataloaders/pie_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/PIE.py 2 | 3 | import os 4 | import numpy as np 5 | import torch 6 | from torch.utils import data 7 | from .PIE_origin import PIE 8 | 9 | 
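# Each sample yielded by this layer is a dict:
#   'input_x'  : the observed bounding boxes, converted and normalized according to
#                args.bbox_type / args.normalize,
#   'target_y' : for every observed step, the box displacements over the next
#                dec_steps frames relative to that step (see get_target below),
#   'cur_image_file' / 'timestep' : used to locate the corresponding frame.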
class PIEDataLayer(data.Dataset): 10 | def __init__(self, args, split): 11 | self.split = split 12 | self.root = args.data_root 13 | self.args = args 14 | # NOTE: add downsample function 15 | self.downsample_step = int(30/self.args.FPS) 16 | traj_data_opts = {'fstride': 1, 17 | 'sample_type': 'all', 18 | 'height_rng': [0, float('inf')], 19 | 'squarify_ratio': 0, 20 | 'data_split_type': 'default', # kfold, random, default 21 | 'seq_type': 'trajectory', 22 | 'min_track_size': 61, 23 | 'random_params': {'ratios': None, 24 | 'val_data': True, 25 | 'regen_data': True}, 26 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 27 | 28 | traj_model_opts = {'normalize_bbox': True, 29 | 'track_overlap': 0.5, 30 | 'observe_length': 15, 31 | 'predict_length': self.args.dec_steps, 32 | 'enc_input_type': ['bbox'], 33 | 'dec_input_type': [], 34 | 'prediction_type': ['bbox'] 35 | } 36 | imdb = PIE(data_path=self.root) 37 | 38 | traj_model_opts['enc_input_type'].extend(['obd_speed', 'heading_angle']) 39 | traj_model_opts['prediction_type'].extend(['obd_speed', 'heading_angle']) 40 | beh_seq = imdb.generate_data_trajectory_sequence(self.split, **traj_data_opts) 41 | self.data = self.get_traj_data(beh_seq, **traj_model_opts) 42 | 43 | def __getitem__(self, index): 44 | obs_bbox = torch.FloatTensor(self.data['obs_bbox'][index]) 45 | pred_bbox = torch.FloatTensor(self.data['pred_bbox'][index]) 46 | cur_image_file = self.data['obs_image'][index][-1] 47 | 48 | 49 | ret = {'input_x':obs_bbox, 50 | 'target_y':pred_bbox, 'cur_image_file':cur_image_file} 51 | 52 | ret['timestep'] = int(cur_image_file.split('/')[-1].split('.')[0]) 53 | 54 | return ret 55 | 56 | def __len__(self): 57 | return len(self.data[list(self.data.keys())[0]]) 58 | 59 | def get_traj_tracks(self, dataset, data_types, observe_length, predict_length, overlap, normalize): 60 | """ 61 | Generates tracks by sampling from pedestrian sequences 62 | :param dataset: The raw data passed to the method 63 | :param data_types: Specification of types of data for encoder and decoder. Data types depend on datasets. e.g. 64 | JAAD has 'bbox', 'ceneter' and PIE in addition has 'obd_speed', 'heading_angle', etc. 65 | :param observe_length: The length of the observation (i.e. time steps of the encoder) 66 | :param predict_length: The length of the prediction (i.e. time steps of the decoder) 67 | :param overlap: How much the sampled tracks should overlap. A value between [0,1) should be selected 68 | :param normalize: Whether to normalize center/bounding box coordinates, i.e. convert to velocities. NOTE: when 69 | the tracks are normalized, observation length becomes 1 step shorter, i.e. first step is removed. 
70 | :return: A dictinary containing sampled tracks for each data modality 71 | """ 72 | # Calculates the overlap in terms of number of frames 73 | seq_length = observe_length + predict_length 74 | overlap_stride = observe_length if overlap == 0 else \ 75 | int((1 - overlap) * observe_length) 76 | overlap_stride = 1 if overlap_stride < 1 else overlap_stride 77 | 78 | # Check the validity of keys selected by user as data type 79 | d = {} 80 | for dt in data_types: 81 | try: 82 | d[dt] = dataset[dt] 83 | except:# KeyError: 84 | raise KeyError('Wrong data type is selected %s' % dt) 85 | 86 | d['image'] = dataset['image'] 87 | d['pid'] = dataset['pid'] 88 | d['resolution'] = dataset['resolution'] 89 | d['flow'] = [] 90 | num_trks = len(d['image']) 91 | # Sample tracks from sequneces 92 | for k in d.keys(): 93 | tracks = [] 94 | for track in d[k]: 95 | for i in range(0, len(track) - seq_length + 1, overlap_stride): 96 | tracks.append(track[i:i + seq_length]) 97 | d[k] = tracks 98 | # Normalize tracks using FOL paper method, 99 | d['bbox'] = self.convert_normalize_bboxes(d['bbox'], d['resolution'], 100 | self.args.normalize, self.args.bbox_type) 101 | return d 102 | 103 | def convert_normalize_bboxes(self, all_bboxes, all_resolutions, normalize, bbox_type): 104 | '''input box type is x1y1x2y2 in original resolution''' 105 | for i in range(len(all_bboxes)): 106 | if len(all_bboxes[i]) == 0: 107 | continue 108 | bbox = np.array(all_bboxes[i]) 109 | # NOTE ltrb to cxcywh 110 | if bbox_type == 'cxcywh': 111 | bbox[..., [2, 3]] = bbox[..., [2, 3]] - bbox[..., [0, 1]] 112 | bbox[..., [0, 1]] += bbox[..., [2, 3]]/2 113 | # NOTE Normalize bbox 114 | if normalize == 'zero-one': 115 | # W, H = all_resolutions[i][0] 116 | _min = np.array(self.args.min_bbox)[None, :] 117 | _max = np.array(self.args.max_bbox)[None, :] 118 | bbox = (bbox - _min) / (_max - _min) 119 | elif normalize == 'plus-minus-one': 120 | # W, H = all_resolutions[i][0] 121 | _min = np.array(self.args.min_bbox)[None, :] 122 | _max = np.array(self.args.max_bbox)[None, :] 123 | bbox = (2 * (bbox - _min) / (_max - _min)) - 1 124 | elif normalize == 'none': 125 | pass 126 | else: 127 | raise ValueError(normalize) 128 | all_bboxes[i] = bbox 129 | return all_bboxes 130 | 131 | def get_data_helper(self, data, data_type): 132 | """ 133 | A helper function for data generation that combines different data types into a single representation 134 | :param data: A dictionary of different data types 135 | :param data_type: The data types defined for encoder and decoder input/output 136 | :return: A unified data representation as a list 137 | """ 138 | if not data_type: 139 | return [] 140 | d = [] 141 | for dt in data_type: 142 | if dt == 'image': 143 | continue 144 | d.append(np.array(data[dt])) 145 | 146 | # Concatenate different data points into a single representation 147 | if len(d) > 1: 148 | return np.concatenate(d, axis=2) 149 | elif len(d) == 1: 150 | return d[0] 151 | else: 152 | return d 153 | 154 | def get_traj_data(self, data, **model_opts): 155 | """ 156 | Main data generation function for training/testing 157 | :param data: The raw data 158 | :param model_opts: Control parameters for data generation characteristics (see below for default values) 159 | :return: A dictionary containing training and testing data 160 | """ 161 | 162 | opts = { 163 | 'normalize_bbox': True, 164 | 'track_overlap': 0.5, 165 | 'observe_length': self.args.enc_steps, 166 | 'predict_length': self.args.dec_steps, 167 | 'enc_input_type': ['bbox'], 168 | 
'dec_input_type': [], 169 | 'prediction_type': ['bbox'] 170 | } 171 | for key, value in model_opts.items(): 172 | assert key in opts.keys(), 'wrong data parameter %s' % key 173 | opts[key] = value 174 | 175 | observe_length = opts['observe_length'] 176 | predict_length = opts['predict_length'] 177 | data_types = set(opts['enc_input_type'] + opts['dec_input_type'] + opts['prediction_type']) 178 | data_tracks = self.get_traj_tracks(data, data_types, observe_length, 179 | opts['predict_length'], opts['track_overlap'], 180 | opts['normalize_bbox']) 181 | obs_slices = {} 182 | pred_slices = {} 183 | # Generate observation/prediction sequences from the tracks 184 | for k in data_tracks.keys(): 185 | obs_slices[k] = [] 186 | pred_slices[k] = [] 187 | # NOTE: Add downsample function 188 | down = self.downsample_step 189 | obs_slices[k].extend([d[down-1:observe_length:down] for d in data_tracks[k]]) 190 | if k == 'bbox': 191 | start = down-1 # 0 192 | end = start + observe_length # 0 + 15 = 15 193 | target_list = [] # 15 * 45 * 4 194 | 195 | for d in data_tracks[k]: 196 | target = self.get_target(d,start,end,observe_length,predict_length) 197 | target_list.append(target) 198 | pred_slices[k].extend(target_list) 199 | ret = {'obs_image': obs_slices['image'], 200 | 'obs_pid': obs_slices['pid'], 201 | 'obs_resolution': obs_slices['resolution'], 202 | 'pred_image': pred_slices['image'], 203 | 'pred_pid': pred_slices['pid'], 204 | 'pred_resolution': pred_slices['resolution'], 205 | 'obs_bbox': np.array(obs_slices['bbox']), #enc_input, 206 | 'pred_bbox': np.array(pred_slices['bbox']), #pred_target, 207 | } 208 | 209 | return ret 210 | 211 | def get_path(self, 212 | file_name='', 213 | save_folder='models', 214 | dataset='pie', 215 | model_type='trajectory', 216 | save_root_folder='data/'): 217 | """ 218 | A path generator method for saving model and config data. It create directories if needed. 219 | :param file_name: The actual save file name , e.g. 'model.h5' 220 | :param save_folder: The name of folder containing the saved files 221 | :param dataset: The name of the dataset used 222 | :param save_root_folder: The root folder 223 | :return: The full path for the model name and the path to the final folder 224 | """ 225 | save_path = os.path.join(save_root_folder, dataset, model_type, save_folder) 226 | if not os.path.exists(save_path): 227 | os.makedirs(save_path) 228 | return os.path.join(save_path, file_name), save_path 229 | 230 | def get_target(self, session, start, end, observe_length, predict_length): 231 | ''' 232 | Given the input session and the start and end time of the input clip, find the target 233 | TARGET FOR PREDICTION IS THE CHANGES IN THE FUTURE!! 234 | Params: 235 | session: the input time sequence of a car, can be bbox or ego_motion with shape (time, :) 236 | start: start frame id 237 | end: end frame id 238 | Returns: 239 | target: Target tensor with shape (self.args.segment_len, dec_steps, :) 240 | The target is the change of the values. e.g. target of yaw is \delta{\theta}_{t0,tn} 241 | ''' 242 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 243 | for i, target_start in enumerate(range(start, end)): 244 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 245 | # i, target_start = (0,0) (1,1) (2,2) ...... 
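                # i.e. with start=0, the targets for observed step i are the box /
                # ego-motion displacements of frames [i+1, ..., i+predict_length]
                # measured relative to frame i.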
246 | target_start = target_start + 1 247 | try: 248 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 249 | session[target_start-1:target_start,:]) 250 | except: 251 | print("segment start: ", start) 252 | print("sample start: ", target_start) 253 | print("segment end: ", end) 254 | print(session.shape) 255 | raise ValueError() 256 | return target -------------------------------------------------------------------------------- /lib/dataloaders/jaad_data_layer.py: -------------------------------------------------------------------------------- 1 | ## Code modified based on https://github.com/MoonBlvd/bidireaction-trajectory-prediction/blob/main/datasets/JAAD.py 2 | 3 | import numpy as np 4 | import torch 5 | from torch.utils import data 6 | from .JAAD_origin import JAAD 7 | from copy import deepcopy 8 | 9 | class JAADDataLayer(data.Dataset): 10 | def __init__(self, args, split): 11 | self.split = split 12 | self.root = args.data_root 13 | self.args = args 14 | data_opts = {'fstride': 1, 15 | 'sample_type': 'all', 16 | 'height_rng': [0, float('inf')], 17 | 'squarify_ratio': 0, 18 | 'data_split_type': 'default', # kfold, random, default 19 | 'seq_type': 'trajectory', 20 | 'min_track_size': 61, 21 | 'random_params': {'ratios': None, 22 | 'val_data': True, 23 | 'regen_data': True}, 24 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 25 | traj_model_opts = {'normalize_bbox': True, 26 | 'track_overlap': 0.5, 27 | 'observe_length': 15, 28 | 'predict_length': self.args.dec_steps, 29 | 'enc_input_type': ['bbox'], 30 | 'dec_input_type': [], #['intention_prob', 'obd_speed'], 31 | 'prediction_type': ['bbox'] 32 | } 33 | self.downsample_step = int(30/self.args.FPS) 34 | imdb = JAAD(data_path=self.root) 35 | #imdb.generate_database() 36 | beh_seq = imdb.generate_data_trajectory_sequence(self.split, **data_opts) 37 | self.data = self.get_data(beh_seq, **traj_model_opts) 38 | def __getitem__(self, index): 39 | obs_bbox = torch.FloatTensor(self.data['obs_bbox'][index]) 40 | pred_bbox = torch.FloatTensor(self.data['pred_bbox'][index]) 41 | gt_mean = torch.FloatTensor(self.data['gt_mean'][index]) 42 | gt_std = torch.FloatTensor(self.data['gt_std'][index]) 43 | cur_image_file = self.data['obs_image'][index][-1] 44 | ret = {'input_x':obs_bbox, 45 | 'target_y':pred_bbox, 'cur_image_file':cur_image_file, 'gt_mean':gt_mean, 'gt_std':gt_std} 46 | ret['timestep'] = int(cur_image_file.split('/')[-1].split('.')[0]) 47 | 48 | return ret 49 | 50 | def __len__(self): 51 | return len(self.data[list(self.data.keys())[0]]) 52 | 53 | def get_tracks(self, dataset, data_types, observe_length, predict_length, overlap, normalize): 54 | """ 55 | Generates tracks by sampling from pedestrian sequences 56 | :param dataset: The raw data passed to the method 57 | :param data_types: Specification of types of data for encoder and decoder. Data types depend on datasets. e.g. 58 | JAAD has 'bbox', 'ceneter' and PIE in addition has 'obd_speed', 'heading_angle', etc. 59 | :param observe_length: The length of the observation (i.e. time steps of the encoder) 60 | :param predict_length: The length of the prediction (i.e. time steps of the decoder) 61 | :param overlap: How much the sampled tracks should overlap. A value between [0,1) should be selected 62 | :param normalize: Whether to normalize center/bounding box coordinates, i.e. convert to velocities. NOTE: when 63 | the tracks are normalized, observation length becomes 1 step shorter, i.e. first step is removed. 
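            (In this adapted loader the boxes are instead min-max normalized later in
            convert_normalize_bboxes using the configured min_bbox/max_bbox bounds, so
            the track length is not shortened here.)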
64 | :return: A dictinary containing sampled tracks for each data modality 65 | """ 66 | # Calculates the overlap in terms of number of frames 67 | seq_length = observe_length + predict_length 68 | overlap_stride = observe_length if overlap == 0 else \ 69 | int((1 - overlap) * observe_length) 70 | overlap_stride = 1 if overlap_stride < 1 else overlap_stride 71 | 72 | # Check the validity of keys selected by user as data type 73 | d = {} 74 | for dt in data_types: 75 | try: 76 | d[dt] = dataset[dt] 77 | except: 78 | raise KeyError('Wrong data type is selected %s' % dt) 79 | d['image'] = dataset['image'] 80 | d['pid'] = dataset['pid'] 81 | d['resolution'] = dataset['resolution'] 82 | d['flow'] = [] 83 | 84 | # Sample tracks from sequneces 85 | for k in d.keys(): 86 | tracks = [] 87 | for track in d[k]: 88 | tracks.extend([track[i:i + seq_length] for i in 89 | range(0, len(track) - seq_length + 1, overlap_stride)]) 90 | d[k] = tracks 91 | 92 | # Normalize tracks using FOL paper method, 93 | d['bbox'] = self.convert_normalize_bboxes(d['bbox'], d['resolution'], 94 | self.args.normalize, self.args.bbox_type) 95 | return d 96 | 97 | def convert_normalize_bboxes(self, all_bboxes, all_resolutions, normalize, bbox_type): 98 | '''input box type is x1y1x2y2 in original resolution''' 99 | for i in range(len(all_bboxes)): 100 | if len(all_bboxes[i]) == 0: 101 | continue 102 | bbox = np.array(all_bboxes[i]) 103 | # NOTE ltrb to cxcywh 104 | if bbox_type == 'cxcywh': 105 | bbox[..., [2, 3]] = bbox[..., [2, 3]] - bbox[..., [0, 1]] 106 | bbox[..., [0, 1]] += bbox[..., [2, 3]]/2 107 | # NOTE Normalize bbox 108 | if normalize == 'zero-one': 109 | # W, H = all_resolutions[i][0] 110 | _min = np.array(self.args.min_bbox)[None, :] 111 | _max = np.array(self.args.max_bbox)[None, :] 112 | bbox = (bbox - _min) / (_max - _min) 113 | elif normalize == 'plus-minus-one': 114 | # W, H = all_resolutions[i][0] 115 | _min = np.array(self.args.min_bbox)[None, :] 116 | _max = np.array(self.args.max_bbox)[None, :] 117 | bbox = (2 * (bbox - _min) / (_max - _min)) - 1 118 | elif normalize == 'none': 119 | pass 120 | else: 121 | raise ValueError(normalize) 122 | all_bboxes[i] = bbox 123 | return all_bboxes 124 | 125 | def get_data_helper(self, data, data_type): 126 | """ 127 | A helper function for data generation that combines different data types into a single representation 128 | :param data: A dictionary of different data types 129 | :param data_type: The data types defined for encoder and decoder input/output 130 | :return: A unified data representation as a list 131 | """ 132 | if not data_type: 133 | return [] 134 | d = [] 135 | for dt in data_type: 136 | if dt == 'image': 137 | continue 138 | d.append(np.array(data[dt])) 139 | 140 | # Concatenate different data points into a single representation 141 | if len(d) > 1: 142 | return np.concatenate(d, axis=2) 143 | elif len(d) == 1: 144 | return d[0] 145 | else: 146 | return d 147 | 148 | def get_data(self, data, **model_opts): 149 | """ 150 | Main data generation function for training/testing 151 | :param data: The raw data 152 | :param model_opts: Control parameters for data generation characteristics (see below for default values) 153 | :return: A dictionary containing training and testing data 154 | """ 155 | 156 | opts = { 157 | 'normalize_bbox': True, 158 | 'track_overlap': 0.5, 159 | 'observe_length': 15, 160 | 'predict_length': self.args.dec_steps, 161 | 'enc_input_type': ['bbox'], 162 | 'dec_input_type': [], 163 | 'prediction_type': ['bbox'] 164 | } 165 | for key, 
value in model_opts.items(): 166 | assert key in opts.keys(), 'wrong data parameter %s' % key 167 | opts[key] = value 168 | 169 | observe_length = opts['observe_length'] 170 | predict_length = opts['predict_length'] 171 | data_types = set(opts['enc_input_type'] + opts['dec_input_type'] + opts['prediction_type']) 172 | data_tracks = self.get_tracks(data, data_types, observe_length, 173 | opts['predict_length'], opts['track_overlap'], 174 | opts['normalize_bbox']) 175 | 176 | obs_slices = {} 177 | pred_slices = {} 178 | obs_slices['gt_mean'] = [] 179 | obs_slices['gt_std'] = [] 180 | # Generate observation/prediction sequences from the tracks 181 | for k in data_tracks.keys(): 182 | 183 | obs_slices[k] = [] 184 | pred_slices[k] = [] 185 | # NOTE: Add downsample function 186 | down = self.downsample_step 187 | if k == 'bbox': 188 | start = down-1 189 | end = start + observe_length 190 | mean_list = [] 191 | std_list = [] 192 | observe_list = [] 193 | target_list = [] 194 | for sample in data_tracks[k]: 195 | target = self.get_target(sample,start,end,observe_length,predict_length) 196 | target_list.append(target) 197 | observe = sample[down-1:observe_length:down] 198 | observe_list.append(observe) 199 | mean_np = np.zeros((observe_length, 4)) 200 | std_np = np.zeros((observe_length, 4)) 201 | for obs in range(1,observe_length+1): 202 | whole_seq = sample[down-1:(obs + predict_length):down] 203 | mean_np[obs-1] = deepcopy(whole_seq).mean(axis=0) 204 | std_np[obs-1] = deepcopy(whole_seq).mean(axis=0) 205 | mean_list.append(mean_np) 206 | std_list.append(std_np) 207 | obs_slices[k].extend(observe_list) 208 | obs_slices['gt_mean'].extend(mean_list) 209 | obs_slices['gt_std'].extend(std_list) 210 | pred_slices[k].extend(target_list) 211 | 212 | else: 213 | obs_slices[k].extend([sample[down-1:observe_length:down] for sample in data_tracks[k]]) 214 | ret = {'obs_image': obs_slices['image'], 215 | 'obs_pid': obs_slices['pid'], 216 | 'obs_resolution': obs_slices['resolution'], 217 | 'gt_mean': obs_slices['gt_mean'], 218 | 'gt_std': obs_slices['gt_std'], 219 | 'pred_image': pred_slices['image'], 220 | 'pred_pid': pred_slices['pid'], 221 | 'pred_resolution': pred_slices['resolution'], 222 | 'obs_bbox': np.array(obs_slices['bbox']), 223 | 'flow_input': obs_slices['flow'], 224 | 'pred_bbox': np.array(pred_slices['bbox']), 225 | 'model_opts': opts, 226 | } 227 | 228 | return ret 229 | 230 | 231 | def get_target(self, session, start, end, observe_length, predict_length): 232 | ''' 233 | Given the input session and the start and end time of the input clip, find the target 234 | TARGET FOR PREDICTION IS THE CHANGES IN THE FUTURE!! 235 | Params: 236 | session: the input time sequence of a car, can be bbox or ego_motion with shape (time, :) 237 | start: start frame id 238 | end: end frame id 239 | Returns: 240 | target: Target tensor with shape (self.args.segment_len, dec_steps, :) 241 | The target is the change of the values. e.g. 
target of yaw is \delta{\theta}_{t0,tn} 242 | ''' 243 | target = np.zeros((observe_length, predict_length, session.shape[-1])) 244 | for i, target_start in enumerate(range(start, end)): 245 | '''the target of time t is the change of bbox/ego motion at times [t+1,...,t+5}''' 246 | target_start = target_start + 1 247 | try: 248 | target[i,:,:] = np.asarray(session[target_start:target_start+predict_length,:] - 249 | session[target_start-1:target_start,:]) 250 | except: 251 | print("segment start: ", start) 252 | print("sample start: ", target_start) 253 | print("segment end: ", end) 254 | print(session.shape) 255 | raise ValueError() 256 | return target -------------------------------------------------------------------------------- /lib/utils/ethucy_train_utils_cvae.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import os.path as osp 4 | import numpy as np 5 | import time 6 | import random 7 | from tqdm import tqdm 8 | import torch 9 | from torch import nn, optim 10 | from torch.nn import functional as F 11 | from torch.utils import data 12 | 13 | from lib.utils.eval_utils import eval_ethucy, eval_ethucy_cvae 14 | from lib.losses import cvae, cvae_multi 15 | 16 | def train(model, train_gen, criterion, optimizer, device): 17 | model.train() # Sets the module in training mode. 18 | count = 0 19 | total_goal_loss = 0 20 | total_dec_loss = 0 21 | total_cvae_loss = 0 22 | total_KLD_loss = 0 23 | loader = tqdm(train_gen, total=len(train_gen)) 24 | with torch.set_grad_enabled(True): 25 | for batch_idx, data in enumerate(loader): 26 | # if batch_idx > 1: 27 | # break 28 | first_history_index = data['first_history_index'] 29 | assert torch.unique(first_history_index).shape[0] == 1 30 | batch_size = data['input_x'].shape[0] 31 | count += batch_size 32 | 33 | input_traj = data['input_x'].to(device) 34 | input_traj_st = data['input_x_st'].to(device) 35 | target_traj = data['target_y'].to(device) 36 | 37 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = target_traj, start_index = first_history_index, training = False) 38 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj, first_history_index[0]) 39 | #import pdb; pdb.set_trace() 40 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 41 | train_loss = goal_loss + cvae_loss + KLD_loss.mean() 42 | 43 | total_goal_loss += goal_loss.item()* batch_size 44 | total_cvae_loss += cvae_loss.item()* batch_size 45 | total_KLD_loss += KLD_loss.mean()* batch_size 46 | 47 | # optimize 48 | optimizer.zero_grad() 49 | train_loss.backward() 50 | optimizer.step() 51 | 52 | total_goal_loss /= count 53 | total_cvae_loss/= count 54 | total_KLD_loss/= count 55 | 56 | return total_goal_loss, total_cvae_loss, total_KLD_loss 57 | 58 | def val(model, val_gen, criterion, device): 59 | total_goal_loss = 0 60 | total_cvae_loss = 0 61 | total_KLD_loss = 0 62 | count = 0 63 | model.eval() 64 | loader = tqdm(val_gen, total=len(val_gen)) 65 | with torch.set_grad_enabled(False): 66 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 67 | # if batch_idx > 1: 68 | # break 69 | first_history_index = data['first_history_index'] 70 | assert torch.unique(first_history_index).shape[0] == 1 71 | batch_size = data['input_x'].shape[0] 72 | count += batch_size 73 | 74 | input_traj = data['input_x'].to(device) 75 | input_traj_st = data['input_x_st'].to(device) 76 | target_traj = 
data['target_y'].to(device) 77 | 78 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = None, start_index = first_history_index, training = False) 79 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 80 | 81 | 82 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 83 | 84 | total_goal_loss += goal_loss.item()* batch_size 85 | total_cvae_loss += cvae_loss.item()* batch_size 86 | total_KLD_loss += KLD_loss.mean()* batch_size 87 | 88 | val_loss = total_goal_loss/count \ 89 | + total_cvae_loss/count+ total_KLD_loss/ count 90 | #import pdb;pdb.set_trace() 91 | return val_loss 92 | 93 | def test(model, test_gen, criterion, device): 94 | total_goal_loss = 0 95 | total_cvae_loss = 0 96 | total_KLD_loss = 0 97 | ADE_08 = 0 98 | ADE_12 = 0 99 | FDE_08 = 0 100 | FDE_12 = 0 101 | count = 0 102 | model.eval() 103 | loader = tqdm(test_gen, total=len(test_gen)) 104 | with torch.set_grad_enabled(False): 105 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 106 | # if batch_idx > 1: 107 | # break 108 | 109 | first_history_index = data['first_history_index'] 110 | assert torch.unique(first_history_index).shape[0] == 1 111 | batch_size = data['input_x'].shape[0] 112 | count += batch_size 113 | 114 | input_traj = data['input_x'].to(device) 115 | input_traj_st = data['input_x_st'].to(device) 116 | target_traj = data['target_y'].to(device) 117 | 118 | all_goal_traj, cvae_dec_traj, KLD_loss, _ = model(input_traj, map_mask = None, targets = None, start_index = first_history_index, training = False) 119 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 120 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 121 | 122 | 123 | 124 | total_goal_loss += goal_loss.item()* batch_size 125 | total_cvae_loss += cvae_loss.item()* batch_size 126 | total_KLD_loss += KLD_loss.mean()* batch_size 127 | 128 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 129 | all_goal_traj_np = all_goal_traj.to('cpu').numpy() 130 | input_traj_np = input_traj.to('cpu').numpy() 131 | target_traj_np = target_traj.to('cpu').numpy() 132 | 133 | batch_results =\ 134 | eval_ethucy_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 135 | ADE_08 += batch_results['ADE_08'] 136 | ADE_12 += batch_results['ADE_12'] 137 | FDE_08 += batch_results['FDE_08'] 138 | FDE_12 += batch_results['FDE_12'] 139 | 140 | 141 | 142 | ADE_08 /= count 143 | ADE_12 /= count 144 | FDE_08 /= count 145 | FDE_12 /= count 146 | 147 | 148 | test_loss = total_goal_loss/count + total_cvae_loss/count + total_KLD_loss/count 149 | # print("Test Loss %4f\n" % (test_loss)) 150 | # print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 151 | return test_loss, ADE_08, FDE_08, ADE_12, FDE_12 152 | 153 | def evaluate(model, test_gen, criterion, device): 154 | total_goal_loss = 0 155 | total_cvae_loss = 0 156 | total_KLD_loss = 0 157 | ADE_08 = 0 158 | ADE_12 = 0 159 | FDE_08 = 0 160 | FDE_12 = 0 161 | count = 0 162 | all_file_name = [] 163 | model.eval() 164 | loader = tqdm(test_gen, total=len(test_gen)) 165 | with torch.set_grad_enabled(False): 166 | for batch_idx, data in enumerate(loader):#for batch_idx, data in enumerate(val_gen): 167 | first_history_index = data['first_history_index'] 168 | assert torch.unique(first_history_index).shape[0] == 1 169 | batch_size = data['input_x'].shape[0] 170 | count += batch_size 171 | 172 | 
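                # The batch carries the observed trajectory ('input_x'), its standardized
                # version ('input_x_st') and the ground-truth future ('target_y'); ADE/FDE
                # at 8 and 12 steps are computed by eval_ethucy_cvae from the K CVAE samples
                # predicted at the last observed step (the [:, -1, ...] slices below).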
input_traj = data['input_x'].to(device) 173 | input_traj_st = data['input_x_st'].to(device) 174 | target_traj = data['target_y'].to(device) 175 | scene_name = data['scene_name'] 176 | timestep = data['timestep'] 177 | current_img = timestep 178 | #import pdb; pdb.set_trace() 179 | # filename = datapath + '/test/biwi_eth.txt' 180 | # data = pd.read_csv(filename, sep='\t', index_col=False, header=None) 181 | # data.columns = ['frame_id', 'track_id', 'pos_x', 'pos_y'] 182 | # frame_id_min = data['frame_id'].min() 183 | # filename path = os.path.join(datapath, dataset ,str((current_img[1][0]+int(frame_id_min)//10)*10).zfill(5) + '.png') 184 | 185 | all_goal_traj, cvae_dec_traj, KLD_loss = model(input_traj, target_traj, first_history_index, False) 186 | cvae_loss = cvae_multi(cvae_dec_traj,target_traj) 187 | goal_loss = criterion(all_goal_traj[:,first_history_index[0]:,:,:], target_traj[:,first_history_index[0]:,:,:]) 188 | total_goal_loss += goal_loss.item()* batch_size 189 | total_cvae_loss += cvae_loss.item()* batch_size 190 | total_KLD_loss += KLD_loss.mean()* batch_size 191 | 192 | cvae_dec_traj_np = cvae_dec_traj.to('cpu').numpy() 193 | cvae_dec_traj = cvae_dec_traj.to('cpu').numpy() 194 | 195 | all_goal_traj_np = all_goal_traj.to('cpu').numpy() 196 | input_traj_np = input_traj.to('cpu').numpy() 197 | target_traj_np = target_traj.to('cpu').numpy() 198 | #import pdb;pdb.set_trace() 199 | # Decoder 200 | # batch_MSE_15, batch_MSE_05, batch_MSE_10, batch_FMSE, batch_CMSE, batch_CFMSE, batch_FIOU =\ 201 | # eval_jaad_pie(input_traj_np, target_traj_np, all_dec_traj_np) 202 | batch_results =\ 203 | eval_ethucy_cvae(input_traj_np, target_traj_np[:,-1,:,:], cvae_dec_traj[:,-1,:,:,:]) 204 | ADE_08 += batch_results['ADE_08'] 205 | ADE_12 += batch_results['ADE_12'] 206 | FDE_08 += batch_results['FDE_08'] 207 | FDE_12 += batch_results['FDE_12'] 208 | 209 | if batch_idx == 0: 210 | all_input = input_traj_np 211 | all_target = target_traj_np 212 | all_prediction = cvae_dec_traj_np 213 | else: 214 | all_input = np.vstack((all_input,input_traj_np)) 215 | all_target = np.vstack((all_target,target_traj_np)) 216 | all_prediction = np.vstack((all_prediction,cvae_dec_traj_np)) 217 | all_file_name.extend(current_img) 218 | 219 | 220 | 221 | 222 | ADE_08 /= count 223 | ADE_12 /= count 224 | FDE_08 /= count 225 | FDE_12 /= count 226 | 227 | print("ADE_08: %4f; FDE_08: %4f; ADE_12: %4f; FDE_12: %4f\n" % (ADE_08, FDE_08, ADE_12, FDE_12)) 228 | 229 | return all_input,all_target,all_prediction,all_file_name 230 | 231 | def weights_init(m): 232 | if isinstance(m, nn.Linear): 233 | m.weight.data.normal_(0.0, 0.001) 234 | elif isinstance(m, nn.Conv1d): 235 | nn.init.normal_(m.weight.data) 236 | if m.bias is not None: 237 | nn.init.normal_(m.bias.data) 238 | elif isinstance(m, nn.Conv2d): 239 | nn.init.xavier_normal_(m.weight.data) 240 | if m.bias is not None: 241 | nn.init.normal_(m.bias.data) 242 | elif isinstance(m, nn.Conv3d): 243 | nn.init.xavier_normal_(m.weight.data) 244 | if m.bias is not None: 245 | nn.init.normal_(m.bias.data) 246 | elif isinstance(m, nn.ConvTranspose1d): 247 | nn.init.normal_(m.weight.data) 248 | if m.bias is not None: 249 | nn.init.normal_(m.bias.data) 250 | elif isinstance(m, nn.ConvTranspose2d): 251 | nn.init.xavier_normal_(m.weight.data) 252 | if m.bias is not None: 253 | nn.init.normal_(m.bias.data) 254 | elif isinstance(m, nn.ConvTranspose3d): 255 | nn.init.xavier_normal_(m.weight.data) 256 | if m.bias is not None: 257 | nn.init.normal_(m.bias.data) 258 | elif isinstance(m, 
nn.BatchNorm1d): 259 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 260 | nn.init.constant_(m.bias.data, 0) 261 | elif isinstance(m, nn.BatchNorm2d): 262 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 263 | nn.init.constant_(m.bias.data, 0) 264 | elif isinstance(m, nn.BatchNorm3d): 265 | nn.init.normal_(m.weight.data, mean=1, std=0.02) 266 | nn.init.constant_(m.bias.data, 0) 267 | elif isinstance(m, nn.LSTM): 268 | for param in m.parameters(): 269 | if len(param.shape) >= 2: 270 | nn.init.orthogonal_(param.data) 271 | else: 272 | nn.init.normal_(param.data) 273 | elif isinstance(m, nn.LSTMCell): 274 | for param in m.parameters(): 275 | if len(param.shape) >= 2: 276 | nn.init.orthogonal_(param.data) 277 | else: 278 | nn.init.normal_(param.data) 279 | elif isinstance(m, nn.GRU): 280 | for param in m.parameters(): 281 | if len(param.shape) >= 2: 282 | nn.init.orthogonal_(param.data) 283 | else: 284 | nn.init.normal_(param.data) 285 | elif isinstance(m, nn.GRUCell): 286 | for param in m.parameters(): 287 | if len(param.shape) >= 2: 288 | nn.init.orthogonal_(param.data) 289 | else: 290 | nn.init.normal_(param.data) 291 | -------------------------------------------------------------------------------- /lib/dataloaders/JAAD_origin.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for the JAAD dataset: 3 | 4 | A. Rasouli, I. Kotseruba, and J. K. Tsotsos,“Are they going to cross? 5 | a benchmark dataset and baseline for pedestrian crosswalk behavior,” In Proc. 6 | ICCV Workshop, 2017, pp. 206–213. 7 | 8 | A. Rasouli, I. Kotseruba, and J. K. Tsotsos, “Agreeing to cross: How drivers 9 | and pedestrians communicate,” In Proc. Intelligent Vehicles Symposium (IV), 10 | 2017, pp. 264–269. 11 | 12 | I. Kotseruba, A. Rasouli, and J. K. Tsotsos, “Joint attention in autonomous 13 | driving (jaad),” arXiv:1609.04741, 2016. 14 | 15 | MIT License 16 | 17 | Copyright (c) 2018 I. Kotseruba 18 | 19 | Permission is hereby granted, free of charge, to any person obtaining a copy 20 | of this software and associated documentation files (the "Software"), to deal 21 | in the Software without restriction, including without limitation the rights 22 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 23 | copies of the Software, and to permit persons to whom the Software is 24 | furnished to do so, subject to the following conditions: 25 | 26 | The above copyright notice and this permission notice shall be included in all 27 | copies or substantial portions of the Software. 28 | 29 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 30 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 31 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 32 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 33 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 34 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 35 | SOFTWARE. 
36 | 37 | """ 38 | import sys 39 | import pickle 40 | import cv2 41 | 42 | import numpy as np 43 | import xml.etree.ElementTree as ET 44 | 45 | from os.path import join, abspath, exists 46 | from os import listdir, makedirs 47 | from sklearn.model_selection import train_test_split, KFold 48 | 49 | 50 | class JAAD(object): 51 | def __init__(self, data_path='', regen_pkl=False): 52 | """ 53 | Constructor of the jaad class 54 | :param data_path: Path to the folder of the dataset 55 | :param regen_pkl: Whether to regenerate the database 56 | """ 57 | self._year = '2016' 58 | self._name = 'JAAD' 59 | self._regen_pkl = regen_pkl 60 | self._image_ext = '.png' 61 | 62 | # Paths 63 | self._jaad_path = data_path if data_path else self._get_default_path() 64 | assert exists(self._jaad_path), \ 65 | 'Jaad path does not exist: {}'.format(self._jaad_path) 66 | self._data_split_ids_path = join(self._jaad_path, 'split_ids') 67 | self._annotation_path = join(self._jaad_path, 'annotations') 68 | self._annotation_vehicle_path = join(self._jaad_path, 'annotations_vehicle') 69 | self._annotation_traffic_path = join(self._jaad_path, 'annotations_traffic') 70 | self._annotation_attributes_path = join(self._jaad_path, 'annotations_attributes') 71 | self._annotation_appearance_path = join(self._jaad_path, 'annotations_appearance') 72 | self._clips_path = join(self._jaad_path, 'JAAD_clips') 73 | self._images_path = join(self._jaad_path, 'images') 74 | 75 | # Path generators 76 | @property 77 | def cache_path(self): 78 | """ 79 | Generate a path to save cache files 80 | :return: Cache file folder path 81 | """ 82 | cache_path = abspath(join(self._jaad_path, 'data_cache')) 83 | if not exists(cache_path): 84 | makedirs(cache_path) 85 | return cache_path 86 | 87 | def _get_default_path(self): 88 | """ 89 | Return the default path where jaad_raw files are expected to be placed. 
90 | :return: the default path to the dataset folder 91 | """ 92 | return 'dataset/jaad' 93 | 94 | def _get_video_ids_split(self, image_set, subset='default'): 95 | """ 96 | Returns a list of video ids for a given data split 97 | :param image_set: Data split, train, test, val 98 | :return: The list of video ids 99 | """ 100 | vid_ids = [] 101 | sets = [image_set] if image_set != 'all' else ['train', 'test', 'val'] 102 | for s in sets: 103 | vid_id_file = join(self._data_split_ids_path, subset, s + '.txt') 104 | with open(vid_id_file, 'rt') as fid: 105 | vid_ids.extend([x.strip() for x in fid.readlines()]) 106 | return vid_ids 107 | 108 | def _get_video_ids(self): 109 | """ 110 | Returns a list of all video ids 111 | :return: The list of video ids 112 | """ 113 | return [vid.split('.')[0] for vid in listdir(self._annotation_path)] 114 | 115 | def _get_image_path(self, vid, fid): 116 | """ 117 | Generates the image path given ids 118 | :param vid: Video id 119 | :param fid: Frame id 120 | :return: Return the path to the given image 121 | """ 122 | return join(self._images_path, vid, 123 | '{:05d}.png'.format(fid)) 124 | 125 | # Visual helpers 126 | def update_progress(self, progress): 127 | """ 128 | Creates a progress bar 129 | :param progress: The progress thus far 130 | """ 131 | barLength = 20 132 | status = "" 133 | if isinstance(progress, int): 134 | progress = float(progress) 135 | 136 | block = int(round(barLength * progress)) 137 | text = "\r[{}] {:0.2f}% {}".format("#" * block + "-" * (barLength - block), progress * 100, status) 138 | sys.stdout.write(text) 139 | sys.stdout.flush() 140 | 141 | def _print_dict(self, dic): 142 | """ 143 | Prints a dictionary, one key-value pair per line 144 | :param dic: Dictionary 145 | """ 146 | for k, v in dic.items(): 147 | print('%s: %s' % (str(k), str(v))) 148 | 149 | # Image processing helpers 150 | def _squarify(self, bbox, ratio, img_width): 151 | """ 152 | Changes is the ratio of bounding boxes to a fixed ratio 153 | :param bbox: Bounding box 154 | :param ratio: Ratio to be changed to 155 | :param img_width: Image width 156 | :return: Squarified boduning box 157 | """ 158 | width = abs(bbox[0] - bbox[2]) 159 | height = abs(bbox[1] - bbox[3]) 160 | width_change = height * ratio - width 161 | 162 | bbox[0] = bbox[0] - width_change / 2 163 | bbox[2] = bbox[2] + width_change / 2 164 | if bbox[0] < 0: 165 | bbox[0] = 0 166 | 167 | # check whether the new bounding box goes beyond image boarders 168 | # If this is the case, the bounding box is shifted back 169 | if bbox[2] > img_width: 170 | bbox[0] = bbox[0] - bbox[2] + img_width 171 | bbox[2] = img_width 172 | return bbox 173 | 174 | def extract_and_save_images(self): 175 | """ 176 | Extract images from clips and save on drive 177 | """ 178 | 179 | videos = [f.split('.')[0] for f in sorted(listdir(self._clips_path))] 180 | 181 | for vid in videos: 182 | path_to_file = join(self._annotation_path, vid + '.xml') 183 | print(vid) 184 | tree = ET.parse(path_to_file) 185 | num_frames = int(tree.find("./meta/task/size").text) 186 | 187 | video_clip_path = join(self._clips_path, vid + '.mp4') 188 | 189 | save_images_path = join(self._images_path, vid) 190 | if not exists(save_images_path): 191 | makedirs(save_images_path) 192 | 193 | vidcap = cv2.VideoCapture(video_clip_path) 194 | success, image = vidcap.read() 195 | frame_num = 0 196 | img_count = 0 197 | if not success: 198 | print('Failed to open the video {}'.format(vid)) 199 | while success: 200 | self.update_progress(img_count / num_frames) 201 | 
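                # Each decoded frame is saved as <video_id>/{frame:05d}.png; frames that
                # already exist on disk are skipped, so an interrupted extraction can be
                # re-run without rewriting images.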
img_count += 1 202 | if not exists(join(save_images_path, "{:05d}.png").format(frame_num)): 203 | cv2.imwrite(join(save_images_path, "{:05d}.png").format(frame_num), image) 204 | else: 205 | print('path %s already exists') 206 | success, image = vidcap.read() 207 | frame_num += 1 208 | if num_frames != img_count: 209 | print('num images don\'t match {}/{}'.format(num_frames, img_count)) 210 | print('\n') 211 | 212 | # Annotation processing helpers 213 | def _map_text_to_scalar(self, label_type, value): 214 | """ 215 | Maps a text label in XML file to scalars 216 | :param label_type: The label type 217 | :param value: The text to be mapped 218 | :return: The scalar value 219 | """ 220 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2}, 221 | 'action': {'standing': 0, 'walking': 1}, 222 | 'nod': {'__undefined__': 0, 'nodding': 1}, 223 | 'look': {'not-looking': 0, 'looking': 1}, 224 | 'hand_gesture': {'__undefined__': 0, 'greet': 1, 'yield': 2, 225 | 'rightofway': 3, 'other': 4}, 226 | 'reaction': {'__undefined__': 0, 'clear_path': 1, 'speed_up': 2, 227 | 'slow_down': 3}, 228 | 'cross': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1}, 229 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3}, 230 | 'designated': {'ND': 0, 'D': 1}, 231 | 'gender': {'n/a': 0, 'female': 1, 'male': 2}, 232 | 'intersection': {'no': 0, 'yes': 1}, 233 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2}, 234 | 'traffic_direction': {'OW': 0, 'TW': 1}, 235 | 'signalized': {'n/a': 0, 'NS': 1, 'S': 2}, 236 | 'vehicle': {'stopped': 0, 'moving_slow': 1, 'moving_fast': 2, 237 | 'decelerating': 3, 'accelerating': 4}, 238 | 'road_type': {'street': 0, 'parking_lot': 1, 'garage': 2}, 239 | 'traffic_light': {'n/a': 0, 'red': 1, 'green': 2}} 240 | 241 | return map_dic[label_type][value] 242 | 243 | def _map_scalar_to_text(self, label_type, value): 244 | """ 245 | Maps a scalar value to a text label 246 | :param label_type: The label type 247 | :param value: The scalar to be mapped 248 | :return: The text label 249 | """ 250 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'}, 251 | 'action': {0: 'standing', 1: 'walking'}, 252 | 'nod': {0: '__undefined__', 1: 'nodding'}, 253 | 'look': {0: 'not-looking', 1: 'looking'}, 254 | 'hand_gesture': {0: '__undefined__', 1: 'greet', 255 | 2: 'yield', 3: 'rightofway', 256 | 4: 'other'}, 257 | 'reaction': {0: '__undefined__', 1: 'clear_path', 258 | 2: 'speed_up', 3: 'slow_down'}, 259 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'}, 260 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'}, 261 | 'designated': {0: 'ND', 1: 'D'}, 262 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'}, 263 | 'intersection': {0: 'no', 1: 'yes'}, 264 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'}, 265 | 'traffic_direction': {0: 'OW', 1: 'TW'}, 266 | 'signalized': {0: 'n/a', 1: 'NS', 2: 'S'}, 267 | 'vehicle': {0: 'stopped', 1: 'moving_slow', 2: 'moving_fast', 268 | 3: 'decelerating', 4: 'accelerating'}, 269 | 'road_type': {0: 'street', 1: 'parking_lot', 2: 'garage'}, 270 | 'traffic_light': {0: 'n/a', 1: 'red', 2: 'green'}} 271 | 272 | return map_dic[label_type][value] 273 | 274 | def _get_annotations(self, vid): 275 | """ 276 | Generates a dictinary of annotations by parsing the video XML file 277 | :param vid: The id of video to parse 278 | :return: A dictionary of annotations 279 | """ 280 | path_to_file = join(self._annotation_path, vid + '.xml') 281 | tree = ET.parse(path_to_file) 282 | ped_annt = 'ped_annotations' 283 | 284 | annotations = {} 285 | 
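        # Assemble a per-video dictionary: frame count and resolution at the top level,
        # and one entry per track under 'ped_annotations' with its frame ids, bounding
        # boxes and occlusion flags (plus behavior labels when the old_id contains
        # 'pedestrian').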
annotations['num_frames'] = int(tree.find("./meta/task/size").text) 286 | annotations['width'] = int(tree.find("./meta/task/original_size/width").text) 287 | annotations['height'] = int(tree.find("./meta/task/original_size/height").text) 288 | annotations[ped_annt] = {} 289 | 290 | ped_tracks = tree.findall("./track") 291 | 292 | for t in ped_tracks: 293 | boxes = t.findall('./box') 294 | new_id = boxes[0].find('./attribute[@name=\"id\"]').text 295 | old_id = boxes[0].find('./attribute[@name=\"old_id\"]').text 296 | annotations[ped_annt][new_id] = {'old_id': old_id, 'frames': [], 297 | 'bbox': [], 'occlusion': []} 298 | if 'pedestrian' in old_id: 299 | annotations['ped_annotations'][new_id]['behavior'] = {'cross': [], 300 | 'reaction': [], 301 | 'hand_gesture': [], 302 | 'look': [], 303 | 'action': [], 304 | 'nod': []} 305 | else: 306 | annotations[ped_annt][new_id]['behavior'] = {} 307 | 308 | for b in boxes: 309 | annotations[ped_annt][new_id]['bbox'].append( 310 | [float(b.get('xtl')), float(b.get('ytl')), 311 | float(b.get('xbr')), float(b.get('ybr'))]) 312 | occ = self._map_text_to_scalar('occlusion', 313 | b.find('./attribute[@name=\"occlusion\"]').text) 314 | annotations[ped_annt][new_id]['occlusion'].append(occ) 315 | annotations[ped_annt][new_id]['frames'].append(int(b.get('frame'))) 316 | for beh in annotations['ped_annotations'][new_id]['behavior'].keys(): 317 | annotations[ped_annt][new_id]['behavior'][beh].append( 318 | self._map_text_to_scalar(beh, 319 | b.find('./attribute[@name=\"' + beh + '\"]').text)) 320 | 321 | return annotations 322 | 323 | def _get_ped_attributes(self, vid): 324 | """ 325 | Generates a dictinary of attributes by parsing the video XML file 326 | :param vid: The id of video to parse 327 | :return: A dictionary of attributes 328 | """ 329 | path_to_file = join(self._annotation_attributes_path, vid + '_attributes.xml') 330 | tree = ET.parse(path_to_file) 331 | 332 | attributes = {} 333 | pedestrians = tree.findall("./pedestrian") 334 | for p in pedestrians: 335 | new_id = p.get('id') 336 | old_id = p.get('old_id') 337 | attributes[new_id] = {'old_id': old_id} 338 | for k, v in p.items(): 339 | if 'id' in k: 340 | continue 341 | try: 342 | attributes[new_id][k] = int(v) 343 | except ValueError: 344 | attributes[new_id][k] = self._map_text_to_scalar(k, v) 345 | 346 | return attributes 347 | 348 | def _get_ped_appearance(self, vid): 349 | """ 350 | Generates a dictinary of appearance annotations by parsing the video XML file 351 | :param vid: The id of video to parse. The labels are as follows: 352 | - pose_front, pose_back... - coarse pose of the pedestrian relative to the camera 353 | - clothes_below_knee - long clothing 354 | - clothes_upper_light, clothes_lower_dark... 
- coarse clothing color above/below waist 355 | - backpack - presence of a backpack (worn on the back, not held in hand) 356 | - bag_hand, bag_elbow, bag_shoulder - whether bag(s) are held in a hand, on a bent elbow or worn on a shoulder 357 | - bag_left_side, bag_right_side - whether bag(s) appear on the left/right side of the pedestrian body 358 | - cap,hood - headwear 359 | - umbrella,phone,baby,object - various things carried by the pedestrians 360 | - stroller/cart - objects being pushed by the pedestrian 361 | - bicycle/motorcycle - for pedestrians riding or walking these vehicles 362 | :return: A dictionary of appearance annotations 363 | """ 364 | labels = ['pose_front', 'pose_back', 'pose_left', 'pose_right', 365 | 'clothes_below_knee', 'clothes_upper_light', 'clothes_upper_dark', 'clothes_lower_light', 366 | 'clothes_lower_dark', 'backpack', 'bag_hand', 'bag_elbow', 367 | 'bag_shoulder', 'bag_left_side', 'bag_right_side', 'cap', 368 | 'hood', 'sunglasses', 'umbrella', 'phone', 369 | 'baby', 'object', 'stroller_cart', 'bicycle_motorcycle'] 370 | path_to_file = join(self._annotation_appearance_path , vid + '_appearance.xml') 371 | tree = ET.parse(path_to_file) 372 | annotations = {} 373 | ped_tracks = tree.findall("./track") 374 | for t in ped_tracks: 375 | boxes = t.findall('./box') 376 | new_id = t.get("id") 377 | annotations[new_id] = dict(zip(labels, [[] for _ in range(len(labels))])) 378 | annotations[new_id]['frames'] = [] 379 | for b in boxes: 380 | annotations[new_id]['frames'].append(int(b.get('frame'))) 381 | for l in labels: 382 | annotations[new_id][l].append(b.get(l)) 383 | return annotations 384 | 385 | def _get_traffic_attributes(self, vid): 386 | """ 387 | Generates a dictinary of vehicle attributes by parsing the video XML file 388 | :param vid: The id of video to parse 389 | :return: A dictionary of vehicle attributes 390 | """ 391 | path_to_file = join(self._annotation_traffic_path, vid + '_traffic.xml') 392 | tree = ET.parse(path_to_file) 393 | road_type = tree.find("./road_type").text 394 | traffic_attributes = {'road_type': self._map_text_to_scalar('road_type', road_type)} 395 | frames = tree.findall("./frame") 396 | for f in frames: 397 | traffic_attributes[int(f.get('id'))] = {'ped_crossing': f.get('ped_crossing'), 398 | 'ped_sign': f.get('ped_sign'), 399 | 'stop_sign': f.get('stop_sign'), 400 | 'traffic_light': self._map_text_to_scalar('traffic_light', 401 | f.get('traffic_light'))} 402 | 403 | return traffic_attributes 404 | 405 | def _get_vehicle_attributes(self, vid): 406 | """ 407 | Generates a dictinary of vehicle attributes by parsing the video XML file 408 | :param vid: The id of video to parse 409 | :return: A dictionary of vehicle attributes 410 | """ 411 | path_to_file = join(self._annotation_vehicle_path, vid + '_vehicle.xml') 412 | tree = ET.parse(path_to_file) 413 | 414 | veh_attributes = {} 415 | frames = tree.findall("./frame") 416 | for f in frames: 417 | veh_attributes[int(f.get('id'))] = self._map_text_to_scalar('vehicle', f.get('action')) 418 | 419 | return veh_attributes 420 | 421 | def generate_database(self): 422 | """ 423 | Generate a database of jaad dataset by integrating all annotations 424 | Dictionary structure: 425 | 'vid_id'(str): { 426 | 'num_frames': int 427 | 'width': int 428 | 'height': int 429 | 'ped_annotations'(str): { 430 | 'ped_id'(str): { 431 | 'old_id': str 432 | 'frames: list(int) 433 | 'occlusion': list(int) 434 | 'bbox': list([x1, y1, x2, y2]) 435 | 'behavior'(str): { 436 | 'action': list(int) 437 | 'reaction': 
list(int) 438 | 'nod': list(int) 439 | 'hand_gesture': list(int) 440 | 'cross': list(int) 441 | 'look': list(int) 442 | 'appearance'(str): { 443 | 'pose_front':list(int) 444 | 'pose_back':list(int) 445 | 'pose_left':list(int) 446 | 'pose_right':list(int) 447 | 'clothes_below_knee':list(int) 448 | 'clothes_upper_light':list(int) 449 | 'clothes_upper_dark':list(int) 450 | 'clothes_lower_light':list(int) 451 | 'clothes_lower_dark':list(int) 452 | 'backpack':list(int) 453 | 'bag_hand':list(int) 454 | 'bag_elbow':list(int) 455 | 'bag_shoulder':list(int) 456 | 'bag_left_side':list(int) 457 | 'bag_right_side':list(int) 458 | 'cap':list(int) 459 | 'hood':list(int) 460 | 'sunglasses':list(int) 461 | 'umbrella':list(int) 462 | 'phone':list(int) 463 | 'baby':list(int) 464 | 'object':list(int) 465 | 'stroller_cart':list(int) 466 | 'bicycle_motorcycle':list(int) 467 | 'attributes'(str): { 468 | 'age': int 469 | 'old_id': str 470 | 'num_lanes': int 471 | 'crossing': int 472 | 'gender': int 473 | 'crossing_point': int 474 | 'decision_point': int 475 | 'intersection': int 476 | 'designated': int 477 | 'signalized': int 478 | 'traffic_direction': int 479 | 'group_size': int 480 | 'motion_direction': int 481 | 'vehicle_annotations'(str): { 482 | frames(int):{ 483 | action: int 484 | 'traffic_annotations'(str): { 485 | road_type: int 486 | frames(int):{ 487 | ped_crossing: int 488 | ped_sign: int 489 | stop_sign: int 490 | traffic_light: int 491 | 492 | :return: A database dictionary 493 | """ 494 | print('---------------------------------------------------------') 495 | print("Generating database for jaad") 496 | 497 | # Generates a list of behavioral xml file names for videos 498 | cache_file = join(self.cache_path, 'jaad_database.pkl') 499 | if exists(cache_file) and not self._regen_pkl: 500 | with open(cache_file, 'rb') as fid: 501 | try: 502 | database = pickle.load(fid) 503 | except: 504 | database = pickle.load(fid, encoding='bytes') 505 | print('jaad database loaded from {}'.format(cache_file)) 506 | return database 507 | 508 | video_ids = sorted(self._get_video_ids()) 509 | database = {} 510 | for vid in video_ids: 511 | #print('Getting annotations for %s' % vid) 512 | vid_annotations = self._get_annotations(vid) 513 | vid_attributes = self._get_ped_attributes(vid) 514 | vid_appearance = self._get_ped_appearance(vid) 515 | vid_veh_annotations = self._get_vehicle_attributes(vid) 516 | vid_traffic_annotations = self._get_traffic_attributes(vid) 517 | 518 | # Combining all annotations 519 | vid_annotations['vehicle_annotations'] = vid_veh_annotations 520 | vid_annotations['traffic_annotations'] = vid_traffic_annotations 521 | for ped in vid_annotations['ped_annotations']: 522 | try: 523 | vid_annotations['ped_annotations'][ped]['attributes'] = vid_attributes[ped] 524 | except KeyError: 525 | vid_annotations['ped_annotations'][ped]['attributes'] = {} 526 | try: 527 | vid_annotations['ped_annotations'][ped]['appearance'] = vid_appearance[ped] 528 | except KeyError: 529 | vid_annotations['ped_annotations'][ped]['appearance'] = {} 530 | 531 | database[vid] = vid_annotations 532 | 533 | with open(cache_file, 'wb') as fid: 534 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL) 535 | print('The database is written to {}'.format(cache_file)) 536 | 537 | return database 538 | 539 | def get_data_stats(self): 540 | """ 541 | Generates statistics for jaad dataset 542 | """ 543 | annotations = self.generate_database() 544 | 545 | videos_count = len(annotations.keys()) 546 | ped_box_beh_count = 0 547 | 
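        # Counters are split by id pattern, mirroring the loop below: ids containing 'b'
        # are pedestrians with behavior annotations, ids containing 'p' are 'people'
        # tracks, and the remaining ids are pedestrians without behavior tags.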
ped_beh_count = 0 548 | ped_count = 0 549 | ped_box_count = 0 550 | people_count = 0 551 | people_box_count = 0 552 | total_frames = 0 553 | 554 | for vid in annotations: 555 | total_frames += annotations[vid]['num_frames'] 556 | for ped in annotations[vid]['ped_annotations']: 557 | 558 | if 'b' in ped: 559 | ped_beh_count += 1 560 | ped_box_beh_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 561 | elif 'p' in ped: 562 | people_count += 1 563 | people_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 564 | else: 565 | ped_count += 1 566 | ped_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 567 | 568 | print('---------------------------------------------------------') 569 | print("Number of videos: %d" % videos_count) 570 | print("Number of frames: %d" % total_frames) 571 | print("Number of pedestrians with behavior tag: %d" % ped_beh_count) 572 | print("Number of pedestrians with no behavior tag: %d" % ped_count) 573 | print("Number of people: %d" % people_count) 574 | print("Total number of pedestrians: %d" % (ped_count + ped_beh_count + people_count)) 575 | 576 | print("Number of pedestrian bounding boxes with behavior tag: %d" % ped_box_beh_count) 577 | print("Number of pedestrian bounding boxes with no behavior tag: %d" % ped_box_count) 578 | print("Number of people bounding boxes: %d" % people_box_count) 579 | print("Total number of pedestrian bounding boxes: %d" % (ped_box_beh_count + ped_box_count)) 580 | 581 | def balance_samples_count(self, seq_data, label_type, random_seed=42): 582 | """ 583 | Balances the number of positive and negative samples by randomly sampling 584 | from the more represented samples. Only works for binary classes. 585 | :param seq_data: The sequence data to be balanced. 586 | :param label_type: The lable type based on which the balancing takes place. 587 | The label values must be binary, i.e. only 0, 1. 588 | :param random_seed: The seed for random number generator. 589 | :return: Balanced data sequence. 
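        For illustration: with 40 positive and 100 negative samples, 60 randomly chosen
        negative indices are dropped from every list-valued field, leaving 40 samples of
        each class.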
590 | """ 591 | for lbl in seq_data[label_type]: 592 | for i in lbl: 593 | if i[0] not in [0, 1]: 594 | raise Exception("The label values used for balancing must be" 595 | " either 0 or 1") 596 | 597 | # balances the number of positive and negative samples 598 | print('---------------------------------------------------------') 599 | print("Balancing the number of positive and negative intention samples") 600 | 601 | gt_labels = [gt[0] for gt in seq_data[label_type]] 602 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) 603 | num_neg_samples = len(gt_labels) - num_pos_samples 604 | 605 | new_seq_data = {} 606 | # finds the indices of the samples with larger quantity 607 | if num_neg_samples == num_pos_samples: 608 | print('Positive and negative samples are already balanced') 609 | return seq_data 610 | else: 611 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples)) 612 | if num_neg_samples > num_pos_samples: 613 | rm_index = np.where(np.array(gt_labels) == 0)[0] 614 | else: 615 | rm_index = np.where(np.array(gt_labels) == 1)[0] 616 | 617 | # Calculate the difference of sample counts 618 | dif_samples = abs(num_neg_samples - num_pos_samples) 619 | # shuffle the indices 620 | np.random.seed(random_seed) 621 | np.random.shuffle(rm_index) 622 | # reduce the number of indices to the difference 623 | rm_index = rm_index[0:dif_samples] 624 | # update the data 625 | for k in seq_data: 626 | seq_data_k = seq_data[k] 627 | if not isinstance(seq_data[k], list): 628 | new_seq_data[k] = seq_data[k] 629 | else: 630 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] 631 | 632 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]] 633 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) 634 | print('Balanced:\t Positive: %d \t Negative: %d\n' 635 | % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples)) 636 | return new_seq_data 637 | 638 | # Pedestrian id helpers 639 | def _get_pedestrian_ids(self, sample_type='all'): 640 | """ 641 | Get all pedestrian ids 642 | :return: A list of pedestrian ids 643 | """ 644 | annotations = self.generate_database() 645 | pids = [] 646 | for vid in sorted(annotations): 647 | if sample_type == 'beh': 648 | pids.extend([p for p in annotations[vid]['ped_annotations'].keys() if 'b' in p]) 649 | else: 650 | pids.extend(annotations[vid]['ped_annotations'].keys()) 651 | return pids 652 | 653 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False, sample_type='all'): 654 | """ 655 | Generates and save a database of activities for all pedestriasns 656 | :param image_set: The data split to return 657 | :param ratios: The ratios to split the data. There should be 2 ratios (or 3 if val_data is true) 658 | and they should sum to 1. e.g. 
[0.4, 0.6], [0.3, 0.5, 0.2] 659 | :param val_data: Whether to generate validation data 660 | :param regen_data: Whether to overwrite the existing data 661 | :return: The random sample split 662 | """ 663 | 664 | assert image_set in ['train', 'test', 'val'] 665 | cache_file = join(self.cache_path, "random_samples.pkl") 666 | if exists(cache_file) and not regen_data: 667 | print("Random sample currently exists.\n Loading from %s" % cache_file) 668 | with open(cache_file, 'rb') as fid: 669 | try: 670 | rand_samples = pickle.load(fid) 671 | except: 672 | rand_samples = pickle.load(fid, encoding='bytes') 673 | assert image_set in rand_samples, "%s does not exist in random samples\n" \ 674 | "Please try again by setting regen_data = True" % image_set 675 | if val_data: 676 | assert len(rand_samples['ratios']) == 3, "The existing random samples " \ 677 | "does not have validation data.\n" \ 678 | "Please try again by setting regen_data = True" 679 | if ratios is not None: 680 | assert ratios == rand_samples['ratios'], "Specified ratios {} does not match the ones in existing file {}.\n\ 681 | Perform one of the following options:\ 682 | 1- Set ratios to None\ 683 | 2- Set ratios to the same values \ 684 | 3- Regenerate data".format(ratios, rand_samples['ratios']) 685 | 686 | print('The ratios are {}'.format(rand_samples['ratios'])) 687 | print("Number of %s tracks %d" % (image_set, len(rand_samples[image_set]))) 688 | return rand_samples[image_set] 689 | 690 | if ratios is None: 691 | if val_data: 692 | ratios = [0.5, 0.4, 0.1] 693 | else: 694 | ratios = [0.5, 0.5] 695 | 696 | assert sum(ratios) > 0.999999, "Ratios {} do not sum to 1".format(ratios) 697 | if val_data: 698 | assert len(ratios) == 3, "To generate validation data three ratios should be selected" 699 | else: 700 | assert len(ratios) == 2, "With no validation only two ratios should be selected" 701 | 702 | print("################ Generating Random training/testing data ################") 703 | ped_ids = self._get_pedestrian_ids(sample_type) 704 | print("Total number of tracks %d" % len(ped_ids)) 705 | print('The ratios are {}'.format(ratios)) 706 | sample_split = {'ratios': ratios} 707 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0]) 708 | print("Number of train tracks %d" % len(train_samples)) 709 | 710 | if val_data: 711 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:])) 712 | print("Number of val tracks %d" % len(val_samples)) 713 | sample_split['val'] = val_samples 714 | 715 | print("Number of test tracks %d" % len(test_samples)) 716 | sample_split['train'] = train_samples 717 | sample_split['test'] = test_samples 718 | 719 | cache_file = join(self.cache_path, "random_samples.pkl") 720 | with open(cache_file, 'wb') as fid: 721 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL) 722 | print('jaad {} samples written to {}'.format('random', cache_file)) 723 | return sample_split[image_set] 724 | 725 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1, sample_type='all'): 726 | """ 727 | Generate kfold pedestrian ids 728 | :param image_set: Image set split 729 | :param num_folds: Number of folds 730 | :param fold: The given fold 731 | :return: List of pedestrian ids for the given fold 732 | """ 733 | assert image_set in ['train', 'test'], "For K-fold data split, image-set should be either \"train\" or \"test\"" 734 | assert fold <= num_folds, "Fold number should be smaller than number of folds" 735 | print("################ 
Generating %d fold data ################" % num_folds) 736 | cache_file = join(self.cache_path, "%d_fold_samples.pkl" % num_folds) 737 | 738 | if exists(cache_file): 739 | print("Loading %d-fold data from %s" % (num_folds, cache_file)) 740 | with open(cache_file, 'rb') as fid: 741 | try: 742 | fold_idx = pickle.load(fid) 743 | except: 744 | fold_idx = pickle.load(fid, encoding='bytes') 745 | else: 746 | ped_ids = self._get_pedestrian_ids(sample_type) 747 | kf = KFold(n_splits=num_folds, shuffle=True) 748 | fold_idx = {'pid': ped_ids} 749 | count = 1 750 | for train_index, test_index in kf.split(ped_ids): 751 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()} 752 | count += 1 753 | with open(cache_file, 'wb') as fid: 754 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL) 755 | print('jaad {}-fold samples written to {}'.format(num_folds, cache_file)) 756 | print("Number of %s tracks %d" % (image_set, len(fold_idx[fold][image_set]))) 757 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]] 758 | return kfold_ids 759 | 760 | # Pedestrian detection generators 761 | def get_detection_data(self, image_set, method, occlusion_type=None, file_path='data/', **params): 762 | """ 763 | Generates data for pedestrian detection algorithms 764 | :param image_set: Split set name 765 | :param method: Detection algorithm: frcnn, retinanet, yolo3, ssd 766 | :param occlusion_type: The types of occlusion: None: only unoccluded samples 767 | part: Unoccluded and partially occluded samples 768 | full: All samples 769 | :param file_path: Where to save the script file 770 | :return: Pedestrian samples 771 | """ 772 | squarify_ratio = params['squarify_ratio'] 773 | frame_stride = params['fstride'] 774 | height_rng = params['height_rng'] 775 | if not exists(file_path): 776 | makedirs(file_path) 777 | if height_rng is None: 778 | height_rng = [0, float('inf')] 779 | 780 | annotations = self.generate_database() 781 | video_ids, _pids = self._get_data_ids(image_set, params) 782 | 783 | ped_samples = {} 784 | unique_samples = [] 785 | total_sample_count = 0 786 | for vid in video_ids: 787 | img_width = annotations[vid]['width'] 788 | img_height = annotations[vid]['height'] 789 | num_frames = annotations[vid]['num_frames'] 790 | for i in range(0,num_frames,frame_stride): 791 | ped_samples[join(self._jaad_path, 'images', vid, '{:05d}.png'.format(i))] = [] 792 | for pid in annotations[vid]['ped_annotations']: 793 | if params['data_split_type'] != 'default' and pid not in _pids: 794 | continue 795 | difficult = 0 796 | if 'p' in pid: 797 | difficult = -1 798 | if image_set in ['train', 'val']: 799 | continue 800 | imgs = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in \ 801 | annotations[vid]['ped_annotations'][pid]['frames']] 802 | boxes = annotations[vid]['ped_annotations'][pid]['bbox'] 803 | occlusion = annotations[vid]['ped_annotations'][pid]['occlusion'] 804 | for i, b in enumerate(boxes): 805 | if imgs[i] not in ped_samples: 806 | continue 807 | bbox_height = abs(b[0] - b[2]) 808 | if height_rng[0] <= bbox_height <= height_rng[1]: 809 | if (occlusion_type == None and occlusion[i] == 0) or \ 810 | (occlusion_type == 'part' and occlusion[i] < 2) or \ 811 | (occlusion_type == 'full'): 812 | if squarify_ratio: 813 | b = self._squarify(b, squarify_ratio, img_width) 814 | ped_samples[imgs[i]].append( 815 | {'width': img_width, 816 | 'height': img_height, 817 | 'tag': pid, 818 | 'box': b, 819 | 'seg_area': (b[2] - b[0] 
+ 1) * (b[3] - b[1] + 1), 820 | 'occlusion': occlusion[i], 821 | 'difficult': difficult}) 822 | if pid not in unique_samples: 823 | unique_samples.append(pid) 824 | total_sample_count += 1 825 | print('Number of unique pedestrians %d ' % len(unique_samples)) 826 | print('Number of samples %d ' % total_sample_count) 827 | if method == 'frcnn': 828 | return self._get_data_frcnn(ped_samples) 829 | elif method == 'retinanet': 830 | return self._generate_csv_data_retinanet(image_set, file_path, ped_samples) 831 | elif method == 'yolo3': 832 | return self._generate_csv_data_yolo3(image_set, file_path, ped_samples) 833 | elif method == 'ssd': 834 | return self._generate_csv_data_ssd(image_set, file_path, ped_samples) 835 | 836 | def _get_data_frcnn(self, ped_samples): 837 | """ 838 | Data generation for Faster-rcnn algorithm 839 | :param ped_samples: Dictionary of all samples 840 | """ 841 | classes_count = {} 842 | class_mapping = {} 843 | all_imgs = {} 844 | class_name = 'pedestrian' 845 | classes_count['bg'] = 0 846 | class_mapping['bg'] = 1 847 | classes_count[class_name] = 0 848 | class_mapping[class_name] = 0 849 | 850 | for img, samples in sorted(ped_samples.items()): 851 | if not samples: 852 | continue 853 | all_imgs[img] = {'filepath': img, 'width': samples[0]['width'], 854 | 'height': samples[0]['height'], 'bboxes': []} 855 | for s in samples: 856 | box = s['box'] 857 | all_imgs[img]['bboxes'].append({'class': class_name, 'x1': box[0], 858 | 'x2': box[2], 'y1': box[1], 'y2': box[3]}) 859 | print('Data generated for Faster-rcnn') 860 | all_data = [] 861 | for key in all_imgs: 862 | all_data.append(all_imgs[key]) 863 | return all_data, classes_count, class_mapping 864 | 865 | def _generate_csv_data_retinanet(self, image_set, file_path, ped_samples): 866 | """ 867 | Data generation for Retinanet algorithm 868 | :param image_set: Data split 869 | :param file_path: Path to save the data 870 | :param ped_samples: Dictionary of all samples 871 | """ 872 | class_name = 'pedestrian' 873 | data_save_path = file_path + 'retinanet_' + image_set + '.csv' 874 | with open(data_save_path, "wt") as f: 875 | for img, samples in sorted(ped_samples.items()): 876 | if not samples: 877 | f.write('%s,,,,,\n' % (img)) 878 | for s in samples: 879 | box = s['box'] 880 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], class_name)) 881 | print('Data generated for Retinanet') 882 | 883 | map_path = file_path + '_mapping.csv' 884 | with open(map_path, "w") as f: 885 | f.write('%s,0\n' % (class_name)) 886 | return data_save_path, map_path 887 | 888 | def _generate_csv_data_yolo3(self, image_set, file_path, ped_samples): 889 | """ 890 | Data generation for YOLO3 algorithm 891 | :param image_set: Data split 892 | :param file_path: Path to save the data 893 | :param ped_samples: Dictionary of all samples 894 | """ 895 | class_name = 'pedestrian' 896 | all_imgs = {} 897 | data_save_path = file_path + 'yolo3_' + image_set + '.txt' 898 | with open(data_save_path, "wt") as f: 899 | for img, samples in sorted(ped_samples.items()): 900 | if not samples: 901 | continue 902 | f.write('%s ' % (img)) 903 | for s in samples: 904 | box = s['box'] 905 | f.write('%.0f,%.0f,%.0f,%.0f,%.0f ' % (box[0], box[1], box[2], box[3], 0)) 906 | f.write('\n') 907 | print('Data generated for YOLO3') 908 | map_path = file_path + 'mapping_yolo3' 909 | with open(map_path, "wt") as f: 910 | f.write('%s,0\n' % (class_name)) 911 | return data_save_path, map_path 912 | 913 | def _generate_csv_data_ssd(self, image_set, 
file_path, ped_samples): 914 | """ 915 | Data generation for SSD algorithm 916 | :param image_set: Data split 917 | :param file_path: Path to save the data 918 | :param ped_samples: Dictionary of all samples 919 | """ 920 | data_save_path = file_path + 'ssd_' + image_set + '.csv' 921 | with open(data_save_path, "wt") as f: 922 | for img, samples in sorted(ped_samples.items()): 923 | if not samples: 924 | continue 925 | for s in samples: 926 | box = s['box'] 927 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], 1)) 928 | print('Data generated for SSD') 929 | return data_save_path 930 | 931 | # Trajectory data generation 932 | def _get_data_ids(self, image_set, params): 933 | """ 934 | A helper function to generate set id and ped ids (if needed) for processing 935 | :param image_set: Image-set to generate data 936 | :param params: Data generation params 937 | :return: Set and pedestrian ids 938 | """ 939 | _pids = None 940 | 941 | if params['data_split_type'] == 'default': 942 | return self._get_video_ids_split(image_set, params['subset']), _pids 943 | 944 | video_ids = self._get_video_ids_split('all', params['subset']) 945 | if params['data_split_type'] == 'random': 946 | params['random_params']['sample_type'] = params['sample_type'] 947 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params']) 948 | elif params['data_split_type'] == 'kfold': 949 | params['kfold_params']['sample_type'] = params['sample_type'] 950 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params']) 951 | 952 | return video_ids, _pids 953 | 954 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion): 955 | """ 956 | Checks whether the bounding boxes are within a given height limit. If not, it 957 | will adjust the length of data sequences accordingly 958 | :param height_rng: Height limit [lower, higher] 959 | :param frame_ids: List of frame ids 960 | :param boxes: List of bounding boxes 961 | :param images: List of images 962 | :param occlusion: List of occlusions 963 | :return: The adjusted data sequences 964 | """ 965 | imgs, box, frames, occ = [], [], [], [] 966 | for i, b in enumerate(boxes): 967 | bbox_height = abs(b[0] - b[2]) 968 | if height_rng[0] <= bbox_height <= height_rng[1]: 969 | box.append(b) 970 | imgs.append(images[i]) 971 | frames.append(frame_ids[i]) 972 | occ.append(occlusion[i]) 973 | return imgs, box, frames, occ 974 | 975 | def _get_center(self, box): 976 | """ 977 | Calculates the center coordinate of a bounding box 978 | :param box: Bounding box coordinates 979 | :return: The center coordinate 980 | """ 981 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2] 982 | 983 | def generate_data_trajectory_sequence(self, image_set, **opts): 984 | """ 985 | Generates pedestrian tracks 986 | :param image_set: the split set to produce for. Options are train, test, val. 987 | :param opts: 988 | 'fstride': Frequency of sampling from the data. 989 | 'sample_type': Whether to use 'all' pedestrian annotations or the ones 990 | with 'beh'avior only. 991 | 'subset': The subset of data annotations to use. Options are: 'default': Includes high resolution and 992 | high visibility videos 993 | 'high_visibility': Only videos with high 994 | visibility (include low 995 | resolution videos) 996 | 'all': Uses all videos 997 | 'height_rng': The height range of pedestrians to use. 998 | 'squarify_ratio': The width/height ratio of bounding boxes. A value between (0,1]. 0 the original 999 | ratio is used. 
1000 | 'data_split_type': How to split the data. Options: 'default', predefined sets, 'random', randomly split the data, 1001 | and 'kfold', k-fold data split (NOTE: only train/test splits). 1002 | 'seq_type': Sequence type to generate. Options: 'trajectory', generates tracks, 'crossing', generates 1003 | tracks up to 'crossing_point', 'intention' generates tracks similar to human experiments 1004 | 'min_track_size': Minimum allowable track length. 1005 | 'random_params': Parameters for random data split generation. (see _get_random_pedestrian_ids) 1006 | 'kfold_params': Parameters for kfold split generation. (see _get_kfold_pedestrian_ids) 1007 | :return: Sequence data 1008 | """ 1009 | params = {'fstride': 1, 1010 | 'sample_type': 'all', # 'beh' 1011 | 'subset': 'default', 1012 | 'height_rng': [0, float('inf')], 1013 | 'squarify_ratio': 0, 1014 | 'data_split_type': 'default', # kfold, random, default 1015 | 'seq_type': 'intention', 1016 | 'min_track_size': 15, 1017 | 'random_params': {'ratios': None, 1018 | 'val_data': True, 1019 | 'regen_data': False}, 1020 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 1021 | assert all(k in params for k in opts.keys()), "Wrong option(s). "\ 1022 | "Choose one of the following: {}".format(list(params.keys())) 1023 | params.update(opts) 1024 | 1025 | print('---------------------------------------------------------') 1026 | print("Generating action sequence data") 1027 | self._print_dict(params) 1028 | 1029 | annot_database = self.generate_database() 1030 | if params['seq_type'] == 'trajectory': 1031 | sequence = self._get_trajectories(image_set, annot_database, **params) 1032 | elif params['seq_type'] == 'crossing': 1033 | sequence = self._get_crossing(image_set, annot_database, **params) 1034 | elif params['seq_type'] == 'intention': 1035 | sequence = self._get_intention(image_set, annot_database, **params) 1036 | 1037 | return sequence 1038 | 1039 | def _get_trajectories(self, image_set, annotations, **params): 1040 | """ 1041 | Generates trajectory data. 
1042 | :param params: Parameters for generating trajectories 1043 | :param annotations: The annotations database 1044 | :return: A dictionary of trajectories 1045 | """ 1046 | 1047 | print('---------------------------------------------------------') 1048 | print("Generating trajectory data") 1049 | 1050 | num_pedestrians = 0 1051 | seq_stride = params['fstride'] 1052 | sq_ratio = params['squarify_ratio'] 1053 | height_rng = params['height_rng'] 1054 | 1055 | image_seq, pids_seq = [], [] 1056 | box_seq, center_seq, occ_seq = [], [], [] 1057 | intent_seq = [] 1058 | vehicle_seq = [] 1059 | resolution_seq = [] 1060 | video_ids, _pids = self._get_data_ids(image_set, params) 1061 | 1062 | for vid in sorted(video_ids): 1063 | img_width = annotations[vid]['width'] 1064 | img_height = annotations[vid]['height'] 1065 | pid_annots = annotations[vid]['ped_annotations'] 1066 | vid_annots = annotations[vid]['vehicle_annotations'] 1067 | 1068 | for pid in sorted(annotations[vid]['ped_annotations']): 1069 | if params['data_split_type'] != 'default' and pid not in _pids: 1070 | continue 1071 | if 'p' in pid: 1072 | continue 1073 | if params['sample_type'] == 'beh' and 'b' not in pid: 1074 | continue 1075 | num_pedestrians += 1 1076 | frame_ids = pid_annots[pid]['frames'] 1077 | images = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in 1078 | pid_annots[pid]['frames']] 1079 | boxes = pid_annots[pid]['bbox'] 1080 | occlusions = pid_annots[pid]['occlusion'] 1081 | 1082 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1083 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1084 | frame_ids, boxes, 1085 | images, occlusions) 1086 | 1087 | if len(boxes) / seq_stride < params['min_track_size']: 1088 | continue 1089 | 1090 | if sq_ratio: 1091 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1092 | 1093 | ped_ids = [[pid]] * len(boxes) 1094 | 1095 | if params['sample_type'] == 'all': 1096 | intent = [[0]] * len(boxes) 1097 | else: 1098 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1099 | intent = [[0]] * len(boxes) 1100 | else: 1101 | intent = [[1]] * len(boxes) 1102 | center = [self._get_center(b) for b in boxes] 1103 | 1104 | occ_seq.append(occlusions[::seq_stride]) 1105 | image_seq.append(images[::seq_stride]) 1106 | box_seq.append(boxes[::seq_stride]) 1107 | center_seq.append(center[::seq_stride]) 1108 | intent_seq.append(intent[::seq_stride]) 1109 | pids_seq.append(ped_ids[::seq_stride]) 1110 | vehicle_seq.append([[vid_annots[i]] 1111 | for i in frame_ids][::seq_stride]) 1112 | resolutions = [[img_width, img_height]] * len(boxes) 1113 | resolution_seq.append(resolutions[::seq_stride]) 1114 | 1115 | print('Split: %s' % image_set) 1116 | print('Number of pedestrians: %d ' % num_pedestrians) 1117 | print('Total number of used pedestrians: %d ' % len(image_seq)) 1118 | 1119 | return {'image': image_seq, 1120 | 'resolution': resolution_seq, 1121 | 'pid': pids_seq, 1122 | 'bbox': box_seq, 1123 | 'center': center_seq, 1124 | 'occlusion': occ_seq, 1125 | 'intent': intent_seq, 1126 | 'vehicle_act': vehicle_seq} 1127 | 1128 | def _get_crossing(self, image_set, annotations, **params): 1129 | """ 1130 | Generates crossing data. 
1131 | :param image_set: Data split to use 1132 | :param annotations: Annotations database 1133 | :param params: Parameters to generate data (see generade_database) 1134 | :return: A dictionary of trajectories 1135 | """ 1136 | 1137 | print('---------------------------------------------------------') 1138 | print("Generating crossing data") 1139 | 1140 | num_pedestrians = 0 1141 | seq_stride = params['fstride'] 1142 | sq_ratio = params['squarify_ratio'] 1143 | height_rng = params['height_rng'] 1144 | image_seq, pids_seq = [], [] 1145 | box_seq, center_seq, occ_seq = [], [], [] 1146 | intent_seq = [] 1147 | vehicle_seq = [] 1148 | activities = [] 1149 | 1150 | video_ids, _pids = self._get_data_ids(image_set, params) 1151 | 1152 | for vid in sorted(video_ids): 1153 | img_width = annotations[vid]['width'] 1154 | img_height = annotations[vid]['height'] 1155 | pid_annots = annotations[vid]['ped_annotations'] 1156 | vid_annots = annotations[vid]['vehicle_annotations'] 1157 | for pid in sorted(pid_annots): 1158 | if params['data_split_type'] != 'default' and pid not in _pids: 1159 | continue 1160 | if 'p' in pid: 1161 | continue 1162 | if params['sample_type'] == 'beh' and 'b' not in pid: 1163 | continue 1164 | num_pedestrians += 1 1165 | 1166 | frame_ids = pid_annots[pid]['frames'] 1167 | 1168 | if 'b' in pid: 1169 | event_frame = pid_annots[pid]['attributes']['crossing_point'] 1170 | else: 1171 | event_frame = -1 1172 | 1173 | if event_frame == -1: 1174 | end_idx = -3 1175 | else: 1176 | end_idx = frame_ids.index(event_frame) 1177 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1178 | frame_ids = frame_ids[: end_idx + 1] 1179 | images = [self._get_image_path(vid, f) for f in frame_ids] 1180 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1181 | 1182 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1183 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1184 | frame_ids, boxes, 1185 | images, occlusions) 1186 | 1187 | if len(boxes) / seq_stride < params['min_track_size']: 1188 | continue 1189 | 1190 | if sq_ratio: 1191 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1192 | 1193 | image_seq.append(images[::seq_stride]) 1194 | box_seq.append(boxes[::seq_stride]) 1195 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1196 | occ_seq.append(occlusions[::seq_stride]) 1197 | 1198 | ped_ids = [[pid]] * len(boxes) 1199 | pids_seq.append(ped_ids[::seq_stride]) 1200 | 1201 | if 'b' not in pid: 1202 | intent = [[0]] * len(boxes) 1203 | acts = [[0]] * len(boxes) 1204 | else: 1205 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1206 | intent = [[0]] * len(boxes) 1207 | else: 1208 | intent = [[1]] * len(boxes) 1209 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes) 1210 | 1211 | intent_seq.append(intent[::seq_stride]) 1212 | activities.append(acts[::seq_stride]) 1213 | vehicle_seq.append([[vid_annots[i]] 1214 | for i in frame_ids][::seq_stride]) 1215 | 1216 | print('Split: %s' % image_set) 1217 | print('Number of pedestrians: %d ' % num_pedestrians) 1218 | print('Total number of samples: %d ' % len(image_seq)) 1219 | 1220 | return {'image': image_seq, 1221 | 'pid': pids_seq, 1222 | 'bbox': box_seq, 1223 | 'center': center_seq, 1224 | 'occlusion': occ_seq, 1225 | 'vehicle_act': vehicle_seq, 1226 | 'intent': intent_seq, 1227 | 'activities': activities, 1228 | 'image_dimension': (img_width, img_height)} 1229 | 1230 | def _get_intention(self, image_set, annotations, **params): 1231 
| """ 1232 | Generates intention data. 1233 | :param image_set: Data split to use 1234 | :param annotations: Annotations database 1235 | :param params: Parameters to generate data (see generade_database) 1236 | :return: A dictionary of trajectories 1237 | """ 1238 | print('---------------------------------------------------------') 1239 | print("Generating intention data") 1240 | 1241 | num_pedestrians = 0 1242 | seq_stride = params['fstride'] 1243 | sq_ratio = params['squarify_ratio'] 1244 | height_rng = params['height_rng'] 1245 | image_seq, pids_seq = [], [] 1246 | box_seq, center_seq, occ_seq = [], [], [] 1247 | intent_seq = [] 1248 | video_ids, _pids = self._get_data_ids(image_set, params) 1249 | 1250 | for vid in sorted(video_ids): 1251 | img_width = annotations[vid]['width'] 1252 | pid_annots = annotations[vid]['ped_annotations'] 1253 | for pid in sorted(pid_annots): 1254 | if params['data_split_type'] != 'default' and pid not in _pids: 1255 | continue 1256 | if 'p' in pid: 1257 | continue 1258 | if params['sample_type'] == 'beh' and 'b' not in pid: 1259 | continue 1260 | num_pedestrians += 1 1261 | frame_ids = pid_annots[pid]['frames'] 1262 | 1263 | if params['sample_type'] == 'beh': 1264 | event_frame = pid_annots[pid]['attributes']['decision_point'] 1265 | else: 1266 | event_frame = -1 1267 | 1268 | if event_frame == -1: 1269 | end_idx = -3 1270 | else: 1271 | end_idx = frame_ids.index(event_frame) 1272 | 1273 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1274 | frame_ids = frame_ids[: end_idx + 1] 1275 | images = [self._get_image_path(vid, f) for f in frame_ids] 1276 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1277 | 1278 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1279 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1280 | frame_ids, boxes, 1281 | images, occlusions) 1282 | if len(boxes) / seq_stride < params['min_track_size']: 1283 | continue 1284 | 1285 | if sq_ratio: 1286 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1287 | 1288 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1289 | image_seq.append(images[::seq_stride]) 1290 | box_seq.append(boxes[::seq_stride]) 1291 | occ_seq.append(occlusions[::seq_stride]) 1292 | ped_ids = [[pid]] * len(boxes) 1293 | pids_seq.append(ped_ids[::seq_stride]) 1294 | 1295 | if params['sample_type'] == 'all': 1296 | intent = [[0]] * len(boxes) 1297 | else: 1298 | if annotations[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1299 | intent = [[0]] * len(boxes) 1300 | else: 1301 | intent = [[1]] * len(boxes) 1302 | intent_seq.append(intent[::seq_stride]) 1303 | 1304 | print('Split: %s' % image_set) 1305 | print('Number of pedestrians: %d ' % num_pedestrians) 1306 | print('Total number of samples: %d ' % len(image_seq)) 1307 | 1308 | return {'image': image_seq, 1309 | 'pid': pids_seq, 1310 | 'bbox': box_seq, 1311 | 'center': center_seq, 1312 | 'occlusion': occ_seq, 1313 | 'intent': intent_seq} --------------------------------------------------------------------------------