├── README.md
├── model
│   ├── FFN.py
│   ├── BottleNecks.py
│   ├── model_blocks.py
│   └── main_model.py
├── readme.md
├── jaad.py
├── pie.py
└── utils
    ├── jaad_preprocessing.py
    ├── pie_preprocessing.py
    ├── pie_data.py
    └── jaad_data.py
/README.md:
--------------------------------------------------------------------------------
 1 | # PedCMT
 2 | This repository releases the code for our paper "Pedestrian Crossing Intention Prediction Based on Cross-Modal Transformer and Uncertainty-Aware Multi-Task Learning for Autonomous Driving".
 3 | 
--------------------------------------------------------------------------------
/model/FFN.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | 
 4 | class FFN(nn.Module):  # position-wise feed-forward block
 5 |     def __init__(self, d_model, hidden_dim, rate=0.3, layer_norm_eps=1e-5):
 6 |         super(FFN, self).__init__()
 7 | 
 8 |         self.norm = nn.LayerNorm(d_model, eps=layer_norm_eps)  # layer normalization
 9 |         self.linear1 = nn.Linear(d_model, hidden_dim)  # expansion layer
10 |         self.relu = nn.ReLU()
11 |         self.dropout1 = nn.Dropout(rate)
12 |         self.linear2 = nn.Linear(hidden_dim, d_model)  # projection back to d_model
13 |         self.dropout2 = nn.Dropout(rate)
14 | 
15 |     def forward(self, x):
16 |         y = self.linear2(self.dropout1(self.relu(self.linear1(x))))  # feed-forward transform
17 |         out = x + self.dropout2(y)  # residual connection
18 |         out = self.norm(out)  # layer normalization
19 |         return out
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | ## Structure info
 2 | Updating: 2023-06-16
 3 | 
 4 | URL: [(click here)](https://github.com/sellenzh/pedCMT)<br>
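For a quick sense of how the modules listed below fit together, the sketch that follows builds the main network with the default PIE hyperparameters from `pie.py` and feeds it random tensors, mirroring that script's `--learn` debug mode; the `SimpleNamespace` here merely stands in for the parsed argparse arguments and is not part of the repository.

```python
import torch
from types import SimpleNamespace

from model.main_model import Model

# Hyperparameters copied from the argparse defaults in pie.py (PIE configuration).
args = SimpleNamespace(d_model=128, dff=256, num_heads=8, num_layers=4,
                       bbox_input=4, vel_input=2, times_num=16,
                       num_bnks=3, bnks_layers=7)

model = Model(args)
bbox = torch.randn(64, args.times_num, args.bbox_input)  # pedestrian bounding boxes
vel = torch.randn(64, args.times_num, args.vel_input)    # ego-vehicle speed (OBD + GPS)
pred, end_point, s_cls, s_reg = model(bbox, vel)
print(pred.shape, end_point.shape)  # crossing probability [64, 1], trajectory endpoint [64, 4]
```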
 5 | structure info:
 6 | 
 7 | ```
 8 | ├── checkpoints            <- trained model checkpoints are saved here
 9 | │   ├── JAAD_all.pt
10 | │   ├── JAAD_beh.pt
11 | │   └── PIE.pt
12 | │
13 | ├── logs                   <- training logs are saved here
14 | │   └── PIE
15 | │       └── ...
16 | │
17 | ├── PIE                    <- PIE dataset (download: [PIE])
18 | │   ├── ...                <- note: unzip `annotations.zip`, `annotations_vehicle.zip`
19 | │   ├── ...                <- and `annotations_attributes.zip`
20 | │   └── ...
21 | ├── JAAD                   <- JAAD dataset (download: [JAAD])
22 | │   ├── ...
23 | │   └── ...
24 | │
25 | ├── utils
26 | │   ├── pie_data.py, jaad_data.py
27 | │   └── pie_preprocessing.py, jaad_preprocessing.py
28 | │
29 | ├── model                  <- model definitions
30 | │   ├── BottleNecks.py
31 | │   ├── FFN.py
32 | │   ├── model_blocks.py
33 | │   └── main_model.py
34 | │
35 | ├── pie.py
36 | │
37 | ├── jaad.py
38 | │
39 | └── README.md
40 | ```
41 | Download: [PIE](https://github.com/aras62/PIE.git)<br>
42 | [JAAD](https://github.com/ykotseruba/JAAD.git) 43 | -------------------------------------------------------------------------------- /model/BottleNecks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 5 | 6 | 7 | class Bottlenecks(nn.Module): # 瓶颈结构 8 | def __init__(self, dims, args): 9 | super(Bottlenecks, self).__init__() 10 | self.dims = dims 11 | self.num_bnks = args.num_bnks # 单元数目 12 | self.num_layers = args.bnks_layers # 层数 13 | self.bbox = nn.ModuleList() 14 | self.vel = nn.ModuleList() 15 | 16 | self.bbox.append(nn.Linear(dims, dims + self.num_bnks, bias=True)) 17 | self.vel.append(nn.Linear(dims, dims + self.num_bnks, bias=True)) 18 | 19 | for _ in range(self.num_layers - 1): 20 | self.bbox.append(nn.Linear(dims + self.num_bnks, dims + self.num_bnks, bias=True)) 21 | self.vel.append(nn.Linear(dims + self.num_bnks, dims + self.num_bnks, bias=True)) 22 | self.dropout = nn.Dropout(0.5) 23 | self.relu = nn.ReLU() 24 | 25 | def cut(self, x): # 切片 26 | return x[:, :, :self.dims], x[:, :, -self.num_bnks:] # 从第0个到第dims个,从倒数第num_bnks个到最后一个 27 | 28 | def forward(self, bbox, vel): 29 | bbox, bnk_bbox = self.cut(self.dropout(self.relu(self.bbox[0](bbox)))) # 生成下一层然后切片,得到bbox下一层和bnk_bbox 30 | vel, bnk_vel = self.cut(self.dropout(self.relu(self.vel[0](vel)))) # 生成下一层然后切片,得到vel下一层和bnk_vel 31 | bottlenecks = bnk_bbox + bnk_vel # 加和得到中间的交互单元 32 | 33 | for i in range(self.num_layers - 1): 34 | bbox = torch.cat((bbox, bottlenecks), dim=-1) 35 | bbox, bnk_bbox = self.cut(self.dropout(self.relu(self.bbox[i + 1](bbox)))) 36 | vel, bnk_vel = self.cut(self.dropout(self.relu(self.vel[i + 1](torch.cat((vel, bottlenecks), dim=-1))))) 37 | bottlenecks = bnk_bbox + bnk_vel #+ bnk_token 38 | 39 | return bottlenecks -------------------------------------------------------------------------------- /model/model_blocks.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | #from model.MultiHeadAttention import MultiHeadAttention 4 | from torch import Tensor 5 | import math 6 | from einops import repeat 7 | from einops.layers.torch import Rearrange 8 | 9 | device = 'cuda' if torch.cuda.is_available() else 'cpu' 10 | 11 | 12 | class PositionalEncoding(nn.Module): # 位置编码 13 | def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000): 14 | super().__init__() 15 | self.dropout = nn.Dropout(p=dropout) 16 | 17 | position = torch.arange(max_len).unsqueeze(1) # 生成一个max_len行1列的张量 18 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)) # 生成一个d_model/2行1列的张量 19 | pe = torch.zeros(max_len, 1, d_model) # 生成一个【max_len,1,d_model】的张量 20 | pe[:, 0, 0::2] = torch.sin(position * div_term) # 偶数列 21 | pe[:, 0, 1::2] = torch.cos(position * div_term) # 奇数列 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x: Tensor) -> Tensor: 25 | """ 26 | Args: 27 | x: Tensor, shape [batch_size, seq_len, embedding_dim] 28 | """ 29 | x = x + self.pe[:x.size(0)] # 位置编码 30 | return self.dropout(x) 31 | 32 | 33 | class EmbedPosEnc(nn.Module): 34 | def __init__(self, input_size, d_model): 35 | super(EmbedPosEnc, self).__init__() 36 | 37 | self.embedding = nn.Linear(input_size, d_model) 38 | #self.embedding = MultiScaleCNN(input_size, d_model) 39 | self.pos_enc = PositionalEncoding(d_model) # 位置编码 40 | 41 | self.arrange1 = Rearrange('b s e -> s b e') # 重排列 42 | self.arrange2 = 
Rearrange('s b e -> b s e') # 重排列 43 | 44 | def forward(self, x, token): 45 | b = x.shape[0] # 获取批次大小 46 | y = self.embedding(x) # 嵌入 47 | token = repeat(token, '() s e -> b s e', b=b) # 重复token 48 | y = torch.cat([token, y], dim=1) # 拼接 49 | return self.arrange2(self.pos_enc(self.arrange1(y))) # 位置编码 50 | 51 | 52 | class AttentionBlocks(nn.Module): 53 | def __init__(self, d_model, num_heads, rate=0.3, layer_norm_eps=1e-5): 54 | super(AttentionBlocks, self).__init__() 55 | 56 | self.att = nn.MultiheadAttention(d_model, num_heads=num_heads, batch_first=True) # 多头注意力 57 | self.drop = nn.Dropout(rate) 58 | self.norm = nn.LayerNorm(d_model, eps=layer_norm_eps) # 归一化 59 | 60 | def forward(self, x, y=None): 61 | y = x if y is None else y # 如果y为空,则y=x 62 | att_out, att_w = self.att(x, y, y) # 多头注意力 63 | att_out = self.drop(att_out) # dropout 64 | y = self.norm(x + att_out) # 归一化 65 | return y 66 | 67 | 68 | import torch.nn.functional as F 69 | 70 | 71 | class Time_att(nn.Module): # 在时间维度上进行注意力 72 | def __init__(self, dims): 73 | super(Time_att, self).__init__() 74 | self.linear1 = nn.Linear(dims, dims, bias=False) 75 | self.linear2 = nn.Linear(dims, 1, bias=False) 76 | self.time = nn.AdaptiveAvgPool1d(1) 77 | 78 | def forward(self, x): 79 | y = self.linear1(x.contiguous()) 80 | y = self.linear2(torch.tanh(y)) 81 | beta = F.softmax(y, dim=-1) 82 | c = beta * x 83 | return self.time(c.transpose(-1, -2)).transpose(-1, -2).contiguous().squeeze() 84 | -------------------------------------------------------------------------------- /model/main_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import numpy as np 4 | from model.model_blocks import EmbedPosEnc, AttentionBlocks, Time_att 5 | from model.FFN import FFN 6 | from model.BottleNecks import Bottlenecks 7 | from einops import repeat 8 | 9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 10 | 11 | 12 | class Model(nn.Module): 13 | def __init__(self, args): 14 | super(Model, self).__init__() 15 | self.sigma_cls = nn.Parameter(torch.ones(1, 1, requires_grad=True, device=device), requires_grad=True) # 生成一个可训练的分类损失参数 16 | nn.init.kaiming_normal_(self.sigma_cls, mode='fan_out') # 初始化参数 17 | self.sigma_reg = nn.Parameter(torch.ones(1, 1, requires_grad=True, device=device), requires_grad=True) # 生成一个可训练的回归损失参数 18 | nn.init.kaiming_normal_(self.sigma_reg, mode='fan_out') # 初始化参数 19 | 20 | d_model = args.d_model 21 | hidden_dim = args.dff 22 | modal_nums = 2 23 | self.num_layers = args.num_layers 24 | self.token = nn.Parameter(torch.ones(1, 1, d_model)) # 生成一个可训练的token @@绿色token 25 | 26 | self.bbox_embedding = EmbedPosEnc(args.bbox_input, d_model) # 张量嵌入以及生成位置编码 27 | self.bbox_token = nn.Parameter(torch.ones(1, 1, d_model)) # 生成一个可训练的bbox_token 28 | 29 | self.vel_embedding = EmbedPosEnc(args.vel_input, d_model) # 张量嵌入以及生成位置编码 30 | self.vel_token = nn.Parameter(torch.ones(1, 1, d_model)) # 生成一个可训练的vel_token 31 | 32 | self.bbox_att = nn.ModuleList() # 生成一个空的ModuleList 33 | self.bbox_ffn = nn.ModuleList() 34 | self.vel_att = nn.ModuleList() 35 | self.vel_ffn = nn.ModuleList() 36 | self.cross_att = nn.ModuleList() 37 | self.cross_ffn = nn.ModuleList() 38 | 39 | for _ in range(self.num_layers): 40 | self.bbox_att.append(AttentionBlocks(d_model, args.num_heads)) # 添加AttentionBlocks 41 | self.bbox_ffn.append(FFN(d_model, hidden_dim)) # 添加FFN 42 | self.vel_att.append(AttentionBlocks(d_model, args.num_heads)) 43 | self.vel_ffn.append(FFN(d_model, 
hidden_dim)) 44 | self.cross_att.append(AttentionBlocks(d_model, args.num_heads)) # 添加AttentionBlocks 45 | self.cross_ffn.append(FFN(d_model, hidden_dim)) 46 | 47 | self.dense = nn.Linear(modal_nums * d_model, 4) # 全连接层 48 | self.bottlenecks = Bottlenecks(d_model, args) # Bottlenecks 49 | self.time_att = Time_att(dims=args.num_bnks) # Time_att 50 | self.endp = nn.Linear(modal_nums * d_model, 4) # 全连接层 51 | self.relu = nn.ReLU() 52 | self.last = nn.Linear(args.num_bnks, 1) # 全连接层 53 | self.sigmoid = nn.Sigmoid() # sigmoid激活函数 54 | 55 | def forward(self, bbox, vel): 56 | ''' 57 | :bbox :[b, 4, 32] 58 | :vel :[b, 2, 32] 59 | ''' 60 | ''' 61 | bbox: [64, 16, 4] 62 | vel: [64, 16, 2] 63 | ''' 64 | b = bbox.shape[0] 65 | token = repeat(self.token, '() s e -> b s e', b=b) # 重复token,使尺寸匹配 66 | 67 | bbox = self.bbox_embedding(bbox, self.bbox_token) # 张量嵌入以及生成位置编码 68 | vel = self.vel_embedding(vel, self.vel_token) # 张量嵌入以及生成位置编码 69 | 70 | bbox = self.bbox_att[0](bbox) # bbox的自注意力 71 | token = torch.cat([token, bbox[:, 0:1, :]], dim=1) # 拼接token和bbox 72 | vel = self.vel_att[0](vel) # vel的自注意力 73 | token = torch.cat([token, vel[:, 0:1, :]], dim=1) # 拼接token和vel 74 | token = self.cross_att[0](token) # token的交叉注意力 75 | token_new = token[:, 0:1, :] # 取出token的第一个元素 76 | bbox = torch.cat([token_new, bbox[:, 1:, :]], dim=1) # 拼接token_new和bbox 77 | vel = torch.cat([token_new, vel[:, 1:, :]], dim=1) # 拼接token_new和vel 78 | bbox = self.bbox_ffn[0](bbox) # bbox的FFN 79 | vel = self.vel_ffn[0](vel) # vel的FFN 80 | token = self.cross_ffn[0](token)[:, 0:1, :] # token的FFN 81 | 82 | for i in range(self.num_layers - 1): 83 | bbox = self.bbox_att[i + 1](bbox) 84 | token = torch.cat([token, bbox[:, 0:1, :]], dim=1) 85 | vel = self.vel_att[i + 1](vel) 86 | token = torch.cat([token, vel[:, 0:1, :]], dim=1) 87 | token = self.cross_att[i + 1](token) 88 | token_new = token[:, 0:1, :] 89 | bbox = torch.cat([token_new, bbox[:, 1:, :]], dim=1) 90 | vel = torch.cat([token_new, vel[:, 1:, :]], dim=1) 91 | bbox = self.bbox_ffn[i + 1](bbox) 92 | vel = self.vel_ffn[i + 1](vel) 93 | token = self.cross_ffn[i + 1](token)[:, 0:1, :] 94 | 95 | 96 | cls_out = torch.cat([bbox[:, 0:1, :], vel[:, 0:1, :]], dim=1) # 拼接bbox的token和vel的token 97 | cls_out_flatten = torch.flatten(cls_out, start_dim=1) # 展平 98 | end_point = self.endp(cls_out_flatten) # 全连接层预测endpoint 99 | 100 | bnk = self.relu(self.time_att(self.bottlenecks(bbox, vel))) # Bottlenecks 101 | tmp = self.last(bnk) # 全连接层预测穿越行为 102 | pred = self.sigmoid(tmp) 103 | return pred, end_point, self.sigma_cls, self.sigma_reg # 返回预测结果,endpoint预测结果,分类的sigma,回归的sigma 104 | -------------------------------------------------------------------------------- /jaad.py: -------------------------------------------------------------------------------- 1 | from utils.jaad_data import JAAD 2 | from utils.jaad_preprocessing import * 3 | 4 | import torch 5 | from torch import nn 6 | from torch.utils.data import TensorDataset, DataLoader 7 | from torch.utils.tensorboard import SummaryWriter 8 | from model.main_model import Model 9 | from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix 10 | import argparse 11 | 12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 13 | 14 | def main(args): 15 | if not args.learn: 16 | seed_all(args.seed) 17 | data_opts = {'fstride': 1, 18 | 'sample_type': args.bh, # 'beh' 19 | 'subset': 'default', 20 | 'height_rng': [0, float('inf')], 21 | 'squarify_ratio': 0, 22 | 'data_split_type': 'default', # 
kfold, random, default 23 | 'seq_type': 'crossing', 24 | 'min_track_size': 15, 25 | 'random_params': {'ratios': None, 26 | 'val_data': True, 27 | 'regen_data': False}, 28 | 'kfold_params': {'num_folds': 5, 'fold': 1}, 29 | } 30 | tte = [30, 60] 31 | imdb = JAAD(data_path=args.set_path) 32 | seq_train = imdb.generate_data_trajectory_sequence('train', **data_opts) 33 | balanced_seq_train = balance_dataset(seq_train) 34 | tte_seq_train, traj_seq_train = tte_dataset(balanced_seq_train, tte, 0.6, args.times_num) 35 | 36 | seq_valid = imdb.generate_data_trajectory_sequence('val', **data_opts) 37 | balanced_seq_valid = balance_dataset(seq_valid) 38 | tte_seq_valid, traj_seq_valid = tte_dataset(balanced_seq_valid, tte, 0, args.times_num) 39 | 40 | seq_test = imdb.generate_data_trajectory_sequence('test', **data_opts) 41 | tte_seq_test, traj_seq_test = tte_dataset(seq_test, tte, 0, args.times_num) 42 | 43 | bbox_train = tte_seq_train['bbox'] 44 | bbox_valid = tte_seq_valid['bbox'] 45 | bbox_test = tte_seq_test['bbox'] 46 | 47 | bbox_dec_train = traj_seq_train['bbox'] 48 | bbox_dec_valid = traj_seq_valid['bbox'] 49 | bbox_dec_test = traj_seq_test['bbox'] 50 | 51 | vel_train = tte_seq_train['vehicle_act'] 52 | vel_valid = tte_seq_valid['vehicle_act'] 53 | vel_test = tte_seq_test['vehicle_act'] 54 | 55 | action_train = tte_seq_train['activities'] 56 | action_valid = tte_seq_valid['activities'] 57 | action_test = tte_seq_test['activities'] 58 | 59 | normalized_bbox_train = normalize_bbox(bbox_train) 60 | normalized_bbox_valid = normalize_bbox(bbox_valid) 61 | normalized_bbox_test = normalize_bbox(bbox_test) 62 | 63 | normalized_bbox_dec_train = normalize_traj(bbox_dec_train) 64 | normalized_bbox_dec_valid = normalize_traj(bbox_dec_valid) 65 | normalized_bbox_dec_test = normalize_traj(bbox_dec_test) 66 | 67 | label_action_train = prepare_label(action_train) 68 | label_action_valid = prepare_label(action_valid) 69 | label_action_test = prepare_label(action_test) 70 | 71 | X_train, X_valid = torch.Tensor(normalized_bbox_train), torch.Tensor(normalized_bbox_valid) 72 | Y_train, Y_valid = torch.Tensor(label_action_train), torch.Tensor(label_action_valid) 73 | X_test = torch.Tensor(normalized_bbox_test) 74 | Y_test = torch.Tensor(label_action_test) 75 | 76 | X_train_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_train, 60)) 77 | X_valid_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_valid, 60)) 78 | X_test_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_test, 60)) 79 | 80 | vel_train = torch.Tensor(vel_train) 81 | vel_valid = torch.Tensor(vel_valid) 82 | vel_test = torch.Tensor(vel_test) 83 | 84 | trainset = TensorDataset(X_train, Y_train, vel_train, X_train_dec) 85 | validset = TensorDataset(X_valid, Y_valid, vel_valid, X_valid_dec) 86 | testset = TensorDataset(X_test, Y_test, vel_test, X_test_dec) 87 | 88 | train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True) 89 | valid_loader = DataLoader(validset, batch_size=args.batch_size, shuffle=True) 90 | test_loader = DataLoader(testset, batch_size=1) 91 | else: # 生成随机数据 92 | train_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), 93 | torch.randn(size=(args.batch_size, 1)), 94 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), 95 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] 96 | valid_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), 97 | torch.randn(size=(args.batch_size, 1)), 98 | torch.randn(size=(args.batch_size, 
args.times_num, args.vel_input)), 99 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] 100 | test_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), 101 | torch.randn(size=(args.batch_size, 1)), 102 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), 103 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] 104 | print('Start Training Loop... \n') 105 | 106 | model = Model(args) 107 | model.to(device) 108 | 109 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98), eps=1e-6) 110 | cls_criterion = nn.BCELoss() 111 | reg_criterion = nn.MSELoss() 112 | 113 | model_folder_name = args.set_path + '_' + args.bh 114 | checkpoint_filepath = 'checkpoints/{}.pt'.format(model_folder_name) 115 | writer = SummaryWriter('logs/{}'.format(model_folder_name)) 116 | 117 | train(model, train_loader, valid_loader, cls_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, args=args) 118 | 119 | #Test 120 | model = Model(args) 121 | model.to(device) 122 | 123 | checkpoint = torch.load(checkpoint_filepath) 124 | model.load_state_dict(checkpoint['model_state_dict']) 125 | 126 | preds, labels = test(model, test_loader) 127 | pred_cpu = torch.Tensor.cpu(preds) 128 | label_cpu = torch.Tensor.cpu(labels) 129 | 130 | acc = accuracy_score(label_cpu, np.round(pred_cpu)) 131 | f1 = f1_score(label_cpu, np.round(pred_cpu)) 132 | pre_s = precision_score(label_cpu, np.round(pred_cpu)) 133 | recall_s = recall_score(label_cpu, np.round(pred_cpu)) 134 | auc = roc_auc_score(label_cpu, np.round(pred_cpu)) 135 | matrix = confusion_matrix(label_cpu, np.round(pred_cpu)) 136 | 137 | print(f'Acc: {acc}\n f1: {f1}\n precision_score: {pre_s}\n recall_score: {recall_s}\n roc_auc_score: {auc}\n confusion_matrix: {matrix}') 138 | 139 | 140 | if __name__ == '__main__': 141 | torch.cuda.empty_cache() 142 | parser = argparse.ArgumentParser('Pedestrain Crossing Intention Prediction.') 143 | parser.add_argument('--epochs', type=int, default=2000, help='Number of epochs to train.') 144 | parser.add_argument('--set_path', type=str, default='JAAD') 145 | parser.add_argument('--bh', type=str, default='beh', help='all or beh, in JAAD dataset.') 146 | parser.add_argument('--balance', type=bool, default=True, help='balance or not for test dataset.') 147 | parser.add_argument('--seed', type=int, default=42) 148 | 149 | parser.add_argument('--d_model', type=int, default=256, help='the dimension after embedding.') 150 | parser.add_argument('--dff', type=int, default=512, help='the number of the units.') 151 | parser.add_argument('--num_heads', type=int, default=8, help='number of the heads of the multi-head model.') 152 | parser.add_argument('--bbox_input', type=int, default=4, help='dimension of bbox.') 153 | parser.add_argument('--vel_input', type=int, default=1, help='dimension of velocity.') 154 | parser.add_argument('--time_crop', type=bool, default=False) 155 | 156 | parser.add_argument('--batch_size', type=int, default=64, help='size of batch.') 157 | parser.add_argument('--lr', type=float, default=0.001, help='learning rate to train.') 158 | 159 | parser.add_argument('--num_layers', type=int, default=4, help='the number of layers.') 160 | parser.add_argument('--times_num', type=int, default=32, help='') 161 | parser.add_argument('--num_bnks', type=int, default=9, help='') 162 | parser.add_argument('--bnks_layers', type=int, default=9, help='') 163 | parser.add_argument('--sta_f', type=int, default=8) 164 | 
parser.add_argument('--end_f', type=int, default=12) 165 | args = parser.parse_args() 166 | main(args) -------------------------------------------------------------------------------- /pie.py: -------------------------------------------------------------------------------- 1 | from utils.pie_data import PIE 2 | from utils.pie_preprocessing import * 3 | 4 | import torch 5 | from torch import nn 6 | from torch.utils.data import TensorDataset, DataLoader 7 | from torch.utils.tensorboard import SummaryWriter 8 | from model.main_model import Model 9 | from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix 10 | import argparse 11 | 12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 13 | 14 | def main(args): 15 | if not args.learn: # 如果args.learn为False,则真实训练, 读取真实数据 16 | seed_all(args.seed) 17 | data_opts = { 18 | 'fstride': 1, 19 | 'sample_type': 'all', 20 | 'height_rng': [0, float('inf')], 21 | 'squarify_ratio': 0, 22 | 'data_split_type': 'random', # kfold, random, default 23 | 'seq_type': 'crossing', # crossing , intention 24 | 'min_track_size': 15, # discard tracks that are shorter 25 | 'kfold_params': {'num_folds': 1, 'fold': 1}, 26 | 'random_params': {'ratios': [0.7, 0.15, 0.15], 27 | 'val_data': True, 28 | 'regen_data': False}, 29 | 'tte': [30, 60], 30 | 'batch_size': 16 31 | } 32 | imdb = PIE(data_path=args.set_path) 33 | seq_train = imdb.generate_data_trajectory_sequence('train', **data_opts) # 生成训练集 34 | balanced_seq_train = balance_dataset(seq_train) # 平衡数据集 35 | tte_seq_train, traj_seq_train = tte_dataset(balanced_seq_train, data_opts['tte'], 0.6, args.times_num) # 生成训练集的tte和轨迹 36 | 37 | seq_valid = imdb.generate_data_trajectory_sequence('val', **data_opts) 38 | balanced_seq_valid = balance_dataset(seq_valid) 39 | tte_seq_valid, traj_seq_valid = tte_dataset(balanced_seq_valid, data_opts['tte'], 0, args.times_num) 40 | 41 | seq_test = imdb.generate_data_trajectory_sequence('test', **data_opts) 42 | tte_seq_test, traj_seq_test = tte_dataset(seq_test, data_opts['tte'], 0, args.times_num) 43 | 44 | bbox_train = tte_seq_train['bbox'] # 训练集的bbox 45 | bbox_valid = tte_seq_valid['bbox'] 46 | bbox_test = tte_seq_test['bbox'] 47 | 48 | bbox_dec_train = traj_seq_train['bbox'] # 训练集的轨迹 49 | bbox_dec_valid = traj_seq_valid['bbox'] 50 | bbox_dec_test = traj_seq_test['bbox'] 51 | 52 | obd_train = tte_seq_train['obd_speed'] # 训练集的速度 53 | obd_valid = tte_seq_valid['obd_speed'] 54 | obd_test = tte_seq_test['obd_speed'] 55 | 56 | gps_train = tte_seq_train['gps_speed'] # 训练集的速度 57 | gps_valid = tte_seq_valid['gps_speed'] 58 | gps_test = tte_seq_test['gps_speed'] 59 | 60 | action_train = tte_seq_train['activities'] # 训练集的动作 61 | action_valid = tte_seq_valid['activities'] 62 | action_test = tte_seq_test['activities'] 63 | 64 | normalized_bbox_train = normalize_bbox(bbox_train) # 归一化bbox 65 | normalized_bbox_valid = normalize_bbox(bbox_valid) 66 | normalized_bbox_test = normalize_bbox(bbox_test) 67 | 68 | normalized_bbox_dec_train = normalize_traj(bbox_dec_train) # 归一化轨迹 69 | normalized_bbox_dec_valid = normalize_traj(bbox_dec_valid) 70 | normalized_bbox_dec_test = normalize_traj(bbox_dec_test) 71 | 72 | label_action_train = prepare_label(action_train) # 准备标签 73 | label_action_valid = prepare_label(action_valid) 74 | label_action_test = prepare_label(action_test) 75 | 76 | X_train, X_valid = torch.Tensor(normalized_bbox_train), torch.Tensor(normalized_bbox_valid) # 转换为tensor 77 | Y_train, Y_valid = 
torch.Tensor(label_action_train), torch.Tensor(label_action_valid) 78 | X_test = torch.Tensor(normalized_bbox_test) 79 | Y_test = torch.Tensor(label_action_test) 80 | 81 | 82 | temp = pad_sequence(normalized_bbox_dec_train, 60) 83 | X_train_dec = torch.Tensor(temp) 84 | X_valid_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_valid, 60)) # 转换为tensor 85 | X_test_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_test, 60)) 86 | 87 | obd_train, gps_train = torch.Tensor(obd_train), torch.Tensor(gps_train) # 转换为tensor 88 | obd_valid, gps_valid = torch.Tensor(obd_valid), torch.Tensor(gps_valid) 89 | obd_test, gps_test = torch.Tensor(obd_test), torch.Tensor(gps_test) 90 | 91 | vel_train = torch.cat([obd_train, gps_train], dim=-1) # 拼接obd和gps 92 | vel_valid = torch.cat([obd_valid, gps_valid], dim=-1) 93 | vel_test = torch.cat([obd_test, gps_test], dim=-1) 94 | 95 | trainset = TensorDataset(X_train, Y_train, vel_train, X_train_dec) # 生成dataset 96 | validset = TensorDataset(X_valid, Y_valid, vel_valid, X_valid_dec) 97 | testset = TensorDataset(X_test, Y_test, vel_test, X_test_dec) 98 | 99 | train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True) # 生成dataloader 100 | valid_loader = DataLoader(validset, batch_size=args.batch_size, shuffle=True) 101 | test_loader = DataLoader(testset, batch_size=1) 102 | else: # args.learn为True,不真实训练,生成随机数据。 103 | train_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), # bbox 104 | torch.randn(size=(args.batch_size, 1)), # label 105 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), # velocity 106 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] # trajectory 107 | valid_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), 108 | torch.randn(size=(args.batch_size, 1)), 109 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), 110 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] 111 | test_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), 112 | torch.randn(size=(args.batch_size, 1)), 113 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), 114 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] 115 | print('Start Training Loop... 
\n')
116 | 
117 |     model = Model(args)  # build the model
118 |     model.to(device)  # move the model to the GPU if available
119 | 
120 |     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98), eps=1e-6)  # optimizer
121 |     cls_criterion = nn.BCELoss()  # classification loss: binary cross-entropy
122 |     reg_criterion = nn.MSELoss()  # regression loss: mean squared error
123 | 
124 |     model_folder_name = args.set_path
125 |     checkpoint_filepath = 'checkpoints/{}.pt'.format(model_folder_name)  # checkpoint path
126 |     writer = SummaryWriter('logs/{}'.format(model_folder_name))  # TensorBoard log directory
127 |     # Train
128 |     train(model, train_loader, valid_loader, cls_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, args=args)
129 | 
130 |     # Test
131 |     model = Model(args)
132 |     model.to(device)
133 | 
134 |     checkpoint = torch.load(checkpoint_filepath)
135 |     model.load_state_dict(checkpoint['model_state_dict'])
136 | 
137 |     preds, labels = test(model, test_loader)
138 |     pred_cpu = torch.Tensor.cpu(preds)
139 |     label_cpu = torch.Tensor.cpu(labels)
140 | 
141 |     acc = accuracy_score(label_cpu, np.round(pred_cpu))
142 |     f1 = f1_score(label_cpu, np.round(pred_cpu))
143 |     pre_s = precision_score(label_cpu, np.round(pred_cpu))
144 |     recall_s = recall_score(label_cpu, np.round(pred_cpu))
145 |     auc = roc_auc_score(label_cpu, np.round(pred_cpu))
146 |     matrix = confusion_matrix(label_cpu, np.round(pred_cpu))
147 | 
148 |     print(f'Acc: {acc}\n f1: {f1}\n precision_score: {pre_s}\n recall_score: {recall_s}\n roc_auc_score: {auc}\n confusion_matrix: {matrix}')
149 | 
150 | 
151 | if __name__ == '__main__':
152 |     torch.cuda.empty_cache()
153 |     parser = argparse.ArgumentParser('Pedestrian Crossing Intention Prediction.')
154 | 
155 |     parser.add_argument('--epochs', type=int, default=2000, help='Number of epochs to train.')
156 |     parser.add_argument('--set_path', type=str, default='PIE')
157 |     parser.add_argument('--balance', type=bool, default=True, help='balance or not for test dataset.')
158 |     parser.add_argument('--seed', type=int, default=42)
159 | 
160 |     parser.add_argument('--d_model', type=int, default=128, help='the dimension after embedding.')
161 |     parser.add_argument('--dff', type=int, default=256, help='the number of the units.')
162 |     parser.add_argument('--num_heads', type=int, default=8, help='number of the heads of the multi-head model.')
163 |     parser.add_argument('--bbox_input', type=int, default=4, help='dimension of bbox.')
164 |     parser.add_argument('--vel_input', type=int, default=2, help='dimension of velocity.')
165 |     parser.add_argument('--time_crop', type=bool, default=False)  # whether to apply random temporal cropping
166 | 
167 |     parser.add_argument('--batch_size', type=int, default=64, help='size of batch.')
168 |     parser.add_argument('--lr', type=float, default=0.0005, help='learning rate to train.')
169 | 
170 |     parser.add_argument('--num_layers', type=int, default=4, help='the number of layers.')
171 |     parser.add_argument('--times_num', type=int, default=16, help='')  # temporal length of the input sequences
172 |     parser.add_argument('--num_bnks', type=int, default=3, help='')  # number of bottleneck units
173 |     parser.add_argument('--bnks_layers', type=int, default=7, help='')  # number of bottleneck layers
174 | 
175 |     parser.add_argument('--sta_f', type=int, default=8)  # with random temporal cropping, the kept length is sampled between sta_f and end_f
176 |     parser.add_argument('--end_f', type=int, default=12)
177 | 
178 |     parser.add_argument('--learn', type=bool, default=True)  # skip loading the real dataset and generate random data of matching shape
179 |     # Set learn=True to walk through the training pipeline without the real data: random tensors with the correct shapes are used instead.
180 |     args = parser.parse_args()
181 |     main(args)
182 | 
--------------------------------------------------------------------------------
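Both `pie.py` and `jaad.py` train with the uncertainty-aware multi-task objective implemented in the `train()`/`evaluate()` functions of `utils/pie_preprocessing.py` and `utils/jaad_preprocessing.py`: each task loss is scaled by the inverse square of a learnable sigma returned by the model, and log-sigma terms keep the sigmas from growing without bound. The following is a minimal sketch of just that weighting, with placeholder loss values rather than the project's real BCE/MSE losses.

```python
import torch

# Learnable task uncertainties; in the project these are the sigma_cls / sigma_reg
# parameters created in model/main_model.py and returned by the forward pass.
s_cls = torch.nn.Parameter(torch.ones(1, 1))
s_reg = torch.nn.Parameter(torch.ones(1, 1))

cls_loss = torch.tensor(0.7)  # placeholder for nn.BCELoss() on the crossing prediction
reg_loss = torch.tensor(0.2)  # placeholder for nn.MSELoss() on the endpoint regression

# Uncertainty-weighted sum: the noisier task is down-weighted automatically, and the
# log terms penalize the trivial solution of inflating both sigmas.
f_loss = (cls_loss / (s_cls * s_cls)
          + reg_loss / (s_reg * s_reg)
          + torch.log(s_cls) + torch.log(s_reg))
print(f_loss.item())
```

The PIE script writes this sum as `torch.log(s_cls * s_reg)`, which is algebraically the same as the two separate log terms used in the JAAD script.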
/utils/jaad_preprocessing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import numpy as np 4 | import random 5 | 6 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 7 | 8 | 9 | def seed_all(seed): 10 | torch.cuda.empty_cache() 11 | os.environ['PYTHONHASHSEED'] = str(seed) 12 | random.seed(seed) 13 | np.random.seed(seed) 14 | torch.manual_seed(seed) 15 | torch.cuda.manual_seed(seed) 16 | torch.cuda.manual_seed_all(seed) 17 | torch.backends.cudnn.deterministic = True 18 | torch.backends.cudnn.benchmark = False 19 | 20 | 21 | def binary_acc(label, pred): 22 | label_tag = torch.round(label) 23 | correct_results_sum = (label_tag == pred).sum().float() 24 | acc = correct_results_sum / pred.shape[0] 25 | return acc 26 | 27 | def end_point_loss(reg_criterion, pred, end_point): 28 | for i in range(4): 29 | if i == 0 or i == 2: 30 | pred[:, i] = pred[:, i] * 1920 31 | end_point[:, i] = end_point[:, i] * 1920 32 | else: 33 | pred[:, i] = pred[:, i] * 1080 34 | end_point[:, i] = end_point[:, i] * 1080 35 | return reg_criterion(pred, end_point) 36 | 37 | 38 | def train(model, train_loader, valid_loader, class_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, 39 | args): 40 | best_valid_acc = 0.0 41 | improvement_ratio = 0.001 42 | best_valid_loss = np.inf 43 | num_steps_wo_improvement = 0 44 | save_times = 0 45 | epochs = args.epochs 46 | if args.learn: # 调试模式: epoch = 5 47 | epochs = 5 48 | time_crop = args.time_crop 49 | for epoch in range(epochs): 50 | nb_batches_train = len(train_loader) 51 | train_acc = 0 52 | model.train() 53 | f_losses = 0.0 54 | cls_losses = 0.0 55 | reg_losses = 0.0 56 | 57 | print('Epoch: {} training...'.format(epoch + 1)) 58 | for bbox, label, vel, traj in train_loader: 59 | label = label.reshape(-1, 1).to(device).float() 60 | bbox = bbox.to(device) 61 | vel = vel.to(device) 62 | end_point = traj.to(device)[:, -1, :] 63 | 64 | if np.random.randint(10) >= 5 and time_crop: 65 | crop_size = np.random.randint(args.sta_f, args.end_f) 66 | bbox = bbox[:, -crop_size:, :] 67 | vel = vel[:, -crop_size:, :] 68 | 69 | pred, point, s_cls, s_reg = model(bbox, vel) 70 | 71 | cls_loss = class_criterion(pred, label) 72 | reg_loss = reg_criterion(point, end_point) 73 | f_loss = cls_loss / (s_cls * s_cls) + reg_loss / (s_reg * s_reg) + torch.log(s_cls) + torch.log(s_reg) 74 | 75 | model.zero_grad() # 76 | f_loss.backward() 77 | 78 | f_losses += f_loss.item() 79 | cls_losses += cls_loss.item() 80 | reg_losses += reg_loss.item() 81 | 82 | optimizer.step() # 83 | 84 | train_acc += binary_acc(label, torch.round(pred)) 85 | 86 | writer.add_scalar('training full_loss', 87 | f_losses / nb_batches_train, 88 | epoch + 1) 89 | writer.add_scalar('training cls_loss', 90 | cls_losses / nb_batches_train, 91 | epoch + 1) 92 | writer.add_scalar('training reg_loss', 93 | reg_losses / nb_batches_train, 94 | epoch + 1) 95 | writer.add_scalar('training Acc', 96 | train_acc / nb_batches_train, 97 | epoch + 1) 98 | 99 | print( 100 | f"Epoch {epoch + 1}: | Train_Loss {f_losses / nb_batches_train} | Train Cls_loss {cls_losses / nb_batches_train} | Train Reg_loss {reg_losses / nb_batches_train} | Train_Acc {train_acc / nb_batches_train} ") 101 | 102 | valid_f_loss, valid_cls_loss, valid_reg_loss, val_acc = evaluate(model, valid_loader, class_criterion, 103 | reg_criterion) 104 | 105 | writer.add_scalar('validation full_loss', 106 | valid_f_loss, 107 | epoch + 1) 108 | writer.add_scalar('validation 
cls_loss', 109 | valid_cls_loss, 110 | epoch + 1) 111 | writer.add_scalar('validation reg_loss', 112 | valid_reg_loss, 113 | epoch + 1) 114 | writer.add_scalar('validation Acc', 115 | val_acc, 116 | epoch + 1) 117 | 118 | if best_valid_loss > valid_cls_loss: 119 | best_valid_loss = valid_cls_loss 120 | num_steps_wo_improvement = 0 121 | save_times += 1 122 | print(str(save_times) + ' time(s) File saved.\n') 123 | torch.save({ 124 | 'epoch': epoch, 125 | 'model_state_dict': model.state_dict(), 126 | 'optimizer_state_dict': optimizer.state_dict(), 127 | 'Accuracy': train_acc / nb_batches_train, 128 | 'LOSS': f_losses / nb_batches_train, 129 | }, checkpoint_filepath) 130 | print('Update improvement.\n') 131 | else: 132 | num_steps_wo_improvement += 1 133 | print(str(num_steps_wo_improvement) + '/300 times Not update.\n') 134 | 135 | if num_steps_wo_improvement == 300: 136 | print("Early stopping on epoch:{}".format(str(epoch + 1))) 137 | break 138 | print('save file times: ' + str(save_times) + '.\n') 139 | 140 | 141 | def evaluate(model, val_data, class_criterion, reg_criterion): 142 | nb_batches = len(val_data) 143 | val_f_losses = 0.0 144 | val_cls_losses = 0.0 145 | val_reg_losses = 0.0 146 | print('in Validation...') 147 | with torch.no_grad(): 148 | model.eval() 149 | acc = 0 150 | for bbox, label, vel, traj in val_data: 151 | label = label.reshape(-1, 1).to(device).float() 152 | bbox = bbox.to(device) 153 | vel = vel.to(device) 154 | end_point = traj.to(device)[:, -1, :] 155 | 156 | pred, point, s_cls, s_reg = model(bbox, vel) 157 | 158 | val_cls_loss = class_criterion(pred, label) 159 | val_reg_loss = reg_criterion(point, end_point) 160 | f_loss = val_cls_loss / (s_cls * s_cls) + val_reg_loss / (s_reg * s_reg) + torch.log(s_cls) + torch.log( 161 | s_reg) 162 | 163 | val_f_losses += f_loss.item() 164 | val_cls_losses += val_cls_loss.item() 165 | val_reg_losses += val_reg_loss.item() 166 | 167 | acc += binary_acc(label, torch.round(pred)) 168 | print( 169 | f'Valid_Full_Loss {val_f_losses / nb_batches} | Valid Cls_loss {val_cls_losses / nb_batches} | Valid Reg_loss {val_reg_losses / nb_batches} | Valid_Acc {acc / nb_batches} \n') 170 | return val_f_losses / nb_batches, val_cls_losses / nb_batches, val_reg_losses / nb_batches, acc / nb_batches 171 | 172 | 173 | def test(model, test_data): 174 | print('Tesing...') 175 | with torch.no_grad(): 176 | model.eval() 177 | step = 0 178 | for bbox, label, vel, traj in test_data: 179 | label = label.reshape(-1, 1).to(device).float() 180 | bbox = bbox.to(device) 181 | vel = vel.to(device) 182 | 183 | pred, _, _, _ = model(bbox, vel) 184 | 185 | if step == 0: 186 | preds = pred 187 | labels = label 188 | else: 189 | preds = torch.cat((preds, pred), 0) 190 | labels = torch.cat((labels, label), 0) 191 | step += 1 192 | 193 | return preds, labels 194 | 195 | 196 | def balance_dataset(dataset, flip=True): 197 | d = {'bbox': dataset['bbox'].copy(), 198 | 'pid': dataset['pid'].copy(), 199 | 'activities': dataset['activities'].copy(), 200 | 'image': dataset['image'].copy(), 201 | 'center': dataset['center'].copy(), 202 | 'vehicle_act': dataset['vehicle_act'].copy(), 203 | 'image_dimension': (1920, 1080)} 204 | gt_labels = [gt[0] for gt in d['activities']] 205 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) 206 | num_neg_samples = len(gt_labels) - num_pos_samples 207 | 208 | if num_neg_samples == num_pos_samples: 209 | print('Positive samples is equal to negative samples.') 210 | else: 211 | print('Unbalanced: \t Postive: {} \t Negative: 
{}'.format(num_pos_samples, num_neg_samples)) 212 | if num_neg_samples > num_pos_samples: 213 | gt_augment = 1 214 | else: 215 | gt_augment = 0 216 | 217 | img_width = d['image_dimension'][0] 218 | num_samples = len(d['pid']) 219 | 220 | for i in range(num_samples): 221 | if d['activities'][i][0][0] == gt_augment: 222 | flipped = d['center'][i].copy() 223 | flipped = [[img_width - c[0], c[1]] for c in flipped] 224 | d['center'].append(flipped) 225 | 226 | flipped = d['bbox'][i].copy() 227 | flipped = [np.array([img_width - c[2], c[1], img_width - c[0], c[3]]) for c in flipped] 228 | d['bbox'].append(flipped) 229 | 230 | d['pid'].append(dataset['pid'][i].copy()) 231 | 232 | d['activities'].append(d['activities'][i].copy()) 233 | d['vehicle_act'].append(d['vehicle_act'][i].copy()) 234 | 235 | flipped = d['image'][i].copy() 236 | flipped = [c.replace('.png', '_flip.png') for c in flipped] 237 | 238 | d['image'].append(flipped) 239 | 240 | gt_labels = [gt[0] for gt in d['activities']] 241 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) 242 | num_neg_samples = len(gt_labels) - num_pos_samples 243 | 244 | if num_neg_samples > num_pos_samples: 245 | rm_index = np.where(np.array(gt_labels) == 0)[0] 246 | else: 247 | rm_index = np.where(np.array(gt_labels) == 1)[0] 248 | 249 | dif_samples = abs(num_neg_samples - num_pos_samples) 250 | 251 | np.random.seed(42) 252 | np.random.shuffle(rm_index) 253 | rm_index = rm_index[0:dif_samples] 254 | 255 | for k in d: 256 | seq_data_k = d[k] 257 | d[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] 258 | 259 | new_gt_labels = [gt[0] for gt in d['activities']] 260 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) 261 | print('Balanced: Postive: %d \t Negative: %d \n' % (num_pos_samples, len(d['activities']) - num_pos_samples)) 262 | print('Total Number of samples: %d\n' % (len(d['activities']))) 263 | 264 | return d 265 | 266 | 267 | def tte_dataset(dataset, time_to_event, overlap, obs_length): 268 | d_obs = {'bbox': dataset['bbox'].copy(), 269 | 'pid': dataset['pid'].copy(), 270 | 'activities': dataset['activities'].copy(), 271 | 'image': dataset['image'].copy(), 272 | 'vehicle_act': dataset['vehicle_act'].copy(), 273 | 'center': dataset['center'].copy() 274 | } 275 | 276 | d_tte = {'bbox': dataset['bbox'].copy(), 277 | 'pid': dataset['pid'].copy(), 278 | 'activities': dataset['activities'].copy(), 279 | 'image': dataset['image'].copy(), 280 | 'vehicle_act': dataset['vehicle_act'].copy(), 281 | 'center': dataset['center'].copy()} 282 | 283 | if isinstance(time_to_event, int): 284 | for k in d_obs.keys(): 285 | for i in range(len(d_obs[k])): 286 | d_obs[k][i] = d_obs[k][i][- obs_length - time_to_event: -time_to_event] 287 | d_tte[k][i] = d_tte[k][i][- time_to_event:] 288 | d_obs['tte'] = [[time_to_event]] * len(dataset['bbox']) 289 | d_tte['tte'] = [[time_to_event]] * len(dataset['bbox']) 290 | 291 | else: 292 | olap_res = obs_length if overlap == 0 else int((1 - overlap) * obs_length) 293 | olap_res = 1 if olap_res < 1 else olap_res 294 | 295 | for k in d_obs.keys(): 296 | seqs = [] 297 | seqs_tte = [] 298 | for seq in d_obs[k]: 299 | start_idx = len(seq) - obs_length - time_to_event[1] 300 | end_idx = len(seq) - obs_length - time_to_event[0] 301 | seqs.extend([seq[i:i + obs_length] for i in range(start_idx, end_idx, olap_res)]) 302 | seqs_tte.extend([seq[i + obs_length:] for i in range(start_idx, end_idx, olap_res)]) 303 | d_obs[k] = seqs 304 | d_tte[k] = seqs_tte 305 | tte_seq = [] 306 | for seq in 
dataset['bbox']: 307 | start_idx = len(seq) - obs_length - time_to_event[1] 308 | end_idx = len(seq) - obs_length - time_to_event[0] 309 | tte_seq.extend([[len(seq) - (i + obs_length)] for i in range(start_idx, end_idx, olap_res)]) 310 | d_obs['tte'] = tte_seq.copy() 311 | d_tte['tte'] = tte_seq.copy() 312 | 313 | remove_index = [] 314 | try: 315 | time_to_event_0 = time_to_event[0] 316 | except: 317 | time_to_event_0 = time_to_event 318 | for seq_index, (seq_obs, seq_tte) in enumerate(zip(d_obs['bbox'], d_tte['bbox'])): 319 | if len(seq_obs) < 16 or len(seq_tte) < time_to_event_0: 320 | remove_index.append(seq_index) 321 | 322 | for k in d_obs.keys(): 323 | for j in sorted(remove_index, reverse=True): 324 | del d_obs[k][j] 325 | del d_tte[k][j] 326 | 327 | return d_obs, d_tte 328 | 329 | 330 | def normalize_bbox(dataset, width=1920, height=1080): 331 | normalized_set = [] 332 | for sequence in dataset: 333 | if sequence == []: 334 | continue 335 | normalized_sequence = [] 336 | for bbox in sequence: 337 | np_bbox = np.zeros(4) 338 | np_bbox[0] = bbox[0] / width 339 | np_bbox[2] = bbox[2] / width 340 | np_bbox[1] = bbox[1] / height 341 | np_bbox[3] = bbox[3] / height 342 | normalized_sequence.append(np_bbox) 343 | normalized_set.append(np.array(normalized_sequence)) 344 | 345 | return normalized_set 346 | 347 | def normalize_traj(dataset, width=1920, height=1080): 348 | normalized_set = [] 349 | for sequence in dataset: 350 | if sequence == []: 351 | continue 352 | normalized_sequence = [] 353 | for bbox in sequence: 354 | np_bbox = np.zeros(4) 355 | np_bbox[0] = bbox[0]# / width 356 | np_bbox[2] = bbox[2]# / width 357 | np_bbox[1] = bbox[1]# / height 358 | np_bbox[3] = bbox[3]# / height 359 | normalized_sequence.append(np_bbox) 360 | normalized_set.append(np.array(normalized_sequence)) 361 | 362 | return normalized_set 363 | 364 | 365 | def prepare_label(dataset): 366 | labels = np.zeros(len(dataset), dtype='int64') 367 | for step, action in enumerate(dataset): 368 | if action == []: 369 | continue 370 | labels[step] = action[0][0] 371 | 372 | return labels 373 | 374 | def pad_sequence(inp_list, max_len): 375 | padded_sequence = [] 376 | for source in inp_list: 377 | target = np.array([source[0]] * max_len) 378 | source = source 379 | target[-source.shape[0]:, :] = source 380 | 381 | padded_sequence.append(target) 382 | 383 | return padded_sequence 384 | -------------------------------------------------------------------------------- /utils/pie_preprocessing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import numpy as np 4 | import random 5 | 6 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 7 | 8 | 9 | def seed_all(seed): # 初始化 10 | torch.cuda.empty_cache() 11 | os.environ['PYTHONHASHSEED'] = str(seed) 12 | random.seed(seed) 13 | np.random.seed(seed) 14 | torch.manual_seed(seed) 15 | torch.cuda.manual_seed(seed) 16 | torch.cuda.manual_seed_all(seed) 17 | torch.backends.cudnn.deterministic = True 18 | torch.backends.cudnn.benchmark = False 19 | 20 | 21 | def binary_acc(label, pred): # 计算准确率 22 | label_tag = torch.round(label)# 四舍五入 23 | correct_results_sum = (label_tag == pred).sum().float()# 计算正确的个数 24 | acc = correct_results_sum / pred.shape[0] # 计算准确率 25 | return acc 26 | 27 | 28 | def end_point_loss(reg_criterion, pred, end_point):# 计算端点误差(未使用) 29 | for i in range(4): 30 | if i == 0 or i == 2: 31 | pred[:, i] = pred[:, i] * 1920 # 1920是视频的宽 32 | end_point[:, i] = end_point[:, i] * 1920 
33 | else: 34 | pred[:, i] = pred[:, i] * 1080 # 1080是视频的高 35 | end_point[:, i] = end_point[:, i] * 1080 36 | return reg_criterion(pred, end_point) 37 | 38 | 39 | 40 | def train(model, train_loader, valid_loader, class_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, 41 | args): 42 | # best_valid_acc = 0.0# 最佳准确率 43 | # improvement_ratio = 0.001 44 | best_valid_loss = np.inf # 最佳损失 45 | num_steps_wo_improvement = 0 # 未提升的次数 46 | save_times = 0 # 保存的次数 47 | epochs = args.epochs # 训练的轮数 48 | # time_crop = args.time_crop # 是否进行时间裁剪 49 | if args.learn: # 调试模式: epoch = 5 50 | epochs = 5 51 | for epoch in range(epochs): 52 | nb_batches_train = len(train_loader) # 训练集的batch数 53 | train_acc = 0 # 训练集的准确率 54 | model.train() # 训练模式 55 | f_losses = 0.0 # 总损失 56 | cls_losses = 0.0 # 分类损失 57 | reg_losses = 0.0 # 回归损失 58 | 59 | print('Epoch: {} training...'.format(epoch + 1)) 60 | for bbox, label, vel, traj in train_loader: 61 | label = label.reshape(-1, 1).to(device).float() # 标签 62 | bbox = bbox.to(device) 63 | vel = vel.to(device) 64 | end_point = traj.to(device)[:, -1, :] #轨迹的最后一刻时刻的点 65 | 66 | # if np.random.randint(10) >= 5 and time_crop: # 随机时间裁剪 67 | # crop_size = np.random.randint(args.sta_f, args.end_f) 68 | # bbox = bbox[:, -crop_size:, :] 69 | # # vel = vel[:, -crop_size:, :] 70 | 71 | pred, point, s_cls, s_reg = model(bbox, vel) # 预测值,端点,分类损失系数,回归损失系数 72 | cls_loss = class_criterion(pred, label) # 分类损失 73 | reg_loss = reg_criterion(point, end_point) # 回归损失 74 | f_loss = cls_loss / (s_cls * s_cls) + reg_loss / (s_reg * s_reg) + torch.log(s_cls * s_reg) 75 | # 总损失 76 | 77 | model.zero_grad() # 梯度清零 78 | f_loss.backward() # 反向传播 79 | 80 | f_losses += f_loss.item() # 总损失记录 81 | cls_losses += cls_loss.item() # 分类损失记录 82 | reg_losses += reg_loss.item() # 回归损失记录 83 | 84 | optimizer.step() # 更新参数 85 | 86 | train_acc += binary_acc(label, torch.round(pred)) # 计算准确率 87 | 88 | 89 | writer.add_scalar('training full_loss', 90 | f_losses / nb_batches_train, 91 | epoch + 1) 92 | writer.add_scalar('training cls_loss', 93 | cls_losses / nb_batches_train, 94 | epoch + 1) 95 | writer.add_scalar('training reg_loss', 96 | reg_losses / nb_batches_train, 97 | epoch + 1) 98 | writer.add_scalar('training Acc', 99 | train_acc / nb_batches_train, 100 | epoch + 1) 101 | 102 | 103 | print( 104 | f"Epoch {epoch + 1}: | Train_Loss {f_losses / nb_batches_train} | Train Cls_loss {cls_losses / nb_batches_train} | Train Reg_loss {reg_losses / nb_batches_train} | Train_Acc {train_acc / nb_batches_train} ") 105 | valid_f_loss, valid_cls_loss, valid_reg_loss, val_acc = evaluate(model, valid_loader, class_criterion, 106 | reg_criterion) # 验证 107 | 108 | writer.add_scalar('validation full_loss', 109 | valid_f_loss, 110 | epoch + 1) 111 | writer.add_scalar('validation cls_loss', 112 | valid_cls_loss, 113 | epoch + 1) 114 | writer.add_scalar('validation reg_loss', 115 | valid_reg_loss, 116 | epoch + 1) 117 | writer.add_scalar('validation Acc', 118 | val_acc, 119 | epoch + 1) 120 | 121 | if best_valid_loss > valid_cls_loss: # 保存最佳模型 122 | best_valid_loss = valid_cls_loss # 更新最佳损失 123 | num_steps_wo_improvement = 0 # 未提升的次数清零 124 | save_times += 1 125 | print(str(save_times) + ' time(s) File saved.\n') 126 | torch.save({ 127 | 'epoch': epoch, 128 | 'model_state_dict': model.state_dict(), 129 | 'optimizer_state_dict': optimizer.state_dict(), 130 | 'Accuracy': train_acc / nb_batches_train, 131 | 'LOSS': f_losses / nb_batches_train, 132 | }, checkpoint_filepath) # 保存模型 133 | print('Update improvement.\n') 134 | 135 | 
else: # 未提升 136 | num_steps_wo_improvement += 1 137 | print(str(num_steps_wo_improvement) + '/300 times Not update.\n') 138 | 139 | if num_steps_wo_improvement == 300: # 300次未提升,提前结束 140 | print("Early stopping on epoch:{}".format(str(epoch + 1))) 141 | break 142 | print('save file times: ' + str(save_times) + '.\n') 143 | 144 | 145 | def evaluate(model, val_data, class_criterion, reg_criterion): 146 | nb_batches = len(val_data) 147 | val_f_losses = 0.0 148 | val_cls_losses = 0.0 149 | val_reg_losses = 0.0 150 | print('in Validation...') 151 | with torch.no_grad(): 152 | model.eval() 153 | acc = 0 154 | for bbox, label, vel, traj in val_data: 155 | label = label.reshape(-1, 1).to(device).float() 156 | bbox = bbox.to(device) 157 | vel = vel.to(device) 158 | end_point = traj.to(device)[:, -1, :] 159 | 160 | pred, point, s_cls, s_reg = model(bbox, vel) 161 | val_reg_loss = reg_criterion(point, end_point) 162 | val_cls_loss = class_criterion(pred, label) 163 | f_loss = val_cls_loss / (s_cls * s_cls) + val_reg_loss / (s_reg * s_reg) + torch.log(s_cls * s_reg) 164 | 165 | val_f_losses += f_loss.item() 166 | val_cls_losses += val_cls_loss.item() 167 | val_reg_losses += val_reg_loss.item() 168 | 169 | acc += binary_acc(label, torch.round(pred)) 170 | print( 171 | f'Valid_Full_Loss {val_f_losses / nb_batches} | Valid Cls_loss {val_cls_losses / nb_batches} | Valid Reg_loss {val_reg_losses / nb_batches} | Valid_Acc {acc / nb_batches} \n') 172 | return val_f_losses / nb_batches, val_cls_losses / nb_batches, val_reg_losses / nb_batches, acc / nb_batches 173 | 174 | 175 | def test(model, test_data): 176 | print('Tesing...') 177 | with torch.no_grad(): 178 | model.eval() 179 | step = 0 180 | for bbox, label, vel, traj in test_data: 181 | label = label.reshape(-1, 1).to(device).float() 182 | bbox = bbox.to(device) 183 | vel = vel.to(device) 184 | 185 | pred, _, _, _ = model(bbox, vel)#测试阶段只需要预测分类结果,不关心回归结果 186 | 187 | if step == 0: 188 | preds = pred 189 | labels = label 190 | else: 191 | preds = torch.cat((preds, pred), 0) 192 | labels = torch.cat((labels, label), 0) 193 | step += 1 194 | return preds, labels 195 | 196 | 197 | def balance_dataset(dataset, flip=True): # 数据集平衡 198 | d = {'bbox': dataset['bbox'].copy(), 199 | 'pid': dataset['pid'].copy(), 200 | 'activities': dataset['activities'].copy(), 201 | 'image': dataset['image'].copy(), 202 | 'center': dataset['center'].copy(), 203 | 'obd_speed': dataset['obd_speed'].copy(), 204 | 'gps_speed': dataset['gps_speed'].copy(), 205 | 'image_dimension': (1920, 1080)} 206 | gt_labels = [gt[0] for gt in d['activities']] # 标签 207 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) # 正样本数 208 | num_neg_samples = len(gt_labels) - num_pos_samples # 负样本数 209 | 210 | if num_neg_samples == num_pos_samples: # 正负样本数相等 211 | print('Positive samples is equal to negative samples.') 212 | else: # 正负样本数不相等 213 | print('Unbalanced: \t Postive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples)) 214 | if num_neg_samples > num_pos_samples: 215 | gt_augment = 1 # 正样本数大于负样本数,增加负样本 216 | else: 217 | gt_augment = 0 # 负样本数大于正样本数,增加正样本 218 | 219 | img_width = d['image_dimension'][0] # 图片宽度 220 | num_samples = len(d['pid']) # 样本数 221 | 222 | for i in range(num_samples): # 遍历样本 223 | if d['activities'][i][0][0] == gt_augment: # 标签与增加的标签相同 224 | flipped = d['center'][i].copy() # 中心点 225 | flipped = [[img_width - c[0], c[1]] for c in flipped] # 水平翻转 226 | d['center'].append(flipped) # 添加到中心点 227 | 228 | flipped = d['bbox'][i].copy() # 边界框 229 | flipped = 
[np.array([img_width - c[2], c[1], img_width - c[0], c[3]]) for c in flipped] # 水平翻转 230 | d['bbox'].append(flipped) # 添加到边界框 231 | 232 | d['pid'].append(dataset['pid'][i].copy()) # 添加pid 233 | 234 | d['activities'].append(d['activities'][i].copy()) # 添加标签 235 | d['gps_speed'].append(d['gps_speed'][i].copy()) # 添加gps速度 236 | d['obd_speed'].append(d['obd_speed'][i].copy()) # 添加obd速度 237 | 238 | flipped = d['image'][i].copy() # 图片 239 | flipped = [c.replace('.png', '_flip.png') for c in flipped] # 水平翻转 240 | 241 | d['image'].append(flipped) # 添加图片 242 | 243 | gt_labels = [gt[0] for gt in d['activities']] # 标签 244 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) # 正样本数 245 | num_neg_samples = len(gt_labels) - num_pos_samples # 负样本数 246 | 247 | if num_neg_samples > num_pos_samples: # 负样本数大于正样本数 248 | rm_index = np.where(np.array(gt_labels) == 0)[0] # 删除负样本 249 | else: 250 | rm_index = np.where(np.array(gt_labels) == 1)[0] # 删除正样本 251 | 252 | dif_samples = abs(num_neg_samples - num_pos_samples) # 正负样本数差值 253 | 254 | np.random.seed(42) 255 | np.random.shuffle(rm_index) # 打乱索引 256 | rm_index = rm_index[0:dif_samples] # 间隔删除 257 | 258 | for k in d: # 遍历数据 259 | seq_data_k = d[k] # 数据 260 | d[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] # 删除数据 261 | 262 | new_gt_labels = [gt[0] for gt in d['activities']] # 新标签 263 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) # 新正样本数 264 | print('Balanced: Postive: %d \t Negative: %d \n' % (num_pos_samples, len(d['activities']) - num_pos_samples)) 265 | print('Total Number of samples: %d\n' % (len(d['activities']))) 266 | 267 | return d 268 | 269 | 270 | def tte_dataset(dataset, time_to_event, overlap, obs_length): # 时间到事件数据集 271 | d_obs = {'bbox': dataset['bbox'].copy(), 272 | 'pid': dataset['pid'].copy(), 273 | 'activities': dataset['activities'].copy(), 274 | 'image': dataset['image'].copy(), 275 | 'gps_speed': dataset['gps_speed'].copy(), 276 | 'obd_speed': dataset['obd_speed'].copy(), 277 | 'center': dataset['center'].copy() 278 | } 279 | 280 | d_tte = {'bbox': dataset['bbox'].copy(), 281 | 'pid': dataset['pid'].copy(), 282 | 'activities': dataset['activities'].copy(), 283 | 'image': dataset['image'].copy(), 284 | 'gps_speed': dataset['gps_speed'].copy(), 285 | 'obd_speed': dataset['obd_speed'].copy(), 286 | 'center': dataset['center'].copy()} 287 | 288 | if isinstance(time_to_event, int): 289 | for k in d_obs.keys(): 290 | for i in range(len(d_obs[k])): 291 | d_obs[k][i] = d_obs[k][i][- obs_length - time_to_event: -time_to_event] # 观察长度 292 | d_tte[k][i] = d_tte[k][i][- time_to_event:] # 时间到事件 293 | d_obs['tte'] = [[time_to_event]] * len(dataset['bbox']) # 观察长度 294 | d_tte['tte'] = [[time_to_event]] * len(dataset['bbox']) # 时间到事件 295 | 296 | else: # 时间到事件为列表 297 | olap_res = obs_length if overlap == 0 else int((1 - overlap) * obs_length) # 重叠长度 298 | olap_res = 1 if olap_res < 1 else olap_res # 重叠长度 299 | 300 | for k in d_obs.keys(): # 遍历数据 301 | seqs = [] 302 | seqs_tte = [] 303 | for seq in d_obs[k]: 304 | start_idx = len(seq) - obs_length - time_to_event[1] # 开始索引 305 | end_idx = len(seq) - obs_length - time_to_event[0] # 结束索引 306 | seqs.extend([seq[i:i + obs_length] for i in range(start_idx, end_idx, olap_res)]) # 观察长度 307 | seqs_tte.extend([seq[i + obs_length:] for i in range(start_idx, end_idx, olap_res)]) # 时间到事件 308 | d_obs[k] = seqs 309 | d_tte[k] = seqs_tte 310 | tte_seq = [] 311 | for seq in dataset['bbox']: 312 | start_idx = len(seq) - obs_length - time_to_event[1] 313 | end_idx = len(seq) - 
obs_length - time_to_event[0] 314 | tte_seq.extend([[len(seq) - (i + obs_length)] for i in range(start_idx, end_idx, olap_res)]) 315 | d_obs['tte'] = tte_seq.copy() 316 | d_tte['tte'] = tte_seq.copy() 317 | 318 | remove_index = [] 319 | try: 320 | time_to_event_0 = time_to_event[0] # 时间到事件 321 | except: 322 | time_to_event_0 = time_to_event # 时间到事件 323 | for seq_index, (seq_obs, seq_tte) in enumerate(zip(d_obs['bbox'], d_tte['bbox'])): # 遍历数据 324 | if len(seq_obs) < 16 or len(seq_tte) < time_to_event_0: # 观察长度小于16或时间到事件小于时间到事件 325 | remove_index.append(seq_index) # 删除索引 326 | 327 | for k in d_obs.keys(): 328 | for j in sorted(remove_index, reverse=True): # 倒序删除 329 | del d_obs[k][j] 330 | del d_tte[k][j] 331 | 332 | return d_obs, d_tte 333 | 334 | 335 | def normalize_bbox(dataset, width=1920, height=1080): # 归一化边界框 336 | normalized_set = [] 337 | for sequence in dataset: 338 | if sequence == []: 339 | continue 340 | normalized_sequence = [] 341 | for bbox in sequence: 342 | np_bbox = np.zeros(4) 343 | np_bbox[0] = bbox[0] / width # 左上角x 344 | np_bbox[2] = bbox[2] / width # 右下角x 345 | np_bbox[1] = bbox[1] / height # 左上角y 346 | np_bbox[3] = bbox[3] / height # 右下角y 347 | normalized_sequence.append(np_bbox) 348 | normalized_set.append(np.array(normalized_sequence)) 349 | 350 | return normalized_set 351 | 352 | def normalize_traj(dataset, width=1920, height=1080): # 归一化轨迹 353 | normalized_set = [] 354 | for sequence in dataset: 355 | if sequence == []: 356 | continue 357 | normalized_sequence = [] 358 | for bbox in sequence: 359 | np_bbox = np.zeros(4) 360 | np_bbox[0] = bbox[0]# / width 361 | np_bbox[2] = bbox[2]# / width 362 | np_bbox[1] = bbox[1]# / height 363 | np_bbox[3] = bbox[3]# / height 364 | normalized_sequence.append(np_bbox) 365 | normalized_set.append(np.array(normalized_sequence)) 366 | 367 | return normalized_set 368 | 369 | 370 | def prepare_label(dataset): # 准备标签 371 | labels = np.zeros(len(dataset), dtype='int64') 372 | for step, action in enumerate(dataset): 373 | if action == []: 374 | continue 375 | labels[step] = action[0][0] 376 | 377 | return labels 378 | 379 | def pad_sequence(inp_list, max_len): # 填充序列 380 | padded_sequence = [] 381 | for source in inp_list: 382 | target = np.array([source[0]] * max_len) # 填充序列 383 | source = source 384 | target[-source.shape[0]:, :] = source # 填充序列 385 | 386 | padded_sequence.append(target) 387 | 388 | return padded_sequence 389 | -------------------------------------------------------------------------------- /utils/pie_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface for the PIE dataset: 3 | 4 | A. Rasouli, I. Kotseruba, T. Kunic, and J. Tsotsos, "PIE: A Large-Scale Dataset and Models for Pedestrian Intention Estimation and 5 | Trajectory Prediction", ICCV 2019. 6 | 7 | MIT License 8 | 9 | Copyright (c) 2019 Amir Rasouli, Iuliia Kotseruba 10 | 11 | Permission is hereby granted, free of charge, to any person obtaining a copy 12 | of this software and associated documentation files (the "Software"), to deal 13 | in the Software without restriction, including without limitation the rights 14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 15 | copies of the Software, and to permit persons to whom the Software is 16 | furnished to do so, subject to the following conditions: 17 | 18 | The above copyright notice and this permission notice shall be included in all 19 | copies or substantial portions of the Software. 
20 | 21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 27 | SOFTWARE. 28 | 29 | """ 30 | import pickle 31 | import cv2 32 | import sys 33 | 34 | import xml.etree.ElementTree as ET 35 | import numpy as np 36 | 37 | from os.path import join, abspath, isfile, isdir 38 | from os import makedirs, listdir 39 | from sklearn.model_selection import train_test_split, KFold 40 | 41 | 42 | class PIE(object): 43 | def __init__(self, regen_database=False, data_path=''): 44 | """ 45 | Class constructor 46 | :param regen_database: Whether generate the database or not 47 | :param data_path: The path to wh 48 | """ 49 | self._year = '2019' 50 | self._name = 'pie' 51 | self._image_ext = '.png' 52 | self._regen_database = regen_database 53 | 54 | # Paths 55 | self._pie_path = data_path if data_path else self._get_default_path() 56 | assert isdir(self._pie_path), \ 57 | 'pie path does not exist: {}'.format(self._pie_path) 58 | 59 | self._annotation_path = join(self._pie_path, 'annotations') 60 | self._annotation_attributes_path = join(self._pie_path, 'annotations_attributes') 61 | self._annotation_vehicle_path = join(self._pie_path, 'annotations_vehicle') 62 | 63 | self._clips_path = join(self._pie_path, 'PIE_clips') 64 | self._images_path = join(self._pie_path, 'images') 65 | 66 | # Path generators 67 | @property 68 | def cache_path(self): 69 | """ 70 | Generates a path to save cache files 71 | :return: Cache file folder path 72 | """ 73 | cache_path = abspath(join(self._pie_path, 'data_cache')) 74 | if not isdir(cache_path): 75 | makedirs(cache_path) 76 | return cache_path 77 | 78 | def _get_default_path(self): 79 | """ 80 | Returns the default path where pie is expected to be installed. 
81 | """ 82 | return 'data/pie' 83 | 84 | def _get_image_set_ids(self, image_set): 85 | """ 86 | Returns default image set ids 87 | :param image_set: Image set split 88 | :return: Set ids of the image set 89 | """ 90 | image_set_nums = {'train': ['set01', 'set02', 'set04'], 91 | 'val': ['set05', 'set06'], 92 | 'test': ['set03'], 93 | 'all': ['set01', 'set02', 'set03', 94 | 'set04', 'set05', 'set06']} 95 | return image_set_nums[image_set] 96 | 97 | def _get_image_path(self, sid, vid, fid): 98 | """ 99 | Generates and returns the image path given ids 100 | :param sid: Set id 101 | :param vid: Video id 102 | :param fid: Frame id 103 | :return: Return the path to the given image 104 | """ 105 | return join(self._images_path, sid, vid, 106 | '{:05d}.png'.format(fid)) 107 | 108 | # Visual helpers 109 | def update_progress(self, progress): 110 | """ 111 | Creates a progress bar 112 | :param progress: The progress thus far 113 | """ 114 | barLength = 20 # Modify this to change the length of the progress bar 115 | status = "" 116 | if isinstance(progress, int): 117 | progress = float(progress) 118 | 119 | block = int(round(barLength * progress)) 120 | text = "\r[{}] {:0.2f}% {}".format("#" * block + "-" * (barLength - block), progress * 100, status) 121 | sys.stdout.write(text) 122 | sys.stdout.flush() 123 | 124 | def _print_dict(self, dic): 125 | """ 126 | Prints a dictionary, one key-value pair per line 127 | :param dic: Dictionary 128 | """ 129 | for k, v in dic.items(): 130 | print('%s: %s' % (str(k), str(v))) 131 | 132 | # Data processing helpers 133 | def _get_width(self): 134 | """ 135 | Returns image width 136 | :return: Image width 137 | """ 138 | return 1920 139 | 140 | def _get_height(self): 141 | """ 142 | Returns image height 143 | :return: Image height 144 | """ 145 | return 1080 146 | 147 | def _get_dim(self): 148 | """ 149 | Returns the image dimensions 150 | :return: Image dimensions 151 | """ 152 | return 1920, 1080 153 | 154 | # Image processing helpers 155 | def get_annotated_frame_numbers(self, set_id): 156 | """ 157 | Generates and returns a dictionary of videos and annotated frames for each video in the give set 158 | :param set_id: Set to generate annotated frames 159 | :return: A dictionary of form 160 | {: [,,... 
]} 161 | """ 162 | 163 | print("Generating annotated frame numbers for", set_id) 164 | annotated_frames_file = join(self._pie_path, "annotations", set_id, set_id + '_annotated_frames.csv') 165 | # If the file exists, load from the file 166 | if isfile(annotated_frames_file): 167 | with open(annotated_frames_file, 'rt') as f: 168 | annotated_frames = {x.split(',')[0]: 169 | [int(fr) for fr in x.split(',')[1:]] for x in f.readlines()} 170 | return annotated_frames 171 | else: 172 | # Generate annotated frame ids for each video 173 | annotated_frames = {v.split('_annt.xml')[0]: [] for v in sorted(listdir(join(self._annotation_path, 174 | set_id))) if 175 | v.endswith("annt.xml")} 176 | for vid, annot_frames in sorted(annotated_frames.items()): 177 | _frames = [] 178 | path_to_file = join(self._annotation_path, set_id, vid + '_annt.xml') 179 | tree = ET.parse(path_to_file) 180 | tracks = tree.findall('./track') 181 | for t in tracks: 182 | if t.get('label') != 'pedestrian': 183 | continue 184 | boxes = t.findall('./box') 185 | for b in boxes: 186 | # Exclude the annotations that are outside of the frame 187 | if int(b.get('outside')) == 1: 188 | continue 189 | _frames.append(int(b.get('frame'))) 190 | _frames = sorted(list(set(_frames))) 191 | annot_frames.append(len(_frames)) 192 | annot_frames.extend(_frames) 193 | 194 | with open(annotated_frames_file, 'wt') as fid: 195 | for vid, annot_frames in sorted(annotated_frames.items()): 196 | fid.write(vid) 197 | for fr in annot_frames: 198 | fid.write("," + str(fr)) 199 | fid.write('\n') 200 | 201 | return annotated_frames 202 | 203 | def get_frame_numbers(self, set_id): 204 | """ 205 | Generates and returns a dictionary of videos and frames for each video in the give set 206 | :param set_id: Set to generate annotated frames 207 | :return: A dictionary of form 208 | {: [,,... ]} 209 | """ 210 | print("Generating frame numbers for", set_id) 211 | frame_ids = {v.split('_annt.xml')[0]: [] for v in sorted(listdir(join(self._annotation_path, 212 | set_id))) if 213 | v.endswith("annt.xml")} 214 | for vid, frames in sorted(frame_ids.items()): 215 | path_to_file = join(self._annotation_path, set_id, vid + '_annt.xml') 216 | tree = ET.parse(path_to_file) 217 | num_frames = int(tree.find("./meta/task/size").text) 218 | frames.extend([i for i in range(num_frames)]) 219 | frames.insert(0, num_frames) 220 | return frame_ids 221 | 222 | def extract_and_save_images(self, extract_frame_type='annotated'): 223 | """ 224 | Extracts images from clips and saves on hard drive 225 | :param extract_frame_type: Whether to extract 'all' frames or only the ones that are 'annotated' 226 | Note: extracting 'all' frames requires approx. 3TB space whereas 227 | 'annotated' requires approx. 
1TB 228 | """ 229 | set_folders = [f for f in sorted(listdir(self._clips_path))] 230 | for set_id in set_folders: 231 | print('Extracting frames from', set_id) 232 | set_folder_path = join(self._clips_path, set_id) 233 | if extract_frame_type == 'annotated': 234 | extract_frames = self.get_annotated_frame_numbers(set_id) 235 | else: 236 | extract_frames = self.get_frame_numbers(set_id) 237 | 238 | set_images_path = join(self._pie_path, "images", set_id) 239 | for vid, frames in sorted(extract_frames.items()): 240 | print(vid) 241 | video_images_path = join(set_images_path, vid) 242 | num_frames = frames[0] 243 | frames_list = frames[1:] 244 | if not isdir(video_images_path): 245 | makedirs(video_images_path) 246 | vidcap = cv2.VideoCapture(join(set_folder_path, vid + '.mp4')) 247 | success, image = vidcap.read() 248 | frame_num = 0 249 | img_count = 0 250 | if not success: 251 | print('Failed to open the video {}'.format(vid)) 252 | while success: 253 | if frame_num in frames_list: 254 | self.update_progress(img_count / num_frames) 255 | img_count += 1 256 | if not isfile(join(video_images_path, "%05.f.png") % frame_num): 257 | cv2.imwrite(join(video_images_path, "%05.f.png") % frame_num, image) 258 | success, image = vidcap.read() 259 | frame_num += 1 260 | if num_frames != img_count: 261 | print('num images don\'t match {}/{}'.format(num_frames, img_count)) 262 | print('\n') 263 | 264 | # Annotation processing helpers 265 | def _map_text_to_scalar(self, label_type, value): 266 | """ 267 | Maps a text label in XML file to scalars 268 | :param label_type: The label type 269 | :param value: The text to be mapped 270 | :return: The scalar value 271 | """ 272 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2}, 273 | 'action': {'standing': 0, 'walking': 1}, 274 | 'look': {'not-looking': 0, 'looking': 1}, 275 | 'gesture': {'__undefined__': 0, 'hand_ack': 1, 'hand_yield': 2, 276 | 'hand_rightofway': 3, 'nod': 4, 'other': 5}, 277 | 'cross': {'not-crossing': 0, 'crossing': 1, 'crossing-irrelevant': -1}, 278 | 'crossing': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1}, 279 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3}, 280 | 'designated': {'ND': 0, 'D': 1}, 281 | 'gender': {'n/a': 0, 'female': 1, 'male': 2}, 282 | 'intersection': {'midblock': 0, 'T': 1, 'T-left': 2, 'T-right': 3, 'four-way': 4}, 283 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2}, 284 | 'traffic_direction': {'OW': 0, 'TW': 1}, 285 | 'signalized': {'n/a': 0, 'C': 1, 'S': 2, 'CS': 3}, 286 | 'vehicle': {'car': 0, 'truck': 1, 'bus': 2, 'train': 3, 'bicycle': 4, 'bike': 5}, 287 | 'sign': {'ped_blue': 0, 'ped_yellow': 1, 'ped_white': 2, 'ped_text': 3, 'stop_sign': 4, 288 | 'bus_stop': 5, 'train_stop': 6, 'construction': 7, 'other': 8}, 289 | 'traffic_light': {'regular': 0, 'transit': 1, 'pedestrian': 2}, 290 | 'state': {'__undefined__': 0, 'red': 1, 'yellow': 2, 'green': 3}} 291 | 292 | return map_dic[label_type][value] 293 | 294 | def _map_scalar_to_text(self, label_type, value): 295 | """ 296 | Maps a scalar value to a text label 297 | :param label_type: The label type 298 | :param value: The scalar to be mapped 299 | :return: The text label 300 | """ 301 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'}, 302 | 'action': {0: 'standing', 1: 'walking'}, 303 | 'look': {0: 'not-looking', 1: 'looking'}, 304 | 'hand_gesture': {0: '__undefined__', 1: 'hand_ack', 305 | 2: 'hand_yield', 3: 'hand_rightofway', 306 | 4: 'nod', 5: 'other'}, 307 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 
'crossing-irrelevant'}, 308 | 'crossing': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'}, 309 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'}, 310 | 'designated': {0: 'ND', 1: 'D'}, 311 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'}, 312 | 'intersection': {0: 'midblock', 1: 'T', 2: 'T-left', 3: 'T-right', 4: 'four-way'}, 313 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'}, 314 | 'traffic_direction': {0: 'OW', 1: 'TW'}, 315 | 'signalized': {0: 'n/a', 1: 'C', 2: 'S', 3: 'CS'}, 316 | 'vehicle': {0: 'car', 1: 'truck', 2: 'bus', 3: 'train', 4: 'bicycle', 5: 'bike'}, 317 | 'sign': {0: 'ped_blue', 1: 'ped_yellow', 2: 'ped_white', 3: 'ped_text', 4: 'stop_sign', 318 | 5: 'bus_stop', 6: 'train_stop', 7: 'construction', 8: 'other'}, 319 | 'traffic_light': {0: 'regular', 1: 'transit', 2: 'pedestrian'}, 320 | 'state': {0: '__undefined__', 1: 'red', 2: 'yellow', 3: 'green'}} 321 | 322 | return map_dic[label_type][value] 323 | 324 | def _get_annotations(self, setid, vid): 325 | """ 326 | Generates a dictionary of annotations by parsing the video XML file 327 | :param setid: The set id 328 | :param vid: The video id 329 | :return: A dictionary of annotations 330 | """ 331 | path_to_file = join(self._annotation_path, setid, vid + '_annt.xml') 332 | print(path_to_file) 333 | 334 | tree = ET.parse(path_to_file) 335 | ped_annt = 'ped_annotations' 336 | traffic_annt = 'traffic_annotations' 337 | 338 | annotations = {} 339 | annotations['num_frames'] = int(tree.find("./meta/task/size").text) 340 | annotations['width'] = int(tree.find("./meta/task/original_size/width").text) 341 | annotations['height'] = int(tree.find("./meta/task/original_size/height").text) 342 | annotations[ped_annt] = {} 343 | annotations[traffic_annt] = {} 344 | tracks = tree.findall('./track') 345 | for t in tracks: 346 | boxes = t.findall('./box') 347 | obj_label = t.get('label') 348 | obj_id = boxes[0].find('./attribute[@name=\"id\"]').text 349 | 350 | if obj_label == 'pedestrian': 351 | annotations[ped_annt][obj_id] = {'frames': [], 'bbox': [], 'occlusion': []} 352 | annotations[ped_annt][obj_id]['behavior'] = {'gesture': [], 'look': [], 'action': [], 'cross': []} 353 | for b in boxes: 354 | # Exclude the annotations that are outside of the frame 355 | if int(b.get('outside')) == 1: 356 | continue 357 | annotations[ped_annt][obj_id]['bbox'].append( 358 | [float(b.get('xtl')), float(b.get('ytl')), 359 | float(b.get('xbr')), float(b.get('ybr'))]) 360 | occ = self._map_text_to_scalar('occlusion', b.find('./attribute[@name=\"occlusion\"]').text) 361 | annotations[ped_annt][obj_id]['occlusion'].append(occ) 362 | annotations[ped_annt][obj_id]['frames'].append(int(b.get('frame'))) 363 | for beh in annotations['ped_annotations'][obj_id]['behavior']: 364 | # Read behavior tags for each frame and add to the database 365 | annotations[ped_annt][obj_id]['behavior'][beh].append( 366 | self._map_text_to_scalar(beh, b.find('./attribute[@name=\"' + beh + '\"]').text)) 367 | 368 | else: 369 | obj_type = boxes[0].find('./attribute[@name=\"type\"]') 370 | if obj_type is not None: 371 | obj_type = self._map_text_to_scalar(obj_label, 372 | boxes[0].find('./attribute[@name=\"type\"]').text) 373 | 374 | annotations[traffic_annt][obj_id] = {'frames': [], 'bbox': [], 'occlusion': [], 375 | 'obj_class': obj_label, 376 | 'obj_type': obj_type, 377 | 'state': []} 378 | 379 | for b in boxes: 380 | # Exclude the annotations that are outside of the frame 381 | if int(b.get('outside')) == 1: 382 | continue 383 | 
annotations[traffic_annt][obj_id]['bbox'].append( 384 | [float(b.get('xtl')), float(b.get('ytl')), 385 | float(b.get('xbr')), float(b.get('ybr'))]) 386 | annotations[traffic_annt][obj_id]['occlusion'].append(int(b.get('occluded'))) 387 | annotations[traffic_annt][obj_id]['frames'].append(int(b.get('frame'))) 388 | if obj_label == 'traffic_light': 389 | annotations[traffic_annt][obj_id]['state'].append(self._map_text_to_scalar('state', 390 | b.find( 391 | './attribute[@name=\"state\"]').text)) 392 | return annotations 393 | 394 | def _get_ped_attributes(self, setid, vid): 395 | """ 396 | Generates a dictionary of attributes by parsing the video XML file 397 | :param setid: The set id 398 | :param vid: The video id 399 | :return: A dictionary of attributes 400 | """ 401 | path_to_file = join(self._annotation_attributes_path, setid, vid + '_attributes.xml') 402 | tree = ET.parse(path_to_file) 403 | 404 | attributes = {} 405 | pedestrians = tree.findall("./pedestrian") 406 | for p in pedestrians: 407 | ped_id = p.get('id') 408 | attributes[ped_id] = {} 409 | for k, v in p.items(): 410 | if 'id' in k: 411 | continue 412 | try: 413 | if k == 'intention_prob': 414 | attributes[ped_id][k] = float(v) 415 | else: 416 | attributes[ped_id][k] = int(v) 417 | except ValueError: 418 | attributes[ped_id][k] = self._map_text_to_scalar(k, v) 419 | 420 | return attributes 421 | 422 | def _get_vehicle_attributes(self, setid, vid): 423 | """ 424 | Generates a dictionary of vehicle attributes by parsing the video XML file 425 | :param setid: The set id 426 | :param vid: The video id 427 | :return: A dictionary of vehicle attributes (obd sensor recording) 428 | """ 429 | path_to_file = join(self._annotation_vehicle_path, setid, vid + '_obd.xml') 430 | tree = ET.parse(path_to_file) 431 | 432 | veh_attributes = {} 433 | frames = tree.findall("./frame") 434 | 435 | for f in frames: 436 | dict_vals = {k: float(v) for k, v in f.attrib.items() if k != 'id'} 437 | veh_attributes[int(f.get('id'))] = dict_vals 438 | 439 | return veh_attributes 440 | 441 | def generate_database(self): 442 | """ 443 | Generates and saves a database of the pie dataset by integrating all annotations 444 | Dictionary structure: 445 | 'set_id'(str): { 446 | 'vid_id'(str): { 447 | 'num_frames': int 448 | 'width': int 449 | 'height': int 450 | 'traffic_annotations'(str): { 451 | 'obj_id'(str): { 452 | 'frames': list(int) 453 | 'occlusion': list(int) 454 | 'bbox': list([x1, y1, x2, y2]) (float) 455 | 'obj_class': str, 456 | 'obj_type': str, # only for traffic lights, vehicles, signs 457 | 'state': list(int) # only for traffic lights 458 | 'ped_annotations'(str): { 459 | 'ped_id'(str): { 460 | 'frames': list(int) 461 | 'occlusion': list(int) 462 | 'bbox': list([x1, y1, x2, y2]) (float) 463 | 'behavior'(str): { 464 | 'action': list(int) 465 | 'gesture': list(int) 466 | 'cross': list(int) 467 | 'look': list(int) 468 | 'attributes'(str): { 469 | 'age': int 470 | 'id': str 471 | 'num_lanes': int 472 | 'crossing': int 473 | 'gender': int 474 | 'crossing_point': int 475 | 'critical_point': int 476 | 'exp_start_point': int 477 | 'intersection': int 478 | 'designated': int 479 | 'signalized': int 480 | 'traffic_direction': int 481 | 'group_size': int 482 | 'motion_direction': int 483 | 'vehicle_annotations'(str){ 484 | 'frame_id'(int){'longitude': float 485 | 'yaw': float 486 | 'pitch': float 487 | 'roll': float 488 | 'OBD_speed': float 489 | 'GPS_speed': float 490 | 'latitude': float 491 | 'longitude': float 492 | 'heading_angle': float 493 | 'accX': 
float 494 | 'accY': float 495 | 'accZ: float 496 | 'gyroX': float 497 | 'gyroY': float 498 | 'gyroZ': float 499 | 500 | :return: A database dictionary 501 | """ 502 | 503 | print('---------------------------------------------------------') 504 | print("Generating database for pie") 505 | 506 | cache_file = join(self.cache_path, 'pie_database.pkl') 507 | if isfile(cache_file) and not self._regen_database: 508 | with open(cache_file, 'rb') as fid: 509 | try: 510 | database = pickle.load(fid) 511 | except: 512 | database = pickle.load(fid, encoding='bytes') 513 | print('pie annotations loaded from {}'.format(cache_file)) 514 | return database 515 | 516 | # Path to the folder annotations 517 | set_ids = [f for f in sorted(listdir(self._annotation_path))] 518 | 519 | # Read the content of set folders 520 | database = {} 521 | for setid in set_ids: 522 | video_ids = [v.split('_annt.xml')[0] for v in sorted(listdir(join(self._annotation_path, 523 | setid))) if v.endswith("annt.xml")] 524 | database[setid] = {} 525 | for vid in video_ids: 526 | print('Getting annotations for %s, %s' % (setid, vid)) 527 | database[setid][vid] = self._get_annotations(setid, vid) 528 | vid_attributes = self._get_ped_attributes(setid, vid) 529 | database[setid][vid]['vehicle_annotations'] = self._get_vehicle_attributes(setid, vid) 530 | for ped in database[setid][vid]['ped_annotations']: 531 | database[setid][vid]['ped_annotations'][ped]['attributes'] = vid_attributes[ped] 532 | 533 | with open(cache_file, 'wb') as fid: 534 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL) 535 | print('The database is written to {}'.format(cache_file)) 536 | 537 | return database 538 | 539 | def get_data_stats(self): 540 | """ 541 | Generates statistics for the dataset 542 | """ 543 | annotations = self.generate_database() 544 | 545 | set_count = len(annotations.keys()) 546 | 547 | ped_count = 0 548 | ped_box_count = 0 549 | video_count = 0 550 | total_frames = 0 551 | age = {'child': 0, 'adult': 0, 'senior': 0} 552 | gender = {'male': 0, 'female': 0} 553 | signalized = {'n/a': 0, 'C': 0, 'S': 0, 'CS': 0} 554 | traffic_direction = {'OW': 0, 'TW': 0} 555 | intersection = {'midblock': 0, 'T': 0, 'T-right': 0, 'T-left': 0, 'four-way': 0} 556 | crossing = {'crossing': 0, 'not-crossing': 0, 'irrelevant': 0} 557 | 558 | traffic_obj_types = {'vehicle': {'car': 0, 'truck': 0, 'bus': 0, 'train': 0, 'bicycle': 0, 'bike': 0}, 559 | 'sign': {'ped_blue': 0, 'ped_yellow': 0, 'ped_white': 0, 'ped_text': 0, 'stop_sign': 0, 560 | 'bus_stop': 0, 'train_stop': 0, 'construction': 0, 'other': 0}, 561 | 'traffic_light': {'regular': 0, 'transit': 0, 'pedestrian': 0}, 562 | 'crosswalk': 0, 563 | 'transit_station': 0} 564 | traffic_box_count = {'vehicle': 0, 'traffic_light': 0, 'sign': 0, 'crosswalk': 0, 'transit_station': 0} 565 | for sid, vids in annotations.items(): 566 | video_count += len(vids) 567 | for vid, annots in vids.items(): 568 | total_frames += annots['num_frames'] 569 | for trf_ids, trf_annots in annots['traffic_annotations'].items(): 570 | obj_class = trf_annots['obj_class'] 571 | traffic_box_count[obj_class] += len(trf_annots['frames']) 572 | if obj_class in ['traffic_light', 'vehicle', 'sign']: 573 | obj_type = trf_annots['obj_type'] 574 | traffic_obj_types[obj_class][self._map_scalar_to_text(obj_class, obj_type)] += 1 575 | else: 576 | traffic_obj_types[obj_class] += 1 577 | for ped_ids, ped_annots in annots['ped_annotations'].items(): 578 | ped_count += 1 579 | ped_box_count += len(ped_annots['frames']) 580 | 
age[self._map_scalar_to_text('age', ped_annots['attributes']['age'])] += 1 581 | if self._map_scalar_to_text('crossing', ped_annots['attributes']['crossing']) == 'crossing': 582 | crossing[self._map_scalar_to_text('crossing', ped_annots['attributes']['crossing'])] += 1 583 | else: 584 | if ped_annots['attributes']['intention_prob'] > 0.5: 585 | crossing['not-crossing'] += 1 586 | else: 587 | crossing['irrelevant'] += 1 588 | intersection[ 589 | self._map_scalar_to_text('intersection', ped_annots['attributes']['intersection'])] += 1 590 | traffic_direction[self._map_scalar_to_text('traffic_direction', 591 | ped_annots['attributes']['traffic_direction'])] += 1 592 | signalized[self._map_scalar_to_text('signalized', ped_annots['attributes']['signalized'])] += 1 593 | gender[self._map_scalar_to_text('gender', ped_annots['attributes']['gender'])] += 1 594 | 595 | print('---------------------------------------------------------') 596 | print("Number of sets: %d" % set_count) 597 | print("Number of videos: %d" % video_count) 598 | print("Number of annotated frames: %d" % total_frames) 599 | print("Number of pedestrians %d" % ped_count) 600 | print("age:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(age.items()))) 601 | print("gender:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(gender.items()))) 602 | print("signal:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(signalized.items()))) 603 | print("traffic direction:\n", 604 | '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(traffic_direction.items()))) 605 | print("crossing:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(crossing.items()))) 606 | print("intersection:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(intersection.items()))) 607 | print("Number of pedestrian bounding boxes: %d" % ped_box_count) 608 | print("Number of traffic objects") 609 | for trf_obj, values in sorted(traffic_obj_types.items()): 610 | if isinstance(values, dict): 611 | print(trf_obj + ':\n', '\n '.join('{}: {}'.format(k, v) for k, v in sorted(values.items())), 612 | '\n total: ', sum(values.values())) 613 | else: 614 | print(trf_obj + ': %d' % values) 615 | print("Number of pedestrian bounding boxes:\n", 616 | '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(traffic_box_count.items())), 617 | '\n total: ', sum(traffic_box_count.values())) 618 | 619 | def balance_samples_count(self, seq_data, label_type, random_seed=42): 620 | """ 621 | Balances the number of positive and negative samples by randomly sampling 622 | from the more represented samples. Only works for binary classes. 623 | :param seq_data: The sequence data to be balanced. 624 | :param label_type: The lable type based on which the balancing takes place. 625 | The label values must be binary, i.e. only 0, 1. 626 | :param random_seed: The seed for random number generator. 627 | :return: Balanced data sequence. 
628 | """ 629 | for lbl in seq_data[label_type]: 630 | for i in lbl: 631 | if i[0] not in [0, 1]: 632 | raise Exception("The label values used for balancing must be" 633 | " either 0 or 1") 634 | 635 | # balances the number of positive and negative samples 636 | print('---------------------------------------------------------') 637 | print("Balancing the number of positive and negative intention samples") 638 | 639 | gt_labels = [gt[0] for gt in seq_data[label_type]] 640 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) 641 | num_neg_samples = len(gt_labels) - num_pos_samples 642 | 643 | new_seq_data = {} 644 | # finds the indices of the samples with larger quantity 645 | if num_neg_samples == num_pos_samples: 646 | print('Positive and negative samples are already balanced') 647 | return seq_data 648 | else: 649 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples)) 650 | if num_neg_samples > num_pos_samples: 651 | rm_index = np.where(np.array(gt_labels) == 0)[0] 652 | else: 653 | rm_index = np.where(np.array(gt_labels) == 1)[0] 654 | 655 | # Calculate the difference of sample counts 656 | dif_samples = abs(num_neg_samples - num_pos_samples) 657 | # shuffle the indices 658 | np.random.seed(random_seed) 659 | np.random.shuffle(rm_index) 660 | # reduce the number of indices to the difference 661 | rm_index = rm_index[0:dif_samples] 662 | # update the data 663 | for k in seq_data: 664 | seq_data_k = seq_data[k] 665 | if not isinstance(seq_data[k], list): 666 | new_seq_data[k] = seq_data[k] 667 | else: 668 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] 669 | 670 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]] 671 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) 672 | print('Balanced:\t Positive: %d \t Negative: %d\n' 673 | % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples)) 674 | return new_seq_data 675 | 676 | # Process pedestrian ids 677 | def _get_pedestrian_ids(self): 678 | """ 679 | Returns all pedestrian ids 680 | :return: A list of pedestrian ids 681 | """ 682 | annotations = self.generate_database() 683 | pids = [] 684 | for sid in sorted(annotations): 685 | for vid in sorted(annotations[sid]): 686 | pids.extend(annotations[sid][vid]['ped_annotations'].keys()) 687 | return pids 688 | 689 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False): 690 | """ 691 | Generates and saves a random pedestrian ids 692 | :param image_set: The data split to return 693 | :param ratios: The ratios to split the data. There should be 2 ratios (or 3 if val_data is true) 694 | and they should sum to 1. e.g. [0.4, 0.6], [0.3, 0.5, 0.2] 695 | :param val_data: Whether to generate validation data 696 | :param regen_data: Whether to overwrite the existing data, i.e. 
regenerate splits 697 | :return: The random sample split 698 | """ 699 | 700 | assert image_set in ['train', 'test', 'val'] 701 | # Generates a list of behavioral xml file names for videos 702 | cache_file = join(self.cache_path, "random_samples.pkl") 703 | if isfile(cache_file) and not regen_data: 704 | print("Random sample currently exists.\n Loading from %s" % cache_file) 705 | with open(cache_file, 'rb') as fid: 706 | try: 707 | rand_samples = pickle.load(fid) 708 | except: 709 | rand_samples = pickle.load(fid, encoding='bytes') 710 | assert image_set in rand_samples, "%s does not exist in random samples\n" \ 711 | "Please try again by setting regen_data = True" % image_set 712 | if val_data: 713 | assert len(rand_samples['ratios']) == 3, "The existing random samples " \ 714 | "does not have validation data.\n" \ 715 | "Please try again by setting regen_data = True" 716 | if ratios is not None: 717 | assert ratios == rand_samples['ratios'], "Specified ratios {} does not match the ones in existing file {}.\n\ 718 | Perform one of the following options:\ 719 | 1- Set ratios to None\ 720 | 2- Set ratios to the same values \ 721 | 3- Regenerate data".format(ratios, rand_samples['ratios']) 722 | 723 | print('The ratios are {}'.format(rand_samples['ratios'])) 724 | print("Number of %s tracks %d" % (image_set, len(rand_samples[image_set]))) 725 | return rand_samples[image_set] 726 | 727 | if ratios is None: 728 | if val_data: 729 | ratios = [0.5, 0.4, 0.1] 730 | else: 731 | ratios = [0.5, 0.5] 732 | 733 | assert sum(ratios) > 0.999999, "Ratios {} do not sum to 1".format(ratios) 734 | if val_data: 735 | assert len(ratios) == 3, "To generate validation data three ratios should be selected" 736 | else: 737 | assert len(ratios) == 2, "With no validation only two ratios should be selected" 738 | 739 | print("################ Generating Random training/testing data ################") 740 | ped_ids = self._get_pedestrian_ids() 741 | print("Toral number of tracks %d" % len(ped_ids)) 742 | print('The ratios are {}'.format(ratios)) 743 | sample_split = {'ratios': ratios} 744 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0]) 745 | print("Number of train tracks %d" % len(train_samples)) 746 | 747 | if val_data: 748 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:])) 749 | print("Number of val tracks %d" % len(val_samples)) 750 | sample_split['val'] = val_samples 751 | 752 | print("Number of test tracks %d" % len(test_samples)) 753 | sample_split['train'] = train_samples 754 | sample_split['test'] = test_samples 755 | 756 | cache_file = join(self.cache_path, "random_samples.pkl") 757 | with open(cache_file, 'wb') as fid: 758 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL) 759 | print('pie {} samples written to {}'.format('random', cache_file)) 760 | return sample_split[image_set] 761 | 762 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1): 763 | """ 764 | Generates kfold pedestrian ids 765 | :param image_set: Image set split 766 | :param num_folds: Number of folds 767 | :param fold: The given fold 768 | :return: List of pedestrian ids for the given fold 769 | """ 770 | assert image_set in ['train', 'test'], "Image set should be either \"train\" or \"test\"" 771 | assert fold <= num_folds, "Fold number should be smaller than number of folds" 772 | print("################ Generating %d fold data ################" % num_folds) 773 | cache_file = join(self.cache_path, "%d_fold_samples.pkl" % num_folds) 
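        # --- Editor's note: descriptive comment, not part of the original file. ---
        # The cached pickle built below maps 'pid' to the full list of pedestrian ids
        # and each fold number 1..num_folds to {'train': [...], 'test': [...]} index
        # lists produced by sklearn's KFold; the requested fold and split is then
        # mapped back to pedestrian ids at the end of this method.
        # ---------------------------------------------------------------------------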
774 | 775 | if isfile(cache_file): 776 | print("Loading %d-fold data from %s" % (num_folds, cache_file)) 777 | with open(cache_file, 'rb') as fid: 778 | try: 779 | fold_idx = pickle.load(fid) 780 | except: 781 | fold_idx = pickle.load(fid, encoding='bytes') 782 | else: 783 | ped_ids = self._get_pedestrian_ids() 784 | kf = KFold(n_splits=num_folds, shuffle=True) 785 | fold_idx = {'pid': ped_ids} 786 | count = 1 787 | for train_index, test_index in kf.split(ped_ids): 788 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()} 789 | count += 1 790 | with open(cache_file, 'wb') as fid: 791 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL) 792 | print('pie {}-fold samples written to {}'.format(num_folds, cache_file)) 793 | print("Number of %s tracks %d" % (image_set, len(fold_idx[fold][image_set]))) 794 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]] 795 | return kfold_ids 796 | 797 | # Trajectory data generation 798 | def _get_data_ids(self, image_set, params): 799 | """ 800 | Generates set ids and ped ids (if needed) for processing 801 | :param image_set: Image-set to generate data 802 | :param params: Data generation params 803 | :return: Set and pedestrian ids 804 | """ 805 | _pids = None 806 | if params['data_split_type'] == 'default': 807 | set_ids = self._get_image_set_ids(image_set) 808 | else: 809 | set_ids = self._get_image_set_ids('all') 810 | if params['data_split_type'] == 'random': 811 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params']) 812 | elif params['data_split_type'] == 'kfold': 813 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params']) 814 | 815 | return set_ids, _pids 816 | 817 | def _squarify(self, bbox, ratio, img_width): 818 | """ 819 | Changes the ratio of bounding boxes to a fixed ratio 820 | :param bbox: Bounding box 821 | :param ratio: Ratio to be changed to 822 | :param img_width: Image width 823 | :return: Squarified boduning box 824 | """ 825 | width = abs(bbox[0] - bbox[2]) 826 | height = abs(bbox[1] - bbox[3]) 827 | width_change = height * ratio - width 828 | 829 | bbox[0] = bbox[0] - width_change / 2 830 | bbox[2] = bbox[2] + width_change / 2 831 | 832 | if bbox[0] < 0: 833 | bbox[0] = 0 834 | 835 | # check whether the new bounding box goes beyond image boarders 836 | # If this is the case, the bounding box is shifted back 837 | if bbox[2] > img_width: 838 | bbox[0] = bbox[0] - bbox[2] + img_width 839 | bbox[2] = img_width 840 | return bbox 841 | 842 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion): 843 | """ 844 | Checks whether the bounding boxes are within a given height limit. 
If not, it 845 | will adjust the length of bounding boxes in data sequences accordingly 846 | :param height_rng: Height limit [lower, higher] 847 | :param frame_ids: List of frame ids 848 | :param boxes: List of bounding boxes 849 | :param images: List of images 850 | :param occlusion: List of occlusions 851 | :return: The adjusted data sequences 852 | """ 853 | imgs, box, frames, occ = [], [], [], [] 854 | for i, b in enumerate(boxes): 855 | bbox_height = abs(b[1] - b[3]) 856 | if height_rng[0] <= bbox_height <= height_rng[1]: 857 | box.append(b) 858 | imgs.append(images[i]) 859 | frames.append(frame_ids[i]) 860 | occ.append(occlusion[i]) 861 | return imgs, box, frames, occ 862 | 863 | def _get_center(self, box): 864 | """ 865 | Calculates the center coordinate of a bounding box 866 | :param box: Bounding box coordinates 867 | :return: The center coordinate 868 | """ 869 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2] 870 | 871 | def generate_data_trajectory_sequence(self, image_set, **opts): 872 | """ 873 | Generates pedestrian tracks 874 | :param image_set: the split set to produce for. Options are train, test, val. 875 | :param opts: 876 | 'fstride': Frequency of sampling from the data. 877 | 'height_rng': The height range of pedestrians to use. 878 | 'squarify_ratio': The width/height ratio of bounding boxes. A value between (0,1]. 0 the original 879 | ratio is used. 880 | 'data_split_type': How to split the data. Options: 'default', predefined sets, 'random', randomly split the data, 881 | and 'kfold', k-fold data split (NOTE: only train/test splits). 882 | 'seq_type': Sequence type to generate. Options: 'trajectory', generates tracks, 'crossing', generates 883 | tracks up to 'crossing_point', 'intention' generates tracks similar to human experiments 884 | 'min_track_size': Min track length allowable. 885 | 'random_params: Parameters for random data split generation. (see _get_random_pedestrian_ids) 886 | 'kfold_params: Parameters for kfold split generation. (see _get_kfold_pedestrian_ids) 887 | :return: Sequence data 888 | """ 889 | params = {'fstride': 1, 890 | 'sample_type': 'all', # 'beh' 891 | 'height_rng': [0, float('inf')], 892 | 'squarify_ratio': 0, 893 | 'data_split_type': 'default', # kfold, random, default 894 | 'seq_type': 'intention', 895 | 'min_track_size': 15, 896 | 'random_params': {'ratios': None, 897 | 'val_data': True, 898 | 'regen_data': False}, 899 | 'kfold_params': {'num_folds': 5, 'fold': 1}} 900 | 901 | for i in opts.keys(): 902 | params[i] = opts[i] 903 | 904 | print('---------------------------------------------------------') 905 | print("Generating trajectory sequence data") 906 | self._print_dict(params) 907 | annot_database = self.generate_database() 908 | if params['seq_type'] == 'trajectory': 909 | sequence_data = self._get_trajectories(image_set, annot_database, **params) 910 | elif params['seq_type'] == 'crossing': 911 | sequence_data = self._get_crossing(image_set, annot_database, **params) 912 | elif params['seq_type'] == 'intention': 913 | sequence_data = self._get_intention(image_set, annot_database, **params) 914 | 915 | return sequence_data 916 | 917 | def _get_trajectories(self, image_set, annotations, **params): 918 | """ 919 | Generates trajectory data. 
920 | :param image_set: Data split to use 921 | :param annotations: Annotations database 922 | :param params: Parameters to generate data (see generade_database) 923 | :return: A dictionary of trajectories 924 | """ 925 | print('---------------------------------------------------------') 926 | print("Generating trajectory data") 927 | 928 | num_pedestrians = 0 929 | seq_stride = params['fstride'] 930 | sq_ratio = params['squarify_ratio'] 931 | height_rng = params['height_rng'] 932 | 933 | image_seq, pids_seq = [], [] 934 | box_seq, center_seq, occ_seq = [], [], [] 935 | intent_seq = [] 936 | obds_seq, gpss_seq, head_ang_seq, gpsc_seq, yrp_seq = [], [], [], [], [] 937 | 938 | set_ids, _pids = self._get_data_ids(image_set, params) 939 | 940 | for sid in set_ids: 941 | for vid in sorted(annotations[sid]): 942 | img_width = annotations[sid][vid]['width'] 943 | pid_annots = annotations[sid][vid]['ped_annotations'] 944 | vid_annots = annotations[sid][vid]['vehicle_annotations'] 945 | for pid in sorted(pid_annots): 946 | if params['data_split_type'] != 'default' and pid not in _pids: 947 | continue 948 | num_pedestrians += 1 949 | frame_ids = pid_annots[pid]['frames'] 950 | boxes = pid_annots[pid]['bbox'] 951 | images = [self._get_image_path(sid, vid, f) for f in frame_ids] 952 | occlusions = pid_annots[pid]['occlusion'] 953 | 954 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 955 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 956 | frame_ids, boxes, 957 | images, occlusions) 958 | 959 | if len(boxes) / seq_stride < params['min_track_size']: # max_obs_size: #90 + 45 960 | continue 961 | 962 | if sq_ratio: 963 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 964 | 965 | image_seq.append(images[::seq_stride]) 966 | box_seq.append(boxes[::seq_stride]) 967 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 968 | occ_seq.append(occlusions[::seq_stride]) 969 | 970 | ped_ids = [[pid]] * len(boxes) 971 | pids_seq.append(ped_ids[::seq_stride]) 972 | 973 | intent = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes) 974 | intent_seq.append(intent[::seq_stride]) 975 | 976 | gpsc_seq.append([(vid_annots[i]['latitude'], vid_annots[i]['longitude']) 977 | for i in frame_ids][::seq_stride]) 978 | obds_seq.append([[vid_annots[i]['OBD_speed']] for i in frame_ids][::seq_stride]) 979 | gpss_seq.append([[vid_annots[i]['GPS_speed']] for i in frame_ids][::seq_stride]) 980 | head_ang_seq.append([[vid_annots[i]['heading_angle']] for i in frame_ids][::seq_stride]) 981 | yrp_seq.append([(vid_annots[i]['yaw'], vid_annots[i]['roll'], vid_annots[i]['pitch']) 982 | for i in frame_ids][::seq_stride]) 983 | 984 | print('Subset: %s' % image_set) 985 | print('Number of pedestrians: %d ' % num_pedestrians) 986 | print('Total number of samples: %d ' % len(image_seq)) 987 | 988 | return {'image': image_seq, 989 | 'pid': pids_seq, 990 | 'bbox': box_seq, 991 | 'center': center_seq, 992 | 'occlusion': occ_seq, 993 | 'obd_speed': obds_seq, 994 | 'gps_speed': gpss_seq, 995 | 'heading_angle': head_ang_seq, 996 | 'gps_coord': gpsc_seq, 997 | 'yrp': yrp_seq, 998 | 'intention_prob': intent_seq} 999 | 1000 | def _get_crossing(self, image_set, annotations, **params): 1001 | """ 1002 | Generates crossing data. 
1003 | :param image_set: Data split to use 1004 | :param annotations: Annotations database 1005 | :param params: Parameters to generate data (see generade_database) 1006 | :return: A dictionary of trajectories 1007 | """ 1008 | 1009 | print('---------------------------------------------------------') 1010 | print("Generating crossing data") 1011 | 1012 | num_pedestrians = 0 1013 | seq_stride = params['fstride'] 1014 | sq_ratio = params['squarify_ratio'] 1015 | height_rng = params['height_rng'] 1016 | 1017 | image_seq, pids_seq = [], [] 1018 | box_seq, center_seq, occ_seq = [], [], [] 1019 | intent_seq = [] 1020 | obds_seq, gpss_seq, head_ang_seq, gpsc_seq, yrp_seq = [], [], [], [], [] 1021 | cross_points = [] 1022 | activities = [] 1023 | 1024 | set_ids, _pids = self._get_data_ids(image_set, params) 1025 | 1026 | for sid in set_ids: 1027 | for vid in sorted(annotations[sid]): 1028 | img_width = annotations[sid][vid]['width'] 1029 | pid_annots = annotations[sid][vid]['ped_annotations'] 1030 | vid_annots = annotations[sid][vid]['vehicle_annotations'] 1031 | for pid in sorted(pid_annots): 1032 | if params['data_split_type'] != 'default' and pid not in _pids: 1033 | continue 1034 | num_pedestrians += 1 1035 | 1036 | frame_ids = pid_annots[pid]['frames'] 1037 | event_frame = pid_annots[pid]['attributes']['crossing_point'] 1038 | 1039 | end_idx = frame_ids.index(event_frame) 1040 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1041 | frame_ids = frame_ids[: end_idx + 1] 1042 | images = [self._get_image_path(sid, vid, f) for f in frame_ids] 1043 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1044 | 1045 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1046 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1047 | frame_ids, boxes, 1048 | images, occlusions) 1049 | 1050 | if len(boxes) / seq_stride < params['min_track_size']: 1051 | continue 1052 | 1053 | if sq_ratio: 1054 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1055 | 1056 | image_seq.append(images[::seq_stride]) 1057 | box_seq.append(boxes[::seq_stride]) 1058 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1059 | occ_seq.append(occlusions[::seq_stride]) 1060 | 1061 | ped_ids = [[pid]] * len(boxes) 1062 | pids_seq.append(ped_ids[::seq_stride]) 1063 | 1064 | intent = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes) 1065 | intent_seq.append(intent[::seq_stride]) 1066 | 1067 | cross_point = [[pid_annots[pid]['bbox'][end_idx]]] * len(boxes) 1068 | cross_points.append(cross_point[::seq_stride]) 1069 | 1070 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes) 1071 | activities.append(acts[::seq_stride]) 1072 | 1073 | gpsc_seq.append([[(vid_annots[i]['latitude'], vid_annots[i]['longitude'])] 1074 | for i in frame_ids][::seq_stride]) 1075 | obds_seq.append([[vid_annots[i]['OBD_speed']] for i in frame_ids][::seq_stride]) 1076 | gpss_seq.append([[vid_annots[i]['GPS_speed']] for i in frame_ids][::seq_stride]) 1077 | head_ang_seq.append([[vid_annots[i]['heading_angle']] for i in frame_ids][::seq_stride]) 1078 | yrp_seq.append([[(vid_annots[i]['yaw'], vid_annots[i]['roll'], vid_annots[i]['pitch'])] 1079 | for i in frame_ids][::seq_stride]) 1080 | 1081 | print('Subset: %s' % image_set) 1082 | print('Number of pedestrians: %d ' % num_pedestrians) 1083 | print('Total number of samples: %d ' % len(image_seq)) 1084 | 1085 | return {'image': image_seq, 1086 | 'pid': pids_seq, 1087 | 'bbox': box_seq, 1088 | 'center': center_seq, 1089 | 
'occlusion': occ_seq, 1090 | 'obd_speed': obds_seq, 1091 | 'gps_speed': gpss_seq, 1092 | 'heading_angle': head_ang_seq, 1093 | 'gps_coord': gpsc_seq, 1094 | 'cross_point': cross_points, 1095 | 'yrp': yrp_seq, 1096 | 'intention_prob': intent_seq, 1097 | 'activities': activities, 1098 | 'image_dimension': self._get_dim()} 1099 | 1100 | def _get_intention(self, image_set, annotations, **params): 1101 | """ 1102 | Generates intention data. 1103 | :param image_set: Data split to use 1104 | :param annotations: Annotations database 1105 | :param params: Parameters to generate data (see generade_database) 1106 | :return: A dictionary of trajectories 1107 | """ 1108 | print('---------------------------------------------------------') 1109 | print("Generating intention data") 1110 | 1111 | num_pedestrians = 0 1112 | seq_stride = params['fstride'] 1113 | sq_ratio = params['squarify_ratio'] 1114 | height_rng = params['height_rng'] 1115 | 1116 | intention_prob, intention_binary = [], [] 1117 | image_seq, pids_seq = [], [] 1118 | box_seq, center_seq, occ_seq = [], [], [] 1119 | set_ids, _pids = self._get_data_ids(image_set, params) 1120 | 1121 | for sid in set_ids: 1122 | for vid in sorted(annotations[sid]): 1123 | img_width = annotations[sid][vid]['width'] 1124 | pid_annots = annotations[sid][vid]['ped_annotations'] 1125 | for pid in sorted(pid_annots): 1126 | if params['data_split_type'] != 'default' and pid not in _pids: 1127 | continue 1128 | num_pedestrians += 1 1129 | exp_start_frame = pid_annots[pid]['attributes']['exp_start_point'] 1130 | critical_frame = pid_annots[pid]['attributes']['critical_point'] 1131 | frames = pid_annots[pid]['frames'] 1132 | 1133 | start_idx = frames.index(exp_start_frame) 1134 | end_idx = frames.index(critical_frame) 1135 | 1136 | boxes = pid_annots[pid]['bbox'][start_idx:end_idx + 1] 1137 | frame_ids = frames[start_idx:end_idx + 1] 1138 | images = [self._get_image_path(sid, vid, f) for f in frame_ids] 1139 | occlusions = pid_annots[pid]['occlusion'][start_idx:end_idx + 1] 1140 | 1141 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1142 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, 1143 | frame_ids, boxes, 1144 | images, occlusions) 1145 | if len(boxes) / seq_stride < params['min_track_size']: 1146 | continue 1147 | 1148 | if sq_ratio: 1149 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1150 | 1151 | int_prob = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes) 1152 | int_bin = [[int(pid_annots[pid]['attributes']['intention_prob'] > 0.5)]] * len(boxes) 1153 | 1154 | image_seq.append(images[::seq_stride]) 1155 | box_seq.append(boxes[::seq_stride]) 1156 | occ_seq.append(occlusions[::seq_stride]) 1157 | 1158 | intention_prob.append(int_prob[::seq_stride]) 1159 | intention_binary.append(int_bin[::seq_stride]) 1160 | 1161 | ped_ids = [[pid]] * len(boxes) 1162 | pids_seq.append(ped_ids[::seq_stride]) 1163 | 1164 | print('Subset: %s' % image_set) 1165 | print('Number of pedestrians: %d ' % num_pedestrians) 1166 | print('Total number of samples: %d ' % len(image_seq)) 1167 | 1168 | return {'image': image_seq, 1169 | 'bbox': box_seq, 1170 | 'occlusion': occ_seq, 1171 | 'intention_prob': intention_prob, 1172 | 'intention_binary': intention_binary, 1173 | 'ped_id': pids_seq} 1174 | -------------------------------------------------------------------------------- /utils/jaad_data.py: -------------------------------------------------------------------------------- 1 | ''' 2 | imitate to rewrite jaad_data.py file. 
3 | ''' 4 | import sys 5 | import pickle 6 | import cv2 7 | 8 | import numpy as np 9 | import xml.etree.ElementTree as ET 10 | 11 | from os.path import join, abspath, exists 12 | from os import listdir, makedirs 13 | from sklearn.model_selection import train_test_split, KFold 14 | 15 | class JAAD(object): 16 | def __init__(self, data_path='', regen_pkl=False): 17 | '''param: 18 | data_path: Path to the folder of the dataset, default is current dir. 19 | regen_pkl: Whether to regenerate the database. 20 | ''' 21 | self._name = 'JAAD' 22 | self._regen_pkl = regen_pkl 23 | self._image_ext = '.png' 24 | 25 | #Paths 26 | self._jaad_path = data_path if data_path else self._get_default_path() 27 | assert exists(self._jaad_path), 'Jaad path does not exist: {}'.format(self._jaad_path) 28 | self._data_split_ids_path = join(self._jaad_path, 'split_ids') 29 | self._annotation_path = join(self._jaad_path, 'annotations') 30 | self._annotation_vehicle_path = join(self._jaad_path, 'annotations_vehicle') 31 | self._annotation_traffic_path = join(self._jaad_path, 'annotations_traffic') 32 | self._annotation_attributes_path = join(self._jaad_path, 'annotations_attributes') 33 | self._annotation_appearance_path = join(self._jaad_path, 'annotations_appearance') 34 | self._clips_path = join(self._jaad_path, 'JAAD_clips') 35 | self._images_path = join(self._jaad_path, 'images') 36 | 37 | @property 38 | def cache_path(self): 39 | ''' 40 | generate a path to save cache files. 41 | :return: Cache file folder path 42 | ''' 43 | cache_path = abspath(join(self._jaad_path, 'data_cache')) 44 | if not exists(cache_path): 45 | makedirs(cache_path) 46 | return cache_path 47 | 48 | 49 | def _get_default_path(self): 50 | ''' 51 | return default data_path where jaad_raw files are expected to be palced. 52 | ''' 53 | return 'dataset/jaad' 54 | 55 | def _get_video_ids(self): 56 | ''' 57 | return a list of all video ids 58 | :return: the list of all video ids 59 | ''' 60 | return [vid.split('.')[0] for vid in listdir(self._annotation_path)] 61 | 62 | 63 | def update_progress(self, progress): 64 | ''' 65 | create a progress bar. 66 | :param progress: the progress thus far 67 | ''' 68 | barLength = 20 69 | status = '' 70 | if isinstance(progress, int): 71 | progress = float(progress) 72 | block = int(round(barLength * progress)) 73 | text = '\r[{}] {:0.2f}% {}'.format('#' * block + '-' * (barLength - block), progress * 100, status) 74 | sys.stdout.write(text) 75 | sys.stdout.flush() 76 | 77 | 78 | def extract_and_save_images(self): 79 | ''' 80 | Extract images from clips and save on drive 81 | ''' 82 | videos = [f.split('.')[0] for f in sorted(listdir(self._clips_path))] 83 | #eg: clip_path: JAAD_clips/---->video_0001.mp4 84 | # |-->video_0002.mp4 85 | # |-->video_0003.mp4 86 | # |-->video_0004.mp4 87 | # --->video_0005.mp4 88 | #get videos = [video_0001, video_0002, video_0003, video_0004, video_0005] 89 | for vid in videos: 90 | path_to_file = join(self._annotation_path, vid + '.xml') 91 | #path: annotations/vidXXX.xml 92 | print(vid) 93 | tree = ET.parse(path_to_file)#load element tree. 
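            # --- Editor's note: descriptive comment, not part of the original file. ---
            # The per-video annotation XML stores the total frame count under
            # ./meta/task/size; it is used to drive the progress bar and for the final
            # check that the number of extracted images matches the number of frames.
            # ---------------------------------------------------------------------------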
94 | num_frames = int(tree.find('./meta/task/size').text) 95 | video_clip_path = join(self._clips_path, vid + '.mp4') 96 | #path: JAAD_clips/vidXXX.mp4 97 | save_images_path = join(self._images_path, vid) 98 | #path: images/vidXXX 99 | if not exists(save_images_path): 100 | makedirs(save_images_path) 101 | vidcap = cv2.VideoCapture(video_clip_path) 102 | success, image = vidcap.read() 103 | frame_num = 0 104 | img_count = 0 105 | if not success: 106 | print('Failed to open the video {}'.format(vid)) 107 | while success: 108 | self.update_progress(img_count / num_frames) 109 | img_count += 1 110 | img_path = join(save_images_path, '{:05d}.png'.format(frame_num)) 111 | if not exists(img_path): 112 | cv2.imwrite(img_path, image) 113 | success, image = vidcap.read() 114 | frame_num += 1 115 | if num_frames != img_count: 116 | print('num images don\'t match {}/{}'.format(num_frames, img_count)) 117 | print('\n') 118 | 119 | 120 | def _map_text_to_scalar(self, label_type, value): 121 | ''' 122 | maps a text label in XML file to scalars 123 | :param: label type: the label type 124 | :param value: the scalar value 125 | ''' 126 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2}, 127 | 'action': {'standing': 0, 'walking': 1}, 128 | 'nod': {'__undefined__': 0, 'nodding': 1}, 129 | 'look': {'not-looking': 0, 'looking': 1}, 130 | 'hand_gesture': {'__undefined__': 0, 'greet': 1, 'yield': 2, 131 | 'rightofway': 3, 'other': 4}, 132 | 'reaction': {'__undefined__': 0, 'clear_path': 1, 'speed_up': 2, 133 | 'slow_down': 3}, 134 | 'cross': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1}, 135 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3}, 136 | 'designated': {'ND': 0, 'D': 1}, 137 | 'gender': {'n/a': 0, 'female': 1, 'male': 2}, 138 | 'intersection': {'no': 0, 'yes': 1}, 139 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2}, 140 | 'traffic_direction': {'OW': 0, 'TW': 1}, 141 | 'signalized': {'n/a': 0, 'NS': 1, 'S': 2}, 142 | 'vehicle': {'stopped': 0, 'moving_slow': 1, 'moving_fast': 2, 143 | 'decelerating': 3, 'accelerating': 4}, 144 | 'road_type': {'street': 0, 'parking_lot': 1, 'garage': 2}, 145 | 'traffic_light': {'n/a': 0, 'red': 1, 'green': 2}} 146 | return map_dic[label_type][value] 147 | 148 | def _map_scalar_to_text(self, label_type, value): 149 | ''' 150 | maps a scalar value to a text label 151 | :param label_type: the label type 152 | :param value: the scalar to be mapped 153 | :return: the text label 154 | ''' 155 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'}, 156 | 'action': {0: 'standing', 1: 'walking'}, 157 | 'nod': {0: '__undefined__', 1: 'nodding'}, 158 | 'look': {0: 'not-looking', 1: 'looking'}, 159 | 'hand_gesture': {0: '__undefined__', 1: 'greet', 160 | 2: 'yield', 3: 'rightofway', 161 | 4: 'other'}, 162 | 'reaction': {0: '__undefined__', 1: 'clear_path', 163 | 2: 'speed_up', 3: 'slow_down'}, 164 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'}, 165 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'}, 166 | 'designated': {0: 'ND', 1: 'D'}, 167 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'}, 168 | 'intersection': {0: 'no', 1: 'yes'}, 169 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'}, 170 | 'traffic_direction': {0: 'OW', 1: 'TW'}, 171 | 'signalized': {0: 'n/a', 1: 'NS', 2: 'S'}, 172 | 'vehicle': {0: 'stopped', 1: 'moving_slow', 2: 'moving_fast', 173 | 3: 'decelerating', 4: 'accelerating'}, 174 | 'road_type': {0: 'street', 1: 'parking_lot', 2: 'garage'}, 175 | 'traffic_light': {0: 'n/a', 1: 'red', 2: 'green'}} 176 | 
return map_dic[label_type][value] 177 | 178 | 179 | def _get_annotations(self, vid): 180 | ''' 181 | Generates a dictionary of annotations by parsing the video XML file 182 | :param vid: the id of video to parse 183 | :return: a dictionary of annotations 184 | ''' 185 | path_to_file = join(self._annotation_path, vid + '.xml') 186 | tree = ET.parse(path_to_file) 187 | ped_annt = 'ped_annotations' 188 | annotations = {} 189 | annotations['num_frames'] = int(tree.find('./meta/task/size').text) 190 | annotations['width'] = int(tree.find('./meta/task/original_size/width').text) 191 | annotations['height'] = int(tree.find('./meta/task/original_size/height').text) 192 | annotations[ped_annt] = {} 193 | ped_tracks = tree.findall('./track') 194 | for t in ped_tracks: 195 | boxes = t.findall('./box') 196 | new_id = boxes[0].find('./attribute[@name=\"id\"]').text 197 | old_id = boxes[0].find('./attribute[@name=\"old_id\"]').text 198 | annotations[ped_annt][new_id] = {'old_id': old_id, 'frames': [], 'bbox': [], 'occlusion': []} 199 | if 'pedestrian' in old_id: 200 | annotations['ped_annotations'][new_id]['behavior'] = { 201 | 'cross': [], 'reaction': [], 'hand_gesture': [], 'look': [], 'action': [], 'nod': [] 202 | } 203 | else: 204 | annotations[ped_annt][new_id]['behavior'] = {} 205 | for b in boxes: 206 | annotations[ped_annt][new_id]['bbox'].append( 207 | [float(b.get('xtl')), float(b.get('ytl')), 208 | float(b.get('xbr')), float(b.get('ybr'))] 209 | ) 210 | occ = self._map_text_to_scalar('occlusion',b.find('./attribute[@name=\"occlusion\"]').text) 211 | annotations[ped_annt][new_id]['occlusion'].append(occ) 212 | annotations[ped_annt][new_id]['frames'].append(int(b.get('frame'))) 213 | for beh in annotations['ped_annotations'][new_id]['behavior'].keys(): 214 | annotations[ped_annt][new_id]['behavior'][beh].append(self._map_text_to_scalar(beh, b.find('./attribute[@name=\"' + beh + '\"]').text)) 215 | return annotations 216 | 217 | def _get_ped_attributes(self, vid): 218 | ''' 219 | Generates a dictionary of attributes by parsing the video XML file. 220 | :param vid: the id of the video to parse 221 | :return: a dictionary of attributes. 222 | ''' 223 | path_to_file = join(self._annotation_attributes_path, vid + '_attributes.xml') 224 | tree = ET.parse(path_to_file) 225 | attributes = {} 226 | pedestrians = tree.findall('./pedestrian') 227 | for p in pedestrians: 228 | new_id = p.get('id') 229 | old_id = p.get('old_id') 230 | attributes[new_id] = {'old_id': old_id} 231 | for k, v in p.items(): 232 | if 'id' in k: 233 | continue 234 | try: 235 | attributes[new_id][k] = int(v) 236 | except ValueError: 237 | attributes[new_id][k] = self._map_text_to_scalar(k, v) 238 | return attributes 239 | 240 | def _get_ped_appearance(self, vid): 241 | ''' 242 | Generates a dictionary of appearance annotations by parsing the video XML file. 243 | :param vid: the id of video to parse. The labels are follows: 244 | -pose_front, pose_back... - coarse pose of the pedestrian relative to the camera. 245 | -clothes_below_knee - long clothing. 246 | -clothes_upper_light, clothes_lower_dark... - coarse clothing color above/below waist. 247 | -backpack - presence of a backpack(wron on the back, not held in hand). 248 | -bag_hand, bag_elbow, bag_shoulder - whether bag(s) are held in a hand, on a bent elbow or worn on a shoulder. 249 | -bag_left_side, bag_right_side - whether bag(s) appear in the left/right side of the pedestrian body. 250 | -cap, hood - headwear. 
251 | -umbrella, phone, baby, object - various things carried by the pedestrians.
252 | -stroller/cart - objects being pushed by the pedestrian.
253 | -bicycle/motorcycle - for pedestrians riding or walking these vehicles.
254 | :return: A dictionary of appearance annotations.
255 | '''
256 | labels = ['pose_front', 'pose_back', 'pose_left', 'pose_right',
257 | 'clothes_below_knee', 'clothes_upper_light', 'clothes_upper_dark', 'clothes_lower_light',
258 | 'clothes_lower_dark', 'backpack', 'bag_hand', 'bag_elbow',
259 | 'bag_shoulder', 'bag_left_side', 'bag_right_side', 'cap',
260 | 'hood', 'sunglasses', 'umbrella', 'phone',
261 | 'baby', 'object', 'stroller_cart', 'bicycle_motorcycle']
262 | path_to_file = join(self._annotation_appearance_path, vid + '_appearance.xml')
263 | tree = ET.parse(path_to_file)
264 | annotations = {}
265 | ped_tracks = tree.findall('./track')
266 | for t in ped_tracks:
267 | boxes = t.findall('./box')
268 | new_id = t.get('id')
269 | annotations[new_id] = dict(zip(labels, [[] for _ in range(len(labels))]))
270 | annotations[new_id]['frames'] = []
271 | for b in boxes:
272 | annotations[new_id]['frames'].append(int(b.get('frame')))
273 | for l in labels:
274 | annotations[new_id][l].append(b.get(l))
275 | return annotations
276 |
277 | def _get_vehicle_attributes(self, vid):
278 | '''
279 | Generates a dictionary of vehicle attributes by parsing the video XML file.
280 | :param vid: the id of the video to parse.
281 | :return: a dictionary of vehicle attributes.
282 | '''
283 | path_to_file = join(self._annotation_vehicle_path, vid + '_vehicle.xml')
284 | tree = ET.parse(path_to_file)
285 | veh_attributes = {}
286 | frames = tree.findall('./frame')
287 | for f in frames:
288 | veh_attributes[int(f.get('id'))] = self._map_text_to_scalar('vehicle', f.get('action'))
289 | return veh_attributes
290 |
291 | def _get_traffic_attributes(self, vid):
292 | '''
293 | Generates a dictionary of traffic attributes by parsing the video XML file.
294 | :param vid: the id of the video to parse.
295 | :return: a dictionary of traffic attributes.
296 | ''' 297 | path_to_file= join(self._annotation_traffic_path, vid + '_traffic.xml') 298 | tree = ET.parse(path_to_file) 299 | road_type = tree.find('./road_type').text 300 | traffic_attributes = {'road_type': self._map_text_to_scalar('road_type', road_type)} 301 | frames = tree.findall('./frame') 302 | for f in frames: 303 | traffic_attributes[int(f.get('id'))] = {'ped_crossing': int(f.get('ped_crossing')), 304 | 'ped_sign': int(f.get('ped_sign')), 305 | 'stop_sign': int(f.get('stop_sign')), 306 | 'traffic_light': self._map_text_to_scalar('traffic_light', 307 | f.get('traffic_light'))} 308 | return traffic_attributes 309 | 310 | def generate_database(self): 311 | ''' 312 | Generate a database of jaad database by integrating all annotations 313 | Dictionary structure 314 | 'vid_id'(str):{ 315 | 'num_frames': int, 316 | 'width': int, 317 | 'height': int, 318 | 'ped_annotations'(str): { 319 | 'ped_id'(str):{ 320 | 'old_id': str, 321 | 'frames': list(int), 322 | 'occlusion': list(int), 323 | 'bbox': list([x1, y1, x2, y2]), 324 | 'behavior'(str): { 325 | 'action': list(int), 326 | 'reaction': list(int), 327 | 'nod': list(int), 328 | 'hand_gesture': list(int), 329 | 'cross': list(int), 330 | 'look': list(int) 331 | } 332 | 'appearance'(str): { 333 | 'pose_front': list(int), 334 | 'pose_back': list(int), 335 | 'pose_left': list(int), 336 | 'pose_right': list(int), 337 | 'clothes_below_knee': list(int), 338 | 'clothes_upper_light': list(int), 339 | 'clothes_upper_dark': list(int), 340 | 'clothes_lower_light': list(int), 341 | 'clothes_lower_dark': list(int), 342 | 'backpack': list(int), 343 | 'bag_hand': list(int), 344 | 'bag_elbow': list(int), 345 | 'bag_shoulder': list(int), 346 | 'bag_left_side': list(int), 347 | 'bag_right_side': list(int), 348 | 'cap': list(int), 349 | 'hood': list(int), 350 | 'sunglasses': list(int), 351 | 'umbrella': list(int), 352 | 'phone': list(int), 353 | 'baby': list(int), 354 | 'object': list(int), 355 | 'stroller_cart': list(int), 356 | 'bicycle_motorcycle': list(int) 357 | } 358 | 'attributes'(str): { 359 | 'age': int, 360 | 'old_id': str, 361 | 'num_lanes': int, 362 | 'crossing': int, 363 | 'gender': int, 364 | 'crossing_point': int, 365 | 'decision_point': int, 366 | 'intersection': int, 367 | 'designated': int, 368 | 'signalized': int, 369 | 'traffic_direction': int, 370 | 'group_size': int, 371 | 'motion_direction': int 372 | } 373 | } 374 | } 375 | 'vehicle_annotations'(str): { 376 | frames(int): { 377 | 'action': int 378 | } 379 | } 380 | 'traffic_annotations'(str): { 381 | road_type: int, 382 | frames(int): { 383 | 'ped_crossing': int, 384 | 'ped_sign': int, 385 | 'stop_sign': int, 386 | 'traffic_light': int 387 | } 388 | } 389 | } 390 | :return: A database dictionary 391 | ''' 392 | print('------------------------------------------------') 393 | print('Generating databse for jaad') 394 | #Generates a list of behavioral xml file names for videos 395 | cache_file = join(self.cache_path, 'jaad_database.pkl') 396 | if exists(cache_file) and not self._regen_pkl: 397 | with open(cache_file, 'rb') as fid: 398 | try: 399 | database = pickle.load(fid) 400 | except: 401 | database = pickle.load(fid, encoding='bytes') 402 | print('jaad database loaded from {}'.format(cache_file)) 403 | return database 404 | 405 | video_ids = sorted(self._get_video_ids()) 406 | database = {} 407 | for vid in video_ids: 408 | print('Getting annotations for %s' % vid) 409 | vid_annotations = self._get_annotations(vid) 410 | vid_attributes = self._get_ped_attributes(vid) 411 | 
vid_appearance = self._get_ped_appearance(vid) 412 | vid_veh_annotations = self._get_vehicle_attributes(vid) 413 | vid_traffic_annotations = self._get_traffic_attributes(vid) 414 | #combining all annotations. 415 | vid_annotations['vehicle_annotations'] = vid_veh_annotations 416 | vid_annotations['traffic_annotations'] = vid_traffic_annotations 417 | for ped in vid_annotations['ped_annotations']: 418 | try: 419 | vid_annotations['ped_annotations'][ped]['attributes'] = vid_attributes[ped] 420 | except KeyError: 421 | vid_annotations['ped_annotations'][ped]['attributes'] = {} 422 | try: 423 | vid_annotations['ped_annotations'][ped]['appearance'] = vid_appearance[ped] 424 | except KeyError: 425 | vid_annotations['ped_annotations'][ped]['appearance'] = {} 426 | database[vid] = vid_annotations 427 | with open(cache_file, 'wb') as fid: 428 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL) 429 | print('The database is written to {}'.format(cache_file)) 430 | return database 431 | 432 | 433 | def get_data_stats(self): 434 | ''' 435 | Generate statistics for jaad database. 436 | ''' 437 | annotations = self.generate_database() 438 | videos_count = len(annotations.keys()) 439 | ped_box_beh_count = 0 440 | ped_beh_count = 0 441 | ped_count = 0 442 | ped_box_count = 0 443 | people_count = 0 444 | people_box_count = 0 445 | total_frames = 0 446 | for vid in annotations: 447 | total_frames += annotations[vid]['num_frames'] 448 | for ped in annotations[vid]['ped_annotations']: 449 | if 'b' in ped: 450 | ped_beh_count += 1 451 | ped_box_beh_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 452 | elif 'p' in ped: 453 | people_count += 1 454 | people_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 455 | else: 456 | ped_count += 1 457 | ped_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox']) 458 | print('---------------------------------------------------------') 459 | print('Number of videos: %d' % videos_count) 460 | print("Number of frames: %d" % total_frames) 461 | print("Number of pedestrians with behavior tag: %d" % ped_beh_count) 462 | print("Number of pedestrians with no behavior tag: %d" % ped_count) 463 | print("Number of people: %d" % people_count) 464 | print("Total number of pedestrians: %d" % (ped_count + ped_beh_count + people_count)) 465 | 466 | print("Number of pedestrian bounding boxes with behavior tag: %d" % ped_box_beh_count) 467 | print("Number of pedestrian bounding boxes with no behavior tag: %d" % ped_box_count) 468 | print("Number of people bounding boxes: %d" % people_box_count) 469 | print("Total number of pedestrian bounding boxes: %d" % (ped_box_beh_count + ped_box_count)) 470 | 471 | 472 | def balance_samples_count(self, seq_data, label_type, random_seed=42): 473 | ''' 474 | balance the number of positive and negative samples by randomly sampleing 475 | from the more represented samples. Only works for binary classes. 476 | :param seq_data: the sequence data to be balanced. 477 | :param label_type: the label type based in which the balancing takes place. 478 | the label values must be binary, i.e. only 0, 1. 479 | :param random_seed: the seed for random number generator. 480 | :return: balanced data sequence. 481 | ''' 482 | for lbl in seq_data[label_type]: 483 | for i in lbl: 484 | if i[0] not in [0, 1]: 485 | raise Exception('The label values used for balancing must be either 0 or 1') 486 | #balances the number of positive and negative samples. 
487 | print('----------------------------------------------------------')
488 | print('Balancing the number of positive and negative intention samples.')
489 | gt_labels = [gt[0] for gt in seq_data[label_type]]
490 | num_pos_samples = np.count_nonzero(np.array(gt_labels))
491 | num_neg_samples = len(gt_labels) - num_pos_samples
492 | new_seq_data = {}
493 | #finds the indices of the samples with larger quantity
494 | if num_neg_samples == num_pos_samples:
495 | print('Positive and negative are already balanced.')
496 | return seq_data
497 | else:
498 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples))
499 | if num_neg_samples > num_pos_samples:
500 | rm_index = np.where(np.array(gt_labels) == 0)[0]
501 | else:
502 | rm_index = np.where(np.array(gt_labels) == 1)[0]
503 | #Calculate the difference of sample counts.
504 | dif_samples = abs(num_neg_samples - num_pos_samples)
505 | np.random.seed(random_seed)
506 | np.random.shuffle(rm_index)
507 | #reduce the number of indices to the difference.
508 | rm_index = rm_index[0:dif_samples]
509 | #update the data.
510 | for k in seq_data:
511 | seq_data_k = seq_data[k]
512 | if not isinstance(seq_data[k], list):
513 | new_seq_data[k] = seq_data[k]
514 | else:
515 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index]
516 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]]
517 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels))
518 | print('Balanced:\t Positive: %d \t Negative: %d\n' % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples))
519 | return new_seq_data
520 |
521 |
522 | def _get_video_ids_split(self, image_set, subset='default'):
523 | '''
524 | Returns a list of video ids for a given data split.
525 | :param image_set: Data split, train, test, val.
526 | :return: the list of video ids.
527 | '''
528 | vid_ids = []
529 | sets = [image_set] if image_set != 'all' else ['train', 'test', 'val']
530 | for s in sets:
531 | vid_id_file = join(self._data_split_ids_path, subset, s + '.txt')
532 | with open(vid_id_file, 'rt') as fid:
533 | vid_ids.extend([x.strip() for x in fid.readlines()])
534 | return vid_ids
535 |
536 | def _get_pedestrian_ids(self, sample_type='all'):
537 | '''
538 | Get all pedestrian ids.
539 | :return: A list of pedestrian ids.
540 | '''
541 | annotations = self.generate_database()
542 | pids = []
543 | for vid in sorted(annotations):
544 | if sample_type == 'beh':
545 | pids.extend([p for p in annotations[vid]['ped_annotations'].keys() if 'b' in p])
546 | else:
547 | pids.extend(annotations[vid]['ped_annotations'].keys())
548 | return pids
549 |
550 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False, sample_type='all'):
551 | '''
552 | Generates and saves a random train/test(/val) split of pedestrian ids.
553 | :param image_set: The data split to return.
554 | :param ratios: The ratios to split the data. There should be 2 ratios (or 3 if val_data is true) and they should sum to 1, e.g. [0.4, 0.6], [0.3, 0.5, 0.2].
555 | :param val_data: Whether to generate validation data.
556 | :param regen_data: Whether to overwrite the existing data.
557 | :return: The random sample split.
558 | '''
559 | assert image_set in ['train', 'test', 'val']
560 | cache_file = join(self.cache_path, 'random_samples.pkl')
561 | if exists(cache_file) and not regen_data:
562 | print('Random sample split currently exists.\n Loading from %s' % cache_file)
563 | with open(cache_file, 'rb') as fid:
564 | try:
565 | rand_samples = pickle.load(fid)
566 | except:
567 | rand_samples = pickle.load(fid, encoding='bytes')
568 | assert image_set in rand_samples, '%s does not exist in random samples\n Please try again by setting regen_data = True' % image_set
569 | if val_data:
570 | assert len(rand_samples['ratios']) == 3, 'The existing random samples do not have validation data.\n Please try again by setting regen_data = True'
571 | if ratios is not None:
572 | assert ratios == rand_samples['ratios'], 'Specified ratios {} do not match the ones in the existing file {}.\n\
573 | Perform one of the following options:\
574 | 1- Set ratios to None\
575 | 2- Set ratios to the same values\
576 | 3- Regenerate data'.format(ratios, rand_samples['ratios'])
577 | print('The ratios are {}'.format(rand_samples['ratios']))
578 | print('Number of %s tracks %d' % (image_set, len(rand_samples[image_set])))
579 | return rand_samples[image_set]
580 | if ratios is None:
581 | if val_data:
582 | ratios = [0.5, 0.4, 0.1]
583 | else:
584 | ratios = [0.5, 0.5]
585 | assert sum(ratios) > 0.999999, 'Ratios {} do not sum to 1'.format(ratios)
586 | if val_data:
587 | assert len(ratios) == 3, 'To generate validation data three ratios should be selected.'
588 | else:
589 | assert len(ratios) == 2, 'With no validation only two ratios should be selected.'
590 | print('############# Generating random training/testing data #############')
591 | ped_ids = self._get_pedestrian_ids(sample_type)
592 | print('Total number of tracks %d' % len(ped_ids))
593 | print('The ratios are {}'.format(ratios))
594 | sample_split = {'ratios': ratios}
595 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0])
596 | print('Number of train tracks %d' % len(train_samples))
597 | if val_data:
598 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:]))
599 | print('Number of val tracks %d' % len(val_samples))
600 | sample_split['val'] = val_samples
601 | print('Number of test tracks %d' % len(test_samples))
602 | sample_split['train'] = train_samples
603 | sample_split['test'] = test_samples
604 | cache_file = join(self.cache_path, 'random_samples.pkl')
605 | with open(cache_file, 'wb') as fid:
606 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL)
607 | print('jaad {} samples written to {}'.format('random', cache_file))
608 | return sample_split[image_set]
609 |
610 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1, sample_type='all'):
611 | '''
612 | Generate kfold pedestrian ids.
613 | :param image_set: Image set split.
614 | :param num_folds: Number of folds.
615 | :param fold: The given fold.
616 | :return: List of pedestrian ids for the given fold.
617 | '''
618 | assert image_set in ['train', 'test'], 'For Kfold data split, image_set should be either \'train\' or \'test\''
619 | assert fold <= num_folds, 'Fold number should not be greater than the number of folds'
620 | print('############# Generating %d fold data #############' % num_folds)
621 | cache_file = join(self.cache_path, '%d_fold_samples.pkl' % num_folds)
622 | if exists(cache_file):
623 | print('Loading %d-fold data from %s' % (num_folds, cache_file))
624 | with open(cache_file, 'rb') as fid:
625 | try:
626 | fold_idx = pickle.load(fid)
627 | except:
628 | fold_idx = pickle.load(fid, encoding='bytes')
629 | else:
630 | ped_ids = self._get_pedestrian_ids(sample_type)
631 | kf = KFold(n_splits=num_folds, shuffle=True)
632 | fold_idx = {'pid': ped_ids}
633 | count = 1
634 | for train_index, test_index in kf.split(ped_ids):
635 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()}
636 | count += 1
637 | with open(cache_file, 'wb') as fid:
638 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL)
639 | print('jaad {}-fold samples written to {}'.format(num_folds, cache_file))
640 | print('Number of %s tracks %d' % (image_set, len(fold_idx[fold][image_set])))
641 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]]
642 | return kfold_ids
643 |
644 | def _get_data_ids(self, image_set, params):
645 | '''
646 | A helper function to generate set ids and ped ids (if needed) for processing.
647 | :param image_set: Image_set to generate data.
648 | :param params: Data generation params.
649 | :return: Set and pedestrian ids.
650 | '''
651 | _pids = None
652 | if params['data_split_type'] == 'default':
653 | return self._get_video_ids_split(image_set, params['subset']), _pids
654 |
655 | video_ids = self._get_video_ids_split('all', params['subset'])
656 | if params['data_split_type'] == 'random':
657 | params['random_params']['sample_type'] = params['sample_type']
658 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params'])
659 | elif params['data_split_type'] == 'kfold':
660 | params['kfold_params']['sample_type'] = params['sample_type']
661 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params'])
662 | return video_ids, _pids
663 |
664 | def _squarify(self, bbox, ratio, img_width):
665 | '''
666 | Changes the ratio of bounding boxes to a fixed ratio.
667 | :param bbox: Bounding box.
668 | :param ratio: Ratio to be changed to.
669 | :param img_width: Image width.
670 | :return: Squarified bounding box.
671 | '''
672 | width = abs(bbox[0] - bbox[2])
673 | height = abs(bbox[1] - bbox[3])
674 | width_change = height * ratio - width
675 | bbox[0] = bbox[0] - width_change / 2
676 | bbox[2] = bbox[2] + width_change / 2
677 | if bbox[0] < 0:
678 | bbox[0] = 0
679 | #check whether the new bbox goes beyond image borders
680 | #if this is the case, the bbox is shifted back.
681 | if bbox[2] > img_width:
682 | bbox[0] = bbox[0] - bbox[2] + img_width
683 | bbox[2] = img_width
684 | return bbox
685 |
686 | #Pedestrian detection generators
687 | def get_detection_data(self, image_set, method, occlusion_type=None, file_path='data/', **params):
688 | '''
689 | Generate data for pedestrian detection algorithms.
690 | :param image_set: Split set name.
691 | :param method: Detection algorithm: frcnn, retinanet, yolo3, ssd.
692 | :param occlusion_type: the types of occlusion: None: only unoccluded samples.
693 | part: Unoccluded and partially occluded samples.
694 | full: All samples.
695 | :param file_path: Where to save the generated data files.
696 | :return: Pedestrian samples.
697 | '''
698 | squarify_ratio = params['squarify_ratio']
699 | frame_stride = params['fstride']
700 | height_rng = params['height_rng']
701 | if not exists(file_path):
702 | makedirs(file_path)
703 | if height_rng is None:
704 | height_rng = [0, float('inf')]
705 | annotations = self.generate_database()
706 | video_ids, _pids = self._get_data_ids(image_set, params)
707 | ped_samples = {}
708 | unique_samples = []
709 | total_sample_count = 0
710 | for vid in video_ids:
711 | img_width = annotations[vid]['width']
712 | img_height = annotations[vid]['height']
713 | num_frames = annotations[vid]['num_frames']
714 | for i in range(0, num_frames, frame_stride):
715 | ped_samples[join(self._jaad_path, 'images', vid, '{:05d}.png'.format(i))] = []
716 | for pid in annotations[vid]['ped_annotations']:
717 | if params['data_split_type'] != 'default' and pid not in _pids:
718 | continue
719 | difficult = 0
720 | if 'p' in pid:
721 | difficult = -1
722 | if image_set in ['train', 'val']:
723 | continue
724 | imgs = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in annotations[vid]['ped_annotations'][pid]['frames']]
725 | boxes = annotations[vid]['ped_annotations'][pid]['bbox']
726 | occlusion = annotations[vid]['ped_annotations'][pid]['occlusion']
727 | for i, b in enumerate(boxes):
728 | if imgs[i] not in ped_samples:
729 | continue
730 | bbox_height = abs(b[1] - b[3])
731 | if height_rng[0] <= bbox_height <= height_rng[1]:
732 | if (occlusion_type is None and occlusion[i] == 0) or (occlusion_type == 'part' and occlusion[i] < 2) or (occlusion_type == 'full'):
733 | if squarify_ratio:
734 | b = self._squarify(b, squarify_ratio, img_width)
735 | ped_samples[imgs[i]].append({'width': img_width,
736 | 'height': img_height,
737 | 'tag': pid,
738 | 'box': b,
739 | 'seg_area': (b[2] - b[0] + 1) * (b[3] - b[1] + 1),
740 | 'occlusion': occlusion[i],
741 | 'difficult': difficult})
742 | if pid not in unique_samples:
743 | unique_samples.append(pid)
744 | total_sample_count += 1
745 | print('Number of unique pedestrians %d ' % len(unique_samples))
746 | print('Number of samples %d ' % total_sample_count)
747 | if method == 'frcnn':
748 | return self._get_data_frcnn(ped_samples)
749 | elif method == 'retinanet':
750 | return self._generate_csv_data_retinanet(image_set, file_path, ped_samples)
751 | elif method == 'yolo3':
752 | return self._generate_csv_data_yolo3(image_set, file_path, ped_samples)
753 | elif method == 'ssd':
754 | return self._generate_csv_data_ssd(image_set, file_path, ped_samples)
755 |
756 |
757 | def _get_data_frcnn(self, ped_samples):
758 | '''
759 | Data generation for the Faster-RCNN algorithm.
760 | :param ped_samples: Dictionary of all samples.
761 | '''
762 | classes_count = {}
763 | class_mapping = {}
764 | all_imgs = {}
765 | class_name = 'pedestrian'
766 | classes_count['bg'] = 0
767 | class_mapping['bg'] = 1
768 | classes_count[class_name] = 0
769 | class_mapping[class_name] = 0
770 | for img, samples in sorted(ped_samples.items()):
771 | if not samples:
772 | continue
773 | all_imgs[img] = {'filepath': img, 'width': samples[0]['width'],
774 | 'height': samples[0]['height'], 'bboxes': []}
775 | for s in samples:
776 | box = s['box']
777 | all_imgs[img]['bboxes'].append({'class': class_name, 'x1': box[0],
778 | 'x2': box[2], 'y1': box[1], 'y2': box[3]})
779 | print('Data generated for Faster-RCNN')
780 | all_data = []
781 | for key in all_imgs:
782 | all_data.append(all_imgs[key])
783 | return all_data, classes_count, class_mapping
784 |
785 | def _generate_csv_data_retinanet(self, image_set, file_path, ped_samples):
786 | '''
787 | Data generation for the RetinaNet algorithm.
788 | :param image_set: Data split.
789 | :param file_path: Path to save the data.
790 | :param ped_samples: Dictionary of all samples.
791 | '''
792 | class_name = 'pedestrian'
793 | data_save_path = file_path + 'retinanet_' + image_set + '.csv'
794 | with open(data_save_path, 'wt') as f:
795 | for img, samples in sorted(ped_samples.items()):
796 | if not samples:
797 | f.write('%s,,,,,\n' % (img))
798 | for s in samples:
799 | box = s['box']
800 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], class_name))
801 | print('Data generated for RetinaNet.')
802 | map_path = file_path + '_mapping.csv'
803 | with open(map_path, 'w') as f:
804 | f.write('%s,0\n' % (class_name))
805 | return data_save_path, map_path
806 |
807 | def _generate_csv_data_yolo3(self, image_set, file_path, ped_samples):
808 | '''
809 | Data generation for the YOLO3 algorithm.
810 | :param image_set: Data split.
811 | :param file_path: Path to save the data.
812 | :param ped_samples: Dictionary of all samples.
813 | '''
814 | class_name = 'pedestrian'
815 | all_img = {}
816 | data_save_path = file_path + 'yolo3_' + image_set + '.txt'
817 | with open(data_save_path, 'wt') as f:
818 | for img, samples in sorted(ped_samples.items()):
819 | if not samples:
820 | continue
821 | f.write('%s ' % (img))
822 | for s in samples:
823 | box = s['box']
824 | f.write('%.0f,%.0f,%.0f,%.0f,%.0f ' % (box[0], box[1], box[2], box[3], 0))
825 | f.write('\n')
826 | print('Data generated for YOLO3')
827 | map_path = file_path + 'mapping_yolo3'
828 | with open(map_path, "wt") as f:
829 | f.write('%s,0\n' % (class_name))
830 | return data_save_path, map_path
831 |
832 | def _generate_csv_data_ssd(self, image_set, file_path, ped_samples):
833 | '''
834 | Data generation for the SSD algorithm.
835 | :param image_set: Data split.
836 | :param file_path: Path to save the data.
837 | :param ped_samples: Dictionary of all samples.
838 | '''
839 | data_save_path = file_path + 'ssd_' + image_set + '.csv'
840 | with open(data_save_path, 'wt') as f:
841 | for img, samples in sorted(ped_samples.items()):
842 | if not samples:
843 | continue
844 | for s in samples:
845 | box = s['box']
846 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], 1))
847 | print('Data generated for SSD')
848 | return data_save_path
849 |
850 |
851 |
852 | def _print_dict(self, dic):
853 | '''
854 | Prints a dictionary, one key-value pair per line.
855 | :param dic: Dictionary.
856 | '''
857 | for k, v in dic.items():
858 | print('%s: %s' % (str(k), str(v)))
859 |
860 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion):
861 | '''
862 | Checks whether the bounding boxes are within a given height limit. If not, it will adjust the length of the data sequences accordingly.
863 | :param height_rng: Height limit [lower, higher].
864 | :param frame_ids: List of frame ids.
865 | :param boxes: List of bounding boxes.
866 | :param images: List of images.
867 | :param occlusion: List of occlusions.
868 | :return: The adjusted data sequences.
869 | '''
870 | imgs, box, frames, occ = [], [], [], []
871 | for i, b in enumerate(boxes):
872 | bbox_height = abs(b[1] - b[3])
873 | if height_rng[0] <= bbox_height <= height_rng[1]:
874 | box.append(b)
875 | imgs.append(images[i])
876 | frames.append(frame_ids[i])
877 | occ.append(occlusion[i])
878 | return imgs, box, frames, occ
879 |
880 | def _get_center(self, box):
881 | '''
882 | Calculates the center coordinate of a bounding box.
883 | :param box: Bounding box coordinates.
884 | :return: The center coordinate.
885 | '''
886 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]
887 |
888 | def _get_image_path(self, vid, f):
889 | '''
890 | Generates the image path given ids.
891 | :param vid: Video id.
892 | :param f: Frame id.
893 | :return: The path to the given image.
894 | '''
895 | return join(self._images_path, vid, '{:05d}.png'.format(f))
896 |
897 | def generate_data_trajectory_sequence(self, image_set, **opts):
898 | '''
899 | Generates pedestrian tracks.
900 | :param image_set: the split set to produce for. Options are train, test, val.
901 | :param opts:
902 | 'fstride': Frequency of sampling from the data.
903 | 'sample_type': Whether to use 'all' pedestrian annotations or the ones with 'beh'avior only.
904 | 'subset': The subset of data annotations to use. Options are:
905 | 'default': Includes high resolution and high visibility videos.
906 | 'high_visibility': Only videos with high visibility (includes low resolution videos).
907 | 'all': Uses all videos.
908 | 'height_rng': The height range of pedestrians to use.
909 | 'squarify_ratio': The width/height ratio of bounding boxes. A value between (0,1]. 0 -> the original ratio is used.
910 | 'data_split_type': How to split the data. Options:
911 | 'default': predefined sets,
912 | 'random': randomly split the data.
913 | 'kfold': k-fold data split (NOTE: only train/test splits).
914 | 'seq_type': Sequence type to generate. Options:
915 | 'trajectory': generates tracks.
916 | 'crossing': generates tracks up to 'crossing_point',
917 | 'intention': generates tracks similar to human experiments.
918 | 'min_track_size': Min track length allowable.
919 | 'random_params': Parameters for random data split generation (see _get_random_pedestrian_ids()).
920 | 'kfold_params': Parameters for kfold split generation (see _get_kfold_pedestrian_ids()).
921 | :return: Sequence data.
922 | '''
923 | params = {'fstride': 1,
924 | 'sample_type': 'all', # 'beh'
925 | 'subset': 'default',
926 | 'height_rng': [0, float('inf')],
927 | 'squarify_ratio': 0,
928 | 'data_split_type': 'default', # kfold, random, default
929 | 'seq_type': 'intention',
930 | 'min_track_size': 15,
931 | 'random_params': {'ratios': None,
932 | 'val_data': True,
933 | 'regen_data': False},
934 | 'kfold_params': {'num_folds': 5, 'fold': 1}}
935 | assert all(k in params for k in opts.keys()), 'Wrong option(s). Choose one of the following: {}'.format(list(params.keys()))
936 | params.update(opts)
937 | print('--------------------------------------------------------')
938 | print('Generating action sequence data.')
939 | self._print_dict(params)
940 |
941 | annot_database = self.generate_database()
942 | if params['seq_type'] == 'trajectory':
943 | sequence = self._get_trajectories(image_set, annot_database, **params)
944 | elif params['seq_type'] == 'crossing':
945 | sequence = self._get_crossing(image_set, annot_database, **params)
946 | elif params['seq_type'] == 'intention':
947 | sequence = self._get_intention(image_set, annot_database, **params)
948 | return sequence
949 |
950 | def _get_trajectories(self, image_set, annot_database, **params):
951 | '''
952 | Generates trajectory data.
953 | :param image_set: Data split.
954 | :param annot_database: The annotations database.
955 | :param params: Parameters for generating trajectories.
956 | :return: A dictionary of trajectories.
957 | '''
958 | print('---------------------------------------------------------')
959 | print('Generating trajectory data.')
960 | num_pedestrians = 0
961 | seq_stride = params['fstride']
962 | sq_ratio = params['squarify_ratio']
963 | height_rng = params['height_rng']
964 | image_seq, pids_seq = [], []
965 | box_seq, center_seq, occ_seq = [], [], []
966 | intent_seq = []
967 | vehicle_seq = []
968 | video_ids, _pids = self._get_data_ids(image_set, params)
969 | for vid in sorted(video_ids):
970 | img_width = annot_database[vid]['width']
971 | pid_annots = annot_database[vid]['ped_annotations']
972 | vid_annots = annot_database[vid]['vehicle_annotations']
973 | for pid in sorted(annot_database[vid]['ped_annotations']):
974 | if params['data_split_type'] != 'default' and pid not in _pids:
975 | continue
976 | if 'p' in pid:
977 | continue
978 | if params['sample_type'] == 'beh' and 'b' not in pid:
979 | continue
980 | num_pedestrians += 1
981 | frame_ids = pid_annots[pid]['frames']
982 | images = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in pid_annots[pid]['frames']]
983 | boxes = pid_annots[pid]['bbox']
984 | occlusions = pid_annots[pid]['occlusion']
985 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
986 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions)
987 | if len(boxes) / seq_stride < params['min_track_size']:
988 | continue
989 | if sq_ratio:
990 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
991 | ped_ids = [[pid]] * len(boxes)
992 | if 'b' not in pid:
993 | intent = [[0]] * len(boxes)
994 | else:
995 | if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1:
996 | intent = [[0]] * len(boxes)
997 | else:
998 | intent = [[1]] * len(boxes)
999 | center = [self._get_center(b) for b in boxes]
1000 | occ_seq.append(occlusions[::seq_stride])
1001 | image_seq.append(images[::seq_stride])
1002 | box_seq.append(boxes[::seq_stride])
1003 | center_seq.append(center[::seq_stride])
1004 | intent_seq.append(intent[::seq_stride])
1005 | pids_seq.append(ped_ids[::seq_stride])
1006 | vehicle_seq.append([[vid_annots[i]] for i in frame_ids][::seq_stride])
1007 | print('Split: %s' % image_set)
1008 | print('Number of pedestrians: %d ' % num_pedestrians)
1009 | print('Total number of samples: %d ' % len(image_seq))
1010 |
1011 | return {'image': image_seq,
1012 | 'pid': pids_seq,
1013 | 'bbox': box_seq,
1014 | 'center': center_seq,
1015 | 'occlusion': occ_seq,
1016 | 'intent': intent_seq,
1017 |
'vehicle_act': vehicle_seq} 1018 | 1019 | def _get_crossing(self, image_set, annot_database, **params): 1020 | ''' 1021 | Generates crossing data. 1022 | :param image_set: Data split to use. 1023 | :param annot_database: Annotations database. 1024 | :param params: Parameters to generate data (see generate_database) 1025 | :return: A dictionary of trajectories 1026 | ''' 1027 | print('---------------------------------------------------------') 1028 | print("Generating crossing data") 1029 | num_pedestrians = 0 1030 | seq_stride = params['fstride'] 1031 | sq_ratio = params['squarify_ratio'] 1032 | height_rng = params['height_rng'] 1033 | image_seq, pids_seq = [], [] 1034 | box_seq, center_seq, occ_seq = [], [], [] 1035 | intent_seq = [] 1036 | vehicle_seq = [] 1037 | activities = [] 1038 | video_ids, _pids = self._get_data_ids(image_set, params) 1039 | for vid in sorted(video_ids): 1040 | img_width = annot_database[vid]['width'] 1041 | img_height = annot_database[vid]['height'] 1042 | pid_annots = annot_database[vid]['ped_annotations'] 1043 | vid_annots = annot_database[vid]['vehicle_annotations'] 1044 | for pid in sorted(pid_annots): 1045 | if params['data_split_type'] != 'default' and pid not in _pids: 1046 | continue 1047 | if 'p' in pid: 1048 | continue 1049 | if params['sample_type'] == 'beh' and 'b' not in pid: 1050 | continue 1051 | num_pedestrians += 1 1052 | frame_ids = pid_annots[pid]['frames'] 1053 | if 'b' in pid: 1054 | event_frame = pid_annots[pid]['attributes']['crossing_point'] 1055 | else: 1056 | event_frame = -1 1057 | if event_frame == -1: 1058 | end_idx = -3 1059 | else: 1060 | end_idx = frame_ids.index(event_frame) 1061 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1062 | frame_ids = frame_ids[: end_idx + 1] 1063 | images = [self._get_image_path(vid, f) for f in frame_ids] 1064 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1065 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1066 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions) 1067 | if len(boxes) / seq_stride < params['min_track_size']: 1068 | continue 1069 | if sq_ratio: 1070 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1071 | image_seq.append(images[::seq_stride]) 1072 | box_seq.append(boxes[::seq_stride]) 1073 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1074 | occ_seq.append(occlusions[::seq_stride]) 1075 | ped_ids = [[pid]] * len(boxes) 1076 | pids_seq.append(ped_ids[::seq_stride]) 1077 | if 'b' not in pid: 1078 | intent = [[0]] * len(boxes) 1079 | acts = [[0]] * len(boxes) 1080 | else: 1081 | if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1082 | intent = [[0]] * len(boxes) 1083 | else: 1084 | intent = [[1]] * len(boxes) 1085 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes) 1086 | intent_seq.append(intent[::seq_stride]) 1087 | activities.append(acts[::seq_stride]) 1088 | vehicle_seq.append([[vid_annots[i]] for i in frame_ids][::seq_stride]) 1089 | print('Split: %s' % image_set) 1090 | print('Number of pedestrians: %d ' % num_pedestrians) 1091 | print('Total number of samples: %d ' % len(image_seq)) 1092 | return {'image': image_seq, 1093 | 'pid': pids_seq, 1094 | 'bbox': box_seq, 1095 | 'center': center_seq, 1096 | 'occlusion': occ_seq, 1097 | 'vehicle_act': vehicle_seq, 1098 | 'intent': intent_seq, 1099 | 'activities': activities, 1100 | 'image_dimension': (img_width, img_height)} 1101 | 1102 | def _get_intention(self, 
image_set, annot_database, **params): 1103 | ''' 1104 | Generates intention data. 1105 | :param image_set: Data split to use. 1106 | :param annot_database: Annotations database. 1107 | :param params: Parameters to generate data (see generate_database()) 1108 | :return: A dictionary of trajectories. 1109 | ''' 1110 | print('---------------------------------------------------------') 1111 | print("Generating intention data") 1112 | num_pedestrians = 0 1113 | seq_stride = params['fstride'] 1114 | sq_ratio = params['squarify_ratio'] 1115 | height_rng = params['height_rng'] 1116 | image_seq, pids_seq = [], [] 1117 | box_seq, center_seq, occ_seq = [], [], [] 1118 | intent_seq = [] 1119 | videos_ids, _pids = self._get_data_ids(image_set, params) 1120 | for vid in sorted(videos_ids): 1121 | img_width = annot_database[vid]['width'] 1122 | pid_annots = annot_database[vid]['ped_annotations'] 1123 | for pid in sorted(pid_annots): 1124 | if params['data_split_type'] != 'default' and pid not in _pids: 1125 | continue 1126 | if 'p' in pid: 1127 | continue 1128 | if params['sample_type'] == 'beh' and 'b' not in pid: 1129 | continue 1130 | num_pedestrians += 1 1131 | frame_ids = pid_annots[pid]['frames'] 1132 | if 'b' in pid: 1133 | event_frame = pid_annots[pid]['attributes']['decision_point'] 1134 | else: 1135 | event_frame = -1 1136 | if event_frame == -1: 1137 | end_idx = -3 1138 | else: 1139 | end_idx = frame_ids.index(event_frame) 1140 | boxes = pid_annots[pid]['bbox'][:end_idx + 1] 1141 | frame_ids = frame_ids[:end_idx + 1] 1142 | images = [self._get_image_path(vid, f) for f in frame_ids] 1143 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1] 1144 | if height_rng[0] > 0 or height_rng[1] < float('inf'): 1145 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions) 1146 | if len(boxes) / seq_stride < params['min_track_size']: 1147 | continue 1148 | if sq_ratio: 1149 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes] 1150 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride]) 1151 | image_seq.append(images[::seq_stride]) 1152 | box_seq.append(boxes[::seq_stride]) 1153 | occ_seq.append(occlusions[::seq_stride]) 1154 | ped_ids = [[pid]] * len(boxes) 1155 | pids_seq.append(ped_ids[::seq_stride]) 1156 | if 'b' not in pid: 1157 | intent = [[0]] * len(boxes) 1158 | else: 1159 | if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1: 1160 | intent = [[0]] * len(boxes) 1161 | else: 1162 | intent = [[1]] * len(boxes) 1163 | intent_seq.append(intent[::seq_stride]) 1164 | print('Split: %s' % image_set) 1165 | print('Number of pedestrians: %d ' % num_pedestrians) 1166 | print('Total number of samples: %d ' % len(image_seq)) 1167 | return {'image': image_seq, 1168 | 'pid': pids_seq, 1169 | 'bbox': box_seq, 1170 | 'center': center_seq, 1171 | 'occlusion': occ_seq, 1172 | 'intent': intent_seq} 1173 | 1174 | --------------------------------------------------------------------------------
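Usage sketch (not part of the repository): the annotation interface above is driven through `generate_database()` and `generate_data_trajectory_sequence()`. The snippet below assumes the file defines a `JAAD` class importable from `utils/jaad_data.py` whose constructor takes the dataset root as `data_path` (suggested by the `self._jaad_path` attribute, but not shown in this excerpt); treat the import path and constructor signature as assumptions.

```python
# Hypothetical usage sketch for the JAAD annotation interface above.
# Assumptions (not confirmed by this excerpt): the class is named JAAD,
# lives in utils/jaad_data.py, and takes the dataset root as `data_path`.
from utils.jaad_data import JAAD

jaad = JAAD(data_path='./JAAD')

# Build (or load from cache) the integrated annotation database described in
# generate_database(); the result is a dict keyed by video id.
database = jaad.generate_database()
print('videos:', len(database))

# Generate behavioral-pedestrian tracks using the options documented in
# generate_data_trajectory_sequence(); unspecified options keep its defaults.
seq = jaad.generate_data_trajectory_sequence(
    'train',
    fstride=1,
    sample_type='beh',          # behavioral pedestrians only
    data_split_type='default',  # predefined video splits
    seq_type='crossing',        # tracks up to the crossing point
    min_track_size=15,
)
print('tracks:', len(seq['bbox']))
```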