├── README.md
├── model
│   ├── FFN.py
│   ├── BottleNecks.py
│   ├── model_blocks.py
│   └── main_model.py
├── readme.md
├── jaad.py
├── pie.py
└── utils
    ├── jaad_preprocessing.py
    ├── pie_preprocessing.py
    ├── pie_data.py
    └── jaad_data.py
/README.md:
--------------------------------------------------------------------------------
1 | # PedCMT
2 | This is the released code of our paper "Pedestrian Crossing Intention Prediction Based on Cross-Modal Transformer and Uncertainty-Aware Multi-Task Learning for Autonomous Driving".
3 |
--------------------------------------------------------------------------------
/model/FFN.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | class FFN(nn.Module): # position-wise feed-forward network
5 | def __init__(self, d_model, hidden_dim, rate=0.3, layer_norm_eps=1e-5):
6 | super(FFN, self).__init__()
7 |
8 | self.norm = nn.LayerNorm(d_model, eps=layer_norm_eps) # layer normalization
9 | self.linear1 = nn.Linear(d_model, hidden_dim) # linear projection
10 | self.relu = nn.ReLU()
11 | self.dropout1 = nn.Dropout(rate)
12 | self.linear2 = nn.Linear(hidden_dim, d_model)
13 | self.dropout2 = nn.Dropout(rate)
14 |
15 | def forward(self, x):
16 | y = self.linear2(self.dropout1(self.relu(self.linear1(x)))) # two-layer feed-forward transform
17 | out = x + self.dropout2(y)
18 | out = self.norm(out) # layer normalization
19 | return out
--------------------------------------------------------------------------------
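The FFN block above is shape-preserving: a two-layer feed-forward transform with dropout, a residual connection, and LayerNorm. A minimal smoke test sketch (hypothetical usage; the dimensions are illustrative, not the training configuration):

```python
# Minimal sketch: check that FFN keeps the [batch, seq, d_model] shape.
import torch
from model.FFN import FFN

ffn = FFN(d_model=128, hidden_dim=256)
x = torch.randn(64, 17, 128)   # [batch, sequence (incl. token), d_model]
y = ffn(x)                     # linear -> ReLU -> dropout -> linear, then residual + LayerNorm
assert y.shape == x.shape
```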
/readme.md:
--------------------------------------------------------------------------------
1 | ## Structure info
2 | Updated: 2023-06-16
3 |
4 | URL: [click here](https://github.com/sellenzh/pedCMT)
5 | Repository structure:
6 |
7 | ```
8 | ├── checkpoints <- trained model save dir
9 | │   ├── JAAD_all.pt
10 | │   ├── JAAD_beh.pt
11 | │   └── PIE.pt
12 | │
13 | ├── logs <- training logs save dir
14 | │   └── PIE
15 | │       └── ...
16 | │
17 | ├── PIE <- dataset (download: [PIE])
18 | │   ├── ... <- note: `annotations.zip`, `annotations_vehicle.zip` and
19 | │   ├── ... <- `annotations_attributes.zip` need to be unzipped
20 | │   └── ... <-
21 | ├── JAAD <- dataset (download: [JAAD])
22 | │   ├── ...
23 | │   └── ...
24 | │
25 | ├── utils
26 | │   ├── pie_data.py
27 | │   └── pie_preprocessing.py
28 | │
29 | ├── model <- model definitions
30 | │   ├── BottleNecks.py
31 | │   ├── FFN.py
32 | │   ├── model_blocks.py
33 | │   └── main_model.py
34 | │
35 | ├── pie.py
36 | │
37 | ├── jaad.py
38 | │
39 | └── README.md
40 | ```
41 | Download: [PIE](https://github.com/aras62/PIE.git)
42 | and [JAAD](https://github.com/ykotseruba/JAAD.git)
43 |
--------------------------------------------------------------------------------
/model/BottleNecks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 |
4 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
5 |
6 |
7 | class Bottlenecks(nn.Module): # bottleneck fusion between the two modality streams
8 | def __init__(self, dims, args):
9 | super(Bottlenecks, self).__init__()
10 | self.dims = dims
11 | self.num_bnks = args.num_bnks # number of bottleneck units
12 | self.num_layers = args.bnks_layers # number of layers
13 | self.bbox = nn.ModuleList()
14 | self.vel = nn.ModuleList()
15 |
16 | self.bbox.append(nn.Linear(dims, dims + self.num_bnks, bias=True))
17 | self.vel.append(nn.Linear(dims, dims + self.num_bnks, bias=True))
18 |
19 | for _ in range(self.num_layers - 1):
20 | self.bbox.append(nn.Linear(dims + self.num_bnks, dims + self.num_bnks, bias=True))
21 | self.vel.append(nn.Linear(dims + self.num_bnks, dims + self.num_bnks, bias=True))
22 | self.dropout = nn.Dropout(0.5)
23 | self.relu = nn.ReLU()
24 |
25 | def cut(self, x): # split along the last dimension
26 | return x[:, :, :self.dims], x[:, :, -self.num_bnks:] # first dims channels, then the last num_bnks channels
27 |
28 | def forward(self, bbox, vel):
29 | bbox, bnk_bbox = self.cut(self.dropout(self.relu(self.bbox[0](bbox)))) # project, then split into the bbox stream and its bottleneck part
30 | vel, bnk_vel = self.cut(self.dropout(self.relu(self.vel[0](vel)))) # project, then split into the vel stream and its bottleneck part
31 | bottlenecks = bnk_bbox + bnk_vel # sum the two parts into the shared bottleneck units
32 |
33 | for i in range(self.num_layers - 1):
34 | bbox = torch.cat((bbox, bottlenecks), dim=-1)
35 | bbox, bnk_bbox = self.cut(self.dropout(self.relu(self.bbox[i + 1](bbox))))
36 | vel, bnk_vel = self.cut(self.dropout(self.relu(self.vel[i + 1](torch.cat((vel, bottlenecks), dim=-1)))))
37 | bottlenecks = bnk_bbox + bnk_vel #+ bnk_token
38 |
39 | return bottlenecks
--------------------------------------------------------------------------------
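The module above exchanges information between the bbox and velocity streams through a small set of shared bottleneck units: each layer widens both streams by `num_bnks` channels, splits those channels off, and sums them into the shared units fed back at the next layer. A rough shape check (a sketch only; the `Namespace` stands in for the real command-line `args`, with values taken from the PIE defaults):

```python
# Sketch: run two random modality streams through Bottlenecks and inspect the shared units.
import torch
from argparse import Namespace
from model.BottleNecks import Bottlenecks

args = Namespace(num_bnks=3, bnks_layers=7)   # stand-in for the parsed arguments
bnk = Bottlenecks(dims=128, args=args)
bbox = torch.randn(64, 17, 128)               # [batch, seq (incl. token), d_model]
vel = torch.randn(64, 17, 128)
shared = bnk(bbox, vel)
print(shared.shape)                           # torch.Size([64, 17, 3]): num_bnks shared units per step
```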
/model/model_blocks.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | #from model.MultiHeadAttention import MultiHeadAttention
4 | from torch import Tensor
5 | import math
6 | from einops import repeat
7 | from einops.layers.torch import Rearrange
8 |
9 | device = 'cuda' if torch.cuda.is_available() else 'cpu'
10 |
11 |
12 | class PositionalEncoding(nn.Module): # sinusoidal positional encoding
13 | def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
14 | super().__init__()
15 | self.dropout = nn.Dropout(p=dropout)
16 |
17 | position = torch.arange(max_len).unsqueeze(1) # [max_len, 1] position indices
18 | div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)) # [d_model / 2] frequency terms
19 | pe = torch.zeros(max_len, 1, d_model) # [max_len, 1, d_model] encoding table
20 | pe[:, 0, 0::2] = torch.sin(position * div_term) # even dimensions
21 | pe[:, 0, 1::2] = torch.cos(position * div_term) # odd dimensions
22 | self.register_buffer('pe', pe)
23 |
24 | def forward(self, x: Tensor) -> Tensor:
25 | """
26 | Args:
27 | x: Tensor, shape [seq_len, batch_size, embedding_dim]
28 | """
29 | x = x + self.pe[:x.size(0)] # add positional encoding
30 | return self.dropout(x)
31 |
32 |
33 | class EmbedPosEnc(nn.Module):
34 | def __init__(self, input_size, d_model):
35 | super(EmbedPosEnc, self).__init__()
36 |
37 | self.embedding = nn.Linear(input_size, d_model)
38 | #self.embedding = MultiScaleCNN(input_size, d_model)
39 | self.pos_enc = PositionalEncoding(d_model) # positional encoding
40 |
41 | self.arrange1 = Rearrange('b s e -> s b e') # batch-first to sequence-first
42 | self.arrange2 = Rearrange('s b e -> b s e') # sequence-first back to batch-first
43 |
44 | def forward(self, x, token):
45 | b = x.shape[0] # batch size
46 | y = self.embedding(x) # linear embedding
47 | token = repeat(token, '() s e -> b s e', b=b) # repeat the token along the batch dimension
48 | y = torch.cat([token, y], dim=1) # prepend the token to the sequence
49 | return self.arrange2(self.pos_enc(self.arrange1(y))) # add positional encoding
50 |
51 |
52 | class AttentionBlocks(nn.Module):
53 | def __init__(self, d_model, num_heads, rate=0.3, layer_norm_eps=1e-5):
54 | super(AttentionBlocks, self).__init__()
55 |
56 | self.att = nn.MultiheadAttention(d_model, num_heads=num_heads, batch_first=True) # multi-head attention
57 | self.drop = nn.Dropout(rate)
58 | self.norm = nn.LayerNorm(d_model, eps=layer_norm_eps) # layer normalization
59 |
60 | def forward(self, x, y=None):
61 | y = x if y is None else y # self-attention when y is not given
62 | att_out, att_w = self.att(x, y, y) # multi-head attention
63 | att_out = self.drop(att_out) # dropout
64 | y = self.norm(x + att_out) # residual connection + layer normalization
65 | return y
66 |
67 |
68 | import torch.nn.functional as F
69 |
70 |
71 | class Time_att(nn.Module): # attention pooling over the time dimension
72 | def __init__(self, dims):
73 | super(Time_att, self).__init__()
74 | self.linear1 = nn.Linear(dims, dims, bias=False)
75 | self.linear2 = nn.Linear(dims, 1, bias=False)
76 | self.time = nn.AdaptiveAvgPool1d(1)
77 |
78 | def forward(self, x):
79 | y = self.linear1(x.contiguous())
80 | y = self.linear2(torch.tanh(y))
81 | beta = F.softmax(y, dim=-1)
82 | c = beta * x
83 | return self.time(c.transpose(-1, -2)).transpose(-1, -2).contiguous().squeeze()
84 |
--------------------------------------------------------------------------------
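Taken together, these blocks embed a raw input sequence, prepend a learnable token, and apply attention in the feature and time dimensions. A minimal composition sketch (dimensions are illustrative; the token is created the same way `main_model.py` does):

```python
# Sketch: embed a bbox sequence, prepend a token, run self-attention, then pool over time.
import torch
from model.model_blocks import EmbedPosEnc, AttentionBlocks, Time_att

d_model, batch, steps = 128, 64, 16
embed = EmbedPosEnc(input_size=4, d_model=d_model)
att = AttentionBlocks(d_model, num_heads=8)
pool = Time_att(dims=d_model)

bbox = torch.randn(batch, steps, 4)        # [batch, time, bbox coords]
token = torch.ones(1, 1, d_model)          # learnable token in the real model
x = embed(bbox, token)                     # -> [batch, steps + 1, d_model]
x = att(x)                                 # self-attention (y defaults to x)
pooled = pool(x)                           # -> [batch, d_model] after temporal attention pooling
print(x.shape, pooled.shape)
```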
/model/main_model.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import numpy as np
4 | from model.model_blocks import EmbedPosEnc, AttentionBlocks, Time_att
5 | from model.FFN import FFN
6 | from model.BottleNecks import Bottlenecks
7 | from einops import repeat
8 |
9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10 |
11 |
12 | class Model(nn.Module):
13 | def __init__(self, args):
14 | super(Model, self).__init__()
15 | self.sigma_cls = nn.Parameter(torch.ones(1, 1, requires_grad=True, device=device), requires_grad=True) # learnable uncertainty parameter for the classification loss
16 | nn.init.kaiming_normal_(self.sigma_cls, mode='fan_out') # initialize
17 | self.sigma_reg = nn.Parameter(torch.ones(1, 1, requires_grad=True, device=device), requires_grad=True) # learnable uncertainty parameter for the regression loss
18 | nn.init.kaiming_normal_(self.sigma_reg, mode='fan_out') # initialize
19 |
20 | d_model = args.d_model
21 | hidden_dim = args.dff
22 | modal_nums = 2
23 | self.num_layers = args.num_layers
24 | self.token = nn.Parameter(torch.ones(1, 1, d_model)) # learnable shared (cross-modal) token
25 |
26 | self.bbox_embedding = EmbedPosEnc(args.bbox_input, d_model) # embedding + positional encoding
27 | self.bbox_token = nn.Parameter(torch.ones(1, 1, d_model)) # learnable bbox token
28 |
29 | self.vel_embedding = EmbedPosEnc(args.vel_input, d_model) # embedding + positional encoding
30 | self.vel_token = nn.Parameter(torch.ones(1, 1, d_model)) # learnable vel token
31 |
32 | self.bbox_att = nn.ModuleList() # empty ModuleLists for the per-layer blocks
33 | self.bbox_ffn = nn.ModuleList()
34 | self.vel_att = nn.ModuleList()
35 | self.vel_ffn = nn.ModuleList()
36 | self.cross_att = nn.ModuleList()
37 | self.cross_ffn = nn.ModuleList()
38 |
39 | for _ in range(self.num_layers):
40 | self.bbox_att.append(AttentionBlocks(d_model, args.num_heads)) # self-attention for the bbox stream
41 | self.bbox_ffn.append(FFN(d_model, hidden_dim)) # FFN for the bbox stream
42 | self.vel_att.append(AttentionBlocks(d_model, args.num_heads))
43 | self.vel_ffn.append(FFN(d_model, hidden_dim))
44 | self.cross_att.append(AttentionBlocks(d_model, args.num_heads)) # cross-modal attention over the collected tokens
45 | self.cross_ffn.append(FFN(d_model, hidden_dim))
46 |
47 | self.dense = nn.Linear(modal_nums * d_model, 4) # fully connected layer (not used in forward)
48 | self.bottlenecks = Bottlenecks(d_model, args) # bottleneck fusion
49 | self.time_att = Time_att(dims=args.num_bnks) # attention pooling over the time dimension
50 | self.endp = nn.Linear(modal_nums * d_model, 4) # fully connected layer for end-point regression
51 | self.relu = nn.ReLU()
52 | self.last = nn.Linear(args.num_bnks, 1) # fully connected layer for crossing prediction
53 | self.sigmoid = nn.Sigmoid() # sigmoid activation
54 |
55 | def forward(self, bbox, vel):
56 | '''
57 | :param bbox: [batch, times_num, 4]
58 | :param vel: [batch, times_num, vel_input]
59 | e.g. with the PIE defaults:
60 | bbox: [64, 16, 4]
61 | vel: [64, 16, 2]
62 | '''
63 |
64 | b = bbox.shape[0]
65 | token = repeat(self.token, '() s e -> b s e', b=b) # repeat the shared token along the batch dimension
66 |
67 | bbox = self.bbox_embedding(bbox, self.bbox_token) # embedding + positional encoding
68 | vel = self.vel_embedding(vel, self.vel_token) # embedding + positional encoding
69 |
70 | bbox = self.bbox_att[0](bbox) # self-attention over the bbox stream
71 | token = torch.cat([token, bbox[:, 0:1, :]], dim=1) # collect the bbox token
72 | vel = self.vel_att[0](vel) # self-attention over the vel stream
73 | token = torch.cat([token, vel[:, 0:1, :]], dim=1) # collect the vel token
74 | token = self.cross_att[0](token) # cross-modal attention over the collected tokens
75 | token_new = token[:, 0:1, :] # keep the updated shared token
76 | bbox = torch.cat([token_new, bbox[:, 1:, :]], dim=1) # write the shared token back into the bbox stream
77 | vel = torch.cat([token_new, vel[:, 1:, :]], dim=1) # write the shared token back into the vel stream
78 | bbox = self.bbox_ffn[0](bbox) # FFN for the bbox stream
79 | vel = self.vel_ffn[0](vel) # FFN for the vel stream
80 | token = self.cross_ffn[0](token)[:, 0:1, :] # FFN for the tokens, keep only the shared one
81 |
82 | for i in range(self.num_layers - 1):
83 | bbox = self.bbox_att[i + 1](bbox)
84 | token = torch.cat([token, bbox[:, 0:1, :]], dim=1)
85 | vel = self.vel_att[i + 1](vel)
86 | token = torch.cat([token, vel[:, 0:1, :]], dim=1)
87 | token = self.cross_att[i + 1](token)
88 | token_new = token[:, 0:1, :]
89 | bbox = torch.cat([token_new, bbox[:, 1:, :]], dim=1)
90 | vel = torch.cat([token_new, vel[:, 1:, :]], dim=1)
91 | bbox = self.bbox_ffn[i + 1](bbox)
92 | vel = self.vel_ffn[i + 1](vel)
93 | token = self.cross_ffn[i + 1](token)[:, 0:1, :]
94 |
95 |
96 | cls_out = torch.cat([bbox[:, 0:1, :], vel[:, 0:1, :]], dim=1) # concatenate the bbox token and the vel token
97 | cls_out_flatten = torch.flatten(cls_out, start_dim=1) # flatten
98 | end_point = self.endp(cls_out_flatten) # fully connected layer predicting the end point
99 |
100 | bnk = self.relu(self.time_att(self.bottlenecks(bbox, vel))) # bottleneck fusion + temporal attention pooling
101 | tmp = self.last(bnk) # fully connected layer predicting crossing
102 | pred = self.sigmoid(tmp)
103 | return pred, end_point, self.sigma_cls, self.sigma_reg # crossing prediction, end point, classification sigma, regression sigma
104 |
--------------------------------------------------------------------------------
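The forward pass therefore returns four things: the crossing probability, the predicted end point, and the two learnable sigma parameters used for uncertainty-aware loss weighting. A hedged sketch of one training step (the `Namespace` mirrors the PIE defaults, the inputs are random placeholders, and the loss combination copies the one in `utils/*_preprocessing.py`):

```python
# Sketch of a single forward/backward step with the uncertainty-weighted multi-task loss.
import torch
from torch import nn
from argparse import Namespace
from model.main_model import Model

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
args = Namespace(d_model=128, dff=256, num_heads=8, num_layers=4,
                 bbox_input=4, vel_input=2, num_bnks=3, bnks_layers=7)
model = Model(args).to(device)

bbox = torch.randn(64, 16, 4, device=device)             # [batch, times_num, bbox]
vel = torch.randn(64, 16, 2, device=device)              # [batch, times_num, obd + gps speed]
label = torch.randint(0, 2, (64, 1), device=device).float()
end_point = torch.randn(64, 4, device=device)

pred, point, s_cls, s_reg = model(bbox, vel)
cls_loss = nn.BCELoss()(pred, label)                     # crossing classification
reg_loss = nn.MSELoss()(point, end_point)                # end-point regression
# Each task is divided by sigma^2 and regularised with log(sigma), as in train().
loss = cls_loss / (s_cls * s_cls) + reg_loss / (s_reg * s_reg) \
       + torch.log(s_cls) + torch.log(s_reg)
loss.backward()
```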
/jaad.py:
--------------------------------------------------------------------------------
1 | from utils.jaad_data import JAAD
2 | from utils.jaad_preprocessing import *
3 |
4 | import torch
5 | from torch import nn
6 | from torch.utils.data import TensorDataset, DataLoader
7 | from torch.utils.tensorboard import SummaryWriter
8 | from model.main_model import Model
9 | from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix
10 | import argparse
11 |
12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13 |
14 | def main(args):
15 | if not args.learn:
16 | seed_all(args.seed)
17 | data_opts = {'fstride': 1,
18 | 'sample_type': args.bh, # 'beh'
19 | 'subset': 'default',
20 | 'height_rng': [0, float('inf')],
21 | 'squarify_ratio': 0,
22 | 'data_split_type': 'default', # kfold, random, default
23 | 'seq_type': 'crossing',
24 | 'min_track_size': 15,
25 | 'random_params': {'ratios': None,
26 | 'val_data': True,
27 | 'regen_data': False},
28 | 'kfold_params': {'num_folds': 5, 'fold': 1},
29 | }
30 | tte = [30, 60]
31 | imdb = JAAD(data_path=args.set_path)
32 | seq_train = imdb.generate_data_trajectory_sequence('train', **data_opts)
33 | balanced_seq_train = balance_dataset(seq_train)
34 | tte_seq_train, traj_seq_train = tte_dataset(balanced_seq_train, tte, 0.6, args.times_num)
35 |
36 | seq_valid = imdb.generate_data_trajectory_sequence('val', **data_opts)
37 | balanced_seq_valid = balance_dataset(seq_valid)
38 | tte_seq_valid, traj_seq_valid = tte_dataset(balanced_seq_valid, tte, 0, args.times_num)
39 |
40 | seq_test = imdb.generate_data_trajectory_sequence('test', **data_opts)
41 | tte_seq_test, traj_seq_test = tte_dataset(seq_test, tte, 0, args.times_num)
42 |
43 | bbox_train = tte_seq_train['bbox']
44 | bbox_valid = tte_seq_valid['bbox']
45 | bbox_test = tte_seq_test['bbox']
46 |
47 | bbox_dec_train = traj_seq_train['bbox']
48 | bbox_dec_valid = traj_seq_valid['bbox']
49 | bbox_dec_test = traj_seq_test['bbox']
50 |
51 | vel_train = tte_seq_train['vehicle_act']
52 | vel_valid = tte_seq_valid['vehicle_act']
53 | vel_test = tte_seq_test['vehicle_act']
54 |
55 | action_train = tte_seq_train['activities']
56 | action_valid = tte_seq_valid['activities']
57 | action_test = tte_seq_test['activities']
58 |
59 | normalized_bbox_train = normalize_bbox(bbox_train)
60 | normalized_bbox_valid = normalize_bbox(bbox_valid)
61 | normalized_bbox_test = normalize_bbox(bbox_test)
62 |
63 | normalized_bbox_dec_train = normalize_traj(bbox_dec_train)
64 | normalized_bbox_dec_valid = normalize_traj(bbox_dec_valid)
65 | normalized_bbox_dec_test = normalize_traj(bbox_dec_test)
66 |
67 | label_action_train = prepare_label(action_train)
68 | label_action_valid = prepare_label(action_valid)
69 | label_action_test = prepare_label(action_test)
70 |
71 | X_train, X_valid = torch.Tensor(normalized_bbox_train), torch.Tensor(normalized_bbox_valid)
72 | Y_train, Y_valid = torch.Tensor(label_action_train), torch.Tensor(label_action_valid)
73 | X_test = torch.Tensor(normalized_bbox_test)
74 | Y_test = torch.Tensor(label_action_test)
75 |
76 | X_train_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_train, 60))
77 | X_valid_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_valid, 60))
78 | X_test_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_test, 60))
79 |
80 | vel_train = torch.Tensor(vel_train)
81 | vel_valid = torch.Tensor(vel_valid)
82 | vel_test = torch.Tensor(vel_test)
83 |
84 | trainset = TensorDataset(X_train, Y_train, vel_train, X_train_dec)
85 | validset = TensorDataset(X_valid, Y_valid, vel_valid, X_valid_dec)
86 | testset = TensorDataset(X_test, Y_test, vel_test, X_test_dec)
87 |
88 | train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True)
89 | valid_loader = DataLoader(validset, batch_size=args.batch_size, shuffle=True)
90 | test_loader = DataLoader(testset, batch_size=1)
91 | else: # generate random data of the same shapes (debug mode)
92 | train_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)),
93 | torch.randn(size=(args.batch_size, 1)),
94 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)),
95 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]]
96 | valid_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)),
97 | torch.randn(size=(args.batch_size, 1)),
98 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)),
99 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]]
100 | test_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)),
101 | torch.randn(size=(args.batch_size, 1)),
102 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)),
103 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]]
104 | print('Start Training Loop... \n')
105 |
106 | model = Model(args)
107 | model.to(device)
108 |
109 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98), eps=1e-6)
110 | cls_criterion = nn.BCELoss()
111 | reg_criterion = nn.MSELoss()
112 |
113 | model_folder_name = args.set_path + '_' + args.bh
114 | checkpoint_filepath = 'checkpoints/{}.pt'.format(model_folder_name)
115 | writer = SummaryWriter('logs/{}'.format(model_folder_name))
116 |
117 | train(model, train_loader, valid_loader, cls_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, args=args)
118 |
119 | #Test
120 | model = Model(args)
121 | model.to(device)
122 |
123 | checkpoint = torch.load(checkpoint_filepath)
124 | model.load_state_dict(checkpoint['model_state_dict'])
125 |
126 | preds, labels = test(model, test_loader)
127 | pred_cpu = torch.Tensor.cpu(preds)
128 | label_cpu = torch.Tensor.cpu(labels)
129 |
130 | acc = accuracy_score(label_cpu, np.round(pred_cpu))
131 | f1 = f1_score(label_cpu, np.round(pred_cpu))
132 | pre_s = precision_score(label_cpu, np.round(pred_cpu))
133 | recall_s = recall_score(label_cpu, np.round(pred_cpu))
134 | auc = roc_auc_score(label_cpu, np.round(pred_cpu))
135 | matrix = confusion_matrix(label_cpu, np.round(pred_cpu))
136 |
137 | print(f'Acc: {acc}\n f1: {f1}\n precision_score: {pre_s}\n recall_score: {recall_s}\n roc_auc_score: {auc}\n confusion_matrix: {matrix}')
138 |
139 |
140 | if __name__ == '__main__':
141 | torch.cuda.empty_cache()
142 | parser = argparse.ArgumentParser('Pedestrian Crossing Intention Prediction.')
143 | parser.add_argument('--epochs', type=int, default=2000, help='Number of epochs to train.')
144 | parser.add_argument('--set_path', type=str, default='JAAD')
145 | parser.add_argument('--bh', type=str, default='beh', help='all or beh, in JAAD dataset.')
146 | parser.add_argument('--balance', type=bool, default=True, help='balance or not for test dataset.')
147 | parser.add_argument('--seed', type=int, default=42)
148 |
149 | parser.add_argument('--d_model', type=int, default=256, help='the dimension after embedding.')
150 | parser.add_argument('--dff', type=int, default=512, help='the number of the units.')
151 | parser.add_argument('--num_heads', type=int, default=8, help='number of the heads of the multi-head model.')
152 | parser.add_argument('--bbox_input', type=int, default=4, help='dimension of bbox.')
153 | parser.add_argument('--vel_input', type=int, default=1, help='dimension of velocity.')
154 | parser.add_argument('--time_crop', type=bool, default=False)
155 |
156 | parser.add_argument('--batch_size', type=int, default=64, help='size of batch.')
157 | parser.add_argument('--lr', type=float, default=0.001, help='learning rate to train.')
158 |
159 | parser.add_argument('--num_layers', type=int, default=4, help='the number of layers.')
160 | parser.add_argument('--times_num', type=int, default=32, help='')
161 | parser.add_argument('--num_bnks', type=int, default=9, help='')
162 | parser.add_argument('--bnks_layers', type=int, default=9, help='')
163 | parser.add_argument('--sta_f', type=int, default=8)
164 | parser.add_argument('--end_f', type=int, default=12)
165 | parser.add_argument('--learn', type=bool, default=False) # skip real data loading and use random data of the same shapes (debug mode); main() reads args.learn
166 | args = parser.parse_args()
167 | main(args)
--------------------------------------------------------------------------------
/pie.py:
--------------------------------------------------------------------------------
1 | from utils.pie_data import PIE
2 | from utils.pie_preprocessing import *
3 |
4 | import torch
5 | from torch import nn
6 | from torch.utils.data import TensorDataset, DataLoader
7 | from torch.utils.tensorboard import SummaryWriter
8 | from model.main_model import Model
9 | from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, precision_score, recall_score, confusion_matrix
10 | import argparse
11 |
12 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
13 |
14 | def main(args):
15 | if not args.learn: # when args.learn is False, load the real dataset and train on it
16 | seed_all(args.seed)
17 | data_opts = {
18 | 'fstride': 1,
19 | 'sample_type': 'all',
20 | 'height_rng': [0, float('inf')],
21 | 'squarify_ratio': 0,
22 | 'data_split_type': 'random', # kfold, random, default
23 | 'seq_type': 'crossing', # crossing , intention
24 | 'min_track_size': 15, # discard tracks that are shorter
25 | 'kfold_params': {'num_folds': 1, 'fold': 1},
26 | 'random_params': {'ratios': [0.7, 0.15, 0.15],
27 | 'val_data': True,
28 | 'regen_data': False},
29 | 'tte': [30, 60],
30 | 'batch_size': 16
31 | }
32 | imdb = PIE(data_path=args.set_path)
33 | seq_train = imdb.generate_data_trajectory_sequence('train', **data_opts) # generate the training sequences
34 | balanced_seq_train = balance_dataset(seq_train) # balance the dataset
35 | tte_seq_train, traj_seq_train = tte_dataset(balanced_seq_train, data_opts['tte'], 0.6, args.times_num) # build the observation and time-to-event splits for training
36 |
37 | seq_valid = imdb.generate_data_trajectory_sequence('val', **data_opts)
38 | balanced_seq_valid = balance_dataset(seq_valid)
39 | tte_seq_valid, traj_seq_valid = tte_dataset(balanced_seq_valid, data_opts['tte'], 0, args.times_num)
40 |
41 | seq_test = imdb.generate_data_trajectory_sequence('test', **data_opts)
42 | tte_seq_test, traj_seq_test = tte_dataset(seq_test, data_opts['tte'], 0, args.times_num)
43 |
44 | bbox_train = tte_seq_train['bbox'] # bounding boxes
45 | bbox_valid = tte_seq_valid['bbox']
46 | bbox_test = tte_seq_test['bbox']
47 |
48 | bbox_dec_train = traj_seq_train['bbox'] # trajectories
49 | bbox_dec_valid = traj_seq_valid['bbox']
50 | bbox_dec_test = traj_seq_test['bbox']
51 |
52 | obd_train = tte_seq_train['obd_speed'] # OBD vehicle speed
53 | obd_valid = tte_seq_valid['obd_speed']
54 | obd_test = tte_seq_test['obd_speed']
55 |
56 | gps_train = tte_seq_train['gps_speed'] # GPS vehicle speed
57 | gps_valid = tte_seq_valid['gps_speed']
58 | gps_test = tte_seq_test['gps_speed']
59 |
60 | action_train = tte_seq_train['activities'] # crossing labels
61 | action_valid = tte_seq_valid['activities']
62 | action_test = tte_seq_test['activities']
63 |
64 | normalized_bbox_train = normalize_bbox(bbox_train) # normalize the bounding boxes
65 | normalized_bbox_valid = normalize_bbox(bbox_valid)
66 | normalized_bbox_test = normalize_bbox(bbox_test)
67 |
68 | normalized_bbox_dec_train = normalize_traj(bbox_dec_train) # prepare the trajectories
69 | normalized_bbox_dec_valid = normalize_traj(bbox_dec_valid)
70 | normalized_bbox_dec_test = normalize_traj(bbox_dec_test)
71 |
72 | label_action_train = prepare_label(action_train) # prepare the labels
73 | label_action_valid = prepare_label(action_valid)
74 | label_action_test = prepare_label(action_test)
75 |
76 | X_train, X_valid = torch.Tensor(normalized_bbox_train), torch.Tensor(normalized_bbox_valid) # convert to tensors
77 | Y_train, Y_valid = torch.Tensor(label_action_train), torch.Tensor(label_action_valid)
78 | X_test = torch.Tensor(normalized_bbox_test)
79 | Y_test = torch.Tensor(label_action_test)
80 |
81 |
82 | temp = pad_sequence(normalized_bbox_dec_train, 60)
83 | X_train_dec = torch.Tensor(temp)
84 | X_valid_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_valid, 60)) # convert to tensors
85 | X_test_dec = torch.Tensor(pad_sequence(normalized_bbox_dec_test, 60))
86 |
87 | obd_train, gps_train = torch.Tensor(obd_train), torch.Tensor(gps_train) # convert to tensors
88 | obd_valid, gps_valid = torch.Tensor(obd_valid), torch.Tensor(gps_valid)
89 | obd_test, gps_test = torch.Tensor(obd_test), torch.Tensor(gps_test)
90 |
91 | vel_train = torch.cat([obd_train, gps_train], dim=-1) # concatenate the OBD and GPS speeds
92 | vel_valid = torch.cat([obd_valid, gps_valid], dim=-1)
93 | vel_test = torch.cat([obd_test, gps_test], dim=-1)
94 |
95 | trainset = TensorDataset(X_train, Y_train, vel_train, X_train_dec) # build the datasets
96 | validset = TensorDataset(X_valid, Y_valid, vel_valid, X_valid_dec)
97 | testset = TensorDataset(X_test, Y_test, vel_test, X_test_dec)
98 |
99 | train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True) # build the dataloaders
100 | valid_loader = DataLoader(validset, batch_size=args.batch_size, shuffle=True)
101 | test_loader = DataLoader(testset, batch_size=1)
102 | else: # when args.learn is True, skip real data loading and generate random data of the same shapes
103 | train_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)), # bbox
104 | torch.randn(size=(args.batch_size, 1)), # label
105 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)), # velocity
106 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]] # trajectory
107 | valid_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)),
108 | torch.randn(size=(args.batch_size, 1)),
109 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)),
110 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]]
111 | test_loader = [[torch.randn(size=(args.batch_size, args.times_num, args.bbox_input)),
112 | torch.randn(size=(args.batch_size, 1)),
113 | torch.randn(size=(args.batch_size, args.times_num, args.vel_input)),
114 | torch.randn(size=(args.batch_size, args.times_num, args.bbox_input))]]
115 | print('Start Training Loop... \n')
116 |
117 | model = Model(args) # build the model
118 | model.to(device) # move it to the GPU if available
119 |
120 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.98), eps=1e-6) # optimizer
121 | cls_criterion = nn.BCELoss() # classification loss (binary cross-entropy)
122 | reg_criterion = nn.MSELoss() # regression loss
123 |
124 | model_folder_name = args.set_path
125 | checkpoint_filepath = 'checkpoints/{}.pt'.format(model_folder_name) # checkpoint path
126 | writer = SummaryWriter('logs/{}'.format(model_folder_name)) # TensorBoard log path
127 | #Train
128 | train(model, train_loader, valid_loader, cls_criterion, reg_criterion, optimizer, checkpoint_filepath, writer, args=args)
129 |
130 | # #Test
131 | model = Model(args)
132 | model.to(device)
133 |
134 | checkpoint = torch.load(checkpoint_filepath)
135 | model.load_state_dict(checkpoint['model_state_dict'])
136 |
137 | preds, labels = test(model, test_loader)
138 | pred_cpu = torch.Tensor.cpu(preds)
139 | label_cpu = torch.Tensor.cpu(labels)
140 |
141 | acc = accuracy_score(label_cpu, np.round(pred_cpu))
142 | f1 = f1_score(label_cpu, np.round(pred_cpu))
143 | pre_s = precision_score(label_cpu, np.round(pred_cpu))
144 | recall_s = recall_score(label_cpu, np.round(pred_cpu))
145 | auc = roc_auc_score(label_cpu, np.round(pred_cpu))
146 | matrix = confusion_matrix(label_cpu, np.round(pred_cpu))
147 |
148 | print(f'Acc: {acc}\n f1: {f1}\n precision_score: {pre_s}\n recall_score: {recall_s}\n roc_auc_score: {auc}\n confusion_matrix: {matrix}')
149 |
150 |
151 | if __name__ == '__main__':
152 | torch.cuda.empty_cache()
153 | parser = argparse.ArgumentParser('Pedestrian Crossing Intention Prediction.')
154 |
155 | parser.add_argument('--epochs', type=int, default=2000, help='Number of epochs to train.')
156 | parser.add_argument('--set_path', type=str, default='PIE')
157 | parser.add_argument('--balance', type=bool, default=True, help='balance or not for test dataset.')
158 | parser.add_argument('--seed', type=int, default=42)
159 |
160 | parser.add_argument('--d_model', type=int, default=128, help='the dimension after embedding.')
161 | parser.add_argument('--dff', type=int, default=256, help='the number of the units.')
162 | parser.add_argument('--num_heads', type=int, default=8, help='number of the heads of the multi-head model.')
163 | parser.add_argument('--bbox_input', type=int, default=4, help='dimension of bbox.')
164 | parser.add_argument('--vel_input', type=int, default=2, help='dimension of velocity.')
165 | parser.add_argument('--time_crop', type=bool, default=False) # whether to use random temporal cropping
166 |
167 | parser.add_argument('--batch_size', type=int, default=64, help='size of batch.')
168 | parser.add_argument('--lr', type=float, default=0.0005, help='learning rate to train.')
169 |
170 | parser.add_argument('--num_layers', type=int, default=4, help='the number of layers.')
171 | parser.add_argument('--times_num', type=int, default=16, help='') # temporal length of the input sequences
172 | parser.add_argument('--num_bnks', type=int, default=3, help='') # number of bottleneck units
173 | parser.add_argument('--bnks_layers', type=int, default=7, help='') # number of bottleneck layers
174 |
175 | parser.add_argument('--sta_f', type=int, default=8) # if random temporal cropping is used, a crop length is sampled between sta_f and end_f and only the most recent frames are kept
176 | parser.add_argument('--end_f', type=int, default=12)
177 |
178 | parser.add_argument('--learn', type=bool, default=True) # skip real data loading and generate random data of the same shapes
179 | # Set learn to True to walk through the pipeline quickly without the real dataset; set it to False for actual training.
180 | args = parser.parse_args()
181 | main(args)
182 |
--------------------------------------------------------------------------------
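Note: with the default `--learn True`, running `python pie.py` skips dataset loading entirely and steps through five training epochs on random tensors of the right shapes, which is a quick way to exercise the training loop before downloading PIE (the final sklearn metrics are only meaningful with real labels).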
/utils/jaad_preprocessing.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | import numpy as np
4 | import random
5 |
6 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
7 |
8 |
9 | def seed_all(seed):
10 | torch.cuda.empty_cache()
11 | os.environ['PYTHONHASHSEED'] = str(seed)
12 | random.seed(seed)
13 | np.random.seed(seed)
14 | torch.manual_seed(seed)
15 | torch.cuda.manual_seed(seed)
16 | torch.cuda.manual_seed_all(seed)
17 | torch.backends.cudnn.deterministic = True
18 | torch.backends.cudnn.benchmark = False
19 |
20 |
21 | def binary_acc(label, pred):
22 | label_tag = torch.round(label)
23 | correct_results_sum = (label_tag == pred).sum().float()
24 | acc = correct_results_sum / pred.shape[0]
25 | return acc
26 |
27 | def end_point_loss(reg_criterion, pred, end_point):
28 | for i in range(4):
29 | if i == 0 or i == 2:
30 | pred[:, i] = pred[:, i] * 1920
31 | end_point[:, i] = end_point[:, i] * 1920
32 | else:
33 | pred[:, i] = pred[:, i] * 1080
34 | end_point[:, i] = end_point[:, i] * 1080
35 | return reg_criterion(pred, end_point)
36 |
37 |
38 | def train(model, train_loader, valid_loader, class_criterion, reg_criterion, optimizer, checkpoint_filepath, writer,
39 | args):
40 | best_valid_acc = 0.0
41 | improvement_ratio = 0.001
42 | best_valid_loss = np.inf
43 | num_steps_wo_improvement = 0
44 | save_times = 0
45 | epochs = args.epochs
46 | if args.learn: # debug mode: train for only 5 epochs
47 | epochs = 5
48 | time_crop = args.time_crop
49 | for epoch in range(epochs):
50 | nb_batches_train = len(train_loader)
51 | train_acc = 0
52 | model.train()
53 | f_losses = 0.0
54 | cls_losses = 0.0
55 | reg_losses = 0.0
56 |
57 | print('Epoch: {} training...'.format(epoch + 1))
58 | for bbox, label, vel, traj in train_loader:
59 | label = label.reshape(-1, 1).to(device).float()
60 | bbox = bbox.to(device)
61 | vel = vel.to(device)
62 | end_point = traj.to(device)[:, -1, :]
63 |
64 | if np.random.randint(10) >= 5 and time_crop:
65 | crop_size = np.random.randint(args.sta_f, args.end_f)
66 | bbox = bbox[:, -crop_size:, :]
67 | vel = vel[:, -crop_size:, :]
68 |
69 | pred, point, s_cls, s_reg = model(bbox, vel)
70 |
71 | cls_loss = class_criterion(pred, label)
72 | reg_loss = reg_criterion(point, end_point)
73 | f_loss = cls_loss / (s_cls * s_cls) + reg_loss / (s_reg * s_reg) + torch.log(s_cls) + torch.log(s_reg)
74 |
75 | model.zero_grad() # clear gradients
76 | f_loss.backward()
77 |
78 | f_losses += f_loss.item()
79 | cls_losses += cls_loss.item()
80 | reg_losses += reg_loss.item()
81 |
82 | optimizer.step() # update parameters
83 |
84 | train_acc += binary_acc(label, torch.round(pred))
85 |
86 | writer.add_scalar('training full_loss',
87 | f_losses / nb_batches_train,
88 | epoch + 1)
89 | writer.add_scalar('training cls_loss',
90 | cls_losses / nb_batches_train,
91 | epoch + 1)
92 | writer.add_scalar('training reg_loss',
93 | reg_losses / nb_batches_train,
94 | epoch + 1)
95 | writer.add_scalar('training Acc',
96 | train_acc / nb_batches_train,
97 | epoch + 1)
98 |
99 | print(
100 | f"Epoch {epoch + 1}: | Train_Loss {f_losses / nb_batches_train} | Train Cls_loss {cls_losses / nb_batches_train} | Train Reg_loss {reg_losses / nb_batches_train} | Train_Acc {train_acc / nb_batches_train} ")
101 |
102 | valid_f_loss, valid_cls_loss, valid_reg_loss, val_acc = evaluate(model, valid_loader, class_criterion,
103 | reg_criterion)
104 |
105 | writer.add_scalar('validation full_loss',
106 | valid_f_loss,
107 | epoch + 1)
108 | writer.add_scalar('validation cls_loss',
109 | valid_cls_loss,
110 | epoch + 1)
111 | writer.add_scalar('validation reg_loss',
112 | valid_reg_loss,
113 | epoch + 1)
114 | writer.add_scalar('validation Acc',
115 | val_acc,
116 | epoch + 1)
117 |
118 | if best_valid_loss > valid_cls_loss:
119 | best_valid_loss = valid_cls_loss
120 | num_steps_wo_improvement = 0
121 | save_times += 1
122 | print(str(save_times) + ' time(s) File saved.\n')
123 | torch.save({
124 | 'epoch': epoch,
125 | 'model_state_dict': model.state_dict(),
126 | 'optimizer_state_dict': optimizer.state_dict(),
127 | 'Accuracy': train_acc / nb_batches_train,
128 | 'LOSS': f_losses / nb_batches_train,
129 | }, checkpoint_filepath)
130 | print('Update improvement.\n')
131 | else:
132 | num_steps_wo_improvement += 1
133 | print(str(num_steps_wo_improvement) + '/300 times Not update.\n')
134 |
135 | if num_steps_wo_improvement == 300:
136 | print("Early stopping on epoch:{}".format(str(epoch + 1)))
137 | break
138 | print('save file times: ' + str(save_times) + '.\n')
139 |
140 |
141 | def evaluate(model, val_data, class_criterion, reg_criterion):
142 | nb_batches = len(val_data)
143 | val_f_losses = 0.0
144 | val_cls_losses = 0.0
145 | val_reg_losses = 0.0
146 | print('in Validation...')
147 | with torch.no_grad():
148 | model.eval()
149 | acc = 0
150 | for bbox, label, vel, traj in val_data:
151 | label = label.reshape(-1, 1).to(device).float()
152 | bbox = bbox.to(device)
153 | vel = vel.to(device)
154 | end_point = traj.to(device)[:, -1, :]
155 |
156 | pred, point, s_cls, s_reg = model(bbox, vel)
157 |
158 | val_cls_loss = class_criterion(pred, label)
159 | val_reg_loss = reg_criterion(point, end_point)
160 | f_loss = val_cls_loss / (s_cls * s_cls) + val_reg_loss / (s_reg * s_reg) + torch.log(s_cls) + torch.log(
161 | s_reg)
162 |
163 | val_f_losses += f_loss.item()
164 | val_cls_losses += val_cls_loss.item()
165 | val_reg_losses += val_reg_loss.item()
166 |
167 | acc += binary_acc(label, torch.round(pred))
168 | print(
169 | f'Valid_Full_Loss {val_f_losses / nb_batches} | Valid Cls_loss {val_cls_losses / nb_batches} | Valid Reg_loss {val_reg_losses / nb_batches} | Valid_Acc {acc / nb_batches} \n')
170 | return val_f_losses / nb_batches, val_cls_losses / nb_batches, val_reg_losses / nb_batches, acc / nb_batches
171 |
172 |
173 | def test(model, test_data):
174 | print('Testing...')
175 | with torch.no_grad():
176 | model.eval()
177 | step = 0
178 | for bbox, label, vel, traj in test_data:
179 | label = label.reshape(-1, 1).to(device).float()
180 | bbox = bbox.to(device)
181 | vel = vel.to(device)
182 |
183 | pred, _, _, _ = model(bbox, vel)
184 |
185 | if step == 0:
186 | preds = pred
187 | labels = label
188 | else:
189 | preds = torch.cat((preds, pred), 0)
190 | labels = torch.cat((labels, label), 0)
191 | step += 1
192 |
193 | return preds, labels
194 |
195 |
196 | def balance_dataset(dataset, flip=True):
197 | d = {'bbox': dataset['bbox'].copy(),
198 | 'pid': dataset['pid'].copy(),
199 | 'activities': dataset['activities'].copy(),
200 | 'image': dataset['image'].copy(),
201 | 'center': dataset['center'].copy(),
202 | 'vehicle_act': dataset['vehicle_act'].copy(),
203 | 'image_dimension': (1920, 1080)}
204 | gt_labels = [gt[0] for gt in d['activities']]
205 | num_pos_samples = np.count_nonzero(np.array(gt_labels))
206 | num_neg_samples = len(gt_labels) - num_pos_samples
207 |
208 | if num_neg_samples == num_pos_samples:
209 | print('Positive samples are equal to negative samples.')
210 | else:
211 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples))
212 | if num_neg_samples > num_pos_samples:
213 | gt_augment = 1
214 | else:
215 | gt_augment = 0
216 |
217 | img_width = d['image_dimension'][0]
218 | num_samples = len(d['pid'])
219 |
220 | for i in range(num_samples):
221 | if d['activities'][i][0][0] == gt_augment:
222 | flipped = d['center'][i].copy()
223 | flipped = [[img_width - c[0], c[1]] for c in flipped]
224 | d['center'].append(flipped)
225 |
226 | flipped = d['bbox'][i].copy()
227 | flipped = [np.array([img_width - c[2], c[1], img_width - c[0], c[3]]) for c in flipped]
228 | d['bbox'].append(flipped)
229 |
230 | d['pid'].append(dataset['pid'][i].copy())
231 |
232 | d['activities'].append(d['activities'][i].copy())
233 | d['vehicle_act'].append(d['vehicle_act'][i].copy())
234 |
235 | flipped = d['image'][i].copy()
236 | flipped = [c.replace('.png', '_flip.png') for c in flipped]
237 |
238 | d['image'].append(flipped)
239 |
240 | gt_labels = [gt[0] for gt in d['activities']]
241 | num_pos_samples = np.count_nonzero(np.array(gt_labels))
242 | num_neg_samples = len(gt_labels) - num_pos_samples
243 |
244 | if num_neg_samples > num_pos_samples:
245 | rm_index = np.where(np.array(gt_labels) == 0)[0]
246 | else:
247 | rm_index = np.where(np.array(gt_labels) == 1)[0]
248 |
249 | dif_samples = abs(num_neg_samples - num_pos_samples)
250 |
251 | np.random.seed(42)
252 | np.random.shuffle(rm_index)
253 | rm_index = rm_index[0:dif_samples]
254 |
255 | for k in d:
256 | seq_data_k = d[k]
257 | d[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index]
258 |
259 | new_gt_labels = [gt[0] for gt in d['activities']]
260 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels))
261 | print('Balanced: Positive: %d \t Negative: %d \n' % (num_pos_samples, len(d['activities']) - num_pos_samples))
262 | print('Total Number of samples: %d\n' % (len(d['activities'])))
263 |
264 | return d
265 |
266 |
267 | def tte_dataset(dataset, time_to_event, overlap, obs_length):
268 | d_obs = {'bbox': dataset['bbox'].copy(),
269 | 'pid': dataset['pid'].copy(),
270 | 'activities': dataset['activities'].copy(),
271 | 'image': dataset['image'].copy(),
272 | 'vehicle_act': dataset['vehicle_act'].copy(),
273 | 'center': dataset['center'].copy()
274 | }
275 |
276 | d_tte = {'bbox': dataset['bbox'].copy(),
277 | 'pid': dataset['pid'].copy(),
278 | 'activities': dataset['activities'].copy(),
279 | 'image': dataset['image'].copy(),
280 | 'vehicle_act': dataset['vehicle_act'].copy(),
281 | 'center': dataset['center'].copy()}
282 |
283 | if isinstance(time_to_event, int):
284 | for k in d_obs.keys():
285 | for i in range(len(d_obs[k])):
286 | d_obs[k][i] = d_obs[k][i][- obs_length - time_to_event: -time_to_event]
287 | d_tte[k][i] = d_tte[k][i][- time_to_event:]
288 | d_obs['tte'] = [[time_to_event]] * len(dataset['bbox'])
289 | d_tte['tte'] = [[time_to_event]] * len(dataset['bbox'])
290 |
291 | else:
292 | olap_res = obs_length if overlap == 0 else int((1 - overlap) * obs_length)
293 | olap_res = 1 if olap_res < 1 else olap_res
294 |
295 | for k in d_obs.keys():
296 | seqs = []
297 | seqs_tte = []
298 | for seq in d_obs[k]:
299 | start_idx = len(seq) - obs_length - time_to_event[1]
300 | end_idx = len(seq) - obs_length - time_to_event[0]
301 | seqs.extend([seq[i:i + obs_length] for i in range(start_idx, end_idx, olap_res)])
302 | seqs_tte.extend([seq[i + obs_length:] for i in range(start_idx, end_idx, olap_res)])
303 | d_obs[k] = seqs
304 | d_tte[k] = seqs_tte
305 | tte_seq = []
306 | for seq in dataset['bbox']:
307 | start_idx = len(seq) - obs_length - time_to_event[1]
308 | end_idx = len(seq) - obs_length - time_to_event[0]
309 | tte_seq.extend([[len(seq) - (i + obs_length)] for i in range(start_idx, end_idx, olap_res)])
310 | d_obs['tte'] = tte_seq.copy()
311 | d_tte['tte'] = tte_seq.copy()
312 |
313 | remove_index = []
314 | try:
315 | time_to_event_0 = time_to_event[0]
316 | except:
317 | time_to_event_0 = time_to_event
318 | for seq_index, (seq_obs, seq_tte) in enumerate(zip(d_obs['bbox'], d_tte['bbox'])):
319 | if len(seq_obs) < 16 or len(seq_tte) < time_to_event_0:
320 | remove_index.append(seq_index)
321 |
322 | for k in d_obs.keys():
323 | for j in sorted(remove_index, reverse=True):
324 | del d_obs[k][j]
325 | del d_tte[k][j]
326 |
327 | return d_obs, d_tte
328 |
329 |
330 | def normalize_bbox(dataset, width=1920, height=1080):
331 | normalized_set = []
332 | for sequence in dataset:
333 | if sequence == []:
334 | continue
335 | normalized_sequence = []
336 | for bbox in sequence:
337 | np_bbox = np.zeros(4)
338 | np_bbox[0] = bbox[0] / width
339 | np_bbox[2] = bbox[2] / width
340 | np_bbox[1] = bbox[1] / height
341 | np_bbox[3] = bbox[3] / height
342 | normalized_sequence.append(np_bbox)
343 | normalized_set.append(np.array(normalized_sequence))
344 |
345 | return normalized_set
346 |
347 | def normalize_traj(dataset, width=1920, height=1080):
348 | normalized_set = []
349 | for sequence in dataset:
350 | if sequence == []:
351 | continue
352 | normalized_sequence = []
353 | for bbox in sequence:
354 | np_bbox = np.zeros(4)
355 | np_bbox[0] = bbox[0]# / width
356 | np_bbox[2] = bbox[2]# / width
357 | np_bbox[1] = bbox[1]# / height
358 | np_bbox[3] = bbox[3]# / height
359 | normalized_sequence.append(np_bbox)
360 | normalized_set.append(np.array(normalized_sequence))
361 |
362 | return normalized_set
363 |
364 |
365 | def prepare_label(dataset):
366 | labels = np.zeros(len(dataset), dtype='int64')
367 | for step, action in enumerate(dataset):
368 | if action == []:
369 | continue
370 | labels[step] = action[0][0]
371 |
372 | return labels
373 |
374 | def pad_sequence(inp_list, max_len):
375 | padded_sequence = []
376 | for source in inp_list:
377 | target = np.array([source[0]] * max_len)
378 | source = source
379 | target[-source.shape[0]:, :] = source
380 |
381 | padded_sequence.append(target)
382 |
383 | return padded_sequence
384 |
--------------------------------------------------------------------------------
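`pad_sequence` above front-pads a shorter trajectory by repeating its first frame until it reaches `max_len`. A tiny illustration with made-up numbers:

```python
# Illustration of pad_sequence: a 3-step trajectory padded to length 5 by repeating its first bbox.
import numpy as np
from utils.jaad_preprocessing import pad_sequence

traj = np.array([[0.1, 0.2, 0.3, 0.4],
                 [0.2, 0.3, 0.4, 0.5],
                 [0.3, 0.4, 0.5, 0.6]])
padded = pad_sequence([traj], max_len=5)[0]
print(padded.shape)          # (5, 4)
print(padded[0], padded[1])  # both equal traj[0]; the original steps fill the tail
```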
/utils/pie_preprocessing.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | import numpy as np
4 | import random
5 |
6 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
7 |
8 |
9 | def seed_all(seed): # set all random seeds for reproducibility
10 | torch.cuda.empty_cache()
11 | os.environ['PYTHONHASHSEED'] = str(seed)
12 | random.seed(seed)
13 | np.random.seed(seed)
14 | torch.manual_seed(seed)
15 | torch.cuda.manual_seed(seed)
16 | torch.cuda.manual_seed_all(seed)
17 | torch.backends.cudnn.deterministic = True
18 | torch.backends.cudnn.benchmark = False
19 |
20 |
21 | def binary_acc(label, pred): # compute accuracy
22 | label_tag = torch.round(label) # round the labels
23 | correct_results_sum = (label_tag == pred).sum().float() # count correct predictions
24 | acc = correct_results_sum / pred.shape[0] # accuracy
25 | return acc
26 |
27 |
28 | def end_point_loss(reg_criterion, pred, end_point): # end-point error in pixel coordinates (unused)
29 | for i in range(4):
30 | if i == 0 or i == 2:
31 | pred[:, i] = pred[:, i] * 1920 # 1920 is the frame width
32 | end_point[:, i] = end_point[:, i] * 1920
33 | else:
34 | pred[:, i] = pred[:, i] * 1080 # 1080 is the frame height
35 | end_point[:, i] = end_point[:, i] * 1080
36 | return reg_criterion(pred, end_point)
37 |
38 |
39 |
40 | def train(model, train_loader, valid_loader, class_criterion, reg_criterion, optimizer, checkpoint_filepath, writer,
41 | args):
42 | # best_valid_acc = 0.0 # best accuracy
43 | # improvement_ratio = 0.001
44 | best_valid_loss = np.inf # best validation loss
45 | num_steps_wo_improvement = 0 # number of epochs without improvement
46 | save_times = 0 # number of checkpoint saves
47 | epochs = args.epochs # number of training epochs
48 | # time_crop = args.time_crop # whether to use random temporal cropping
49 | if args.learn: # debug mode: train for only 5 epochs
50 | epochs = 5
51 | for epoch in range(epochs):
52 | nb_batches_train = len(train_loader) # number of training batches
53 | train_acc = 0 # training accuracy
54 | model.train() # training mode
55 | f_losses = 0.0 # total loss
56 | cls_losses = 0.0 # classification loss
57 | reg_losses = 0.0 # regression loss
58 |
59 | print('Epoch: {} training...'.format(epoch + 1))
60 | for bbox, label, vel, traj in train_loader:
61 | label = label.reshape(-1, 1).to(device).float() # labels
62 | bbox = bbox.to(device)
63 | vel = vel.to(device)
64 | end_point = traj.to(device)[:, -1, :] # last point of the trajectory
65 |
66 | # if np.random.randint(10) >= 5 and time_crop: # random temporal cropping
67 | # crop_size = np.random.randint(args.sta_f, args.end_f)
68 | # bbox = bbox[:, -crop_size:, :]
69 | # # vel = vel[:, -crop_size:, :]
70 |
71 | pred, point, s_cls, s_reg = model(bbox, vel) # prediction, end point, classification sigma, regression sigma
72 | cls_loss = class_criterion(pred, label) # classification loss
73 | reg_loss = reg_criterion(point, end_point) # regression loss
74 | f_loss = cls_loss / (s_cls * s_cls) + reg_loss / (s_reg * s_reg) + torch.log(s_cls * s_reg)
75 | # total uncertainty-weighted loss
76 |
77 | model.zero_grad() # clear gradients
78 | f_loss.backward() # backpropagate
79 |
80 | f_losses += f_loss.item() # accumulate the total loss
81 | cls_losses += cls_loss.item() # accumulate the classification loss
82 | reg_losses += reg_loss.item() # accumulate the regression loss
83 |
84 | optimizer.step() # update parameters
85 |
86 | train_acc += binary_acc(label, torch.round(pred)) # accumulate accuracy
87 |
88 |
89 | writer.add_scalar('training full_loss',
90 | f_losses / nb_batches_train,
91 | epoch + 1)
92 | writer.add_scalar('training cls_loss',
93 | cls_losses / nb_batches_train,
94 | epoch + 1)
95 | writer.add_scalar('training reg_loss',
96 | reg_losses / nb_batches_train,
97 | epoch + 1)
98 | writer.add_scalar('training Acc',
99 | train_acc / nb_batches_train,
100 | epoch + 1)
101 |
102 |
103 | print(
104 | f"Epoch {epoch + 1}: | Train_Loss {f_losses / nb_batches_train} | Train Cls_loss {cls_losses / nb_batches_train} | Train Reg_loss {reg_losses / nb_batches_train} | Train_Acc {train_acc / nb_batches_train} ")
105 | valid_f_loss, valid_cls_loss, valid_reg_loss, val_acc = evaluate(model, valid_loader, class_criterion,
106 | reg_criterion) # validation
107 |
108 | writer.add_scalar('validation full_loss',
109 | valid_f_loss,
110 | epoch + 1)
111 | writer.add_scalar('validation cls_loss',
112 | valid_cls_loss,
113 | epoch + 1)
114 | writer.add_scalar('validation reg_loss',
115 | valid_reg_loss,
116 | epoch + 1)
117 | writer.add_scalar('validation Acc',
118 | val_acc,
119 | epoch + 1)
120 |
121 | if best_valid_loss > valid_cls_loss: # the validation loss improved: save the model
122 | best_valid_loss = valid_cls_loss # update the best loss
123 | num_steps_wo_improvement = 0 # reset the counter
124 | save_times += 1
125 | print(str(save_times) + ' time(s) File saved.\n')
126 | torch.save({
127 | 'epoch': epoch,
128 | 'model_state_dict': model.state_dict(),
129 | 'optimizer_state_dict': optimizer.state_dict(),
130 | 'Accuracy': train_acc / nb_batches_train,
131 | 'LOSS': f_losses / nb_batches_train,
132 | }, checkpoint_filepath) # save the checkpoint
133 | print('Update improvement.\n')
134 |
135 | else: # no improvement
136 | num_steps_wo_improvement += 1
137 | print(str(num_steps_wo_improvement) + '/300 times Not update.\n')
138 |
139 | if num_steps_wo_improvement == 300: # stop early after 300 epochs without improvement
140 | print("Early stopping on epoch:{}".format(str(epoch + 1)))
141 | break
142 | print('save file times: ' + str(save_times) + '.\n')
143 |
144 |
145 | def evaluate(model, val_data, class_criterion, reg_criterion):
146 | nb_batches = len(val_data)
147 | val_f_losses = 0.0
148 | val_cls_losses = 0.0
149 | val_reg_losses = 0.0
150 | print('in Validation...')
151 | with torch.no_grad():
152 | model.eval()
153 | acc = 0
154 | for bbox, label, vel, traj in val_data:
155 | label = label.reshape(-1, 1).to(device).float()
156 | bbox = bbox.to(device)
157 | vel = vel.to(device)
158 | end_point = traj.to(device)[:, -1, :]
159 |
160 | pred, point, s_cls, s_reg = model(bbox, vel)
161 | val_reg_loss = reg_criterion(point, end_point)
162 | val_cls_loss = class_criterion(pred, label)
163 | f_loss = val_cls_loss / (s_cls * s_cls) + val_reg_loss / (s_reg * s_reg) + torch.log(s_cls * s_reg)
164 |
165 | val_f_losses += f_loss.item()
166 | val_cls_losses += val_cls_loss.item()
167 | val_reg_losses += val_reg_loss.item()
168 |
169 | acc += binary_acc(label, torch.round(pred))
170 | print(
171 | f'Valid_Full_Loss {val_f_losses / nb_batches} | Valid Cls_loss {val_cls_losses / nb_batches} | Valid Reg_loss {val_reg_losses / nb_batches} | Valid_Acc {acc / nb_batches} \n')
172 | return val_f_losses / nb_batches, val_cls_losses / nb_batches, val_reg_losses / nb_batches, acc / nb_batches
173 |
174 |
175 | def test(model, test_data):
176 | print('Testing...')
177 | with torch.no_grad():
178 | model.eval()
179 | step = 0
180 | for bbox, label, vel, traj in test_data:
181 | label = label.reshape(-1, 1).to(device).float()
182 | bbox = bbox.to(device)
183 | vel = vel.to(device)
184 |
185 | pred, _, _, _ = model(bbox, vel) # at test time only the classification output is needed
186 |
187 | if step == 0:
188 | preds = pred
189 | labels = label
190 | else:
191 | preds = torch.cat((preds, pred), 0)
192 | labels = torch.cat((labels, label), 0)
193 | step += 1
194 | return preds, labels
195 |
196 |
197 | def balance_dataset(dataset, flip=True): # balance the dataset by flipping and sub-sampling
198 | d = {'bbox': dataset['bbox'].copy(),
199 | 'pid': dataset['pid'].copy(),
200 | 'activities': dataset['activities'].copy(),
201 | 'image': dataset['image'].copy(),
202 | 'center': dataset['center'].copy(),
203 | 'obd_speed': dataset['obd_speed'].copy(),
204 | 'gps_speed': dataset['gps_speed'].copy(),
205 | 'image_dimension': (1920, 1080)}
206 | gt_labels = [gt[0] for gt in d['activities']] # labels
207 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) # number of positive samples
208 | num_neg_samples = len(gt_labels) - num_pos_samples # number of negative samples
209 |
210 | if num_neg_samples == num_pos_samples: # equal numbers of positive and negative samples
211 | print('Positive samples are equal to negative samples.')
212 | else: # unbalanced
213 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples))
214 | if num_neg_samples > num_pos_samples:
215 | gt_augment = 1 # more negatives than positives: augment (flip) the positive samples
216 | else:
217 | gt_augment = 0 # more positives than negatives: augment (flip) the negative samples
218 |
219 | img_width = d['image_dimension'][0] # frame width
220 | num_samples = len(d['pid']) # number of samples
221 |
222 | for i in range(num_samples): # iterate over the samples
223 | if d['activities'][i][0][0] == gt_augment: # the sample belongs to the class being augmented
224 | flipped = d['center'][i].copy() # center points
225 | flipped = [[img_width - c[0], c[1]] for c in flipped] # horizontal flip
226 | d['center'].append(flipped) # append the flipped centers
227 |
228 | flipped = d['bbox'][i].copy() # bounding boxes
229 | flipped = [np.array([img_width - c[2], c[1], img_width - c[0], c[3]]) for c in flipped] # horizontal flip
230 | d['bbox'].append(flipped) # append the flipped bboxes
231 |
232 | d['pid'].append(dataset['pid'][i].copy()) # append the pid
233 |
234 | d['activities'].append(d['activities'][i].copy()) # append the label
235 | d['gps_speed'].append(d['gps_speed'][i].copy()) # append the GPS speed
236 | d['obd_speed'].append(d['obd_speed'][i].copy()) # append the OBD speed
237 |
238 | flipped = d['image'][i].copy() # image paths
239 | flipped = [c.replace('.png', '_flip.png') for c in flipped] # point to the flipped frames
240 |
241 | d['image'].append(flipped) # append the image paths
242 |
243 | gt_labels = [gt[0] for gt in d['activities']] # recompute the labels
244 | num_pos_samples = np.count_nonzero(np.array(gt_labels)) # number of positive samples
245 | num_neg_samples = len(gt_labels) - num_pos_samples # number of negative samples
246 |
247 | if num_neg_samples > num_pos_samples: # still more negatives than positives
248 | rm_index = np.where(np.array(gt_labels) == 0)[0] # candidate negative samples to remove
249 | else:
250 | rm_index = np.where(np.array(gt_labels) == 1)[0] # candidate positive samples to remove
251 |
252 | dif_samples = abs(num_neg_samples - num_pos_samples) # difference between the two class counts
253 |
254 | np.random.seed(42)
255 | np.random.shuffle(rm_index) # shuffle the candidate indices
256 | rm_index = rm_index[0:dif_samples] # keep only as many indices as needed
257 |
258 | for k in d: # iterate over the keys
259 | seq_data_k = d[k]
260 | d[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index] # drop the selected samples
261 |
262 | new_gt_labels = [gt[0] for gt in d['activities']] # updated labels
263 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels)) # updated number of positive samples
264 | print('Balanced: Positive: %d \t Negative: %d \n' % (num_pos_samples, len(d['activities']) - num_pos_samples))
265 | print('Total Number of samples: %d\n' % (len(d['activities'])))
266 |
267 | return d
268 |
269 |
270 | def tte_dataset(dataset, time_to_event, overlap, obs_length): # split into observation and time-to-event sequences
271 | d_obs = {'bbox': dataset['bbox'].copy(),
272 | 'pid': dataset['pid'].copy(),
273 | 'activities': dataset['activities'].copy(),
274 | 'image': dataset['image'].copy(),
275 | 'gps_speed': dataset['gps_speed'].copy(),
276 | 'obd_speed': dataset['obd_speed'].copy(),
277 | 'center': dataset['center'].copy()
278 | }
279 |
280 | d_tte = {'bbox': dataset['bbox'].copy(),
281 | 'pid': dataset['pid'].copy(),
282 | 'activities': dataset['activities'].copy(),
283 | 'image': dataset['image'].copy(),
284 | 'gps_speed': dataset['gps_speed'].copy(),
285 | 'obd_speed': dataset['obd_speed'].copy(),
286 | 'center': dataset['center'].copy()}
287 |
288 | if isinstance(time_to_event, int):
289 | for k in d_obs.keys():
290 | for i in range(len(d_obs[k])):
291 | d_obs[k][i] = d_obs[k][i][- obs_length - time_to_event: -time_to_event] # observation window
292 | d_tte[k][i] = d_tte[k][i][- time_to_event:] # time-to-event window
293 | d_obs['tte'] = [[time_to_event]] * len(dataset['bbox']) # time to event for the observation split
294 | d_tte['tte'] = [[time_to_event]] * len(dataset['bbox']) # time to event for the tte split
295 |
296 | else: # time_to_event is a [min, max] interval
297 | olap_res = obs_length if overlap == 0 else int((1 - overlap) * obs_length) # sliding-window stride
298 | olap_res = 1 if olap_res < 1 else olap_res # the stride is at least 1
299 |
300 | for k in d_obs.keys(): # iterate over the keys
301 | seqs = []
302 | seqs_tte = []
303 | for seq in d_obs[k]:
304 | start_idx = len(seq) - obs_length - time_to_event[1] # earliest start index
305 | end_idx = len(seq) - obs_length - time_to_event[0] # latest start index
306 | seqs.extend([seq[i:i + obs_length] for i in range(start_idx, end_idx, olap_res)]) # observation windows
307 | seqs_tte.extend([seq[i + obs_length:] for i in range(start_idx, end_idx, olap_res)]) # time-to-event tails
308 | d_obs[k] = seqs
309 | d_tte[k] = seqs_tte
310 | tte_seq = []
311 | for seq in dataset['bbox']:
312 | start_idx = len(seq) - obs_length - time_to_event[1]
313 | end_idx = len(seq) - obs_length - time_to_event[0]
314 | tte_seq.extend([[len(seq) - (i + obs_length)] for i in range(start_idx, end_idx, olap_res)])
315 | d_obs['tte'] = tte_seq.copy()
316 | d_tte['tte'] = tte_seq.copy()
317 |
318 | remove_index = []
319 | try:
320 | time_to_event_0 = time_to_event[0] # lower bound of the time-to-event interval
321 | except:
322 | time_to_event_0 = time_to_event # time_to_event is a single integer
323 | for seq_index, (seq_obs, seq_tte) in enumerate(zip(d_obs['bbox'], d_tte['bbox'])): # iterate over the sequences
324 | if len(seq_obs) < 16 or len(seq_tte) < time_to_event_0: # observation shorter than 16 frames or tte shorter than the lower bound
325 | remove_index.append(seq_index) # mark for removal
326 |
327 | for k in d_obs.keys():
328 | for j in sorted(remove_index, reverse=True): # delete from the end so the remaining indices stay valid
329 | del d_obs[k][j]
330 | del d_tte[k][j]
331 |
332 | return d_obs, d_tte
333 |
334 |
335 | def normalize_bbox(dataset, width=1920, height=1080): # normalize bounding boxes to [0, 1]
336 | normalized_set = []
337 | for sequence in dataset:
338 | if sequence == []:
339 | continue
340 | normalized_sequence = []
341 | for bbox in sequence:
342 | np_bbox = np.zeros(4)
343 | np_bbox[0] = bbox[0] / width # top-left x
344 | np_bbox[2] = bbox[2] / width # bottom-right x
345 | np_bbox[1] = bbox[1] / height # top-left y
346 | np_bbox[3] = bbox[3] / height # bottom-right y
347 | normalized_sequence.append(np_bbox)
348 | normalized_set.append(np.array(normalized_sequence))
349 |
350 | return normalized_set
351 |
352 | def normalize_traj(dataset, width=1920, height=1080): # prepare trajectories (kept in pixel coordinates; the division is commented out)
353 | normalized_set = []
354 | for sequence in dataset:
355 | if sequence == []:
356 | continue
357 | normalized_sequence = []
358 | for bbox in sequence:
359 | np_bbox = np.zeros(4)
360 | np_bbox[0] = bbox[0]# / width
361 | np_bbox[2] = bbox[2]# / width
362 | np_bbox[1] = bbox[1]# / height
363 | np_bbox[3] = bbox[3]# / height
364 | normalized_sequence.append(np_bbox)
365 | normalized_set.append(np.array(normalized_sequence))
366 |
367 | return normalized_set
368 |
369 |
370 | def prepare_label(dataset): # prepare the crossing labels
371 | labels = np.zeros(len(dataset), dtype='int64')
372 | for step, action in enumerate(dataset):
373 | if action == []:
374 | continue
375 | labels[step] = action[0][0]
376 |
377 | return labels
378 |
379 | def pad_sequence(inp_list, max_len): # pad sequences to max_len
380 | padded_sequence = []
381 | for source in inp_list:
382 | target = np.array([source[0]] * max_len) # initialise with the first frame repeated max_len times
383 | source = source
384 | target[-source.shape[0]:, :] = source # copy the real sequence into the tail
385 |
386 | padded_sequence.append(target)
387 |
388 | return padded_sequence
389 |
--------------------------------------------------------------------------------
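The horizontal flip used by `balance_dataset` mirrors a bbox around the frame width, mapping `[x1, y1, x2, y2]` to `[W - x2, y1, W - x1, y2]` so the box stays well-formed. A quick numeric check with arbitrary coordinates:

```python
# Quick check of the bbox mirroring used in balance_dataset (coordinates are made up).
import numpy as np

img_width = 1920
c = np.array([100.0, 200.0, 300.0, 400.0])            # x1, y1, x2, y2
flipped = np.array([img_width - c[2], c[1], img_width - c[0], c[3]])
print(flipped)                                         # -> [1620., 200., 1820., 400.]
assert flipped[0] < flipped[2]                         # x1 < x2 is preserved
```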
/utils/pie_data.py:
--------------------------------------------------------------------------------
1 | """
2 | Interface for the PIE dataset:
3 |
4 | A. Rasouli, I. Kotseruba, T. Kunic, and J. Tsotsos, "PIE: A Large-Scale Dataset and Models for Pedestrian Intention Estimation and
5 | Trajectory Prediction", ICCV 2019.
6 |
7 | MIT License
8 |
9 | Copyright (c) 2019 Amir Rasouli, Iuliia Kotseruba
10 |
11 | Permission is hereby granted, free of charge, to any person obtaining a copy
12 | of this software and associated documentation files (the "Software"), to deal
13 | in the Software without restriction, including without limitation the rights
14 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 | copies of the Software, and to permit persons to whom the Software is
16 | furnished to do so, subject to the following conditions:
17 |
18 | The above copyright notice and this permission notice shall be included in all
19 | copies or substantial portions of the Software.
20 |
21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 | SOFTWARE.
28 |
29 | """
30 | import pickle
31 | import cv2
32 | import sys
33 |
34 | import xml.etree.ElementTree as ET
35 | import numpy as np
36 |
37 | from os.path import join, abspath, isfile, isdir
38 | from os import makedirs, listdir
39 | from sklearn.model_selection import train_test_split, KFold
40 |
41 |
42 | class PIE(object):
43 | def __init__(self, regen_database=False, data_path=''):
44 | """
45 | Class constructor
46 | :param regen_database: Whether generate the database or not
47 | :param data_path: The path to the dataset folder
48 | """
49 | self._year = '2019'
50 | self._name = 'pie'
51 | self._image_ext = '.png'
52 | self._regen_database = regen_database
53 |
54 | # Paths
55 | self._pie_path = data_path if data_path else self._get_default_path()
56 | assert isdir(self._pie_path), \
57 | 'pie path does not exist: {}'.format(self._pie_path)
58 |
59 | self._annotation_path = join(self._pie_path, 'annotations')
60 | self._annotation_attributes_path = join(self._pie_path, 'annotations_attributes')
61 | self._annotation_vehicle_path = join(self._pie_path, 'annotations_vehicle')
62 |
63 | self._clips_path = join(self._pie_path, 'PIE_clips')
64 | self._images_path = join(self._pie_path, 'images')
65 |
66 | # Path generators
67 | @property
68 | def cache_path(self):
69 | """
70 | Generates a path to save cache files
71 | :return: Cache file folder path
72 | """
73 | cache_path = abspath(join(self._pie_path, 'data_cache'))
74 | if not isdir(cache_path):
75 | makedirs(cache_path)
76 | return cache_path
77 |
78 | def _get_default_path(self):
79 | """
80 | Returns the default path where pie is expected to be installed.
81 | """
82 | return 'data/pie'
83 |
84 | def _get_image_set_ids(self, image_set):
85 | """
86 | Returns default image set ids
87 | :param image_set: Image set split
88 | :return: Set ids of the image set
89 | """
90 | image_set_nums = {'train': ['set01', 'set02', 'set04'],
91 | 'val': ['set05', 'set06'],
92 | 'test': ['set03'],
93 | 'all': ['set01', 'set02', 'set03',
94 | 'set04', 'set05', 'set06']}
95 | return image_set_nums[image_set]
96 |
97 | def _get_image_path(self, sid, vid, fid):
98 | """
99 | Generates and returns the image path given ids
100 | :param sid: Set id
101 | :param vid: Video id
102 | :param fid: Frame id
103 | :return: Return the path to the given image
104 | """
105 | return join(self._images_path, sid, vid,
106 | '{:05d}.png'.format(fid))
107 |
108 | # Visual helpers
109 | def update_progress(self, progress):
110 | """
111 | Creates a progress bar
112 | :param progress: The progress thus far
113 | """
114 | barLength = 20 # Modify this to change the length of the progress bar
115 | status = ""
116 | if isinstance(progress, int):
117 | progress = float(progress)
118 |
119 | block = int(round(barLength * progress))
120 | text = "\r[{}] {:0.2f}% {}".format("#" * block + "-" * (barLength - block), progress * 100, status)
121 | sys.stdout.write(text)
122 | sys.stdout.flush()
123 |
124 | def _print_dict(self, dic):
125 | """
126 | Prints a dictionary, one key-value pair per line
127 | :param dic: Dictionary
128 | """
129 | for k, v in dic.items():
130 | print('%s: %s' % (str(k), str(v)))
131 |
132 | # Data processing helpers
133 | def _get_width(self):
134 | """
135 | Returns image width
136 | :return: Image width
137 | """
138 | return 1920
139 |
140 | def _get_height(self):
141 | """
142 | Returns image height
143 | :return: Image height
144 | """
145 | return 1080
146 |
147 | def _get_dim(self):
148 | """
149 | Returns the image dimensions
150 | :return: Image dimensions
151 | """
152 | return 1920, 1080
153 |
154 | # Image processing helpers
155 | def get_annotated_frame_numbers(self, set_id):
156 | """
157 | Generates and returns a dictionary of videos and annotated frames for each video in the given set
158 | :param set_id: Set to generate annotated frames
159 | :return: A dictionary of the form
160 | {<video_id>: [<number_of_frames>, <frame_id_0>, ..., <frame_id_n>]}
161 | """
162 |
163 | print("Generating annotated frame numbers for", set_id)
164 | annotated_frames_file = join(self._pie_path, "annotations", set_id, set_id + '_annotated_frames.csv')
165 | # If the file exists, load from the file
166 | if isfile(annotated_frames_file):
167 | with open(annotated_frames_file, 'rt') as f:
168 | annotated_frames = {x.split(',')[0]:
169 | [int(fr) for fr in x.split(',')[1:]] for x in f.readlines()}
170 | return annotated_frames
171 | else:
172 | # Generate annotated frame ids for each video
173 | annotated_frames = {v.split('_annt.xml')[0]: [] for v in sorted(listdir(join(self._annotation_path,
174 | set_id))) if
175 | v.endswith("annt.xml")}
176 | for vid, annot_frames in sorted(annotated_frames.items()):
177 | _frames = []
178 | path_to_file = join(self._annotation_path, set_id, vid + '_annt.xml')
179 | tree = ET.parse(path_to_file)
180 | tracks = tree.findall('./track')
181 | for t in tracks:
182 | if t.get('label') != 'pedestrian':
183 | continue
184 | boxes = t.findall('./box')
185 | for b in boxes:
186 | # Exclude the annotations that are outside of the frame
187 | if int(b.get('outside')) == 1:
188 | continue
189 | _frames.append(int(b.get('frame')))
190 | _frames = sorted(list(set(_frames)))
191 | annot_frames.append(len(_frames))
192 | annot_frames.extend(_frames)
193 |
194 | with open(annotated_frames_file, 'wt') as fid:
195 | for vid, annot_frames in sorted(annotated_frames.items()):
196 | fid.write(vid)
197 | for fr in annot_frames:
198 | fid.write("," + str(fr))
199 | fid.write('\n')
200 |
201 | return annotated_frames
202 |
203 | def get_frame_numbers(self, set_id):
204 | """
205 | Generates and returns a dictionary of videos and frames for each video in the given set
206 | :param set_id: Set to generate annotated frames
207 | :return: A dictionary of the form
208 | {<video_id>: [<number_of_frames>, <frame_id_0>, ..., <frame_id_n>]}
209 | """
210 | print("Generating frame numbers for", set_id)
211 | frame_ids = {v.split('_annt.xml')[0]: [] for v in sorted(listdir(join(self._annotation_path,
212 | set_id))) if
213 | v.endswith("annt.xml")}
214 | for vid, frames in sorted(frame_ids.items()):
215 | path_to_file = join(self._annotation_path, set_id, vid + '_annt.xml')
216 | tree = ET.parse(path_to_file)
217 | num_frames = int(tree.find("./meta/task/size").text)
218 | frames.extend([i for i in range(num_frames)])
219 | frames.insert(0, num_frames)
220 | return frame_ids
221 |
222 | def extract_and_save_images(self, extract_frame_type='annotated'):
223 | """
224 | Extracts images from clips and saves on hard drive
225 | :param extract_frame_type: Whether to extract 'all' frames or only the ones that are 'annotated'
226 | Note: extracting 'all' frames requires approx. 3TB space whereas
227 | 'annotated' requires approx. 1TB
228 | """
229 | set_folders = [f for f in sorted(listdir(self._clips_path))]
230 | for set_id in set_folders:
231 | print('Extracting frames from', set_id)
232 | set_folder_path = join(self._clips_path, set_id)
233 | if extract_frame_type == 'annotated':
234 | extract_frames = self.get_annotated_frame_numbers(set_id)
235 | else:
236 | extract_frames = self.get_frame_numbers(set_id)
237 |
238 | set_images_path = join(self._pie_path, "images", set_id)
239 | for vid, frames in sorted(extract_frames.items()):
240 | print(vid)
241 | video_images_path = join(set_images_path, vid)
242 | num_frames = frames[0]
243 | frames_list = frames[1:]
244 | if not isdir(video_images_path):
245 | makedirs(video_images_path)
246 | vidcap = cv2.VideoCapture(join(set_folder_path, vid + '.mp4'))
247 | success, image = vidcap.read()
248 | frame_num = 0
249 | img_count = 0
250 | if not success:
251 | print('Failed to open the video {}'.format(vid))
252 | while success:
253 | if frame_num in frames_list:
254 | self.update_progress(img_count / num_frames)
255 | img_count += 1
256 | if not isfile(join(video_images_path, "%05.f.png") % frame_num):
257 | cv2.imwrite(join(video_images_path, "%05.f.png") % frame_num, image)
258 | success, image = vidcap.read()
259 | frame_num += 1
260 | if num_frames != img_count:
261 | print('num images don\'t match {}/{}'.format(num_frames, img_count))
262 | print('\n')
263 |
264 | # Annotation processing helpers
265 | def _map_text_to_scalar(self, label_type, value):
266 | """
267 | Maps a text label in XML file to scalars
268 | :param label_type: The label type
269 | :param value: The text to be mapped
270 | :return: The scalar value
271 | """
272 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2},
273 | 'action': {'standing': 0, 'walking': 1},
274 | 'look': {'not-looking': 0, 'looking': 1},
275 | 'gesture': {'__undefined__': 0, 'hand_ack': 1, 'hand_yield': 2,
276 | 'hand_rightofway': 3, 'nod': 4, 'other': 5},
277 | 'cross': {'not-crossing': 0, 'crossing': 1, 'crossing-irrelevant': -1},
278 | 'crossing': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1},
279 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3},
280 | 'designated': {'ND': 0, 'D': 1},
281 | 'gender': {'n/a': 0, 'female': 1, 'male': 2},
282 | 'intersection': {'midblock': 0, 'T': 1, 'T-left': 2, 'T-right': 3, 'four-way': 4},
283 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2},
284 | 'traffic_direction': {'OW': 0, 'TW': 1},
285 | 'signalized': {'n/a': 0, 'C': 1, 'S': 2, 'CS': 3},
286 | 'vehicle': {'car': 0, 'truck': 1, 'bus': 2, 'train': 3, 'bicycle': 4, 'bike': 5},
287 | 'sign': {'ped_blue': 0, 'ped_yellow': 1, 'ped_white': 2, 'ped_text': 3, 'stop_sign': 4,
288 | 'bus_stop': 5, 'train_stop': 6, 'construction': 7, 'other': 8},
289 | 'traffic_light': {'regular': 0, 'transit': 1, 'pedestrian': 2},
290 | 'state': {'__undefined__': 0, 'red': 1, 'yellow': 2, 'green': 3}}
291 |
292 | return map_dic[label_type][value]
293 |
294 | def _map_scalar_to_text(self, label_type, value):
295 | """
296 | Maps a scalar value to a text label
297 | :param label_type: The label type
298 | :param value: The scalar to be mapped
299 | :return: The text label
300 | """
301 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'},
302 | 'action': {0: 'standing', 1: 'walking'},
303 | 'look': {0: 'not-looking', 1: 'looking'},
304 | 'hand_gesture': {0: '__undefined__', 1: 'hand_ack',
305 | 2: 'hand_yield', 3: 'hand_rightofway',
306 | 4: 'nod', 5: 'other'},
307 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 'crossing-irrelevant'},
308 | 'crossing': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'},
309 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'},
310 | 'designated': {0: 'ND', 1: 'D'},
311 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'},
312 | 'intersection': {0: 'midblock', 1: 'T', 2: 'T-left', 3: 'T-right', 4: 'four-way'},
313 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'},
314 | 'traffic_direction': {0: 'OW', 1: 'TW'},
315 | 'signalized': {0: 'n/a', 1: 'C', 2: 'S', 3: 'CS'},
316 | 'vehicle': {0: 'car', 1: 'truck', 2: 'bus', 3: 'train', 4: 'bicycle', 5: 'bike'},
317 | 'sign': {0: 'ped_blue', 1: 'ped_yellow', 2: 'ped_white', 3: 'ped_text', 4: 'stop_sign',
318 | 5: 'bus_stop', 6: 'train_stop', 7: 'construction', 8: 'other'},
319 | 'traffic_light': {0: 'regular', 1: 'transit', 2: 'pedestrian'},
320 | 'state': {0: '__undefined__', 1: 'red', 2: 'yellow', 3: 'green'}}
321 |
322 | return map_dic[label_type][value]
323 |
324 | def _get_annotations(self, setid, vid):
325 | """
326 | Generates a dictionary of annotations by parsing the video XML file
327 | :param setid: The set id
328 | :param vid: The video id
329 | :return: A dictionary of annotations
330 | """
331 | path_to_file = join(self._annotation_path, setid, vid + '_annt.xml')
332 | print(path_to_file)
333 |
334 | tree = ET.parse(path_to_file)
335 | ped_annt = 'ped_annotations'
336 | traffic_annt = 'traffic_annotations'
337 |
338 | annotations = {}
339 | annotations['num_frames'] = int(tree.find("./meta/task/size").text)
340 | annotations['width'] = int(tree.find("./meta/task/original_size/width").text)
341 | annotations['height'] = int(tree.find("./meta/task/original_size/height").text)
342 | annotations[ped_annt] = {}
343 | annotations[traffic_annt] = {}
344 | tracks = tree.findall('./track')
345 | for t in tracks:
346 | boxes = t.findall('./box')
347 | obj_label = t.get('label')
348 | obj_id = boxes[0].find('./attribute[@name=\"id\"]').text
349 |
350 | if obj_label == 'pedestrian':
351 | annotations[ped_annt][obj_id] = {'frames': [], 'bbox': [], 'occlusion': []}
352 | annotations[ped_annt][obj_id]['behavior'] = {'gesture': [], 'look': [], 'action': [], 'cross': []}
353 | for b in boxes:
354 | # Exclude the annotations that are outside of the frame
355 | if int(b.get('outside')) == 1:
356 | continue
357 | annotations[ped_annt][obj_id]['bbox'].append(
358 | [float(b.get('xtl')), float(b.get('ytl')),
359 | float(b.get('xbr')), float(b.get('ybr'))])
360 | occ = self._map_text_to_scalar('occlusion', b.find('./attribute[@name=\"occlusion\"]').text)
361 | annotations[ped_annt][obj_id]['occlusion'].append(occ)
362 | annotations[ped_annt][obj_id]['frames'].append(int(b.get('frame')))
363 | for beh in annotations['ped_annotations'][obj_id]['behavior']:
364 | # Read behavior tags for each frame and add to the database
365 | annotations[ped_annt][obj_id]['behavior'][beh].append(
366 | self._map_text_to_scalar(beh, b.find('./attribute[@name=\"' + beh + '\"]').text))
367 |
368 | else:
369 | obj_type = boxes[0].find('./attribute[@name=\"type\"]')
370 | if obj_type is not None:
371 | obj_type = self._map_text_to_scalar(obj_label,
372 | boxes[0].find('./attribute[@name=\"type\"]').text)
373 |
374 | annotations[traffic_annt][obj_id] = {'frames': [], 'bbox': [], 'occlusion': [],
375 | 'obj_class': obj_label,
376 | 'obj_type': obj_type,
377 | 'state': []}
378 |
379 | for b in boxes:
380 | # Exclude the annotations that are outside of the frame
381 | if int(b.get('outside')) == 1:
382 | continue
383 | annotations[traffic_annt][obj_id]['bbox'].append(
384 | [float(b.get('xtl')), float(b.get('ytl')),
385 | float(b.get('xbr')), float(b.get('ybr'))])
386 | annotations[traffic_annt][obj_id]['occlusion'].append(int(b.get('occluded')))
387 | annotations[traffic_annt][obj_id]['frames'].append(int(b.get('frame')))
388 | if obj_label == 'traffic_light':
389 | annotations[traffic_annt][obj_id]['state'].append(self._map_text_to_scalar('state',
390 | b.find(
391 | './attribute[@name=\"state\"]').text))
392 | return annotations
393 |
394 | def _get_ped_attributes(self, setid, vid):
395 | """
396 | Generates a dictionary of attributes by parsing the video XML file
397 | :param setid: The set id
398 | :param vid: The video id
399 | :return: A dictionary of attributes
400 | """
401 | path_to_file = join(self._annotation_attributes_path, setid, vid + '_attributes.xml')
402 | tree = ET.parse(path_to_file)
403 |
404 | attributes = {}
405 | pedestrians = tree.findall("./pedestrian")
406 | for p in pedestrians:
407 | ped_id = p.get('id')
408 | attributes[ped_id] = {}
409 | for k, v in p.items():
410 | if 'id' in k:
411 | continue
412 | try:
413 | if k == 'intention_prob':
414 | attributes[ped_id][k] = float(v)
415 | else:
416 | attributes[ped_id][k] = int(v)
417 | except ValueError:
418 | attributes[ped_id][k] = self._map_text_to_scalar(k, v)
419 |
420 | return attributes
421 |
422 | def _get_vehicle_attributes(self, setid, vid):
423 | """
424 | Generates a dictionary of vehicle attributes by parsing the video XML file
425 | :param setid: The set id
426 | :param vid: The video id
427 | :return: A dictionary of vehicle attributes (obd sensor recording)
428 | """
429 | path_to_file = join(self._annotation_vehicle_path, setid, vid + '_obd.xml')
430 | tree = ET.parse(path_to_file)
431 |
432 | veh_attributes = {}
433 | frames = tree.findall("./frame")
434 |
435 | for f in frames:
436 | dict_vals = {k: float(v) for k, v in f.attrib.items() if k != 'id'}
437 | veh_attributes[int(f.get('id'))] = dict_vals
438 |
439 | return veh_attributes
440 |
441 | def generate_database(self):
442 | """
443 | Generates and saves a database of the pie dataset by integrating all annotations
444 | Dictionary structure:
445 | 'set_id'(str): {
446 | 'vid_id'(str): {
447 | 'num_frames': int
448 | 'width': int
449 | 'height': int
450 | 'traffic_annotations'(str): {
451 | 'obj_id'(str): {
452 | 'frames': list(int)
453 | 'occlusion': list(int)
454 | 'bbox': list([x1, y1, x2, y2]) (float)
455 | 'obj_class': str,
456 | 'obj_type': str, # only for traffic lights, vehicles, signs
457 | 'state': list(int) # only for traffic lights
458 | 'ped_annotations'(str): {
459 | 'ped_id'(str): {
460 | 'frames': list(int)
461 | 'occlusion': list(int)
462 | 'bbox': list([x1, y1, x2, y2]) (float)
463 | 'behavior'(str): {
464 | 'action': list(int)
465 | 'gesture': list(int)
466 | 'cross': list(int)
467 | 'look': list(int)
468 | 'attributes'(str): {
469 | 'age': int
470 | 'id': str
471 | 'num_lanes': int
472 | 'crossing': int
473 | 'gender': int
474 | 'crossing_point': int
475 | 'critical_point': int
476 | 'exp_start_point': int
477 | 'intersection': int
478 | 'designated': int
479 | 'signalized': int
480 | 'traffic_direction': int
481 | 'group_size': int
482 | 'motion_direction': int
483 | 'vehicle_annotations'(str){
484 | 'frame_id'(int){'longitude': float
485 | 'yaw': float
486 | 'pitch': float
487 | 'roll': float
488 | 'OBD_speed': float
489 | 'GPS_speed': float
490 | 'latitude': float
491 | 'longitude': float
492 | 'heading_angle': float
493 | 'accX': float
494 | 'accY': float
495 | 'accZ': float
496 | 'gyroX': float
497 | 'gyroY': float
498 | 'gyroZ': float
499 |
500 | :return: A database dictionary
501 | """
502 |
503 | print('---------------------------------------------------------')
504 | print("Generating database for pie")
505 |
506 | cache_file = join(self.cache_path, 'pie_database.pkl')
507 | if isfile(cache_file) and not self._regen_database:
508 | with open(cache_file, 'rb') as fid:
509 | try:
510 | database = pickle.load(fid)
511 | except:
512 | database = pickle.load(fid, encoding='bytes')
513 | print('pie annotations loaded from {}'.format(cache_file))
514 | return database
515 |
516 | # Path to the folder annotations
517 | set_ids = [f for f in sorted(listdir(self._annotation_path))]
518 |
519 | # Read the content of set folders
520 | database = {}
521 | for setid in set_ids:
522 | video_ids = [v.split('_annt.xml')[0] for v in sorted(listdir(join(self._annotation_path,
523 | setid))) if v.endswith("annt.xml")]
524 | database[setid] = {}
525 | for vid in video_ids:
526 | print('Getting annotations for %s, %s' % (setid, vid))
527 | database[setid][vid] = self._get_annotations(setid, vid)
528 | vid_attributes = self._get_ped_attributes(setid, vid)
529 | database[setid][vid]['vehicle_annotations'] = self._get_vehicle_attributes(setid, vid)
530 | for ped in database[setid][vid]['ped_annotations']:
531 | database[setid][vid]['ped_annotations'][ped]['attributes'] = vid_attributes[ped]
532 |
533 | with open(cache_file, 'wb') as fid:
534 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL)
535 | print('The database is written to {}'.format(cache_file))
536 |
537 | return database
538 |
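A minimal usage sketch for the database builder above. The import path assumes the repo layout shown in the readme, and './PIE' is a placeholder for wherever the unzipped PIE annotations live; the keys follow the structure documented in the docstring.

```python
from utils.pie_data import PIE

pie = PIE(data_path='./PIE')   # hypothetical path to the unpacked PIE annotations
db = pie.generate_database()   # cached to <data_path>/data_cache/pie_database.pkl

# Walk the nested structure: set -> video -> pedestrian annotations.
for set_id, videos in db.items():
    for vid, annots in videos.items():
        for ped_id, ped in annots['ped_annotations'].items():
            first_box = ped['bbox'][0]                 # [x1, y1, x2, y2]
            crossing = ped['attributes']['crossing']   # -1 / 0 / 1 as mapped above
            print(set_id, vid, ped_id, first_box, crossing)
            break
        break
    break
```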
539 | def get_data_stats(self):
540 | """
541 | Generates statistics for the dataset
542 | """
543 | annotations = self.generate_database()
544 |
545 | set_count = len(annotations.keys())
546 |
547 | ped_count = 0
548 | ped_box_count = 0
549 | video_count = 0
550 | total_frames = 0
551 | age = {'child': 0, 'adult': 0, 'senior': 0}
552 | gender = {'male': 0, 'female': 0}
553 | signalized = {'n/a': 0, 'C': 0, 'S': 0, 'CS': 0}
554 | traffic_direction = {'OW': 0, 'TW': 0}
555 | intersection = {'midblock': 0, 'T': 0, 'T-right': 0, 'T-left': 0, 'four-way': 0}
556 | crossing = {'crossing': 0, 'not-crossing': 0, 'irrelevant': 0}
557 |
558 | traffic_obj_types = {'vehicle': {'car': 0, 'truck': 0, 'bus': 0, 'train': 0, 'bicycle': 0, 'bike': 0},
559 | 'sign': {'ped_blue': 0, 'ped_yellow': 0, 'ped_white': 0, 'ped_text': 0, 'stop_sign': 0,
560 | 'bus_stop': 0, 'train_stop': 0, 'construction': 0, 'other': 0},
561 | 'traffic_light': {'regular': 0, 'transit': 0, 'pedestrian': 0},
562 | 'crosswalk': 0,
563 | 'transit_station': 0}
564 | traffic_box_count = {'vehicle': 0, 'traffic_light': 0, 'sign': 0, 'crosswalk': 0, 'transit_station': 0}
565 | for sid, vids in annotations.items():
566 | video_count += len(vids)
567 | for vid, annots in vids.items():
568 | total_frames += annots['num_frames']
569 | for trf_ids, trf_annots in annots['traffic_annotations'].items():
570 | obj_class = trf_annots['obj_class']
571 | traffic_box_count[obj_class] += len(trf_annots['frames'])
572 | if obj_class in ['traffic_light', 'vehicle', 'sign']:
573 | obj_type = trf_annots['obj_type']
574 | traffic_obj_types[obj_class][self._map_scalar_to_text(obj_class, obj_type)] += 1
575 | else:
576 | traffic_obj_types[obj_class] += 1
577 | for ped_ids, ped_annots in annots['ped_annotations'].items():
578 | ped_count += 1
579 | ped_box_count += len(ped_annots['frames'])
580 | age[self._map_scalar_to_text('age', ped_annots['attributes']['age'])] += 1
581 | if self._map_scalar_to_text('crossing', ped_annots['attributes']['crossing']) == 'crossing':
582 | crossing[self._map_scalar_to_text('crossing', ped_annots['attributes']['crossing'])] += 1
583 | else:
584 | if ped_annots['attributes']['intention_prob'] > 0.5:
585 | crossing['not-crossing'] += 1
586 | else:
587 | crossing['irrelevant'] += 1
588 | intersection[
589 | self._map_scalar_to_text('intersection', ped_annots['attributes']['intersection'])] += 1
590 | traffic_direction[self._map_scalar_to_text('traffic_direction',
591 | ped_annots['attributes']['traffic_direction'])] += 1
592 | signalized[self._map_scalar_to_text('signalized', ped_annots['attributes']['signalized'])] += 1
593 | gender[self._map_scalar_to_text('gender', ped_annots['attributes']['gender'])] += 1
594 |
595 | print('---------------------------------------------------------')
596 | print("Number of sets: %d" % set_count)
597 | print("Number of videos: %d" % video_count)
598 | print("Number of annotated frames: %d" % total_frames)
599 | print("Number of pedestrians %d" % ped_count)
600 | print("age:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(age.items())))
601 | print("gender:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(gender.items())))
602 | print("signal:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(signalized.items())))
603 | print("traffic direction:\n",
604 | '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(traffic_direction.items())))
605 | print("crossing:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(crossing.items())))
606 | print("intersection:\n", '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(intersection.items())))
607 | print("Number of pedestrian bounding boxes: %d" % ped_box_count)
608 | print("Number of traffic objects")
609 | for trf_obj, values in sorted(traffic_obj_types.items()):
610 | if isinstance(values, dict):
611 | print(trf_obj + ':\n', '\n '.join('{}: {}'.format(k, v) for k, v in sorted(values.items())),
612 | '\n total: ', sum(values.values()))
613 | else:
614 | print(trf_obj + ': %d' % values)
615 | print("Number of pedestrian bounding boxes:\n",
616 | '\n '.join('{}: {}'.format(tag, cnt) for tag, cnt in sorted(traffic_box_count.items())),
617 | '\n total: ', sum(traffic_box_count.values()))
618 |
619 | def balance_samples_count(self, seq_data, label_type, random_seed=42):
620 | """
621 | Balances the number of positive and negative samples by randomly sampling
622 | from the more represented samples. Only works for binary classes.
623 | :param seq_data: The sequence data to be balanced.
624 | :param label_type: The label type based on which the balancing takes place.
625 | The label values must be binary, i.e. only 0, 1.
626 | :param random_seed: The seed for random number generator.
627 | :return: Balanced data sequence.
628 | """
629 | for lbl in seq_data[label_type]:
630 | for i in lbl:
631 | if i[0] not in [0, 1]:
632 | raise Exception("The label values used for balancing must be"
633 | " either 0 or 1")
634 |
635 | # balances the number of positive and negative samples
636 | print('---------------------------------------------------------')
637 | print("Balancing the number of positive and negative intention samples")
638 |
639 | gt_labels = [gt[0] for gt in seq_data[label_type]]
640 | num_pos_samples = np.count_nonzero(np.array(gt_labels))
641 | num_neg_samples = len(gt_labels) - num_pos_samples
642 |
643 | new_seq_data = {}
644 | # finds the indices of the samples with larger quantity
645 | if num_neg_samples == num_pos_samples:
646 | print('Positive and negative samples are already balanced')
647 | return seq_data
648 | else:
649 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples))
650 | if num_neg_samples > num_pos_samples:
651 | rm_index = np.where(np.array(gt_labels) == 0)[0]
652 | else:
653 | rm_index = np.where(np.array(gt_labels) == 1)[0]
654 |
655 | # Calculate the difference of sample counts
656 | dif_samples = abs(num_neg_samples - num_pos_samples)
657 | # shuffle the indices
658 | np.random.seed(random_seed)
659 | np.random.shuffle(rm_index)
660 | # reduce the number of indices to the difference
661 | rm_index = rm_index[0:dif_samples]
662 | # update the data
663 | for k in seq_data:
664 | seq_data_k = seq_data[k]
665 | if not isinstance(seq_data[k], list):
666 | new_seq_data[k] = seq_data[k]
667 | else:
668 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index]
669 |
670 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]]
671 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels))
672 | print('Balanced:\t Positive: %d \t Negative: %d\n'
673 | % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples))
674 | return new_seq_data
675 |
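For reference, a toy call to the balancing helper above; 'pie' is a PIE instance as in the other sketches, and the dictionary contents are invented, shaped like the output of the sequence generators further below (per-frame label lists under a binary key).

```python
# Three toy tracks of five frames each; 'activities' holds one 0/1 label per frame.
toy = {'activities': [[[1]] * 5, [[0]] * 5, [[0]] * 5],
       'bbox':       [[[0., 0., 1., 1.]] * 5] * 3,
       'image_dimension': (1920, 1080)}          # non-list entries are passed through untouched

balanced = pie.balance_samples_count(toy, label_type='activities')
print(len(balanced['activities']))               # -> 2 (one surplus negative track dropped)
```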
676 | # Process pedestrian ids
677 | def _get_pedestrian_ids(self):
678 | """
679 | Returns all pedestrian ids
680 | :return: A list of pedestrian ids
681 | """
682 | annotations = self.generate_database()
683 | pids = []
684 | for sid in sorted(annotations):
685 | for vid in sorted(annotations[sid]):
686 | pids.extend(annotations[sid][vid]['ped_annotations'].keys())
687 | return pids
688 |
689 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False):
690 | """
691 | Generates and saves a random pedestrian ids
692 | :param image_set: The data split to return
693 | :param ratios: The ratios to split the data. There should be 2 ratios (or 3 if val_data is true)
694 | and they should sum to 1. e.g. [0.4, 0.6], [0.3, 0.5, 0.2]
695 | :param val_data: Whether to generate validation data
696 | :param regen_data: Whether to overwrite the existing data, i.e. regenerate splits
697 | :return: The random sample split
698 | """
699 |
700 | assert image_set in ['train', 'test', 'val']
701 | # Generates a list of behavioral xml file names for videos
702 | cache_file = join(self.cache_path, "random_samples.pkl")
703 | if isfile(cache_file) and not regen_data:
704 | print("Random sample currently exists.\n Loading from %s" % cache_file)
705 | with open(cache_file, 'rb') as fid:
706 | try:
707 | rand_samples = pickle.load(fid)
708 | except:
709 | rand_samples = pickle.load(fid, encoding='bytes')
710 | assert image_set in rand_samples, "%s does not exist in random samples\n" \
711 | "Please try again by setting regen_data = True" % image_set
712 | if val_data:
713 | assert len(rand_samples['ratios']) == 3, "The existing random samples " \
714 | "does not have validation data.\n" \
715 | "Please try again by setting regen_data = True"
716 | if ratios is not None:
717 | assert ratios == rand_samples['ratios'], "Specified ratios {} do not match the ones in existing file {}.\n\
718 | Perform one of the following options:\
719 | 1- Set ratios to None\
720 | 2- Set ratios to the same values \
721 | 3- Regenerate data".format(ratios, rand_samples['ratios'])
722 |
723 | print('The ratios are {}'.format(rand_samples['ratios']))
724 | print("Number of %s tracks %d" % (image_set, len(rand_samples[image_set])))
725 | return rand_samples[image_set]
726 |
727 | if ratios is None:
728 | if val_data:
729 | ratios = [0.5, 0.4, 0.1]
730 | else:
731 | ratios = [0.5, 0.5]
732 |
733 | assert sum(ratios) > 0.999999, "Ratios {} do not sum to 1".format(ratios)
734 | if val_data:
735 | assert len(ratios) == 3, "To generate validation data three ratios should be selected"
736 | else:
737 | assert len(ratios) == 2, "With no validation only two ratios should be selected"
738 |
739 | print("################ Generating Random training/testing data ################")
740 | ped_ids = self._get_pedestrian_ids()
741 | print("Toral number of tracks %d" % len(ped_ids))
742 | print('The ratios are {}'.format(ratios))
743 | sample_split = {'ratios': ratios}
744 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0])
745 | print("Number of train tracks %d" % len(train_samples))
746 |
747 | if val_data:
748 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:]))
749 | print("Number of val tracks %d" % len(val_samples))
750 | sample_split['val'] = val_samples
751 |
752 | print("Number of test tracks %d" % len(test_samples))
753 | sample_split['train'] = train_samples
754 | sample_split['test'] = test_samples
755 |
756 | cache_file = join(self.cache_path, "random_samples.pkl")
757 | with open(cache_file, 'wb') as fid:
758 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL)
759 | print('pie {} samples written to {}'.format('random', cache_file))
760 | return sample_split[image_set]
761 |
762 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1):
763 | """
764 | Generates kfold pedestrian ids
765 | :param image_set: Image set split
766 | :param num_folds: Number of folds
767 | :param fold: The given fold
768 | :return: List of pedestrian ids for the given fold
769 | """
770 | assert image_set in ['train', 'test'], "Image set should be either \"train\" or \"test\""
771 | assert fold <= num_folds, "Fold number should be smaller than number of folds"
772 | print("################ Generating %d fold data ################" % num_folds)
773 | cache_file = join(self.cache_path, "%d_fold_samples.pkl" % num_folds)
774 |
775 | if isfile(cache_file):
776 | print("Loading %d-fold data from %s" % (num_folds, cache_file))
777 | with open(cache_file, 'rb') as fid:
778 | try:
779 | fold_idx = pickle.load(fid)
780 | except:
781 | fold_idx = pickle.load(fid, encoding='bytes')
782 | else:
783 | ped_ids = self._get_pedestrian_ids()
784 | kf = KFold(n_splits=num_folds, shuffle=True)
785 | fold_idx = {'pid': ped_ids}
786 | count = 1
787 | for train_index, test_index in kf.split(ped_ids):
788 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()}
789 | count += 1
790 | with open(cache_file, 'wb') as fid:
791 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL)
792 | print('pie {}-fold samples written to {}'.format(num_folds, cache_file))
793 | print("Number of %s tracks %d" % (image_set, len(fold_idx[fold][image_set])))
794 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]]
795 | return kfold_ids
796 |
797 | # Trajectory data generation
798 | def _get_data_ids(self, image_set, params):
799 | """
800 | Generates set ids and ped ids (if needed) for processing
801 | :param image_set: Image-set to generate data
802 | :param params: Data generation params
803 | :return: Set and pedestrian ids
804 | """
805 | _pids = None
806 | if params['data_split_type'] == 'default':
807 | set_ids = self._get_image_set_ids(image_set)
808 | else:
809 | set_ids = self._get_image_set_ids('all')
810 | if params['data_split_type'] == 'random':
811 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params'])
812 | elif params['data_split_type'] == 'kfold':
813 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params'])
814 |
815 | return set_ids, _pids
816 |
817 | def _squarify(self, bbox, ratio, img_width):
818 | """
819 | Changes the ratio of bounding boxes to a fixed ratio
820 | :param bbox: Bounding box
821 | :param ratio: Ratio to be changed to
822 | :param img_width: Image width
823 | :return: Squarified bounding box
824 | """
825 | width = abs(bbox[0] - bbox[2])
826 | height = abs(bbox[1] - bbox[3])
827 | width_change = height * ratio - width
828 |
829 | bbox[0] = bbox[0] - width_change / 2
830 | bbox[2] = bbox[2] + width_change / 2
831 |
832 | if bbox[0] < 0:
833 | bbox[0] = 0
834 |
835 | # check whether the new bounding box goes beyond image borders
836 | # If this is the case, the bounding box is shifted back
837 | if bbox[2] > img_width:
838 | bbox[0] = bbox[0] - bbox[2] + img_width
839 | bbox[2] = img_width
840 | return bbox
841 |
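A worked number for the squarify step above, re-deriving the arithmetic outside the class (illustrative values only): a 50x200 pixel box widened to a 0.5 width/height ratio.

```python
x1, y1, x2, y2 = 100., 200., 150., 400.          # width 50, height 200
ratio = 0.5
width, height = abs(x1 - x2), abs(y1 - y2)
width_change = height * ratio - width            # 200 * 0.5 - 50 = 50
x1, x2 = x1 - width_change / 2, x2 + width_change / 2
print([x1, y1, x2, y2])                          # -> [75.0, 200.0, 175.0, 400.0]
```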
842 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion):
843 | """
844 | Checks whether the bounding boxes are within a given height limit. If not, it
845 | will adjust the length of bounding boxes in data sequences accordingly
846 | :param height_rng: Height limit [lower, higher]
847 | :param frame_ids: List of frame ids
848 | :param boxes: List of bounding boxes
849 | :param images: List of images
850 | :param occlusion: List of occlusions
851 | :return: The adjusted data sequences
852 | """
853 | imgs, box, frames, occ = [], [], [], []
854 | for i, b in enumerate(boxes):
855 | bbox_height = abs(b[1] - b[3])
856 | if height_rng[0] <= bbox_height <= height_rng[1]:
857 | box.append(b)
858 | imgs.append(images[i])
859 | frames.append(frame_ids[i])
860 | occ.append(occlusion[i])
861 | return imgs, box, frames, occ
862 |
863 | def _get_center(self, box):
864 | """
865 | Calculates the center coordinate of a bounding box
866 | :param box: Bounding box coordinates
867 | :return: The center coordinate
868 | """
869 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]
870 |
871 | def generate_data_trajectory_sequence(self, image_set, **opts):
872 | """
873 | Generates pedestrian tracks
874 | :param image_set: the split set to produce for. Options are train, test, val.
875 | :param opts:
876 | 'fstride': Frequency of sampling from the data.
877 | 'height_rng': The height range of pedestrians to use.
878 | 'squarify_ratio': The width/height ratio of bounding boxes. A value in (0,1]; 0 means the original
879 | ratio is used.
880 | 'data_split_type': How to split the data. Options: 'default', predefined sets, 'random', randomly split the data,
881 | and 'kfold', k-fold data split (NOTE: only train/test splits).
882 | 'seq_type': Sequence type to generate. Options: 'trajectory', generates tracks, 'crossing', generates
883 | tracks up to 'crossing_point', 'intention' generates tracks similar to human experiments
884 | 'min_track_size': Min track length allowable.
885 | 'random_params: Parameters for random data split generation. (see _get_random_pedestrian_ids)
886 | 'kfold_params: Parameters for kfold split generation. (see _get_kfold_pedestrian_ids)
887 | :return: Sequence data
888 | """
889 | params = {'fstride': 1,
890 | 'sample_type': 'all', # 'beh'
891 | 'height_rng': [0, float('inf')],
892 | 'squarify_ratio': 0,
893 | 'data_split_type': 'default', # kfold, random, default
894 | 'seq_type': 'intention',
895 | 'min_track_size': 15,
896 | 'random_params': {'ratios': None,
897 | 'val_data': True,
898 | 'regen_data': False},
899 | 'kfold_params': {'num_folds': 5, 'fold': 1}}
900 |
901 | for i in opts.keys():
902 | params[i] = opts[i]
903 |
904 | print('---------------------------------------------------------')
905 | print("Generating trajectory sequence data")
906 | self._print_dict(params)
907 | annot_database = self.generate_database()
908 | if params['seq_type'] == 'trajectory':
909 | sequence_data = self._get_trajectories(image_set, annot_database, **params)
910 | elif params['seq_type'] == 'crossing':
911 | sequence_data = self._get_crossing(image_set, annot_database, **params)
912 | elif params['seq_type'] == 'intention':
913 | sequence_data = self._get_intention(image_set, annot_database, **params)
914 |
915 | return sequence_data
916 |
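And a sketch of how the sequence generator above is typically driven; the option values are examples (see the params docstring for the full list), and 'pie' is a PIE instance as in the earlier sketch.

```python
seq_train = pie.generate_data_trajectory_sequence('train',
                                                  seq_type='crossing',
                                                  fstride=1,
                                                  sample_type='all',
                                                  data_split_type='default',
                                                  min_track_size=15)
print(len(seq_train['bbox']), 'tracks, keys:', sorted(seq_train.keys()))
```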
917 | def _get_trajectories(self, image_set, annotations, **params):
918 | """
919 | Generates trajectory data.
920 | :param image_set: Data split to use
921 | :param annotations: Annotations database
922 | :param params: Parameters to generate data (see generate_database)
923 | :return: A dictionary of trajectories
924 | """
925 | print('---------------------------------------------------------')
926 | print("Generating trajectory data")
927 |
928 | num_pedestrians = 0
929 | seq_stride = params['fstride']
930 | sq_ratio = params['squarify_ratio']
931 | height_rng = params['height_rng']
932 |
933 | image_seq, pids_seq = [], []
934 | box_seq, center_seq, occ_seq = [], [], []
935 | intent_seq = []
936 | obds_seq, gpss_seq, head_ang_seq, gpsc_seq, yrp_seq = [], [], [], [], []
937 |
938 | set_ids, _pids = self._get_data_ids(image_set, params)
939 |
940 | for sid in set_ids:
941 | for vid in sorted(annotations[sid]):
942 | img_width = annotations[sid][vid]['width']
943 | pid_annots = annotations[sid][vid]['ped_annotations']
944 | vid_annots = annotations[sid][vid]['vehicle_annotations']
945 | for pid in sorted(pid_annots):
946 | if params['data_split_type'] != 'default' and pid not in _pids:
947 | continue
948 | num_pedestrians += 1
949 | frame_ids = pid_annots[pid]['frames']
950 | boxes = pid_annots[pid]['bbox']
951 | images = [self._get_image_path(sid, vid, f) for f in frame_ids]
952 | occlusions = pid_annots[pid]['occlusion']
953 |
954 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
955 | images, boxes, frame_ids, occlusions = self._height_check(height_rng,
956 | frame_ids, boxes,
957 | images, occlusions)
958 |
959 | if len(boxes) / seq_stride < params['min_track_size']: # max_obs_size: #90 + 45
960 | continue
961 |
962 | if sq_ratio:
963 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
964 |
965 | image_seq.append(images[::seq_stride])
966 | box_seq.append(boxes[::seq_stride])
967 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride])
968 | occ_seq.append(occlusions[::seq_stride])
969 |
970 | ped_ids = [[pid]] * len(boxes)
971 | pids_seq.append(ped_ids[::seq_stride])
972 |
973 | intent = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes)
974 | intent_seq.append(intent[::seq_stride])
975 |
976 | gpsc_seq.append([(vid_annots[i]['latitude'], vid_annots[i]['longitude'])
977 | for i in frame_ids][::seq_stride])
978 | obds_seq.append([[vid_annots[i]['OBD_speed']] for i in frame_ids][::seq_stride])
979 | gpss_seq.append([[vid_annots[i]['GPS_speed']] for i in frame_ids][::seq_stride])
980 | head_ang_seq.append([[vid_annots[i]['heading_angle']] for i in frame_ids][::seq_stride])
981 | yrp_seq.append([(vid_annots[i]['yaw'], vid_annots[i]['roll'], vid_annots[i]['pitch'])
982 | for i in frame_ids][::seq_stride])
983 |
984 | print('Subset: %s' % image_set)
985 | print('Number of pedestrians: %d ' % num_pedestrians)
986 | print('Total number of samples: %d ' % len(image_seq))
987 |
988 | return {'image': image_seq,
989 | 'pid': pids_seq,
990 | 'bbox': box_seq,
991 | 'center': center_seq,
992 | 'occlusion': occ_seq,
993 | 'obd_speed': obds_seq,
994 | 'gps_speed': gpss_seq,
995 | 'heading_angle': head_ang_seq,
996 | 'gps_coord': gpsc_seq,
997 | 'yrp': yrp_seq,
998 | 'intention_prob': intent_seq}
999 |
1000 | def _get_crossing(self, image_set, annotations, **params):
1001 | """
1002 | Generates crossing data.
1003 | :param image_set: Data split to use
1004 | :param annotations: Annotations database
1005 | :param params: Parameters to generate data (see generate_database)
1006 | :return: A dictionary of trajectories
1007 | """
1008 |
1009 | print('---------------------------------------------------------')
1010 | print("Generating crossing data")
1011 |
1012 | num_pedestrians = 0
1013 | seq_stride = params['fstride']
1014 | sq_ratio = params['squarify_ratio']
1015 | height_rng = params['height_rng']
1016 |
1017 | image_seq, pids_seq = [], []
1018 | box_seq, center_seq, occ_seq = [], [], []
1019 | intent_seq = []
1020 | obds_seq, gpss_seq, head_ang_seq, gpsc_seq, yrp_seq = [], [], [], [], []
1021 | cross_points = []
1022 | activities = []
1023 |
1024 | set_ids, _pids = self._get_data_ids(image_set, params)
1025 |
1026 | for sid in set_ids:
1027 | for vid in sorted(annotations[sid]):
1028 | img_width = annotations[sid][vid]['width']
1029 | pid_annots = annotations[sid][vid]['ped_annotations']
1030 | vid_annots = annotations[sid][vid]['vehicle_annotations']
1031 | for pid in sorted(pid_annots):
1032 | if params['data_split_type'] != 'default' and pid not in _pids:
1033 | continue
1034 | num_pedestrians += 1
1035 |
1036 | frame_ids = pid_annots[pid]['frames']
1037 | event_frame = pid_annots[pid]['attributes']['crossing_point']
1038 |
1039 | end_idx = frame_ids.index(event_frame)
1040 | boxes = pid_annots[pid]['bbox'][:end_idx + 1]
1041 | frame_ids = frame_ids[: end_idx + 1]
1042 | images = [self._get_image_path(sid, vid, f) for f in frame_ids]
1043 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1]
1044 |
1045 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
1046 | images, boxes, frame_ids, occlusions = self._height_check(height_rng,
1047 | frame_ids, boxes,
1048 | images, occlusions)
1049 |
1050 | if len(boxes) / seq_stride < params['min_track_size']:
1051 | continue
1052 |
1053 | if sq_ratio:
1054 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
1055 |
1056 | image_seq.append(images[::seq_stride])
1057 | box_seq.append(boxes[::seq_stride])
1058 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride])
1059 | occ_seq.append(occlusions[::seq_stride])
1060 |
1061 | ped_ids = [[pid]] * len(boxes)
1062 | pids_seq.append(ped_ids[::seq_stride])
1063 |
1064 | intent = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes)
1065 | intent_seq.append(intent[::seq_stride])
1066 |
1067 | cross_point = [[pid_annots[pid]['bbox'][end_idx]]] * len(boxes)
1068 | cross_points.append(cross_point[::seq_stride])
1069 |
1070 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes)
1071 | activities.append(acts[::seq_stride])
1072 |
1073 | gpsc_seq.append([[(vid_annots[i]['latitude'], vid_annots[i]['longitude'])]
1074 | for i in frame_ids][::seq_stride])
1075 | obds_seq.append([[vid_annots[i]['OBD_speed']] for i in frame_ids][::seq_stride])
1076 | gpss_seq.append([[vid_annots[i]['GPS_speed']] for i in frame_ids][::seq_stride])
1077 | head_ang_seq.append([[vid_annots[i]['heading_angle']] for i in frame_ids][::seq_stride])
1078 | yrp_seq.append([[(vid_annots[i]['yaw'], vid_annots[i]['roll'], vid_annots[i]['pitch'])]
1079 | for i in frame_ids][::seq_stride])
1080 |
1081 | print('Subset: %s' % image_set)
1082 | print('Number of pedestrians: %d ' % num_pedestrians)
1083 | print('Total number of samples: %d ' % len(image_seq))
1084 |
1085 | return {'image': image_seq,
1086 | 'pid': pids_seq,
1087 | 'bbox': box_seq,
1088 | 'center': center_seq,
1089 | 'occlusion': occ_seq,
1090 | 'obd_speed': obds_seq,
1091 | 'gps_speed': gpss_seq,
1092 | 'heading_angle': head_ang_seq,
1093 | 'gps_coord': gpsc_seq,
1094 | 'cross_point': cross_points,
1095 | 'yrp': yrp_seq,
1096 | 'intention_prob': intent_seq,
1097 | 'activities': activities,
1098 | 'image_dimension': self._get_dim()}
1099 |
1100 | def _get_intention(self, image_set, annotations, **params):
1101 | """
1102 | Generates intention data.
1103 | :param image_set: Data split to use
1104 | :param annotations: Annotations database
1105 | :param params: Parameters to generate data (see generate_database)
1106 | :return: A dictionary of trajectories
1107 | """
1108 | print('---------------------------------------------------------')
1109 | print("Generating intention data")
1110 |
1111 | num_pedestrians = 0
1112 | seq_stride = params['fstride']
1113 | sq_ratio = params['squarify_ratio']
1114 | height_rng = params['height_rng']
1115 |
1116 | intention_prob, intention_binary = [], []
1117 | image_seq, pids_seq = [], []
1118 | box_seq, center_seq, occ_seq = [], [], []
1119 | set_ids, _pids = self._get_data_ids(image_set, params)
1120 |
1121 | for sid in set_ids:
1122 | for vid in sorted(annotations[sid]):
1123 | img_width = annotations[sid][vid]['width']
1124 | pid_annots = annotations[sid][vid]['ped_annotations']
1125 | for pid in sorted(pid_annots):
1126 | if params['data_split_type'] != 'default' and pid not in _pids:
1127 | continue
1128 | num_pedestrians += 1
1129 | exp_start_frame = pid_annots[pid]['attributes']['exp_start_point']
1130 | critical_frame = pid_annots[pid]['attributes']['critical_point']
1131 | frames = pid_annots[pid]['frames']
1132 |
1133 | start_idx = frames.index(exp_start_frame)
1134 | end_idx = frames.index(critical_frame)
1135 |
1136 | boxes = pid_annots[pid]['bbox'][start_idx:end_idx + 1]
1137 | frame_ids = frames[start_idx:end_idx + 1]
1138 | images = [self._get_image_path(sid, vid, f) for f in frame_ids]
1139 | occlusions = pid_annots[pid]['occlusion'][start_idx:end_idx + 1]
1140 |
1141 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
1142 | images, boxes, frame_ids, occlusions = self._height_check(height_rng,
1143 | frame_ids, boxes,
1144 | images, occlusions)
1145 | if len(boxes) / seq_stride < params['min_track_size']:
1146 | continue
1147 |
1148 | if sq_ratio:
1149 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
1150 |
1151 | int_prob = [[pid_annots[pid]['attributes']['intention_prob']]] * len(boxes)
1152 | int_bin = [[int(pid_annots[pid]['attributes']['intention_prob'] > 0.5)]] * len(boxes)
1153 |
1154 | image_seq.append(images[::seq_stride])
1155 | box_seq.append(boxes[::seq_stride])
1156 | occ_seq.append(occlusions[::seq_stride])
1157 |
1158 | intention_prob.append(int_prob[::seq_stride])
1159 | intention_binary.append(int_bin[::seq_stride])
1160 |
1161 | ped_ids = [[pid]] * len(boxes)
1162 | pids_seq.append(ped_ids[::seq_stride])
1163 |
1164 | print('Subset: %s' % image_set)
1165 | print('Number of pedestrians: %d ' % num_pedestrians)
1166 | print('Total number of samples: %d ' % len(image_seq))
1167 |
1168 | return {'image': image_seq,
1169 | 'bbox': box_seq,
1170 | 'occlusion': occ_seq,
1171 | 'intention_prob': intention_prob,
1172 | 'intention_binary': intention_binary,
1173 | 'ped_id': pids_seq}
1174 |
--------------------------------------------------------------------------------
/utils/jaad_data.py:
--------------------------------------------------------------------------------
1 | '''
2 | Rewritten after the original jaad_data.py interface.
3 | '''
4 | import sys
5 | import pickle
6 | import cv2
7 |
8 | import numpy as np
9 | import xml.etree.ElementTree as ET
10 |
11 | from os.path import join, abspath, exists
12 | from os import listdir, makedirs
13 | from sklearn.model_selection import train_test_split, KFold
14 |
15 | class JAAD(object):
16 | def __init__(self, data_path='', regen_pkl=False):
17 | '''
18 | :param data_path: Path to the folder of the dataset, default is current dir.
19 | :param regen_pkl: Whether to regenerate the database.
20 | '''
21 | self._name = 'JAAD'
22 | self._regen_pkl = regen_pkl
23 | self._image_ext = '.png'
24 |
25 | #Paths
26 | self._jaad_path = data_path if data_path else self._get_default_path()
27 | assert exists(self._jaad_path), 'Jaad path does not exist: {}'.format(self._jaad_path)
28 | self._data_split_ids_path = join(self._jaad_path, 'split_ids')
29 | self._annotation_path = join(self._jaad_path, 'annotations')
30 | self._annotation_vehicle_path = join(self._jaad_path, 'annotations_vehicle')
31 | self._annotation_traffic_path = join(self._jaad_path, 'annotations_traffic')
32 | self._annotation_attributes_path = join(self._jaad_path, 'annotations_attributes')
33 | self._annotation_appearance_path = join(self._jaad_path, 'annotations_appearance')
34 | self._clips_path = join(self._jaad_path, 'JAAD_clips')
35 | self._images_path = join(self._jaad_path, 'images')
36 |
37 | @property
38 | def cache_path(self):
39 | '''
40 | generate a path to save cache files.
41 | :return: Cache file folder path
42 | '''
43 | cache_path = abspath(join(self._jaad_path, 'data_cache'))
44 | if not exists(cache_path):
45 | makedirs(cache_path)
46 | return cache_path
47 |
48 |
49 | def _get_default_path(self):
50 | '''
51 | return the default data_path where the raw JAAD files are expected to be placed.
52 | '''
53 | return 'dataset/jaad'
54 |
55 | def _get_video_ids(self):
56 | '''
57 | return a list of all video ids
58 | :return: the list of all video ids
59 | '''
60 | return [vid.split('.')[0] for vid in listdir(self._annotation_path)]
61 |
62 |
63 | def update_progress(self, progress):
64 | '''
65 | create a progress bar.
66 | :param progress: the progress thus far
67 | '''
68 | barLength = 20
69 | status = ''
70 | if isinstance(progress, int):
71 | progress = float(progress)
72 | block = int(round(barLength * progress))
73 | text = '\r[{}] {:0.2f}% {}'.format('#' * block + '-' * (barLength - block), progress * 100, status)
74 | sys.stdout.write(text)
75 | sys.stdout.flush()
76 |
77 |
78 | def extract_and_save_images(self):
79 | '''
80 | Extract images from clips and save on drive
81 | '''
82 | videos = [f.split('.')[0] for f in sorted(listdir(self._clips_path))]
83 | #eg: clip_path: JAAD_clips/---->video_0001.mp4
84 | # |-->video_0002.mp4
85 | # |-->video_0003.mp4
86 | # |-->video_0004.mp4
87 | # --->video_0005.mp4
88 | #get videos = [video_0001, video_0002, video_0003, video_0004, video_0005]
89 | for vid in videos:
90 | path_to_file = join(self._annotation_path, vid + '.xml')
91 | #path: annotations/vidXXX.xml
92 | print(vid)
93 | tree = ET.parse(path_to_file)#load element tree.
94 | num_frames = int(tree.find('./meta/task/size').text)
95 | video_clip_path = join(self._clips_path, vid + '.mp4')
96 | #path: JAAD_clips/vidXXX.mp4
97 | save_images_path = join(self._images_path, vid)
98 | #path: images/vidXXX
99 | if not exists(save_images_path):
100 | makedirs(save_images_path)
101 | vidcap = cv2.VideoCapture(video_clip_path)
102 | success, image = vidcap.read()
103 | frame_num = 0
104 | img_count = 0
105 | if not success:
106 | print('Failed to open the video {}'.format(vid))
107 | while success:
108 | self.update_progress(img_count / num_frames)
109 | img_count += 1
110 | img_path = join(save_images_path, '{:05d}.png'.format(frame_num))
111 | if not exists(img_path):
112 | cv2.imwrite(img_path, image)
113 | success, image = vidcap.read()
114 | frame_num += 1
115 | if num_frames != img_count:
116 | print('num images don\'t match {}/{}'.format(num_frames, img_count))
117 | print('\n')
118 |
119 |
120 | def _map_text_to_scalar(self, label_type, value):
121 | '''
122 | maps a text label in XML file to scalars
123 | :param label_type: the label type
124 | :param value: the text to be mapped; returns the scalar value
125 | '''
126 | map_dic = {'occlusion': {'none': 0, 'part': 1, 'full': 2},
127 | 'action': {'standing': 0, 'walking': 1},
128 | 'nod': {'__undefined__': 0, 'nodding': 1},
129 | 'look': {'not-looking': 0, 'looking': 1},
130 | 'hand_gesture': {'__undefined__': 0, 'greet': 1, 'yield': 2,
131 | 'rightofway': 3, 'other': 4},
132 | 'reaction': {'__undefined__': 0, 'clear_path': 1, 'speed_up': 2,
133 | 'slow_down': 3},
134 | 'cross': {'not-crossing': 0, 'crossing': 1, 'irrelevant': -1},
135 | 'age': {'child': 0, 'young': 1, 'adult': 2, 'senior': 3},
136 | 'designated': {'ND': 0, 'D': 1},
137 | 'gender': {'n/a': 0, 'female': 1, 'male': 2},
138 | 'intersection': {'no': 0, 'yes': 1},
139 | 'motion_direction': {'n/a': 0, 'LAT': 1, 'LONG': 2},
140 | 'traffic_direction': {'OW': 0, 'TW': 1},
141 | 'signalized': {'n/a': 0, 'NS': 1, 'S': 2},
142 | 'vehicle': {'stopped': 0, 'moving_slow': 1, 'moving_fast': 2,
143 | 'decelerating': 3, 'accelerating': 4},
144 | 'road_type': {'street': 0, 'parking_lot': 1, 'garage': 2},
145 | 'traffic_light': {'n/a': 0, 'red': 1, 'green': 2}}
146 | return map_dic[label_type][value]
147 |
148 | def _map_scalar_to_text(self, label_type, value):
149 | '''
150 | maps a scalar value to a text label
151 | :param label_type: the label type
152 | :param value: the scalar to be mapped
153 | :return: the text label
154 | '''
155 | map_dic = {'occlusion': {0: 'none', 1: 'part', 2: 'full'},
156 | 'action': {0: 'standing', 1: 'walking'},
157 | 'nod': {0: '__undefined__', 1: 'nodding'},
158 | 'look': {0: 'not-looking', 1: 'looking'},
159 | 'hand_gesture': {0: '__undefined__', 1: 'greet',
160 | 2: 'yield', 3: 'rightofway',
161 | 4: 'other'},
162 | 'reaction': {0: '__undefined__', 1: 'clear_path',
163 | 2: 'speed_up', 3: 'slow_down'},
164 | 'cross': {0: 'not-crossing', 1: 'crossing', -1: 'irrelevant'},
165 | 'age': {0: 'child', 1: 'young', 2: 'adult', 3: 'senior'},
166 | 'designated': {0: 'ND', 1: 'D'},
167 | 'gender': {0: 'n/a', 1: 'female', 2: 'male'},
168 | 'intersection': {0: 'no', 1: 'yes'},
169 | 'motion_direction': {0: 'n/a', 1: 'LAT', 2: 'LONG'},
170 | 'traffic_direction': {0: 'OW', 1: 'TW'},
171 | 'signalized': {0: 'n/a', 1: 'NS', 2: 'S'},
172 | 'vehicle': {0: 'stopped', 1: 'moving_slow', 2: 'moving_fast',
173 | 3: 'decelerating', 4: 'accelerating'},
174 | 'road_type': {0: 'street', 1: 'parking_lot', 2: 'garage'},
175 | 'traffic_light': {0: 'n/a', 1: 'red', 2: 'green'}}
176 | return map_dic[label_type][value]
177 |
178 |
179 | def _get_annotations(self, vid):
180 | '''
181 | Generates a dictionary of annotations by parsing the video XML file
182 | :param vid: the id of video to parse
183 | :return: a dictionary of annotations
184 | '''
185 | path_to_file = join(self._annotation_path, vid + '.xml')
186 | tree = ET.parse(path_to_file)
187 | ped_annt = 'ped_annotations'
188 | annotations = {}
189 | annotations['num_frames'] = int(tree.find('./meta/task/size').text)
190 | annotations['width'] = int(tree.find('./meta/task/original_size/width').text)
191 | annotations['height'] = int(tree.find('./meta/task/original_size/height').text)
192 | annotations[ped_annt] = {}
193 | ped_tracks = tree.findall('./track')
194 | for t in ped_tracks:
195 | boxes = t.findall('./box')
196 | new_id = boxes[0].find('./attribute[@name=\"id\"]').text
197 | old_id = boxes[0].find('./attribute[@name=\"old_id\"]').text
198 | annotations[ped_annt][new_id] = {'old_id': old_id, 'frames': [], 'bbox': [], 'occlusion': []}
199 | if 'pedestrian' in old_id:
200 | annotations['ped_annotations'][new_id]['behavior'] = {
201 | 'cross': [], 'reaction': [], 'hand_gesture': [], 'look': [], 'action': [], 'nod': []
202 | }
203 | else:
204 | annotations[ped_annt][new_id]['behavior'] = {}
205 | for b in boxes:
206 | annotations[ped_annt][new_id]['bbox'].append(
207 | [float(b.get('xtl')), float(b.get('ytl')),
208 | float(b.get('xbr')), float(b.get('ybr'))]
209 | )
210 | occ = self._map_text_to_scalar('occlusion',b.find('./attribute[@name=\"occlusion\"]').text)
211 | annotations[ped_annt][new_id]['occlusion'].append(occ)
212 | annotations[ped_annt][new_id]['frames'].append(int(b.get('frame')))
213 | for beh in annotations['ped_annotations'][new_id]['behavior'].keys():
214 | annotations[ped_annt][new_id]['behavior'][beh].append(self._map_text_to_scalar(beh, b.find('./attribute[@name=\"' + beh + '\"]').text))
215 | return annotations
216 |
217 | def _get_ped_attributes(self, vid):
218 | '''
219 | Generates a dictionary of attributes by parsing the video XML file.
220 | :param vid: the id of the video to parse
221 | :return: a dictionary of attributes.
222 | '''
223 | path_to_file = join(self._annotation_attributes_path, vid + '_attributes.xml')
224 | tree = ET.parse(path_to_file)
225 | attributes = {}
226 | pedestrians = tree.findall('./pedestrian')
227 | for p in pedestrians:
228 | new_id = p.get('id')
229 | old_id = p.get('old_id')
230 | attributes[new_id] = {'old_id': old_id}
231 | for k, v in p.items():
232 | if 'id' in k:
233 | continue
234 | try:
235 | attributes[new_id][k] = int(v)
236 | except ValueError:
237 | attributes[new_id][k] = self._map_text_to_scalar(k, v)
238 | return attributes
239 |
240 | def _get_ped_appearance(self, vid):
241 | '''
242 | Generates a dictionary of appearance annotations by parsing the video XML file.
243 |         :param vid: the id of video to parse. The labels are as follows:
244 | -pose_front, pose_back... - coarse pose of the pedestrian relative to the camera.
245 | -clothes_below_knee - long clothing.
246 | -clothes_upper_light, clothes_lower_dark... - coarse clothing color above/below waist.
247 |             -backpack - presence of a backpack (worn on the back, not held in hand).
248 | -bag_hand, bag_elbow, bag_shoulder - whether bag(s) are held in a hand, on a bent elbow or worn on a shoulder.
249 | -bag_left_side, bag_right_side - whether bag(s) appear in the left/right side of the pedestrian body.
250 | -cap, hood - headwear.
251 | -umbrella, phone, baby, object - various things carried by the pedestrians.
252 |             -stroller/cart - objects being pushed by the pedestrian.
253 |             -bicycle/motorcycle - for pedestrians riding or walking these vehicles.
254 | :return: A dictionary of appearance annotations.
255 | '''
256 | labels = ['pose_front', 'pose_back', 'pose_left', 'pose_right',
257 | 'clothes_below_knee', 'clothes_upper_light', 'clothes_upper_dark', 'clothes_lower_light',
258 | 'clothes_lower_dark', 'backpack', 'bag_hand', 'bag_elbow',
259 | 'bag_shoulder', 'bag_left_side', 'bag_right_side', 'cap',
260 | 'hood', 'sunglasses', 'umbrella', 'phone',
261 | 'baby', 'object', 'stroller_cart', 'bicycle_motorcycle']
262 | path_to_file = join(self._annotation_appearance_path, vid + '_appearance.xml')
263 | tree = ET.parse(path_to_file)
264 | annotations = {}
265 | ped_tracks = tree.findall('./track')
266 | for t in ped_tracks:
267 | boxes = t.findall('./box')
268 | new_id = t.get('id')
269 | annotations[new_id] = dict(zip(labels, [[] for _ in range(len(labels))]))
270 | annotations[new_id]['frames'] = []
271 | for b in boxes:
272 | annotations[new_id]['frames'].append(int(b.get('frame')))
273 | for l in labels:
274 | annotations[new_id][l].append(b.get(l))
275 | return annotations
276 |
277 | def _get_vehicle_attributes(self, vid):
278 | '''
279 | Generate a dictionary of vehicle attributes by parsing the video XML file.
280 | :param vid: the id of the video to parse.
281 |         :return: a dictionary of vehicle attributes.
282 | '''
283 | path_to_file = join(self._annotation_vehicle_path, vid + '_vehicle.xml')
284 | tree = ET.parse(path_to_file)
285 | veh_attributes = {}
286 | frames = tree.findall('./frame')
287 | for f in frames:
288 | veh_attributes[int(f.get('id'))] = self._map_text_to_scalar('vehicle', f.get('action'))
289 | return veh_attributes
290 |
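    # Illustrative output of _get_vehicle_attributes() (frame ids are made up): a flat map
    # from frame id to the 'vehicle' scalar defined in _map_text_to_scalar, e.g.
    #   {0: 2, 1: 2, 2: 3, ...}   # 2 = 'moving_fast', 3 = 'decelerating'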
291 | def _get_traffic_attributes(self, vid):
292 | '''
293 |         Generates a dictionary of traffic attributes by parsing the video XML file.
294 | :param vid: the id of video to parse.
295 |         :return: a dictionary of traffic attributes.
296 | '''
297 |         path_to_file = join(self._annotation_traffic_path, vid + '_traffic.xml')
298 | tree = ET.parse(path_to_file)
299 | road_type = tree.find('./road_type').text
300 | traffic_attributes = {'road_type': self._map_text_to_scalar('road_type', road_type)}
301 | frames = tree.findall('./frame')
302 | for f in frames:
303 | traffic_attributes[int(f.get('id'))] = {'ped_crossing': int(f.get('ped_crossing')),
304 | 'ped_sign': int(f.get('ped_sign')),
305 | 'stop_sign': int(f.get('stop_sign')),
306 | 'traffic_light': self._map_text_to_scalar('traffic_light',
307 | f.get('traffic_light'))}
308 | return traffic_attributes
309 |
310 | def generate_database(self):
311 | '''
312 |         Generates a database for the JAAD dataset by integrating all annotations.
313 | Dictionary structure
314 | 'vid_id'(str):{
315 | 'num_frames': int,
316 | 'width': int,
317 | 'height': int,
318 | 'ped_annotations'(str): {
319 | 'ped_id'(str):{
320 | 'old_id': str,
321 | 'frames': list(int),
322 | 'occlusion': list(int),
323 | 'bbox': list([x1, y1, x2, y2]),
324 | 'behavior'(str): {
325 | 'action': list(int),
326 | 'reaction': list(int),
327 | 'nod': list(int),
328 | 'hand_gesture': list(int),
329 | 'cross': list(int),
330 | 'look': list(int)
331 | }
332 | 'appearance'(str): {
333 | 'pose_front': list(int),
334 | 'pose_back': list(int),
335 | 'pose_left': list(int),
336 | 'pose_right': list(int),
337 | 'clothes_below_knee': list(int),
338 | 'clothes_upper_light': list(int),
339 | 'clothes_upper_dark': list(int),
340 | 'clothes_lower_light': list(int),
341 | 'clothes_lower_dark': list(int),
342 | 'backpack': list(int),
343 | 'bag_hand': list(int),
344 | 'bag_elbow': list(int),
345 | 'bag_shoulder': list(int),
346 | 'bag_left_side': list(int),
347 | 'bag_right_side': list(int),
348 | 'cap': list(int),
349 | 'hood': list(int),
350 | 'sunglasses': list(int),
351 | 'umbrella': list(int),
352 | 'phone': list(int),
353 | 'baby': list(int),
354 | 'object': list(int),
355 | 'stroller_cart': list(int),
356 | 'bicycle_motorcycle': list(int)
357 | }
358 | 'attributes'(str): {
359 | 'age': int,
360 | 'old_id': str,
361 | 'num_lanes': int,
362 | 'crossing': int,
363 | 'gender': int,
364 | 'crossing_point': int,
365 | 'decision_point': int,
366 | 'intersection': int,
367 | 'designated': int,
368 | 'signalized': int,
369 | 'traffic_direction': int,
370 | 'group_size': int,
371 | 'motion_direction': int
372 | }
373 | }
374 | }
375 | 'vehicle_annotations'(str): {
376 | frames(int): {
377 | 'action': int
378 | }
379 | }
380 | 'traffic_annotations'(str): {
381 | road_type: int,
382 | frames(int): {
383 | 'ped_crossing': int,
384 | 'ped_sign': int,
385 | 'stop_sign': int,
386 | 'traffic_light': int
387 | }
388 | }
389 | }
390 | :return: A database dictionary
391 | '''
392 | print('------------------------------------------------')
393 |         print('Generating database for jaad')
394 | #Generates a list of behavioral xml file names for videos
395 | cache_file = join(self.cache_path, 'jaad_database.pkl')
396 | if exists(cache_file) and not self._regen_pkl:
397 | with open(cache_file, 'rb') as fid:
398 | try:
399 | database = pickle.load(fid)
400 | except:
401 | database = pickle.load(fid, encoding='bytes')
402 | print('jaad database loaded from {}'.format(cache_file))
403 | return database
404 |
405 | video_ids = sorted(self._get_video_ids())
406 | database = {}
407 | for vid in video_ids:
408 | print('Getting annotations for %s' % vid)
409 | vid_annotations = self._get_annotations(vid)
410 | vid_attributes = self._get_ped_attributes(vid)
411 | vid_appearance = self._get_ped_appearance(vid)
412 | vid_veh_annotations = self._get_vehicle_attributes(vid)
413 | vid_traffic_annotations = self._get_traffic_attributes(vid)
414 | #combining all annotations.
415 | vid_annotations['vehicle_annotations'] = vid_veh_annotations
416 | vid_annotations['traffic_annotations'] = vid_traffic_annotations
417 | for ped in vid_annotations['ped_annotations']:
418 | try:
419 | vid_annotations['ped_annotations'][ped]['attributes'] = vid_attributes[ped]
420 | except KeyError:
421 | vid_annotations['ped_annotations'][ped]['attributes'] = {}
422 | try:
423 | vid_annotations['ped_annotations'][ped]['appearance'] = vid_appearance[ped]
424 | except KeyError:
425 | vid_annotations['ped_annotations'][ped]['appearance'] = {}
426 | database[vid] = vid_annotations
427 | with open(cache_file, 'wb') as fid:
428 | pickle.dump(database, fid, pickle.HIGHEST_PROTOCOL)
429 | print('The database is written to {}'.format(cache_file))
430 | return database
431 |
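    # Minimal usage sketch, assuming the dataset class defined earlier in this file has been
    # instantiated as, e.g., db = JAAD(data_path='./JAAD') (instance name and path are illustrative):
    #   database = db.generate_database()
    #   vid = sorted(database.keys())[0]
    #   print(database[vid]['num_frames'], list(database[vid]['ped_annotations'])[:3])
    # The first call parses every XML file and caches the result to
    # <cache_path>/jaad_database.pkl; later calls load the pickle unless _regen_pkl is set.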
432 |
433 | def get_data_stats(self):
434 | '''
435 | Generate statistics for jaad database.
436 | '''
437 | annotations = self.generate_database()
438 | videos_count = len(annotations.keys())
439 | ped_box_beh_count = 0
440 | ped_beh_count = 0
441 | ped_count = 0
442 | ped_box_count = 0
443 | people_count = 0
444 | people_box_count = 0
445 | total_frames = 0
446 | for vid in annotations:
447 | total_frames += annotations[vid]['num_frames']
448 | for ped in annotations[vid]['ped_annotations']:
449 | if 'b' in ped:
450 | ped_beh_count += 1
451 | ped_box_beh_count += len(annotations[vid]['ped_annotations'][ped]['bbox'])
452 | elif 'p' in ped:
453 | people_count += 1
454 | people_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox'])
455 | else:
456 | ped_count += 1
457 | ped_box_count += len(annotations[vid]['ped_annotations'][ped]['bbox'])
458 | print('---------------------------------------------------------')
459 | print('Number of videos: %d' % videos_count)
460 | print("Number of frames: %d" % total_frames)
461 | print("Number of pedestrians with behavior tag: %d" % ped_beh_count)
462 | print("Number of pedestrians with no behavior tag: %d" % ped_count)
463 | print("Number of people: %d" % people_count)
464 | print("Total number of pedestrians: %d" % (ped_count + ped_beh_count + people_count))
465 |
466 | print("Number of pedestrian bounding boxes with behavior tag: %d" % ped_box_beh_count)
467 | print("Number of pedestrian bounding boxes with no behavior tag: %d" % ped_box_count)
468 | print("Number of people bounding boxes: %d" % people_box_count)
469 | print("Total number of pedestrian bounding boxes: %d" % (ped_box_beh_count + ped_box_count))
470 |
471 |
472 | def balance_samples_count(self, seq_data, label_type, random_seed=42):
473 | '''
474 |         Balances the number of positive and negative samples by randomly sampling
475 | from the more represented samples. Only works for binary classes.
476 | :param seq_data: the sequence data to be balanced.
477 |         :param label_type: the label type based on which the balancing takes place.
478 | the label values must be binary, i.e. only 0, 1.
479 | :param random_seed: the seed for random number generator.
480 | :return: balanced data sequence.
481 | '''
482 | for lbl in seq_data[label_type]:
483 | for i in lbl:
484 | if i[0] not in [0, 1]:
485 | raise Exception('The label values used for balancing must be either 0 or 1')
486 | #balances the number of positive and negative samples.
487 | print('----------------------------------------------------------')
488 | print('Balancing the number of positive and negative intention samples.')
489 | gt_labels = [gt[0] for gt in seq_data[label_type]]
490 | num_pos_samples = np.count_nonzero(np.array(gt_labels))
491 | num_neg_samples = len(gt_labels) - num_pos_samples
492 | new_seq_data = {}
493 |         #finds the indices of the samples with the larger quantity
494 | if num_neg_samples == num_pos_samples:
495 | print('Positive and negative are already balanced.')
496 | return seq_data
497 | else:
498 | print('Unbalanced: \t Positive: {} \t Negative: {}'.format(num_pos_samples, num_neg_samples))
499 | if num_neg_samples > num_pos_samples:
500 | rm_index = np.where(np.array(gt_labels) == 0)[0]
501 | else:
502 | rm_index = np.where(np.array(gt_labels) == 1)[0]
503 | #Calculate the difference of sample counts.
504 | dif_samples = abs(num_neg_samples - num_pos_samples)
505 | np.random.seed(random_seed)
506 | np.random.shuffle(rm_index)
507 | #reduce the number of indices to the difference.
508 | rm_index = rm_index[0:dif_samples]
509 |         #update the data.
510 | for k in seq_data:
511 | seq_data_k = seq_data[k]
512 | if not isinstance(seq_data[k], list):
513 | new_seq_data[k] = seq_data[k]
514 | else:
515 | new_seq_data[k] = [seq_data_k[i] for i in range(0, len(seq_data_k)) if i not in rm_index]
516 | new_gt_labels = [gt[0] for gt in new_seq_data[label_type]]
517 | num_pos_samples = np.count_nonzero(np.array(new_gt_labels))
518 | print('Balanced:\t Positive: %d \t Negative: %d\n' % (num_pos_samples, len(new_seq_data[label_type]) - num_pos_samples))
519 | return new_seq_data
520 |
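    # Worked example with hypothetical numbers: if seq_data['intent'] holds 120 positive
    # and 300 negative tracks, 180 randomly chosen negative indices are dropped so the
    # returned sequence is 120/120. Keys whose values are not lists (e.g. the
    # 'image_dimension' tuple produced by _get_crossing) are copied through unchanged.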
521 |
522 | def _get_video_ids_split(self, image_set, subset='default'):
523 | '''
524 | Returns a list of video ids for a given data split.
525 | :param image_set: Data split, train, test, val.
526 | :return: the list of video ids.
527 | '''
528 | vid_ids = []
529 | sets = [image_set] if image_set != 'all' else ['train', 'test', 'val']
530 | for s in sets:
531 | vid_id_file = join(self._data_split_ids_path, subset, s + '.txt')
532 | with open(vid_id_file, 'rt') as fid:
533 | vid_ids.extend([x.strip() for x in fid.readlines()])
534 | return vid_ids
535 |
536 | def _get_pedestrian_ids(self, sample_type='all'):
537 | '''
538 | Get all pedestrian ids.
539 | :return: A list of pedestrian ids.
540 | '''
541 | annotations = self.generate_database()
542 | pids = []
543 | for vid in sorted(annotations):
544 | if sample_type == 'beh':
545 | pids.extend([p for p in annotations[vid]['ped_annotations'].keys() if 'b' in p])
546 | else:
547 |                 pids.extend(annotations[vid]['ped_annotations'].keys())
548 | return pids
549 |
550 | def _get_random_pedestrian_ids(self, image_set, ratios=None, val_data=True, regen_data=False, sample_type='all'):
551 | '''
552 |         Generates and saves a random train/test(/val) split of pedestrian ids.
553 | :param image_set: The data split to return.
554 | :param ratios: The ratios to split the data. There should be 2 ratios(or 3 if val_data is true) and they should sum to 1. e.g. [0.4, 0.6], [0.3, 0.5, 0.2].
555 | :param val_data: Whether to generate validation data.
556 | :param regen_data: Whether to overwrite the existing data.
557 |         :return: The random sample split.
558 | '''
559 | assert image_set in ['train', 'test', 'val']
560 | cache_file = join(self.cache_path, 'random_samples.pkl')
561 | if exists(cache_file) and not regen_data:
562 |             print('Random sample currently exists.\n Loading from %s' % cache_file)
563 | with open(cache_file, 'rb') as fid:
564 | try:
565 | rand_samples = pickle.load(fid)
566 | except:
567 | rand_samples = pickle.load(fid, encoding='bytes')
568 | assert image_set in rand_samples, '%s does not exist in random samples\n Please try again by setting regen_data = True' % image_set
569 | if val_data:
570 |                 assert len(rand_samples['ratios']) == 3, 'The existing random samples do not have validation data.\n Please try again by setting regen_data = True'
571 | if ratios is not None:
572 |                 assert ratios == rand_samples['ratios'], 'Specified ratios {} do not match the ones in existing file {}.\n\
573 | Perform one of the following options:\
574 | 1- Set ratios to None\
575 | 2- Set ratios to the same values\
576 | 3- Regenerate data'.format(ratios, rand_samples['ratios'])
577 | print('The ratios are {}'.format(rand_samples['ratios']))
578 | print('Number of %s tracks %d' % (image_set, len(rand_samples[image_set])))
579 | return rand_samples[image_set]
580 | if ratios is None:
581 | if val_data:
582 | ratios = [0.5, 0.4, 0.1]
583 | else:
584 | ratios = [0.5, 0.5]
585 | assert sum(ratios) > 0.999999, 'Ratios {} do not sum to 1'.format(ratios)
586 | if val_data:
587 | assert len(ratios) == 3, 'To generate validation data three ratios should be selected.'
588 | else:
589 | assert len(ratios) == 2, 'With no validation only two ratios should be selected.'
590 |         print('############# Generating Random training/testing data #############')
591 | ped_ids = self._get_pedestrian_ids(sample_type)
592 | print('Total number of tracks %d' % len(ped_ids))
593 |         print('The ratios are {}'.format(ratios))
594 | sample_split = {'ratios': ratios}
595 | train_samples, test_samples = train_test_split(ped_ids, train_size=ratios[0])
596 | print('Number of train tracks %d' % len(train_samples))
597 | if val_data:
598 | test_samples, val_samples = train_test_split(test_samples, train_size=ratios[1] / sum(ratios[1:]))
599 |             print('Number of val tracks %d' % len(val_samples))
600 | sample_split['val'] = val_samples
601 | print('Number of test tracks %d' % len(test_samples))
602 | sample_split['train'] = train_samples
603 | sample_split['test'] = test_samples
604 | cache_file = join(self.cache_path, 'random_samples.pkl')
605 | with open(cache_file, 'wb') as fid:
606 | pickle.dump(sample_split, fid, pickle.HIGHEST_PROTOCOL)
607 | print('jaad {} samples written to {}'.format('random', cache_file))
608 | return sample_split[image_set]
609 |
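    # Usage sketch (values illustrative): with ratios=None and val_data=True the default
    # split is [0.5, 0.4, 0.1], i.e. 50% of the pedestrian ids go to train, 40% to test
    # and 10% to val; the split is cached in <cache_path>/random_samples.pkl and reused
    # unless regen_data=True.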
610 | def _get_kfold_pedestrian_ids(self, image_set, num_folds=5, fold=1, sample_type='all'):
611 | '''
612 | Generate kfold pedestrian ids.
613 | :param image_set: Image set split.
614 | :param num_folds: Number of folds.
615 | :param fold: The given fold.
616 | :return: List of pedestrian ids for the given fold.
617 | '''
618 | assert image_set in ['train', 'test'], 'For Kfold data split, image_set should be either \'train\' or \'test\''
619 | assert fold <= num_folds, 'Fold number should be smaller than number of folds'
620 | print('############# Generating %d fold data #############' % num_folds)
621 | cache_file = join(self.cache_path, '%d_fold_samples.pkl' % num_folds)
622 | if exists(cache_file):
623 | print('Loading %d-fold data from %s' % (num_folds, cache_file))
624 | with open(cache_file, 'rb') as fid:
625 | try:
626 | fold_idx = pickle.load(fid)
627 | except:
628 | fold_idx = pickle.load(fid, encoding='bytes')
629 | else:
630 | ped_ids = self._get_pedestrian_ids(sample_type)
631 | kf = KFold(n_splits=num_folds, shuffle=True)
632 | fold_idx = {'pid': ped_ids}
633 | count = 1
634 | for train_index, test_index in kf.split(ped_ids):
635 | fold_idx[count] = {'train': train_index.tolist(), 'test': test_index.tolist()}
636 | count += 1
637 | with open(cache_file, 'wb') as fid:
638 | pickle.dump(fold_idx, fid, pickle.HIGHEST_PROTOCOL)
639 | print('jaad {}-fold samples written to {}'.format(num_folds, cache_file))
640 |         print('Number of %s tracks %d' % (image_set, len(fold_idx[fold][image_set])))
641 | kfold_ids = [fold_idx['pid'][i] for i in range(len(fold_idx['pid'])) if i in fold_idx[fold][image_set]]
642 | return kfold_ids
643 |
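    # Sketch of the cached fold structure (derived from the code above):
    #   fold_idx = {'pid': [<all pedestrian ids>],
    #               1: {'train': [indices], 'test': [indices]}, ..., num_folds: {...}}
    # The returned list contains the ids whose positions appear in fold_idx[fold][image_set].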
644 | def _get_data_ids(self, image_set, params):
645 | '''
646 | A helper function to generate set id and ped ids (if needed) for processing
647 | :param image_set: Image_set to generate data.
648 | :param params: Data generation params.
649 | :return: Set and pedestrian ids.
650 | '''
651 | _pids = None
652 | if params['data_split_type'] == 'default':
653 | return self._get_video_ids_split(image_set, params['subset']), _pids
654 |
655 | video_ids = self._get_video_ids_split('all', params['subset'])
656 | if params['data_split_type'] == 'random':
657 | params['random_params']['sample_type'] = params['sample_type']
658 | _pids = self._get_random_pedestrian_ids(image_set, **params['random_params'])
659 | elif params['data_split_type'] == 'kfold':
660 | params['kfold_params']['sample_type'] = params['sample_type']
661 | _pids = self._get_kfold_pedestrian_ids(image_set, **params['kfold_params'])
662 | return video_ids, _pids
663 |
664 | def _squarify(self, bbox, ratio, img_width):
665 | '''
666 |         Changes the width/height ratio of bounding boxes to a fixed ratio.
667 | :param bbox: Bounding box.
668 | :param ratio: Ratio to be changed to.
669 | :param img_width: Image width.
670 | :return: Squarified bounding box.
671 | '''
672 | width = abs(bbox[0] - bbox[2])
673 | height = abs(bbox[1] - bbox[3])
674 | width_change = height * ratio - width
675 | bbox[0] = bbox[0] - width_change / 2
676 | bbox[2] = bbox[2] + width_change / 2
677 | if bbox[0] < 0:
678 | bbox[0] = 0
679 |         #check whether the new bbox goes beyond image borders
680 | #if this is the case, the bbox is shifted back.
681 | if bbox[2] > img_width:
682 | bbox[0] = bbox[0] - bbox[2] + img_width
683 | bbox[2] = img_width
684 | return bbox
685 |
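    # Worked example with a hypothetical box: bbox = [100, 200, 130, 280], ratio = 0.5,
    # img_width = 1920 -> width 30, height 80, width_change = 80 * 0.5 - 30 = 10, so the
    # box is widened symmetrically to [95, 200, 135, 280]; it is only clipped or shifted
    # when it crosses the image borders.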
686 | #Pedestrian detection generators
687 | def get_detection_data(self, image_set, method, occlusion_type=None, file_path='data/', **params):
688 | '''
689 | Generate data for pedestrian detection algorithms
690 | :param image_set: Split set name.
691 | :param method: Detection algorithm: frcnn, retinanet, yolo3, ssd.
692 | :param occlusion_type: the types of occlusion: None: only unoccluded samples.
693 | part: Unoccluded and partially occluded samples.
694 | full: All samples.
695 |         :param file_path: Where to save the script file.
696 | :return: Pedestrian samples.
697 | '''
698 | squarify_ratio = params['squarify_ratio']
699 | frame_stride = params['fstride']
700 | height_rng = params['height_rng']
701 | if not exists(file_path):
702 | makedirs(file_path)
703 | if height_rng is None:
704 | height_rng = [0, float('inf')]
705 | annotations = self.generate_database()
706 | video_ids, _pids = self._get_data_ids(image_set, params)
707 | ped_samples = {}
708 | unique_samples = []
709 | total_sample_count = 0
710 | for vid in video_ids:
711 | img_width = annotations[vid]['width']
712 | img_height = annotations[vid]['height']
713 | num_frames = annotations[vid]['num_frames']
714 | for i in range(0, num_frames, frame_stride):
715 |                 ped_samples[join(self._jaad_path, 'images', vid, '{:05d}.png'.format(i))] = []
716 |             for pid in annotations[vid]['ped_annotations']:
717 | if params['data_split_type'] != 'default' and pid not in _pids:
718 | continue
719 | difficult = 0
720 | if 'p' in pid:
721 | difficult = -1
722 | if image_set in ['train', 'val']:
723 | continue
724 | imgs = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in annotations[vid]['ped_annotations'][pid]['frames']]
725 | boxes = annotations[vid]['ped_annotations'][pid]['bbox']
726 | occlusion = annotations[vid]['ped_annotations'][pid]['occlusion']
727 | for i, b in enumerate(boxes):
728 | if imgs[i] not in ped_samples:
729 | continue
730 |                     bbox_height = abs(b[1] - b[3])
731 | if height_rng[0] <= bbox_height <= height_rng[1]:
732 |                     if (occlusion_type is None and occlusion[i] == 0) or (occlusion_type == 'part' and occlusion[i] < 2) or (occlusion_type == 'full'):
733 | if squarify_ratio:
734 | b = self._squarify(b, squarify_ratio, img_width)
735 | ped_samples[imgs[i]].append({'width': img_width,
736 | 'height': img_height,
737 | 'tag': pid,
738 | 'box': b,
739 | 'seg_area': (b[2] - b[0] + 1) * (b[3] - b[1] + 1),
740 | 'occlusion': occlusion[i],
741 | 'difficult': difficult})
742 | if pid not in unique_samples:
743 | unique_samples.append(pid)
744 | total_sample_count += 1
745 | print('Number of unique pedestrians %d ' % len(unique_samples))
746 | print('Number of samples %d ' % total_sample_count)
747 | if method == 'frcnn':
748 | return self._get_data_frcnn(ped_samples)
749 | elif method == 'retinanet':
750 | return self._generate_csv_data_retinanet(image_set, file_path, ped_samples)
751 | elif method == 'yolo3':
752 | return self._generate_csv_data_yolo3(image_set, file_path, ped_samples)
753 | elif method == 'ssd':
754 | return self._generate_csv_data_ssd(image_set, file_path, ped_samples)
755 |
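    # Hedged usage sketch (parameter values are illustrative, not prescribed by the repo;
    # 'db' is assumed to be an instance of the dataset class defined earlier in this file):
    #   csv_path, map_path = db.get_detection_data(
    #       'train', 'retinanet', occlusion_type='part', file_path='data/',
    #       fstride=1, squarify_ratio=0, height_rng=[60, float('inf')],
    #       data_split_type='default', subset='default', sample_type='all')
    # Note that **params must provide every key read above (fstride, squarify_ratio,
    # height_rng, data_split_type, subset, ...), since no defaults are filled in here.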
756 |
757 | def _get_data_frcnn(self, ped_samples):
758 | '''
759 | Data generation for Faster-rcnn algorithm.
760 | :param ped_samples: Dictionary of all samples.
761 | '''
762 | classes_count = {}
763 | class_mapping = {}
764 | all_imgs = {}
765 | class_name = 'pedestrian'
766 | classes_count['bg'] = 0
767 | class_mapping['bg'] = 1
768 | classes_count[class_name] = 0
769 | class_mapping[class_name] = 0
770 | for img, samples in sorted(ped_samples.items()):
771 | if not samples:
772 | continue
773 | all_imgs[img] = {'filepath': img, 'width': samples[0]['width'],
774 | 'height': samples[0]['height'], 'bboxes': []}
775 | for s in samples:
776 | box = s['box']
777 | all_imgs[img]['bboxes'].append({'class': class_name, 'x1': box[0],
778 | 'x2': box[2], 'y1': box[1], 'y2': box[3]})
779 | print('Data generated for Faster-rcnn')
780 | all_data = []
781 | for key in all_imgs:
782 | all_data.append(all_imgs[key])
783 | return all_data, classes_count, class_mapping
784 |
785 |     def _generate_csv_data_retinanet(self, image_set, file_path, ped_samples):
786 | '''
787 | Data generation for Retinanet algorithm.
788 | :param image_set: Data split.
789 | :param file_path: Path to save the data.
790 | :param ped_samples: Dictionary of all samples.
791 | '''
792 | class_name = 'pedestrian'
793 | data_save_path = file_path + 'retinanet_' + image_set + '.csv'
794 | with open(data_save_path, 'wt') as f:
795 | for img, samples in sorted(ped_samples.items()):
796 | if not samples:
797 | f.write('%s,,,,,\n' % (img))
798 | for s in samples:
799 | box = s['box']
800 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], class_name))
801 | print('Data generated for Retinanet.')
802 | map_path = file_path + '_mapping.csv'
803 | with open(map_path, 'w') as f:
804 | f.write('%s,0\n' % (class_name))
805 | return data_save_path, map_path
806 |
807 |     def _generate_csv_data_yolo3(self, image_set, file_path, ped_samples):
808 | '''
809 | Data generation for YOLO3 algorithm.
810 | :param image_set: Data split.
811 | :param file_path: Path to save the data.
812 | :param ped_samples: Dictionary of all samples.
813 | '''
814 | class_name = 'pedestrian'
815 | all_img = {}
816 | data_save_path = file_path + 'yolo3_' + image_set + '.txt'
817 | with open(data_save_path, 'wt') as f:
818 | for img, samples in sorted(ped_samples.items()):
819 | if not samples:
820 | continue
821 | f.write('%s ' % (img))
822 | for s in samples:
823 | box = s['box']
824 | f.write('%.0f,%.0f,%.0f,%.0f,%.0f ' % (box[0], box[1], box[2], box[3], 0))
825 | f.write('\n')
826 | print('Data generated for YOLO3')
827 | map_path = file_path + 'mapping_yolo3'
828 | with open(map_path, "wt") as f:
829 | f.write('%s,0\n' % (class_name))
830 | return data_save_path, map_path
831 |
832 |     def _generate_csv_data_ssd(self, image_set, file_path, ped_samples):
833 | '''
834 | Data generation for SSD algorithm.
835 | :param image_set: Data split.
836 | :param file_path: Path to save the data.
837 | :param ped_samples: Dictionary of all samples.
838 | '''
839 | data_save_path = file_path + 'ssd_' + image_set + '.csv'
840 | with open(data_save_path, 'wt') as f:
841 | for img, samples in sorted(ped_samples.items()):
842 | if not samples:
843 | continue
844 | for s in samples:
845 | box = s['box']
846 | f.write('%s,%.0f,%.0f,%.0f,%.0f,%s\n' % (img, box[0], box[1], box[2], box[3], 1))
847 | print('Data generated for SSD')
848 | return data_save_path
849 |
850 |
851 |
852 | def _print_dict(self, dic):
853 | '''
854 | Prints a dictionary, one key_value pair per line.
855 | :param dic: Dictionary.
856 | '''
857 | for k, v in dic.items():
858 | print('%s: %s' % (str(k), str(v)))
859 |
860 | def _height_check(self, height_rng, frame_ids, boxes, images, occlusion):
861 | '''
862 | Checks whether the bounding boxes are within a given height limit. If not, it will adjust the length of data sequences accordingly.
863 | :param height_rng: Height limit [lower, higher].
864 | :param frame_ids: List of frame ids.
865 | :param boxes: List of bounding boxes.
866 | :param images: List of images.
867 | :param occlusion: List of occlusions.
868 | :return: The adjusted data sequences.
869 | '''
870 | imgs, box, frames, occ = [], [], [], []
871 | for i, b in enumerate(boxes):
872 | bbox_height = abs(b[1] - b[3])
873 | if height_rng[0] <= bbox_height <= height_rng[1]:
874 | box.append(b)
875 | imgs.append(images[i])
876 | frames.append(frame_ids[i])
877 | occ.append(occlusion[i])
878 | return imgs, box, frames, occ
879 |
880 | def _get_center(self, box):
881 | '''
882 | Calculates the center coordinate of a bounding box.
883 | :param box: Bounding box coordinates
884 | :return: The center coordinate.
885 | '''
886 | return [(box[0] + box[2]) / 2, (box[1] + box[3]) / 2]
887 |
888 | def _get_image_path(self, vid, f):
889 | '''
890 | Generates the image path given ids.
891 | :param vid: Video id.
892 | :param f: Frame id.
893 | :return: Return the path to the given image.
894 | '''
895 | return join(self._images_path, vid, '{:05d}.png'.format(f))
896 |
897 | def generate_data_trajectory_sequence(self, image_set, **opts):
898 | '''
899 | Generates pedestrian tracks.
900 | :param image_set: the split set to produce for. Options are train, test, val.
901 | :param opts:
902 |             'fstride': Frequency of sampling from the data.
903 | 'sample_type': Whether to use 'all' pedestrian annotations or the ones with 'beh'avior only.
904 | 'subset': The subset of data annotations to use. Options are:
905 | 'default': Includes high resolution and high visibility videos.
906 |                 'high_visibility': Only videos with high visibility (includes low resolution videos).
907 | 'all': Uses all videos.
908 | 'height_rng': The height range of pedestrian to use.
909 |             'squarify_ratio': The width/height ratio of bounding boxes. A value in (0,1]; 0 -> the original ratio is used.
910 | 'data_split_type': How to split the data. Options:
911 | 'default': predefined sets,
912 | 'random': randomly split the data.
913 | 'kfold': k-fold data split(NOTE: only train/test splits).
914 | 'seq_type': Sequence type to generate. Options:
915 | 'trajectory': generates tracks.
916 |                 'crossing': generates tracks up to 'crossing_point',
917 | 'intention': generates tracks similar to human experiments.
918 | 'min_track_size': Min track length allowable.
919 |             'random_params': Parameters for random data split generation (see _get_random_pedestrian_ids()).
920 |             'kfold_params': Parameters for kfold split generation (see _get_kfold_pedestrian_ids()).
921 | :return: Sequence data.
922 | '''
923 | params = {'fstride': 1,
924 | 'sample_type': 'all', # 'beh'
925 | 'subset': 'default',
926 | 'height_rng': [0, float('inf')],
927 | 'squarify_ratio': 0,
928 | 'data_split_type': 'default', # kfold, random, default
929 | 'seq_type': 'intention',
930 | 'min_track_size': 15,
931 | 'random_params': {'ratios': None,
932 | 'val_data': True,
933 | 'regen_data': False},
934 | 'kfold_params': {'num_folds': 5, 'fold': 1}}
935 | assert all(k in params for k in opts.keys()), 'Wrong option(s). Choose one of the following: {}'.format(list(params.keys()))
936 | params.update(opts)
937 | print('--------------------------------------------------------')
938 | print('Generating action sequence data.')
939 | self._print_dict(params)
940 |
941 | annot_database = self.generate_database()
942 | if params['seq_type'] == 'trajectory':
943 | sequence = self._get_trajectories(image_set, annot_database, **params)
944 | elif params['seq_type'] == 'crossing':
945 | sequence = self._get_crossing(image_set, annot_database, **params)
946 | elif params['seq_type'] == 'intention':
947 | sequence = self._get_intention(image_set, annot_database, **params)
948 | return sequence
949 |
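    # Hedged usage sketch (option values are illustrative; 'db' is assumed to be an
    # instance of the dataset class defined earlier in this file):
    #   seq = db.generate_data_trajectory_sequence('train',
    #                                              sample_type='beh',
    #                                              seq_type='crossing',
    #                                              data_split_type='default',
    #                                              fstride=1)
    #   print(len(seq['bbox']), list(seq.keys()))
    # Options not passed in **opts fall back to the defaults in `params` above.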
950 | def _get_trajectories(self, image_set, annot_database, **params):
951 | '''
952 | Generates trajectory data.
953 | :param image_set: Data split.
954 | :param annot_database: The annotations database.
955 | :param params: Parameters for generating trajectories.
956 | :return: A dictionary of trajectories.
957 | '''
958 | print('---------------------------------------------------------')
959 | print('Generating trajectory data.')
960 | num_pedestrians = 0
961 | seq_stride = params['fstride']
962 | sq_ratio = params['squarify_ratio']
963 | height_rng = params['height_rng']
964 | image_seq, pids_seq = [], []
965 | box_seq, center_seq, occ_seq = [], [], []
966 | intent_seq = []
967 | vehicle_seq = []
968 | video_ids, _pids = self._get_data_ids(image_set, params)
969 | for vid in sorted(video_ids):
970 | img_width = annot_database[vid]['width']
971 | pid_annots = annot_database[vid]['ped_annotations']
972 | vid_annots = annot_database[vid]['vehicle_annotations']
973 | for pid in sorted(annot_database[vid]['ped_annotations']):
974 | if params['data_split_type'] != 'default' and pid not in _pids:
975 | continue
976 | if 'p' in pid:
977 | continue
978 | if params['sample_type'] == 'beh' and 'b' not in pid:
979 | continue
980 | num_pedestrians += 1
981 | frame_ids = pid_annots[pid]['frames']
982 | images = [join(self._jaad_path, 'images', vid, '{:05d}.png'.format(f)) for f in pid_annots[pid]['frames']]
983 | boxes = pid_annots[pid]['bbox']
984 | occlusions = pid_annots[pid]['occlusion']
985 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
986 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions)
987 | if len(boxes) / seq_stride < params['min_track_size']:
988 | continue
989 | if sq_ratio:
990 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
991 | ped_ids = [[pid]] * len(boxes)
992 | if 'b' not in pid:
993 | intent = [[0]] * len(boxes)
994 | else:
995 |                     if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1:
996 | intent = [[0]] * len(boxes)
997 | else:
998 | intent = [[1]] * len(boxes)
999 | center = [self._get_center(b) for b in boxes]
1000 | occ_seq.append(occlusions[::seq_stride])
1001 | image_seq.append(images[::seq_stride])
1002 | box_seq.append(boxes[::seq_stride])
1003 | center_seq.append(center[::seq_stride])
1004 | intent_seq.append(intent[::seq_stride])
1005 | pids_seq.append(ped_ids[::seq_stride])
1006 | vehicle_seq.append([[vid_annots[i]] for i in frame_ids][::seq_stride])
1007 | print('Split: %s' % image_set)
1008 | print('Number of pedestrians: %d ' % num_pedestrians)
1009 | print('Total number of samples: %d ' % len(image_seq))
1010 |
1011 | return {'image': image_seq,
1012 | 'pid': pids_seq,
1013 | 'bbox': box_seq,
1014 | 'center': center_seq,
1015 | 'occlusion': occ_seq,
1016 | 'intent': intent_seq,
1017 | 'vehicle_act': vehicle_seq}
1018 |
1019 | def _get_crossing(self, image_set, annot_database, **params):
1020 | '''
1021 | Generates crossing data.
1022 | :param image_set: Data split to use.
1023 | :param annot_database: Annotations database.
1024 | :param params: Parameters to generate data (see generate_database)
1025 | :return: A dictionary of trajectories
1026 | '''
1027 | print('---------------------------------------------------------')
1028 | print("Generating crossing data")
1029 | num_pedestrians = 0
1030 | seq_stride = params['fstride']
1031 | sq_ratio = params['squarify_ratio']
1032 | height_rng = params['height_rng']
1033 | image_seq, pids_seq = [], []
1034 | box_seq, center_seq, occ_seq = [], [], []
1035 | intent_seq = []
1036 | vehicle_seq = []
1037 | activities = []
1038 | video_ids, _pids = self._get_data_ids(image_set, params)
1039 | for vid in sorted(video_ids):
1040 | img_width = annot_database[vid]['width']
1041 | img_height = annot_database[vid]['height']
1042 | pid_annots = annot_database[vid]['ped_annotations']
1043 | vid_annots = annot_database[vid]['vehicle_annotations']
1044 | for pid in sorted(pid_annots):
1045 | if params['data_split_type'] != 'default' and pid not in _pids:
1046 | continue
1047 | if 'p' in pid:
1048 | continue
1049 | if params['sample_type'] == 'beh' and 'b' not in pid:
1050 | continue
1051 | num_pedestrians += 1
1052 | frame_ids = pid_annots[pid]['frames']
1053 | if 'b' in pid:
1054 | event_frame = pid_annots[pid]['attributes']['crossing_point']
1055 | else:
1056 | event_frame = -1
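                # When no crossing point is annotated (event_frame == -1), end_idx = -3 makes
                # the slices below (e.g. boxes[:end_idx + 1] == boxes[:-2]) drop the last two
                # frames of the track.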
1057 | if event_frame == -1:
1058 | end_idx = -3
1059 | else:
1060 | end_idx = frame_ids.index(event_frame)
1061 | boxes = pid_annots[pid]['bbox'][:end_idx + 1]
1062 | frame_ids = frame_ids[: end_idx + 1]
1063 | images = [self._get_image_path(vid, f) for f in frame_ids]
1064 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1]
1065 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
1066 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions)
1067 | if len(boxes) / seq_stride < params['min_track_size']:
1068 | continue
1069 | if sq_ratio:
1070 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
1071 | image_seq.append(images[::seq_stride])
1072 | box_seq.append(boxes[::seq_stride])
1073 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride])
1074 | occ_seq.append(occlusions[::seq_stride])
1075 | ped_ids = [[pid]] * len(boxes)
1076 | pids_seq.append(ped_ids[::seq_stride])
1077 | if 'b' not in pid:
1078 | intent = [[0]] * len(boxes)
1079 | acts = [[0]] * len(boxes)
1080 | else:
1081 | if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1:
1082 | intent = [[0]] * len(boxes)
1083 | else:
1084 | intent = [[1]] * len(boxes)
1085 | acts = [[int(pid_annots[pid]['attributes']['crossing'] > 0)]] * len(boxes)
1086 | intent_seq.append(intent[::seq_stride])
1087 | activities.append(acts[::seq_stride])
1088 | vehicle_seq.append([[vid_annots[i]] for i in frame_ids][::seq_stride])
1089 | print('Split: %s' % image_set)
1090 | print('Number of pedestrians: %d ' % num_pedestrians)
1091 | print('Total number of samples: %d ' % len(image_seq))
1092 | return {'image': image_seq,
1093 | 'pid': pids_seq,
1094 | 'bbox': box_seq,
1095 | 'center': center_seq,
1096 | 'occlusion': occ_seq,
1097 | 'vehicle_act': vehicle_seq,
1098 | 'intent': intent_seq,
1099 | 'activities': activities,
1100 | 'image_dimension': (img_width, img_height)}
1101 |
1102 | def _get_intention(self, image_set, annot_database, **params):
1103 | '''
1104 | Generates intention data.
1105 | :param image_set: Data split to use.
1106 | :param annot_database: Annotations database.
1107 | :param params: Parameters to generate data (see generate_database())
1108 | :return: A dictionary of trajectories.
1109 | '''
1110 | print('---------------------------------------------------------')
1111 | print("Generating intention data")
1112 | num_pedestrians = 0
1113 | seq_stride = params['fstride']
1114 | sq_ratio = params['squarify_ratio']
1115 | height_rng = params['height_rng']
1116 | image_seq, pids_seq = [], []
1117 | box_seq, center_seq, occ_seq = [], [], []
1118 | intent_seq = []
1119 | videos_ids, _pids = self._get_data_ids(image_set, params)
1120 | for vid in sorted(videos_ids):
1121 | img_width = annot_database[vid]['width']
1122 | pid_annots = annot_database[vid]['ped_annotations']
1123 | for pid in sorted(pid_annots):
1124 | if params['data_split_type'] != 'default' and pid not in _pids:
1125 | continue
1126 | if 'p' in pid:
1127 | continue
1128 | if params['sample_type'] == 'beh' and 'b' not in pid:
1129 | continue
1130 | num_pedestrians += 1
1131 | frame_ids = pid_annots[pid]['frames']
1132 | if 'b' in pid:
1133 | event_frame = pid_annots[pid]['attributes']['decision_point']
1134 | else:
1135 | event_frame = -1
1136 | if event_frame == -1:
1137 | end_idx = -3
1138 | else:
1139 | end_idx = frame_ids.index(event_frame)
1140 | boxes = pid_annots[pid]['bbox'][:end_idx + 1]
1141 | frame_ids = frame_ids[:end_idx + 1]
1142 | images = [self._get_image_path(vid, f) for f in frame_ids]
1143 | occlusions = pid_annots[pid]['occlusion'][:end_idx + 1]
1144 | if height_rng[0] > 0 or height_rng[1] < float('inf'):
1145 | images, boxes, frame_ids, occlusions = self._height_check(height_rng, frame_ids, boxes, images, occlusions)
1146 | if len(boxes) / seq_stride < params['min_track_size']:
1147 | continue
1148 | if sq_ratio:
1149 | boxes = [self._squarify(b, sq_ratio, img_width) for b in boxes]
1150 | center_seq.append([self._get_center(b) for b in boxes][::seq_stride])
1151 | image_seq.append(images[::seq_stride])
1152 | box_seq.append(boxes[::seq_stride])
1153 | occ_seq.append(occlusions[::seq_stride])
1154 | ped_ids = [[pid]] * len(boxes)
1155 | pids_seq.append(ped_ids[::seq_stride])
1156 | if 'b' not in pid:
1157 | intent = [[0]] * len(boxes)
1158 | else:
1159 | if annot_database[vid]['ped_annotations'][pid]['attributes']['crossing'] == -1:
1160 | intent = [[0]] * len(boxes)
1161 | else:
1162 | intent = [[1]] * len(boxes)
1163 | intent_seq.append(intent[::seq_stride])
1164 | print('Split: %s' % image_set)
1165 | print('Number of pedestrians: %d ' % num_pedestrians)
1166 | print('Total number of samples: %d ' % len(image_seq))
1167 | return {'image': image_seq,
1168 | 'pid': pids_seq,
1169 | 'bbox': box_seq,
1170 | 'center': center_seq,
1171 | 'occlusion': occ_seq,
1172 | 'intent': intent_seq}
1173 |
1174 |
--------------------------------------------------------------------------------