├── README.md ├── data_process.py ├── layers ├── __init__.py ├── graph.py ├── graph_conv_block.py ├── graph_operation_layer.py └── seq2seq.py ├── main.py ├── model.py ├── training_log.txt └── xin_feeder_baidu.py /README.md: -------------------------------------------------------------------------------- 1 | # GRIP 2 | 3 | This repository is the code of [GRIP++: Enhanced Graph-based Interaction-aware Trajectory Prediction for Autonomous Driving](https://arxiv.org/abs/1907.07792) on the Baidu Apollo Trajectory dataset. GRIP++ is an enhanced version of our GRIP ([GRIP: Graph-based Interaction-aware Trajectory Prediction](https://ieeexplore.ieee.org/abstract/document/8917228)). 4 | 5 | ___ 6 | ### License 7 | This code is shared only for research purposes, and this cannot be used for any commercial purposes. 8 | 9 | ___ 10 | ### Training 11 | 12 | 1. Modify "data_root" in data_process.py and then run the script to preprocess the data. 13 | ``` Bash 14 | $ python data_process.py 15 | ``` 16 | 17 | 2. Train the model. We trained the model on a single Nvidia Titan Xp GPU. If your GPU has the same precision, you should get the exact same results. The "training_log.txt" is my training log. If you download the code and run it directly, you should see similar outputs. 18 | ``` Bash 19 | $ python main.py 20 | 21 | # The following are the first 10 training iterations: 22 | #######################################Train 23 | # |2019-09-20 16:50:43.146035| Epoch: 0/ 500| Iteration: 0| Loss:2.69767785|lr: 0.001| 24 | # |2019-09-20 16:50:43.247776| Epoch: 0/ 500| Iteration: 0| Loss:1.39082634|lr: 0.001| 25 | # |2019-09-20 16:50:43.327926| Epoch: 0/ 500| Iteration: 0| Loss:1.42024708|lr: 0.001| 26 | # |2019-09-20 16:50:43.394658| Epoch: 0/ 500| Iteration: 0| Loss:1.32363927|lr: 0.001| 27 | # |2019-09-20 16:50:43.454833| Epoch: 0/ 500| Iteration: 0| Loss:1.15358388|lr: 0.001| 28 | # |2019-09-20 16:50:43.515517| Epoch: 0/ 500| Iteration: 0| Loss:1.15672326|lr: 0.001| 29 | # |2019-09-20 16:50:43.575027| Epoch: 0/ 500| Iteration: 0| Loss:0.93675584|lr: 0.001| 30 | # |2019-09-20 16:50:43.634769| Epoch: 0/ 500| Iteration: 0| Loss:0.90181452|lr: 0.001| 31 | # |2019-09-20 16:50:43.694374| Epoch: 0/ 500| Iteration: 0| Loss:0.75979233|lr: 0.001| 32 | ``` 33 | ___ 34 | 35 | ### Submission 36 | Once you trained the model, you can test the trained models on the testing subset. 37 | 38 | - Our model predicts future locations for all observed objects simultaneously. 39 | - Using separate models for different types of objects should achieve better performance. 40 | 41 | |Method|Epoch|WSADE|ADEv|ADEp|ADEb|WSFDE|FDEv|FDEp|FDEb| 42 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 43 | |TrafficPredict| |8.5881|7.9467|7.1811|12.8805|24.2262|12.7757|11.121|22.7912| 44 | || 45 | |GRIP|Epoch16|1.2632|2.2511|0.718|1.8024|2.3713|4.0863|1.3838|3.4155| 46 | |GRIP|Epoch18|1.2648|2.2515|0.7142|1.8193|2.3677|4.0863|1.3732|3.4274| 47 | |GRIP|Epoch20|1.2721|2.24|0.717|1.8558|2.3921|4.0762|1.3791|3.5318| 48 | || 49 | |GRIP|Combine|1.2588|2.2400|0.7142|1.8024|2.3631|4.0762|1.3732|3.4155| 50 | 51 | We use the following way to combine multiple results. 52 | 53 | - epoch20 -> 1, 2 (car) 54 | - epoch18 -> 3 (pedestrian) 55 | - epoch16 -> 4 (bike) 56 | 57 | ___ 58 | 59 | ### Citation 60 | Please cite our papers if you used our code. Thanks. 61 | ``` 62 | @inproceedings{2019itsc_grip, 63 | author = {Li, Xin and Ying, Xiaowen and Chuah, Mooi Choo}, 64 | booktitle = {2019 IEEE INTELLIGENT TRANSPORTATION SYSTEMS CONFERENCE (ITSC)}, 65 | organization = {IEEE}, 66 | title = {GRIP: Graph-based Interaction-aware Trajectory Prediction}, 67 | year = {2019} 68 | } 69 | 70 | @article{li2020gripplus, 71 | title={GRIP++: Enhanced Graph-based Interaction-aware Trajectory Prediction for Autonomous Driving}, 72 | author={Li, Xin and Ying, Xiaowen and Chuah, Mooi Choo}, 73 | journal={arXiv preprint arXiv:1907.07792}, 74 | year={2020} 75 | } 76 | ``` 77 | -------------------------------------------------------------------------------- /data_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import glob 3 | import os 4 | from scipy import spatial 5 | import pickle 6 | 7 | # Please change this to your location 8 | data_root = '/data/xincoder/ApolloScape/' 9 | 10 | 11 | history_frames = 6 # 3 second * 2 frame/second 12 | future_frames = 6 # 3 second * 2 frame/second 13 | total_frames = history_frames + future_frames 14 | # xy_range = 120 # max_x_range=121, max_y_range=118 15 | max_num_object = 120 # maximum number of observed objects is 70 16 | neighbor_distance = 10 # meter 17 | 18 | # Baidu ApolloScape data format: 19 | # frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading 20 | total_feature_dimension = 10 + 1 # we add mark "1" to the end of each row to indicate that this row exists 21 | 22 | # after zero centralize data max(x)=127.1, max(y)=106.1, thus choose 130 23 | 24 | def get_frame_instance_dict(pra_file_path): 25 | ''' 26 | Read raw data from files and return a dictionary: 27 | {frame_id: 28 | {object_id: 29 | # 10 features 30 | [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] 31 | } 32 | } 33 | ''' 34 | with open(pra_file_path, 'r') as reader: 35 | # print(train_file_path) 36 | content = np.array([x.strip().split(' ') for x in reader.readlines()]).astype(float) 37 | now_dict = {} 38 | for row in content: 39 | # instance = {row[1]:row[2:]} 40 | n_dict = now_dict.get(row[0], {}) 41 | n_dict[row[1]] = row#[2:] 42 | # n_dict.append(instance) 43 | # now_dict[] 44 | now_dict[row[0]] = n_dict 45 | return now_dict 46 | 47 | def process_data(pra_now_dict, pra_start_ind, pra_end_ind, pra_observed_last): 48 | visible_object_id_list = list(pra_now_dict[pra_observed_last].keys()) # object_id appears at the last observed frame 49 | num_visible_object = len(visible_object_id_list) # number of current observed objects 50 | 51 | # compute the mean values of x and y for zero-centralization. 52 | visible_object_value = np.array(list(pra_now_dict[pra_observed_last].values())) 53 | xy = visible_object_value[:, 3:5].astype(float) 54 | mean_xy = np.zeros_like(visible_object_value[0], dtype=float) 55 | m_xy = np.mean(xy, axis=0) 56 | mean_xy[3:5] = m_xy 57 | 58 | # compute distance between any pair of two objects 59 | dist_xy = spatial.distance.cdist(xy, xy) 60 | # if their distance is less than $neighbor_distance, we regard them are neighbors. 61 | neighbor_matrix = np.zeros((max_num_object, max_num_object)) 62 | neighbor_matrix[:num_visible_object, :num_visible_object] = (dist_xy (object#, frame#, 11) 84 | object_frame_feature = np.zeros((max_num_object, pra_end_ind-pra_start_ind, total_feature_dimension)) 85 | 86 | # np.transpose(object_feature_list, (1,0,2)) 87 | object_frame_feature[:num_visible_object+num_non_visible_object] = np.transpose(object_feature_list, (1,0,2)) 88 | 89 | return object_frame_feature, neighbor_matrix, m_xy 90 | 91 | 92 | def generate_train_data(pra_file_path): 93 | ''' 94 | Read data from $pra_file_path, and split data into clips with $total_frames length. 95 | Return: feature and adjacency_matrix 96 | feture: (N, C, T, V) 97 | N is the number of training data 98 | C is the dimension of features, 10raw_feature + 1mark(valid data or not) 99 | T is the temporal length of the data. history_frames + future_frames 100 | V is the maximum number of objects. zero-padding for less objects. 101 | ''' 102 | now_dict = get_frame_instance_dict(pra_file_path) 103 | frame_id_set = sorted(set(now_dict.keys())) 104 | 105 | all_feature_list = [] 106 | all_adjacency_list = [] 107 | all_mean_list = [] 108 | for start_ind in frame_id_set[:-total_frames+1]: 109 | start_ind = int(start_ind) 110 | end_ind = int(start_ind + total_frames) 111 | observed_last = start_ind + history_frames - 1 112 | object_frame_feature, neighbor_matrix, mean_xy = process_data(now_dict, start_ind, end_ind, observed_last) 113 | 114 | all_feature_list.append(object_frame_feature) 115 | all_adjacency_list.append(neighbor_matrix) 116 | all_mean_list.append(mean_xy) 117 | 118 | # (N, V, T, C) --> (N, C, T, V) 119 | all_feature_list = np.transpose(all_feature_list, (0, 3, 2, 1)) 120 | all_adjacency_list = np.array(all_adjacency_list) 121 | all_mean_list = np.array(all_mean_list) 122 | # print(all_feature_list.shape, all_adjacency_list.shape) 123 | return all_feature_list, all_adjacency_list, all_mean_list 124 | 125 | 126 | def generate_test_data(pra_file_path): 127 | now_dict = get_frame_instance_dict(pra_file_path) 128 | frame_id_set = sorted(set(now_dict.keys())) 129 | 130 | all_feature_list = [] 131 | all_adjacency_list = [] 132 | all_mean_list = [] 133 | # get all start frame id 134 | start_frame_id_list = frame_id_set[::history_frames] 135 | for start_ind in start_frame_id_list: 136 | start_ind = int(start_ind) 137 | end_ind = int(start_ind + history_frames) 138 | observed_last = start_ind + history_frames - 1 139 | # print(start_ind, end_ind) 140 | object_frame_feature, neighbor_matrix, mean_xy = process_data(now_dict, start_ind, end_ind, observed_last) 141 | 142 | all_feature_list.append(object_frame_feature) 143 | all_adjacency_list.append(neighbor_matrix) 144 | all_mean_list.append(mean_xy) 145 | 146 | # (N, V, T, C) --> (N, C, T, V) 147 | all_feature_list = np.transpose(all_feature_list, (0, 3, 2, 1)) 148 | all_adjacency_list = np.array(all_adjacency_list) 149 | all_mean_list = np.array(all_mean_list) 150 | # print(all_feature_list.shape, all_adjacency_list.shape) 151 | return all_feature_list, all_adjacency_list, all_mean_list 152 | 153 | 154 | def generate_data(pra_file_path_list, pra_is_train=True): 155 | all_data = [] 156 | all_adjacency = [] 157 | all_mean_xy = [] 158 | for file_path in pra_file_path_list: 159 | if pra_is_train: 160 | now_data, now_adjacency, now_mean_xy = generate_train_data(file_path) 161 | else: 162 | now_data, now_adjacency, now_mean_xy = generate_test_data(file_path) 163 | all_data.extend(now_data) 164 | all_adjacency.extend(now_adjacency) 165 | all_mean_xy.extend(now_mean_xy) 166 | 167 | all_data = np.array(all_data) #(N, C, T, V)=(5010, 11, 12, 70) Train 168 | all_adjacency = np.array(all_adjacency) #(5010, 70, 70) Train 169 | all_mean_xy = np.array(all_mean_xy) #(5010, 2) Train 170 | 171 | # Train (N, C, T, V)=(5010, 11, 12, 70), (5010, 70, 70), (5010, 2) 172 | # Test (N, C, T, V)=(415, 11, 6, 70), (415, 70, 70), (415, 2) 173 | print(np.shape(all_data), np.shape(all_adjacency), np.shape(all_mean_xy)) 174 | 175 | # save training_data and trainjing_adjacency into a file. 176 | if pra_is_train: 177 | save_path = 'train_data.pkl' 178 | else: 179 | save_path = 'test_data.pkl' 180 | with open(save_path, 'wb') as writer: 181 | pickle.dump([all_data, all_adjacency, all_mean_xy], writer) 182 | 183 | 184 | if __name__ == '__main__': 185 | train_file_path_list = sorted(glob.glob(os.path.join(data_root, 'prediction_train/*.txt'))) 186 | test_file_path_list = sorted(glob.glob(os.path.join(data_root, 'prediction_test/*.txt'))) 187 | 188 | print('Generating Training Data.') 189 | generate_data(train_file_path_list, pra_is_train=True) 190 | 191 | print('Generating Testing Data.') 192 | generate_data(test_file_path_list, pra_is_train=False) 193 | 194 | 195 | 196 | -------------------------------------------------------------------------------- /layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xincoder/GRIP/35cca73804ce951d396977238e6ed2d3de8245db/layers/__init__.py -------------------------------------------------------------------------------- /layers/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Graph(): 4 | """ The Graph Representation 5 | How to use: 6 | 1. graph = Graph(max_hop=1) 7 | 2. A = graph.get_adjacency() 8 | 3. A = code to modify A 9 | 4. normalized_A = graph.normalize_adjacency(A) 10 | """ 11 | def __init__(self, 12 | num_node = 120, 13 | max_hop = 1 14 | ): 15 | self.max_hop = max_hop 16 | self.num_node = num_node 17 | 18 | def get_adjacency(self, A): 19 | # compute hop steps 20 | self.hop_dis = np.zeros((self.num_node, self.num_node)) + np.inf 21 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(self.max_hop + 1)] 22 | arrive_mat = (np.stack(transfer_mat) > 0) 23 | for d in range(self.max_hop, -1, -1): 24 | self.hop_dis[arrive_mat[d]] = d 25 | 26 | # compute adjacency 27 | valid_hop = range(0, self.max_hop + 1) 28 | adjacency = np.zeros((self.num_node, self.num_node)) 29 | for hop in valid_hop: 30 | adjacency[self.hop_dis == hop] = 1 31 | return adjacency 32 | 33 | def normalize_adjacency(self, A): 34 | Dl = np.sum(A, 0) 35 | num_node = A.shape[0] 36 | Dn = np.zeros((num_node, num_node)) 37 | for i in range(num_node): 38 | if Dl[i] > 0: 39 | Dn[i, i] = Dl[i]**(-1) 40 | AD = np.dot(A, Dn) 41 | 42 | valid_hop = range(0, self.max_hop + 1) 43 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 44 | for i, hop in enumerate(valid_hop): 45 | A[i][self.hop_dis == hop] = AD[self.hop_dis == hop] 46 | return A 47 | 48 | 49 | -------------------------------------------------------------------------------- /layers/graph_conv_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from layers.graph_operation_layer import ConvTemporalGraphical 4 | 5 | class Graph_Conv_Block(nn.Module): 6 | def __init__(self, 7 | in_channels, 8 | out_channels, 9 | kernel_size, 10 | stride=1, 11 | dropout=0, 12 | residual=True): 13 | super().__init__() 14 | 15 | assert len(kernel_size) == 2 16 | assert kernel_size[0] % 2 == 1 17 | padding = ((kernel_size[0] - 1) // 2, 0) 18 | 19 | self.gcn = ConvTemporalGraphical(in_channels, out_channels, kernel_size[1]) 20 | self.tcn = nn.Sequential( 21 | nn.BatchNorm2d(out_channels), 22 | nn.ReLU(inplace=False), 23 | nn.Conv2d( 24 | out_channels, 25 | out_channels, 26 | (kernel_size[0], 1), 27 | (stride, 1), 28 | padding, 29 | ), 30 | nn.BatchNorm2d(out_channels), 31 | nn.Dropout(dropout, inplace=False), 32 | ) 33 | 34 | if not residual: 35 | self.residual = lambda x: 0 36 | elif (in_channels == out_channels) and (stride == 1): 37 | self.residual = lambda x: x 38 | else: 39 | self.residual = nn.Sequential( 40 | nn.Conv2d( 41 | in_channels, 42 | out_channels, 43 | kernel_size=1, 44 | stride=(stride, 1)), 45 | nn.BatchNorm2d(out_channels), 46 | ) 47 | self.relu = nn.ReLU(inplace=False) 48 | 49 | def forward(self, x, A): 50 | res = self.residual(x) 51 | x, A = self.gcn(x, A) 52 | x = self.tcn(x) + res 53 | return self.relu(x), A 54 | 55 | -------------------------------------------------------------------------------- /layers/graph_operation_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class ConvTemporalGraphical(nn.Module): 5 | def __init__(self, 6 | in_channels, 7 | out_channels, 8 | kernel_size, 9 | t_kernel_size=1, 10 | t_stride=1, 11 | t_padding=0, 12 | t_dilation=1, 13 | bias=True): 14 | super().__init__() 15 | 16 | self.kernel_size = kernel_size 17 | self.conv = nn.Conv2d( 18 | in_channels, 19 | out_channels * kernel_size, 20 | kernel_size=(t_kernel_size, 1), 21 | padding=(t_padding, 0), 22 | stride=(t_stride, 1), 23 | dilation=(t_dilation, 1), 24 | bias=bias) 25 | 26 | def forward(self, x, A): 27 | assert A.size(1) == self.kernel_size 28 | x = self.conv(x) 29 | n, kc, t, v = x.size() 30 | 31 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 32 | x = torch.einsum('nkctv,nkvw->nctw', (x, A)) 33 | 34 | return x.contiguous(), A 35 | -------------------------------------------------------------------------------- /layers/seq2seq.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | import numpy as np 5 | 6 | #################################################### 7 | # Seq2Seq LSTM AutoEncoder Model 8 | # - predict locations 9 | #################################################### 10 | class EncoderRNN(nn.Module): 11 | def __init__(self, input_size, hidden_size, num_layers, isCuda=True): 12 | super(EncoderRNN, self).__init__() 13 | self.input_size = input_size 14 | self.hidden_size = hidden_size 15 | self.num_layers = num_layers 16 | self.isCuda = isCuda 17 | # self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True) 18 | self.lstm = nn.GRU(input_size, hidden_size*30, num_layers, batch_first=True) 19 | 20 | def forward(self, input): 21 | output, hidden = self.lstm(input) 22 | return output, hidden 23 | 24 | class DecoderRNN(nn.Module): 25 | def __init__(self, hidden_size, output_size, num_layers, dropout=0.5, isCuda=True): 26 | super(DecoderRNN, self).__init__() 27 | self.hidden_size = hidden_size 28 | self.output_size = output_size 29 | self.num_layers = num_layers 30 | self.isCuda = isCuda 31 | # self.lstm = nn.LSTM(hidden_size, output_size, num_layers, batch_first=True) 32 | self.lstm = nn.GRU(hidden_size, output_size*30, num_layers, batch_first=True) 33 | 34 | #self.relu = nn.ReLU() 35 | self.sigmoid = nn.Sigmoid() 36 | self.dropout = nn.Dropout(p=dropout) 37 | self.linear = nn.Linear(output_size*30, output_size) 38 | self.tanh = nn.Tanh() 39 | 40 | def forward(self, encoded_input, hidden): 41 | decoded_output, hidden = self.lstm(encoded_input, hidden) 42 | # decoded_output = self.tanh(decoded_output) 43 | # decoded_output = self.sigmoid(decoded_output) 44 | decoded_output = self.dropout(decoded_output) 45 | # decoded_output = self.tanh(self.linear(decoded_output)) 46 | decoded_output = self.linear(decoded_output) 47 | # decoded_output = self.sigmoid(self.linear(decoded_output)) 48 | return decoded_output, hidden 49 | 50 | class Seq2Seq(nn.Module): 51 | def __init__(self, input_size, hidden_size, num_layers, dropout=0.5, isCuda=True): 52 | super(Seq2Seq, self).__init__() 53 | self.isCuda = isCuda 54 | # self.pred_length = pred_length 55 | self.encoder = EncoderRNN(input_size, hidden_size, num_layers, isCuda) 56 | self.decoder = DecoderRNN(hidden_size, hidden_size, num_layers, dropout, isCuda) 57 | 58 | def forward(self, in_data, last_location, pred_length, teacher_forcing_ratio=0, teacher_location=None): 59 | batch_size = in_data.shape[0] 60 | out_dim = self.decoder.output_size 61 | self.pred_length = pred_length 62 | 63 | outputs = torch.zeros(batch_size, self.pred_length, out_dim) 64 | if self.isCuda: 65 | outputs = outputs.cuda() 66 | 67 | encoded_output, hidden = self.encoder(in_data) 68 | decoder_input = last_location 69 | for t in range(self.pred_length): 70 | # encoded_input = torch.cat((now_label, encoded_input), dim=-1) # merge class label into input feature 71 | now_out, hidden = self.decoder(decoder_input, hidden) 72 | now_out += decoder_input 73 | outputs[:,t:t+1] = now_out 74 | teacher_force = np.random.random() < teacher_forcing_ratio 75 | decoder_input = (teacher_location[:,t:t+1] if (type(teacher_location) is not type(None)) and teacher_force else now_out) 76 | # decoder_input = now_out 77 | return outputs 78 | 79 | #################################################### 80 | #################################################### -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import sys 4 | import numpy as np 5 | import torch 6 | import torch.optim as optim 7 | from model import Model 8 | from xin_feeder_baidu import Feeder 9 | from datetime import datetime 10 | import random 11 | import itertools 12 | 13 | CUDA_VISIBLE_DEVICES='1' 14 | os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES 15 | 16 | def seed_torch(seed=0): 17 | random.seed(seed) 18 | os.environ['PYTHONHASHSEED'] = str(seed) 19 | np.random.seed(seed) 20 | torch.manual_seed(seed) 21 | torch.cuda.manual_seed(seed) 22 | torch.cuda.manual_seed_all(seed) # if you are using multi-GPU. 23 | torch.backends.cudnn.benchmark = False 24 | torch.backends.cudnn.deterministic = True 25 | seed_torch() 26 | 27 | max_x = 1. 28 | max_y = 1. 29 | history_frames = 6 # 3 second * 2 frame/second 30 | future_frames = 6 # 3 second * 2 frame/second 31 | 32 | batch_size_train = 64 33 | batch_size_val = 32 34 | batch_size_test = 1 35 | total_epoch = 50 36 | base_lr = 0.01 37 | lr_decay_epoch = 5 38 | dev = 'cuda:0' 39 | work_dir = './trained_models' 40 | log_file = os.path.join(work_dir,'log_test.txt') 41 | test_result_file = 'prediction_result.txt' 42 | 43 | criterion = torch.nn.SmoothL1Loss() 44 | 45 | if not os.path.exists(work_dir): 46 | os.makedirs(work_dir) 47 | 48 | def my_print(pra_content): 49 | with open(log_file, 'a') as writer: 50 | print(pra_content) 51 | writer.write(pra_content+'\n') 52 | 53 | def display_result(pra_results, pra_pref='Train_epoch'): 54 | all_overall_sum_list, all_overall_num_list = pra_results 55 | overall_sum_time = np.sum(all_overall_sum_list**0.5, axis=0) 56 | overall_num_time = np.sum(all_overall_num_list, axis=0) 57 | overall_loss_time = (overall_sum_time / overall_num_time) 58 | overall_log = '|{}|[{}] All_All: {}'.format(datetime.now(), pra_pref, ' '.join(['{:.3f}'.format(x) for x in list(overall_loss_time) + [np.sum(overall_loss_time)]])) 59 | my_print(overall_log) 60 | return overall_loss_time 61 | 62 | 63 | def my_save_model(pra_model, pra_epoch): 64 | path = '{}/model_epoch_{:04}.pt'.format(work_dir, pra_epoch) 65 | torch.save( 66 | { 67 | 'xin_graph_seq2seq_model': pra_model.state_dict(), 68 | }, 69 | path) 70 | print('Successfull saved to {}'.format(path)) 71 | 72 | 73 | def my_load_model(pra_model, pra_path): 74 | checkpoint = torch.load(pra_path) 75 | pra_model.load_state_dict(checkpoint['xin_graph_seq2seq_model']) 76 | print('Successfull loaded from {}'.format(pra_path)) 77 | return pra_model 78 | 79 | 80 | def data_loader(pra_path, pra_batch_size=128, pra_shuffle=False, pra_drop_last=False, train_val_test='train'): 81 | feeder = Feeder(data_path=pra_path, graph_args=graph_args, train_val_test=train_val_test) 82 | loader = torch.utils.data.DataLoader( 83 | dataset=feeder, 84 | batch_size=pra_batch_size, 85 | shuffle=pra_shuffle, 86 | drop_last=pra_drop_last, 87 | num_workers=10, 88 | ) 89 | return loader 90 | 91 | def preprocess_data(pra_data, pra_rescale_xy): 92 | # pra_data: (N, C, T, V) 93 | # C = 11: [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] + [mask] 94 | feature_id = [3, 4, 9, 10] 95 | ori_data = pra_data[:,feature_id].detach() 96 | data = ori_data.detach().clone() 97 | 98 | new_mask = (data[:, :2, 1:]!=0) * (data[:, :2, :-1]!=0) 99 | data[:, :2, 1:] = (data[:, :2, 1:] - data[:, :2, :-1]).float() * new_mask.float() 100 | data[:, :2, 0] = 0 101 | 102 | # # small vehicle: 1, big vehicles: 2, pedestrian 3, bicycle: 4, others: 5 103 | object_type = pra_data[:,2:3] 104 | 105 | data = data.float().to(dev) 106 | ori_data = ori_data.float().to(dev) 107 | object_type = object_type.to(dev) #type 108 | data[:,:2] = data[:,:2] / pra_rescale_xy 109 | 110 | return data, ori_data, object_type 111 | 112 | 113 | def compute_RMSE(pra_pred, pra_GT, pra_mask, pra_error_order=2): 114 | pred = pra_pred * pra_mask # (N, C, T, V)=(N, 2, 6, 120) 115 | GT = pra_GT * pra_mask # (N, C, T, V)=(N, 2, 6, 120) 116 | 117 | x2y2 = torch.sum(torch.abs(pred - GT)**pra_error_order, dim=1) # x^2+y^2, (N, C, T, V)->(N, T, V)=(N, 6, 120) 118 | overall_sum_time = x2y2.sum(dim=-1) # (N, T, V) -> (N, T)=(N, 6) 119 | overall_mask = pra_mask.sum(dim=1).sum(dim=-1) # (N, C, T, V) -> (N, T)=(N, 6) 120 | overall_num = overall_mask 121 | 122 | return overall_sum_time, overall_num, x2y2 123 | 124 | 125 | def train_model(pra_model, pra_data_loader, pra_optimizer, pra_epoch_log): 126 | # pra_model.to(dev) 127 | pra_model.train() 128 | rescale_xy = torch.ones((1,2,1,1)).to(dev) 129 | rescale_xy[:,0] = max_x 130 | rescale_xy[:,1] = max_y 131 | 132 | # train model using training data 133 | for iteration, (ori_data, A, _) in enumerate(pra_data_loader): 134 | # print(iteration, ori_data.shape, A.shape) 135 | # ori_data: (N, C, T, V) 136 | # C = 11: [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] + [mask] 137 | data, no_norm_loc_data, object_type = preprocess_data(ori_data, rescale_xy) 138 | for now_history_frames in range(1, data.shape[-2]): 139 | input_data = data[:,:,:now_history_frames,:] # (N, C, T, V)=(N, 4, 6, 120) 140 | output_loc_GT = data[:,:2,now_history_frames:,:] # (N, C, T, V)=(N, 2, 6, 120) 141 | output_mask = data[:,-1:,now_history_frames:,:] # (N, C, T, V)=(N, 1, 6, 120) 142 | 143 | A = A.float().to(dev) 144 | 145 | predicted = pra_model(pra_x=input_data, pra_A=A, pra_pred_length=output_loc_GT.shape[-2], pra_teacher_forcing_ratio=0, pra_teacher_location=output_loc_GT) # (N, C, T, V)=(N, 2, 6, 120) 146 | 147 | ######################################################## 148 | # Compute loss for training 149 | ######################################################## 150 | # We use abs to compute loss to backward update weights 151 | # (N, T), (N, T) 152 | overall_sum_time, overall_num, _ = compute_RMSE(predicted, output_loc_GT, output_mask, pra_error_order=1) 153 | # overall_loss 154 | total_loss = torch.sum(overall_sum_time) / torch.max(torch.sum(overall_num), torch.ones(1,).to(dev)) #(1,) 155 | 156 | now_lr = [param_group['lr'] for param_group in pra_optimizer.param_groups][0] 157 | my_print('|{}|{:>20}|\tIteration:{:>5}|\tLoss:{:.8f}|lr: {}|'.format(datetime.now(), pra_epoch_log, iteration, total_loss.data.item(),now_lr)) 158 | 159 | pra_optimizer.zero_grad() 160 | total_loss.backward() 161 | pra_optimizer.step() 162 | 163 | 164 | 165 | def val_model(pra_model, pra_data_loader): 166 | # pra_model.to(dev) 167 | pra_model.eval() 168 | rescale_xy = torch.ones((1,2,1,1)).to(dev) 169 | rescale_xy[:,0] = max_x 170 | rescale_xy[:,1] = max_y 171 | all_overall_sum_list = [] 172 | all_overall_num_list = [] 173 | 174 | all_car_sum_list = [] 175 | all_car_num_list = [] 176 | all_human_sum_list = [] 177 | all_human_num_list = [] 178 | all_bike_sum_list = [] 179 | all_bike_num_list = [] 180 | # train model using training data 181 | for iteration, (ori_data, A, _) in enumerate(pra_data_loader): 182 | # data: (N, C, T, V) 183 | # C = 11: [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] + [mask] 184 | data, no_norm_loc_data, _ = preprocess_data(ori_data, rescale_xy) 185 | 186 | for now_history_frames in range(6, 7): 187 | input_data = data[:,:,:now_history_frames,:] # (N, C, T, V)=(N, 4, 6, 120) 188 | output_loc_GT = data[:,:2,now_history_frames:,:] # (N, C, T, V)=(N, 2, 6, 120) 189 | output_mask = data[:,-1:,now_history_frames:,:] # (N, C, T, V)=(N, 1, 6, 120) 190 | 191 | ori_output_loc_GT = no_norm_loc_data[:,:2,now_history_frames:,:] 192 | ori_output_last_loc = no_norm_loc_data[:,:2,now_history_frames-1:now_history_frames,:] 193 | 194 | # for category 195 | cat_mask = ori_data[:,2:3, now_history_frames:, :] # (N, C, T, V)=(N, 1, 6, 120) 196 | 197 | A = A.float().to(dev) 198 | predicted = pra_model(pra_x=input_data, pra_A=A, pra_pred_length=output_loc_GT.shape[-2], pra_teacher_forcing_ratio=0, pra_teacher_location=output_loc_GT) # (N, C, T, V)=(N, 2, 6, 120) 199 | ######################################################## 200 | # Compute details for training 201 | ######################################################## 202 | predicted = predicted*rescale_xy 203 | # output_loc_GT = output_loc_GT*rescale_xy 204 | 205 | for ind in range(1, predicted.shape[-2]): 206 | predicted[:,:,ind] = torch.sum(predicted[:,:,ind-1:ind+1], dim=-2) 207 | predicted += ori_output_last_loc 208 | 209 | ### overall dist 210 | # overall_sum_time, overall_num, x2y2 = compute_RMSE(predicted, output_loc_GT, output_mask) 211 | overall_sum_time, overall_num, x2y2 = compute_RMSE(predicted, ori_output_loc_GT, output_mask) 212 | # all_overall_sum_list.extend(overall_sum_time.detach().cpu().numpy()) 213 | all_overall_num_list.extend(overall_num.detach().cpu().numpy()) 214 | # x2y2 (N, 6, 39) 215 | now_x2y2 = x2y2.detach().cpu().numpy() 216 | now_x2y2 = now_x2y2.sum(axis=-1) 217 | all_overall_sum_list.extend(now_x2y2) 218 | 219 | ### car dist 220 | car_mask = (((cat_mask==1)+(cat_mask==2))>0).float().to(dev) 221 | car_mask = output_mask * car_mask 222 | car_sum_time, car_num, car_x2y2 = compute_RMSE(predicted, ori_output_loc_GT, car_mask) 223 | all_car_num_list.extend(car_num.detach().cpu().numpy()) 224 | # x2y2 (N, 6, 39) 225 | car_x2y2 = car_x2y2.detach().cpu().numpy() 226 | car_x2y2 = car_x2y2.sum(axis=-1) 227 | all_car_sum_list.extend(car_x2y2) 228 | 229 | ### human dist 230 | human_mask = (cat_mask==3).float().to(dev) 231 | human_mask = output_mask * human_mask 232 | human_sum_time, human_num, human_x2y2 = compute_RMSE(predicted, ori_output_loc_GT, human_mask) 233 | all_human_num_list.extend(human_num.detach().cpu().numpy()) 234 | # x2y2 (N, 6, 39) 235 | human_x2y2 = human_x2y2.detach().cpu().numpy() 236 | human_x2y2 = human_x2y2.sum(axis=-1) 237 | all_human_sum_list.extend(human_x2y2) 238 | 239 | ### bike dist 240 | bike_mask = (cat_mask==4).float().to(dev) 241 | bike_mask = output_mask * bike_mask 242 | bike_sum_time, bike_num, bike_x2y2 = compute_RMSE(predicted, ori_output_loc_GT, bike_mask) 243 | all_bike_num_list.extend(bike_num.detach().cpu().numpy()) 244 | # x2y2 (N, 6, 39) 245 | bike_x2y2 = bike_x2y2.detach().cpu().numpy() 246 | bike_x2y2 = bike_x2y2.sum(axis=-1) 247 | all_bike_sum_list.extend(bike_x2y2) 248 | 249 | 250 | result_car = display_result([np.array(all_car_sum_list), np.array(all_car_num_list)], pra_pref='car') 251 | result_human = display_result([np.array(all_human_sum_list), np.array(all_human_num_list)], pra_pref='human') 252 | result_bike = display_result([np.array(all_bike_sum_list), np.array(all_bike_num_list)], pra_pref='bike') 253 | 254 | result = 0.20*result_car + 0.58*result_human + 0.22*result_bike 255 | overall_log = '|{}|[{}] All_All: {}'.format(datetime.now(), 'WS', ' '.join(['{:.3f}'.format(x) for x in list(result) + [np.sum(result)]])) 256 | my_print(overall_log) 257 | 258 | all_overall_sum_list = np.array(all_overall_sum_list) 259 | all_overall_num_list = np.array(all_overall_num_list) 260 | return all_overall_sum_list, all_overall_num_list 261 | 262 | 263 | 264 | def test_model(pra_model, pra_data_loader): 265 | # pra_model.to(dev) 266 | pra_model.eval() 267 | rescale_xy = torch.ones((1,2,1,1)).to(dev) 268 | rescale_xy[:,0] = max_x 269 | rescale_xy[:,1] = max_y 270 | all_overall_sum_list = [] 271 | all_overall_num_list = [] 272 | with open(test_result_file, 'w') as writer: 273 | # train model using training data 274 | for iteration, (ori_data, A, mean_xy) in enumerate(pra_data_loader): 275 | # data: (N, C, T, V) 276 | # C = 11: [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] + [mask] 277 | data, no_norm_loc_data, _ = preprocess_data(ori_data, rescale_xy) 278 | input_data = data[:,:,:history_frames,:] # (N, C, T, V)=(N, 4, 6, 120) 279 | output_mask = data[:,-1,-1,:] # (N, V)=(N, 120) 280 | # print(data.shape, A.shape, mean_xy.shape, input_data.shape) 281 | 282 | ori_output_last_loc = no_norm_loc_data[:,:2,history_frames-1:history_frames,:] 283 | 284 | A = A.float().to(dev) 285 | predicted = pra_model(pra_x=input_data, pra_A=A, pra_pred_length=future_frames, pra_teacher_forcing_ratio=0, pra_teacher_location=None) # (N, C, T, V)=(N, 2, 6, 120) 286 | predicted = predicted *rescale_xy 287 | 288 | for ind in range(1, predicted.shape[-2]): 289 | predicted[:,:,ind] = torch.sum(predicted[:,:,ind-1:ind+1], dim=-2) 290 | predicted += ori_output_last_loc 291 | 292 | now_pred = predicted.detach().cpu().numpy() # (N, C, T, V)=(N, 2, 6, 120) 293 | now_mean_xy = mean_xy.detach().cpu().numpy() # (N, 2) 294 | now_ori_data = ori_data.detach().cpu().numpy() # (N, C, T, V)=(N, 11, 6, 120) 295 | now_mask = now_ori_data[:, -1, -1, :] # (N, V) 296 | 297 | now_pred = np.transpose(now_pred, (0, 2, 3, 1)) # (N, T, V, 2) 298 | now_ori_data = np.transpose(now_ori_data, (0, 2, 3, 1)) # (N, T, V, 11) 299 | 300 | # print(now_pred.shape, now_mean_xy.shape, now_ori_data.shape, now_mask.shape) 301 | 302 | for n_pred, n_mean_xy, n_data, n_mask in zip(now_pred, now_mean_xy, now_ori_data, now_mask): 303 | # (6, 120, 2), (2,), (6, 120, 11), (120, ) 304 | num_object = np.sum(n_mask).astype(int) 305 | # only use the last time of original data for ids (frame_id, object_id, object_type) 306 | # (6, 120, 11) -> (num_object, 3) 307 | n_dat = n_data[-1, :num_object, :3].astype(int) 308 | for time_ind, n_pre in enumerate(n_pred[:, :num_object], start=1): 309 | # (120, 2) -> (n, 2) 310 | # print(n_dat.shape, n_pre.shape) 311 | for info, pred in zip(n_dat, n_pre+n_mean_xy): 312 | information = info.copy() 313 | information[0] = information[0] + time_ind 314 | result = ' '.join(information.astype(str)) + ' ' + ' '.join(pred.astype(str)) + '\n' 315 | # print(result) 316 | writer.write(result) 317 | 318 | 319 | def run_trainval(pra_model, pra_traindata_path, pra_testdata_path): 320 | loader_train = data_loader(pra_traindata_path, pra_batch_size=batch_size_train, pra_shuffle=True, pra_drop_last=True, train_val_test='train') 321 | loader_test = data_loader(pra_testdata_path, pra_batch_size=batch_size_train, pra_shuffle=True, pra_drop_last=True, train_val_test='all') 322 | 323 | # evaluate on testing data (observe 5 frame and predict 1 frame) 324 | loader_val = data_loader(pra_traindata_path, pra_batch_size=batch_size_val, pra_shuffle=False, pra_drop_last=False, train_val_test='val') 325 | 326 | optimizer = optim.Adam( 327 | [{'params':model.parameters()},],) # lr = 0.0001) 328 | 329 | for now_epoch in range(total_epoch): 330 | all_loader_train = itertools.chain(loader_train, loader_test) 331 | 332 | my_print('#######################################Train') 333 | train_model(pra_model, all_loader_train, pra_optimizer=optimizer, pra_epoch_log='Epoch:{:>4}/{:>4}'.format(now_epoch, total_epoch)) 334 | 335 | my_save_model(pra_model, now_epoch) 336 | 337 | my_print('#######################################Test') 338 | display_result( 339 | val_model(pra_model, loader_val), 340 | pra_pref='{}_Epoch{}'.format('Test', now_epoch) 341 | ) 342 | 343 | 344 | def run_test(pra_model, pra_data_path): 345 | loader_test = data_loader(pra_data_path, pra_batch_size=batch_size_test, pra_shuffle=False, pra_drop_last=False, train_val_test='test') 346 | test_model(pra_model, loader_test) 347 | 348 | 349 | 350 | if __name__ == '__main__': 351 | graph_args={'max_hop':2, 'num_node':120} 352 | model = Model(in_channels=4, graph_args=graph_args, edge_importance_weighting=True) 353 | model.to(dev) 354 | 355 | # train and evaluate model 356 | run_trainval(model, pra_traindata_path='./train_data.pkl', pra_testdata_path='./test_data.pkl') 357 | 358 | # pretrained_model_path = './trained_models/model_epoch_0016.pt' 359 | # model = my_load_model(model, pretrained_model_path) 360 | # run_test(model, './test_data.pkl') 361 | 362 | 363 | 364 | 365 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | from layers.graph import Graph 7 | from layers.graph_conv_block import Graph_Conv_Block 8 | from layers.seq2seq import Seq2Seq, EncoderRNN 9 | import numpy as np 10 | 11 | class Model(nn.Module): 12 | def __init__(self, in_channels, graph_args, edge_importance_weighting, **kwargs): 13 | super().__init__() 14 | 15 | # load graph 16 | self.graph = Graph(**graph_args) 17 | A = np.ones((graph_args['max_hop']+1, graph_args['num_node'], graph_args['num_node'])) 18 | 19 | # build networks 20 | spatial_kernel_size = np.shape(A)[0] 21 | temporal_kernel_size = 5 #9 #5 # 3 22 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 23 | 24 | # best 25 | self.st_gcn_networks = nn.ModuleList(( 26 | nn.BatchNorm2d(in_channels), 27 | Graph_Conv_Block(in_channels, 64, kernel_size, 1, residual=True, **kwargs), 28 | Graph_Conv_Block(64, 64, kernel_size, 1, **kwargs), 29 | Graph_Conv_Block(64, 64, kernel_size, 1, **kwargs), 30 | )) 31 | 32 | # initialize parameters for edge importance weighting 33 | if edge_importance_weighting: 34 | self.edge_importance = nn.ParameterList( 35 | [nn.Parameter(torch.ones(np.shape(A))) for i in self.st_gcn_networks] 36 | ) 37 | else: 38 | self.edge_importance = [1] * len(self.st_gcn_networks) 39 | 40 | self.num_node = num_node = self.graph.num_node 41 | self.out_dim_per_node = out_dim_per_node = 2 #(x, y) coordinate 42 | self.seq2seq_car = Seq2Seq(input_size=(64), hidden_size=out_dim_per_node, num_layers=2, dropout=0.5, isCuda=True) 43 | self.seq2seq_human = Seq2Seq(input_size=(64), hidden_size=out_dim_per_node, num_layers=2, dropout=0.5, isCuda=True) 44 | self.seq2seq_bike = Seq2Seq(input_size=(64), hidden_size=out_dim_per_node, num_layers=2, dropout=0.5, isCuda=True) 45 | 46 | 47 | def reshape_for_lstm(self, feature): 48 | # prepare for skeleton prediction model 49 | ''' 50 | N: batch_size 51 | C: channel 52 | T: time_step 53 | V: nodes 54 | ''' 55 | N, C, T, V = feature.size() 56 | now_feat = feature.permute(0, 3, 2, 1).contiguous() # to (N, V, T, C) 57 | now_feat = now_feat.view(N*V, T, C) 58 | return now_feat 59 | 60 | def reshape_from_lstm(self, predicted): 61 | # predicted (N*V, T, C) 62 | NV, T, C = predicted.size() 63 | now_feat = predicted.view(-1, self.num_node, T, self.out_dim_per_node) # (N, T, V, C) -> (N, C, T, V) [(N, V, T, C)] 64 | now_feat = now_feat.permute(0, 3, 2, 1).contiguous() # (N, C, T, V) 65 | return now_feat 66 | 67 | def forward(self, pra_x, pra_A, pra_pred_length, pra_teacher_forcing_ratio=0, pra_teacher_location=None): 68 | x = pra_x 69 | 70 | # forwad 71 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 72 | if type(gcn) is nn.BatchNorm2d: 73 | x = gcn(x) 74 | else: 75 | x, _ = gcn(x, pra_A + importance) 76 | 77 | # prepare for seq2seq lstm model 78 | graph_conv_feature = self.reshape_for_lstm(x) 79 | last_position = self.reshape_for_lstm(pra_x[:,:2]) #(N, C, T, V)[:, :2] -> (N, T, V*2) [(N*V, T, C)] 80 | 81 | if pra_teacher_forcing_ratio>0 and type(pra_teacher_location) is not type(None): 82 | pra_teacher_location = self.reshape_for_lstm(pra_teacher_location) 83 | 84 | # now_predict.shape = (N, T, V*C) 85 | now_predict_car = self.seq2seq_car(in_data=graph_conv_feature, last_location=last_position[:,-1:,:], pred_length=pra_pred_length, teacher_forcing_ratio=pra_teacher_forcing_ratio, teacher_location=pra_teacher_location) 86 | now_predict_car = self.reshape_from_lstm(now_predict_car) # (N, C, T, V) 87 | 88 | now_predict_human = self.seq2seq_human(in_data=graph_conv_feature, last_location=last_position[:,-1:,:], pred_length=pra_pred_length, teacher_forcing_ratio=pra_teacher_forcing_ratio, teacher_location=pra_teacher_location) 89 | now_predict_human = self.reshape_from_lstm(now_predict_human) # (N, C, T, V) 90 | 91 | now_predict_bike = self.seq2seq_bike(in_data=graph_conv_feature, last_location=last_position[:,-1:,:], pred_length=pra_pred_length, teacher_forcing_ratio=pra_teacher_forcing_ratio, teacher_location=pra_teacher_location) 92 | now_predict_bike = self.reshape_from_lstm(now_predict_bike) # (N, C, T, V) 93 | 94 | now_predict = (now_predict_car + now_predict_human + now_predict_bike)/3. 95 | 96 | return now_predict 97 | 98 | if __name__ == '__main__': 99 | model = Model(in_channels=3, pred_length=6, graph_args={}, edge_importance_weighting=True) 100 | print(model) 101 | -------------------------------------------------------------------------------- /xin_feeder_baidu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import random 5 | import pickle 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.optim as optim 10 | import torch.nn.functional as F 11 | from torchvision import datasets, transforms 12 | 13 | from layers.graph import Graph 14 | 15 | import time 16 | 17 | 18 | class Feeder(torch.utils.data.Dataset): 19 | """ Feeder for skeleton-based action recognition 20 | Arguments: 21 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 22 | """ 23 | 24 | def __init__(self, data_path, graph_args={}, train_val_test='train'): 25 | ''' 26 | train_val_test: (train, val, test) 27 | ''' 28 | self.data_path = data_path 29 | self.load_data() 30 | 31 | total_num = len(self.all_feature) 32 | # equally choose validation set 33 | train_id_list = list(np.linspace(0, total_num-1, int(total_num*0.8)).astype(int)) 34 | val_id_list = list(set(list(range(total_num))) - set(train_id_list)) 35 | 36 | # # last 20% data as validation set 37 | self.train_val_test = train_val_test 38 | 39 | if train_val_test.lower() == 'train': 40 | self.all_feature = self.all_feature[train_id_list] 41 | self.all_adjacency = self.all_adjacency[train_id_list] 42 | self.all_mean_xy = self.all_mean_xy[train_id_list] 43 | elif train_val_test.lower() == 'val': 44 | self.all_feature = self.all_feature[val_id_list] 45 | self.all_adjacency = self.all_adjacency[val_id_list] 46 | self.all_mean_xy = self.all_mean_xy[val_id_list] 47 | 48 | self.graph = Graph(**graph_args) #num_node = 120,max_hop = 1 49 | 50 | def load_data(self): 51 | with open(self.data_path, 'rb') as reader: 52 | # Training (N, C, T, V)=(5010, 11, 12, 120), (5010, 120, 120), (5010, 2) 53 | [self.all_feature, self.all_adjacency, self.all_mean_xy]= pickle.load(reader) 54 | 55 | 56 | def __len__(self): 57 | return len(self.all_feature) 58 | 59 | def __getitem__(self, idx): 60 | # C = 11: [frame_id, object_id, object_type, position_x, position_y, position_z, object_length, pbject_width, pbject_height, heading] + [mask] 61 | now_feature = self.all_feature[idx].copy() # (C, T, V) = (11, 12, 120) 62 | now_mean_xy = self.all_mean_xy[idx].copy() # (2,) = (x, y) 63 | 64 | if self.train_val_test.lower() == 'train' and np.random.random()>0.5: 65 | angle = 2 * np.pi * np.random.random() 66 | sin_angle = np.sin(angle) 67 | cos_angle = np.cos(angle) 68 | 69 | angle_mat = np.array( 70 | [[cos_angle, -sin_angle], 71 | [sin_angle, cos_angle]]) 72 | 73 | xy = now_feature[3:5, :, :] 74 | num_xy = np.sum(xy.sum(axis=0).sum(axis=0) != 0) # get the number of valid data 75 | 76 | # angle_mat: (2, 2), xy: (2, 12, 120) 77 | out_xy = np.einsum('ab,btv->atv', angle_mat, xy) 78 | now_mean_xy = np.matmul(angle_mat, now_mean_xy) 79 | xy[:,:,:num_xy] = out_xy[:,:,:num_xy] 80 | 81 | now_feature[3:5, :, :] = xy 82 | 83 | now_adjacency = self.graph.get_adjacency(self.all_adjacency[idx]) 84 | now_A = self.graph.normalize_adjacency(now_adjacency) 85 | 86 | return now_feature, now_A, now_mean_xy 87 | 88 | --------------------------------------------------------------------------------