├── images
│   ├── model.png
│   ├── PSP-NET.png
│   ├── model1.png
│   ├── results.png
│   ├── segmentation-semantic-embedding.png
│   └── noscene-segmentation-scene-embedding.png
├── loader.py
├── README.md
├── utils.py
├── losses.py
├── trajectory_generator.py
├── model.py
├── vizualization.py
├── train.py
└── evaluate.py
/images/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/model.png -------------------------------------------------------------------------------- /images/PSP-NET.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/PSP-NET.png -------------------------------------------------------------------------------- /images/model1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/model1.png -------------------------------------------------------------------------------- /images/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/results.png -------------------------------------------------------------------------------- /images/segmentation-semantic-embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/segmentation-semantic-embedding.png -------------------------------------------------------------------------------- /images/noscene-segmentation-scene-embedding.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arsalhuda24/LSTM-autoencoders-for-Trajectory-Prediction/HEAD/images/noscene-segmentation-scene-embedding.png -------------------------------------------------------------------------------- /loader.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import DataLoader 2 | from trajectory_generator import TrajectoryDataset, seq_collate 3 | 4 | 5 | # def data_loader(path): 6 | # dset = TrajectoryDataset( 7 | # path, 8 | # obs_len=8, 9 | # pred_len=12, 10 | # skip=1, 11 | # delim='\t') 12 | # 13 | # loader = DataLoader( 14 | # dset, 15 | # batch_size=64, 16 | # shuffle=False, 17 | # num_workers=0, 18 | # collate_fn=seq_collate) 19 | # return dset, loader 20 | 21 | from torch.utils.data import DataLoader 22 | 23 | # from sgan.data.trajectories import TrajectoryDataset, seq_collate 24 | 25 | 26 | def data_loader(args, path): 27 | dset = TrajectoryDataset( 28 | path, 29 | obs_len=args.obs_len, 30 | pred_len=args.pred_len, 31 | skip=args.skip, 32 | delim=args.delim) 33 | 34 | loader = DataLoader( 35 | dset, 36 | batch_size=args.batch_size, 37 | shuffle=False, 38 | num_workers=args.loader_num_workers, 39 | collate_fn=seq_collate) 40 | return dset, loader -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CNN, Segmentation or Semantic Embedding: Evaluating Scene Context for Trajectory Prediction 2 | Please cite as follows if you find this implementation useful.
3 | 4 | ``` 5 | @inproceedings{Syed2020, 6 | title = {{CNN, Segmentation or Semantic Embedding: Evaluating Scene Context for Trajectory Prediction}}, 7 | author = {Syed, Arsal and Morris, Brendan}, 8 | booktitle = {Bebis, G. et al. (eds) Advances in Visual Computing. ISVC 2020. 9 | Lecture Notes in Computer Science, vol 12510. Springer, 10 | Cham. https://doi.org/10.1007/978-3-030-64559-5_56}, 11 | year = {2020} 12 | 13 | } 14 | ``` 15 | 16 | ## Summary 17 | For autonomous vehicles (AVs) and social robots to navigate naturally and safely, they must fully understand their surroundings. While it is often recognized that scene context is important for understanding pedestrian behavior, it has received less attention than modeling social context, i.e., the influence of interactions between pedestrians. In this paper, we evaluate the effectiveness of various scene representations for deep trajectory prediction. Our work focuses on characterizing the impact of scene representations (semantic images vs. semantic embeddings) and scene quality (competing semantic segmentation networks). We leverage a hierarchical RNN autoencoder to encode historical pedestrian motion, social interactions, and scene semantics into a low-dimensional subspace, and then decode it to generate future motion predictions. Experimental evaluation on the ETH and UCY datasets shows that using full scene semantics, specifically segmented images, improves trajectory prediction over using embeddings alone. 18 | 19 | ## Model 20 | ### LSTM autoencoder model 21 | ![Model](https://github.com/arsalhuda24/VAE-Trajectory-Prediction/blob/master/images/model1.png) 22 | 23 | 24 | ## Semantic Segmentation Comparison of SegNet and PSP-Net 25 | ![Trajectory](https://github.com/arsalhuda24/VAE-Trajectory-Prediction/blob/master/images/PSP-NET.png) 26 | 27 | 28 | ## Results on Pedestrian Trajectories 29 | ![Trajectory](https://github.com/arsalhuda24/VAE-Trajectory-Prediction/blob/master/images/noscene-segmentation-scene-embedding.png) 30 | 31 | ![Trajectory](https://github.com/arsalhuda24/VAE-Trajectory-Prediction/blob/master/images/segmentation-semantic-embedding.png) 32 | 33 | For any questions, please send an email to syeda3@unlv.nevada.edu. 34 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | import numpy as np 5 | import inspect 6 | from contextlib import contextmanager 7 | import subprocess 8 | 9 | 10 | def int_tuple(s): 11 | return tuple(int(i) for i in s.split(',')) 12 | 13 | 14 | def find_nan(variable, var_name): 15 | variable_n = variable.data.cpu().numpy() 16 | if np.isnan(variable_n).any(): 17 | exit('%s has nan' % var_name) 18 | 19 | 20 | def bool_flag(s): 21 | if s == '1': 22 | return True 23 | elif s == '0': 24 | return False 25 | msg = 'Invalid value "%s" for bool flag (should be 0 or 1)' 26 | raise ValueError(msg % s) 27 | 28 | 29 | def lineno(): 30 | return str(inspect.currentframe().f_back.f_lineno) 31 | 32 | 33 | def get_total_norm(parameters, norm_type=2): 34 | if norm_type == float('inf'): 35 | total_norm = max(p.grad.data.abs().max() for p in parameters) 36 | else: 37 | total_norm = 0 38 | for p in parameters: 39 | try: 40 | param_norm = p.grad.data.norm(norm_type) 41 | total_norm += param_norm**norm_type 42 | total_norm = total_norm**(1. 
/ norm_type) 43 | except: 44 | continue 45 | return total_norm 46 | 47 | 48 | @contextmanager 49 | def timeit(msg, should_time=True): 50 | if should_time: 51 | torch.cuda.synchronize() 52 | t0 = time.time() 53 | yield 54 | if should_time: 55 | torch.cuda.synchronize() 56 | t1 = time.time() 57 | duration = (t1 - t0) * 1000.0 58 | print('%s: %.2f ms' % (msg, duration)) 59 | 60 | 61 | def get_gpu_memory(): 62 | torch.cuda.synchronize() 63 | opts = [ 64 | 'nvidia-smi', '-q', '--gpu=' + str(1), '|', 'grep', '"Used GPU Memory"' 65 | ] 66 | cmd = str.join(' ', opts) 67 | ps = subprocess.Popen( 68 | cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) 69 | output = ps.communicate()[0].decode('utf-8') 70 | output = output.split("\n")[0].split(":") 71 | consumed_mem = int(output[1].strip().split(" ")[0]) 72 | return consumed_mem 73 | 74 | 75 | def get_dset_path(dset_name, dset_type): 76 | _dir = os.path.dirname(__file__) 77 | _dir = _dir.split("/")[:-1] 78 | _dir = "/".join(_dir) 79 | return os.path.join(_dir, 'datasets', dset_name, dset_type) 80 | 81 | 82 | def relative_to_abs(rel_traj, start_pos): 83 | """ 84 | Inputs: 85 | - rel_traj: pytorch tensor of shape (seq_len, batch, 2) 86 | - start_pos: pytorch tensor of shape (batch, 2) 87 | Outputs: 88 | - abs_traj: pytorch tensor of shape (seq_len, batch, 2) 89 | """ 90 | # batch, seq_len, 2 91 | rel_traj = rel_traj.permute(1, 0, 2) 92 | displacement = torch.cumsum(rel_traj, dim=1) 93 | start_pos = torch.unsqueeze(start_pos, dim=1) 94 | abs_traj = displacement + start_pos 95 | return abs_traj.permute(1, 0, 2) -------------------------------------------------------------------------------- /losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | 4 | 5 | def bce_loss(input, target): 6 | """ 7 | Numerically stable version of the binary cross-entropy loss function. 8 | As per https://github.com/pytorch/pytorch/issues/751 9 | See the TensorFlow docs for a derivation of this formula: 10 | https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits 11 | Input: 12 | - input: PyTorch Tensor of shape (N, ) giving scores. 13 | - target: PyTorch Tensor of shape (N,) containing 0 and 1 giving targets. 14 | Output: 15 | - A PyTorch Tensor containing the mean BCE loss over the minibatch of 16 | input data. 
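Note: the implementation below evaluates max(x, 0) - x * z + log(1 + exp(-|x|))
for scores x and targets z, which is the numerically stable form of
sigmoid cross-entropy with logits referenced above.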
17 | """ 18 | neg_abs = -input.abs() 19 | loss = input.clamp(min=0) - input * target + (1 + neg_abs.exp()).log() 20 | return loss.mean() 21 | 22 | 23 | def gan_g_loss(scores_fake): 24 | """ 25 | Input: 26 | - scores_fake: Tensor of shape (N,) containing scores for fake samples 27 | Output: 28 | - loss: Tensor of shape (,) giving GAN generator loss 29 | """ 30 | y_fake = torch.ones_like(scores_fake) * random.uniform(0.7, 1.2) 31 | return bce_loss(scores_fake, y_fake) 32 | 33 | 34 | def gan_d_loss(scores_real, scores_fake): 35 | """ 36 | Input: 37 | - scores_real: Tensor of shape (N,) giving scores for real samples 38 | - scores_fake: Tensor of shape (N,) giving scores for fake samples 39 | Output: 40 | - loss: Tensor of shape (,) giving GAN discriminator loss 41 | """ 42 | y_real = torch.ones_like(scores_real) * random.uniform(0.7, 1.2) 43 | y_fake = torch.zeros_like(scores_fake) * random.uniform(0, 0.3) 44 | loss_real = bce_loss(scores_real, y_real) 45 | loss_fake = bce_loss(scores_fake, y_fake) 46 | return loss_real + loss_fake 47 | 48 | 49 | def l2_loss(pred_traj, pred_traj_gt, loss_mask, random=0, mode='average'): 50 | """ 51 | Input: 52 | - pred_traj: Tensor of shape (seq_len, batch, 2). Predicted trajectory. 53 | - pred_traj_gt: Tensor of shape (seq_len, batch, 2). Groud truth 54 | predictions. 55 | - loss_mask: Tensor of shape (batch, seq_len) 56 | - mode: Can be one of sum, average, raw 57 | Output: 58 | - loss: l2 loss depending on mode 59 | """ 60 | seq_len, batch, _ = pred_traj.size() 61 | pred_traj= pred_traj.double() 62 | pred_traj_gt=pred_traj_gt.double() 63 | loss = (loss_mask.unsqueeze(dim=2) * 64 | (pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2))**2) 65 | if mode == 'sum': 66 | return torch.sum(loss) 67 | elif mode == 'average': 68 | return torch.sum(loss) / torch.numel(loss_mask.data) 69 | elif mode == 'raw': 70 | return loss.sum(dim=2).sum(dim=1) 71 | 72 | 73 | def displacement_error(pred_traj, pred_traj_gt, consider_ped=None, mode='sum'): 74 | """ 75 | Input: 76 | - pred_traj: Tensor of shape (seq_len, batch, 2). Predicted trajectory. 77 | - pred_traj_gt: Tensor of shape (seq_len, batch, 2). Ground truth 78 | predictions. 79 | - consider_ped: Tensor of shape (batch) 80 | - mode: Can be one of sum, raw 81 | Output: 82 | - loss: gives the eculidian displacement error 83 | """ 84 | seq_len, _, _ = pred_traj.size() 85 | pred_traj_gt= pred_traj_gt.double() 86 | pred_traj = pred_traj.double() 87 | loss = pred_traj_gt.permute(1, 0, 2) - pred_traj.permute(1, 0, 2) 88 | loss = loss**2 89 | if consider_ped is not None: 90 | loss = torch.sqrt(loss.sum(dim=2)).sum(dim=1) * consider_ped 91 | else: 92 | loss = torch.sqrt(loss.sum(dim=2)).sum(dim=1) 93 | if mode == 'sum': 94 | return torch.sum(loss) 95 | elif mode == 'raw': 96 | return loss 97 | 98 | 99 | def final_displacement_error( 100 | pred_pos, pred_pos_gt, consider_ped=None, mode='sum' 101 | ): 102 | """ 103 | Input: 104 | - pred_pos: Tensor of shape (batch, 2). Predicted last pos. 105 | - pred_pos_gt: Tensor of shape (seq_len, batch, 2). 
Groud truth 106 | last pos 107 | - consider_ped: Tensor of shape (batch) 108 | Output: 109 | - loss: gives the eculidian displacement error 110 | """ 111 | # pred_pos=pred_pos.dobule() 112 | # pred_pos_gt=pred_pos_gt.double() 113 | loss = pred_pos_gt - pred_pos 114 | loss = loss**2 115 | if consider_ped is not None: 116 | loss = torch.sqrt(loss.sum(dim=1)) * consider_ped 117 | else: 118 | loss = torch.sqrt(loss.sum(dim=1)) 119 | if mode == 'raw': 120 | return loss 121 | else: 122 | return torch.sum(loss) -------------------------------------------------------------------------------- /trajectory_generator.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import math 4 | 5 | import numpy as np 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | import pickle 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def seq_collate(data): 15 | (obs_seq_list, pred_seq_list, obs_seq_rel_list, pred_seq_rel_list, 16 | non_linear_ped_list, loss_mask_list) = zip(*data) 17 | 18 | _len = [len(seq) for seq in obs_seq_list] 19 | cum_start_idx = [0] + np.cumsum(_len).tolist() 20 | seq_start_end = [[start, end] 21 | for start, end in zip(cum_start_idx, cum_start_idx[1:])] 22 | 23 | # Data format: batch, input_size, seq_len 24 | # LSTM input format: seq_len, batch, input_size 25 | obs_traj = torch.cat(obs_seq_list, dim=0).permute(2, 0, 1) 26 | pred_traj = torch.cat(pred_seq_list, dim=0).permute(2, 0, 1) 27 | obs_traj_rel = torch.cat(obs_seq_rel_list, dim=0).permute(2, 0, 1) 28 | pred_traj_rel = torch.cat(pred_seq_rel_list, dim=0).permute(2, 0, 1) 29 | non_linear_ped = torch.cat(non_linear_ped_list) 30 | loss_mask = torch.cat(loss_mask_list, dim=0) 31 | seq_start_end = torch.LongTensor(seq_start_end) 32 | out = [ 33 | obs_traj, pred_traj, obs_traj_rel, pred_traj_rel, non_linear_ped, 34 | loss_mask, seq_start_end 35 | ] 36 | 37 | return tuple(out) 38 | 39 | 40 | def read_file(_path, delim='\t'): 41 | data = [] 42 | if delim == 'tab': 43 | delim = '\t' 44 | elif delim == 'space': 45 | delim = ' ' 46 | with open(_path, 'r') as f: 47 | for line in f: 48 | line = line.strip().split(delim) 49 | line = [float(i) for i in line] 50 | data.append(line) 51 | return np.asarray(data) 52 | 53 | 54 | def poly_fit(traj, traj_len, threshold): 55 | """ 56 | Input: 57 | - traj: Numpy array of shape (2, traj_len) 58 | - traj_len: Len of trajectory 59 | - threshold: Minimum error to be considered for non linear traj 60 | Output: 61 | - int: 1 -> Non Linear 0-> Linear 62 | """ 63 | t = np.linspace(0, traj_len - 1, traj_len) 64 | res_x = np.polyfit(t, traj[0, -traj_len:], 2, full=True)[1] 65 | res_y = np.polyfit(t, traj[1, -traj_len:], 2, full=True)[1] 66 | if res_x + res_y >= threshold: 67 | return 1.0 68 | else: 69 | return 0.0 70 | 71 | def scene(path): 72 | with open(path,'rb') as open_file: 73 | dat= pickle.load(open_file) 74 | return dat.ravel() 75 | 76 | 77 | class TrajectoryDataset(Dataset): 78 | """Dataloder for the Trajectory datasets""" 79 | def __init__( 80 | self, data_dir, obs_len=8, pred_len=12, skip=1, threshold=0.002, 81 | min_ped=1, delim='\t' 82 | ): 83 | """ 84 | Args: 85 | - data_dir: Directory containing dataset files in the format 86 | 87 | - obs_len: Number of time-steps in input trajectories 88 | - pred_len: Number of time-steps in output trajectories 89 | - skip: Number of frames to skip while making the dataset 90 | - threshold: Minimum error to be considered for non linear traj 91 | when using a linear predictor 
92 | - min_ped: Minimum number of pedestrians that should be in a seqeunce 93 | - delim: Delimiter in the dataset files 94 | """ 95 | super(TrajectoryDataset, self).__init__() 96 | 97 | self.data_dir = data_dir 98 | self.obs_len = obs_len 99 | self.pred_len = pred_len 100 | self.skip = skip 101 | self.seq_len = self.obs_len + self.pred_len 102 | self.delim = delim 103 | 104 | all_files = os.listdir(self.data_dir) 105 | all_files = [os.path.join(self.data_dir, _path) for _path in all_files] 106 | scene_path="C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//out-zara" 107 | num_peds_in_seq = [] 108 | seq_list = [] 109 | seq_list_rel = [] 110 | loss_mask_list = [] 111 | non_linear_ped = [] 112 | indexes=[] 113 | scene_info=[] 114 | scene_info_per_data=[] 115 | print("all_files",all_files) 116 | for path in all_files: 117 | print("current_path",path) 118 | data = read_file(path, delim) 119 | frames = np.unique(data[:, 0]).tolist() 120 | frame_data = [] 121 | for frame in frames: 122 | frame_data.append(data[frame == data[:, 0], :]) 123 | num_sequences = int( 124 | math.ceil((len(frames) - self.seq_len + 1) / skip)) 125 | 126 | for idx in range(0, num_sequences * self.skip + 1, skip): 127 | curr_seq_data = np.concatenate( 128 | frame_data[idx:idx + self.seq_len], axis=0) 129 | peds_in_curr_seq = np.unique(curr_seq_data[:, 1]) 130 | curr_seq_rel = np.zeros((len(peds_in_curr_seq), 2, 131 | self.seq_len)) 132 | curr_seq = np.zeros((len(peds_in_curr_seq), 2, self.seq_len)) 133 | curr_loss_mask = np.zeros((len(peds_in_curr_seq), 134 | self.seq_len)) 135 | num_peds_considered = 0 136 | _non_linear_ped = [] 137 | for _, ped_id in enumerate(peds_in_curr_seq): 138 | # scene_info.append(scene(scene_path)) 139 | 140 | curr_ped_seq = curr_seq_data[curr_seq_data[:, 1] == 141 | ped_id, :] 142 | curr_ped_seq = np.around(curr_ped_seq, decimals=4) 143 | pad_front = frames.index(curr_ped_seq[0, 0]) - idx 144 | pad_end = frames.index(curr_ped_seq[-1, 0]) - idx + 1 145 | if pad_end - pad_front != self.seq_len: 146 | continue 147 | curr_ped_seq = np.transpose(curr_ped_seq[:, 2:]) 148 | curr_ped_seq = curr_ped_seq 149 | # Make coordinates relative 150 | rel_curr_ped_seq = np.zeros(curr_ped_seq.shape) 151 | rel_curr_ped_seq[:, 1:] = \ 152 | curr_ped_seq[:, 1:] - curr_ped_seq[:, :-1] 153 | _idx = num_peds_considered 154 | curr_seq[_idx, :, pad_front:pad_end] = curr_ped_seq 155 | curr_seq_rel[_idx, :, pad_front:pad_end] = rel_curr_ped_seq 156 | # Linear vs Non-Linear Trajectory 157 | _non_linear_ped.append( 158 | poly_fit(curr_ped_seq, pred_len, threshold)) 159 | curr_loss_mask[_idx, pad_front:pad_end] = 1 160 | num_peds_considered += 1 161 | 162 | if num_peds_considered > min_ped: 163 | non_linear_ped += _non_linear_ped 164 | num_peds_in_seq.append(num_peds_considered) 165 | loss_mask_list.append(curr_loss_mask[:num_peds_considered]) 166 | seq_list.append(curr_seq[:num_peds_considered]) 167 | seq_list_rel.append(curr_seq_rel[:num_peds_considered]) 168 | # scene_info.append(scene(scene_path)) 169 | 170 | to_check= np.concatenate(seq_list,axis=0) 171 | # s= np.concatenate(scene_info,axis=0) 172 | # scene_info_per_data.append(s) 173 | indexes.append(len(to_check)) 174 | 175 | self.num_seq = len(seq_list) 176 | seq_list = np.concatenate(seq_list, axis=0) 177 | seq_list_rel = np.concatenate(seq_list_rel, axis=0) 178 | loss_mask_list = np.concatenate(loss_mask_list, axis=0) 179 | non_linear_ped = np.asarray(non_linear_ped) 180 | 181 | # Convert numpy -> Torch Tensor 182 | self.obs_traj = 
torch.from_numpy( 183 | seq_list[:, :, :self.obs_len]) 184 | self.pred_traj = torch.from_numpy( 185 | seq_list[:, :, self.obs_len:]) 186 | self.obs_traj_rel = torch.from_numpy( 187 | seq_list_rel[:, :, :self.obs_len]) 188 | self.pred_traj_rel = torch.from_numpy( 189 | seq_list_rel[:, :, self.obs_len:]) 190 | self.loss_mask = torch.from_numpy(loss_mask_list) 191 | self.non_linear_ped = torch.from_numpy(non_linear_ped) 192 | cum_start_idx = [0] + np.cumsum(num_peds_in_seq).tolist() 193 | self.seq_start_end = [ 194 | (start, end) 195 | for start, end in zip(cum_start_idx, cum_start_idx[1:]) 196 | ] 197 | 198 | def __len__(self): 199 | return self.num_seq 200 | 201 | def __getitem__(self, index): 202 | start, end = self.seq_start_end[index] 203 | out = [ 204 | self.obs_traj[start:end, :], self.pred_traj[start:end, :], 205 | self.obs_traj_rel[start:end, :], self.pred_traj_rel[start:end, :], 206 | self.non_linear_ped[start:end], self.loss_mask[start:end, :] 207 | ] 208 | return out -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | def make_mlp(dim_list, activation='relu', batch_norm=True, dropout=0): 5 | layers = [] 6 | for dim_in, dim_out in zip(dim_list[:-1], dim_list[1:]): 7 | print("dim_in",dim_in) 8 | print("dim_out",dim_out) 9 | layers.append(nn.Linear(dim_in, dim_out)) 10 | if batch_norm: 11 | layers.append(nn.BatchNorm1d(dim_out)) 12 | if activation == 'relu': 13 | layers.append(nn.ReLU()) 14 | elif activation == 'leakyrelu': 15 | layers.append(nn.LeakyReLU()) 16 | if dropout > 0: 17 | layers.append(nn.Dropout(p=dropout)) 18 | return nn.Sequential(*layers) 19 | 20 | 21 | def get_noise(shape, noise_type): 22 | if noise_type == 'gaussian': 23 | return torch.randn(*shape).cuda() 24 | elif noise_type == 'uniform': 25 | return torch.rand(*shape).sub_(0.5).mul_(2.0).cuda() 26 | raise ValueError('Unrecognized noise type "%s"' % noise_type) 27 | 28 | 29 | class Encoder(nn.Module): 30 | """Encoder consists of LSTM network which takes observed motion , 31 | semantic scene information and neighborhood pedestrian information""" 32 | def __init__( 33 | self, embedding_dim=64, h_dim=64, mlp_dim=1024, num_layers=1, 34 | dropout=0.0 35 | ): 36 | super(Encoder, self).__init__() 37 | 38 | self.mlp_dim = 1024 39 | self.h_dim = h_dim 40 | self.embedding_dim = embedding_dim 41 | self.num_layers = num_layers 42 | 43 | self.encoder = nn.LSTM( 44 | embedding_dim, h_dim, num_layers, dropout=dropout 45 | ) 46 | 47 | self.spatial_embedding = nn.Linear(2, embedding_dim) 48 | 49 | def init_hidden(self, batch): 50 | return ( 51 | # torch.zeros(self.num_layers, batch, self.h_dim).cuda(), 52 | torch.zeros(self.num_layers, batch, self.h_dim), 53 | # torch.zeros(self.num_layers, batch, self.h_dim).cuda() 54 | torch.zeros(self.num_layers, batch, self.h_dim) 55 | 56 | ) 57 | 58 | def forward(self, obs_traj): 59 | """ 60 | Inputs: 61 | - obs_traj: Tensor of shape (obs_len, batch, 2) 62 | Output: 63 | - final_h: Tensor of shape (self.num_layers, batch, self.h_dim) 64 | """ 65 | # Encode observed Trajectory 66 | batch = obs_traj.size(1) 67 | # print("obs_tra_size",batch) 68 | obs_traj_embedding = self.spatial_embedding(obs_traj.contiguous().view(-1, 2)) 69 | obs_traj_embedding = obs_traj_embedding.view( 70 | -1, batch, self.embedding_dim 71 | ) 72 | state_tuple = self.init_hidden(batch) 73 | output, state = self.encoder(obs_traj_embedding, state_tuple) 74 
| final_h = state[0] 75 | return final_h 76 | 77 | class scene(nn.Module): 78 | def __init__(self, emb_dim,dropout): 79 | 80 | """The scene class takes semantic features 81 | as an input and passes through a fully connected layer """ 82 | 83 | super().__init__() 84 | # self.hid_dim = hid_dim 85 | # self.n_layers= n_layers 86 | self.embedding = nn.Linear(13824, emb_dim) 87 | 88 | # self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout=dropout) 89 | self.dropout = nn.Dropout(dropout) 90 | def forward(self,src): 91 | embedded = self.dropout(self.embedding(src)) 92 | # outputs, (hidden, cell) = self.rnn(embedded) 93 | return embedded 94 | 95 | class Decoder(nn.Module): 96 | """Decoder is part of TrajectoryGenerator""" 97 | def __init__( 98 | self, seq_len, embedding_dim=64, h_dim=128, mlp_dim=1024, num_layers=1, 99 | pool_every_timestep=True, dropout=0.0, bottleneck_dim=1024, 100 | activation='relu', batch_norm=True, pooling_type='pool_net', 101 | neighborhood_size=2.0, grid_size=8,emb_dim= 512 102 | ): 103 | super(Decoder, self).__init__() 104 | 105 | self.seq_len = seq_len 106 | self.mlp_dim = mlp_dim 107 | self.h_dim = h_dim 108 | self.embedding_dim = embedding_dim 109 | self.pool_every_timestep = pool_every_timestep 110 | 111 | self.decoder = nn.LSTM( 112 | embedding_dim, h_dim, num_layers, dropout=dropout 113 | ) 114 | 115 | if pool_every_timestep: 116 | if pooling_type == 'pool_net': 117 | self.pool_net = PoolHiddenNet( 118 | embedding_dim=self.embedding_dim, 119 | h_dim=self.h_dim, 120 | mlp_dim=mlp_dim, 121 | bottleneck_dim=bottleneck_dim, 122 | activation=activation, 123 | batch_norm=batch_norm, 124 | dropout=dropout 125 | ) 126 | elif pooling_type == 'spool': 127 | self.pool_net = SocialPooling( 128 | h_dim=self.h_dim, 129 | activation=activation, 130 | batch_norm=batch_norm, 131 | dropout=dropout, 132 | neighborhood_size=neighborhood_size, 133 | grid_size=grid_size 134 | ) 135 | 136 | 137 | mlp_dims = [1664, mlp_dim, h_dim] 138 | 139 | self.mlp = make_mlp( 140 | mlp_dims, 141 | activation=activation, 142 | batch_norm=batch_norm, 143 | dropout=dropout 144 | ) 145 | 146 | self.spatial_embedding = nn.Linear(2, embedding_dim) 147 | self.hidden2pos = nn.Linear(h_dim, 2) 148 | self.scenes = scene(emb_dim, dropout=0.0) 149 | 150 | def forward(self, last_pos, last_pos_rel, state_tuple, seq_start_end,img): 151 | """ 152 | Inputs: 153 | - last_pos: Tensor of shape (batch, 2) 154 | - last_pos_rel: Tensor of shape (batch, 2) 155 | - state_tuple: (hh, ch) each tensor of shape (num_layers, batch, h_dim) 156 | - seq_start_end: A list of tuples which delimit sequences within batch 157 | Output: 158 | - pred_traj: tensor of shape (self.seq_len, batch, 2) 159 | """ 160 | 161 | img= img.view(-1,13824) 162 | hidden_image= self.scenes(img) 163 | hidden_image= hidden_image.repeat(last_pos_rel.shape[0],1) 164 | batch = last_pos.size(0) 165 | pred_traj_fake_rel = [] 166 | decoder_input = self.spatial_embedding(last_pos_rel.float()).float() 167 | decoder_input = decoder_input.view(1, batch, self.embedding_dim) 168 | 169 | for _ in range(self.seq_len): 170 | output, state_tuple = self.decoder(decoder_input, state_tuple) 171 | 172 | rel_pos = self.hidden2pos(output.view(-1, self.h_dim)) 173 | curr_pos = rel_pos.double()+ last_pos.double() 174 | 175 | if self.pool_every_timestep: 176 | decoder_h = state_tuple[0] 177 | pool_h = self.pool_net(decoder_h, seq_start_end, curr_pos) 178 | decoder_h = torch.cat([decoder_h.view(-1, self.h_dim), pool_h,hidden_image], dim=1) 179 | decoder_h = self.mlp(decoder_h) 180 | 
decoder_h = torch.unsqueeze(decoder_h, 0) 181 | state_tuple = (decoder_h, state_tuple[1]) 182 | 183 | embedding_input = rel_pos 184 | 185 | decoder_input = self.spatial_embedding(embedding_input) 186 | decoder_input = decoder_input.view(1, batch, self.embedding_dim) 187 | pred_traj_fake_rel.append(rel_pos.view(batch, -1)) 188 | last_pos = curr_pos 189 | 190 | pred_traj_fake_rel = torch.stack(pred_traj_fake_rel, dim=0) 191 | return pred_traj_fake_rel, state_tuple[0] 192 | 193 | 194 | 195 | class PoolHiddenNet(nn.Module): 196 | """Pooling module as proposed in our paper""" 197 | def __init__( 198 | self, embedding_dim=64, h_dim=64, mlp_dim=1024, bottleneck_dim=1024, 199 | activation='relu', batch_norm=True, dropout=0.0 200 | ): 201 | super(PoolHiddenNet, self).__init__() 202 | 203 | self.mlp_dim = 1024 204 | self.h_dim = h_dim 205 | self.bottleneck_dim = bottleneck_dim 206 | self.embedding_dim = embedding_dim 207 | 208 | mlp_pre_dim = embedding_dim + h_dim 209 | mlp_pre_pool_dims = [mlp_pre_dim, 512, bottleneck_dim] 210 | 211 | self.spatial_embedding = nn.Linear(2, embedding_dim) 212 | self.mlp_pre_pool = make_mlp( 213 | mlp_pre_pool_dims, 214 | activation=activation, 215 | batch_norm=batch_norm, 216 | dropout=dropout) 217 | 218 | def repeat(self, tensor, num_reps): 219 | """ 220 | Inputs: 221 | -tensor: 2D tensor of any shape 222 | -num_reps: Number of times to repeat each row 223 | Outpus: 224 | -repeat_tensor: Repeat each row such that: R1, R1, R2, R2 225 | """ 226 | col_len = tensor.size(1) 227 | tensor = tensor.unsqueeze(dim=1).repeat(1, num_reps, 1) 228 | tensor = tensor.view(-1, col_len) 229 | return tensor 230 | 231 | def forward(self, h_states, seq_start_end, end_pos): 232 | """ 233 | Inputs: 234 | - h_states: Tensor of shape (num_layers, batch, h_dim) 235 | - seq_start_end: A list of tuples which delimit sequences within batch 236 | - end_pos: Tensor of shape (batch, 2) 237 | Output: 238 | - pool_h: Tensor of shape (batch, bottleneck_dim) 239 | """ 240 | pool_h = [] 241 | for _, (start, end) in enumerate(seq_start_end): 242 | start = start.item() 243 | end = end.item() 244 | num_ped = end - start 245 | curr_hidden = h_states.view(-1, self.h_dim)[start:end] 246 | curr_end_pos = end_pos[start:end] 247 | # Repeat -> H1, H2, H1, H2 248 | curr_hidden_1 = curr_hidden.repeat(num_ped, 1) 249 | # Repeat position -> P1, P2, P1, P2 250 | curr_end_pos_1 = curr_end_pos.repeat(num_ped, 1) 251 | # Repeat position -> P1, P1, P2, P2 252 | curr_end_pos_2 = self.repeat(curr_end_pos, num_ped) 253 | curr_rel_pos = curr_end_pos_1 - curr_end_pos_2 254 | curr_rel_embedding = self.spatial_embedding(curr_rel_pos.float()).float() 255 | mlp_h_input = torch.cat([curr_rel_embedding, curr_hidden_1], dim=1) 256 | curr_pool_h = self.mlp_pre_pool(mlp_h_input) 257 | curr_pool_h = curr_pool_h.view(num_ped, num_ped, -1).max(1)[0] 258 | pool_h.append(curr_pool_h) 259 | pool_h = torch.cat(pool_h, dim=0) 260 | return pool_h 261 | 262 | 263 | class TrajectoryGenerator_RNN_PSP(nn.Module): 264 | def __init__( 265 | self, obs_len, pred_len, embedding_dim=128, encoder_h_dim=64, 266 | decoder_h_dim=64, mlp_dim=1024, num_layers=1, noise_dim=(0, ), 267 | noise_type='gaussian', noise_mix_type='ped', pooling_type=None, 268 | pool_every_timestep=True, dropout=0.0, bottleneck_dim=1024, 269 | activation='relu', batch_norm=True, neighborhood_size=2.0, grid_size=8,emb_dim= 512 270 | ): 271 | super(TrajectoryGenerator_RNN_PSP, self).__init__() 272 | 273 | if pooling_type and pooling_type.lower() == 'none': 274 | pooling_type = None 275 
| 276 | self.obs_len = obs_len 277 | self.pred_len = pred_len 278 | self.mlp_dim = mlp_dim 279 | self.encoder_h_dim = encoder_h_dim 280 | self.decoder_h_dim = decoder_h_dim 281 | self.embedding_dim = embedding_dim 282 | self.noise_dim = noise_dim 283 | self.num_layers = num_layers 284 | self.noise_type = noise_type 285 | self.noise_mix_type = noise_mix_type 286 | self.pooling_type = pooling_type 287 | self.noise_first_dim = 0 288 | self.pool_every_timestep = pool_every_timestep 289 | self.bottleneck_dim = 256 290 | self.encoder = Encoder( 291 | embedding_dim=embedding_dim, 292 | h_dim=encoder_h_dim, 293 | mlp_dim=mlp_dim, 294 | num_layers=num_layers, 295 | dropout=dropout 296 | ) 297 | 298 | self.decoder = Decoder( 299 | pred_len, 300 | embedding_dim=embedding_dim, 301 | h_dim=decoder_h_dim, 302 | mlp_dim=mlp_dim, 303 | num_layers=num_layers, 304 | pool_every_timestep=pool_every_timestep, 305 | dropout=dropout, 306 | bottleneck_dim=bottleneck_dim, 307 | activation=activation, 308 | batch_norm=batch_norm, 309 | pooling_type=pooling_type, 310 | grid_size=grid_size, 311 | neighborhood_size=neighborhood_size 312 | ) 313 | 314 | self.pool_net = PoolHiddenNet( 315 | embedding_dim=self.embedding_dim, 316 | h_dim=encoder_h_dim, 317 | mlp_dim=mlp_dim, 318 | bottleneck_dim=bottleneck_dim, 319 | activation=activation, 320 | batch_norm=batch_norm 321 | ) 322 | 323 | input_dim = encoder_h_dim + bottleneck_dim 324 | 325 | mlp_decoder_context_dims = [ 326 | input_dim, mlp_dim, decoder_h_dim - self.noise_first_dim 327 | ] 328 | 329 | self.mlp_decoder_context = make_mlp( 330 | mlp_decoder_context_dims, 331 | activation=activation, 332 | batch_norm=batch_norm, 333 | dropout=dropout 334 | ) 335 | 336 | 337 | 338 | def forward(self, obs_traj, obs_traj_rel, seq_start_end ,dat,user_noise=None): 339 | """ 340 | Inputs: 341 | - obs_traj: Tensor of shape (obs_len, batch, 2) 342 | - obs_traj_rel: Tensor of shape (obs_len, batch, 2) 343 | - seq_start_end: A list of tuples which delimit sequences within batch. 344 | - user_noise: Generally used for inference when you want to see 345 | relation between different types of noise and outputs. 
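- dat: scene features for the sequence; the decoder flattens them to length
13,824, embeds them with its scene layer, and concatenates the result with
the pooled social features.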
346 | Output: 347 | - pred_traj_rel: Tensor of shape (self.pred_len, batch, 2) 348 | """ 349 | batch = obs_traj_rel.size(1) 350 | # Encode seq 351 | final_encoder_h = self.encoder(obs_traj_rel.float()).float() 352 | # Pool States 353 | # if self.pooling_type: 354 | end_pos = obs_traj[-1, :, :] 355 | pool_h = self.pool_net(final_encoder_h, seq_start_end, end_pos) 356 | # Construct input hidden states for decoder 357 | 358 | 359 | mlp_decoder_context_input = torch.cat( 360 | [final_encoder_h.view(-1, self.encoder_h_dim), pool_h], dim=1) 361 | 362 | decoder_h1= self.mlp_decoder_context(mlp_decoder_context_input) 363 | """add scene information here""" 364 | 365 | 366 | 367 | decoder_h1= decoder_h1.unsqueeze(0) 368 | 369 | 370 | decoder_h=decoder_h1 371 | 372 | decoder_c = torch.zeros( 373 | 374 | self.num_layers, batch, self.decoder_h_dim 375 | ).cuda() 376 | state_tuple = (decoder_h, decoder_c) 377 | last_pos = obs_traj[-1] 378 | last_pos_rel = obs_traj_rel[-1] 379 | 380 | image=dat 381 | decoder_out = self.decoder( 382 | last_pos, 383 | last_pos_rel, 384 | state_tuple, 385 | seq_start_end, 386 | image 387 | 388 | ) 389 | pred_traj_fake_rel, final_decoder_h = decoder_out 390 | 391 | return pred_traj_fake_rel -------------------------------------------------------------------------------- /vizualization.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import cv2 as cv 5 | import pandas as pd 6 | import pickle 7 | from attrdict import AttrDict 8 | #from cnn.model_cnn import TrajEstimator 9 | #from cnn.model_cnn_moving_threshold import TrajEstimatorThreshold 10 | from vgg_analysis.model import TrajectoryGenerator #as TrajectoryGenerator_sgan 11 | from model_sgan_tokka import TrajectoryGenerator_sgan 12 | from semantic_embedding.model import TrajectoryGenerator_sem 13 | from vgg_analysis.model_rnn import TrajectoryGenerator_R 14 | from vgg_analysis.old_model_segnet_camvid.model_segnet import TrajectoryGenerator_seg_camvid 15 | from vgg_analysis.model_segnet import TrajectoryGenerator_seg #full segnet 16 | 17 | from vgg_analysis.model_gt import TrajectoryGenerator_gt 18 | from utils import relative_to_abs 19 | import torch 20 | import skvideo 21 | skvideo.setFFmpegPath('C:/Users/arsal/Anaconda3/pkgs/ffmpeg-2.7.0-0/Scripts') 22 | import skvideo.io 23 | import cv2 24 | #"C:\Users\arsal\PycharmProjects\SGAN-AE-master\SGAN-AE-master\scene\full-psp\out-hotel" 25 | """RNN-AE-PSP""" 26 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//full-psp//out-eth", 'rb')) as open_file: 27 | dat_seg =torch.Tensor(pickle.load(open_file)) 28 | 29 | """VGG-16""" 30 | path_traj_vid = "D:/traj_vid" 31 | dat= cv2.imread(path_traj_vid+"/eth/frame0.jpg") 32 | dat=cv2.resize(dat,(224,224)) 33 | dat=torch.tensor(dat) 34 | 35 | """SEG-Net-full""" 36 | #with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/hotel/hotel.pkl",'rb+')) as ff: (Segnet-camvid) 37 | with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-full-AE-Scene/eth/out-eth",'rb+')) as ff: 38 | dat_segnet_full= torch.Tensor(pickle.load(ff)) 39 | #dat_segnet_full=dat_segnet 40 | print(dat_segnet_full.shape) 41 | 42 | """SegNet CamVid""" 43 | with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/eth/eth.pkl",'rb+')) as ff: 44 | dat_segnet= torch.Tensor(pickle.load(ff)) 45 | 
dat_segnet=dat_segnet[0] 46 | print(dat_segnet.shape) 47 | 48 | 49 | 50 | """SGAN""" 51 | def get_generator_sgan(checkpoint): 52 | args = AttrDict(checkpoint['args']) 53 | generator = TrajectoryGenerator_sgan( 54 | obs_len=args.obs_len, 55 | pred_len=args.pred_len, 56 | embedding_dim=args.embedding_dim, 57 | encoder_h_dim=args.encoder_h_dim_g, 58 | decoder_h_dim=args.decoder_h_dim_g, 59 | mlp_dim=args.mlp_dim, 60 | num_layers=args.num_layers, 61 | noise_dim=args.noise_dim, 62 | noise_type=args.noise_type, 63 | noise_mix_type=args.noise_mix_type, 64 | pooling_type=args.pooling_type, 65 | pool_every_timestep=args.pool_every_timestep, 66 | dropout=args.dropout, 67 | bottleneck_dim=args.bottleneck_dim, 68 | neighborhood_size=args.neighborhood_size, 69 | grid_size=args.grid_size, 70 | batch_norm=args.batch_norm) 71 | generator.load_state_dict(checkpoint['g_state']) 72 | # generator.cuda() 73 | generator.eval() 74 | return generator 75 | 76 | """VGG-16""" 77 | def get_generator_vgg(checkpoint): 78 | args = AttrDict(checkpoint['args']) 79 | generator = TrajectoryGenerator( 80 | obs_len=args.obs_len, 81 | pred_len=args.pred_len, 82 | embedding_dim=args.embedding_dim, 83 | encoder_h_dim=args.encoder_h_dim_g, 84 | decoder_h_dim=args.decoder_h_dim_g, 85 | mlp_dim=args.mlp_dim, 86 | num_layers=args.num_layers, 87 | noise_dim=args.noise_dim, 88 | noise_type=args.noise_type, 89 | noise_mix_type=args.noise_mix_type, 90 | pooling_type=args.pooling_type, 91 | pool_every_timestep=args.pool_every_timestep, 92 | dropout=args.dropout, 93 | bottleneck_dim=args.bottleneck_dim, 94 | neighborhood_size=args.neighborhood_size, 95 | grid_size=args.grid_size, 96 | batch_norm=args.batch_norm) 97 | generator.load_state_dict(checkpoint['g_best_state'],strict=False) 98 | # generator.cuda() 99 | generator.eval() 100 | return generator 101 | 102 | """PSP-Net Semantic Segmentation""" 103 | def get_generator_rnn(checkpoint): 104 | args = AttrDict(checkpoint['args']) 105 | print(args) 106 | generator = TrajectoryGenerator_R( 107 | obs_len=args.obs_len, 108 | pred_len=args.pred_len, 109 | embedding_dim=args.embedding_dim, 110 | encoder_h_dim=args.encoder_h_dim_g, 111 | decoder_h_dim=args.decoder_h_dim_g, 112 | mlp_dim=args.mlp_dim, 113 | num_layers=args.num_layers, 114 | noise_dim=args.noise_dim, 115 | noise_type=args.noise_type, 116 | noise_mix_type=args.noise_mix_type, 117 | pooling_type=args.pooling_type, 118 | pool_every_timestep=args.pool_every_timestep, 119 | dropout=args.dropout, 120 | bottleneck_dim=args.bottleneck_dim, 121 | neighborhood_size=args.neighborhood_size, 122 | grid_size=args.grid_size, 123 | batch_norm=args.batch_norm) 124 | generator.load_state_dict(checkpoint['g_best_state']) 125 | # generator.cuda() 126 | generator.eval() 127 | return generator 128 | 129 | """SegNet-full (this was previously segnet on pretrained camvid)""" 130 | 131 | def get_generator_seg(checkpoint): 132 | args = AttrDict(checkpoint['args']) 133 | generator = TrajectoryGenerator_seg( 134 | obs_len=args.obs_len, 135 | pred_len=args.pred_len, 136 | embedding_dim=args.embedding_dim, 137 | encoder_h_dim=args.encoder_h_dim_g, 138 | decoder_h_dim=args.decoder_h_dim_g, 139 | mlp_dim=args.mlp_dim, 140 | num_layers=args.num_layers, 141 | noise_dim=args.noise_dim, 142 | noise_type=args.noise_type, 143 | noise_mix_type=args.noise_mix_type, 144 | pooling_type=args.pooling_type, 145 | pool_every_timestep=args.pool_every_timestep, 146 | dropout=args.dropout, 147 | bottleneck_dim=args.bottleneck_dim, 148 | neighborhood_size=args.neighborhood_size, 
149 | grid_size=args.grid_size, 150 | batch_norm=args.batch_norm) 151 | generator.load_state_dict(checkpoint['g_best_state'],strict=False) 152 | # generator.cuda() 153 | generator.eval() 154 | return generator 155 | 156 | def get_generator_seg_camvid(checkpoint): 157 | args = AttrDict(checkpoint['args']) 158 | generator = TrajectoryGenerator_seg_camvid( 159 | obs_len=args.obs_len, 160 | pred_len=args.pred_len, 161 | embedding_dim=args.embedding_dim, 162 | encoder_h_dim=args.encoder_h_dim_g, 163 | decoder_h_dim=args.decoder_h_dim_g, 164 | mlp_dim=args.mlp_dim, 165 | num_layers=args.num_layers, 166 | noise_dim=args.noise_dim, 167 | noise_type=args.noise_type, 168 | noise_mix_type=args.noise_mix_type, 169 | pooling_type=args.pooling_type, 170 | pool_every_timestep=args.pool_every_timestep, 171 | dropout=args.dropout, 172 | bottleneck_dim=args.bottleneck_dim, 173 | neighborhood_size=args.neighborhood_size, 174 | grid_size=args.grid_size, 175 | batch_norm=args.batch_norm) 176 | generator.load_state_dict(checkpoint['g_best_state'],strict=False) 177 | # generator.cuda() 178 | generator.eval() 179 | return generator 180 | 181 | 182 | 183 | def get_generator_gt(checkpoint): 184 | args = AttrDict(checkpoint['args']) 185 | generator = TrajectoryGenerator_gt( 186 | obs_len=args.obs_len, 187 | pred_len=args.pred_len, 188 | embedding_dim=args.embedding_dim, 189 | encoder_h_dim=args.encoder_h_dim_g, 190 | decoder_h_dim=args.decoder_h_dim_g, 191 | mlp_dim=args.mlp_dim, 192 | num_layers=args.num_layers, 193 | noise_dim=args.noise_dim, 194 | noise_type=args.noise_type, 195 | noise_mix_type=args.noise_mix_type, 196 | pooling_type=args.pooling_type, 197 | pool_every_timestep=args.pool_every_timestep, 198 | dropout=args.dropout, 199 | bottleneck_dim=args.bottleneck_dim, 200 | neighborhood_size=args.neighborhood_size, 201 | grid_size=args.grid_size, 202 | batch_norm=args.batch_norm) 203 | generator.load_state_dict(checkpoint['g_state']) 204 | # generator.cuda() 205 | generator.eval() 206 | return generator 207 | 208 | 209 | """Semantic Embedding""" 210 | def get_generator_sem(checkpoint): 211 | args = AttrDict(checkpoint['args']) 212 | generator = TrajectoryGenerator_sem( 213 | obs_len=args.obs_len, 214 | pred_len=args.pred_len, 215 | embedding_dim=args.embedding_dim, 216 | encoder_h_dim=args.encoder_h_dim_g, 217 | decoder_h_dim=args.decoder_h_dim_g, 218 | mlp_dim=args.mlp_dim, 219 | num_layers=args.num_layers, 220 | noise_dim=args.noise_dim, 221 | noise_type=args.noise_type, 222 | noise_mix_type=args.noise_mix_type, 223 | pooling_type=args.pooling_type, 224 | pool_every_timestep=args.pool_every_timestep, 225 | dropout=args.dropout, 226 | bottleneck_dim=args.bottleneck_dim, 227 | neighborhood_size=args.neighborhood_size, 228 | grid_size=args.grid_size, 229 | batch_norm=args.batch_norm) 230 | generator.load_state_dict(checkpoint['g_state']) 231 | # generator.cuda() 232 | generator.eval() 233 | return generator 234 | 235 | def world_to_img(world_coordinates, hom_matrix): 236 | scaled_trajs = [] 237 | 238 | inv_matrix = np.linalg.inv(hom_matrix) 239 | 240 | # if several sequences 241 | if len(world_coordinates.shape) > 2: 242 | # easier to iterate over them 243 | world_coordinates = np.swapaxes(world_coordinates, 0, 1) 244 | 245 | for traj in world_coordinates: 246 | ones = np.ones((len(traj), 1)) 247 | P = np.hstack((traj, ones)) 248 | R = np.dot(inv_matrix, P.transpose()).transpose() 249 | y = (R[:, 0]/R[:, 2]).reshape(-1, 1) 250 | x = (R[:, 1]/R[:, 2]).reshape(-1, 1) 251 | scaled_trajs.append(np.hstack((x, y))) 
252 | else: 253 | ones = np.ones((len(world_coordinates), 1)) 254 | P = np.hstack((world_coordinates, ones)) 255 | R = np.dot(inv_matrix, P.transpose()) 256 | y = (R[0, :]/R[2, :]).reshape(-1, 1) 257 | x = (R[1, :]/R[2, :]).reshape(-1, 1) 258 | scaled_trajs.append(np.hstack((x, y))) 259 | return scaled_trajs 260 | 261 | 262 | def img_to_world(input, matrix): 263 | return world_to_img(input, np.linalg.inv(matrix)) 264 | 265 | def get_frame(video_path, frame): 266 | cap = cv.VideoCapture(video_path) 267 | cap.set(cv.CAP_PROP_POS_FRAMES, frame) 268 | _, img = cap.read() 269 | return img 270 | 271 | def print_to_img(trajs, video_path, matrix_path, frame): 272 | img = get_frame(video_path, frame) 273 | if trajs is not None: 274 | matrix = np.loadtxt(matrix_path, dtype=float) 275 | heigth, width, _ = img.shape 276 | 277 | scaled_trajs = {} 278 | for ped_id, ped in trajs.items(): 279 | scaled_trajs[ped_id] = {} 280 | for traj_name, traj in ped.items(): 281 | scaled_traj = [] 282 | if traj.size != 0: 283 | scaled_traj = world_to_img(traj, matrix)[0] 284 | scaled_trajs[ped_id][traj_name] = scaled_traj 285 | 286 | for ped_id, ped in scaled_trajs.items(): 287 | for ped_seq_name, ped_sequence in ped.items(): 288 | print(ped_seq_name) 289 | color = color_dict[ped_seq_name] 290 | if len(ped_sequence) > 0: 291 | #draw pred_gt thicker if we can compute ade/fde on it 292 | thick = 2 if ped_seq_name == "pred_gt" and len(ped_sequence) == 12 else 2 293 | 294 | for index, point in enumerate(ped_sequence[:-1, :]): 295 | real_pt_1 = tuple([int(round(x)) for x in point]) 296 | real_pt_2 = tuple([int(round(x)) for x in ped_sequence[index + 1]]) 297 | cv.line(img, real_pt_1, real_pt_2, color, thick) 298 | return img 299 | 300 | def get_trajs(frame, step=10): 301 | ''' 302 | :param frame: last observed frame 303 | :param step: step between each frame 304 | :returns None if no prediction can be made, or trajs_, a dictionary containing trajectories for each pedestrian 305 | ''' 306 | 307 | trajs_ = {} 308 | 309 | # -1 because we include in selection 310 | seq_range = [frame - (obs_len - 1) * step, frame + pred_len * step] 311 | print("seq_range",seq_range) 312 | obs_range = [frame - (obs_len - 1) * step, frame] 313 | print("obs_range",obs_range) 314 | raw_obs_seq = data.loc[data["frameID"].between(obs_range[0], obs_range[1], inclusive=True)] 315 | raw_pred_seq = data.loc[data["frameID"].between(obs_range[1] + step, seq_range[1], inclusive=True)] 316 | peds_in_seq = raw_obs_seq.pedID.unique() 317 | 318 | curr_seq = np.zeros((len(peds_in_seq), 2, obs_len)) 319 | curr_seq_rel = np.zeros((len(peds_in_seq), 2, obs_len)) 320 | id_list = [] 321 | considered_ped = 0 322 | 323 | for ped_id in peds_in_seq: 324 | obs_ped_seq = raw_obs_seq.loc[raw_obs_seq.pedID == ped_id] 325 | # seq has to have at least obs_len length 326 | if len(obs_ped_seq.frameID) == obs_len: 327 | id_list.append(ped_id) 328 | 329 | pred_ped_seq = raw_pred_seq.loc[raw_pred_seq.pedID == ped_id] 330 | trajs_[ped_id] = {} 331 | 332 | obs_traj = obs_ped_seq[["x", "y"]].values.transpose() 333 | obs_traj_rel = np.zeros(obs_traj.shape) 334 | obs_traj_rel[:, 1:] = obs_traj[:, 1:] - obs_traj[:, :-1] 335 | 336 | curr_seq[considered_ped, :, 0:obs_len] = obs_traj 337 | curr_seq_rel[considered_ped, :, 0:obs_len] = obs_traj_rel 338 | 339 | trajs_[ped_id]["obs"] = obs_traj.transpose() 340 | trajs_[ped_id]["pred_gt"] = pred_ped_seq[["x", "y"]].values 341 | 342 | considered_ped += 1 343 | print(considered_ped) 344 | if considered_ped > 0: 345 | obs_list_tensor = 
torch.from_numpy(curr_seq[:considered_ped, :]).permute(2, 0, 1).float()#.cuda().float() 346 | obs_list_rel_tensor = torch.from_numpy(curr_seq_rel[:considered_ped, :]).permute(2, 0, 1).float()#.cuda().float() 347 | seq_start_end_tensor = torch.tensor([[0, considered_ped]]) 348 | 349 | for model_name, model in models.items(): 350 | if model_name=='rnn': 351 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat_seg) 352 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 353 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 354 | key = "pred_" + model_name 355 | for i in range(considered_ped): 356 | ped_id = id_list[i] 357 | trajs_[ped_id][key] = pred_abs_reorder[i] 358 | 359 | if model_name=='vgg': 360 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat) 361 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 362 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 363 | key = "pred_" + model_name 364 | for k in range(considered_ped): 365 | ped_id = id_list[k] 366 | trajs_[ped_id][key] = pred_abs_reorder[k] 367 | if model_name == 'sgan': 368 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor) 369 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 370 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 371 | key = "pred_" + model_name 372 | for k in range(considered_ped): 373 | ped_id = id_list[k] 374 | trajs_[ped_id][key] = pred_abs_reorder[k] 375 | if model_name == 'segnet_full': 376 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat_segnet_full) 377 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 378 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 379 | key = "pred_" + model_name 380 | for k in range(considered_ped): 381 | ped_id = id_list[k] 382 | trajs_[ped_id][key] = pred_abs_reorder[k] 383 | 384 | if model_name == 'segnet': 385 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor, dat_segnet) 386 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 387 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 388 | key = "pred_" + model_name 389 | for k in range(considered_ped): 390 | ped_id = id_list[k] 391 | trajs_[ped_id][key] = pred_abs_reorder[k] 392 | 393 | return trajs_ 394 | 395 | else: 396 | return None 397 | 398 | 399 | def get_paths(dset_): 400 | paths_ = {} 401 | 402 | if dset_.split("/")[0] == "split_moving": 403 | dset = dset_.split("/")[1] 404 | model_path_us = os.path.join("scripts/save/", (dset_ + "_50epoch_with_model.pt")) 405 | model_path_sgan = "models/sgan-p-models/" + dset + "_12_model.pt" 406 | # model_path_sgan = "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/pretrained_sgan_models/hotel_12_model.pt" 407 | 408 | if model_path_sgan.split("/")[1] == "sgan-p-models": 409 | out_vid_path = "visualization/" + dset + "_" + dset_.split("/")[-1] + "_sgan-p.mp4" 410 | else: 411 | out_vid_path = "visualization/" + dset + dset_.split("/")[-1] + ".mp4" 412 | 413 | test_dataset_path = os.listdir("datasets/split_moving/" + dset +"/" + dset_.split("/")[-1] + "/test") 414 | if len(test_dataset_path) > 1: 415 | print("Several test datasets found : {}".format(test_dataset_path)) 416 | while True: 417 | to_keep = input("Enter the name of the dataset you want to use :") 418 | if to_keep in test_dataset_path: 419 | test_dataset_path = "datasets/" + dset + "/test/" + to_keep 
420 | break 421 | else: 422 | test_dataset_path = "datasets/split_moving/" + dset +"/" + dset_.split("/")[-1] + "/test/" + test_dataset_path[0] 423 | 424 | 425 | else: 426 | dset = dset_ 427 | 428 | 429 | model_path_vgg="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/Vgg-AE-Scene/eth/checkpoint_with_model.pt" #(use this) 430 | 431 | model_path_rnn="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/RNN-AE-Scene/eth/200_epochs/checkpoint_with_model.pt" 432 | 433 | model_path_sgan= "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/pretrained_sgan_models/eth_12_model.pt" 434 | 435 | model_path_segnet="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/eth/checkpoint_with_model.pt" #(pre-trained CAMVID) 436 | 437 | model_path_segnet_full="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-full-AE-Scene/eth/checkpoint_with_model.pt" 438 | 439 | model_path_semantic="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/semantic_embedding/pre-trained_models/eth/checkpoint_with_model.pt" 440 | 441 | 442 | print("arsal") 443 | if model_path_sgan.split("/")[1] == "sgan-p-models": 444 | out_vid_path = "visualization/" + dset + "_sgan-p.mp4" 445 | else: 446 | out_vid_path = "visualization/" + dset + ".mp4" 447 | 448 | dset='eth' 449 | test_dataset_path = "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/datasets/" + dset + "/test" +"/biwi_eth.txt" 450 | 451 | print(test_dataset_path) 452 | 453 | scenes_and_mat_path="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/scenes_and_matrices/" 454 | mat_path = scenes_and_mat_path + dset + ".txt" 455 | vid_path = scenes_and_mat_path + dset + ".avi" 456 | 457 | paths_["vid"] = vid_path 458 | paths_["mat"] = mat_path 459 | paths_["model_vgg"] = model_path_vgg 460 | paths_["model_sgan"] = model_path_sgan 461 | paths_["model_rnn"]=model_path_rnn 462 | paths_["model_segnet"]=model_path_segnet 463 | paths_["model_segnet_full"]=model_path_segnet_full 464 | 465 | # paths_["model_gt"]=model_path_gt 466 | paths_["model_semantic"]= model_path_semantic 467 | 468 | 469 | paths_["test_dataset"] = test_dataset_path 470 | for key, item in paths_.items(): 471 | if not os.path.exists(item): 472 | print("File not found : {}".format(item)) 473 | sys.exit(0) 474 | #this file is created, not required 475 | paths_["out_vid"] = out_vid_path 476 | return paths_ 477 | 478 | if __name__ == "__main__": 479 | #paths are relative from sgan dir 480 | os.chdir("../../") 481 | dataset = "eth" 482 | obs_len = 8 483 | pred_len = 12 484 | # color_dict = {"obs": (0, 0, 0), "pred_cnn": (250, 250, 0), "pred_cnn_threshold": (250, 250, 250), "pred_gt": (0, 250, 0), "pred_sgan": (0,0,250)} 485 | """For segnet with pre-trained on cam-vid dataset""" 486 | # color_dict = {"obs": (0, 0, 0), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0,0,250),"pred_vgg":(255,0,255),"pred_segnet":(240,37,15),"pred_gt_seg":(28,163,170), 487 | """with full-segnet trained on ped dataset""" 488 | # color_dict = {"obs": (255, 127, 39), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0,0,250),"pred_vgg":(255,0,255),"pred_segnet_full":(240,37,15),"pred_gt_seg":(28,163,170), 489 | 490 | # "pred_semantic": (0,250,250)} 491 | color_dict = {"obs": (255, 127, 39), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0, 0, 250), 492 | "pred_segnet": (240, 37, 
15),"pred_segnet_full": (64, 128, 128), "pred_vgg":(255,0,255)} 493 | # 494 | # "pred_semantic": (0, 250, 250)} 495 | 496 | """FOR Semantic embedding and PSP-Semantic Segmentation""" 497 | # color_dict = {"obs": (0, 0, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0, 0, 250), 498 | # "pred_semantic": (0, 250, 250)} 499 | 500 | paths = get_paths(dataset) 501 | print("-------------------------------------------------------------------here is",paths) 502 | print("Paths :") 503 | for key in sorted(paths.keys()): 504 | print("/t{}: {}".format(key, paths[key])) 505 | # "C:\Users\arsal\PycharmProjects\SGAN-AE-master\SGAN-AE-master\trained_models\Tokka\RNN-AE-Scene\ETH\200_epochs\checkpoint_with_model.pt" 506 | print("Loading models.") 507 | models = {} 508 | 509 | checkpoint_vgg = torch.load(paths["model_vgg"],map_location='cpu') 510 | print(checkpoint_vgg) 511 | models['vgg'] = get_generator_vgg(checkpoint_vgg) 512 | # models["cnn_threshold"] = get_generator_cnn_threshold(checkpoint_cnn) 513 | checkpoint_sgan = torch.load(paths["model_sgan"],map_location='cpu') 514 | models['sgan'] = get_generator_sgan(checkpoint_sgan) 515 | 516 | checkpoint_rnn=torch.load(paths["model_rnn"],map_location='cpu') 517 | models['rnn']=get_generator_rnn(checkpoint_rnn) 518 | 519 | checkpoint_segnet = torch.load(paths["model_segnet"], map_location='cpu') 520 | print("ISVC") 521 | print(checkpoint_segnet) 522 | models['segnet'] = get_generator_seg_camvid(checkpoint_segnet) 523 | 524 | checkpoint_segnet_full = torch.load(paths["model_segnet_full"], map_location='cpu') 525 | print("ISVC") 526 | print(checkpoint_segnet) 527 | models['segnet_full'] = get_generator_seg(checkpoint_segnet_full) 528 | 529 | checkpoint_semantic = torch.load(paths["model_semantic"], map_location='cpu') 530 | models["semantic"] = get_generator_sem(checkpoint_semantic) 531 | 532 | print("loaded_models",models) 533 | print("Loading data.") 534 | data = pd.read_csv(paths["test_dataset"], sep="\t", header=None) 535 | # print(data) 536 | data.columns = ["frameID", "pedID", "x", "y"] 537 | data.sort_values(by=["frameID", "pedID"]) 538 | data.reset_index(drop=True) 539 | writer = skvideo.io.FFmpegWriter(paths["out_vid"]) 540 | 541 | frameList = data.frameID.unique() 542 | max = frameList[-1] 543 | #step every ten frame for watchable video 544 | for frame_number in range(0,max,10): 545 | if frame_number%1000 == 0: 546 | print("Frame {}/{}".format(frame_number, max)) 547 | 548 | trajs = None 549 | if frame_number in frameList: 550 | trajs = get_trajs(frame_number) 551 | img = print_to_img(trajs, paths["vid"], paths["mat"], frame_number) 552 | writer.writeFrame(img) -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gc 3 | import logging 4 | import os 5 | import sys 6 | import time 7 | 8 | from collections import defaultdict 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.optim as optim 13 | 14 | from loader import data_loader 15 | from losses import gan_g_loss, gan_d_loss, l2_loss 16 | from losses import displacement_error, final_displacement_error 17 | 18 | # from model_no_scene import TrajectoryGenerator 19 | from vgg_analysis.model import TrajectoryGenerator 20 | from utils import int_tuple, bool_flag, get_total_norm 21 | from utils import relative_to_abs, get_dset_path 22 | from torch.autograd import Variable 23 | import cv2 24 | import pickle 25 | torch.backends.cudnn.benchmark = True 
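# Example invocation (a sketch; assumes the datasets/<dataset_name>/train and /val folders
# expected by get_dset_path exist, and that the hard-coded scene pickle / video-frame paths
# further down in this file are updated for your machine):
#   python train.py --dataset_name zara1 --obs_len 8 --pred_len 12 --batch_size 32 --num_epochs 200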
26 | 27 | parser = argparse.ArgumentParser() 28 | FORMAT = '[%(levelname)s: %(filename)s: %(lineno)4d]: %(message)s' 29 | logging.basicConfig(level=logging.INFO, format=FORMAT, stream=sys.stdout) 30 | logger = logging.getLogger(__name__) 31 | 32 | # Dataset options 33 | parser.add_argument('--dataset_name', default='zara1', type=str) 34 | parser.add_argument('--delim', default='\t') 35 | parser.add_argument('--loader_num_workers', default=0, type=int) 36 | parser.add_argument('--obs_len', default=8, type=int) 37 | parser.add_argument('--pred_len', default=12, type=int) 38 | parser.add_argument('--skip', default=1, type=int) 39 | 40 | # Optimization 41 | parser.add_argument('--batch_size', default=32, type=int) 42 | parser.add_argument('--num_iterations', default=10000, type=int) 43 | parser.add_argument('--num_epochs', default=1, type=int) 44 | 45 | # Model Options 46 | parser.add_argument('--embedding_dim', default=64, type=int) 47 | parser.add_argument('--num_layers', default=1, type=int) 48 | parser.add_argument('--dropout', default=0, type=float) 49 | parser.add_argument('--batch_norm', default=0, type=bool_flag) 50 | parser.add_argument('--mlp_dim', default=1024, type=int) 51 | 52 | # Generator Options 53 | parser.add_argument('--encoder_h_dim_g', default=64, type=int) 54 | parser.add_argument('--decoder_h_dim_g', default=128, type=int) 55 | parser.add_argument('--noise_dim', default=None, type=int_tuple) 56 | parser.add_argument('--noise_type', default='gaussian') 57 | parser.add_argument('--noise_mix_type', default='ped') 58 | parser.add_argument('--clipping_threshold_g', default=0, type=float) 59 | parser.add_argument('--g_learning_rate', default=5e-4, type=float) 60 | parser.add_argument('--g_steps', default=1, type=int) 61 | 62 | # Pooling Options 63 | parser.add_argument('--pooling_type', default='pool_net') 64 | parser.add_argument('--pool_every_timestep', default=1, type=bool_flag) 65 | 66 | # Pool Net Option 67 | parser.add_argument('--bottleneck_dim', default=1024, type=int) 68 | 69 | # Social Pooling Options 70 | parser.add_argument('--neighborhood_size', default=2.0, type=float) 71 | parser.add_argument('--grid_size', default=8, type=int) 72 | 73 | # Discriminator Options 74 | parser.add_argument('--d_type', default='local', type=str) 75 | parser.add_argument('--encoder_h_dim_d', default=64, type=int) 76 | parser.add_argument('--d_learning_rate', default=5e-4, type=float) 77 | parser.add_argument('--d_steps', default=2, type=int) 78 | parser.add_argument('--clipping_threshold_d', default=0, type=float) 79 | 80 | # Loss Options 81 | parser.add_argument('--l2_loss_weight', default=1, type=float) 82 | parser.add_argument('--best_k', default=1, type=int) 83 | 84 | # Output 85 | parser.add_argument('--output_dir', default=os.getcwd()) 86 | parser.add_argument('--print_every', default=5, type=int) 87 | parser.add_argument('--checkpoint_every', default=100, type=int) 88 | parser.add_argument('--checkpoint_name', default='checkpoint') 89 | parser.add_argument('--checkpoint_start_from', default=None) 90 | parser.add_argument('--restore_from_checkpoint', default=1, type=int) 91 | parser.add_argument('--num_samples_check', default=5000, type=int) 92 | 93 | # Misc 94 | parser.add_argument('--use_gpu', default=0, type=int) 95 | parser.add_argument('--timing', default=0, type=int) 96 | parser.add_argument('--gpu_num', default="0", type=str) 97 | 98 | 99 | def init_weights(m): 100 | classname = m.__class__.__name__ 101 | if classname.find('Linear') != -1: 102 | 
nn.init.kaiming_normal_(m.weight) 103 | 104 | 105 | def get_dtypes(args): 106 | long_dtype = torch.LongTensor 107 | float_dtype = torch.FloatTensor 108 | if args.use_gpu == 1: 109 | long_dtype = torch.cuda.LongTensor 110 | float_dtype = torch.cuda.FloatTensor 111 | return long_dtype, float_dtype 112 | 113 | 114 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//out-hotel", 'rb')) as open_file: 115 | hotel =torch.Tensor(pickle.load(open_file)) 116 | 117 | 118 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//out-zara", 'rb')) as open_file: 119 | zara = torch.Tensor(pickle.load(open_file)) 120 | 121 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//out-eth", 'rb')) as open_file: 122 | eth = torch.Tensor(pickle.load(open_file)) 123 | 124 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//out-univ", 'rb')) as open_file: 125 | univ = torch.Tensor(pickle.load(open_file)) 126 | 127 | 128 | 129 | index_train= [101, 859, 5262, 6908, 18599, 27587, 28010] #----->zara1 in train mode 130 | #eth, hotel, zara2,zara3,univ,univ,univ 131 | 132 | index_val= [80, 373, 1629, 2335, 4222, 5056, 5118] #-------> zara1 in val mode 133 | 134 | path = "D:/traj_vid/" 135 | 136 | zara=cv2.imread(path +"zara1/frame0.jpg") 137 | zara=cv2.resize(zara,(224,224)) 138 | 139 | univ=cv2.imread(path +"univ/frame0.jpg") 140 | univ=cv2.resize(univ,(224,224)) 141 | 142 | hotel=cv2.imread(path+"hotel/frame0.jpg") 143 | hotel=cv2.resize(hotel,(224,224)) 144 | 145 | eth=cv2.imread(path+"eth/frame0.jpg") 146 | eth=cv2.resize(eth,(224,224)) 147 | 148 | 149 | 150 | 151 | 152 | def main(args): 153 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num 154 | train_path = get_dset_path(args.dataset_name, 'train') 155 | 156 | id="C:\\Users\\arsal\\PycharmProjects\\SGAN-AE-master\\SGAN-AE-master\\" 157 | train_path=id+train_path 158 | print(train_path) 159 | val_path = get_dset_path(args.dataset_name, 'val') 160 | val_path=id+val_path 161 | long_dtype, float_dtype = get_dtypes(args) 162 | 163 | logger.info("Initializing train dataset") 164 | train_dset, train_loader = data_loader(args,train_path) 165 | logger.info("Initializing val dataset") 166 | # _, val_loader = data_loader(args, val_path) 167 | _, val_loader = data_loader(args,val_path) 168 | 169 | iterations_per_epoch = len(train_dset) / args.batch_size / args.d_steps 170 | if args.num_epochs: 171 | args.num_iterations = int(iterations_per_epoch * args.num_epochs) 172 | 173 | logger.info( 174 | 'There are {} iterations per epoch'.format(iterations_per_epoch) 175 | ) 176 | 177 | generator = TrajectoryGenerator( 178 | obs_len=args.obs_len, 179 | pred_len=args.pred_len, 180 | embedding_dim=args.embedding_dim, 181 | encoder_h_dim=args.encoder_h_dim_g, 182 | decoder_h_dim=args.decoder_h_dim_g, 183 | mlp_dim=args.mlp_dim, 184 | num_layers=args.num_layers, 185 | noise_dim=args.noise_dim, 186 | noise_type=args.noise_type, 187 | noise_mix_type=args.noise_mix_type, 188 | pooling_type=args.pooling_type, 189 | pool_every_timestep=args.pool_every_timestep, 190 | dropout=args.dropout, 191 | bottleneck_dim=args.bottleneck_dim, 192 | neighborhood_size=args.neighborhood_size, 193 | grid_size=args.grid_size, 194 | batch_norm=args.batch_norm) 195 | 196 | generator.apply(init_weights) 197 | generator.type(float_dtype).train() 198 | logger.info('Here is the generator:') 199 | logger.info(generator) 200 | 201 | # discriminator = TrajectoryDiscriminator( 202 | # 
obs_len=args.obs_len, 203 | # pred_len=args.pred_len, 204 | # embedding_dim=args.embedding_dim, 205 | # h_dim=args.encoder_h_dim_d, 206 | # mlp_dim=args.mlp_dim, 207 | # num_layers=args.num_layers, 208 | # dropout=args.dropout, 209 | # batch_norm=args.batch_norm, 210 | # d_type=args.d_type) 211 | 212 | # discriminator.apply(init_weights) 213 | # discriminator.type(float_dtype).train() 214 | logger.info('Here is the discriminator:') 215 | # logger.info(discriminator) 216 | 217 | g_loss_fn = gan_g_loss 218 | d_loss_fn = gan_d_loss 219 | 220 | optimizer_g = optim.Adam(generator.parameters(), lr=args.g_learning_rate) 221 | # optimizer_d = optim.Adam( 222 | # discriminator.parameters(), lr=args.d_learning_rate 223 | # ) 224 | 225 | # Maybe restore from checkpoint 226 | restore_path = None 227 | if args.checkpoint_start_from is not None: 228 | restore_path = args.checkpoint_start_from 229 | elif args.restore_from_checkpoint == 1: 230 | restore_path = os.path.join(args.output_dir, 231 | '%s_with_model.pt' % args.checkpoint_name) 232 | 233 | if restore_path is not None and os.path.isfile(restore_path): 234 | logger.info('Restoring from checkpoint {}'.format(restore_path)) 235 | checkpoint = torch.load(restore_path) 236 | generator.load_state_dict(checkpoint['g_state']) 237 | # discriminator.load_state_dict(checkpoint['d_state']) 238 | optimizer_g.load_state_dict(checkpoint['g_optim_state']) 239 | # optimizer_d.load_state_dict(checkpoint['d_optim_state']) 240 | t = checkpoint['counters']['t'] 241 | epoch = checkpoint['counters']['epoch'] 242 | checkpoint['restore_ts'].append(t) 243 | else: 244 | # Starting from scratch, so initialize checkpoint data structure 245 | t, epoch = 0, 0 246 | checkpoint = { 247 | 'args': args.__dict__, 248 | 'G_losses': defaultdict(list), 249 | 'D_losses': defaultdict(list), 250 | 'losses_ts': [], 251 | 'metrics_val': defaultdict(list), 252 | 'metrics_train': defaultdict(list), 253 | 'sample_ts': [], 254 | 'restore_ts': [], 255 | 'norm_g': [], 256 | 'norm_d': [], 257 | 'counters': { 258 | 't': None, 259 | 'epoch': None, 260 | }, 261 | 'g_state': None, 262 | 'g_optim_state': None, 263 | 'd_state': None, 264 | 'd_optim_state': None, 265 | 'g_best_state': None, 266 | 'd_best_state': None, 267 | 'best_t': None, 268 | 'g_best_nl_state': None, 269 | 'd_best_state_nl': None, 270 | 'best_t_nl': None, 271 | } 272 | t0 = None 273 | a = 0 274 | while t < args.num_iterations: 275 | gc.collect() 276 | d_steps_left = args.d_steps 277 | g_steps_left = args.g_steps 278 | epoch += 1 279 | logger.info('Starting epoch {}'.format(epoch)) 280 | 281 | for batch in train_loader: 282 | # if args.timing == 1: 283 | # torch.cuda.synchronize() 284 | # t1 = time.time() 285 | obs_traj1, _, _, _, _, _, _ = batch 286 | a = a + len(obs_traj1[0]) 287 | print("track_traj--->", a) 288 | # Decide whether to use the batch for stepping on discriminator or 289 | # generator; an iteration consists of args.d_steps steps on the 290 | # discriminator followed by args.g_steps steps on the generator.
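# Note on the loop below: `a` is a running count of the trajectories processed so far,
# and the thresholds in index_train (defined above with the comment
# "eth, hotel, zara2, zara3, univ, univ, univ") decide which scene frame
# (eth / hotel / zara / univ) is passed to generator_step for the current batch.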
291 | # if d_steps_left > 0: 292 | # step_type = 'd' 293 | # losses_d = discriminator_step(args, batch, generator, 294 | # discriminator, d_loss_fn, 295 | # optimizer_d) 296 | # checkpoint['norm_d'].append( 297 | # get_total_norm(discriminator.parameters())) 298 | # d_steps_left -= 1 299 | # elif g_steps_left > 0: 300 | if a<= index_train[0]: 301 | step_type = 'g' 302 | print("-----------------------------------------------TRAINING ETH SCENE---------------------------------------------") 303 | 304 | losses_g = generator_step(args, batch, generator,optimizer_g,eth) 305 | checkpoint['norm_g'].append(get_total_norm(generator.parameters())) 306 | g_steps_left -= 1 307 | if a>index_train[0] and a<=index_train[1]: 308 | print("-----------------------------------------------TRAINING HOTEL SCENE---------------------------------------------") 309 | 310 | losses_g = generator_step(args, batch, generator, optimizer_g, hotel) 311 | checkpoint['norm_g'].append(get_total_norm(generator.parameters())) 312 | g_steps_left -= 1 313 | if a > index_train[1] and a <= index_train[3]: 314 | print("-----------------------------------------------TRAINING ZARA SCENE---------------------------------------------") 315 | 316 | losses_g = generator_step(args, batch, generator, optimizer_g, zara) 317 | checkpoint['norm_g'].append(get_total_norm(generator.parameters())) 318 | g_steps_left -= 1 319 | if a > index_train[3]: 320 | print("-----------------------------------------------TRAINING UNIV SCENE-----------------------------------------------") 321 | losses_g = generator_step(args, batch, generator, optimizer_g, univ) 322 | checkpoint['norm_g'].append(get_total_norm(generator.parameters())) 323 | g_steps_left -= 1 324 | 325 | # if args.timing == 1: 326 | # torch.cuda.synchronize() 327 | # t2 = time.time() 328 | # logger.info('{} step took {}'.format(step_type, t2 - t1)) 329 | 330 | # Skip the rest if we are not at the end of an iteration 331 | # if d_steps_left > 0 or g_steps_left > 0: 332 | # continue 333 | 334 | # if args.timing == 1: 335 | # if t0 is not None: 336 | # logger.info('Interation {} took {}'.format( 337 | # t - 1, time.time() - t0 338 | # )) 339 | # t0 = time.time() 340 | 341 | # Maybe save loss 342 | if t % args.print_every == 0: 343 | print("ARSAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") 344 | logger.info('t = {} / {}'.format(t + 1, args.num_iterations)) 345 | # for k, v in sorted(losses_d.items()): 346 | # logger.info(' [D] {}: {:.3f}'.format(k, v)) 347 | # checkpoint['D_losses'][k].append(v) 348 | for k, v in sorted(losses_g.items()): 349 | print(k) 350 | print(v) 351 | logger.info(' [G] {}: {:.3f}'.format(k, v)) 352 | checkpoint['G_losses'][k].append(v) 353 | checkpoint['losses_ts'].append(t) 354 | 355 | # Maybe save a checkpoint 356 | # if t > 0 and t % args.checkpoint_every == 0: 357 | if t > 0 : 358 | 359 | checkpoint['counters']['t'] = t 360 | checkpoint['counters']['epoch'] = epoch 361 | checkpoint['sample_ts'].append(t) 362 | 363 | # Check stats on the validation set 364 | 365 | if a<=index_val[0]: 366 | logger.info('Checking stats on val ...') 367 | metrics_val = check_accuracy( 368 | args, val_loader, generator,eth 369 | ) 370 | logger.info('Checking stats on train ...') 371 | metrics_train = check_accuracy( 372 | args, train_loader, generator,eth, 373 | # d_loss_fn, 374 | limit=True 375 | ) 376 | if a>index_val[0] and a<=index_val[1]: 377 | logger.info('Checking stats on val ...') 378 | metrics_val = check_accuracy( 379 | args, val_loader, generator,hotel 380 
| ) 381 | logger.info('Checking stats on train ...') 382 | metrics_train = check_accuracy( 383 | args, train_loader, generator,hotel, 384 | # d_loss_fn, 385 | limit=True 386 | ) 387 | 388 | 389 | if a > index_val[1] and a <= index_val[3]: 390 | logger.info('Checking stats on val ...') 391 | metrics_val = check_accuracy( 392 | args, val_loader, generator,zara 393 | ) 394 | logger.info('Checking stats on train ...') 395 | metrics_train = check_accuracy( 396 | args, train_loader, generator,zara, 397 | # d_loss_fn, 398 | limit=True 399 | ) 400 | 401 | 402 | if a>index_val[3]: 403 | logger.info('Checking stats on val ...') 404 | metrics_val = check_accuracy( 405 | args, val_loader, generator,univ 406 | ) 407 | logger.info('Checking stats on train ...') 408 | metrics_train = check_accuracy( 409 | args, train_loader, generator,univ, 410 | # d_loss_fn, 411 | limit=True 412 | ) 413 | 414 | 415 | 416 | 417 | 418 | 419 | for k, v in sorted(metrics_val.items()): 420 | logger.info(' [val] {}: {:.3f}'.format(k, v)) 421 | checkpoint['metrics_val'][k].append(v) 422 | for k, v in sorted(metrics_train.items()): 423 | logger.info(' [train] {}: {:.3f}'.format(k, v)) 424 | checkpoint['metrics_train'][k].append(v) 425 | 426 | min_ade = min(checkpoint['metrics_val']['ade']) 427 | min_ade_nl = min(checkpoint['metrics_val']['ade_nl']) 428 | 429 | if metrics_val['ade'] == min_ade: 430 | logger.info('New low for avg_disp_error') 431 | checkpoint['best_t'] = t 432 | checkpoint['g_best_state'] = generator.state_dict() 433 | # checkpoint['d_best_state'] = discriminator.state_dict() 434 | 435 | if metrics_val['ade_nl'] == min_ade_nl: 436 | logger.info('New low for avg_disp_error_nl') 437 | checkpoint['best_t_nl'] = t 438 | checkpoint['g_best_nl_state'] = generator.state_dict() 439 | # checkpoint['d_best_nl_state'] = discriminator.state_dict() 440 | 441 | # Save another checkpoint with model weights and 442 | # optimizer state 443 | checkpoint['g_state'] = generator.state_dict() 444 | checkpoint['g_optim_state'] = optimizer_g.state_dict() 445 | # checkpoint['d_state'] = discriminator.state_dict() 446 | # checkpoint['d_optim_state'] = optimizer_d.state_dict() 447 | checkpoint_path = os.path.join( 448 | args.output_dir, '%s_with_model.pt' % args.checkpoint_name 449 | ) 450 | logger.info('Saving checkpoint to {}'.format(checkpoint_path)) 451 | torch.save(checkpoint, checkpoint_path) 452 | logger.info('Done.') 453 | 454 | # Save a checkpoint with no model weights by making a shallow 455 | # copy of the checkpoint excluding some items 456 | checkpoint_path = os.path.join( 457 | args.output_dir, '%s_no_model.pt' % args.checkpoint_name) 458 | logger.info('Saving checkpoint to {}'.format(checkpoint_path)) 459 | key_blacklist = [ 460 | 'g_state', 'd_state', 'g_best_state', 'g_best_nl_state', 461 | 'g_optim_state', 'd_optim_state', 'd_best_state', 462 | 'd_best_nl_state' 463 | ] 464 | small_checkpoint = {} 465 | for k, v in checkpoint.items(): 466 | if k not in key_blacklist: 467 | small_checkpoint[k] = v 468 | torch.save(small_checkpoint, checkpoint_path) 469 | logger.info('Done.') 470 | 471 | t += 1 472 | # d_steps_left = args.d_steps 473 | g_steps_left = args.g_steps 474 | if t >= args.num_iterations: 475 | break 476 | 477 | 478 | 479 | def generator_step( 480 | args, batch, generator, optimizer_g,image 481 | ): 482 | # batch = [tensor.cuda() for tensor in batch] 483 | (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, non_linear_ped, 484 | loss_mask, seq_start_end) = batch 485 | losses = {} 486 | 487 | loss = 
torch.zeros(1).to(pred_traj_gt) 488 | print("loss",loss) 489 | loss = Variable(loss) 490 | 491 | g_l2_loss_rel = [] 492 | 493 | loss_mask = loss_mask[:, args.obs_len:] 494 | 495 | # for _ in range(args.best_k): 496 | obs_traj = obs_traj.float() 497 | obs_traj_rel = obs_traj_rel.float() 498 | generator_out = generator(obs_traj, obs_traj_rel, seq_start_end,image) 499 | 500 | pred_traj_fake_rel = generator_out 501 | # pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1]) 502 | 503 | if args.l2_loss_weight > 0: 504 | g_l2_loss_rel.append(args.l2_loss_weight * l2_loss( 505 | pred_traj_fake_rel, 506 | pred_traj_gt_rel, 507 | loss_mask, 508 | mode='raw')) 509 | 510 | g_l2_loss_sum_rel = torch.zeros(1).to(pred_traj_gt) 511 | if args.l2_loss_weight > 0: 512 | g_l2_loss_rel = torch.stack(g_l2_loss_rel, dim=1) 513 | for start, end in seq_start_end.data: 514 | _g_l2_loss_rel = g_l2_loss_rel[start:end] 515 | _g_l2_loss_rel = torch.sum(_g_l2_loss_rel, dim=0) 516 | _g_l2_loss_rel = torch.min(_g_l2_loss_rel) / torch.sum( 517 | loss_mask[start:end]) 518 | g_l2_loss_sum_rel += _g_l2_loss_rel 519 | losses['G_l2_loss_rel'] = g_l2_loss_sum_rel.item() 520 | loss += g_l2_loss_sum_rel 521 | 522 | # traj_fake = torch.cat([obs_traj, pred_traj_fake], dim=0) 523 | traj_fake_rel = torch.cat([obs_traj_rel, pred_traj_fake_rel], dim=0) 524 | 525 | # scores_fake = discriminator(traj_fake, traj_fake_rel, seq_start_end) 526 | # discriminator_loss = g_loss_fn(scores_fake) 527 | 528 | # loss += discriminator_loss 529 | # losses['G_discriminator_loss'] = discriminator_loss.item() 530 | losses['G_total_loss'] = loss.item() 531 | # loss = Variable(loss, requires_grad= True) 532 | # optimizer_g.zero_grad() 533 | loss.backward() 534 | 535 | # if args.clipping_threshold_g > 0: 536 | # nn.utils.clip_grad_norm_( 537 | # generator.parameters(), args.clipping_threshold_g 538 | # ) 539 | optimizer_g.step() 540 | optimizer_g.zero_grad() 541 | 542 | return losses 543 | 544 | 545 | def check_accuracy( 546 | args, loader, generator,image, limit=False 547 | ): 548 | d_losses = [] 549 | metrics = {} 550 | g_l2_losses_abs, g_l2_losses_rel = ([],) * 2 551 | disp_error, disp_error_l, disp_error_nl = ([],) * 3 552 | f_disp_error, f_disp_error_l, f_disp_error_nl = ([],) * 3 553 | total_traj, total_traj_l, total_traj_nl = 0, 0, 0 554 | loss_mask_sum = 0 555 | generator.eval() 556 | with torch.no_grad(): 557 | for batch in loader: 558 | # batch = [tensor.cuda() for tensor in batch] 559 | (obs_traj, pred_traj_gt, obs_traj_rel, pred_traj_gt_rel, 560 | non_linear_ped, loss_mask, seq_start_end) = batch 561 | 562 | linear_ped = 1 - non_linear_ped 563 | loss_mask = loss_mask[:, args.obs_len:] 564 | obs_traj = obs_traj.float() 565 | obs_traj_rel = obs_traj_rel.float() 566 | pred_traj_fake_rel = generator( 567 | obs_traj, obs_traj_rel, seq_start_end,image 568 | ) 569 | pred_traj_fake = relative_to_abs(pred_traj_fake_rel, obs_traj[-1]) 570 | 571 | g_l2_loss_abs, g_l2_loss_rel = cal_l2_losses( 572 | pred_traj_gt, pred_traj_gt_rel, pred_traj_fake, 573 | pred_traj_fake_rel, loss_mask 574 | ) 575 | ade, ade_l, ade_nl = cal_ade( 576 | pred_traj_gt, pred_traj_fake, linear_ped, non_linear_ped 577 | ) 578 | 579 | fde, fde_l, fde_nl = cal_fde( 580 | pred_traj_gt, pred_traj_fake, linear_ped, non_linear_ped 581 | ) 582 | 583 | # traj_real = torch.cat([obs_traj, pred_traj_gt], dim=0) 584 | # traj_real_rel = torch.cat([obs_traj_rel, pred_traj_gt_rel], dim=0) 585 | # traj_fake = torch.cat([obs_traj, pred_traj_fake], dim=0) 586 | # traj_fake_rel = 
torch.cat([obs_traj_rel, pred_traj_fake_rel], dim=0) 587 | 588 | # scores_fake = discriminator(traj_fake, traj_fake_rel, seq_start_end) 589 | # scores_real = discriminator(traj_real, traj_real_rel, seq_start_end) 590 | 591 | # d_loss = d_loss_fn(scores_real, scores_fake) 592 | # d_losses.append(d_loss.item()) 593 | 594 | g_l2_losses_abs.append(g_l2_loss_abs.item()) 595 | g_l2_losses_rel.append(g_l2_loss_rel.item()) 596 | disp_error.append(ade.item()) 597 | disp_error_l.append(ade_l.item()) 598 | disp_error_nl.append(ade_nl.item()) 599 | f_disp_error.append(fde.item()) 600 | f_disp_error_l.append(fde_l.item()) 601 | f_disp_error_nl.append(fde_nl.item()) 602 | 603 | loss_mask_sum += torch.numel(loss_mask.data) 604 | total_traj += pred_traj_gt.size(1) 605 | total_traj_l += torch.sum(linear_ped).item() 606 | total_traj_nl += torch.sum(non_linear_ped).item() 607 | if limit and total_traj >= args.num_samples_check: 608 | break 609 | 610 | # metrics['d_loss'] = sum(d_losses) / len(d_losses) 611 | metrics['g_l2_loss_abs'] = sum(g_l2_losses_abs) / loss_mask_sum 612 | metrics['g_l2_loss_rel'] = sum(g_l2_losses_rel) / loss_mask_sum 613 | 614 | metrics['ade'] = sum(disp_error) / (total_traj * args.pred_len) 615 | metrics['fde'] = sum(f_disp_error) / total_traj 616 | if total_traj_l != 0: 617 | metrics['ade_l'] = sum(disp_error_l) / (total_traj_l * args.pred_len) 618 | metrics['fde_l'] = sum(f_disp_error_l) / total_traj_l 619 | else: 620 | metrics['ade_l'] = 0 621 | metrics['fde_l'] = 0 622 | if total_traj_nl != 0: 623 | metrics['ade_nl'] = sum(disp_error_nl) / ( 624 | total_traj_nl * args.pred_len) 625 | metrics['fde_nl'] = sum(f_disp_error_nl) / total_traj_nl 626 | else: 627 | metrics['ade_nl'] = 0 628 | metrics['fde_nl'] = 0 629 | 630 | generator.train() 631 | return metrics 632 | 633 | 634 | def cal_l2_losses( 635 | pred_traj_gt, pred_traj_gt_rel, pred_traj_fake, pred_traj_fake_rel, 636 | loss_mask 637 | ): 638 | g_l2_loss_abs = l2_loss( 639 | pred_traj_fake, pred_traj_gt, loss_mask, mode='sum' 640 | ) 641 | g_l2_loss_rel = l2_loss( 642 | pred_traj_fake_rel, pred_traj_gt_rel, loss_mask, mode='sum' 643 | ) 644 | return g_l2_loss_abs, g_l2_loss_rel 645 | 646 | 647 | def cal_ade(pred_traj_gt, pred_traj_fake, linear_ped, non_linear_ped): 648 | ade = displacement_error(pred_traj_fake, pred_traj_gt) 649 | ade_l = displacement_error(pred_traj_fake, pred_traj_gt, linear_ped) 650 | ade_nl = displacement_error(pred_traj_fake, pred_traj_gt, non_linear_ped) 651 | return ade, ade_l, ade_nl 652 | 653 | 654 | def cal_fde( 655 | pred_traj_gt, pred_traj_fake, linear_ped, non_linear_ped 656 | ): 657 | pred_traj_gt= pred_traj_gt.double() 658 | pred_traj_fake= pred_traj_fake.double() 659 | fde = final_displacement_error(pred_traj_fake[-1], pred_traj_gt[-1]) 660 | fde_l = final_displacement_error( 661 | pred_traj_fake[-1], pred_traj_gt[-1], linear_ped 662 | ) 663 | fde_nl = final_displacement_error( 664 | pred_traj_fake[-1], pred_traj_gt[-1], non_linear_ped 665 | ) 666 | return fde, fde_l, fde_nl 667 | 668 | 669 | if __name__ == '__main__': 670 | args = parser.parse_args() 671 | main(args) -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import cv2 as cv 5 | import pandas as pd 6 | import pickle 7 | from attrdict import AttrDict 8 | #from cnn.model_cnn import TrajEstimator 9 | #from cnn.model_cnn_moving_threshold import 
TrajEstimatorThreshold 10 | from vgg_analysis.model import TrajectoryGenerator #as TrajectoryGenerator_sgan 11 | from model_sgan_tokka import TrajectoryGenerator_sgan 12 | from semantic_embedding.model import TrajectoryGenerator_sem 13 | from vgg_analysis.model_rnn import TrajectoryGenerator_R 14 | from vgg_analysis.old_model_segnet_camvid.model_segnet import TrajectoryGenerator_seg_camvid 15 | from vgg_analysis.model_segnet import TrajectoryGenerator_seg #full segnet 16 | 17 | from vgg_analysis.model_gt import TrajectoryGenerator_gt 18 | from utils import relative_to_abs 19 | import torch 20 | import skvideo 21 | skvideo.setFFmpegPath('C:/Users/arsal/Anaconda3/pkgs/ffmpeg-2.7.0-0/Scripts') 22 | import skvideo.io 23 | import cv2 24 | #"C:\Users\arsal\PycharmProjects\SGAN-AE-master\SGAN-AE-master\scene\full-psp\out-hotel" 25 | """RNN-AE-PSP""" 26 | with(open("C://Users//arsal//PycharmProjects//SGAN-AE-master//SGAN-AE-master//scene//full-psp//out-eth", 'rb')) as open_file: 27 | dat_seg =torch.Tensor(pickle.load(open_file)) 28 | 29 | """VGG-16""" 30 | path_traj_vid = "D:/traj_vid" 31 | dat= cv2.imread(path_traj_vid+"/eth/frame0.jpg") 32 | dat=cv2.resize(dat,(224,224)) 33 | dat=torch.tensor(dat) 34 | 35 | """SEG-Net-full""" 36 | #with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/hotel/hotel.pkl",'rb+')) as ff: (Segnet-camvid) 37 | with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-full-AE-Scene/eth/out-eth",'rb+')) as ff: 38 | dat_segnet_full= torch.Tensor(pickle.load(ff)) 39 | #dat_segnet_full=dat_segnet 40 | print(dat_segnet_full.shape) 41 | 42 | """SegNet CamVid""" 43 | with (open("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/eth/eth.pkl",'rb+')) as ff: 44 | dat_segnet= torch.Tensor(pickle.load(ff)) 45 | dat_segnet=dat_segnet[0] 46 | print(dat_segnet.shape) 47 | 48 | 49 | 50 | # """Ground truth""" 51 | # dat_gt= cv2.imread("C:/Users/arsal/Desktop/ITSC/annotations_for_segnet/hotel/label/Label_33.png") 52 | # dat_gt=cv2.resize(dat_gt,(480,640)) 53 | # dat_gt= torch.tensor(dat_gt) 54 | 55 | # """semantic embedding""" 56 | # eth_embed= np.load("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/semantic_embedding/embeddings/hotel.npy") 57 | # eth_embed= torch.Tensor(eth_embed) 58 | # 59 | # hotel_embed = np.load("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/semantic_embedding/embeddings/hotel.npy") 60 | # hotel_embed = torch.Tensor(hotel_embed) 61 | 62 | """SGAN""" 63 | def get_generator_sgan(checkpoint): 64 | args = AttrDict(checkpoint['args']) 65 | generator = TrajectoryGenerator_sgan( 66 | obs_len=args.obs_len, 67 | pred_len=args.pred_len, 68 | embedding_dim=args.embedding_dim, 69 | encoder_h_dim=args.encoder_h_dim_g, 70 | decoder_h_dim=args.decoder_h_dim_g, 71 | mlp_dim=args.mlp_dim, 72 | num_layers=args.num_layers, 73 | noise_dim=args.noise_dim, 74 | noise_type=args.noise_type, 75 | noise_mix_type=args.noise_mix_type, 76 | pooling_type=args.pooling_type, 77 | pool_every_timestep=args.pool_every_timestep, 78 | dropout=args.dropout, 79 | bottleneck_dim=args.bottleneck_dim, 80 | neighborhood_size=args.neighborhood_size, 81 | grid_size=args.grid_size, 82 | batch_norm=args.batch_norm) 83 | generator.load_state_dict(checkpoint['g_state']) 84 | # generator.cuda() 85 | generator.eval() 86 | return generator 87 | """VGG-16""" 88 | def get_generator_vgg(checkpoint): 89 | args = AttrDict(checkpoint['args']) 90 | 
generator = TrajectoryGenerator( 91 | obs_len=args.obs_len, 92 | pred_len=args.pred_len, 93 | embedding_dim=args.embedding_dim, 94 | encoder_h_dim=args.encoder_h_dim_g, 95 | decoder_h_dim=args.decoder_h_dim_g, 96 | mlp_dim=args.mlp_dim, 97 | num_layers=args.num_layers, 98 | noise_dim=args.noise_dim, 99 | noise_type=args.noise_type, 100 | noise_mix_type=args.noise_mix_type, 101 | pooling_type=args.pooling_type, 102 | pool_every_timestep=args.pool_every_timestep, 103 | dropout=args.dropout, 104 | bottleneck_dim=args.bottleneck_dim, 105 | neighborhood_size=args.neighborhood_size, 106 | grid_size=args.grid_size, 107 | batch_norm=args.batch_norm) 108 | generator.load_state_dict(checkpoint['g_best_state'],strict=False) 109 | # generator.cuda() 110 | generator.eval() 111 | return generator 112 | """PSP-Net Semantic Segmentation""" 113 | def get_generator_rnn(checkpoint): 114 | args = AttrDict(checkpoint['args']) 115 | print(args) 116 | generator = TrajectoryGenerator_R( 117 | obs_len=args.obs_len, 118 | pred_len=args.pred_len, 119 | embedding_dim=args.embedding_dim, 120 | encoder_h_dim=args.encoder_h_dim_g, 121 | decoder_h_dim=args.decoder_h_dim_g, 122 | mlp_dim=args.mlp_dim, 123 | num_layers=args.num_layers, 124 | noise_dim=args.noise_dim, 125 | noise_type=args.noise_type, 126 | noise_mix_type=args.noise_mix_type, 127 | pooling_type=args.pooling_type, 128 | pool_every_timestep=args.pool_every_timestep, 129 | dropout=args.dropout, 130 | bottleneck_dim=args.bottleneck_dim, 131 | neighborhood_size=args.neighborhood_size, 132 | grid_size=args.grid_size, 133 | batch_norm=args.batch_norm) 134 | generator.load_state_dict(checkpoint['g_best_state']) 135 | # generator.cuda() 136 | generator.eval() 137 | return generator 138 | 139 | """SegNet-full (this was previously segnet on pretrained camvid)""" 140 | def get_generator_seg(checkpoint): 141 | args = AttrDict(checkpoint['args']) 142 | generator = TrajectoryGenerator_seg( 143 | obs_len=args.obs_len, 144 | pred_len=args.pred_len, 145 | embedding_dim=args.embedding_dim, 146 | encoder_h_dim=args.encoder_h_dim_g, 147 | decoder_h_dim=args.decoder_h_dim_g, 148 | mlp_dim=args.mlp_dim, 149 | num_layers=args.num_layers, 150 | noise_dim=args.noise_dim, 151 | noise_type=args.noise_type, 152 | noise_mix_type=args.noise_mix_type, 153 | pooling_type=args.pooling_type, 154 | pool_every_timestep=args.pool_every_timestep, 155 | dropout=args.dropout, 156 | bottleneck_dim=args.bottleneck_dim, 157 | neighborhood_size=args.neighborhood_size, 158 | grid_size=args.grid_size, 159 | batch_norm=args.batch_norm) 160 | generator.load_state_dict(checkpoint['g_best_state'],strict=False) 161 | # generator.cuda() 162 | generator.eval() 163 | return generator 164 | 165 | def get_generator_seg_camvid(checkpoint): 166 | args = AttrDict(checkpoint['args']) 167 | generator = TrajectoryGenerator_seg_camvid( 168 | obs_len=args.obs_len, 169 | pred_len=args.pred_len, 170 | embedding_dim=args.embedding_dim, 171 | encoder_h_dim=args.encoder_h_dim_g, 172 | decoder_h_dim=args.decoder_h_dim_g, 173 | mlp_dim=args.mlp_dim, 174 | num_layers=args.num_layers, 175 | noise_dim=args.noise_dim, 176 | noise_type=args.noise_type, 177 | noise_mix_type=args.noise_mix_type, 178 | pooling_type=args.pooling_type, 179 | pool_every_timestep=args.pool_every_timestep, 180 | dropout=args.dropout, 181 | bottleneck_dim=args.bottleneck_dim, 182 | neighborhood_size=args.neighborhood_size, 183 | grid_size=args.grid_size, 184 | batch_norm=args.batch_norm) 185 | 
generator.load_state_dict(checkpoint['g_best_state'],strict=False) 186 | # generator.cuda() 187 | generator.eval() 188 | return generator 189 | 190 | 191 | 192 | def get_generator_gt(checkpoint): 193 | args = AttrDict(checkpoint['args']) 194 | generator = TrajectoryGenerator_gt( 195 | obs_len=args.obs_len, 196 | pred_len=args.pred_len, 197 | embedding_dim=args.embedding_dim, 198 | encoder_h_dim=args.encoder_h_dim_g, 199 | decoder_h_dim=args.decoder_h_dim_g, 200 | mlp_dim=args.mlp_dim, 201 | num_layers=args.num_layers, 202 | noise_dim=args.noise_dim, 203 | noise_type=args.noise_type, 204 | noise_mix_type=args.noise_mix_type, 205 | pooling_type=args.pooling_type, 206 | pool_every_timestep=args.pool_every_timestep, 207 | dropout=args.dropout, 208 | bottleneck_dim=args.bottleneck_dim, 209 | neighborhood_size=args.neighborhood_size, 210 | grid_size=args.grid_size, 211 | batch_norm=args.batch_norm) 212 | generator.load_state_dict(checkpoint['g_state']) 213 | # generator.cuda() 214 | generator.eval() 215 | return generator 216 | """Semantic Embedding""" 217 | def get_generator_sem(checkpoint): 218 | args = AttrDict(checkpoint['args']) 219 | generator = TrajectoryGenerator_sem( 220 | obs_len=args.obs_len, 221 | pred_len=args.pred_len, 222 | embedding_dim=args.embedding_dim, 223 | encoder_h_dim=args.encoder_h_dim_g, 224 | decoder_h_dim=args.decoder_h_dim_g, 225 | mlp_dim=args.mlp_dim, 226 | num_layers=args.num_layers, 227 | noise_dim=args.noise_dim, 228 | noise_type=args.noise_type, 229 | noise_mix_type=args.noise_mix_type, 230 | pooling_type=args.pooling_type, 231 | pool_every_timestep=args.pool_every_timestep, 232 | dropout=args.dropout, 233 | bottleneck_dim=args.bottleneck_dim, 234 | neighborhood_size=args.neighborhood_size, 235 | grid_size=args.grid_size, 236 | batch_norm=args.batch_norm) 237 | generator.load_state_dict(checkpoint['g_state']) 238 | # generator.cuda() 239 | generator.eval() 240 | return generator 241 | 242 | # def get_generator_cnn(checkpoint): 243 | # args = AttrDict(checkpoint['args']) 244 | # generator = TrajEstimator( 245 | # obs_len=args.obs_len, 246 | # pred_len=args.pred_len, 247 | # embedding_dim=args.embedding_dim, 248 | # encoder_h_dim=args.encoder_h_dim_g, 249 | # num_layers=args.num_layers, 250 | # dropout=args.dropout) 251 | # generator.load_state_dict(checkpoint['g_best_state']) 252 | # generator.cuda() 253 | # generator.eval() 254 | # return generator 255 | # 256 | 257 | 258 | 259 | 260 | 261 | 262 | # def get_generator_cnn_threshold(checkpoint): 263 | # args = AttrDict(checkpoint['args']) 264 | # if not hasattr(args, "threshold"): 265 | # threshold = 0.5 266 | # else: 267 | # threshold=args.threshold 268 | # 269 | # generator = TrajEstimatorThreshold( 270 | # obs_len=args.obs_len, 271 | # pred_len=args.pred_len, 272 | # embedding_dim=args.embedding_dim, 273 | # encoder_h_dim=args.encoder_h_dim_g, 274 | # num_layers=args.num_layers, 275 | # threshold=threshold, 276 | # dropout=args.dropout) 277 | # generator.load_state_dict(checkpoint['g_best_state']) 278 | # generator.cuda() 279 | # generator.eval() 280 | # return generator 281 | 282 | def world_to_img(world_coordinates, hom_matrix): 283 | scaled_trajs = [] 284 | 285 | inv_matrix = np.linalg.inv(hom_matrix) 286 | 287 | # if several sequences 288 | if len(world_coordinates.shape) > 2: 289 | # easier to iterate over them 290 | world_coordinates = np.swapaxes(world_coordinates, 0, 1) 291 | 292 | for traj in world_coordinates: 293 | ones = np.ones((len(traj), 1)) 294 | P = np.hstack((traj, ones)) 295 | R = 
np.dot(inv_matrix, P.transpose()).transpose() 296 | y = (R[:, 0]/R[:, 2]).reshape(-1, 1) 297 | x = (R[:, 1]/R[:, 2]).reshape(-1, 1) 298 | scaled_trajs.append(np.hstack((x, y))) 299 | else: 300 | ones = np.ones((len(world_coordinates), 1)) 301 | P = np.hstack((world_coordinates, ones)) 302 | R = np.dot(inv_matrix, P.transpose()) 303 | y = (R[0, :]/R[2, :]).reshape(-1, 1) 304 | x = (R[1, :]/R[2, :]).reshape(-1, 1) 305 | scaled_trajs.append(np.hstack((x, y))) 306 | return scaled_trajs 307 | 308 | 309 | def img_to_world(input, matrix): 310 | return world_to_img(input, np.linalg.inv(matrix)) 311 | 312 | def get_frame(video_path, frame): 313 | cap = cv.VideoCapture(video_path) 314 | cap.set(cv.CAP_PROP_POS_FRAMES, frame) 315 | _, img = cap.read() 316 | return img 317 | 318 | def print_to_img(trajs, video_path, matrix_path, frame): 319 | img = get_frame(video_path, frame) 320 | if trajs is not None: 321 | matrix = np.loadtxt(matrix_path, dtype=float) 322 | heigth, width, _ = img.shape 323 | 324 | scaled_trajs = {} 325 | for ped_id, ped in trajs.items(): 326 | scaled_trajs[ped_id] = {} 327 | for traj_name, traj in ped.items(): 328 | scaled_traj = [] 329 | if traj.size != 0: 330 | scaled_traj = world_to_img(traj, matrix)[0] 331 | scaled_trajs[ped_id][traj_name] = scaled_traj 332 | 333 | for ped_id, ped in scaled_trajs.items(): 334 | for ped_seq_name, ped_sequence in ped.items(): 335 | print(ped_seq_name) 336 | color = color_dict[ped_seq_name] 337 | if len(ped_sequence) > 0: 338 | #draw pred_gt thicker if we can compute ade/fde on it 339 | thick = 2 if ped_seq_name == "pred_gt" and len(ped_sequence) == 12 else 2 340 | 341 | for index, point in enumerate(ped_sequence[:-1, :]): 342 | real_pt_1 = tuple([int(round(x)) for x in point]) 343 | real_pt_2 = tuple([int(round(x)) for x in ped_sequence[index + 1]]) 344 | cv.line(img, real_pt_1, real_pt_2, color, thick) 345 | return img 346 | 347 | def get_trajs(frame, step=10): 348 | ''' 349 | :param frame: last observed frame 350 | :param step: step between each frame 351 | :returns None if no prediction can be made, or trajs_, a dictionnary containing trajectories for each pedestrian 352 | ''' 353 | 354 | trajs_ = {} 355 | 356 | # -1 because we include in selection 357 | seq_range = [frame - (obs_len - 1) * step, frame + pred_len * step] 358 | print("seq_range",seq_range) 359 | obs_range = [frame - (obs_len - 1) * step, frame] 360 | print("obs_range",obs_range) 361 | raw_obs_seq = data.loc[data["frameID"].between(obs_range[0], obs_range[1], inclusive=True)] 362 | raw_pred_seq = data.loc[data["frameID"].between(obs_range[1] + step, seq_range[1], inclusive=True)] 363 | peds_in_seq = raw_obs_seq.pedID.unique() 364 | 365 | curr_seq = np.zeros((len(peds_in_seq), 2, obs_len)) 366 | curr_seq_rel = np.zeros((len(peds_in_seq), 2, obs_len)) 367 | id_list = [] 368 | considered_ped = 0 369 | 370 | for ped_id in peds_in_seq: 371 | obs_ped_seq = raw_obs_seq.loc[raw_obs_seq.pedID == ped_id] 372 | # seq has to have at least obs_len length 373 | if len(obs_ped_seq.frameID) == obs_len: 374 | id_list.append(ped_id) 375 | 376 | pred_ped_seq = raw_pred_seq.loc[raw_pred_seq.pedID == ped_id] 377 | trajs_[ped_id] = {} 378 | 379 | obs_traj = obs_ped_seq[["x", "y"]].values.transpose() 380 | obs_traj_rel = np.zeros(obs_traj.shape) 381 | obs_traj_rel[:, 1:] = obs_traj[:, 1:] - obs_traj[:, :-1] 382 | 383 | curr_seq[considered_ped, :, 0:obs_len] = obs_traj 384 | curr_seq_rel[considered_ped, :, 0:obs_len] = obs_traj_rel 385 | 386 | trajs_[ped_id]["obs"] = obs_traj.transpose() 387 | 
trajs_[ped_id]["pred_gt"] = pred_ped_seq[["x", "y"]].values 388 | 389 | considered_ped += 1 390 | print(considered_ped) 391 | if considered_ped > 0: 392 | obs_list_tensor = torch.from_numpy(curr_seq[:considered_ped, :]).permute(2, 0, 1).float()#.cuda().float() 393 | obs_list_rel_tensor = torch.from_numpy(curr_seq_rel[:considered_ped, :]).permute(2, 0, 1).float()#.cuda().float() 394 | seq_start_end_tensor = torch.tensor([[0, considered_ped]]) 395 | 396 | for model_name, model in models.items(): 397 | print("asssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssaaaaaaaaaaaaaaaaaaaaa") 398 | if model_name=='rnn': 399 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat_seg) 400 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 401 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 402 | key = "pred_" + model_name 403 | for i in range(considered_ped): 404 | ped_id = id_list[i] 405 | trajs_[ped_id][key] = pred_abs_reorder[i] 406 | # print(trajs_) 407 | # else: 408 | # break 409 | # return trajs_ 410 | if model_name=='vgg': 411 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat) 412 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 413 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 414 | key = "pred_" + model_name 415 | for k in range(considered_ped): 416 | ped_id = id_list[k] 417 | trajs_[ped_id][key] = pred_abs_reorder[k] 418 | if model_name == 'sgan': 419 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor) 420 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 421 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 422 | key = "pred_" + model_name 423 | for k in range(considered_ped): 424 | ped_id = id_list[k] 425 | trajs_[ped_id][key] = pred_abs_reorder[k] 426 | if model_name == 'segnet_full': 427 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor,dat_segnet_full) 428 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 429 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 430 | key = "pred_" + model_name 431 | for k in range(considered_ped): 432 | ped_id = id_list[k] 433 | trajs_[ped_id][key] = pred_abs_reorder[k] 434 | 435 | if model_name == 'segnet': 436 | pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor, dat_segnet) 437 | pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 438 | pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 439 | key = "pred_" + model_name 440 | for k in range(considered_ped): 441 | ped_id = id_list[k] 442 | trajs_[ped_id][key] = pred_abs_reorder[k] 443 | 444 | 445 | 446 | 447 | # if model_name == 'gt_seg': 448 | # pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor, dat_gt) 449 | # pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 450 | # pred_abs_reorder = np.swapaxes(pred_abs, 0, 1) 451 | # key = "pred_" + model_name 452 | # for k in range(considered_ped): 453 | # ped_id = id_list[k] 454 | # trajs_[ped_id][key] = pred_abs_reorder[k] 455 | 456 | # print(trajs_) 457 | """Semantic Embedding PSP-Net""" 458 | # if model_name == 'semantic': 459 | # pred_rel = model(obs_list_tensor, obs_list_rel_tensor, seq_start_end_tensor, hotel_embed) 460 | # pred_abs = relative_to_abs(pred_rel, obs_list_tensor[-1]).detach().cpu().numpy() 461 | # pred_abs_reorder = 
np.swapaxes(pred_abs, 0, 1) 462 | # key = "pred_" + model_name 463 | # for k in range(considered_ped): 464 | # ped_id = id_list[k] 465 | # trajs_[ped_id][key] = pred_abs_reorder[k] 466 | 467 | return trajs_ 468 | 469 | else: 470 | return None 471 | 472 | 473 | def get_paths(dset_): 474 | paths_ = {} 475 | 476 | if dset_.split("/")[0] == "split_moving": 477 | dset = dset_.split("/")[1] 478 | model_path_us = os.path.join("scripts/save/", (dset_ + "_50epoch_with_model.pt")) 479 | model_path_sgan = "models/sgan-p-models/" + dset + "_12_model.pt" 480 | # model_path_sgan = "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/pretrained_sgan_models/hotel_12_model.pt" 481 | 482 | if model_path_sgan.split("/")[1] == "sgan-p-models": 483 | out_vid_path = "visualization/" + dset + "_" + dset_.split("/")[-1] + "_sgan-p.mp4" 484 | else: 485 | out_vid_path = "visualization/" + dset + dset_.split("/")[-1] + ".mp4" 486 | 487 | test_dataset_path = os.listdir("datasets/split_moving/" + dset +"/" + dset_.split("/")[-1] + "/test") 488 | if len(test_dataset_path) > 1: 489 | print("Several test datasets found : {}".format(test_dataset_path)) 490 | while True: 491 | to_keep = input("Enter the name of the dataset you want to use :") 492 | if to_keep in test_dataset_path: 493 | test_dataset_path = "datasets/" + dset + "/test/" + to_keep 494 | break 495 | else: 496 | test_dataset_path = "datasets/split_moving/" + dset +"/" + dset_.split("/")[-1] + "/test/" + test_dataset_path[0] 497 | 498 | 499 | else: 500 | dset = dset_ 501 | # model_path_us = "scripts/save/" + dset + "_50epoch_with_model.pt" 502 | # model_path_us="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/RNN-AE-Scene/ETH/200_epochs/checkpoint_with_model.pt" 503 | #model_path_vgg="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/vgg_analysis/pretrained-models/hotel/checkpoint_with_model.pt" 504 | 505 | model_path_vgg="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/Vgg-AE-Scene/eth/checkpoint_with_model.pt" #(use this) 506 | 507 | model_path_rnn="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/RNN-AE-Scene/eth/200_epochs/checkpoint_with_model.pt" 508 | # model_path_sgan = "models/sgan-p-models/" + dset + "_12_model.pt" 509 | # model_path_sgan= "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/models/sgan-p-models/hotel_12_model.pt" 510 | model_path_sgan= "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/pretrained_sgan_models/eth_12_model.pt" 511 | 512 | model_path_segnet="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-AE-Scene/eth/checkpoint_with_model.pt" #(pre-trained CAMVID) 513 | 514 | model_path_segnet_full="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/SegNet-full-AE-Scene/eth/checkpoint_with_model.pt" 515 | #"C:\Users\arsal\PycharmProjects\SGAN-AE-master\SGAN-AE-master\trained_models\Tokka\SegNet-full-AE-Scene\hotel\checkpoint_with_model.pt" 516 | # model_path_gt="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/trained_models/Tokka/Ground-Truth/hotel/checkpoint_with_model.pt" 517 | 518 | model_path_semantic="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/semantic_embedding/pre-trained_models/eth/checkpoint_with_model.pt" 519 | 520 | 521 | print("arsal") 522 | if model_path_sgan.split("/")[1] == "sgan-p-models": 523 | out_vid_path = "visualization/" + dset + "_sgan-p.mp4" 
524 | else: 525 | out_vid_path = "visualization/" + dset + ".mp4" 526 | # C:\Users\arsal\PycharmProjects\SGAN - AE - master\SGAN - AE - master\datasets 527 | # test_dataset_path = os.listdir("C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/datasets/" + dset + "/test") 528 | dset='eth' 529 | test_dataset_path = "C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/datasets/" + dset + "/test" +"/biwi_eth.txt" 530 | 531 | print(test_dataset_path) 532 | # if len(test_dataset_path) > 1: 533 | # print("Several test datasets found : {}".format(test_dataset_path)) 534 | # while True: 535 | # to_keep = input("Enter the name of the dataset you want to use :") 536 | # if to_keep in test_dataset_path: 537 | # test_dataset_path = "datasets/" + dset + "/test/" + to_keep 538 | # break 539 | # else: 540 | # test_dataset_path = "datasets/" + dset + "/test/" + test_dataset_path[0] 541 | scenes_and_mat_path="C:/Users/arsal/PycharmProjects/SGAN-AE-master/SGAN-AE-master/scenes_and_matrices/" 542 | # scenes_and_mat_path = "scenes_and_matrices/" 543 | mat_path = scenes_and_mat_path + dset + ".txt" 544 | vid_path = scenes_and_mat_path + dset + ".avi" 545 | 546 | paths_["vid"] = vid_path 547 | paths_["mat"] = mat_path 548 | paths_["model_vgg"] = model_path_vgg 549 | paths_["model_sgan"] = model_path_sgan 550 | paths_["model_rnn"]=model_path_rnn 551 | paths_["model_segnet"]=model_path_segnet 552 | paths_["model_segnet_full"]=model_path_segnet_full 553 | 554 | # paths_["model_gt"]=model_path_gt 555 | paths_["model_semantic"]= model_path_semantic 556 | 557 | 558 | paths_["test_dataset"] = test_dataset_path 559 | for key, item in paths_.items(): 560 | if not os.path.exists(item): 561 | print("File not found : {}".format(item)) 562 | sys.exit(0) 563 | #this file is created, not required 564 | paths_["out_vid"] = out_vid_path 565 | return paths_ 566 | 567 | if __name__ == "__main__": 568 | #paths are relative from sgan dir 569 | os.chdir("../../") 570 | dataset = "eth" 571 | obs_len = 8 572 | pred_len = 12 573 | # color_dict = {"obs": (0, 0, 0), "pred_cnn": (250, 250, 0), "pred_cnn_threshold": (250, 250, 250), "pred_gt": (0, 250, 0), "pred_sgan": (0,0,250)} 574 | """For segnet with pre-trained on cam-vid dataset""" 575 | # color_dict = {"obs": (0, 0, 0), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0,0,250),"pred_vgg":(255,0,255),"pred_segnet":(240,37,15),"pred_gt_seg":(28,163,170), 576 | """with full-segnet trained on ped dataset""" 577 | # color_dict = {"obs": (255, 127, 39), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0,0,250),"pred_vgg":(255,0,255),"pred_segnet_full":(240,37,15),"pred_gt_seg":(28,163,170), 578 | 579 | # "pred_semantic": (0,250,250)} 580 | color_dict = {"obs": (255, 127, 39), "pred_sgan": (250, 250, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0, 0, 250), 581 | "pred_segnet": (240, 37, 15),"pred_segnet_full": (64, 128, 128), "pred_vgg":(255,0,255)} 582 | # 583 | # "pred_semantic": (0, 250, 250)} 584 | 585 | """FOR Semantic embedding and PSP-Semantic Segmentation""" 586 | # color_dict = {"obs": (0, 0, 0), "pred_gt": (0, 250, 0), "pred_rnn": (0, 0, 250), 587 | # "pred_semantic": (0, 250, 250)} 588 | 589 | paths = get_paths(dataset) 590 | print("-------------------------------------------------------------------here is",paths) 591 | print("Paths :") 592 | for key in sorted(paths.keys()): 593 | print("/t{}: {}".format(key, paths[key])) 594 | # 
"C:\Users\arsal\PycharmProjects\SGAN-AE-master\SGAN-AE-master\trained_models\Tokka\RNN-AE-Scene\ETH\200_epochs\checkpoint_with_model.pt" 595 | print("Loading models.") 596 | models = {} 597 | 598 | checkpoint_vgg = torch.load(paths["model_vgg"],map_location='cpu') 599 | print(checkpoint_vgg) 600 | models['vgg'] = get_generator_vgg(checkpoint_vgg) 601 | # models["cnn_threshold"] = get_generator_cnn_threshold(checkpoint_cnn) 602 | checkpoint_sgan = torch.load(paths["model_sgan"],map_location='cpu') 603 | models['sgan'] = get_generator_sgan(checkpoint_sgan) 604 | 605 | checkpoint_rnn=torch.load(paths["model_rnn"],map_location='cpu') 606 | models['rnn']=get_generator_rnn(checkpoint_rnn) 607 | 608 | checkpoint_segnet = torch.load(paths["model_segnet"], map_location='cpu') 609 | print("ISVC") 610 | print(checkpoint_segnet) 611 | models['segnet'] = get_generator_seg_camvid(checkpoint_segnet) 612 | 613 | checkpoint_segnet_full = torch.load(paths["model_segnet_full"], map_location='cpu') 614 | print("ISVC") 615 | print(checkpoint_segnet) 616 | models['segnet_full'] = get_generator_seg(checkpoint_segnet_full) 617 | 618 | # checkpoint_gt_seg=torch.load(paths["model_gt"], map_location='cpu') 619 | # models['gt_seg'] = get_generator_gt(checkpoint_gt_seg) 620 | 621 | checkpoint_semantic = torch.load(paths["model_semantic"], map_location='cpu') 622 | models["semantic"] = get_generator_sem(checkpoint_semantic) 623 | 624 | print("loaded_models",models) 625 | print("Loading data.") 626 | data = pd.read_csv(paths["test_dataset"], sep="\t", header=None) 627 | # print(data) 628 | data.columns = ["frameID", "pedID", "x", "y"] 629 | data.sort_values(by=["frameID", "pedID"]) 630 | data.reset_index(drop=True) 631 | writer = skvideo.io.FFmpegWriter(paths["out_vid"]) 632 | 633 | frameList = data.frameID.unique() 634 | max = frameList[-1] 635 | #step every ten frame for watchable video 636 | for frame_number in range(0,max,10): 637 | if frame_number%1000 == 0: 638 | print("Frame {}/{}".format(frame_number, max)) 639 | 640 | trajs = None 641 | if frame_number in frameList: 642 | trajs = get_trajs(frame_number) 643 | # print(trajs) 644 | img = print_to_img(trajs, paths["vid"], paths["mat"], frame_number) 645 | # print(img) 646 | writer.writeFrame(img) --------------------------------------------------------------------------------