├── MotionMixer.png
├── README.md
├── amass
│   ├── dataloader_amass.py
│   ├── mlp_mixer.py
│   ├── test_mixer_amass.py
│   └── train_mixer_amass.py
├── checkpoints
│   └── .gitignore
├── h36m
│   ├── datasets
│   │   ├── dataset_h36m.py
│   │   └── dataset_h36m_ang.py
│   ├── h36_3d_viz.py
│   ├── mlp_mixer.py
│   ├── test_mixer_h36m.py
│   ├── train_mixer_h36m.py
│   └── utils
│       ├── data_utils.py
│       ├── forward_kinematics.py
│       └── utils_mixer.py
├── requirements.txt
└── utils
    ├── ang2joint.py
    ├── body_models
    │   └── smpl_skeleton.npz
    ├── data_utils.py
    └── forward_kinematics.py

--------------------------------------------------------------------------------
/MotionMixer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MotionMLP/MotionMixer/91327c3c3a455d398bd097fa300385bafa80a835/MotionMixer.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | 2 |
3 | 4 |

MotionMixer: MLP-based 3D Human Body Pose Forecasting

5 | 6 | 7 | Official PyTorch Implementation of the paper: MotionMixer: MLP-based 3D Human Body Pose Forecasting. 8 | 9 | Arij Bouazizi, Adrian Holzbock, Ulrich Kressel, Klaus Dietmayer and Vasileios Belagiannis 10 | 11 | 12 | [[Proceedings](https://www.ijcai.org/proceedings/2022/0111.pdf)] [[Papers with Code](https://paperswithcode.com/paper/motionmixer-mlp-based-3d-human-body-pose)] [[Arxiv](https://arxiv.org/abs/2207.00499)] 13 | 14 | 15 | 16 |
17 | 18 | 19 |
20 | 21 |
22 | 23 | ## Installation 24 | 25 | To set up the environment: 26 | ```sh 27 | cd MotionMixer 28 | conda create -n MotionMixer python=3.8.8 29 | conda activate MotionMixer 30 | pip install -r requirements.txt 31 | ``` 32 | 33 | ## Data 34 | 35 | Due to licensing restrictions, we cannot provide any data. Please refer to [STSGCN](https://github.com/FraLuca/STSGCN) for the preparation of the dataset files. 36 | 37 | ## Training 38 | 39 | To train the model on H3.6M or AMASS, use the following commands: 40 | ``` 41 | python h36m/train_mixer_h36m.py --input_n 10 --output_n 25 --skip_rate 1 42 | ``` 43 | ``` 44 | python amass/train_mixer_amass.py --input_n 10 --output_n 25 --skip_rate 5 45 | ``` 46 | 47 | ## Evaluation 48 | 49 | To test the pretrained models, use the following commands: 50 | ``` 51 | python h36m/test_mixer_h36m.py --input_n 10 --output_n 25 --skip_rate 1 52 | ``` 53 | ``` 54 | python amass/test_mixer_amass.py --input_n 10 --output_n 25 --skip_rate 5 55 | ``` 56 | 57 | ## Models 58 | 59 | We release the pretrained models for academic purposes. You can download them from [Google Drive](https://drive.google.com/drive/folders/1SrZpoe__Q3YXdk_TrtcxeJzRQiKAWoT5). Unzip the .zip file into the ```/checkpoints``` directory. 60 | 61 | ## Citation 62 | 63 | If you find this code useful for your research, please consider citing the following paper: 64 | 65 | ```latex 66 | @inproceedings{ijcai2022p111, 67 | title = {MotionMixer: MLP-based 3D Human Body Pose Forecasting}, 68 | author = {Bouazizi, Arij and Holzbock, Adrian and Kressel, Ulrich and Dietmayer, Klaus and Belagiannis, Vasileios}, 69 | booktitle = {Proceedings of the Thirty-First International Joint Conference on 70 | Artificial Intelligence, {IJCAI-22}}, 71 | publisher = {International Joint Conferences on Artificial Intelligence Organization}, 72 | pages = {791--798}, 73 | year = {2022}, 74 | month = {7}, 75 | } 76 | 77 | ``` 78 | 79 | ## Acknowledgments 80 | 81 | Some of our code was adapted from [HisRepItself](https://github.com/wei-mao-2019/HisRepItself) and [STSGCN](https://github.com/FraLuca/STSGCN). We thank the authors for making their code public. 82 | 83 | ## License 84 | 85 | 86 | Creative Commons License
This work is licensed under Creative Commons Attribution-NonCommercial 4.0 International License. 89 | -------------------------------------------------------------------------------- /amass/dataloader_amass.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.data import Dataset,DataLoader 3 | import numpy as np 4 | #from h5py import File 5 | #import scipy.io as sio 6 | from matplotlib import pyplot as plt 7 | import torch 8 | import os 9 | from utils.ang2joint import * 10 | import networkx as nx 11 | 12 | ''' 13 | adapted from 14 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/amass3d.py 15 | ''' 16 | 17 | 18 | class Datasets(Dataset): 19 | 20 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0): 21 | 22 | """ 23 | :param path_to_data: 24 | :param actions: 25 | :param input_n: 26 | :param output_n: 27 | :param dct_used: 28 | :param split: 0 train, 1 testing, 2 validation 29 | :param sample_rate: 30 | """ 31 | self.path_to_data = '/media/bouazia/bouazia_3/AMASS_dataset/' #os.path.join(data_dir,'AMASS') # "D:\data\AMASS\\" 32 | self.split = split 33 | self.in_n = input_n 34 | self.out_n = output_n 35 | # self.sample_rate = opt.sample_rate 36 | self.p3d = [] 37 | self.keys = [] 38 | self.data_idx = [] 39 | self.joint_used = np.arange(4, 22) # start from 4 for 17 joints, removing the non moving ones 40 | seq_len = self.in_n + self.out_n 41 | 42 | amass_splits = [ 43 | ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'], 44 | ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'], 45 | ['BioMotionLab_NTroje'], 46 | ] 47 | 48 | 49 | # amass_splits = [ 50 | # ['CMU'], 51 | # #['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'], 52 | # #['BioMotionLab_NTroje'], 53 | # ] 54 | 55 | # amass_splits = [['BioMotionLab_NTroje'], ['HumanEva'], ['SSM_synced']] 56 | # amass_splits = [['HumanEva'], ['HumanEva'], ['HumanEva']] 57 | # amass_splits[0] = list( 58 | # set(amass_splits[0]).difference(set(amass_splits[1] + amass_splits[2]))) 59 | 60 | # from human_body_prior.body_model.body_model import BodyModel 61 | # from smplx import lbs 62 | # root_path = os.path.dirname(__file__) 63 | # bm_path = root_path[:-6] + '/body_models/smplh/neutral/model.npz' 64 | # bm = BodyModel(bm_path=bm_path, num_betas=16, batch_size=1, model_type='smplh') 65 | # beta_mean = np.array([0.41771687, 0.25984767, 0.20500051, 0.13503872, 0.25965645, -2.10198147, -0.11915666, 66 | # -0.5498772, 0.30885323, 1.4813145, -0.60987528, 1.42565269, 2.45862726, 0.23001716, 67 | # -0.64180912, 0.30231911]) 68 | # beta_mean = torch.from_numpy(beta_mean).unsqueeze(0).float() 69 | # # Add shape contribution 70 | # v_shaped = bm.v_template + lbs.blend_shapes(beta_mean, bm.shapedirs) 71 | # # Get the joints 72 | # # NxJx3 array 73 | # p3d0 = lbs.vertices2joints(bm.J_regressor, v_shaped) # [1,52,3] 74 | # p3d0 = (p3d0 - p3d0[:, 0:1, :]).float().cuda().cpu().data.numpy() 75 | # parents = bm.kintree_table.data.numpy()[0, :] 76 | # np.savez_compressed('smpl_skeleton.npz', p3d0=p3d0, parents=parents) 77 | 78 | # load mean skeleton 79 | skel = np.load('/lhome/bouazia/pose_prediction/utils/body_models/smpl_skeleton.npz') 80 | p3d0 = torch.from_numpy(skel['p3d0']).float().cuda() 81 | parents = skel['parents'] 82 | parent = {} 83 | for i in range(len(parents)): 84 | parent[i] = parents[i] 85 | n = 0 86 | for ds in amass_splits[split]: 87 | if not os.path.isdir(self.path_to_data + ds): 88 | print(ds) 89 | continue 90 | 
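# --- editor's annotation (comments only; the loading code is unchanged) ---
# Each AMASS clip is an .npz archive holding axis-angle 'poses' and a
# 'mocap_framerate'. The code that follows downsamples every clip to roughly
# 25 fps (sample_rate = int(frame_rate // 25), e.g. a 100 Hz clip keeps every
# 4th frame), zeroes the global root rotation, converts the axis-angle poses
# to 3D joint positions with ang2joint() on the mean SMPL skeleton loaded
# above, and records every window of input_n + output_n frames (strided by
# skip_rate) in self.data_idx.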
print('>>> loading {}'.format(ds)) 91 | for sub in os.listdir(self.path_to_data + ds): 92 | #print ("working in ",self.path_to_data + ds) 93 | 94 | if not os.path.isdir(self.path_to_data + ds + '/' + sub): 95 | continue 96 | for act in os.listdir(self.path_to_data + ds + '/' + sub): 97 | #print ("poses path",self.path_to_data + ds + '/' + sub + '/' + act) 98 | 99 | #print (act) 100 | if not act.endswith('.npz'): 101 | 102 | continue 103 | # if not ('walk' in act or 'jog' in act or 'run' in act or 'treadmill' in act): 104 | # continue 105 | 106 | pose_all = np.load(self.path_to_data + ds + '/' + sub + '/' + act) 107 | try: 108 | poses = pose_all['poses'] 109 | except: 110 | print('no poses at {}_{}_{}'.format(ds, sub, act)) 111 | continue 112 | frame_rate = pose_all['mocap_framerate'] 113 | # gender = pose_all['gender'] 114 | # dmpls = pose_all['dmpls'] 115 | # betas = pose_all['betas'] 116 | # trans = pose_all['trans'] 117 | fn = poses.shape[0] 118 | sample_rate = int(frame_rate // 25) 119 | fidxs = range(0, fn, sample_rate) 120 | fn = len(fidxs) 121 | poses = poses[fidxs] 122 | poses = torch.from_numpy(poses).float().cuda() 123 | poses = poses.reshape([fn, -1, 3]) 124 | # remove global rotation 125 | poses[:, 0] = 0 126 | p3d0_tmp = p3d0.repeat([fn, 1, 1]) 127 | p3d = ang2joint(p3d0_tmp, poses, parent) 128 | # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy() 129 | self.p3d.append(p3d.cpu().data.numpy()) 130 | if split == 2: 131 | valid_frames = np.arange(0, fn - seq_len + 1, skip_rate) 132 | else: 133 | valid_frames = np.arange(0, fn - seq_len + 1, skip_rate) 134 | 135 | # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames) 136 | self.keys.append((ds, sub, act)) 137 | tmp_data_idx_1 = [n] * len(valid_frames) 138 | tmp_data_idx_2 = list(valid_frames) 139 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 140 | n += 1 141 | 142 | def __len__(self): 143 | return np.shape(self.data_idx)[0] 144 | 145 | def __getitem__(self, item): 146 | key, start_frame = self.data_idx[item] 147 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) 148 | return self.p3d[key][fs] # , key 149 | 150 | 151 | # In[12]: 152 | 153 | 154 | def normalize_A(A): # given an adj.matrix, normalize it by multiplying left and right with the degree matrix, in the -1/2 power 155 | 156 | A=A+np.eye(A.shape[0]) 157 | 158 | D=np.sum(A,axis=0) 159 | 160 | 161 | D=np.diag(D.A1) 162 | 163 | 164 | D_inv = D**-0.5 165 | D_inv[D_inv==np.infty]=0 166 | 167 | return D_inv*A*D_inv 168 | 169 | 170 | # In[ ]: 171 | 172 | 173 | def spatio_temporal_graph(joints_to_consider,temporal_kernel_size,spatial_adjacency_matrix): # given a normalized spatial adj.matrix,creates a spatio-temporal adj.matrix 174 | 175 | 176 | number_of_joints=joints_to_consider 177 | 178 | spatio_temporal_adj=np.zeros((temporal_kernel_size,number_of_joints,number_of_joints)) 179 | for t in range(temporal_kernel_size): 180 | for i in range(number_of_joints): 181 | spatio_temporal_adj[t,i,i]=1 # create edge between same body joint,for t consecutive frames 182 | for j in range(number_of_joints): 183 | if spatial_adjacency_matrix[i,j]!=0: # if the body joints are connected 184 | spatio_temporal_adj[t,i,j]=spatial_adjacency_matrix[i,j] 185 | return spatio_temporal_adj 186 | 187 | 188 | # In[20]: 189 | 190 | 191 | def get_adj_AMASS(joints_to_consider,temporal_kernel_size): # returns adj.matrix to be fed to the network 192 | if joints_to_consider==22: 193 | edgelist = [ 194 | (0, 1), (0, 2), #(0, 3), 195 | (1, 4), (5, 2), #(3, 6), 196 | (7, 4), (8, 5), 
#(6, 9), 197 | (7, 10), (8, 11), #(9, 12), 198 | #(12, 13), (12, 14), 199 | (12, 15), 200 | #(13, 16), (12, 16), (14, 17), (12, 17), 201 | (12, 16), (12, 17), 202 | (16, 18), (19, 17), (20, 18), (21, 19), 203 | #(22, 20), #(23, 21),#wrists 204 | (1, 16), (2, 17)] 205 | 206 | # create a graph 207 | G=nx.Graph() 208 | G.add_edges_from(edgelist) 209 | # create adjacency matrix 210 | A = nx.adjacency_matrix(G,nodelist=list(range(0,joints_to_consider))).todense() 211 | #normalize adjacency matrix 212 | A=normalize_A(A) 213 | return torch.Tensor(spatio_temporal_graph(joints_to_consider,temporal_kernel_size,A)) 214 | 215 | 216 | # In[23]: 217 | 218 | 219 | def mpjpe_error(batch_pred,batch_gt): 220 | #assert batch_pred.requires_grad==True 221 | #assert batch_gt.requires_grad==False 222 | 223 | 224 | batch_pred=batch_pred.contiguous().view(-1,3) 225 | batch_gt=batch_gt.contiguous().view(-1,3) 226 | 227 | return torch.mean(torch.norm(batch_gt-batch_pred,2,1)) 228 | 229 | -------------------------------------------------------------------------------- /amass/mlp_mixer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | 6 | class SELayer(nn.Module): 7 | def __init__(self, c, r=4, use_max_pooling=False): 8 | super().__init__() 9 | self.squeeze = nn.AdaptiveAvgPool1d(1) if not use_max_pooling else nn.AdaptiveMaxPool1d(1) 10 | self.excitation = nn.Sequential( 11 | nn.Linear(c, c // r, bias=False), 12 | nn.ReLU(inplace=True), 13 | nn.Linear(c // r, c, bias=False), 14 | nn.Sigmoid() 15 | ) 16 | 17 | 18 | def forward(self, x): 19 | bs, s, h = x.shape 20 | y = self.squeeze(x).view(bs, s) 21 | y = self.excitation(y).view(bs, s, 1) 22 | return x * y.expand_as(x) 23 | 24 | 25 | 26 | 27 | def mish(x): 28 | return (x*torch.tanh(F.softplus(x))) 29 | 30 | 31 | 32 | 33 | class MlpBlock(nn.Module): 34 | def __init__(self, mlp_hidden_dim, mlp_input_dim, mlp_bn_dim, activation='gelu', regularization=0, initialization='none'): 35 | super().__init__() 36 | self.mlp_hidden_dim = mlp_hidden_dim 37 | self.mlp_input_dim = mlp_input_dim 38 | self.mlp_bn_dim = mlp_bn_dim 39 | #self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_input_dim) 40 | self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_hidden_dim) 41 | self.fc2 = nn.Linear(self.mlp_hidden_dim, self.mlp_input_dim) 42 | if regularization > 0.0: 43 | self.reg1 = nn.Dropout(regularization) 44 | self.reg2 = nn.Dropout(regularization) 45 | elif regularization == -1.0: 46 | self.reg1 = nn.BatchNorm1d(self.mlp_bn_dim) 47 | self.reg2 = nn.BatchNorm1d(self.mlp_bn_dim) 48 | else: 49 | self.reg1 = None 50 | self.reg2 = None 51 | 52 | if activation == 'gelu': 53 | self.act1 = nn.GELU() 54 | elif activation == 'mish': 55 | self.act1 = mish #nn.Mish() 56 | else: 57 | raise ValueError('Unknown activation function type: %s'%activation) 58 | 59 | 60 | 61 | 62 | def forward(self, x): 63 | x = self.fc1(x) 64 | x = self.act1(x) 65 | if self.reg1 is not None: 66 | x = self.reg1(x) 67 | x = self.fc2(x) 68 | if self.reg2 is not None: 69 | x = self.reg2(x) 70 | 71 | return x 72 | 73 | 74 | 75 | class MixerBlock(nn.Module): 76 | def __init__(self, tokens_mlp_dim, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 77 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 78 | super().__init__() 79 | self.tokens_mlp_dim = tokens_mlp_dim 80 | self.channels_mlp_dim = channels_mlp_dim 81 | self.seq_len = seq_len 82 | self.hidden_dim = hidden_dim 
# out channels of the conv 83 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization) 84 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization) 85 | self.use_se = use_se 86 | if self.use_se: 87 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 88 | 89 | self.LN1 = nn.LayerNorm(self.hidden_dim) 90 | self.LN2 = nn.LayerNorm(self.hidden_dim) 91 | 92 | 93 | 94 | def forward(self, x): 95 | # shape x [256, 8, 512] [bs, patches/time_steps, channels 96 | y = self.LN1(x) 97 | 98 | y = y.transpose(1, 2) 99 | y = self.mlp_block_token_mixing(y) 100 | y = y.transpose(1, 2) 101 | 102 | if self.use_se: 103 | y = self.se(y) 104 | x = x + y 105 | 106 | y = self.LN2(x) 107 | y = self.mlp_block_channel_mixing(y) 108 | 109 | if self.use_se: 110 | y = self.se(y) 111 | 112 | return x + y 113 | 114 | 115 | 116 | class MixerBlock_Channel(nn.Module): 117 | def __init__(self, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 118 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 119 | super().__init__() 120 | self.channels_mlp_dim = channels_mlp_dim 121 | self.seq_len = seq_len 122 | self.hidden_dim = hidden_dim # out channels of the conv 123 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization) 124 | self.use_se = use_se 125 | if self.use_se: 126 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 127 | 128 | 129 | self.LN2 = nn.LayerNorm(self.hidden_dim) 130 | 131 | #self.act1 = nn.GELU() 132 | 133 | def forward(self, x): 134 | # shape x [256, 8, 512] [bs, patches/time_steps, channels] 135 | y = x 136 | 137 | if self.use_se: 138 | y = self.se(y) 139 | x = x + y 140 | y = self.LN2(x) 141 | y = self.mlp_block_channel_mixing(y) 142 | if self.use_se: 143 | y = self.se(y) 144 | 145 | return x + y 146 | 147 | 148 | 149 | class MixerBlock_Token(nn.Module): 150 | def __init__(self, tokens_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 151 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 152 | super().__init__() 153 | self.tokens_mlp_dim = tokens_mlp_dim 154 | 155 | self.seq_len = seq_len 156 | self.hidden_dim = hidden_dim # out channels of the conv 157 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization) 158 | 159 | self.use_se = use_se 160 | 161 | if self.use_se: 162 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 163 | 164 | self.LN1 = nn.LayerNorm(self.hidden_dim) 165 | 166 | 167 | def forward(self, x): 168 | # shape x [256, 8, 512] [bs, patches/time_steps, channels] 169 | y = self.LN1(x) 170 | y = y.transpose(1, 2) 171 | y = self.mlp_block_token_mixing(y) 172 | y = y.transpose(1, 2) 173 | 174 | if self.use_se: 175 | y = self.se(y) 176 | x = x + y 177 | 178 | return x + y 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | class MlpMixer(nn.Module): 198 | def __init__(self, num_classes, num_blocks, hidden_dim, tokens_mlp_dim, channels_mlp_dim, seq_len, pred_len, activation='gelu', mlp_block_type='normal', 
199 | regularization=0, input_size=51, initialization='none', r_se=4, use_max_pooling=False, use_se=False): 200 | super().__init__() 201 | self.num_classes = num_classes 202 | self.num_blocks = num_blocks 203 | self.hidden_dim = hidden_dim 204 | self.seq_len = seq_len 205 | #self.pred_len = 25 206 | self.tokens_mlp_dim = tokens_mlp_dim 207 | self.channels_mlp_dim = channels_mlp_dim 208 | self.input_size = input_size #varyies with the number of joints 209 | self.conv = nn.Conv1d(1, self.hidden_dim, (1, self.input_size), stride=1) 210 | self.activation = activation 211 | self.Mixer_Block = nn.ModuleList(MixerBlock(self.tokens_mlp_dim, self.channels_mlp_dim, self.seq_len, self.hidden_dim, activation=self.activation, 212 | regularization=regularization, initialization=initialization, r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se) for _ in range(num_blocks)) 213 | self.LN = nn.LayerNorm(self.hidden_dim) 214 | self.fc_out = nn.Linear(self.hidden_dim, self.num_classes) 215 | self.pred_len = pred_len 216 | self.conv_out = nn.Conv1d(self.seq_len, self.pred_len, 1, stride=1) 217 | 218 | 219 | 220 | def forward(self, x): #, padded 221 | 222 | x = x.unsqueeze(1) 223 | y = self.conv(x) 224 | 225 | y = y.squeeze().transpose(1, 2) 226 | # [256, 8, 512] [bs, patches/time_steps, channels] 227 | for mb in self.Mixer_Block: 228 | y = mb(y) 229 | 230 | y = self.LN(y) 231 | 232 | 233 | # print (self.tcn(y.unsqueeze(0)).shape) 234 | 235 | out = self.fc_out(self.conv_out(y)) 236 | #out = self.fc_out(self.reg(self.conv_out(y))) 237 | 238 | #out = self.fc_out(y) 239 | 240 | return out 241 | 242 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /amass/test_mixer_amass.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import DataLoader 7 | import torch.optim as optim 8 | import torch.autograd 9 | import matplotlib.pyplot as plt 10 | from utils.ang2joint import * 11 | from dataloader_amass import * 12 | import numpy as np 13 | import argparse 14 | import os 15 | from mlp_mixer import MlpMixer 16 | 17 | 18 | 19 | 20 | def test_mixer(model, args): 21 | 22 | device = args.dev 23 | model.eval() 24 | accum_loss = 0 25 | n_batches = 0 # number of batches for all the sequences 26 | 27 | n = 0 28 | 29 | 30 | Dataset = Datasets(args.data_dir,args.input_n,args.output_n,args.skip_rate,split=2) 31 | loader_test = DataLoader( Dataset,batch_size=args.batch_size, 32 | shuffle =False,num_workers=0) 33 | 34 | 35 | joint_used=np.arange(4,22) 36 | full_joint_used=np.arange(0,22) # needed for visualization 37 | with torch.no_grad(): 38 | for cnt,batch in enumerate(loader_test): 39 | batch = batch.float().to(device) 40 | batch_dim=batch.shape[0] 41 | n+=batch_dim 42 | 43 | sequences_train=batch[:,0:args.input_n,joint_used,:].view(-1,args.input_n,args.pose_dim) 44 | 45 | sequences_predict_gt=batch[:,args.input_n:args.input_n+args.output_n,full_joint_used,:]#.view(-1,args.output_n,args.pose_dim) 46 | 47 | sequences_predict=model(sequences_train).view(-1,args.output_n,18,3)#.permute(0,1,3,2) 48 | 49 | 50 | all_joints_seq=sequences_predict_gt.clone() 51 | 52 | all_joints_seq[:,:,joint_used,:]=sequences_predict 53 | 54 | loss=mpjpe_error(all_joints_seq,sequences_predict_gt)*1000 # loss in milimeters 55 | accum_loss+=loss*batch_dim 56 | print('overall average loss in mm is: '+str(accum_loss/n)) 57 | 58 | 59 | 60 | 
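# (added comment) Evaluation protocol used above: the network predicts only
# the 18 moving joints (indices 4..21); the four near-static joints (0..3)
# are copied from the ground truth into all_joints_seq, so the reported MPJPE
# covers the full 22-joint skeleton and is scaled to millimetres (*1000).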
return accum_loss/n  # fixed: n_batches is never incremented, so average over the n samples 61 | 62 | 63 | 64 | if __name__ == '__main__': 65 | parser = argparse.ArgumentParser(add_help=False) # Parameters for mpjpe 66 | parser.add_argument('--data_dir', type=str, default='../data_amass/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)') 67 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames") 68 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames") 69 | parser.add_argument('--skip_rate', type=int, default=5, choices=[1, 5], help='rate of frames to skip; defaults to 1 for H36M and 5 for AMASS/3DPW') 70 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader') 71 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging') 72 | 73 | parser.add_argument('--activation', default='gelu', type=str, required=False) 74 | parser.add_argument('--r_se', default=8, type=int, required=False) 75 | 76 | parser.add_argument('--n_epochs', default=50, type=int, required=False) 77 | parser.add_argument('--batch_size', default=50, type=int, required=False) 78 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False) 79 | parser.add_argument('--pin_memory', default=False, type=bool, required=False) 80 | parser.add_argument('--loader_workers', default=4, type=int, required=False) 81 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False) 82 | parser.add_argument('--dev', default='cuda:0', type=str, required=False) 83 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, hee_normal, hee_uniform') 84 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False) 85 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma') 86 | parser.add_argument('--gamma', type=float, default=0.1, help='multiplicative factor applied to the learning rate at each milestone epoch') 87 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients') 88 | parser.add_argument('--model_path', type=str, default='./checkpoints/amass_3d_25frames_ckpt', help='path to the model checkpoint') 89 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set') 90 | parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train/val/test)') 91 | 92 | args = parser.parse_args() 93 | 94 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe 95 | parser_mpjpe.add_argument('--hidden_dim', default=128, type=int, required=False) 96 | parser_mpjpe.add_argument('--num_blocks', default=10, type=int, required=False) 97 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False) 98 | parser_mpjpe.add_argument('--channels_mlp_dim', default=128, type=int, required=False) 99 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False) 100 | parser_mpjpe.add_argument('--pose_dim', default=54, type=int, required=False) 101 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference between consecutive frames') 102 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False) 103 | args = parser_mpjpe.parse_args() 104 | 105 | 106 | 107 |
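# Shape contract (editor's sketch; the values mirror the defaults parsed above,
# and the snippet is illustrative rather than part of the original script):
# the mixer maps [batch, input_n, pose_dim] -> [batch, output_n, pose_dim].
#   sketch = MlpMixer(num_classes=54, num_blocks=10, hidden_dim=128,
#                     tokens_mlp_dim=20, channels_mlp_dim=128,
#                     seq_len=10, pred_len=25, input_size=54)
#   sketch(torch.zeros(2, 10, 54)).shape  # -> torch.Size([2, 25, 54])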
print(args) 108 | 109 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks, 110 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim, 111 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n, 112 | pred_len=args.output_n, activation=args.activation, 113 | mlp_block_type='normal', regularization=args.regularization, 114 | input_size=args.pose_dim, initialization='none', r_se=args.r_se, 115 | use_max_pooling=False, use_se=True) 116 | 117 | model = model.to(args.dev) 118 | 119 | 120 | model.load_state_dict(torch.load(args.model_path)) 121 | 122 | 123 | model.eval () 124 | 125 | 126 | test_mixer(model, args) 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | -------------------------------------------------------------------------------- /amass/train_mixer_amass.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle 4 | import torch 5 | import torch.nn as nn 6 | from torch.utils.data import DataLoader 7 | import torch.optim as optim 8 | import torch.autograd 9 | import matplotlib.pyplot as plt 10 | from utils.ang2joint import * 11 | from dataloader_amass import * 12 | import numpy as np 13 | import argparse 14 | import os 15 | from mlp_mixer import MlpMixer 16 | from tqdm import tqdm 17 | from torch.utils.tensorboard import SummaryWriter 18 | 19 | 20 | 21 | def get_log_dir(out_dir): 22 | dirs = [x[0] for x in os.walk(out_dir)] 23 | if len(dirs ) < 2: 24 | log_dir = os.path.join(out_dir, 'exp0') 25 | os.mkdir(log_dir) 26 | else: 27 | log_dir = os.path.join(out_dir, 'exp%i'%(len(dirs)-1)) 28 | os.mkdir(log_dir) 29 | 30 | return log_dir 31 | 32 | 33 | #%% 34 | def train(model, model_name, args): 35 | 36 | joint_used=np.arange(4,22) 37 | 38 | log_dir = get_log_dir(args.root) 39 | tb_writer = SummaryWriter(log_dir=log_dir) 40 | print('Save data of the run in: %s'%log_dir) 41 | 42 | device = args.dev 43 | 44 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-05) 45 | 46 | if args.use_scheduler: 47 | scheduler = optim.lr_scheduler.MultiStepLR( 48 | optimizer, milestones=args.milestones, gamma=args.gamma) 49 | 50 | train_loss, val_loss, test_loss = [], [], [] 51 | 52 | 53 | dataset = Datasets(args.data_dir, args.input_n, 54 | args.output_n, args.skip_rate, split=0) 55 | 56 | vald_dataset = Datasets(args.data_dir, args.input_n, 57 | args.output_n, args.skip_rate, split=1) 58 | 59 | 60 | 61 | 62 | print('>>> Training dataset length: {:d}'.format(dataset.__len__())) 63 | print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__())) 64 | 65 | data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True, 66 | num_workers=args.num_worker, pin_memory=True) 67 | vald_loader = DataLoader(vald_dataset, batch_size=args.batch_size, 68 | shuffle=True, num_workers=args.num_worker, pin_memory=True) 69 | 70 | 71 | for epoch in range(args.n_epochs): 72 | print('Run epoch: %i'%epoch) 73 | running_loss = 0 74 | n = 0 75 | model.train() 76 | for cnt, batch in tqdm(enumerate(data_loader), total=len(data_loader)): 77 | batch = batch.to(device) 78 | batch = batch[:, :, joint_used] 79 | batch_dim = batch.shape[0] 80 | n += batch_dim 81 | 82 | 83 | sequences_train = batch[:,0:args.input_n,:,:].reshape( 84 | -1, args.input_n, args.pose_dim) 85 | sequences_gt = 
batch[:,args.input_n:args.input_n+args.output_n,:,:].reshape(-1, args.output_n, args.pose_dim) 86 | 87 | 88 | optimizer.zero_grad() 89 | 90 | sequences_predict=model(sequences_train) 91 | 92 | loss=mpjpe_error(sequences_predict,sequences_gt)*1000 93 | 94 | if cnt % 200 == 0: 95 | print('[%d, %5d] training loss: %.3f' %(epoch + 1, cnt + 1, loss.item())) 96 | 97 | loss.backward() 98 | 99 | if args.clip_grad is not None: 100 | torch.nn.utils.clip_grad_norm_( 101 | model.parameters(), args.clip_grad) 102 | 103 | optimizer.step() 104 | 105 | running_loss += loss*batch_dim 106 | 107 | train_loss.append(running_loss.detach().cpu()/n) 108 | model.eval() 109 | with torch.no_grad(): 110 | running_loss = 0 111 | n = 0 112 | for cnt, batch in enumerate(vald_loader): 113 | batch = batch.to(device) 114 | batch = batch[:, :, joint_used] 115 | batch_dim = batch.shape[0] 116 | n += batch_dim 117 | 118 | 119 | sequences_train = batch[:,0:args.input_n,:,:].reshape( 120 | -1, args.input_n, args.pose_dim) 121 | sequences_gt = batch[:,args.input_n:args.input_n+args.output_n,:,:].reshape(-1, args.output_n, args.pose_dim) 122 | 123 | 124 | sequences_predict=model(sequences_train) 125 | 126 | loss=mpjpe_error(sequences_predict,sequences_gt)*1000 127 | 128 | if cnt % 200 == 0: 129 | print('[%d, %5d] validation loss: %.3f' %(epoch + 1, cnt + 1, loss.item())) 130 | 131 | 132 | running_loss += loss*batch_dim 133 | val_loss.append(running_loss.detach().cpu()/n) 134 | if args.use_scheduler: 135 | scheduler.step() 136 | 137 | 138 | test_loss.append(test_mpjpe(model, args)) 139 | 140 | 141 | tb_writer.add_scalar('loss/train', train_loss[-1].item(), epoch) 142 | tb_writer.add_scalar('loss/val', val_loss[-1].item(), epoch) 143 | 144 | torch.save(model.state_dict(), os.path.join(log_dir, 'model.pt')) 145 | # TODO write something to save the best model 146 | if (epoch+1)%1==0: 147 | print('----saving model-----') 148 | torch.save(model.state_dict(),os.path.join(args.model_path,model_name)) 149 | 150 | 151 | 152 | #%% 153 | def test_mpjpe(model, args): 154 | 155 | device = args.dev 156 | model.eval() 157 | accum_loss = 0 158 | n_batches = 0 # number of batches for all the sequences 159 | 160 | 161 | running_loss = 0 162 | n = 0 163 | 164 | 165 | Dataset = Datasets(args.data_dir,args.input_n,args.output_n,args.skip_rate,split=2) 166 | loader_test = DataLoader( Dataset, 167 | batch_size=args.batch_size, 168 | shuffle =False, 169 | num_workers=0) 170 | 171 | 172 | 173 | 174 | joint_used=np.arange(4,22) 175 | full_joint_used=np.arange(0,22) # needed for visualization 176 | with torch.no_grad(): 177 | for cnt,batch in enumerate(loader_test): 178 | batch = batch.float().to(device) 179 | batch_dim=batch.shape[0] 180 | n+=batch_dim 181 | 182 | sequences_train=batch[:,0:args.input_n,joint_used,:].view(-1,args.input_n,args.pose_dim) 183 | 184 | sequences_predict_gt=batch[:,args.input_n:args.input_n+args.output_n,full_joint_used,:]#.view(-1,args.output_n,args.pose_dim) 185 | 186 | sequences_predict=model(sequences_train).view(-1,args.output_n,18,3)#.permute(0,1,3,2) 187 | 188 | 189 | all_joints_seq=sequences_predict_gt.clone() 190 | 191 | all_joints_seq[:,:,joint_used,:]=sequences_predict 192 | 193 | loss=mpjpe_error(all_joints_seq,sequences_predict_gt)*1000 # loss in milimeters 194 | accum_loss+=loss*batch_dim 195 | print('overall average loss in mm is: '+str(accum_loss/n)) 196 | 197 | 198 | 199 | return accum_loss/n_batches 200 | 201 | #%% 202 | 203 | if __name__ == '__main__': 204 | parser = argparse.ArgumentParser(add_help=False) # 
Parameters for mpjpe 205 | parser.add_argument('--data_dir', type=str, default='../data_amass/', help='path to the unziped dataset directories(H36m/AMASS/3DPW)') 206 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames") 207 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames") 208 | parser.add_argument('--skip_rate', type=int, default=1, choices=[1, 5], help='rate of frames to skip,defaults=1 for H36M or 5 for AMASS/3DPW') 209 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader') 210 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging') #'./runs' 211 | 212 | parser.add_argument('--activation', default='gelu', type=str, required=False) 213 | parser.add_argument('--r_se', default=8, type=int, required=False) 214 | 215 | parser.add_argument('--n_epochs', default=50, type=int, required=False) 216 | parser.add_argument('--batch_size', default=200, type=int, required=False) # 100 50 in all original 50 217 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False) 218 | parser.add_argument('--pin_memory', default=False, type=bool, required=False) 219 | parser.add_argument('--loader_workers', default=4, type=int, required=False) 220 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False) 221 | parser.add_argument('--dev', default='cuda:0', type=str, required=False) 222 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, hee_normal, hee_uniform') 223 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False) 224 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma') 225 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs') 226 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients') 227 | parser.add_argument('--model_path', type=str, default='./checkpoints/amass_3d_25frames_ckpt', help='directory with the models checkpoints ') 228 | parser.add_argument('--actions_to_consider', default='all', help='Actions to visualize.Choose either all or a list of actions') 229 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set') 230 | parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from(train-val-test)') 231 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle']) 232 | 233 | args = parser.parse_args() 234 | 235 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe 236 | parser_mpjpe.add_argument('--hidden_dim', default=128, type=int, required=False) 237 | parser_mpjpe.add_argument('--num_blocks', default=5, type=int, required=False) 238 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False) 239 | parser_mpjpe.add_argument('--channels_mlp_dim', default=128, type=int, required=False) 240 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False) 241 | parser_mpjpe.add_argument('--pose_dim', default=54, type=int, required=False) 242 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference 
between 2 frames') 243 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False) 244 | args = parser_mpjpe.parse_args() 245 | 246 | 247 | 248 | print(args) 249 | 250 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks, 251 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim, 252 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n, 253 | pred_len=args.output_n, activation=args.activation, 254 | mlp_block_type='normal', regularization=args.regularization, 255 | input_size=args.pose_dim, initialization='none', r_se=args.r_se, 256 | use_max_pooling=False, use_se=True) 257 | 258 | model = model.to(args.dev) 259 | 260 | print('total number of parameters of the network is: ' + 261 | str(sum(p.numel() for p in model.parameters() if p.requires_grad))) 262 | 263 | model_name = 'h36_3d_'+str(args.output_n)+'frames_ckpt' 264 | 265 | #%% 266 | train(model, model_name, args) 267 | test_mpjpe(model, args) 268 | 269 | 270 | 271 | -------------------------------------------------------------------------------- /checkpoints/.gitignore: -------------------------------------------------------------------------------- 1 | # These are some examples of commonly ignored file patterns. 2 | # You should customize this list as applicable to your project. 3 | # Learn more about .gitignore: 4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore 5 | 6 | # Node artifact files 7 | node_modules/ 8 | dist/ 9 | 10 | # Compiled Java class files 11 | *.class 12 | 13 | # Compiled Python bytecode 14 | *.py[cod] 15 | 16 | # Log files 17 | *.log 18 | 19 | # Package files 20 | *.jar 21 | 22 | # Maven 23 | target/ 24 | dist/ 25 | 26 | # JetBrains IDE 27 | .idea/ 28 | 29 | # Unit test reports 30 | TEST*.xml 31 | 32 | # Generated by MacOS 33 | .DS_Store 34 | 35 | # Generated by Windows 36 | Thumbs.db 37 | 38 | # Applications 39 | *.app 40 | *.exe 41 | *.war 42 | 43 | # Large media files 44 | *.mp4 45 | *.tiff 46 | *.avi 47 | *.flv 48 | *.mov 49 | *.wmv 50 | 51 | -------------------------------------------------------------------------------- /h36m/datasets/dataset_h36m.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.data import Dataset 3 | import numpy as np 4 | from h5py import File 5 | import scipy.io as sio 6 | import utils.data_utils as data_utils 7 | from matplotlib import pyplot as plt 8 | import torch 9 | 10 | import os 11 | 12 | ''' 13 | adapted from 14 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion3d.py 15 | ''' 16 | 17 | 18 | class H36M_Dataset(Dataset): 19 | 20 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0): 21 | """ 22 | :param path_to_data: 23 | :param actions: 24 | :param input_n: 25 | :param output_n: 26 | :param dct_used: 27 | :param split: 0 train, 1 testing, 2 validation 28 | :param sample_rate: 29 | """ 30 | self.path_to_data = os.path.join(data_dir,'h3.6m/dataset') 31 | self.split = split 32 | self.in_n = input_n 33 | self.out_n = output_n 34 | self.sample_rate = 2 35 | self.p3d = {} 36 | self.data_idx = [] 37 | seq_len = self.in_n + self.out_n 38 | subs = [[1, 6, 7, 8, 9], [11], [5]] 39 | # acts = data_utils.define_actions(actions) 40 | if actions is None: 41 | acts = ["walking", "eating", "smoking", "discussion", "directions", 42 | "greeting", "phoning", "posing", "purchases", "sitting", 43 | "sittingdown", "takingphoto", "waiting", "walkingdog", 44 | "walkingtogether"] 45 | else: 46 | acts = actions 47 | # 
subs = np.array([[1], [11], [5]]) 48 | # acts = ['walking'] 49 | # 32 human3.6 joint name: 50 | joint_name = ["Hips", "RightUpLeg", "RightLeg", "RightFoot", "RightToeBase", "Site", "LeftUpLeg", "LeftLeg", 51 | "LeftFoot", 52 | "LeftToeBase", "Site", "Spine", "Spine1", "Neck", "Head", "Site", "LeftShoulder", "LeftArm", 53 | "LeftForeArm", 54 | "LeftHand", "LeftHandThumb", "Site", "L_Wrist_End", "Site", "RightShoulder", "RightArm", 55 | "RightForeArm", 56 | "RightHand", "RightHandThumb", "Site", "R_Wrist_End", "Site"] 57 | 58 | subs = subs[split] 59 | key = 0 60 | for subj in subs: 61 | for action_idx in np.arange(len(acts)): 62 | action = acts[action_idx] 63 | if self.split <= 1: 64 | for subact in [1, 2]: # subactions 65 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) 66 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) 67 | the_sequence = data_utils.readCSVasFloat(filename) 68 | n, d = the_sequence.shape 69 | even_list = range(0, n, self.sample_rate) 70 | num_frames = len(even_list) 71 | the_sequence = np.array(the_sequence[even_list, :]) 72 | the_sequence = torch.from_numpy(the_sequence).float().cuda() 73 | # remove global rotation and translation 74 | the_sequence[:, 0:6] = 0 75 | p3d = data_utils.expmap2xyz_torch(the_sequence) 76 | # self.p3d[(subj, action, subact)] = p3d.view(num_frames, -1).cpu().data.numpy() 77 | self.p3d[key] = p3d.view(num_frames, -1).cpu().data.numpy() 78 | 79 | valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) 80 | 81 | # tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) 82 | tmp_data_idx_1 = [key] * len(valid_frames) 83 | tmp_data_idx_2 = list(valid_frames) 84 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 85 | key += 1 86 | else: 87 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) 88 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) 89 | the_sequence1 = data_utils.readCSVasFloat(filename) 90 | n, d = the_sequence1.shape 91 | even_list = range(0, n, self.sample_rate) 92 | 93 | num_frames1 = len(even_list) 94 | the_sequence1 = np.array(the_sequence1[even_list, :]) 95 | the_seq1 = torch.from_numpy(the_sequence1).float().cuda() 96 | the_seq1[:, 0:6] = 0 97 | p3d1 = data_utils.expmap2xyz_torch(the_seq1) 98 | # self.p3d[(subj, action, 1)] = p3d1.view(num_frames1, -1).cpu().data.numpy() 99 | self.p3d[key] = p3d1.view(num_frames1, -1).cpu().data.numpy() 100 | 101 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) 102 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) 103 | the_sequence2 = data_utils.readCSVasFloat(filename) 104 | n, d = the_sequence2.shape 105 | even_list = range(0, n, self.sample_rate) 106 | 107 | num_frames2 = len(even_list) 108 | the_sequence2 = np.array(the_sequence2[even_list, :]) 109 | the_seq2 = torch.from_numpy(the_sequence2).float().cuda() 110 | the_seq2[:, 0:6] = 0 111 | p3d2 = data_utils.expmap2xyz_torch(the_seq2) 112 | 113 | # self.p3d[(subj, action, 2)] = p3d2.view(num_frames2, -1).cpu().data.numpy() 114 | self.p3d[key + 1] = p3d2.view(num_frames2, -1).cpu().data.numpy() 115 | 116 | # print("action:{}".format(action)) 117 | # print("subact1:{}".format(num_frames1)) 118 | # print("subact2:{}".format(num_frames2)) 119 | fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, 120 | input_n=self.in_n) 121 | 122 | valid_frames = fs_sel1[:, 0] 123 | tmp_data_idx_1 = [key] * 
len(valid_frames) 124 | tmp_data_idx_2 = list(valid_frames) 125 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 126 | 127 | valid_frames = fs_sel2[:, 0] 128 | tmp_data_idx_1 = [key + 1] * len(valid_frames) 129 | tmp_data_idx_2 = list(valid_frames) 130 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 131 | key += 2 132 | 133 | # ignore constant joints and joints at same position with other joints 134 | joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31]) 135 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 136 | self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore) 137 | 138 | def __len__(self): 139 | return np.shape(self.data_idx)[0] 140 | 141 | def __getitem__(self, item): 142 | key, start_frame = self.data_idx[item] 143 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) 144 | #print (self.p3d[key][fs].shape) 145 | return self.p3d[key][fs] 146 | 147 | -------------------------------------------------------------------------------- /h36m/datasets/dataset_h36m_ang.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset 2 | import numpy as np 3 | from h5py import File 4 | import scipy.io as sio 5 | from utils import data_utils 6 | from matplotlib import pyplot as plt 7 | import torch 8 | import os 9 | 10 | ''' 11 | adapted from 12 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion.py 13 | ''' 14 | 15 | 16 | class H36M_Dataset_Angle(Dataset): 17 | 18 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0): 19 | """ 20 | :param path_to_data: 21 | :param actions: 22 | :param input_n: 23 | :param output_n: 24 | :param dct_used: 25 | :param split: 0 train, 1 testing, 2 validation 26 | :param sample_rate: 27 | """ 28 | self.path_to_data = os.path.join(data_dir,'h3.6m/dataset') 29 | self.split = split 30 | self.in_n = input_n 31 | self.out_n = output_n 32 | self.sample_rate = 2 33 | self.seq = {} 34 | self.data_idx = [] 35 | 36 | self.dimensions_to_use = np.array( 37 | [6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, 38 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]) 39 | self.dimensions_to_ignore = np.array( 40 | [[0, 1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 19, 20, 25, 26, 31, 32, 33, 34, 35, 48, 49, 50, 58, 41 | 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 42 | 98]]) 43 | 44 | seq_len = self.in_n + self.out_n 45 | subs = [[1, 6, 7, 8, 9], [11], [5]] 46 | # acts = data_utils.define_actions(actions) 47 | if actions is None: 48 | acts = ["walking", "eating", "smoking", "discussion", "directions", 49 | "greeting", "phoning", "posing", "purchases", "sitting", 50 | "sittingdown", "takingphoto", "waiting", "walkingdog", 51 | "walkingtogether"] 52 | else: 53 | acts = actions 54 | # subs = np.array([[1], [11], [5]]) 55 | # acts = ['walking'] 56 | 57 | subs = subs[split] 58 | 59 | for subj in subs: 60 | for action_idx in np.arange(len(acts)): 61 | action = acts[action_idx] 62 | if self.split <= 1: 63 | for subact in [1, 2]: # subactions 64 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact)) 65 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact) 66 | the_sequence = data_utils.readCSVasFloat(filename) 67 | n, d = the_sequence.shape 68 | 
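# (added comment, assuming the standard 50 Hz capture rate of H3.6M)
# sample_rate = 2 keeps every other frame (~25 fps); shortly below, the first
# six exp-map values (global rotation and translation) are zeroed before the
# sequence is stored in self.seq.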
even_list = range(0, n, self.sample_rate) 69 | num_frames = len(even_list) 70 | the_sequence = np.array(the_sequence[even_list, :]) 71 | # the_sequence = torch.from_numpy(the_sequence).float().cuda() 72 | # remove global rotation and translation 73 | the_sequence[:, 0:6] = 0 74 | # p3d = data_utils.expmap2xyz_torch(the_sequence) 75 | self.seq[(subj, action, subact)] = the_sequence 76 | 77 | valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate) 78 | 79 | tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames) 80 | tmp_data_idx_2 = list(valid_frames) 81 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 82 | else: 83 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1)) 84 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1) 85 | the_sequence1 = data_utils.readCSVasFloat(filename) 86 | n, d = the_sequence1.shape 87 | even_list = range(0, n, self.sample_rate) 88 | 89 | num_frames1 = len(even_list) 90 | the_sequence1 = np.array(the_sequence1[even_list, :]) 91 | # the_seq1 = torch.from_numpy(the_sequence1).float().cuda() 92 | the_sequence1[:, 0:6] = 0 93 | # p3d1 = data_utils.expmap2xyz_torch(the_seq1) 94 | self.seq[(subj, action, 1)] = the_sequence1 95 | 96 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2)) 97 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2) 98 | the_sequence2 = data_utils.readCSVasFloat(filename) 99 | n, d = the_sequence2.shape 100 | even_list = range(0, n, self.sample_rate) 101 | 102 | num_frames2 = len(even_list) 103 | the_sequence2 = np.array(the_sequence2[even_list, :]) 104 | # the_seq2 = torch.from_numpy(the_sequence2).float().cuda() 105 | the_sequence2[:, 0:6] = 0 106 | # p3d2 = data_utils.expmap2xyz_torch(the_seq2) 107 | self.seq[(subj, action, 2)] = the_sequence2 108 | 109 | # fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len, 110 | # input_n=self.in_n) 111 | fs_sel1, fs_sel2 = data_utils.find_indices_srnn(num_frames1, num_frames2, seq_len, 112 | input_n=self.in_n) 113 | 114 | valid_frames = fs_sel1[:, 0] 115 | tmp_data_idx_1 = [(subj, action, 1)] * len(valid_frames) 116 | tmp_data_idx_2 = list(valid_frames) 117 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 118 | 119 | valid_frames = fs_sel2[:, 0] 120 | tmp_data_idx_1 = [(subj, action, 2)] * len(valid_frames) 121 | tmp_data_idx_2 = list(valid_frames) 122 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2)) 123 | 124 | def __len__(self): 125 | return np.shape(self.data_idx)[0] 126 | 127 | def __getitem__(self, item): 128 | key, start_frame = self.data_idx[item] 129 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n) 130 | return self.seq[key][fs] 131 | -------------------------------------------------------------------------------- /h36m/h36_3d_viz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | import numpy as np 5 | import torch 6 | from torch.utils.data import DataLoader 7 | import matplotlib.pyplot as plt 8 | from mpl_toolkits.mplot3d import Axes3D 9 | import matplotlib.animation as animation 10 | from datasets.dataset_h36m import Datasets 11 | #from utils.data_utils import define_actions 12 | 13 | 14 | def mpjpe_error(batch_pred,batch_gt): 15 | 16 | batch_pred= batch_pred.contiguous().view(-1,3) 17 | batch_gt=batch_gt.contiguous().view(-1,3) 18 | 19 | return torch.mean(torch.norm(batch_gt-batch_pred,2,1)) 20 | 21 | 
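# Minimal usage sketch of mpjpe_error above (editor's addition; the tensors
# are hypothetical). Both arguments are flattened to (-1, 3), so any
# (batch, frames, joints, 3) layout works:
#   pred = torch.zeros(4, 25, 32, 3)
#   gt = torch.ones(4, 25, 32, 3)
#   mpjpe_error(pred, gt)  # tensor(1.7321) == sqrt(3): each joint is off by (1, 1, 1)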
22 | def define_actions(action): 23 | """ 24 | Define the list of actions we are using. 25 | 26 | Args 27 | action: String with the passed action. Could be "all" 28 | Returns 29 | actions: List of strings of actions 30 | Raises 31 | ValueError if the action is not included in H3.6M 32 | """ 33 | 34 | actions = ["walking", "eating", "smoking", "discussion", "directions", 35 | "greeting", "phoning", "posing", "purchases", "sitting", 36 | "sittingdown", "takingphoto", "waiting", "walkingdog", 37 | "walkingtogether"] 38 | if action in actions: 39 | return [action] 40 | 41 | if action == "all": 42 | return actions 43 | 44 | if action == "all_srnn": 45 | return ["walking", "eating", "smoking", "discussion"] 46 | 47 | raise (ValueError, "Unrecognized action: %d" % action) 48 | 49 | def create_pose(ax,plots,vals,pred=True,update=False): 50 | 51 | 52 | 53 | # h36m 32 joints(full) 54 | connect = [ 55 | (1, 2), (2, 3), (3, 4), (4, 5), 56 | (6, 7), (7, 8), (8, 9), (9, 10), 57 | (0, 1), (0, 6), 58 | (6, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22), 59 | (1, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30), 60 | (24, 25), (24, 17), 61 | (24, 14), (14, 15) 62 | ] 63 | LR = [ 64 | False, True, True, True, True, 65 | True, False, False, False, False, 66 | False, True, True, True, True, 67 | True, True, False, False, False, 68 | False, False, False, False, True, 69 | False, True, True, True, True, 70 | True, True 71 | ] 72 | 73 | 74 | # Start and endpoints of our representation 75 | I = np.array([touple[0] for touple in connect]) 76 | J = np.array([touple[1] for touple in connect]) 77 | # Left / right indicator 78 | LR = np.array([LR[a] or LR[b] for a,b in connect]) 79 | if pred: 80 | lcolor = "#9b59b6" 81 | rcolor = "#2ecc71" 82 | else: 83 | lcolor = "#8e8e8e" 84 | rcolor = "#383838" 85 | 86 | for i in np.arange( len(I)): 87 | x = np.array( [vals[I[i], 0], vals[J[i], 0]] ) 88 | z = np.array( [vals[I[i], 1], vals[J[i], 1]] ) 89 | y = np.array( [vals[I[i], 2], vals[J[i], 2]] ) 90 | if not update: 91 | 92 | if i ==0: 93 | plots.append(ax.plot(x, y, z, lw=2,linestyle='--' ,c=lcolor if LR[i] else rcolor,label=['GT' if not pred else 'Pred'])) 94 | else: 95 | plots.append(ax.plot(x, y, z, lw=2,linestyle='--', c=lcolor if LR[i] else rcolor)) 96 | 97 | elif update: 98 | plots[i][0].set_xdata(x) 99 | plots[i][0].set_ydata(y) 100 | plots[i][0].set_3d_properties(z) 101 | plots[i][0].set_color(lcolor if LR[i] else rcolor) 102 | 103 | return plots 104 | # ax.legend(loc='lower left') 105 | 106 | 107 | # In[11]: 108 | 109 | 110 | def update(num,data_gt,data_pred,plots_gt,plots_pred,fig,ax): 111 | 112 | gt_vals=data_gt[num] 113 | pred_vals=data_pred[num] 114 | plots_gt=create_pose(ax,plots_gt,gt_vals,pred=False,update=True) 115 | plots_pred=create_pose(ax,plots_pred,pred_vals,pred=True,update=True) 116 | 117 | 118 | 119 | 120 | 121 | r = 0.75 122 | xroot, zroot, yroot = gt_vals[0,0], gt_vals[0,1], gt_vals[0,2] 123 | ax.set_xlim3d([-r+xroot, r+xroot]) 124 | ax.set_ylim3d([-r+yroot, r+yroot]) 125 | ax.set_zlim3d([-r+zroot, r+zroot]) 126 | #ax.set_title('pose at time frame: '+str(num)) 127 | #ax.set_aspect('equal') 128 | 129 | return plots_gt,plots_pred 130 | 131 | 132 | 133 | 134 | 135 | 136 | #%% 137 | 138 | 139 | def visualize(input_n,output_n,visualize_from,path,modello,device,n_viz,skip_rate,actions,encoding ='dct'): 140 | 141 | import random 142 | actions=define_actions(actions) 143 | 144 | for action in actions: 145 | 146 | if visualize_from=='train': 147 | loader=Datasets(path,input_n,output_n,skip_rate, 
split=0,actions=[action]) 148 | elif visualize_from=='validation': 149 | loader=Datasets(path,input_n,output_n,skip_rate, split=1,actions=[action]) 150 | elif visualize_from=='test': 151 | loader=Datasets(path,input_n,output_n,skip_rate, split=2,actions=[action]) 152 | 153 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 154 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 155 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 156 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) 157 | # joints at same loc 158 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) 159 | index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 160 | joint_equal = np.array([13, 19, 22, 13, 27, 30]) 161 | index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2)) 162 | 163 | 164 | loader = DataLoader( 165 | loader, 166 | batch_size=256, 167 | shuffle = False, # for comparable visualizations with other models 168 | num_workers=0) 169 | 170 | 171 | 172 | for cnt,batch in enumerate(loader): 173 | batch = batch.to(device) 174 | 175 | all_joints_seq=batch.clone()[:, input_n:input_n+output_n,:] 176 | 177 | sequences_train=batch[:, 0:input_n, dim_used].view(-1,input_n,len(dim_used)) 178 | sequences_gt=batch[:, input_n:input_n+output_n, :] 179 | 180 | sequences_predict=modello(sequences_train).contiguous().view(-1,output_n,len(dim_used)) 181 | 182 | all_joints_seq[:,:,dim_used] = sequences_predict 183 | 184 | all_joints_seq[:,:,index_to_ignore] = all_joints_seq[:,:,index_to_equal] 185 | 186 | 187 | all_joints_seq=all_joints_seq.view(-1,output_n,32,3) 188 | 189 | sequences_gt=sequences_gt.view(-1,output_n,32,3) 190 | 191 | loss=mpjpe_error(all_joints_seq,sequences_gt)# # both must have format (batch,T,V,C) 192 | 193 | 194 | 195 | data_pred=torch.squeeze(all_joints_seq,0).cpu().data.numpy()/1000 # in meters 196 | data_gt=torch.squeeze(sequences_gt,0).cpu().data.numpy()/1000 197 | 198 | i = random.randint(1,256) 199 | 200 | data_pred = data_pred [i] 201 | data_gt = data_gt [i] 202 | 203 | #print (data_gt.shape,data_pred.shape) 204 | 205 | fig = plt.figure() 206 | ax = Axes3D(fig) 207 | ax.view_init(elev=20, azim=-40) 208 | vals = np.zeros((32, 3)) # or joints_to_consider 209 | gt_plots=[] 210 | pred_plots=[] 211 | 212 | gt_plots=create_pose(ax,gt_plots,vals,pred=False,update=False) 213 | pred_plots=create_pose(ax,pred_plots,vals,pred=True,update=False) 214 | 215 | ax.set_xlabel("x") 216 | ax.set_ylabel("y") 217 | ax.set_zlabel("z") 218 | ax.legend(loc='lower left') 219 | 220 | 221 | 222 | ax.set_xlim3d([-1, 1.5]) 223 | ax.set_xlabel('X') 224 | 225 | ax.set_ylim3d([-1, 1.5]) 226 | ax.set_ylabel('Y') 227 | 228 | ax.set_zlim3d([0.0, 1.5]) 229 | ax.set_zlabel('Z') 230 | ax.set_title('loss in mm is: '+str(round(loss.item(),4))+' for action : '+str(action)+' for '+str(output_n)+' frames') 231 | 232 | line_anim = animation.FuncAnimation(fig, update, output_n, fargs=(data_gt,data_pred,gt_plots,pred_plots, 233 | fig,ax),interval=70, blit=False) 234 | plt.show() 235 | 236 | line_anim.save('./visualizations/pred{}/human_viz{}.gif'.format (25,i),writer='pillow') 237 | 238 | 239 | if cnt==n_viz-1: 240 | break 241 | 242 | 243 | 244 | 245 | -------------------------------------------------------------------------------- /h36m/mlp_mixer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 
| import torch.nn.functional as F 4 | 5 | 6 | class SELayer(nn.Module): 7 | def __init__(self, c, r=4, use_max_pooling=False): 8 | super().__init__() 9 | self.squeeze = nn.AdaptiveAvgPool1d(1) if not use_max_pooling else nn.AdaptiveMaxPool1d(1) 10 | self.excitation = nn.Sequential( 11 | nn.Linear(c, c // r, bias=False), 12 | nn.ReLU(inplace=True), 13 | nn.Linear(c // r, c, bias=False), 14 | nn.Sigmoid() 15 | ) 16 | 17 | 18 | def forward(self, x): 19 | bs, s, h = x.shape 20 | y = self.squeeze(x).view(bs, s) 21 | y = self.excitation(y).view(bs, s, 1) 22 | return x * y.expand_as(x) 23 | 24 | 25 | 26 | def mish(x): 27 | return (x*torch.tanh(F.softplus(x))) 28 | 29 | 30 | 31 | 32 | class MlpBlock(nn.Module): 33 | def __init__(self, mlp_hidden_dim, mlp_input_dim, mlp_bn_dim, activation='gelu', regularization=0, initialization='none'): 34 | super().__init__() 35 | self.mlp_hidden_dim = mlp_hidden_dim 36 | self.mlp_input_dim = mlp_input_dim 37 | self.mlp_bn_dim = mlp_bn_dim 38 | #self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_input_dim) 39 | self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_hidden_dim) 40 | self.fc2 = nn.Linear(self.mlp_hidden_dim, self.mlp_input_dim) 41 | if regularization > 0.0: 42 | self.reg1 = nn.Dropout(regularization) 43 | self.reg2 = nn.Dropout(regularization) 44 | elif regularization == -1.0: 45 | self.reg1 = nn.BatchNorm1d(self.mlp_bn_dim) 46 | self.reg2 = nn.BatchNorm1d(self.mlp_bn_dim) 47 | else: 48 | self.reg1 = None 49 | self.reg2 = None 50 | 51 | if activation == 'gelu': 52 | self.act1 = nn.GELU() 53 | elif activation == 'mish': 54 | self.act1 = mish #nn.Mish() 55 | else: 56 | raise ValueError('Unknown activation function type: %s'%activation) 57 | 58 | 59 | 60 | def forward(self, x): 61 | x = self.fc1(x) 62 | x = self.act1(x) 63 | if self.reg1 is not None: 64 | x = self.reg1(x) 65 | x = self.fc2(x) 66 | if self.reg2 is not None: 67 | x = self.reg2(x) 68 | 69 | return x 70 | 71 | 72 | 73 | class MixerBlock(nn.Module): 74 | def __init__(self, tokens_mlp_dim, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 75 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 76 | super().__init__() 77 | self.tokens_mlp_dim = tokens_mlp_dim 78 | self.channels_mlp_dim = channels_mlp_dim 79 | self.seq_len = seq_len 80 | self.hidden_dim = hidden_dim # out channels of the conv 81 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization) 82 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization) 83 | self.use_se = use_se 84 | if self.use_se: 85 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 86 | 87 | self.LN1 = nn.LayerNorm(self.hidden_dim) 88 | self.LN2 = nn.LayerNorm(self.hidden_dim) 89 | 90 | 91 | 92 | def forward(self, x): 93 | # shape x [256, 8, 512] [bs, patches/time_steps, channels 94 | y = self.LN1(x) 95 | 96 | y = y.transpose(1, 2) 97 | y = self.mlp_block_token_mixing(y) 98 | y = y.transpose(1, 2) 99 | 100 | if self.use_se: 101 | y = self.se(y) 102 | x = x + y 103 | 104 | y = self.LN2(x) 105 | y = self.mlp_block_channel_mixing(y) 106 | 107 | if self.use_se: 108 | y = self.se(y) 109 | 110 | return x + y 111 | 112 | 113 | 114 | class MixerBlock_Channel(nn.Module): 115 | def __init__(self, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 
116 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 117 | super().__init__() 118 | self.channels_mlp_dim = channels_mlp_dim 119 | self.seq_len = seq_len 120 | self.hidden_dim = hidden_dim # out channels of the conv 121 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization) 122 | self.use_se = use_se 123 | if self.use_se: 124 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 125 | 126 | 127 | self.LN2 = nn.LayerNorm(self.hidden_dim) 128 | 129 | 130 | 131 | def forward(self, x): 132 | # shape x [256, 8, 512] [bs, patches/time_steps, channels] 133 | y = x 134 | 135 | if self.use_se: 136 | y = self.se(y) 137 | x = x + y 138 | y = self.LN2(x) 139 | y = self.mlp_block_channel_mixing(y) 140 | if self.use_se: 141 | y = self.se(y) 142 | 143 | return x + y 144 | 145 | 146 | 147 | class MixerBlock_Token(nn.Module): 148 | def __init__(self, tokens_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0, 149 | initialization='none', r_se=4, use_max_pooling=False, use_se=True): 150 | super().__init__() 151 | self.tokens_mlp_dim = tokens_mlp_dim 152 | 153 | self.seq_len = seq_len 154 | self.hidden_dim = hidden_dim # out channels of the conv 155 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization) 156 | 157 | self.use_se = use_se 158 | 159 | if self.use_se: 160 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling) 161 | 162 | self.LN1 = nn.LayerNorm(self.hidden_dim) 163 | 164 | 165 | def forward(self, x): 166 | # shape x [256, 8, 512] [bs, patches/time_steps, channels] 167 | y = self.LN1(x) 168 | y = y.transpose(1, 2) 169 | y = self.mlp_block_token_mixing(y) 170 | y = y.transpose(1, 2) 171 | 172 | if self.use_se: 173 | y = self.se(y) 174 | x = x + y 175 | 176 | return x + y 177 | 178 | 179 | 180 | class MlpMixer(nn.Module): 181 | def __init__(self, num_classes, num_blocks, hidden_dim, tokens_mlp_dim, 182 | channels_mlp_dim, seq_len,pred_len, activation='gelu', 183 | mlp_block_type='normal',regularization=0, input_size=51, 184 | initialization='none', r_se=4, use_max_pooling=False, 185 | use_se=False): 186 | 187 | super().__init__() 188 | self.num_classes = num_classes 189 | self.num_blocks = num_blocks 190 | self.hidden_dim = hidden_dim 191 | self.seq_len = seq_len 192 | self.tokens_mlp_dim = tokens_mlp_dim 193 | self.channels_mlp_dim = channels_mlp_dim 194 | self.input_size = input_size # varies with the number of joints 195 | self.conv = nn.Conv1d(1, self.hidden_dim, (1, self.input_size), stride=1) 196 | self.activation = activation 197 | 198 | 199 | self.channel_only = False # set True for the channel-mixing-only ablation 200 | self.token_only = False # set True for the token-mixing-only ablation 201 | 202 | 203 | 204 | if self.channel_only: 205 | 206 | self.Mixer_Block = nn.ModuleList(MixerBlock_Channel(self.channels_mlp_dim,self.seq_len, self.hidden_dim, 207 | activation=self.activation, regularization=regularization, initialization=initialization, 208 | r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se) 209 | for _ in range(num_blocks)) 210 | 211 | 212 | elif self.token_only: # elif, so the default branch below cannot overwrite the ablation choice 213 | 214 | self.Mixer_Block = nn.ModuleList(MixerBlock_Token(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, 215 | activation=self.activation, regularization=regularization, initialization=initialization, 216 | r_se=r_se, 
use_max_pooling=use_max_pooling, use_se=use_se) 217 | for _ in range(num_blocks)) 218 | 219 | else: 220 | 221 | self.Mixer_Block = nn.ModuleList(MixerBlock(self.tokens_mlp_dim, self.channels_mlp_dim, 222 | self.seq_len, self.hidden_dim, activation=self.activation, 223 | regularization=regularization, initialization=initialization, 224 | r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se) 225 | for _ in range(num_blocks)) 226 | 227 | 228 | self.LN = nn.LayerNorm(self.hidden_dim) 229 | 230 | self.fc_out = nn.Linear(self.hidden_dim, self.num_classes) 231 | 232 | self.pred_len = pred_len 233 | self.conv_out = nn.Conv1d(self.seq_len, self.pred_len, 1, stride=1) 234 | 235 | 236 | 237 | 238 | def forward(self, x): 239 | x = x.unsqueeze(1) 240 | y = self.conv(x) 241 | y = y.squeeze(dim=3).transpose(1, 2) 242 | 243 | # [256, 8, 512] [bs, patches/time_steps, channels] 244 | for mb in self.Mixer_Block: 245 | y = mb(y) 246 | y = self.LN(y) 247 | 248 | out = self.fc_out(self.conv_out(y)) 249 | 250 | return out 251 | -------------------------------------------------------------------------------- /h36m/test_mixer_h36m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from datasets.dataset_h36m import H36M_Dataset 4 | from datasets.dataset_h36m_ang import H36M_Dataset_Angle 5 | from utils.data_utils import define_actions 6 | from torch.utils.data import DataLoader 7 | from mlp_mixer import MlpMixer 8 | import matplotlib.pyplot as plt 9 | import torch.optim as optim 10 | import numpy as np 11 | import argparse 12 | from utils.utils_mixer import delta_2_gt, mpjpe_error, euler_error 13 | from tqdm import tqdm 14 | from torch.utils.tensorboard import SummaryWriter 15 | 16 | 17 | def test_pretrained(model,args): 18 | 19 | N = 0 20 | eval_frame = [1, 3, 7, 9, 13, 17, 21, 24] 21 | 22 | t_3d = np.zeros(len(eval_frame)) 23 | 24 | t_3d_all = [] 25 | 26 | model.eval() 27 | accum_loss=0 28 | n_batches=0 # number of batches for all the sequences 29 | actions=define_actions(args.actions_to_consider) 30 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 31 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 32 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 33 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) 34 | # joints at same loc 35 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) 36 | index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 37 | joint_equal = np.array([13, 19, 22, 13, 27, 30]) 38 | index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2)) 39 | 40 | idx_eval = 7 41 | 42 | 43 | for action in actions: 44 | running_loss=0 45 | n=0 46 | dataset_test = H36M_Dataset(args.data_dir,args.input_n,args.output_n,args.skip_rate, split=2,actions=[action]) 47 | 48 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test, shuffle=False, num_workers=0, pin_memory=True) 49 | for cnt,batch in enumerate(test_loader): 50 | with torch.no_grad(): 51 | 52 | batch=batch.to(args.device) 53 | batch_dim=batch.shape[0] 54 | n+=batch_dim 55 | 56 | 57 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:] 58 | all_joints_seq_gt=batch.clone()[:, args.input_n:args.input_n+args.output_n,:] 59 | 60 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used)) 61 | 62 | sequences_gt=batch[:, 
args.input_n:args.input_n+args.output_n, dim_used].view(-1,args.output_n,args.pose_dim) 63 | 64 | if args.delta_x: 65 | sequences_all = torch.cat((sequences_train, sequences_gt), 1) 66 | sequences_all_delta = [sequences_all[:,1,:] - sequences_all[:,0,:]] 67 | for i in range(args.input_n+args.output_n-1): 68 | sequences_all_delta.append(sequences_all[:,i+1,:] - sequences_all[:,i,:]) 69 | 70 | sequences_all_delta = torch.stack(sequences_all_delta).permute(1,0,2) 71 | sequences_train_delta = sequences_all_delta[:, 0:args.input_n, :] 72 | sequences_predict=model(sequences_train_delta) 73 | sequences_predict = delta_2_gt(sequences_predict,sequences_train[:,-1,:]) 74 | loss=mpjpe_error(sequences_predict,sequences_gt) 75 | 76 | 77 | 78 | sequences_gt_3d = sequences_gt.reshape(sequences_gt.shape[0],sequences_gt.shape[1],-1,3) 79 | sequences_predict_3d = sequences_predict.reshape(sequences_predict.shape[0],sequences_predict.shape[1],-1,3) 80 | 81 | # print(sequences_gt.shape) 82 | 83 | for k in np.arange(0, len(eval_frame)): 84 | j = eval_frame[k] 85 | t_3d[k] += torch.mean(torch.norm(sequences_gt_3d[:, j, :, :].contiguous().view(-1, 3) - sequences_predict_3d[:, j, :, :].contiguous().view(-1, 3), 2, 1)).item() * n 86 | 87 | 88 | N += n 89 | 90 | 91 | 92 | else: 93 | sequences_predict=model(sequences_train) 94 | loss=mpjpe_error(sequences_predict,sequences_gt) 95 | 96 | 97 | 98 | all_joints_seq[:,:,dim_used] = sequences_predict 99 | all_joints_seq[:,:,index_to_ignore] = all_joints_seq[:,:,index_to_equal] 100 | 101 | 102 | all_joints_seq_gt[:,:,dim_used] = sequences_gt 103 | all_joints_seq_gt[:,:,index_to_ignore] = all_joints_seq_gt[:,:,index_to_equal] 104 | 105 | 106 | loss=mpjpe_error(all_joints_seq.view(-1,args.output_n,32,3),all_joints_seq_gt.view(-1,args.output_n,32,3)) 107 | 108 | 109 | 110 | running_loss+=loss*batch_dim 111 | accum_loss+=loss*batch_dim 112 | 113 | print('loss at test subject for action: '+str(action)+' is: '+str(running_loss/n)) 114 | n_batches+=n 115 | 116 | t_3d_all.append(t_3d[idx_eval]/N) 117 | 118 | print('overall average loss in mm is: '+str(accum_loss/n_batches)) 119 | 120 | 121 | 122 | print('overall final loss in mm is:', np.mean(t_3d_all)) 123 | 124 | 125 | 126 | 127 | if __name__ == '__main__': 128 | parser = argparse.ArgumentParser(add_help=False) # Parameters for mpjpe 129 | parser.add_argument('--data_dir', type=str, default='../data_h36m/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)') 130 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames") 131 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames") 132 | parser.add_argument('--skip_rate', type=int, default=1, choices=[1, 5], help='rate of frames to skip, default 1 for H36M or 5 for AMASS/3DPW') 133 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader') 134 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging') 135 | 136 | parser.add_argument('--activation', default='mish', type=str, required=False) # 'mish' or 'gelu' 137 | parser.add_argument('--r_se', default=8, type=int, required=False) 138 | 139 | parser.add_argument('--n_epochs', default=50, type=int, required=False) 140 | parser.add_argument('--batch_size', default=50, type=int, required=False) # batch size used in the original experiments 141 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False) 142 | parser.add_argument('--pin_memory', 
default=False, type=bool, required=False) 143 | parser.add_argument('--loader_workers', default=4, type=int, required=False) 144 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False) 145 | parser.add_argument('--dev', default='cuda:0', type=str, required=False) 146 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, he_normal, he_uniform') 147 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False) 148 | parser.add_argument('--milestones', type=int, nargs='+', default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma') 149 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs') 150 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients') 151 | parser.add_argument('--model_path', type=str, default='./checkpoints/h36m_3d_25frames_ckpt', help='path to the saved model checkpoint') 152 | parser.add_argument('--actions_to_consider', default='all', help='actions to consider. Choose either all or a list of actions') 153 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set') 154 | parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)') 155 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle']) 156 | parser.add_argument('--device', type=str, default='cuda:0', choices=['cuda:0', 'cpu']) 157 | 158 | 159 | 160 | 161 | 162 | args, _ = parser.parse_known_args() # tolerate the model-specific flags parsed below 163 | 164 | if args.loss_type == 'mpjpe': 165 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe 166 | parser_mpjpe.add_argument('--hidden_dim', default=50, type=int, required=False) 167 | parser_mpjpe.add_argument('--num_blocks', default=4, type=int, required=False) 168 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False) 169 | parser_mpjpe.add_argument('--channels_mlp_dim', default=50, type=int, required=False) 170 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False) 171 | parser_mpjpe.add_argument('--pose_dim', default=66, type=int, required=False) 172 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predict frame-to-frame differences instead of absolute poses') 173 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False) 174 | args = parser_mpjpe.parse_args() 175 | 176 | elif args.loss_type == 'angle': 177 | parser_angle = argparse.ArgumentParser(parents=[parser]) # Parameters for angle 178 | parser_angle.add_argument('--hidden_dim', default=60, type=int, required=False) 179 | parser_angle.add_argument('--num_blocks', default=3, type=int, required=False) 180 | parser_angle.add_argument('--tokens_mlp_dim', default=40, type=int, required=False) 181 | parser_angle.add_argument('--channels_mlp_dim', default=60, type=int, required=False) 182 | parser_angle.add_argument('--regularization', default=0.0, type=float, required=False) 183 | parser_angle.add_argument('--pose_dim', default=48, type=int, required=False) 184 | parser_angle.add_argument('--lr', default=1e-02, type=float, required=False) 185 | args = parser_angle.parse_args() 186 | 187 | if args.loss_type == 'angle' and getattr(args, 'delta_x', False): # the angle parser defines no --delta_x 188 | raise ValueError('delta_x and loss type angle cannot be used together.') 189 | 
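    # Illustrative shape note (ours, not in the original script): with the default
    # mpjpe settings the mixer maps windows of shape (batch, input_n, pose_dim)
    # to (batch, output_n, pose_dim). A hypothetical sanity check could look like:
    #
    #     dummy = torch.rand(2, args.input_n, args.pose_dim)
    #     out = model(dummy)   # -> torch.Size([2, args.output_n, args.pose_dim])
    #
    # `dummy` and `out` are placeholder names used only for this sketch.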
190 | print(args) 191 | 192 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks, 193 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim, 194 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n, 195 | pred_len=args.output_n, activation=args.activation, 196 | mlp_block_type='normal', regularization=args.regularization, 197 | input_size=args.pose_dim, initialization='none', r_se=args.r_se, 198 | use_max_pooling=False, use_se=True) 199 | 200 | model = model.to(args.dev) 201 | 202 | 203 | model.load_state_dict(torch.load(args.model_path)) 204 | 205 | print('total number of parameters of the network is: ' + 206 | str(sum(p.numel() for p in model.parameters() if p.requires_grad))) 207 | 208 | 209 | 210 | 211 | test_pretrained(model, args) 212 | 213 | 214 | 215 | -------------------------------------------------------------------------------- /h36m/train_mixer_h36m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | from datasets.dataset_h36m import H36M_Dataset 4 | from datasets.dataset_h36m_ang import H36M_Dataset_Angle 5 | from utils.data_utils import define_actions 6 | from torch.utils.data import DataLoader 7 | from mlp_mixer import MlpMixer 8 | import torch.optim as optim 9 | import numpy as np 10 | import argparse 11 | from utils.utils_mixer import delta_2_gt, mpjpe_error, euler_error 12 | from tqdm import tqdm 13 | from torch.utils.tensorboard import SummaryWriter 14 | 15 | 16 | def get_log_dir(out_dir): 17 | dirs = [x[0] for x in os.walk(out_dir)] 18 | if len(dirs ) < 2: 19 | log_dir = os.path.join(out_dir, 'exp0') 20 | os.mkdir(log_dir) 21 | else: 22 | log_dir = os.path.join(out_dir, 'exp%i'%(len(dirs)-1)) 23 | os.mkdir(log_dir) 24 | 25 | return log_dir 26 | 27 | 28 | def train(model, model_name, args): 29 | 30 | log_dir = get_log_dir(args.root) 31 | tb_writer = SummaryWriter(log_dir=log_dir) 32 | print('Save data of the run in: %s'%log_dir) 33 | 34 | device = args.dev 35 | 36 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-05) 37 | 38 | if args.use_scheduler: 39 | scheduler = optim.lr_scheduler.MultiStepLR( 40 | optimizer, milestones=args.milestones, gamma=args.gamma) 41 | 42 | train_loss, val_loss, test_loss = [], [], [] 43 | 44 | if args.loss_type == 'mpjpe': 45 | dataset = H36M_Dataset(args.data_dir, args.input_n, 46 | args.output_n, args.skip_rate, split=0) 47 | vald_dataset = H36M_Dataset(args.data_dir, args.input_n, 48 | args.output_n, args.skip_rate, split=1) 49 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 50 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 51 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 52 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) 53 | 54 | elif args.loss_type == 'angle': 55 | dataset = H36M_Dataset_Angle(args.data_dir, args.input_n, args.output_n, 56 | args.skip_rate, split=0) 57 | vald_dataset = H36M_Dataset_Angle(args.data_dir, args.input_n, 58 | args.output_n, args.skip_rate, split=1) 59 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, 60 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 61 | 86]) 62 | 63 | print('>>> Training dataset length: {:d}'.format(dataset.__len__())) 64 | print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__())) 65 | 66 | data_loader = DataLoader(dataset, 
batch_size=args.batch_size, shuffle=True, 67 | num_workers=args.num_worker, pin_memory=True) 68 | vald_loader = DataLoader(vald_dataset, batch_size=args.batch_size, 69 | shuffle=True, num_workers=args.num_worker, pin_memory=True) 70 | 71 | 72 | for epoch in range(args.n_epochs): 73 | print('Run epoch: %i'%epoch) 74 | running_loss = 0 75 | n = 0 76 | model.train() 77 | for cnt, batch in tqdm(enumerate(data_loader), total=len(data_loader)): 78 | batch = batch.to(device) 79 | batch_dim = batch.shape[0] 80 | n += batch_dim 81 | 82 | if args.loss_type == 'mpjpe': 83 | sequences_train = batch[:, 0:args.input_n, dim_used].view( 84 | -1, args.input_n, args.pose_dim) 85 | sequences_gt = batch[:, args.input_n:args.input_n + 86 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim) 87 | elif args.loss_type == 'angle': 88 | sequences_train=batch[:, 0:args.input_n, dim_used].view( 89 | -1,args.input_n,len(dim_used)) 90 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n, dim_used] 91 | 92 | optimizer.zero_grad() 93 | 94 | if args.delta_x: 95 | sequences_all = torch.cat((sequences_train, sequences_gt), 1) 96 | sequences_all_delta = [ 97 | sequences_all[:, 1, :] - sequences_all[:, 0, :]] 98 | for i in range(args.input_n+args.output_n-1): 99 | sequences_all_delta.append( 100 | sequences_all[:, i+1, :] - sequences_all[:, i, :]) 101 | 102 | sequences_all_delta = torch.stack( 103 | (sequences_all_delta)).permute(1, 0, 2) 104 | sequences_train_delta = sequences_all_delta[:, 105 | 0:args.input_n, :] 106 | sequences_predict = model(sequences_train_delta) 107 | sequences_predict = delta_2_gt( 108 | sequences_predict, sequences_train[:, -1, :]) 109 | loss = mpjpe_error(sequences_predict, sequences_gt) 110 | 111 | elif args.loss_type == 'mpjpe': 112 | sequences_train = sequences_train/1000 113 | sequences_predict = model(sequences_train) 114 | loss = mpjpe_error(sequences_predict, sequences_gt) 115 | 116 | elif args.loss_type == 'angle': 117 | sequences_predict=model(sequences_train) 118 | loss=torch.mean(torch.sum(torch.abs(sequences_predict.reshape(-1,args.output_n,len(dim_used)) - sequences_gt), dim=2).view(-1)) 119 | 120 | 121 | loss.backward() 122 | if args.clip_grad is not None: 123 | torch.nn.utils.clip_grad_norm_( 124 | model.parameters(), args.clip_grad) 125 | 126 | optimizer.step() 127 | 128 | running_loss += loss*batch_dim 129 | 130 | train_loss.append(running_loss.detach().cpu()/n) 131 | model.eval() 132 | with torch.no_grad(): 133 | running_loss = 0 134 | n = 0 135 | for cnt, batch in enumerate(vald_loader): 136 | batch = batch.to(device) 137 | batch_dim = batch.shape[0] 138 | n += batch_dim 139 | 140 | if args.loss_type == 'mpjpe': 141 | sequences_train = batch[:, 0:args.input_n, dim_used].view( 142 | -1, args.input_n, args.pose_dim) 143 | sequences_gt = batch[:, args.input_n:args.input_n + 144 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim) 145 | elif args.loss_type == 'angle': 146 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used)) 147 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n,:] 148 | 149 | 150 | if args.delta_x: 151 | sequences_all = torch.cat( 152 | (sequences_train, sequences_gt), 1) 153 | sequences_all_delta = [ 154 | sequences_all[:, 1, :] - sequences_all[:, 0, :]] 155 | for i in range(args.input_n+args.output_n-1): 156 | sequences_all_delta.append( 157 | sequences_all[:, i+1, :] - sequences_all[:, i, :]) 158 | 159 | sequences_all_delta = torch.stack( 160 | (sequences_all_delta)).permute(1, 0, 
2) 161 | sequences_train_delta = sequences_all_delta[:, 162 | 0:args.input_n, :] 163 | sequences_predict = model(sequences_train_delta) 164 | sequences_predict = delta_2_gt( 165 | sequences_predict, sequences_train[:, -1, :]) 166 | loss = mpjpe_error(sequences_predict, sequences_gt) 167 | 168 | elif args.loss_type == 'mpjpe': 169 | sequences_train = sequences_train/1000 170 | sequences_predict = model(sequences_train) 171 | loss = mpjpe_error(sequences_predict, sequences_gt) 172 | 173 | elif args.loss_type == 'angle': 174 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:] 175 | sequences_predict=model(sequences_train) 176 | all_joints_seq[:,:,dim_used] = sequences_predict 177 | loss = euler_error(all_joints_seq,sequences_gt) 178 | 179 | running_loss += loss*batch_dim 180 | val_loss.append(running_loss.detach().cpu()/n) 181 | if args.use_scheduler: 182 | scheduler.step() 183 | 184 | if args.loss_type == 'mpjpe': 185 | test_loss.append(test_mpjpe(model, args)) 186 | elif args.loss_type == 'angle': 187 | test_loss.append(test_angle(model, args)) 188 | 189 | tb_writer.add_scalar('loss/train', train_loss[-1].item(), epoch) 190 | tb_writer.add_scalar('loss/val', val_loss[-1].item(), epoch) 191 | tb_writer.add_scalar('loss/test', test_loss[-1].item(), epoch) 192 | 193 | torch.save(model.state_dict(), os.path.join(log_dir, 'model.pt')) 194 | # TODO write something to save the best model 195 | if (epoch+1)%1==0: 196 | print('----saving model-----') 197 | torch.save(model.state_dict(),os.path.join(args.model_path,model_name)) 198 | 199 | 200 | def test_mpjpe(model, args): 201 | 202 | device = args.dev 203 | model.eval() 204 | accum_loss = 0 205 | n_batches = 0 # number of batches for all the sequences 206 | actions = define_actions(args.actions_to_consider) 207 | if args.loss_type == 'mpjpe': 208 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 209 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 210 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, 211 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92]) 212 | elif args.loss_type == 'angle': 213 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 214 | 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 215 | 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86]) 216 | # joints at same loc 217 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31]) 218 | index_to_ignore = np.concatenate( 219 | (joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 220 | joint_equal = np.array([13, 19, 22, 13, 27, 30]) 221 | index_to_equal = np.concatenate( 222 | (joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2)) 223 | 224 | for action in actions: 225 | running_loss = 0 226 | n = 0 227 | if args.loss_type == 'mpjpe': 228 | dataset_test = H36M_Dataset(args.data_dir, args.input_n, 229 | args.output_n, args.skip_rate, split=2, actions=[action]) 230 | elif args.loss_type == 'angle': 231 | dataset_test = H36M_Dataset_Angle(args.data_dir, args.input_n, 232 | args.output_n, args.skip_rate, split=2, actions=[action]) 233 | print('>>> Test dataset length: {:d}'.format(dataset_test.__len__())) 234 | 235 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test, 236 | shuffle=False, num_workers=0, pin_memory=True) 237 | for cnt, batch in enumerate(test_loader): 238 | with torch.no_grad(): 239 | 240 | batch = batch.to(device) 241 | batch_dim = batch.shape[0] 242 | n += 
batch_dim 243 | 244 | all_joints_seq = batch.clone( 245 | )[:, args.input_n:args.input_n+args.output_n, :] 246 | all_joints_seq_gt = batch.clone( 247 | )[:, args.input_n:args.input_n+args.output_n, :] 248 | 249 | sequences_train = batch[:, 0:args.input_n, 250 | dim_used].view(-1, args.input_n, len(dim_used)) 251 | 252 | sequences_gt = batch[:, args.input_n:args.input_n + 253 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim) 254 | 255 | if args.delta_x: 256 | sequences_all = torch.cat( 257 | (sequences_train, sequences_gt), 1) 258 | sequences_all_delta = [ 259 | sequences_all[:, 1, :] - sequences_all[:, 0, :]] 260 | for i in range(args.input_n+args.output_n-1): 261 | sequences_all_delta.append( 262 | sequences_all[:, i+1, :] - sequences_all[:, i, :]) 263 | 264 | sequences_all_delta = torch.stack( 265 | (sequences_all_delta)).permute(1, 0, 2) 266 | sequences_train_delta = sequences_all_delta[:, 267 | 0:args.input_n, :] 268 | sequences_predict = model(sequences_train_delta) 269 | sequences_predict = delta_2_gt( 270 | sequences_predict, sequences_train[:, -1, :]) 271 | loss = mpjpe_error(sequences_predict, sequences_gt) 272 | 273 | else: 274 | sequences_train = sequences_train/1000 275 | sequences_predict = model(sequences_train) 276 | loss = mpjpe_error(sequences_predict, sequences_gt) 277 | 278 | all_joints_seq[:, :, dim_used] = sequences_predict 279 | all_joints_seq[:, :, 280 | index_to_ignore] = all_joints_seq[:, :, index_to_equal] 281 | 282 | all_joints_seq_gt[:, :, dim_used] = sequences_gt 283 | all_joints_seq_gt[:, :, 284 | index_to_ignore] = all_joints_seq_gt[:, :, index_to_equal] 285 | 286 | loss = mpjpe_error(all_joints_seq.view(-1, args.output_n, 32, 3), 287 | all_joints_seq_gt.view(-1, args.output_n, 32, 3)) 288 | 289 | running_loss += loss*batch_dim 290 | accum_loss += loss*batch_dim 291 | 292 | n_batches += n 293 | print('overall average loss in mm is: %f'%(accum_loss/n_batches)) 294 | return accum_loss/n_batches 295 | 296 | 297 | def test_angle(model, args): 298 | 299 | device = args.dev 300 | model.eval() 301 | accum_loss=0 302 | n_batches=0 # number of batches for all the sequences 303 | actions=define_actions(args.actions_to_consider) 304 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42, 305 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 306 | 86]) 307 | 308 | for action in actions: 309 | running_loss=0 310 | n=0 311 | dataset_test = H36M_Dataset_Angle(args.data_dir,args.input_n,args.output_n,args.skip_rate, split=2,actions=[action]) 312 | #print('>>> Test dataset length: {:d}'.format(dataset_test.__len__())) 313 | 314 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test, shuffle=False, num_workers=0, pin_memory=True) 315 | for cnt, batch in enumerate(test_loader): 316 | with torch.no_grad(): 317 | 318 | batch=batch.to(device) 319 | batch_dim=batch.shape[0] 320 | n+=batch_dim 321 | 322 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:] 323 | 324 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used)) 325 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n, :] 326 | 327 | sequences_predict=model(sequences_train) 328 | all_joints_seq[:,:,dim_used] = sequences_predict 329 | loss=euler_error(all_joints_seq,sequences_gt) 330 | 331 | running_loss+=loss*batch_dim 332 | accum_loss+=loss*batch_dim 333 | 334 | n_batches+=n 335 | print('overall average loss in euler angle 
is: '+str(accum_loss/n_batches)) 336 | 337 | return accum_loss/n_batches 338 | 339 | 340 | if __name__ == '__main__': 341 | parser = argparse.ArgumentParser(add_help=False) # Parameters for mpjpe 342 | parser.add_argument('--data_dir', type=str, default='../data_h36m/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)') 343 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames") 344 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames") 345 | parser.add_argument('--skip_rate', type=int, default=5, choices=[1, 5], help='rate of frames to skip, default 1 for H36M or 5 for AMASS/3DPW') 346 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader') 347 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging') 348 | 349 | parser.add_argument('--activation', default='mish', type=str, required=False) 350 | parser.add_argument('--r_se', default=8, type=int, required=False) 351 | 352 | parser.add_argument('--n_epochs', default=50, type=int, required=False) 353 | parser.add_argument('--batch_size', default=50, type=int, required=False) 354 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False) 355 | parser.add_argument('--pin_memory', default=False, type=bool, required=False) 356 | parser.add_argument('--loader_workers', default=4, type=int, required=False) 357 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False) 358 | parser.add_argument('--dev', default='cuda:0', type=str, required=False) 359 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, he_normal, he_uniform') 360 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False) 361 | parser.add_argument('--milestones', type=int, nargs='+', default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma') 362 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs') 363 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients') 364 | parser.add_argument('--model_path', type=str, default='./checkpoints/h36m', help='directory for the model checkpoints') 365 | parser.add_argument('--actions_to_consider', default='all', help='actions to consider. Choose either all or a list of actions') 366 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set') 367 | parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)') 368 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle']) 369 | 370 | args, _ = parser.parse_known_args() # tolerate the model-specific flags parsed below 371 | 372 | if args.loss_type == 'mpjpe': 373 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe 374 | parser_mpjpe.add_argument('--hidden_dim', default=50, type=int, required=False) 375 | parser_mpjpe.add_argument('--num_blocks', default=4, type=int, required=False) 376 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False) 377 | parser_mpjpe.add_argument('--channels_mlp_dim', default=50, type=int, required=False) 378 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False) 379 | 
parser_mpjpe.add_argument('--pose_dim', default=66, type=int, required=False) 380 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predict frame-to-frame differences instead of absolute poses') 381 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False) 382 | args = parser_mpjpe.parse_args() 383 | 384 | elif args.loss_type == 'angle': 385 | parser_angle = argparse.ArgumentParser(parents=[parser]) # Parameters for angle 386 | parser_angle.add_argument('--hidden_dim', default=60, type=int, required=False) 387 | parser_angle.add_argument('--num_blocks', default=3, type=int, required=False) 388 | parser_angle.add_argument('--tokens_mlp_dim', default=40, type=int, required=False) 389 | parser_angle.add_argument('--channels_mlp_dim', default=60, type=int, required=False) 390 | parser_angle.add_argument('--regularization', default=0.0, type=float, required=False) 391 | parser_angle.add_argument('--pose_dim', default=48, type=int, required=False) 392 | parser_angle.add_argument('--lr', default=1e-02, type=float, required=False) 393 | args = parser_angle.parse_args() 394 | 395 | if args.loss_type == 'angle' and getattr(args, 'delta_x', False): # the angle parser defines no --delta_x 396 | raise ValueError('delta_x and loss type angle cannot be used together.') 397 | 398 | print(args) 399 | 400 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks, 401 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim, 402 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n, 403 | pred_len=args.output_n, activation=args.activation, 404 | mlp_block_type='normal', regularization=args.regularization, 405 | input_size=args.pose_dim, initialization='none', r_se=args.r_se, 406 | use_max_pooling=False, use_se=True) 407 | 408 | model = model.to(args.dev) 409 | 410 | print('total number of parameters of the network is: ' + 411 | str(sum(p.numel() for p in model.parameters() if p.requires_grad))) 412 | 413 | model_name = 'h36_3d_'+str(args.output_n)+'frames_ckpt' 414 | 415 | train(model, model_name, args) 416 | test_mpjpe(model, args) 417 | -------------------------------------------------------------------------------- /h36m/utils/data_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import numpy as np 4 | from six.moves import xrange # pylint: disable=redefined-builtin 5 | import torch 6 | # from torch.autograd.variable import Variable 7 | import os 8 | import utils.forward_kinematics as forward_kinematics 9 | 10 | 11 | def rotmat2euler(R): 12 | """ 13 | Converts a rotation matrix to Euler angles 14 | Matlab port to python for evaluation purposes 15 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 16 | 17 | Args 18 | R: a 3x3 rotation matrix 19 | Returns 20 | eul: a 3x1 Euler angle representation of R 21 | """ 22 | if R[0, 2] == 1 or R[0, 2] == -1: 23 | # special case 24 | E3 = 0 # set arbitrarily 25 | dlta = np.arctan2(R[0, 1], R[0, 2]) 26 | 27 | if R[0, 2] == -1: 28 | E2 = np.pi / 2 29 | E1 = E3 + dlta 30 | else: 31 | E2 = -np.pi / 2 32 | E1 = -E3 + dlta 33 | 34 | else: 35 | E2 = -np.arcsin(R[0, 2]) 36 | E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) 37 | E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) 38 | 39 | eul = np.array([E1, E2, E3]) 40 | return eul 41 | 42 | 43 | def rotmat2quat(R): 44 | """ 45 | Converts a rotation matrix to a quaternion 46 | Matlab port to python for evaluation purposes 47 | 
https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 48 | 49 | Args 50 | R: 3x3 rotation matrix 51 | Returns 52 | q: 1x4 quaternion 53 | """ 54 | rotdiff = R - R.T 55 | 56 | r = np.zeros(3) 57 | r[0] = -rotdiff[1, 2] 58 | r[1] = rotdiff[0, 2] 59 | r[2] = -rotdiff[0, 1] 60 | sintheta = np.linalg.norm(r) / 2 61 | r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps) 62 | 63 | costheta = (np.trace(R) - 1) / 2 64 | 65 | theta = np.arctan2(sintheta, costheta) 66 | 67 | q = np.zeros(4) 68 | q[0] = np.cos(theta / 2) 69 | q[1:] = r0 * np.sin(theta / 2) 70 | return q 71 | 72 | 73 | def rotmat2expmap(R): 74 | return quat2expmap(rotmat2quat(R)) 75 | 76 | 77 | def expmap2rotmat(r): 78 | """ 79 | Converts an exponential map angle to a rotation matrix 80 | Matlab port to python for evaluation purposes 81 | I believe this is also called Rodrigues' formula 82 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m 83 | 84 | Args 85 | r: 1x3 exponential map 86 | Returns 87 | R: 3x3 rotation matrix 88 | """ 89 | theta = np.linalg.norm(r) 90 | r0 = np.divide(r, theta + np.finfo(np.float32).eps) 91 | r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) 92 | r0x = r0x - r0x.T 93 | R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * r0x.dot(r0x) 94 | return R 95 | 96 | 97 | def quat2expmap(q): 98 | """ 99 | Converts a quaternion to an exponential map 100 | Matlab port to python for evaluation purposes 101 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1 102 | 103 | Args 104 | q: 1x4 quaternion 105 | Returns 106 | r: 1x3 exponential map 107 | Raises 108 | ValueError if the l2 norm of the quaternion is not close to 1 109 | """ 110 | if np.abs(np.linalg.norm(q) - 1) > 1e-3: 111 | raise ValueError("quat2expmap: input quaternion is not norm 1") 112 | 113 | sinhalftheta = np.linalg.norm(q[1:]) 114 | coshalftheta = q[0] 115 | 116 | r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps)) 117 | theta = 2 * np.arctan2(sinhalftheta, coshalftheta) 118 | theta = np.mod(theta + 2 * np.pi, 2 * np.pi) 119 | 120 | if theta > np.pi: 121 | theta = 2 * np.pi - theta 122 | r0 = -r0 123 | 124 | r = r0 * theta 125 | return r 126 | 127 | 128 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot): 129 | """Borrowed from SRNN code. Reverts the normalization of an nxd float32 matrix. 
130 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12 131 | 132 | Args 133 | normalizedData: nxd matrix with normalized data 134 | data_mean: vector of mean used to normalize the data 135 | data_std: vector of standard deviation used to normalize the data 136 | dimensions_to_ignore: vector with dimensions not used by the model 137 | actions: list of strings with the encoded actions 138 | one_hot: whether the data comes with one-hot encoding 139 | Returns 140 | origData: data originally used to 141 | """ 142 | T = normalizedData.shape[0] 143 | D = data_mean.shape[0] 144 | 145 | origData = np.zeros((T, D), dtype=np.float32) 146 | dimensions_to_use = [] 147 | for i in range(D): 148 | if i in dimensions_to_ignore: 149 | continue 150 | dimensions_to_use.append(i) 151 | dimensions_to_use = np.array(dimensions_to_use) 152 | 153 | if one_hot: 154 | origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)] 155 | else: 156 | origData[:, dimensions_to_use] = normalizedData 157 | 158 | # potentially ineficient, but only done once per experiment 159 | stdMat = data_std.reshape((1, D)) 160 | stdMat = np.repeat(stdMat, T, axis=0) 161 | meanMat = data_mean.reshape((1, D)) 162 | meanMat = np.repeat(meanMat, T, axis=0) 163 | origData = np.multiply(origData, stdMat) + meanMat 164 | return origData 165 | 166 | 167 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot): 168 | """ 169 | Converts the output of the neural network to a format that is more easy to 170 | manipulate for, e.g. conversion to other format or visualization 171 | 172 | Args 173 | poses: The output from the TF model. A list with (seq_length) entries, 174 | each with a (batch_size, dim) output 175 | Returns 176 | poses_out: A tensor of size (batch_size, seq_length, dim) output. Each 177 | batch is an n-by-d sequence of poses. 178 | """ 179 | seq_len = len(poses) 180 | if seq_len == 0: 181 | return [] 182 | 183 | batch_size, dim = poses[0].shape 184 | 185 | poses_out = np.concatenate(poses) 186 | poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) 187 | poses_out = np.transpose(poses_out, [1, 0, 2]) 188 | 189 | poses_out_list = [] 190 | for i in xrange(poses_out.shape[0]): 191 | poses_out_list.append( 192 | unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) 193 | 194 | return poses_out_list 195 | 196 | 197 | def readCSVasFloat(filename): 198 | """ 199 | Borrowed from SRNN code. Reads a csv and returns a float matrix. 200 | https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 201 | 202 | Args 203 | filename: string. 
Path to the csv file 204 | Returns 205 | returnArray: the read data in a float32 matrix 206 | """ 207 | returnArray = [] 208 | lines = open(filename).readlines() 209 | for line in lines: 210 | line = line.strip().split(',') 211 | if len(line) > 0: 212 | returnArray.append(np.array([np.float32(x) for x in line])) 213 | 214 | returnArray = np.array(returnArray) 215 | return returnArray 216 | 217 | 218 | def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): 219 | """ 220 | Normalize input data by removing unused dimensions, subtracting the mean and 221 | dividing by the standard deviation 222 | 223 | Args 224 | data: nx99 matrix with data to normalize 225 | data_mean: vector of mean used to normalize the data 226 | data_std: vector of standard deviation used to normalize the data 227 | dim_to_use: vector with dimensions used by the model 228 | actions: list of strings with the encoded actions 229 | one_hot: whether the data comes with one-hot encoding 230 | Returns 231 | data_out: the passed data matrix, but normalized 232 | """ 233 | data_out = {} 234 | nactions = len(actions) 235 | 236 | if not one_hot: 237 | # No one-hot encoding... no need to do anything special 238 | for key in data.keys(): 239 | data_out[key] = np.divide((data[key] - data_mean), data_std) 240 | data_out[key] = data_out[key][:, dim_to_use] 241 | 242 | else: 243 | # TODO hard-coding 99 dimensions for un-normalized human poses 244 | for key in data.keys(): 245 | data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std) 246 | data_out[key] = data_out[key][:, dim_to_use] 247 | data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:])) 248 | 249 | return data_out 250 | 251 | 252 | def normalization_stats(completeData): 253 | """ 254 | Also borrowed from SRNN code. Computes mean, stdev and dimensions to ignore. 255 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33 256 | 257 | Args 258 | completeData: nx99 matrix with data to normalize 259 | Returns 260 | data_mean: vector of mean used to normalize the data 261 | data_std: vector of standard deviation used to normalize the data 262 | dimensions_to_ignore: vector with dimensions not used by the model 263 | dimensions_to_use: vector with dimensions used by the model 264 | """ 265 | data_mean = np.mean(completeData, axis=0) 266 | data_std = np.std(completeData, axis=0) 267 | 268 | dimensions_to_ignore = [] 269 | dimensions_to_use = [] 270 | 271 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) 272 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) 273 | 274 | data_std[dimensions_to_ignore] = 1.0 275 | 276 | return data_mean, data_std, dimensions_to_ignore, dimensions_to_use 277 | 278 | 279 | def define_actions(action): 280 | """ 281 | Define the list of actions we are using. 282 | 283 | Args 284 | action: String with the passed action. 
Could be "all" 285 | Returns 286 | actions: List of strings of actions 287 | Raises 288 | ValueError if the action is not included in H3.6M 289 | """ 290 | 291 | actions = ["walking", "eating", "smoking", "discussion", "directions", 292 | "greeting", "phoning", "posing", "purchases", "sitting", 293 | "sittingdown", "takingphoto", "waiting", "walkingdog", 294 | "walkingtogether"] 295 | if action in actions: 296 | return [action] 297 | 298 | if action == "all": 299 | return actions 300 | 301 | if action == "all_srnn": 302 | return ["walking", "eating", "smoking", "discussion"] 303 | 304 | raise ValueError("Unrecognized action: %s" % action) 305 | 306 | 307 | """all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction""" 308 | 309 | 310 | def define_actions_cmu(action): 311 | """ 312 | Define the list of actions we are using. 313 | 314 | Args 315 | action: String with the passed action. Could be "all" 316 | Returns 317 | actions: List of strings of actions 318 | Raises 319 | ValueError if the action is not included in the CMU mocap set 320 | """ 321 | 322 | actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking", 323 | "washwindow"] 324 | if action in actions: 325 | return [action] 326 | 327 | if action == "all": 328 | return actions 329 | 330 | raise ValueError("Unrecognized action: %s" % action) 331 | 332 | 333 | def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): 334 | seq_len = input_n + output_n 335 | nactions = len(actions) 336 | sampled_seq = [] 337 | complete_seq = [] 338 | for action_idx in np.arange(nactions): 339 | action = actions[action_idx] 340 | path = '{}/{}'.format(path_to_dataset, action) 341 | count = 0 342 | for _ in os.listdir(path): 343 | count = count + 1 344 | for examp_index in np.arange(count): 345 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) 346 | action_sequence = readCSVasFloat(filename) 347 | n, d = action_sequence.shape 348 | even_list = range(0, n, 2) 349 | the_sequence = np.array(action_sequence[even_list, :]) 350 | num_frames = len(the_sequence) 351 | if not is_test: 352 | fs = np.arange(0, num_frames - seq_len + 1) 353 | fs_sel = fs 354 | for i in np.arange(seq_len - 1): 355 | fs_sel = np.vstack((fs_sel, fs + i + 1)) 356 | fs_sel = fs_sel.transpose() 357 | seq_sel = the_sequence[fs_sel, :] 358 | if len(sampled_seq) == 0: 359 | sampled_seq = seq_sel 360 | complete_seq = the_sequence 361 | else: 362 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 363 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 364 | else: 365 | source_seq_len = 50 366 | target_seq_len = 25 367 | total_frames = source_seq_len + target_seq_len 368 | batch_size = 8 369 | SEED = 1234567890 370 | rng = np.random.RandomState(SEED) 371 | for _ in range(batch_size): 372 | idx = rng.randint(0, num_frames - total_frames) 373 | seq_sel = the_sequence[ 374 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] 375 | seq_sel = np.expand_dims(seq_sel, axis=0) 376 | if len(sampled_seq) == 0: 377 | sampled_seq = seq_sel 378 | complete_seq = the_sequence 379 | else: 380 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 381 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 382 | 383 | if not is_test: 384 | data_std = np.std(complete_seq, axis=0) 385 | data_mean = np.mean(complete_seq, axis=0) 386 | 387 | dimensions_to_ignore = [] 388 | dimensions_to_use = []
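        # Explanatory note (ours, not in the original source): dimensions whose
        # std is (near) zero are constant over the whole dataset and carry no
        # signal. They are collected in dimensions_to_ignore below, and their
        # std is forced to 1.0 so a later (x - mean) / std normalization cannot
        # divide by zero.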
389 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0])) 390 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0])) 391 | data_std[dimensions_to_ignore] = 1.0 392 | data_mean[dimensions_to_ignore] = 0.0 393 | 394 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std 395 | 396 | 397 | def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False): 398 | seq_len = input_n + output_n 399 | nactions = len(actions) 400 | sampled_seq = [] 401 | complete_seq = [] 402 | for action_idx in np.arange(nactions): 403 | action = actions[action_idx] 404 | path = '{}/{}'.format(path_to_dataset, action) 405 | count = 0 406 | for _ in os.listdir(path): 407 | count = count + 1 408 | for examp_index in np.arange(count): 409 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1) 410 | action_sequence = readCSVasFloat(filename) 411 | n, d = action_sequence.shape 412 | exptmps = torch.from_numpy(action_sequence).float().cuda() 413 | xyz = expmap2xyz_torch_cmu(exptmps) 414 | xyz = xyz.view(-1, 38 * 3) 415 | xyz = xyz.cpu().data.numpy() 416 | action_sequence = xyz 417 | 418 | even_list = range(0, n, 2) 419 | the_sequence = np.array(action_sequence[even_list, :]) 420 | num_frames = len(the_sequence) 421 | if not is_test: 422 | fs = np.arange(0, num_frames - seq_len + 1) 423 | fs_sel = fs 424 | for i in np.arange(seq_len - 1): 425 | fs_sel = np.vstack((fs_sel, fs + i + 1)) 426 | fs_sel = fs_sel.transpose() 427 | seq_sel = the_sequence[fs_sel, :] 428 | if len(sampled_seq) == 0: 429 | sampled_seq = seq_sel 430 | complete_seq = the_sequence 431 | else: 432 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 433 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 434 | else: 435 | source_seq_len = 50 436 | target_seq_len = 25 437 | total_frames = source_seq_len + target_seq_len 438 | batch_size = 8 439 | SEED = 1234567890 440 | rng = np.random.RandomState(SEED) 441 | for _ in range(batch_size): 442 | idx = rng.randint(0, num_frames - total_frames) 443 | seq_sel = the_sequence[ 444 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] 445 | seq_sel = np.expand_dims(seq_sel, axis=0) 446 | if len(sampled_seq) == 0: 447 | sampled_seq = seq_sel 448 | complete_seq = the_sequence 449 | else: 450 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 451 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 452 | 453 | if not is_test: 454 | data_std = np.std(complete_seq, axis=0) 455 | data_mean = np.mean(complete_seq, axis=0) 456 | 457 | joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) 458 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 459 | dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) 460 | 461 | data_std[dimensions_to_ignore] = 1.0 462 | data_mean[dimensions_to_ignore] = 0.0 463 | 464 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std 465 | 466 | 467 | def rotmat2euler_torch(R): 468 | """ 469 | Converts a rotation matrix to euler angles 470 | batch pytorch version ported from the corresponding numpy method above 471 | 472 | :param R:N*3*3 473 | :return: N*3 474 | """ 475 | n = R.data.shape[0] 476 | eul = torch.zeros(n, 3).float().cuda() 477 | idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() 478 | idx_spec2 = (R[:, 0, 2] == 
-1).nonzero().cpu().data.numpy().reshape(-1).tolist() 479 | if len(idx_spec1) > 0: 480 | R_spec1 = R[idx_spec1, :, :] 481 | eul_spec1 = torch.zeros(len(idx_spec1), 3).float().cuda() 482 | eul_spec1[:, 2] = 0 483 | eul_spec1[:, 1] = -np.pi / 2 484 | delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) 485 | eul_spec1[:, 0] = delta 486 | eul[idx_spec1, :] = eul_spec1 487 | 488 | if len(idx_spec2) > 0: 489 | R_spec2 = R[idx_spec2, :, :] 490 | eul_spec2 = torch.zeros(len(idx_spec2), 3).float().cuda() 491 | eul_spec2[:, 2] = 0 492 | eul_spec2[:, 1] = np.pi / 2 493 | delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) 494 | eul_spec2[:, 0] = delta 495 | eul[idx_spec2] = eul_spec2 496 | 497 | idx_remain = np.arange(0, n) 498 | idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() 499 | if len(idx_remain) > 0: 500 | R_remain = R[idx_remain, :, :] 501 | eul_remain = torch.zeros(len(idx_remain), 3).float().cuda() 502 | eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) 503 | eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), 504 | R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) 505 | eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), 506 | R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) 507 | eul[idx_remain, :] = eul_remain 508 | 509 | return eul 510 | 511 | 512 | def rotmat2quat_torch(R): 513 | """ 514 | Converts a rotation matrix to quaternion 515 | batch pytorch version ported from the corresponding numpy method above 516 | :param R: N * 3 * 3 517 | :return: N * 4 518 | """ 519 | rotdiff = R - R.transpose(1, 2) 520 | r = torch.zeros_like(rotdiff[:, 0]) 521 | r[:, 0] = -rotdiff[:, 1, 2] 522 | r[:, 1] = rotdiff[:, 0, 2] 523 | r[:, 2] = -rotdiff[:, 0, 1] 524 | r_norm = torch.norm(r, dim=1) 525 | sintheta = r_norm / 2 526 | r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) 527 | t1 = R[:, 0, 0] 528 | t2 = R[:, 1, 1] 529 | t3 = R[:, 2, 2] 530 | costheta = (t1 + t2 + t3 - 1) / 2 531 | theta = torch.atan2(sintheta, costheta) 532 | q = torch.zeros(R.shape[0], 4).float().cuda() 533 | q[:, 0] = torch.cos(theta / 2) 534 | q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) 535 | 536 | return q 537 | 538 | 539 | def expmap2quat_torch(exp): 540 | """ 541 | Converts expmap to quaternion 542 | batch pytorch version ported from the corresponding numpy method above 543 | :param R: N*3 544 | :return: N*4 545 | """ 546 | theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) 547 | v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) 548 | sinhalf = torch.sin(theta / 2) 549 | coshalf = torch.cos(theta / 2) 550 | q1 = torch.mul(v, sinhalf.repeat(1, 3)) 551 | q = torch.cat((coshalf, q1), dim=1) 552 | return q 553 | 554 | 555 | def expmap2rotmat_torch(r): 556 | """ 557 | Converts expmap matrix to rotation 558 | batch pytorch version ported from the corresponding method above 559 | :param r: N*3 560 | :return: N*3*3 561 | """ 562 | theta = torch.norm(r, 2, 1) 563 | r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) 564 | r1 = torch.zeros_like(r0).repeat(1, 3) 565 | r1[:, 1] = -r0[:, 2] 566 | r1[:, 2] = r0[:, 1] 567 | r1[:, 5] = -r0[:, 0] 568 | r1 = r1.view(-1, 3, 3) 569 | r1 = r1 - r1.transpose(1, 2) 570 | n = r1.data.shape[0] 571 | R = torch.eye(3, 3).repeat(n, 1, 1).float().cuda() + torch.mul( 572 | torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( 573 | (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) 574 | return R 575 | 
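# Illustrative sketch (ours, not part of the upstream file): a quick numpy
# round-trip check of the exponential-map conversions defined above, valid for
# rotation angles below pi. The helper name `_expmap_roundtrip_demo` is
# hypothetical, not an API of this module.
def _expmap_roundtrip_demo():
    r = np.array([0.3, -0.2, 0.5])   # exponential map with angle ||r|| < pi
    R = expmap2rotmat(r)             # 3x3 rotation matrix (Rodrigues' formula)
    r_back = rotmat2expmap(R)        # back through the quaternion to the expmap
    assert np.allclose(r, r_back, atol=1e-5), 'round trip should recover r'
    return r_back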
576 | 
577 | def expmap2xyz_torch(expmap):
578 |     """
579 |     Convert exponential maps to joint locations
580 |     :param expmap: N*99
581 |     :return: N*32*3
582 |     """
583 |     parent, offset, rotInd, expmapInd = forward_kinematics._some_variables()
584 |     xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd)
585 |     return xyz
586 | 
587 | 
588 | def get_dct_matrix(N):
589 |     dct_m = np.eye(N)
590 |     for k in np.arange(N):
591 |         for i in np.arange(N):
592 |             w = np.sqrt(2 / N)
593 |             if k == 0:
594 |                 w = np.sqrt(1 / N)
595 |             dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N)
596 |     idct_m = np.linalg.inv(dct_m)
597 |     return dct_m, idct_m
598 | 
599 | 
600 | def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10):
601 |     """
602 |     Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
603 | 
604 |     which originally comes from the SRNN code,
605 |     in order to find the same action indices as in SRNN:
606 |     https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325
607 |     """
608 | 
609 |     # Use a fixed dummy seed, following
610 |     # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29
611 |     SEED = 1234567890
612 |     rng = np.random.RandomState(SEED)
613 | 
614 |     T1 = frame_num1 - 150
615 |     T2 = frame_num2 - 150  # seq_len
616 |     idxo1 = None
617 |     idxo2 = None
618 |     for _ in np.arange(0, 128):
619 |         idx_ran1 = rng.randint(16, T1)
620 |         idx_ran2 = rng.randint(16, T2)
621 |         idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len)
622 |         idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len)
623 |         if idxo1 is None:
624 |             idxo1 = idxs1
625 |             idxo2 = idxs2
626 |         else:
627 |             idxo1 = np.vstack((idxo1, idxs1))
628 |             idxo2 = np.vstack((idxo2, idxs2))
629 |     return idxo1, idxo2
630 | 
631 | 
632 | def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10):
633 |     """
634 |     Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
635 | 
636 |     which originally comes from the SRNN code,
637 |     in order to find the same action indices as in SRNN:
638 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 639 | """ 640 | 641 | # Used a fixed dummy seed, following 642 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 643 | SEED = 1234567890 644 | rng = np.random.RandomState(SEED) 645 | 646 | T1 = frame_num1 - 150 647 | T2 = frame_num2 - 150 # seq_len 648 | idxo1 = None 649 | idxo2 = None 650 | for _ in np.arange(0, 4): 651 | idx_ran1 = rng.randint(16, T1) 652 | idx_ran2 = rng.randint(16, T2) 653 | # print("subact1 {}".format(idx_ran1)) 654 | # print("subact2 {}".format(idx_ran2)) 655 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) 656 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) 657 | if idxo1 is None: 658 | idxo1 = idxs1 659 | idxo2 = idxs2 660 | else: 661 | idxo1 = np.vstack((idxo1, idxs1)) 662 | idxo2 = np.vstack((idxo2, idxs2)) 663 | return idxo1, idxo2 664 | -------------------------------------------------------------------------------- /h36m/utils/forward_kinematics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch.autograd.variable import Variable 4 | import utils.data_utils as data_utils 5 | 6 | 7 | def fkl(angles, parent, offset, rotInd, expmapInd): 8 | """ 9 | Convert joint angles and bone lenghts into the 3d points of a person. 10 | 11 | adapted from 12 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 13 | 14 | which originaly based on expmap2xyz.m, available at 15 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 16 | Args 17 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 18 | parent: 32-long vector with parent-child relationships in the kinematic tree 19 | offset: 96-long vector with bone lenghts 20 | rotInd: 32-long list with indices into angles 21 | expmapInd: 32-long list with indices into expmap angles 22 | Returns 23 | xyz: 32x3 3d points that represent a person in 3d space 24 | """ 25 | 26 | assert len(angles) == 99 27 | 28 | # Structure that indicates parents for each joint 29 | njoints = 32 30 | xyzStruct = [dict() for x in range(njoints)] 31 | 32 | for i in np.arange(njoints): 33 | 34 | # if not rotInd[i]: # If the list is empty 35 | # xangle, yangle, zangle = 0, 0, 0 36 | # else: 37 | # xangle = angles[rotInd[i][0] - 1] 38 | # yangle = angles[rotInd[i][1] - 1] 39 | # zangle = angles[rotInd[i][2] - 1] 40 | if i == 0: 41 | xangle = angles[0] 42 | yangle = angles[1] 43 | zangle = angles[2] 44 | thisPosition = np.array([xangle, yangle, zangle]) 45 | else: 46 | thisPosition = np.array([0, 0, 0]) 47 | 48 | r = angles[expmapInd[i]] 49 | 50 | thisRotation = data_utils.expmap2rotmat(r) 51 | 52 | if parent[i] == -1: # Root node 53 | xyzStruct[i]['rotation'] = thisRotation 54 | xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition 55 | else: 56 | xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ 57 | xyzStruct[parent[i]]['xyz'] 58 | xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) 59 | 60 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 61 | xyz = np.array(xyz).squeeze() 62 | # xyz = xyz[:, [0, 2, 1]] 63 | # xyz = xyz[:,[2,0,1]] 64 | 65 | return xyz 66 | 67 | 68 | def _some_variables(): 69 | """ 70 | 
borrowed from 71 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 72 | 73 | We define some variables that are useful to run the kinematic tree 74 | 75 | Args 76 | None 77 | Returns 78 | parent: 32-long vector with parent-child relationships in the kinematic tree 79 | offset: 96-long vector with bone lenghts 80 | rotInd: 32-long list with indices into angles 81 | expmapInd: 32-long list with indices into expmap angles 82 | """ 83 | 84 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, 85 | 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 86 | 87 | offset = np.array( 88 | [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, 89 | -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, 90 | 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, 91 | 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, 92 | 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, 93 | 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, 94 | 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, 95 | 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, 96 | 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, 97 | 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) 98 | offset = offset.reshape(-1, 3) 99 | 100 | rotInd = [[5, 6, 4], 101 | [8, 9, 7], 102 | [11, 12, 10], 103 | [14, 15, 13], 104 | [17, 18, 16], 105 | [], 106 | [20, 21, 19], 107 | [23, 24, 22], 108 | [26, 27, 25], 109 | [29, 30, 28], 110 | [], 111 | [32, 33, 31], 112 | [35, 36, 34], 113 | [38, 39, 37], 114 | [41, 42, 40], 115 | [], 116 | [44, 45, 43], 117 | [47, 48, 46], 118 | [50, 51, 49], 119 | [53, 54, 52], 120 | [56, 57, 55], 121 | [], 122 | [59, 60, 58], 123 | [], 124 | [62, 63, 61], 125 | [65, 66, 64], 126 | [68, 69, 67], 127 | [71, 72, 70], 128 | [74, 75, 73], 129 | [], 130 | [77, 78, 76], 131 | []] 132 | 133 | expmapInd = np.split(np.arange(4, 100) - 1, 32) 134 | 135 | return parent, offset, rotInd, expmapInd 136 | 137 | 138 | def _some_variables_cmu(): 139 | """ 140 | We define some variables that are useful to run the kinematic tree 141 | 142 | Args 143 | None 144 | Returns 145 | parent: 32-long vector with parent-child relationships in the kinematic tree 146 | offset: 96-long vector with bone lenghts 147 | rotInd: 32-long list with indices into angles 148 | expmapInd: 32-long list with indices into expmap angles 149 | """ 150 | 151 | parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, 152 | 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 153 | 154 | offset = 70 * np.array( 155 | [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, 156 | 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, 157 | 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, 158 | -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, 
159 | 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, 160 | 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, 161 | 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, 162 | 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, 163 | 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, 164 | 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, 165 | -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0, 166 | -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) 167 | offset = offset.reshape(-1, 3) 168 | 169 | rotInd = [[6, 5, 4], 170 | [9, 8, 7], 171 | [12, 11, 10], 172 | [15, 14, 13], 173 | [18, 17, 16], 174 | [21, 20, 19], 175 | [], 176 | [24, 23, 22], 177 | [27, 26, 25], 178 | [30, 29, 28], 179 | [33, 32, 31], 180 | [36, 35, 34], 181 | [], 182 | [39, 38, 37], 183 | [42, 41, 40], 184 | [45, 44, 43], 185 | [48, 47, 46], 186 | [51, 50, 49], 187 | [54, 53, 52], 188 | [], 189 | [57, 56, 55], 190 | [60, 59, 58], 191 | [63, 62, 61], 192 | [66, 65, 64], 193 | [69, 68, 67], 194 | [72, 71, 70], 195 | [], 196 | [75, 74, 73], 197 | [], 198 | [78, 77, 76], 199 | [81, 80, 79], 200 | [84, 83, 82], 201 | [87, 86, 85], 202 | [90, 89, 88], 203 | [93, 92, 91], 204 | [], 205 | [96, 95, 94], 206 | []] 207 | posInd = [] 208 | for ii in np.arange(38): 209 | if ii == 0: 210 | posInd.append([1, 2, 3]) 211 | else: 212 | posInd.append([]) 213 | 214 | expmapInd = np.split(np.arange(4, 118) - 1, 38) 215 | 216 | return parent, offset, posInd, expmapInd 217 | 218 | 219 | def fkl_torch(angles, parent, offset, rotInd, expmapInd): 220 | """ 221 | pytorch version of fkl. 
222 | 223 | convert joint angles to joint locations 224 | batch pytorch version of the fkl() method above 225 | :param angles: N*99 226 | :param parent: 227 | :param offset: 228 | :param rotInd: 229 | :param expmapInd: 230 | :return: N*joint_n*3 231 | """ 232 | n = angles.data.shape[0] 233 | j_n = offset.shape[0] 234 | p3d = Variable(torch.from_numpy(offset)).float().cuda().unsqueeze(0).repeat(n, 1, 1) 235 | angles = angles[:, 3:].contiguous().view(-1, 3) 236 | R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3) 237 | for i in np.arange(1, j_n): 238 | if parent[i] > 0: 239 | R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone() 240 | p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :] 241 | return p3d 242 | 243 | 244 | def main(): 245 | # Load all the data 246 | parent, offset, rotInd, expmapInd = _some_variables() 247 | 248 | # numpy implementation 249 | # with h5py.File('samples.h5', 'r') as h5f: 250 | # expmap_gt = h5f['expmap/gt/walking_0'][:] 251 | # expmap_pred = h5f['expmap/preds/walking_0'][:] 252 | expmap_pred = np.array( 253 | [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215, 254 | -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000, 255 | -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000, 256 | 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, 257 | 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127, 258 | 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378, 259 | -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570, 260 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, 261 | -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218, 262 | -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000, 263 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 264 | ]) 265 | expmap_gt = np.array( 266 | [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248, 267 | -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000, 268 | -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000, 269 | 0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, 270 | 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383, 271 | 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597, 272 | -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608, 273 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, 274 | -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309, 275 | -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000, 276 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, 
-0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000 277 | ]) 278 | xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd) 279 | xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd) 280 | 281 | exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda() 282 | xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd) 283 | xyz = xyz.cpu().data.numpy() 284 | print(xyz) 285 | 286 | 287 | if __name__ == '__main__': 288 | main() 289 | -------------------------------------------------------------------------------- /h36m/utils/utils_mixer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from random import randint 4 | from utils.data_utils import rotmat2euler_torch, expmap2rotmat_torch 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | def criterion_cos(input_f, target_f): 11 | cos = nn.CosineSimilarity(dim=2, eps=1e-6) 12 | return cos(input_f, target_f) 13 | 14 | 15 | def criterion_cos2(input_f, target_f): 16 | cos = nn.CosineSimilarity(dim=1, eps=1e-6) 17 | return cos(input_f, target_f) 18 | 19 | 20 | 21 | def mpjpe_error(batch_pred,batch_gt): 22 | 23 | batch_pred= batch_pred.contiguous().view(-1,3) 24 | batch_gt=batch_gt.contiguous().view(-1,3) 25 | 26 | return torch.mean(torch.norm(batch_gt-batch_pred,2,1)) 27 | 28 | 29 | def euler_error(ang_pred, ang_gt): 30 | # only for 32 joints 31 | dim_full_len=ang_gt.shape[2] 32 | 33 | pred_expmap = ang_pred.contiguous().view(-1,dim_full_len).view(-1, 3) 34 | targ_expmap = ang_gt.contiguous().view(-1,dim_full_len).view(-1, 3) 35 | 36 | pred_eul = rotmat2euler_torch(expmap2rotmat_torch(pred_expmap)) 37 | pred_eul = pred_eul.view(-1, dim_full_len) 38 | 39 | targ_eul = rotmat2euler_torch(expmap2rotmat_torch(targ_expmap)) 40 | targ_eul = targ_eul.view(-1, dim_full_len) 41 | mean_errors = torch.mean(torch.norm(pred_eul - targ_eul, 2, 1)) 42 | 43 | return mean_errors 44 | 45 | 46 | def get_dct_matrix(N): 47 | dct_m = np.eye(N) 48 | for k in np.arange(N): 49 | for i in np.arange(N): 50 | w = np.sqrt(2 / N) 51 | if k == 0: 52 | w = np.sqrt(1 / N) 53 | dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) 54 | idct_m = np.linalg.inv(dct_m) 55 | return dct_m, idct_m 56 | 57 | 58 | 59 | def get_dct_in (input_seq): 60 | 61 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 62 | 63 | dct_used = input_seq.shape[1] 64 | dct_m_in, _ = get_dct_matrix(dct_used) 65 | 66 | dct_m_in = torch.from_numpy(dct_m_in.astype('float32')).to(device) 67 | 68 | input_dct_seq = torch.matmul(dct_m_in[:, 0:dct_used], input_seq) 69 | 70 | return input_dct_seq 71 | 72 | 73 | def get_dct_out (input_seq): 74 | 75 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 76 | 77 | dct_used = input_seq.shape[1] 78 | _, idct_m_in = get_dct_matrix(dct_used) 79 | 80 | idct_m_in = torch.from_numpy(idct_m_in.astype('float32')).to(device) 81 | 82 | input_dct_seq = torch.matmul(idct_m_in[:, 0:dct_used], input_seq) 83 | 84 | return input_dct_seq 85 | 86 | 87 | 88 | 89 | # def delta_2_gt (prediction, last_timestep): 90 | # prediction = prediction.clone() 91 | 92 | # #print (prediction [:,0,:].shape,last_timestep.shape) 93 | # prediction [:,0,:] = prediction [:,0,:] + last_timestep 94 | # prediction [:,1,:] = prediction [:,1,:] + prediction [:,0,:] 95 | 96 | # prediction [:,2,:] = prediction [:,2,:] + prediction [:,1,:] 97 | # prediction [:,3,:] = prediction [:,3,:] + prediction [:,2,:] 98 | 99 | # prediction [:,4,:] = prediction [:,4,:] + 
prediction [:,3,:] 100 | # prediction [:,5,:] = prediction [:,5,:] + prediction [:,4,:] 101 | # prediction [:,6,:] = prediction [:,6,:] + prediction [:,5,:] 102 | # prediction [:,7,:] = prediction [:,7,:] + prediction [:,6,:] 103 | 104 | # prediction [:,8,:] = prediction [:,8,:] + prediction [:,7,:] 105 | # prediction [:,9,:] = prediction [:,9,:] + prediction [:,8,:] 106 | 107 | # prediction [:,10,:] = prediction [:,10,:] + prediction [:,9,:] 108 | # prediction [:,11,:] = prediction [:,11,:] + prediction [:,10,:] 109 | # prediction [:,12,:] = prediction [:,12,:] + prediction [:,11,:] 110 | # prediction [:,13,:] = prediction [:,13,:] + prediction [:,12,:] 111 | 112 | # prediction [:,14,:] = prediction [:,14,:] + prediction [:,13,:] 113 | # prediction [:,15,:] = prediction [:,15,:] + prediction [:,14,:] 114 | # prediction [:,16,:] = prediction [:,16,:] + prediction [:,15,:] 115 | # prediction [:,17,:] = prediction [:,17,:] + prediction [:,16,:] 116 | 117 | # prediction [:,18,:] = prediction [:,18,:] + prediction [:,17,:] 118 | # prediction [:,19,:] = prediction [:,19,:] + prediction [:,18,:] 119 | # prediction [:,20,:] = prediction [:,20,:] + prediction [:,19,:] 120 | # prediction [:,21,:] = prediction [:,21,:] + prediction [:,20,:] 121 | 122 | # prediction [:,22,:] = prediction [:,22,:] + prediction [:,21,:] 123 | # prediction [:,23,:] = prediction [:,23,:] + prediction [:,22,:] 124 | # prediction [:,24,:] = prediction [:,24,:] + prediction [:,23,:] 125 | 126 | 127 | 128 | # # for i in range (args.output_n -1): 129 | # # prediction [:,i+1,:] = prediction [:,i+1,:] + prediction [:,0,:] 130 | 131 | # return prediction 132 | 133 | 134 | 135 | def delta_2_gt (prediction, last_timestep): 136 | prediction = prediction.clone() 137 | 138 | #print (prediction [:,0,:].shape,last_timestep.shape) 139 | prediction [:,0,:] = prediction [:,0,:] + last_timestep 140 | for i in range (prediction.shape[1]-1): 141 | prediction [:,i+1,:] = prediction [:,i+1,:] + prediction [:,i,:] 142 | 143 | 144 | 145 | return prediction 146 | 147 | 148 | 149 | 150 | def mask_sequence (seq,mframes): 151 | 152 | x = [randint(0, seq.shape[1]-1) for p in range(0, mframes)] 153 | 154 | for i in x: 155 | seq[:,i,:] = 0 156 | 157 | return seq 158 | 159 | 160 | 161 | def mask_joints (seq,mjoints): 162 | 163 | seq_masked = seq.clone() 164 | #x = [randint(0, seq.shape[1]-1) for p in range(0, 22) if p % 3 == 0 ] 165 | x = [random.randrange(0, 66, 3) for p in range(0, mjoints)] 166 | 167 | for i in x: 168 | seq_masked[:,:,i] = 0 169 | seq_masked[:,:,i+1] = 0 170 | seq_masked[:,:,i+2] = 0 171 | 172 | return seq_masked 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cycler==0.10.0 2 | kiwisolver==1.3.2 3 | matplotlib==3.4.3 4 | numpy==1.21.2 5 | Pillow==8.3.2 6 | pip==21.2.4 7 | pyparsing==2.4.7 8 | python-dateutil==2.8.2 9 | setuptools==58.1.0 10 | six==1.16.0 11 | torch==1.9.1 12 | typing-extensions==3.10.0.2 13 | wheel==0.37.0 14 | 15 | -------------------------------------------------------------------------------- /utils/ang2joint.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | 5 | ''' 6 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/ang2joint.py 7 | ''' 8 | 9 | def ang2joint(p3d0, pose, 10 | parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 
8, 12: 9, 13: 9, 14: 9, 11 | 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}): 12 | """ 13 | :param p3d0:[batch_size, joint_num, 3] 14 | :param pose:[batch_size, joint_num, 3] 15 | :param parent: 16 | :return: 17 | """ 18 | # model_path = './model.npz' 19 | # params = np.load(model_path, allow_pickle=True) 20 | # kintree_table = params['kintree_table'] 21 | batch_num = p3d0.shape[0] 22 | # id_to_col = {kintree_table[1, i]: i 23 | # for i in range(kintree_table.shape[1])} 24 | # parent = { 25 | # i: id_to_col[kintree_table[0, i]] 26 | # for i in range(1, kintree_table.shape[1]) 27 | # } 28 | # parent = {1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9, 15: 12, 16: 13, 29 | # 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21} 30 | jnum = len(parent.keys()) 31 | # v_shaped = torch.tensordot(betas, self.shapedirs, dims=([1], [2])) + self.v_template 32 | # J = torch.matmul(self.J_regressor, v_shaped) 33 | # face_J = v_shaped[:, [333, 2801, 6261], :] 34 | J = p3d0 35 | R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3) 36 | results = [] 37 | results.append( 38 | with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2)) 39 | ) 40 | # for i in range(1, kintree_table.shape[1]): 41 | for i in range(1, jnum): 42 | results.append( 43 | torch.matmul( 44 | results[parent[i]], 45 | with_zeros( 46 | torch.cat( 47 | (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))), 48 | dim=2 49 | ) 50 | ) 51 | ) 52 | ) 53 | 54 | stacked = torch.stack(results, dim=1) 55 | J_transformed = stacked[:, :, :3, 3] 56 | return J_transformed 57 | 58 | 59 | # In[ ]: 60 | 61 | 62 | def rodrigues(r): 63 | """ 64 | Rodrigues' rotation formula that turns axis-angle tensor into rotation 65 | matrix in a batch-ed manner. 66 | Parameter: 67 | ---------- 68 | r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3]. 69 | Return: 70 | ------- 71 | Rotation matrix of shape [batch_size * angle_num, 3, 3]. 72 | """ 73 | eps = r.clone().normal_(std=1e-8) 74 | theta = torch.norm(r + eps, dim=(1, 2), keepdim=True) 75 | # theta = torch.norm(r, dim=(1, 2), keepdim=True) # dim cannot be tuple 76 | theta_dim = theta.shape[0] 77 | r_hat = r / theta 78 | cos = torch.cos(theta) 79 | z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device) 80 | m = torch.stack( 81 | (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick, 82 | -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1) 83 | m = torch.reshape(m, (-1, 3, 3)) 84 | i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device) 85 | A = r_hat.permute(0, 2, 1) 86 | dot = torch.matmul(A, r_hat) 87 | R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m 88 | return R 89 | 90 | 91 | # In[ ]: 92 | 93 | 94 | def with_zeros(x): 95 | """ 96 | Append a [0, 0, 0, 1] tensor to a [3, 4] tensor. 97 | Parameter: 98 | --------- 99 | x: Tensor to be appended. 100 | Return: 101 | ------ 102 | Tensor after appending of shape [4,4] 103 | """ 104 | ones = torch.tensor( 105 | [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float 106 | ).expand(x.shape[0], -1, -1).to(x.device) 107 | ret = torch.cat((x, ones), dim=1) 108 | return ret 109 | 110 | 111 | def pack(x): 112 | """ 113 | Append zero tensors of shape [4, 3] to a batch of [4, 1] shape tensor. 
114 | Parameter: 115 | ---------- 116 | x: A tensor of shape [batch_size, 4, 1] 117 | Return: 118 | ------ 119 | A tensor of shape [batch_size, 4, 4] after appending. 120 | """ 121 | zeros43 = torch.zeros( 122 | (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device) 123 | ret = torch.cat((zeros43, x), dim=3) 124 | return ret 125 | 126 | 127 | -------------------------------------------------------------------------------- /utils/body_models/smpl_skeleton.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MotionMLP/MotionMixer/91327c3c3a455d398bd097fa300385bafa80a835/utils/body_models/smpl_skeleton.npz -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | 2 | #!/usr/bin/env python 3 | # -*- coding: utf-8 -*- 4 | import numpy as np 5 | from six.moves import xrange # pylint: disable=redefined-builtin 6 | import torch 7 | # from torch.autograd.variable import Variable 8 | import os 9 | from utils import forward_kinematics 10 | 11 | 12 | def rotmat2euler(R): 13 | """ 14 | Converts a rotation matrix to Euler angles 15 | Matlab port to python for evaluation purposes 16 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1 17 | Args 18 | R: a 3x3 rotation matrix 19 | Returns 20 | eul: a 3x1 Euler angle representation of R 21 | """ 22 | if R[0, 2] == 1 or R[0, 2] == -1: 23 | # special case 24 | E3 = 0 # set arbitrarily 25 | dlta = np.arctan2(R[0, 1], R[0, 2]); 26 | 27 | if R[0, 2] == -1: 28 | E2 = np.pi / 2; 29 | E1 = E3 + dlta; 30 | else: 31 | E2 = -np.pi / 2; 32 | E1 = -E3 + dlta; 33 | 34 | else: 35 | E2 = -np.arcsin(R[0, 2]) 36 | E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2)) 37 | E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2)) 38 | 39 | eul = np.array([E1, E2, E3]); 40 | return eul 41 | 42 | 43 | def rotmat2quat(R): 44 | """ 45 | Converts a rotation matrix to a quaternion 46 | Matlab port to python for evaluation purposes 47 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4 48 | Args 49 | R: 3x3 rotation matrix 50 | Returns 51 | q: 1x4 quaternion 52 | """ 53 | rotdiff = R - R.T; 54 | 55 | r = np.zeros(3) 56 | r[0] = -rotdiff[1, 2] 57 | r[1] = rotdiff[0, 2] 58 | r[2] = -rotdiff[0, 1] 59 | sintheta = np.linalg.norm(r) / 2; 60 | r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps); 61 | 62 | costheta = (np.trace(R) - 1) / 2; 63 | 64 | theta = np.arctan2(sintheta, costheta); 65 | 66 | q = np.zeros(4) 67 | q[0] = np.cos(theta / 2) 68 | q[1:] = r0 * np.sin(theta / 2) 69 | return q 70 | 71 | 72 | def rotmat2expmap(R): 73 | return quat2expmap(rotmat2quat(R)); 74 | 75 | 76 | def expmap2rotmat(r): 77 | """ 78 | Converts an exponential map angle to a rotation matrix 79 | Matlab port to python for evaluation purposes 80 | I believe this is also called Rodrigues' formula 81 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m 82 | Args 83 | r: 1x3 exponential map 84 | Returns 85 | R: 3x3 rotation matrix 86 | """ 87 | theta = np.linalg.norm(r) 88 | r0 = np.divide(r, theta + np.finfo(np.float32).eps) 89 | r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3) 90 | r0x = r0x - r0x.T 91 | R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * 
(r0x).dot(r0x)
92 |     return R
93 | 
94 | 
95 | def quat2expmap(q):
96 |     """
97 |     Converts a quaternion to an exponential map
98 |     Matlab port to python for evaluation purposes
99 |     https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1
100 |     Args
101 |         q: 1x4 quaternion
102 |     Returns
103 |         r: 1x3 exponential map
104 |     Raises
105 |         ValueError if the l2 norm of the quaternion is not close to 1
106 |     """
107 |     if np.abs(np.linalg.norm(q) - 1) > 1e-3:
108 |         raise ValueError("quat2expmap: input quaternion is not norm 1")
109 | 
110 |     sinhalftheta = np.linalg.norm(q[1:])
111 |     coshalftheta = q[0]
112 | 
113 |     r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps))
114 |     theta = 2 * np.arctan2(sinhalftheta, coshalftheta)
115 |     theta = np.mod(theta + 2 * np.pi, 2 * np.pi)
116 | 
117 |     if theta > np.pi:
118 |         theta = 2 * np.pi - theta
119 |         r0 = -r0
120 | 
121 |     r = r0 * theta
122 |     return r
123 | 
124 | 
125 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot):
126 |     """Borrowed from SRNN code. Reverts the normalization by multiplying by the std and adding the mean.
127 |     https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12
128 |     Args
129 |         normalizedData: nxd matrix with normalized data
130 |         data_mean: vector of mean used to normalize the data
131 |         data_std: vector of standard deviation used to normalize the data
132 |         dimensions_to_ignore: vector with dimensions not used by the model
133 |         actions: list of strings with the encoded actions
134 |         one_hot: whether the data comes with one-hot encoding
135 |     Returns
136 |         origData: the data in its original, un-normalized scale
137 |     """
138 |     T = normalizedData.shape[0]
139 |     D = data_mean.shape[0]
140 | 
141 |     origData = np.zeros((T, D), dtype=np.float32)
142 |     dimensions_to_use = []
143 |     for i in range(D):
144 |         if i in dimensions_to_ignore:
145 |             continue
146 |         dimensions_to_use.append(i)
147 |     dimensions_to_use = np.array(dimensions_to_use)
148 | 
149 |     if one_hot:
150 |         origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)]
151 |     else:
152 |         origData[:, dimensions_to_use] = normalizedData
153 | 
154 |     # potentially inefficient, but only done once per experiment
155 |     stdMat = data_std.reshape((1, D))
156 |     stdMat = np.repeat(stdMat, T, axis=0)
157 |     meanMat = data_mean.reshape((1, D))
158 |     meanMat = np.repeat(meanMat, T, axis=0)
159 |     origData = np.multiply(origData, stdMat) + meanMat
160 |     return origData
161 | 
162 | 
163 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot):
164 |     """
165 |     Converts the output of the neural network to a format that is easier to
166 |     manipulate, e.g. for conversion to another format or for visualization
167 |     Args
168 |         poses: The output from the TF model. A list with (seq_length) entries,
169 |         each with a (batch_size, dim) output
170 |     Returns
171 |         poses_out: A tensor of size (batch_size, seq_length, dim) output. Each
172 |         batch is an n-by-d sequence of poses.
173 | """ 174 | seq_len = len(poses) 175 | if seq_len == 0: 176 | return [] 177 | 178 | batch_size, dim = poses[0].shape 179 | 180 | poses_out = np.concatenate(poses) 181 | poses_out = np.reshape(poses_out, (seq_len, batch_size, dim)) 182 | poses_out = np.transpose(poses_out, [1, 0, 2]) 183 | 184 | poses_out_list = [] 185 | for i in xrange(poses_out.shape[0]): 186 | poses_out_list.append( 187 | unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot)) 188 | 189 | return poses_out_list 190 | 191 | 192 | def readCSVasFloat(filename): 193 | """ 194 | Borrowed from SRNN code. Reads a csv and returns a float matrix. 195 | https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34 196 | Args 197 | filename: string. Path to the csv file 198 | Returns 199 | returnArray: the read data in a float32 matrix 200 | """ 201 | returnArray = [] 202 | lines = open(filename).readlines() 203 | for line in lines: 204 | line = line.strip().split(',') 205 | if len(line) > 0: 206 | returnArray.append(np.array([np.float32(x) for x in line])) 207 | 208 | returnArray = np.array(returnArray) 209 | return returnArray 210 | 211 | 212 | def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot): 213 | """ 214 | Normalize input data by removing unused dimensions, subtracting the mean and 215 | dividing by the standard deviation 216 | Args 217 | data: nx99 matrix with data to normalize 218 | data_mean: vector of mean used to normalize the data 219 | data_std: vector of standard deviation used to normalize the data 220 | dim_to_use: vector with dimensions used by the model 221 | actions: list of strings with the encoded actions 222 | one_hot: whether the data comes with one-hot encoding 223 | Returns 224 | data_out: the passed data matrix, but normalized 225 | """ 226 | data_out = {} 227 | nactions = len(actions) 228 | 229 | if not one_hot: 230 | # No one-hot encoding... no need to do anything special 231 | for key in data.keys(): 232 | data_out[key] = np.divide((data[key] - data_mean), data_std) 233 | data_out[key] = data_out[key][:, dim_to_use] 234 | 235 | else: 236 | # TODO hard-coding 99 dimensions for un-normalized human poses 237 | for key in data.keys(): 238 | data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std) 239 | data_out[key] = data_out[key][:, dim_to_use] 240 | data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:])) 241 | 242 | return data_out 243 | 244 | 245 | def normalization_stats(completeData): 246 | """" 247 | Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore. 
248 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
249 |     Args
250 |         completeData: nx99 matrix with data to normalize
251 |     Returns
252 |         data_mean: vector of mean used to normalize the data
253 |         data_std: vector of standard deviation used to normalize the data
254 |         dimensions_to_ignore: vector with dimensions not used by the model
255 |         dimensions_to_use: vector with dimensions used by the model
256 |     """
257 |     data_mean = np.mean(completeData, axis=0)
258 |     data_std = np.std(completeData, axis=0)
259 | 
260 |     dimensions_to_ignore = []
261 |     dimensions_to_use = []
262 | 
263 |     dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
264 |     dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
265 | 
266 |     data_std[dimensions_to_ignore] = 1.0
267 | 
268 |     return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
269 | 
270 | 
271 | def define_actions(action):
272 |     """
273 |     Define the list of actions we are using.
274 |     Args
275 |         action: String with the passed action. Could be "all"
276 |     Returns
277 |         actions: List of strings of actions
278 |     Raises
279 |         ValueError if the action is not included in H3.6M
280 |     """
281 | 
282 |     actions = ["walking", "eating", "smoking", "discussion", "directions",
283 |                "greeting", "phoning", "posing", "purchases", "sitting",
284 |                "sittingdown", "takingphoto", "waiting", "walkingdog",
285 |                "walkingtogether"]
286 |     if action in actions:
287 |         return [action]
288 | 
289 |     if action == "all":
290 |         return actions
291 | 
292 |     if action == "all_srnn":
293 |         return ["walking", "eating", "smoking", "discussion"]
294 | 
295 |     raise ValueError("Unrecognized action: %s" % action)
296 | 
297 | 
298 | """all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction"""
299 | 
300 | 
301 | def define_actions_cmu(action):
302 |     """
303 |     Define the list of actions we are using.
304 |     Args
305 |         action: String with the passed action. Could be "all"
306 |     Returns
307 |         actions: List of strings of actions
308 |     Raises
309 |         ValueError if the action is not included in the CMU mocap set
310 |     """
311 | 
312 |     actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking",
313 |                "washwindow"]
314 |     if action in actions:
315 |         return [action]
316 | 
317 |     if action == "all":
318 |         return actions
319 | 
320 |     raise ValueError("Unrecognized action: %s" % action)
321 | 
322 | 
323 | def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
324 |     seq_len = input_n + output_n
325 |     nactions = len(actions)
326 |     sampled_seq = []
327 |     complete_seq = []
328 |     for action_idx in np.arange(nactions):
329 |         action = actions[action_idx]
330 |         path = '{}/{}'.format(path_to_dataset, action)
331 |         count = 0
332 |         for _ in os.listdir(path):
333 |             count = count + 1
334 |         for examp_index in np.arange(count):
335 |             filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
336 |             action_sequence = readCSVasFloat(filename)
337 |             n, d = action_sequence.shape
338 |             even_list = range(0, n, 2)
339 |             the_sequence = np.array(action_sequence[even_list, :])
340 |             num_frames = len(the_sequence)
341 |             if not is_test:
342 |                 fs = np.arange(0, num_frames - seq_len + 1)
343 |                 fs_sel = fs
344 |                 for i in np.arange(seq_len - 1):
345 |                     fs_sel = np.vstack((fs_sel, fs + i + 1))
346 |                 fs_sel = fs_sel.transpose()
347 |                 seq_sel = the_sequence[fs_sel, :]
348 |                 if len(sampled_seq) == 0:
349 |                     sampled_seq = seq_sel
350 |                     complete_seq = the_sequence
351 |                 else:
352 |                     sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
353 |                     complete_seq = np.append(complete_seq, the_sequence, axis=0)
354 |             else:
355 |                 source_seq_len = 50
356 |                 target_seq_len = 25
357 |                 total_frames = source_seq_len + target_seq_len
358 |                 batch_size = 8
359 |                 SEED = 1234567890
360 |                 rng = np.random.RandomState(SEED)
361 |                 for _ in range(batch_size):
362 |                     idx = rng.randint(0, num_frames - total_frames)
363 |                     seq_sel = the_sequence[
364 |                               idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :]
365 |                     seq_sel = np.expand_dims(seq_sel, axis=0)
366 |                     if len(sampled_seq) == 0:
367 |                         sampled_seq = seq_sel
368 |                         complete_seq = the_sequence
369 |                     else:
370 |                         sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
371 |                         complete_seq = np.append(complete_seq, the_sequence, axis=0)
372 | 
373 |     if not is_test:
374 |         data_std = np.std(complete_seq, axis=0)
375 |         data_mean = np.mean(complete_seq, axis=0)
376 | 
377 |     dimensions_to_ignore = []
378 |     dimensions_to_use = []
379 |     dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
380 |     dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
381 |     data_std[dimensions_to_ignore] = 1.0
382 |     data_mean[dimensions_to_ignore] = 0.0
383 | 
384 |     return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
385 | 
386 | 
387 | def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
388 |     seq_len = input_n + output_n
389 |     nactions = len(actions)
390 |     sampled_seq = []
391 |     complete_seq = []
392 |     for action_idx in np.arange(nactions):
393 |         action = actions[action_idx]
394 |         path = '{}/{}'.format(path_to_dataset, action)
395 |         count = 0
396 |         for _ in os.listdir(path):
397 |             count = count + 1
398 |         for examp_index in np.arange(count):
399 |             filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
400 |             action_sequence =
readCSVasFloat(filename) 401 | n, d = action_sequence.shape 402 | exptmps = torch.from_numpy(action_sequence).float().cuda() 403 | xyz = expmap2xyz_torch_cmu(exptmps) 404 | xyz = xyz.view(-1, 38 * 3) 405 | xyz = xyz.cpu().data.numpy() 406 | action_sequence = xyz 407 | 408 | even_list = range(0, n, 2) 409 | the_sequence = np.array(action_sequence[even_list, :]) 410 | num_frames = len(the_sequence) 411 | if not is_test: 412 | fs = np.arange(0, num_frames - seq_len + 1) 413 | fs_sel = fs 414 | for i in np.arange(seq_len - 1): 415 | fs_sel = np.vstack((fs_sel, fs + i + 1)) 416 | fs_sel = fs_sel.transpose() 417 | seq_sel = the_sequence[fs_sel, :] 418 | if len(sampled_seq) == 0: 419 | sampled_seq = seq_sel 420 | complete_seq = the_sequence 421 | else: 422 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 423 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 424 | else: 425 | source_seq_len = 50 426 | target_seq_len = 25 427 | total_frames = source_seq_len + target_seq_len 428 | batch_size = 8 429 | SEED = 1234567890 430 | rng = np.random.RandomState(SEED) 431 | for _ in range(batch_size): 432 | idx = rng.randint(0, num_frames - total_frames) 433 | seq_sel = the_sequence[ 434 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :] 435 | seq_sel = np.expand_dims(seq_sel, axis=0) 436 | if len(sampled_seq) == 0: 437 | sampled_seq = seq_sel 438 | complete_seq = the_sequence 439 | else: 440 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0) 441 | complete_seq = np.append(complete_seq, the_sequence, axis=0) 442 | 443 | if not is_test: 444 | data_std = np.std(complete_seq, axis=0) 445 | data_mean = np.mean(complete_seq, axis=0) 446 | 447 | joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36]) 448 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2)) 449 | dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore) 450 | 451 | data_std[dimensions_to_ignore] = 1.0 452 | data_mean[dimensions_to_ignore] = 0.0 453 | 454 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std 455 | 456 | 457 | def rotmat2euler_torch(R): 458 | """ 459 | Converts a rotation matrix to euler angles 460 | batch pytorch version ported from the corresponding numpy method above 461 | :param R:N*3*3 462 | :return: N*3 463 | """ 464 | n = R.data.shape[0] 465 | eul = torch.zeros(n, 3).float().cuda() 466 | idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist() 467 | idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist() 468 | if len(idx_spec1) > 0: 469 | R_spec1 = R[idx_spec1, :, :] 470 | eul_spec1 = torch.zeros(len(idx_spec1), 3).float().cuda() 471 | eul_spec1[:, 2] = 0 472 | eul_spec1[:, 1] = -np.pi / 2 473 | delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2]) 474 | eul_spec1[:, 0] = delta 475 | eul[idx_spec1, :] = eul_spec1 476 | 477 | if len(idx_spec2) > 0: 478 | R_spec2 = R[idx_spec2, :, :] 479 | eul_spec2 = torch.zeros(len(idx_spec2), 3).float().cuda() 480 | eul_spec2[:, 2] = 0 481 | eul_spec2[:, 1] = np.pi / 2 482 | delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2]) 483 | eul_spec2[:, 0] = delta 484 | eul[idx_spec2] = eul_spec2 485 | 486 | idx_remain = np.arange(0, n) 487 | idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist() 488 | if len(idx_remain) > 0: 489 | R_remain = R[idx_remain, :, :] 490 | eul_remain = torch.zeros(len(idx_remain), 
3).float().cuda() 491 | eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2]) 492 | eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]), 493 | R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1])) 494 | eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]), 495 | R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1])) 496 | eul[idx_remain, :] = eul_remain 497 | 498 | return eul 499 | 500 | 501 | def rotmat2quat_torch(R): 502 | """ 503 | Converts a rotation matrix to quaternion 504 | batch pytorch version ported from the corresponding numpy method above 505 | :param R: N * 3 * 3 506 | :return: N * 4 507 | """ 508 | rotdiff = R - R.transpose(1, 2) 509 | r = torch.zeros_like(rotdiff[:, 0]) 510 | r[:, 0] = -rotdiff[:, 1, 2] 511 | r[:, 1] = rotdiff[:, 0, 2] 512 | r[:, 2] = -rotdiff[:, 0, 1] 513 | r_norm = torch.norm(r, dim=1) 514 | sintheta = r_norm / 2 515 | r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001) 516 | t1 = R[:, 0, 0] 517 | t2 = R[:, 1, 1] 518 | t3 = R[:, 2, 2] 519 | costheta = (t1 + t2 + t3 - 1) / 2 520 | theta = torch.atan2(sintheta, costheta) 521 | q = torch.zeros(R.shape[0], 4).float().cuda() 522 | q[:, 0] = torch.cos(theta / 2) 523 | q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3)) 524 | 525 | return q 526 | 527 | 528 | def expmap2quat_torch(exp): 529 | """ 530 | Converts expmap to quaternion 531 | batch pytorch version ported from the corresponding numpy method above 532 | :param R: N*3 533 | :return: N*4 534 | """ 535 | theta = torch.norm(exp, p=2, dim=1).unsqueeze(1) 536 | v = torch.div(exp, theta.repeat(1, 3) + 0.0000001) 537 | sinhalf = torch.sin(theta / 2) 538 | coshalf = torch.cos(theta / 2) 539 | q1 = torch.mul(v, sinhalf.repeat(1, 3)) 540 | q = torch.cat((coshalf, q1), dim=1) 541 | return q 542 | 543 | 544 | def expmap2rotmat_torch(r): 545 | """ 546 | Converts expmap matrix to rotation 547 | batch pytorch version ported from the corresponding method above 548 | :param r: N*3 549 | :return: N*3*3 550 | """ 551 | theta = torch.norm(r, 2, 1) 552 | r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001) 553 | r1 = torch.zeros_like(r0).repeat(1, 3) 554 | r1[:, 1] = -r0[:, 2] 555 | r1[:, 2] = r0[:, 1] 556 | r1[:, 5] = -r0[:, 0] 557 | r1 = r1.view(-1, 3, 3) 558 | r1 = r1 - r1.transpose(1, 2) 559 | n = r1.data.shape[0] 560 | R = torch.eye(3, 3).repeat(n, 1, 1).float().cuda() + torch.mul( 561 | torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul( 562 | (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1)) 563 | return R 564 | 565 | 566 | def expmap2xyz_torch(expmap): 567 | """ 568 | convert expmaps to joint locations 569 | :param expmap: N*99 570 | :return: N*32*3 571 | """ 572 | parent, offset, rotInd, expmapInd = forward_kinematics._some_variables() 573 | xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd) 574 | return xyz 575 | 576 | 577 | def get_dct_matrix(N): 578 | dct_m = np.eye(N) 579 | for k in np.arange(N): 580 | for i in np.arange(N): 581 | w = np.sqrt(2 / N) 582 | if k == 0: 583 | w = np.sqrt(1 / N) 584 | dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N) 585 | idct_m = np.linalg.inv(dct_m) 586 | return dct_m, idct_m 587 | 588 | 589 | def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10): 590 | """ 591 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 592 | which originaly from 593 | In order to find the same action indices as in 
SRNN. 594 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 595 | """ 596 | 597 | # Used a fixed dummy seed, following 598 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 599 | SEED = 1234567890 600 | rng = np.random.RandomState(SEED) 601 | 602 | T1 = frame_num1 - 150 603 | T2 = frame_num2 - 150 # seq_len 604 | idxo1 = None 605 | idxo2 = None 606 | for _ in np.arange(0, 128): 607 | idx_ran1 = rng.randint(16, T1) 608 | idx_ran2 = rng.randint(16, T2) 609 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) 610 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) 611 | if idxo1 is None: 612 | idxo1 = idxs1 613 | idxo2 = idxs2 614 | else: 615 | idxo1 = np.vstack((idxo1, idxs1)) 616 | idxo2 = np.vstack((idxo2, idxs2)) 617 | return idxo1, idxo2 618 | 619 | 620 | def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10): 621 | """ 622 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478 623 | which originaly from 624 | In order to find the same action indices as in SRNN. 625 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325 626 | """ 627 | 628 | # Used a fixed dummy seed, following 629 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29 630 | SEED = 1234567890 631 | rng = np.random.RandomState(SEED) 632 | 633 | T1 = frame_num1 - 150 634 | T2 = frame_num2 - 150 # seq_len 635 | idxo1 = None 636 | idxo2 = None 637 | for _ in np.arange(0, 4): 638 | idx_ran1 = rng.randint(16, T1) 639 | idx_ran2 = rng.randint(16, T2) 640 | # print("subact1 {}".format(idx_ran1)) 641 | # print("subact2 {}".format(idx_ran2)) 642 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len) 643 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len) 644 | if idxo1 is None: 645 | idxo1 = idxs1 646 | idxo2 = idxs2 647 | else: 648 | idxo1 = np.vstack((idxo1, idxs1)) 649 | idxo2 = np.vstack((idxo2, idxs2)) 650 | return idxo1, idxo2 651 | 652 | -------------------------------------------------------------------------------- /utils/forward_kinematics.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | from torch.autograd.variable import Variable 5 | from utils import data_utils 6 | 7 | 8 | def fkl(angles, parent, offset, rotInd, expmapInd): 9 | """ 10 | Convert joint angles and bone lenghts into the 3d points of a person. 
11 | adapted from 12 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14 13 | which originaly based on expmap2xyz.m, available at 14 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m 15 | Args 16 | angles: 99-long vector with 3d position and 3d joint angles in expmap format 17 | parent: 32-long vector with parent-child relationships in the kinematic tree 18 | offset: 96-long vector with bone lenghts 19 | rotInd: 32-long list with indices into angles 20 | expmapInd: 32-long list with indices into expmap angles 21 | Returns 22 | xyz: 32x3 3d points that represent a person in 3d space 23 | """ 24 | 25 | assert len(angles) == 99 26 | 27 | # Structure that indicates parents for each joint 28 | njoints = 32 29 | xyzStruct = [dict() for x in range(njoints)] 30 | 31 | for i in np.arange(njoints): 32 | 33 | # if not rotInd[i]: # If the list is empty 34 | # xangle, yangle, zangle = 0, 0, 0 35 | # else: 36 | # xangle = angles[rotInd[i][0] - 1] 37 | # yangle = angles[rotInd[i][1] - 1] 38 | # zangle = angles[rotInd[i][2] - 1] 39 | if i == 0: 40 | xangle = angles[0] 41 | yangle = angles[1] 42 | zangle = angles[2] 43 | thisPosition = np.array([xangle, yangle, zangle]) 44 | else: 45 | thisPosition = np.array([0, 0, 0]) 46 | 47 | r = angles[expmapInd[i]] 48 | 49 | thisRotation = data_utils.expmap2rotmat(r) 50 | 51 | if parent[i] == -1: # Root node 52 | xyzStruct[i]['rotation'] = thisRotation 53 | xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition 54 | else: 55 | xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \ 56 | xyzStruct[parent[i]]['xyz'] 57 | xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation']) 58 | 59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)] 60 | xyz = np.array(xyz).squeeze() 61 | # xyz = xyz[:, [0, 2, 1]] 62 | # xyz = xyz[:,[2,0,1]] 63 | 64 | return xyz 65 | 66 | 67 | def _some_variables(): 68 | """ 69 | borrowed from 70 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100 71 | We define some variables that are useful to run the kinematic tree 72 | Args 73 | None 74 | Returns 75 | parent: 32-long vector with parent-child relationships in the kinematic tree 76 | offset: 96-long vector with bone lenghts 77 | rotInd: 32-long list with indices into angles 78 | expmapInd: 32-long list with indices into expmap angles 79 | """ 80 | 81 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13, 82 | 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1 83 | 84 | offset = np.array( 85 | [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000, 86 | -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000, 87 | 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426, 88 | 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000, 89 | 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681, 90 | 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000, 91 | 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000, 92 | 0.000000, 0.000000, 0.000000, 
257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924, 93 | 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888, 94 | 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000]) 95 | offset = offset.reshape(-1, 3) 96 | 97 | rotInd = [[5, 6, 4], 98 | [8, 9, 7], 99 | [11, 12, 10], 100 | [14, 15, 13], 101 | [17, 18, 16], 102 | [], 103 | [20, 21, 19], 104 | [23, 24, 22], 105 | [26, 27, 25], 106 | [29, 30, 28], 107 | [], 108 | [32, 33, 31], 109 | [35, 36, 34], 110 | [38, 39, 37], 111 | [41, 42, 40], 112 | [], 113 | [44, 45, 43], 114 | [47, 48, 46], 115 | [50, 51, 49], 116 | [53, 54, 52], 117 | [56, 57, 55], 118 | [], 119 | [59, 60, 58], 120 | [], 121 | [62, 63, 61], 122 | [65, 66, 64], 123 | [68, 69, 67], 124 | [71, 72, 70], 125 | [74, 75, 73], 126 | [], 127 | [77, 78, 76], 128 | []] 129 | 130 | expmapInd = np.split(np.arange(4, 100) - 1, 32) 131 | 132 | return parent, offset, rotInd, expmapInd 133 | 134 | 135 | def _some_variables_cmu(): 136 | """ 137 | We define some variables that are useful to run the kinematic tree 138 | Args 139 | None 140 | Returns 141 | parent: 32-long vector with parent-child relationships in the kinematic tree 142 | offset: 96-long vector with bone lenghts 143 | rotInd: 32-long list with indices into angles 144 | expmapInd: 32-long list with indices into expmap angles 145 | """ 146 | 147 | parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16, 148 | 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1 149 | 150 | offset = 70 * np.array( 151 | [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000, 152 | 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0, 153 | 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000, 154 | -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000, 155 | 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000, 156 | 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000, 157 | 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000, 158 | 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000, 159 | 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0, 160 | 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000, 161 | -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0, 162 | -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000]) 163 | offset = offset.reshape(-1, 3) 164 | 165 | rotInd = [[6, 5, 4], 166 | [9, 8, 7], 167 | [12, 11, 10], 168 | [15, 14, 13], 169 | [18, 17, 16], 170 | [21, 20, 19], 171 | [], 172 | [24, 23, 22], 173 | [27, 26, 25], 174 | [30, 29, 28], 175 | [33, 32, 31], 176 | [36, 35, 34], 177 | [], 178 | [39, 38, 37], 179 | [42, 41, 40], 180 | [45, 44, 43], 181 | [48, 47, 46], 182 | [51, 50, 49], 183 | [54, 53, 52], 184 | [], 185 | [57, 56, 55], 186 | [60, 59, 58], 187 | [63, 62, 61], 188 | [66, 65, 64], 189 | [69, 68, 67], 190 | [72, 71, 70], 191 | [], 192 | [75, 74, 73], 193 | [], 194 | [78, 77, 76], 195 | [81, 80, 79], 196 | [84, 83, 82], 

def _some_variables_cmu():
    """
    We define some variables that are useful to run the kinematic tree
    Args
      None
    Returns
      parent: 38-long vector with parent-child relationships in the kinematic tree
      offset: 114-long vector with bone lengths
      posInd: 38-long list with indices into angles (only the root has position indices)
      expmapInd: 38-long list with indices into expmap angles
    """

    parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16,
                       21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1

    offset = 70 * np.array(
        [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000,
         0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0,
         1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000,
         -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000,
         2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000,
         0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000,
         0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000,
         1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000,
         4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0,
         0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000,
         -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0,
         -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000])
    offset = offset.reshape(-1, 3)

    rotInd = [[6, 5, 4],
              [9, 8, 7],
              [12, 11, 10],
              [15, 14, 13],
              [18, 17, 16],
              [21, 20, 19],
              [],
              [24, 23, 22],
              [27, 26, 25],
              [30, 29, 28],
              [33, 32, 31],
              [36, 35, 34],
              [],
              [39, 38, 37],
              [42, 41, 40],
              [45, 44, 43],
              [48, 47, 46],
              [51, 50, 49],
              [54, 53, 52],
              [],
              [57, 56, 55],
              [60, 59, 58],
              [63, 62, 61],
              [66, 65, 64],
              [69, 68, 67],
              [72, 71, 70],
              [],
              [75, 74, 73],
              [],
              [78, 77, 76],
              [81, 80, 79],
              [84, 83, 82],
              [87, 86, 85],
              [90, 89, 88],
              [93, 92, 91],
              [],
              [96, 95, 94],
              []]

    # Only the root joint (index 0) has position indices into `angles`.
    posInd = []
    for ii in np.arange(38):
        if ii == 0:
            posInd.append([1, 2, 3])
        else:
            posInd.append([])

    expmapInd = np.split(np.arange(4, 118) - 1, 38)

    # Note: rotInd is kept above for reference; callers receive posInd instead.
    return parent, offset, posInd, expmapInd


def fkl_torch(angles, parent, offset, rotInd, expmapInd):
    """
    pytorch version of fkl.

    convert joint angles to joint locations
    batch pytorch version of the fkl() method above
    :param angles: N*99
    :param parent: parent-joint index per joint (as returned by _some_variables)
    :param offset: j_n x 3 matrix of bone offsets
    :param rotInd: unused here, kept for API parity with fkl()
    :param expmapInd: unused here, kept for API parity with fkl()
    :return: N*joint_n*3
    """
    n = angles.data.shape[0]
    j_n = offset.shape[0]
    # Start every joint at its bone offset, then accumulate down the tree.
    p3d = Variable(torch.from_numpy(offset)).float().cuda().unsqueeze(0).repeat(n, 1, 1)
    # Drop the global root translation; the remaining 96 values are 32 expmaps.
    angles = angles[:, 3:].contiguous().view(-1, 3)
    R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3)
    for i in np.arange(1, j_n):
        if parent[i] > 0:
            R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone()
            p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :]
    # Children of the root are skipped by the `parent[i] > 0` test, so the
    # root's global rotation is never applied: the output is root-centred.
    return p3d
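
# Batched-usage sketch (assumptions: a CUDA device is available, since
# fkl_torch() moves its tensors to the GPU unconditionally, and the zero
# pose is only an illustrative input):
#
#   parent, offset, rotInd, expmapInd = _some_variables()
#   pose = np.zeros(99)
#   batch = Variable(torch.from_numpy(np.vstack((pose, pose))).float()).cuda()
#   xyz = fkl_torch(batch, parent, offset, rotInd, expmapInd)  # shape (2, 32, 3)
#   # Unlike fkl(), the result ignores the global root rotation/translation,
#   # so it matches fkl() only up to that rigid transform (exactly so for
#   # the zero pose).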

def main():
    # Load all the data
    parent, offset, rotInd, expmapInd = _some_variables()

    # numpy implementation
    # with h5py.File('samples.h5', 'r') as h5f:
    #     expmap_gt = h5f['expmap/gt/walking_0'][:]
    #     expmap_pred = h5f['expmap/preds/walking_0'][:]
    expmap_pred = np.array(
        [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215,
         -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000,
         0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
         0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127,
         1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378,
         -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570,
         -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218,
         -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
         ])
    expmap_gt = np.array(
        [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248,
         -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000,
         0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
         0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383,
         1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597,
         -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608,
         -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309,
         -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000,
         -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
         ])
    xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd)
    xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd)

    exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda()
    xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd)
    xyz = xyz.cpu().data.numpy()
    print(xyz)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------