├── MotionMixer.png
├── README.md
├── amass
│   ├── dataloader_amass.py
│   ├── mlp_mixer.py
│   ├── test_mixer_amass.py
│   └── train_mixer_amass.py
├── checkpoints
│   └── .gitignore
├── h36m
│   ├── datasets
│   │   ├── dataset_h36m.py
│   │   └── dataset_h36m_ang.py
│   ├── h36_3d_viz.py
│   ├── mlp_mixer.py
│   ├── test_mixer_h36m.py
│   ├── train_mixer_h36m.py
│   └── utils
│       ├── data_utils.py
│       ├── forward_kinematics.py
│       └── utils_mixer.py
├── requirements.txt
└── utils
    ├── ang2joint.py
    ├── body_models
    │   └── smpl_skeleton.npz
    ├── data_utils.py
    └── forward_kinematics.py
/MotionMixer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MotionMLP/MotionMixer/91327c3c3a455d398bd097fa300385bafa80a835/MotionMixer.png
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MotionMixer: MLP-based 3D Human Body Pose Forecasting
2 |
3 | Official PyTorch implementation of the paper: MotionMixer: MLP-based 3D Human Body Pose Forecasting.
4 |
5 | Arij Bouazizi, Adrian Holzbock, Ulrich Kressel, Klaus Dietmayer and Vasileios Belagiannis
6 |
7 | [[Proceedings](https://www.ijcai.org/proceedings/2022/0111.pdf)] [[Papers with Code](https://paperswithcode.com/paper/motionmixer-mlp-based-3d-human-body-pose)] [[Arxiv](https://arxiv.org/abs/2207.00499)]
8 |
23 | ## Installation
24 |
25 | To set up the environment:
26 | ```sh
27 | cd MotionMixer
28 | conda create -n MotionMixer python=3.8.8
29 | conda activate MotionMixer
30 | pip install -r requirements.txt
31 | ```
32 |
33 | ## Data
34 |
35 | Due to licensing restrictions, we cannot provide the datasets directly. Please refer to [STSGCN](https://github.com/FraLuca/STSGCN) for the preparation of the dataset files.
36 |
37 | ## Training
38 |
39 | To train the model on H3.6M or AMASS, use the following commands:
40 | ```
41 | python h36m/train_mixer_h36m.py --input_n 10 --output_n 25 --skip_rate 1
42 | ```
43 | ```
44 | python amass/train_mixer_amass.py --input_n 10 --output_n 25 --skip_rate 5
45 | ```
46 |
47 | ## Evaluation
48 |
49 | To test the pretrained models, use the following commands:
50 | ```
51 | python h36m/test_mixer_h36m.py --input_n 10 --output_n 25 --skip_rate 1
52 | ```
53 | ```
54 | python amass/test_mixer_amass.py --input_n 10 --output_n 25 --skip_rate 5
55 | ```
56 |
57 | ## Models
58 |
59 | We release the pretrained models for academic purposes. You can download them from [Google Drive](https://drive.google.com/drive/folders/1SrZpoe__Q3YXdk_TrtcxeJzRQiKAWoT5). Unzip the .zip file in the `/checkpoints` directory.
60 |
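For example, assuming the downloaded archive is named `checkpoints.zip` (the actual file name may differ):
```sh
unzip checkpoints.zip -d ./checkpoints
```
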
61 | ## Citation
62 |
63 | If you find this code useful for your research, please consider citing the following paper:
64 |
65 | ```latex
66 | @inproceedings{ijcai2022p111,
67 | title = {MotionMixer: MLP-based 3D Human Body Pose Forecasting},
68 | author = {Bouazizi, Arij and Holzbock, Adrian and Kressel, Ulrich and Dietmayer, Klaus and Belagiannis, Vasileios},
69 | booktitle = {Proceedings of the Thirty-First International Joint Conference on
70 | Artificial Intelligence, {IJCAI-22}},
71 | publisher = {International Joint Conferences on Artificial Intelligence Organization},
72 | pages = {791--798},
73 | year = {2022},
74 | month = {7},
75 | }
76 |
77 | ```
78 |
79 | ## Acknowledgments
80 |
81 | Some of our code was adapted from [HisRepsItself](https://github.com/wei-mao-2019/HisRepItself) and [STSGCN](https://github.com/FraLuca/STSGCN). We thank the authors for making their code public.
82 |
83 | ## License
84 |
85 |
86 | This work is licensed under a Creative Commons Attribution-NonCommercial 4.0 International License.
87 |
--------------------------------------------------------------------------------
/amass/dataloader_amass.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.data import Dataset,DataLoader
3 | import numpy as np
4 | #from h5py import File
5 | #import scipy.io as sio
6 | from matplotlib import pyplot as plt
7 | import torch
8 | import os
9 | from utils.ang2joint import *
10 | import networkx as nx
11 |
12 | '''
13 | adapted from
14 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/amass3d.py
15 | '''
16 |
17 |
18 | class Datasets(Dataset):
19 |
20 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0):
21 |
22 |         """
23 |         :param data_dir: path to the AMASS dataset directory
24 |         :param input_n: number of input frames
25 |         :param output_n: number of output frames
26 |         :param skip_rate: stride between the start frames of consecutive samples
27 |         :param actions: unused, kept for a uniform dataset interface
28 |         :param split: 0 train, 1 validation, 2 test
29 |         """
31 |         self.path_to_data = os.path.join(data_dir, 'AMASS')
32 | self.split = split
33 | self.in_n = input_n
34 | self.out_n = output_n
35 | # self.sample_rate = opt.sample_rate
36 | self.p3d = []
37 | self.keys = []
38 | self.data_idx = []
39 |         self.joint_used = np.arange(4, 22)  # joints 4..21: the 18 moving joints, skipping the static ones
40 | seq_len = self.in_n + self.out_n
41 |
42 | amass_splits = [
43 | ['CMU', 'MPI_Limits', 'TotalCapture', 'Eyes_Japan_Dataset', 'KIT', 'EKUT', 'TCD_handMocap', 'ACCAD'],
44 | ['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'],
45 | ['BioMotionLab_NTroje'],
46 | ]
47 |
48 |
49 | # amass_splits = [
50 | # ['CMU'],
51 | # #['HumanEva', 'MPI_HDM05', 'SFU', 'MPI_mosh'],
52 | # #['BioMotionLab_NTroje'],
53 | # ]
54 |
55 | # amass_splits = [['BioMotionLab_NTroje'], ['HumanEva'], ['SSM_synced']]
56 | # amass_splits = [['HumanEva'], ['HumanEva'], ['HumanEva']]
57 | # amass_splits[0] = list(
58 | # set(amass_splits[0]).difference(set(amass_splits[1] + amass_splits[2])))
59 |
60 | # from human_body_prior.body_model.body_model import BodyModel
61 | # from smplx import lbs
62 | # root_path = os.path.dirname(__file__)
63 | # bm_path = root_path[:-6] + '/body_models/smplh/neutral/model.npz'
64 | # bm = BodyModel(bm_path=bm_path, num_betas=16, batch_size=1, model_type='smplh')
65 | # beta_mean = np.array([0.41771687, 0.25984767, 0.20500051, 0.13503872, 0.25965645, -2.10198147, -0.11915666,
66 | # -0.5498772, 0.30885323, 1.4813145, -0.60987528, 1.42565269, 2.45862726, 0.23001716,
67 | # -0.64180912, 0.30231911])
68 | # beta_mean = torch.from_numpy(beta_mean).unsqueeze(0).float()
69 | # # Add shape contribution
70 | # v_shaped = bm.v_template + lbs.blend_shapes(beta_mean, bm.shapedirs)
71 | # # Get the joints
72 | # # NxJx3 array
73 | # p3d0 = lbs.vertices2joints(bm.J_regressor, v_shaped) # [1,52,3]
74 | # p3d0 = (p3d0 - p3d0[:, 0:1, :]).float().cuda().cpu().data.numpy()
75 | # parents = bm.kintree_table.data.numpy()[0, :]
76 | # np.savez_compressed('smpl_skeleton.npz', p3d0=p3d0, parents=parents)
77 |
78 |         # load the mean skeleton; the path assumes the scripts are run from the repo root, as in the README
79 |         skel = np.load('./utils/body_models/smpl_skeleton.npz')
80 | p3d0 = torch.from_numpy(skel['p3d0']).float().cuda()
81 | parents = skel['parents']
82 | parent = {}
83 | for i in range(len(parents)):
84 | parent[i] = parents[i]
85 | n = 0
86 | for ds in amass_splits[split]:
87 |             if not os.path.isdir(self.path_to_data + ds):
88 |                 print('>>> skipping missing dataset directory {}'.format(ds))
89 |                 continue
90 | print('>>> loading {}'.format(ds))
91 | for sub in os.listdir(self.path_to_data + ds):
92 | #print ("working in ",self.path_to_data + ds)
93 |
94 | if not os.path.isdir(self.path_to_data + ds + '/' + sub):
95 | continue
96 | for act in os.listdir(self.path_to_data + ds + '/' + sub):
97 | #print ("poses path",self.path_to_data + ds + '/' + sub + '/' + act)
98 |
99 | #print (act)
100 | if not act.endswith('.npz'):
101 |
102 | continue
103 | # if not ('walk' in act or 'jog' in act or 'run' in act or 'treadmill' in act):
104 | # continue
105 |
106 | pose_all = np.load(self.path_to_data + ds + '/' + sub + '/' + act)
107 |                     try:
108 |                         poses = pose_all['poses']
109 |                     except KeyError:
110 |                         print('no poses at {}_{}_{}'.format(ds, sub, act))
111 |                         continue
112 | frame_rate = pose_all['mocap_framerate']
113 | # gender = pose_all['gender']
114 | # dmpls = pose_all['dmpls']
115 | # betas = pose_all['betas']
116 | # trans = pose_all['trans']
117 | fn = poses.shape[0]
118 | sample_rate = int(frame_rate // 25)
119 | fidxs = range(0, fn, sample_rate)
120 | fn = len(fidxs)
121 | poses = poses[fidxs]
122 | poses = torch.from_numpy(poses).float().cuda()
123 | poses = poses.reshape([fn, -1, 3])
124 | # remove global rotation
125 | poses[:, 0] = 0
126 | p3d0_tmp = p3d0.repeat([fn, 1, 1])
127 | p3d = ang2joint(p3d0_tmp, poses, parent)
128 | # self.p3d[(ds, sub, act)] = p3d.cpu().data.numpy()
129 | self.p3d.append(p3d.cpu().data.numpy())
130 |                     # the same stride is used for every split
131 |                     valid_frames = np.arange(0, fn - seq_len + 1, skip_rate)
134 |
135 | # tmp_data_idx_1 = [(ds, sub, act)] * len(valid_frames)
136 | self.keys.append((ds, sub, act))
137 | tmp_data_idx_1 = [n] * len(valid_frames)
138 | tmp_data_idx_2 = list(valid_frames)
139 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
140 | n += 1
141 |
142 | def __len__(self):
143 | return np.shape(self.data_idx)[0]
144 |
145 | def __getitem__(self, item):
146 | key, start_frame = self.data_idx[item]
147 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n)
148 | return self.p3d[key][fs] # , key
149 |
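# Shape note (following the loading code above): each sample returned by
# __getitem__ is an array of shape (input_n + output_n, 52, 3) -- the full
# SMPL-H joint set recovered from smpl_skeleton.npz; the training scripts
# then keep only the 18 moving joints via joint_used = np.arange(4, 22).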
150 |
152 |
153 |
154 | def normalize_A(A):  # symmetrically normalize an adjacency matrix: D^-1/2 (A+I) D^-1/2
155 |
156 |     A = A + np.eye(A.shape[0])
157 |     D = np.sum(A, axis=0)
158 |     D = np.diag(D.A1)
159 |     D_inv = D**-0.5
160 |     D_inv[D_inv == np.inf] = 0  # guard against division by zero for isolated nodes
161 |
162 |     return D_inv * A * D_inv
168 |
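# A quick sanity check (illustrative only): for a two-node chain graph the
# normalized matrix is uniform, since A + I has degree 2 at every node.
#
#   A = np.matrix([[0, 1], [1, 0]])
#   normalize_A(A)   # -> matrix([[0.5, 0.5], [0.5, 0.5]])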
169 |
171 |
172 |
173 | def spatio_temporal_graph(joints_to_consider, temporal_kernel_size, spatial_adjacency_matrix):
174 |     # given a normalized spatial adjacency matrix, builds a spatio-temporal adjacency matrix
175 |     number_of_joints = joints_to_consider
177 |
178 | spatio_temporal_adj=np.zeros((temporal_kernel_size,number_of_joints,number_of_joints))
179 | for t in range(temporal_kernel_size):
180 | for i in range(number_of_joints):
181 | spatio_temporal_adj[t,i,i]=1 # create edge between same body joint,for t consecutive frames
182 | for j in range(number_of_joints):
183 | if spatial_adjacency_matrix[i,j]!=0: # if the body joints are connected
184 | spatio_temporal_adj[t,i,j]=spatial_adjacency_matrix[i,j]
185 | return spatio_temporal_adj
186 |
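# e.g. spatio_temporal_graph(22, 10, A) with A of shape (22, 22) returns an
# array of shape (10, 22, 22): the spatial graph stacked over 10 frames, with
# the diagonal (joint self-connections) forced to 1 at every time step.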
187 |
189 |
190 |
191 | def get_adj_AMASS(joints_to_consider,temporal_kernel_size): # returns adj.matrix to be fed to the network
192 | if joints_to_consider==22:
193 | edgelist = [
194 | (0, 1), (0, 2), #(0, 3),
195 | (1, 4), (5, 2), #(3, 6),
196 | (7, 4), (8, 5), #(6, 9),
197 | (7, 10), (8, 11), #(9, 12),
198 | #(12, 13), (12, 14),
199 | (12, 15),
200 | #(13, 16), (12, 16), (14, 17), (12, 17),
201 | (12, 16), (12, 17),
202 | (16, 18), (19, 17), (20, 18), (21, 19),
203 | #(22, 20), #(23, 21),#wrists
204 | (1, 16), (2, 17)]
205 |
206 | # create a graph
207 | G=nx.Graph()
208 | G.add_edges_from(edgelist)
209 | # create adjacency matrix
210 | A = nx.adjacency_matrix(G,nodelist=list(range(0,joints_to_consider))).todense()
211 | #normalize adjacency matrix
212 | A=normalize_A(A)
213 | return torch.Tensor(spatio_temporal_graph(joints_to_consider,temporal_kernel_size,A))
214 |
215 |
217 |
218 |
219 | def mpjpe_error(batch_pred, batch_gt):
220 |     # Mean Per Joint Position Error: mean Euclidean distance between
221 |     # predicted and ground-truth joint positions
222 |     batch_pred = batch_pred.contiguous().view(-1, 3)
223 |     batch_gt = batch_gt.contiguous().view(-1, 3)
224 |
225 |     return torch.mean(torch.norm(batch_gt - batch_pred, 2, 1))
228 |
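# Illustrative usage (shapes as in the training scripts; inputs are in meters,
# so the callers multiply by 1000 to report millimeters):
#
#   pred = torch.rand(8, 25, 18, 3)   # [batch, frames, joints, 3]
#   gt = torch.rand(8, 25, 18, 3)
#   mpjpe_error(pred, gt)             # scalar tensor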
229 |
--------------------------------------------------------------------------------
/amass/mlp_mixer.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import torch.nn.functional as F
4 |
5 |
6 | class SELayer(nn.Module):
7 | def __init__(self, c, r=4, use_max_pooling=False):
8 | super().__init__()
9 | self.squeeze = nn.AdaptiveAvgPool1d(1) if not use_max_pooling else nn.AdaptiveMaxPool1d(1)
10 | self.excitation = nn.Sequential(
11 | nn.Linear(c, c // r, bias=False),
12 | nn.ReLU(inplace=True),
13 | nn.Linear(c // r, c, bias=False),
14 | nn.Sigmoid()
15 | )
16 |
17 |
18 | def forward(self, x):
19 | bs, s, h = x.shape
20 | y = self.squeeze(x).view(bs, s)
21 | y = self.excitation(y).view(bs, s, 1)
22 | return x * y.expand_as(x)
23 |
24 |
25 |
26 |
27 | def mish(x):
28 | return (x*torch.tanh(F.softplus(x)))
29 |
30 |
31 |
32 |
33 | class MlpBlock(nn.Module):
34 | def __init__(self, mlp_hidden_dim, mlp_input_dim, mlp_bn_dim, activation='gelu', regularization=0, initialization='none'):
35 | super().__init__()
36 | self.mlp_hidden_dim = mlp_hidden_dim
37 | self.mlp_input_dim = mlp_input_dim
38 | self.mlp_bn_dim = mlp_bn_dim
39 | #self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_input_dim)
40 | self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_hidden_dim)
41 | self.fc2 = nn.Linear(self.mlp_hidden_dim, self.mlp_input_dim)
42 | if regularization > 0.0:
43 | self.reg1 = nn.Dropout(regularization)
44 | self.reg2 = nn.Dropout(regularization)
45 | elif regularization == -1.0:
46 | self.reg1 = nn.BatchNorm1d(self.mlp_bn_dim)
47 | self.reg2 = nn.BatchNorm1d(self.mlp_bn_dim)
48 | else:
49 | self.reg1 = None
50 | self.reg2 = None
51 |
52 | if activation == 'gelu':
53 | self.act1 = nn.GELU()
54 | elif activation == 'mish':
55 | self.act1 = mish #nn.Mish()
56 | else:
57 | raise ValueError('Unknown activation function type: %s'%activation)
58 |
59 |
60 |
61 |
62 | def forward(self, x):
63 | x = self.fc1(x)
64 | x = self.act1(x)
65 | if self.reg1 is not None:
66 | x = self.reg1(x)
67 | x = self.fc2(x)
68 | if self.reg2 is not None:
69 | x = self.reg2(x)
70 |
71 | return x
72 |
73 |
74 |
75 | class MixerBlock(nn.Module):
76 | def __init__(self, tokens_mlp_dim, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
77 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
78 | super().__init__()
79 | self.tokens_mlp_dim = tokens_mlp_dim
80 | self.channels_mlp_dim = channels_mlp_dim
81 | self.seq_len = seq_len
82 | self.hidden_dim = hidden_dim # out channels of the conv
83 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization)
84 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization)
85 | self.use_se = use_se
86 | if self.use_se:
87 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
88 |
89 | self.LN1 = nn.LayerNorm(self.hidden_dim)
90 | self.LN2 = nn.LayerNorm(self.hidden_dim)
91 |
92 |
93 |
94 | def forward(self, x):
95 |         # shape x [256, 8, 512] [bs, patches/time_steps, channels]
96 | y = self.LN1(x)
97 |
98 | y = y.transpose(1, 2)
99 | y = self.mlp_block_token_mixing(y)
100 | y = y.transpose(1, 2)
101 |
102 | if self.use_se:
103 | y = self.se(y)
104 | x = x + y
105 |
106 | y = self.LN2(x)
107 | y = self.mlp_block_channel_mixing(y)
108 |
109 | if self.use_se:
110 | y = self.se(y)
111 |
112 | return x + y
113 |
114 |
115 |
116 | class MixerBlock_Channel(nn.Module):
117 | def __init__(self, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
118 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
119 | super().__init__()
120 | self.channels_mlp_dim = channels_mlp_dim
121 | self.seq_len = seq_len
122 | self.hidden_dim = hidden_dim # out channels of the conv
123 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization)
124 | self.use_se = use_se
125 | if self.use_se:
126 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
127 |
128 |
129 | self.LN2 = nn.LayerNorm(self.hidden_dim)
130 |
131 | #self.act1 = nn.GELU()
132 |
133 | def forward(self, x):
134 | # shape x [256, 8, 512] [bs, patches/time_steps, channels]
135 | y = x
136 |
137 | if self.use_se:
138 | y = self.se(y)
139 | x = x + y
140 | y = self.LN2(x)
141 | y = self.mlp_block_channel_mixing(y)
142 | if self.use_se:
143 | y = self.se(y)
144 |
145 | return x + y
146 |
147 |
148 |
149 | class MixerBlock_Token(nn.Module):
150 | def __init__(self, tokens_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
151 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
152 | super().__init__()
153 | self.tokens_mlp_dim = tokens_mlp_dim
154 |
155 | self.seq_len = seq_len
156 | self.hidden_dim = hidden_dim # out channels of the conv
157 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization)
158 |
159 | self.use_se = use_se
160 |
161 | if self.use_se:
162 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
163 |
164 | self.LN1 = nn.LayerNorm(self.hidden_dim)
165 |
166 |
167 | def forward(self, x):
168 | # shape x [256, 8, 512] [bs, patches/time_steps, channels]
169 | y = self.LN1(x)
170 | y = y.transpose(1, 2)
171 | y = self.mlp_block_token_mixing(y)
172 | y = y.transpose(1, 2)
173 |
174 | if self.use_se:
175 | y = self.se(y)
176 | x = x + y
177 |
178 | return x + y
179 |
180 |
196 |
197 | class MlpMixer(nn.Module):
198 | def __init__(self, num_classes, num_blocks, hidden_dim, tokens_mlp_dim, channels_mlp_dim, seq_len, pred_len, activation='gelu', mlp_block_type='normal',
199 | regularization=0, input_size=51, initialization='none', r_se=4, use_max_pooling=False, use_se=False):
200 | super().__init__()
201 | self.num_classes = num_classes
202 | self.num_blocks = num_blocks
203 | self.hidden_dim = hidden_dim
204 | self.seq_len = seq_len
205 | #self.pred_len = 25
206 | self.tokens_mlp_dim = tokens_mlp_dim
207 | self.channels_mlp_dim = channels_mlp_dim
208 |         self.input_size = input_size  # varies with the number of joints
209 | self.conv = nn.Conv1d(1, self.hidden_dim, (1, self.input_size), stride=1)
210 | self.activation = activation
211 | self.Mixer_Block = nn.ModuleList(MixerBlock(self.tokens_mlp_dim, self.channels_mlp_dim, self.seq_len, self.hidden_dim, activation=self.activation,
212 | regularization=regularization, initialization=initialization, r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se) for _ in range(num_blocks))
213 | self.LN = nn.LayerNorm(self.hidden_dim)
214 | self.fc_out = nn.Linear(self.hidden_dim, self.num_classes)
215 | self.pred_len = pred_len
216 | self.conv_out = nn.Conv1d(self.seq_len, self.pred_len, 1, stride=1)
217 |
218 |
219 |
220 | def forward(self, x): #, padded
221 |
222 | x = x.unsqueeze(1)
223 | y = self.conv(x)
224 |
225 |         y = y.squeeze(-1).transpose(1, 2)  # [bs, hidden_dim, seq_len, 1] -> [bs, seq_len, hidden_dim]
226 | # [256, 8, 512] [bs, patches/time_steps, channels]
227 | for mb in self.Mixer_Block:
228 | y = mb(y)
229 |
230 | y = self.LN(y)
231 |
232 |         # map the seq_len observed frames to pred_len future frames, then
233 |         # project hidden_dim back to the pose dimension
234 |         out = self.fc_out(self.conv_out(y))
235 |
236 |         return out
241 |
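# A minimal shape sketch (illustrative, mirroring the AMASS defaults used in
# train_mixer_amass.py: 18 joints * 3 coords = 54 features, 10 frames in,
# 25 frames out):
#
#   model = MlpMixer(num_classes=54, num_blocks=5, hidden_dim=128,
#                    tokens_mlp_dim=20, channels_mlp_dim=128,
#                    seq_len=10, pred_len=25, input_size=54)
#   x = torch.rand(4, 10, 54)   # [bs, seq_len, input_size]
#   y = model(x)                # [bs, pred_len, num_classes] = [4, 25, 54]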
242 |
243 |
244 |
245 |
246 |
247 |
--------------------------------------------------------------------------------
/amass/test_mixer_amass.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import numpy as np
4 | import torch
5 | from torch.utils.data import DataLoader
6 |
7 | from utils.ang2joint import *
8 | from dataloader_amass import *
9 | from mlp_mixer import MlpMixer
16 |
17 |
18 |
19 |
20 | def test_mixer(model, args):
21 |
22 |     device = args.dev
23 |     model.eval()
24 |     accum_loss = 0
25 |     n = 0  # number of sequences accumulated so far
26 |
27 |     dataset = Datasets(args.data_dir, args.input_n, args.output_n, args.skip_rate, split=2)
28 |     loader_test = DataLoader(dataset, batch_size=args.batch_size,
29 |                              shuffle=False, num_workers=0)
33 |
34 |
35 | joint_used=np.arange(4,22)
36 | full_joint_used=np.arange(0,22) # needed for visualization
37 | with torch.no_grad():
38 | for cnt,batch in enumerate(loader_test):
39 | batch = batch.float().to(device)
40 | batch_dim=batch.shape[0]
41 | n+=batch_dim
42 |
43 | sequences_train=batch[:,0:args.input_n,joint_used,:].view(-1,args.input_n,args.pose_dim)
44 |
45 | sequences_predict_gt=batch[:,args.input_n:args.input_n+args.output_n,full_joint_used,:]#.view(-1,args.output_n,args.pose_dim)
46 |
47 | sequences_predict=model(sequences_train).view(-1,args.output_n,18,3)#.permute(0,1,3,2)
48 |
49 |
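            # only the 18 moving joints (indices 4..21) are predicted; the
            # remaining joints are copied from the ground truth so the error
            # can be evaluated over all 22 joints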
50 | all_joints_seq=sequences_predict_gt.clone()
51 |
52 | all_joints_seq[:,:,joint_used,:]=sequences_predict
53 |
54 |             loss=mpjpe_error(all_joints_seq,sequences_predict_gt)*1000 # loss in millimeters
55 | accum_loss+=loss*batch_dim
56 | print('overall average loss in mm is: '+str(accum_loss/n))
57 |
58 |
59 |
60 |     return accum_loss / n
61 |
62 |
63 |
64 | if __name__ == '__main__':
65 | parser = argparse.ArgumentParser(add_help=False) # Parameters for mpjpe
66 |     parser.add_argument('--data_dir', type=str, default='../data_amass/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)')
67 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames")
68 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames")
69 |     parser.add_argument('--skip_rate', type=int, default=5, choices=[1, 5], help='rate of frames to skip: 1 for H36M, 5 for AMASS/3DPW')
70 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader')
71 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging')
72 |
73 | parser.add_argument('--activation', default='gelu', type=str, required=False)
74 | parser.add_argument('--r_se', default=8, type=int, required=False)
75 |
76 | parser.add_argument('--n_epochs', default=50, type=int, required=False)
77 | parser.add_argument('--batch_size', default=50, type=int, required=False)
78 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False)
79 | parser.add_argument('--pin_memory', default=False, type=bool, required=False)
80 | parser.add_argument('--loader_workers', default=4, type=int, required=False)
81 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False)
82 | parser.add_argument('--dev', default='cuda:0', type=str, required=False)
83 |     parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, he_normal, he_uniform')
84 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False)
85 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma')
86 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs')
87 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients')
88 |     parser.add_argument('--model_path', type=str, default='./checkpoints/amass_3d_25frames_ckpt', help='path to the pretrained model checkpoint')
89 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set')
90 |     parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)')
91 |
92 | args = parser.parse_args()
93 |
94 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe
95 | parser_mpjpe.add_argument('--hidden_dim', default=128, type=int, required=False)
96 | parser_mpjpe.add_argument('--num_blocks', default=10, type=int, required=False)
97 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False)
98 | parser_mpjpe.add_argument('--channels_mlp_dim', default=128, type=int, required=False)
99 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False)
100 | parser_mpjpe.add_argument('--pose_dim', default=54, type=int, required=False)
101 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference between 2 frames')
102 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False)
103 | args = parser_mpjpe.parse_args()
104 |
105 |
106 |
107 | print(args)
108 |
109 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks,
110 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim,
111 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n,
112 | pred_len=args.output_n, activation=args.activation,
113 | mlp_block_type='normal', regularization=args.regularization,
114 | input_size=args.pose_dim, initialization='none', r_se=args.r_se,
115 | use_max_pooling=False, use_se=True)
116 |
117 | model = model.to(args.dev)
118 |
119 |
120 |     model.load_state_dict(torch.load(args.model_path, map_location=args.dev))
121 |
122 |
123 |     model.eval()
124 |
125 |
126 | test_mixer(model, args)
127 |
--------------------------------------------------------------------------------
/amass/train_mixer_amass.py:
--------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 |
4 | import numpy as np
5 | import torch
6 | import torch.optim as optim
7 | from torch.utils.data import DataLoader
8 | from torch.utils.tensorboard import SummaryWriter
9 | from tqdm import tqdm
10 |
11 | from utils.ang2joint import *
12 | from dataloader_amass import *
13 | from mlp_mixer import MlpMixer
18 |
19 |
20 |
21 | def get_log_dir(out_dir):
22 | dirs = [x[0] for x in os.walk(out_dir)]
23 |     if len(dirs) < 2:
24 | log_dir = os.path.join(out_dir, 'exp0')
25 | os.mkdir(log_dir)
26 | else:
27 | log_dir = os.path.join(out_dir, 'exp%i'%(len(dirs)-1))
28 | os.mkdir(log_dir)
29 |
30 | return log_dir
31 |
32 |
33 | #%%
34 | def train(model, model_name, args):
35 |
36 | joint_used=np.arange(4,22)
37 |
38 | log_dir = get_log_dir(args.root)
39 | tb_writer = SummaryWriter(log_dir=log_dir)
40 | print('Save data of the run in: %s'%log_dir)
41 |
42 | device = args.dev
43 |
44 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-05)
45 |
46 | if args.use_scheduler:
47 | scheduler = optim.lr_scheduler.MultiStepLR(
48 | optimizer, milestones=args.milestones, gamma=args.gamma)
49 |
50 | train_loss, val_loss, test_loss = [], [], []
51 |
52 |
53 | dataset = Datasets(args.data_dir, args.input_n,
54 | args.output_n, args.skip_rate, split=0)
55 |
56 | vald_dataset = Datasets(args.data_dir, args.input_n,
57 | args.output_n, args.skip_rate, split=1)
58 |
59 |
60 |
61 |
62 | print('>>> Training dataset length: {:d}'.format(dataset.__len__()))
63 | print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__()))
64 |
65 | data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True,
66 | num_workers=args.num_worker, pin_memory=True)
67 | vald_loader = DataLoader(vald_dataset, batch_size=args.batch_size,
68 | shuffle=True, num_workers=args.num_worker, pin_memory=True)
69 |
70 |
71 | for epoch in range(args.n_epochs):
72 | print('Run epoch: %i'%epoch)
73 | running_loss = 0
74 | n = 0
75 | model.train()
76 | for cnt, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
77 | batch = batch.to(device)
78 | batch = batch[:, :, joint_used]
79 | batch_dim = batch.shape[0]
80 | n += batch_dim
81 |
82 |
83 | sequences_train = batch[:,0:args.input_n,:,:].reshape(
84 | -1, args.input_n, args.pose_dim)
85 | sequences_gt = batch[:,args.input_n:args.input_n+args.output_n,:,:].reshape(-1, args.output_n, args.pose_dim)
86 |
87 |
88 | optimizer.zero_grad()
89 |
90 | sequences_predict=model(sequences_train)
91 |
92 | loss=mpjpe_error(sequences_predict,sequences_gt)*1000
93 |
94 | if cnt % 200 == 0:
95 | print('[%d, %5d] training loss: %.3f' %(epoch + 1, cnt + 1, loss.item()))
96 |
97 | loss.backward()
98 |
99 | if args.clip_grad is not None:
100 | torch.nn.utils.clip_grad_norm_(
101 | model.parameters(), args.clip_grad)
102 |
103 | optimizer.step()
104 |
105 | running_loss += loss*batch_dim
106 |
107 | train_loss.append(running_loss.detach().cpu()/n)
108 | model.eval()
109 | with torch.no_grad():
110 | running_loss = 0
111 | n = 0
112 | for cnt, batch in enumerate(vald_loader):
113 | batch = batch.to(device)
114 | batch = batch[:, :, joint_used]
115 | batch_dim = batch.shape[0]
116 | n += batch_dim
117 |
118 |
119 | sequences_train = batch[:,0:args.input_n,:,:].reshape(
120 | -1, args.input_n, args.pose_dim)
121 | sequences_gt = batch[:,args.input_n:args.input_n+args.output_n,:,:].reshape(-1, args.output_n, args.pose_dim)
122 |
123 |
124 | sequences_predict=model(sequences_train)
125 |
126 | loss=mpjpe_error(sequences_predict,sequences_gt)*1000
127 |
128 | if cnt % 200 == 0:
129 | print('[%d, %5d] validation loss: %.3f' %(epoch + 1, cnt + 1, loss.item()))
130 |
131 |
132 | running_loss += loss*batch_dim
133 | val_loss.append(running_loss.detach().cpu()/n)
134 | if args.use_scheduler:
135 | scheduler.step()
136 |
137 |
138 | test_loss.append(test_mpjpe(model, args))
139 |
140 |
141 | tb_writer.add_scalar('loss/train', train_loss[-1].item(), epoch)
142 | tb_writer.add_scalar('loss/val', val_loss[-1].item(), epoch)
143 |
144 | torch.save(model.state_dict(), os.path.join(log_dir, 'model.pt'))
145 | # TODO write something to save the best model
146 |         print('----saving model-----')
147 |         torch.save(model.state_dict(), os.path.join(args.model_path, model_name))
149 |
150 |
151 |
152 | #%%
153 | def test_mpjpe(model, args):
154 |
155 |     device = args.dev
156 |     model.eval()
157 |     accum_loss = 0
158 |     n = 0  # number of sequences accumulated so far
159 |
160 |     dataset = Datasets(args.data_dir, args.input_n, args.output_n, args.skip_rate, split=2)
161 |     loader_test = DataLoader(dataset,
162 |                              batch_size=args.batch_size,
163 |                              shuffle=False,
164 |                              num_workers=0)
165 |
174 | joint_used=np.arange(4,22)
175 | full_joint_used=np.arange(0,22) # needed for visualization
176 | with torch.no_grad():
177 | for cnt,batch in enumerate(loader_test):
178 | batch = batch.float().to(device)
179 | batch_dim=batch.shape[0]
180 | n+=batch_dim
181 |
182 | sequences_train=batch[:,0:args.input_n,joint_used,:].view(-1,args.input_n,args.pose_dim)
183 |
184 | sequences_predict_gt=batch[:,args.input_n:args.input_n+args.output_n,full_joint_used,:]#.view(-1,args.output_n,args.pose_dim)
185 |
186 | sequences_predict=model(sequences_train).view(-1,args.output_n,18,3)#.permute(0,1,3,2)
187 |
188 |
189 | all_joints_seq=sequences_predict_gt.clone()
190 |
191 | all_joints_seq[:,:,joint_used,:]=sequences_predict
192 |
193 |             loss=mpjpe_error(all_joints_seq,sequences_predict_gt)*1000 # loss in millimeters
194 | accum_loss+=loss*batch_dim
195 | print('overall average loss in mm is: '+str(accum_loss/n))
196 |
197 |
198 |
199 |     return accum_loss / n
200 |
201 | #%%
202 |
203 | if __name__ == '__main__':
204 | parser = argparse.ArgumentParser(add_help=False) # Parameters for mpjpe
205 |     parser.add_argument('--data_dir', type=str, default='../data_amass/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)')
206 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames")
207 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames")
208 |     parser.add_argument('--skip_rate', type=int, default=1, choices=[1, 5], help='rate of frames to skip: 1 for H36M, 5 for AMASS/3DPW')
209 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader')
210 | parser.add_argument('--root', default='./runs', type=str, help='root path for the logging') #'./runs'
211 |
212 | parser.add_argument('--activation', default='gelu', type=str, required=False)
213 | parser.add_argument('--r_se', default=8, type=int, required=False)
214 |
215 | parser.add_argument('--n_epochs', default=50, type=int, required=False)
216 |     parser.add_argument('--batch_size', default=200, type=int, required=False)
217 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False)
218 | parser.add_argument('--pin_memory', default=False, type=bool, required=False)
219 | parser.add_argument('--loader_workers', default=4, type=int, required=False)
220 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False)
221 | parser.add_argument('--dev', default='cuda:0', type=str, required=False)
222 |     parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, he_normal, he_uniform')
223 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False)
224 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma')
225 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs')
226 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients')
227 |     parser.add_argument('--model_path', type=str, default='./checkpoints/amass_3d_25frames_ckpt', help='directory where the model checkpoints are saved')
228 |     parser.add_argument('--actions_to_consider', default='all', help='Actions to visualize. Choose either all or a list of actions')
229 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set')
230 |     parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)')
231 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle'])
232 |
233 | args = parser.parse_args()
234 |
235 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe
236 | parser_mpjpe.add_argument('--hidden_dim', default=128, type=int, required=False)
237 | parser_mpjpe.add_argument('--num_blocks', default=5, type=int, required=False)
238 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False)
239 | parser_mpjpe.add_argument('--channels_mlp_dim', default=128, type=int, required=False)
240 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False)
241 | parser_mpjpe.add_argument('--pose_dim', default=54, type=int, required=False)
242 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference between 2 frames')
243 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False)
244 | args = parser_mpjpe.parse_args()
245 |
246 |
247 |
248 | print(args)
249 |
250 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks,
251 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim,
252 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n,
253 | pred_len=args.output_n, activation=args.activation,
254 | mlp_block_type='normal', regularization=args.regularization,
255 | input_size=args.pose_dim, initialization='none', r_se=args.r_se,
256 | use_max_pooling=False, use_se=True)
257 |
258 | model = model.to(args.dev)
259 |
260 | print('total number of parameters of the network is: ' +
261 | str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
262 |
263 |     model_name = 'amass_3d_'+str(args.output_n)+'frames_ckpt'
264 |
265 | #%%
266 | train(model, model_name, args)
267 | test_mpjpe(model, args)
268 |
269 |
270 |
271 |
--------------------------------------------------------------------------------
/checkpoints/.gitignore:
--------------------------------------------------------------------------------
1 | # These are some examples of commonly ignored file patterns.
2 | # You should customize this list as applicable to your project.
3 | # Learn more about .gitignore:
4 | # https://www.atlassian.com/git/tutorials/saving-changes/gitignore
5 |
6 | # Node artifact files
7 | node_modules/
8 | dist/
9 |
10 | # Compiled Java class files
11 | *.class
12 |
13 | # Compiled Python bytecode
14 | *.py[cod]
15 |
16 | # Log files
17 | *.log
18 |
19 | # Package files
20 | *.jar
21 |
22 | # Maven
23 | target/
24 | dist/
25 |
26 | # JetBrains IDE
27 | .idea/
28 |
29 | # Unit test reports
30 | TEST*.xml
31 |
32 | # Generated by macOS
33 | .DS_Store
34 |
35 | # Generated by Windows
36 | Thumbs.db
37 |
38 | # Applications
39 | *.app
40 | *.exe
41 | *.war
42 |
43 | # Large media files
44 | *.mp4
45 | *.tiff
46 | *.avi
47 | *.flv
48 | *.mov
49 | *.wmv
50 |
51 |
--------------------------------------------------------------------------------
/h36m/datasets/dataset_h36m.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.data import Dataset
3 | import numpy as np
4 | from h5py import File
5 | import scipy.io as sio
6 | import utils.data_utils as data_utils
7 | from matplotlib import pyplot as plt
8 | import torch
9 |
10 | import os
11 |
12 | '''
13 | adapted from
14 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion3d.py
15 | '''
16 |
17 |
18 | class H36M_Dataset(Dataset):
19 |
20 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0):
21 |         """
22 |         :param data_dir: path to the H3.6M dataset directory
23 |         :param input_n: number of input frames
24 |         :param output_n: number of output frames
25 |         :param skip_rate: stride between the start frames of consecutive samples
26 |         :param actions: list of actions to load (None loads all 15)
27 |         :param split: 0 train, 1 validation, 2 test
28 |         """
30 | self.path_to_data = os.path.join(data_dir,'h3.6m/dataset')
31 | self.split = split
32 | self.in_n = input_n
33 | self.out_n = output_n
34 | self.sample_rate = 2
35 | self.p3d = {}
36 | self.data_idx = []
37 | seq_len = self.in_n + self.out_n
38 | subs = [[1, 6, 7, 8, 9], [11], [5]]
39 | # acts = data_utils.define_actions(actions)
40 | if actions is None:
41 | acts = ["walking", "eating", "smoking", "discussion", "directions",
42 | "greeting", "phoning", "posing", "purchases", "sitting",
43 | "sittingdown", "takingphoto", "waiting", "walkingdog",
44 | "walkingtogether"]
45 | else:
46 | acts = actions
47 | # subs = np.array([[1], [11], [5]])
48 | # acts = ['walking']
49 | # 32 human3.6 joint name:
50 | joint_name = ["Hips", "RightUpLeg", "RightLeg", "RightFoot", "RightToeBase", "Site", "LeftUpLeg", "LeftLeg",
51 | "LeftFoot",
52 | "LeftToeBase", "Site", "Spine", "Spine1", "Neck", "Head", "Site", "LeftShoulder", "LeftArm",
53 | "LeftForeArm",
54 | "LeftHand", "LeftHandThumb", "Site", "L_Wrist_End", "Site", "RightShoulder", "RightArm",
55 | "RightForeArm",
56 | "RightHand", "RightHandThumb", "Site", "R_Wrist_End", "Site"]
57 |
58 | subs = subs[split]
59 | key = 0
60 | for subj in subs:
61 | for action_idx in np.arange(len(acts)):
62 | action = acts[action_idx]
63 | if self.split <= 1:
64 | for subact in [1, 2]: # subactions
65 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact))
66 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact)
67 | the_sequence = data_utils.readCSVasFloat(filename)
68 | n, d = the_sequence.shape
69 | even_list = range(0, n, self.sample_rate)
70 | num_frames = len(even_list)
71 | the_sequence = np.array(the_sequence[even_list, :])
72 | the_sequence = torch.from_numpy(the_sequence).float().cuda()
73 | # remove global rotation and translation
74 | the_sequence[:, 0:6] = 0
75 | p3d = data_utils.expmap2xyz_torch(the_sequence)
76 | # self.p3d[(subj, action, subact)] = p3d.view(num_frames, -1).cpu().data.numpy()
77 | self.p3d[key] = p3d.view(num_frames, -1).cpu().data.numpy()
78 |
79 | valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate)
80 |
81 | # tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames)
82 | tmp_data_idx_1 = [key] * len(valid_frames)
83 | tmp_data_idx_2 = list(valid_frames)
84 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
85 | key += 1
86 | else:
87 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1))
88 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1)
89 | the_sequence1 = data_utils.readCSVasFloat(filename)
90 | n, d = the_sequence1.shape
91 | even_list = range(0, n, self.sample_rate)
92 |
93 | num_frames1 = len(even_list)
94 | the_sequence1 = np.array(the_sequence1[even_list, :])
95 | the_seq1 = torch.from_numpy(the_sequence1).float().cuda()
96 | the_seq1[:, 0:6] = 0
97 | p3d1 = data_utils.expmap2xyz_torch(the_seq1)
98 | # self.p3d[(subj, action, 1)] = p3d1.view(num_frames1, -1).cpu().data.numpy()
99 | self.p3d[key] = p3d1.view(num_frames1, -1).cpu().data.numpy()
100 |
101 | #print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2))
102 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2)
103 | the_sequence2 = data_utils.readCSVasFloat(filename)
104 | n, d = the_sequence2.shape
105 | even_list = range(0, n, self.sample_rate)
106 |
107 | num_frames2 = len(even_list)
108 | the_sequence2 = np.array(the_sequence2[even_list, :])
109 | the_seq2 = torch.from_numpy(the_sequence2).float().cuda()
110 | the_seq2[:, 0:6] = 0
111 | p3d2 = data_utils.expmap2xyz_torch(the_seq2)
112 |
113 | # self.p3d[(subj, action, 2)] = p3d2.view(num_frames2, -1).cpu().data.numpy()
114 | self.p3d[key + 1] = p3d2.view(num_frames2, -1).cpu().data.numpy()
115 |
116 | # print("action:{}".format(action))
117 | # print("subact1:{}".format(num_frames1))
118 | # print("subact2:{}".format(num_frames2))
119 | fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len,
120 | input_n=self.in_n)
121 |
122 | valid_frames = fs_sel1[:, 0]
123 | tmp_data_idx_1 = [key] * len(valid_frames)
124 | tmp_data_idx_2 = list(valid_frames)
125 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
126 |
127 | valid_frames = fs_sel2[:, 0]
128 | tmp_data_idx_1 = [key + 1] * len(valid_frames)
129 | tmp_data_idx_2 = list(valid_frames)
130 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
131 | key += 2
132 |
133 | # ignore constant joints and joints at same position with other joints
134 | joint_to_ignore = np.array([0, 1, 6, 11, 16, 20, 23, 24, 28, 31])
135 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
136 | self.dimensions_to_use = np.setdiff1d(np.arange(96), dimensions_to_ignore)
137 |
138 | def __len__(self):
139 | return np.shape(self.data_idx)[0]
140 |
141 | def __getitem__(self, item):
142 | key, start_frame = self.data_idx[item]
143 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n)
144 | #print (self.p3d[key][fs].shape)
145 | return self.p3d[key][fs]
146 |
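# A minimal usage sketch (illustrative): expects the H3.6M exponential-map
# files under <data_dir>/h3.6m/dataset/S<subject>/<action>_<subaction>.txt.
#
#   dataset = H36M_Dataset('./data', input_n=10, output_n=25, skip_rate=1, split=0)
#   sample = dataset[0]   # np.ndarray of shape (35, 96): 35 frames, 32 joints * 3 coords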
147 |
--------------------------------------------------------------------------------
/h36m/datasets/dataset_h36m_ang.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 | import numpy as np
3 | from h5py import File
4 | import scipy.io as sio
5 | from utils import data_utils
6 | from matplotlib import pyplot as plt
7 | import torch
8 | import os
9 |
10 | '''
11 | adapted from
12 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/h36motion.py
13 | '''
14 |
15 |
16 | class H36M_Dataset_Angle(Dataset):
17 |
18 | def __init__(self,data_dir,input_n,output_n,skip_rate, actions=None, split=0):
19 |         """
20 |         :param data_dir: path to the H3.6M dataset directory
21 |         :param input_n: number of input frames
22 |         :param output_n: number of output frames
23 |         :param skip_rate: stride between the start frames of consecutive samples
24 |         :param actions: list of actions to load (None loads all 15)
25 |         :param split: 0 train, 1 validation, 2 test
26 |         """
28 | self.path_to_data = os.path.join(data_dir,'h3.6m/dataset')
29 | self.split = split
30 | self.in_n = input_n
31 | self.out_n = output_n
32 | self.sample_rate = 2
33 | self.seq = {}
34 | self.data_idx = []
35 |
36 | self.dimensions_to_use = np.array(
37 | [6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42,
38 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86])
39 | self.dimensions_to_ignore = np.array(
40 | [[0, 1, 2, 3, 4, 5, 10, 11, 16, 17, 18, 19, 20, 25, 26, 31, 32, 33, 34, 35, 48, 49, 50, 58,
41 | 59, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
42 | 98]])
43 |
44 | seq_len = self.in_n + self.out_n
45 | subs = [[1, 6, 7, 8, 9], [11], [5]]
46 | # acts = data_utils.define_actions(actions)
47 | if actions is None:
48 | acts = ["walking", "eating", "smoking", "discussion", "directions",
49 | "greeting", "phoning", "posing", "purchases", "sitting",
50 | "sittingdown", "takingphoto", "waiting", "walkingdog",
51 | "walkingtogether"]
52 | else:
53 | acts = actions
54 | # subs = np.array([[1], [11], [5]])
55 | # acts = ['walking']
56 |
57 | subs = subs[split]
58 |
59 | for subj in subs:
60 | for action_idx in np.arange(len(acts)):
61 | action = acts[action_idx]
62 | if self.split <= 1:
63 | for subact in [1, 2]: # subactions
64 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, subact))
65 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, subact)
66 | the_sequence = data_utils.readCSVasFloat(filename)
67 | n, d = the_sequence.shape
68 | even_list = range(0, n, self.sample_rate)
69 | num_frames = len(even_list)
70 | the_sequence = np.array(the_sequence[even_list, :])
71 | # the_sequence = torch.from_numpy(the_sequence).float().cuda()
72 | # remove global rotation and translation
73 | the_sequence[:, 0:6] = 0
74 | # p3d = data_utils.expmap2xyz_torch(the_sequence)
75 | self.seq[(subj, action, subact)] = the_sequence
76 |
77 | valid_frames = np.arange(0, num_frames - seq_len + 1, skip_rate)
78 |
79 | tmp_data_idx_1 = [(subj, action, subact)] * len(valid_frames)
80 | tmp_data_idx_2 = list(valid_frames)
81 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
82 | else:
83 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 1))
84 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 1)
85 | the_sequence1 = data_utils.readCSVasFloat(filename)
86 | n, d = the_sequence1.shape
87 | even_list = range(0, n, self.sample_rate)
88 |
89 | num_frames1 = len(even_list)
90 | the_sequence1 = np.array(the_sequence1[even_list, :])
91 | # the_seq1 = torch.from_numpy(the_sequence1).float().cuda()
92 | the_sequence1[:, 0:6] = 0
93 | # p3d1 = data_utils.expmap2xyz_torch(the_seq1)
94 | self.seq[(subj, action, 1)] = the_sequence1
95 |
96 | # print("Reading subject {0}, action {1}, subaction {2}".format(subj, action, 2))
97 | filename = '{0}/S{1}/{2}_{3}.txt'.format(self.path_to_data, subj, action, 2)
98 | the_sequence2 = data_utils.readCSVasFloat(filename)
99 | n, d = the_sequence2.shape
100 | even_list = range(0, n, self.sample_rate)
101 |
102 | num_frames2 = len(even_list)
103 | the_sequence2 = np.array(the_sequence2[even_list, :])
104 | # the_seq2 = torch.from_numpy(the_sequence2).float().cuda()
105 | the_sequence2[:, 0:6] = 0
106 | # p3d2 = data_utils.expmap2xyz_torch(the_seq2)
107 | self.seq[(subj, action, 2)] = the_sequence2
108 |
109 | # fs_sel1, fs_sel2 = data_utils.find_indices_256(num_frames1, num_frames2, seq_len,
110 | # input_n=self.in_n)
111 | fs_sel1, fs_sel2 = data_utils.find_indices_srnn(num_frames1, num_frames2, seq_len,
112 | input_n=self.in_n)
113 |
114 | valid_frames = fs_sel1[:, 0]
115 | tmp_data_idx_1 = [(subj, action, 1)] * len(valid_frames)
116 | tmp_data_idx_2 = list(valid_frames)
117 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
118 |
119 | valid_frames = fs_sel2[:, 0]
120 | tmp_data_idx_1 = [(subj, action, 2)] * len(valid_frames)
121 | tmp_data_idx_2 = list(valid_frames)
122 | self.data_idx.extend(zip(tmp_data_idx_1, tmp_data_idx_2))
123 |
124 | def __len__(self):
125 | return np.shape(self.data_idx)[0]
126 |
127 | def __getitem__(self, item):
128 | key, start_frame = self.data_idx[item]
129 | fs = np.arange(start_frame, start_frame + self.in_n + self.out_n)
130 | return self.seq[key][fs]
131 |
--------------------------------------------------------------------------------
/h36m/h36_3d_viz.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 |
4 | import numpy as np
5 | import torch
6 | from torch.utils.data import DataLoader
7 | import matplotlib.pyplot as plt
8 | from mpl_toolkits.mplot3d import Axes3D
9 | import matplotlib.animation as animation
10 | from datasets.dataset_h36m import H36M_Dataset as Datasets
11 | #from utils.data_utils import define_actions
12 |
13 |
14 | def mpjpe_error(batch_pred,batch_gt):
15 |
16 | batch_pred= batch_pred.contiguous().view(-1,3)
17 | batch_gt=batch_gt.contiguous().view(-1,3)
18 |
19 | return torch.mean(torch.norm(batch_gt-batch_pred,2,1))
20 |
21 |
22 | def define_actions(action):
23 | """
24 | Define the list of actions we are using.
25 |
26 | Args
27 | action: String with the passed action. Could be "all"
28 | Returns
29 | actions: List of strings of actions
30 | Raises
31 | ValueError if the action is not included in H3.6M
32 | """
33 |
34 | actions = ["walking", "eating", "smoking", "discussion", "directions",
35 | "greeting", "phoning", "posing", "purchases", "sitting",
36 | "sittingdown", "takingphoto", "waiting", "walkingdog",
37 | "walkingtogether"]
38 | if action in actions:
39 | return [action]
40 |
41 | if action == "all":
42 | return actions
43 |
44 | if action == "all_srnn":
45 | return ["walking", "eating", "smoking", "discussion"]
46 |
47 |     raise ValueError("Unrecognized action: %s" % action)
48 |
49 | def create_pose(ax,plots,vals,pred=True,update=False):
50 |
51 |
52 |
53 | # h36m 32 joints(full)
54 | connect = [
55 | (1, 2), (2, 3), (3, 4), (4, 5),
56 | (6, 7), (7, 8), (8, 9), (9, 10),
57 | (0, 1), (0, 6),
58 | (6, 17), (17, 18), (18, 19), (19, 20), (20, 21), (21, 22),
59 | (1, 25), (25, 26), (26, 27), (27, 28), (28, 29), (29, 30),
60 | (24, 25), (24, 17),
61 | (24, 14), (14, 15)
62 | ]
63 | LR = [
64 | False, True, True, True, True,
65 | True, False, False, False, False,
66 | False, True, True, True, True,
67 | True, True, False, False, False,
68 | False, False, False, False, True,
69 | False, True, True, True, True,
70 | True, True
71 | ]
72 |
73 |
74 | # Start and endpoints of our representation
75 |     I = np.array([tup[0] for tup in connect])
76 |     J = np.array([tup[1] for tup in connect])
77 | # Left / right indicator
78 | LR = np.array([LR[a] or LR[b] for a,b in connect])
79 | if pred:
80 | lcolor = "#9b59b6"
81 | rcolor = "#2ecc71"
82 | else:
83 | lcolor = "#8e8e8e"
84 | rcolor = "#383838"
85 |
86 | for i in np.arange( len(I)):
87 | x = np.array( [vals[I[i], 0], vals[J[i], 0]] )
88 | z = np.array( [vals[I[i], 1], vals[J[i], 1]] )
89 | y = np.array( [vals[I[i], 2], vals[J[i], 2]] )
90 | if not update:
91 |
92 |             if i == 0:
93 |                 plots.append(ax.plot(x, y, z, lw=2, linestyle='--', c=lcolor if LR[i] else rcolor, label='GT' if not pred else 'Pred'))
94 |             else:
95 |                 plots.append(ax.plot(x, y, z, lw=2, linestyle='--', c=lcolor if LR[i] else rcolor))
96 |
97 | elif update:
98 | plots[i][0].set_xdata(x)
99 | plots[i][0].set_ydata(y)
100 | plots[i][0].set_3d_properties(z)
101 | plots[i][0].set_color(lcolor if LR[i] else rcolor)
102 |
103 | return plots
104 | # ax.legend(loc='lower left')
105 |
106 |
108 |
109 |
110 | def update(num,data_gt,data_pred,plots_gt,plots_pred,fig,ax):
111 |
112 | gt_vals=data_gt[num]
113 | pred_vals=data_pred[num]
114 | plots_gt=create_pose(ax,plots_gt,gt_vals,pred=False,update=True)
115 | plots_pred=create_pose(ax,plots_pred,pred_vals,pred=True,update=True)
116 |
117 |
118 |
119 |
120 |
121 | r = 0.75
122 | xroot, zroot, yroot = gt_vals[0,0], gt_vals[0,1], gt_vals[0,2]
123 | ax.set_xlim3d([-r+xroot, r+xroot])
124 | ax.set_ylim3d([-r+yroot, r+yroot])
125 | ax.set_zlim3d([-r+zroot, r+zroot])
126 | #ax.set_title('pose at time frame: '+str(num))
127 | #ax.set_aspect('equal')
128 |
129 | return plots_gt,plots_pred
130 |
131 |
132 |
133 |
134 |
135 |
136 | #%%
137 |
138 |
139 | def visualize(input_n,output_n,visualize_from,path,modello,device,n_viz,skip_rate,actions,encoding ='dct'):
140 |
141 | import random
142 | actions=define_actions(actions)
143 |
144 | for action in actions:
145 |
146 | if visualize_from=='train':
147 | loader=Datasets(path,input_n,output_n,skip_rate, split=0,actions=[action])
148 | elif visualize_from=='validation':
149 | loader=Datasets(path,input_n,output_n,skip_rate, split=1,actions=[action])
150 | elif visualize_from=='test':
151 | loader=Datasets(path,input_n,output_n,skip_rate, split=2,actions=[action])
152 |
153 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25,
154 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
155 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68,
156 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])
157 | # joints at same loc
158 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
159 | index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
160 | joint_equal = np.array([13, 19, 22, 13, 27, 30])
161 | index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))
162 |
163 |
164 | loader = DataLoader(
165 | loader,
166 | batch_size=256,
167 | shuffle = False, # for comparable visualizations with other models
168 | num_workers=0)
169 |
170 |
171 |
172 | for cnt,batch in enumerate(loader):
173 | batch = batch.to(device)
174 |
175 | all_joints_seq=batch.clone()[:, input_n:input_n+output_n,:]
176 |
177 | sequences_train=batch[:, 0:input_n, dim_used].view(-1,input_n,len(dim_used))
178 | sequences_gt=batch[:, input_n:input_n+output_n, :]
179 |
180 | sequences_predict=modello(sequences_train).contiguous().view(-1,output_n,len(dim_used))
181 |
182 | all_joints_seq[:,:,dim_used] = sequences_predict
183 |
184 | all_joints_seq[:,:,index_to_ignore] = all_joints_seq[:,:,index_to_equal]
185 |
186 |
187 | all_joints_seq=all_joints_seq.view(-1,output_n,32,3)
188 |
189 | sequences_gt=sequences_gt.view(-1,output_n,32,3)
190 |
191 | loss=mpjpe_error(all_joints_seq,sequences_gt)# # both must have format (batch,T,V,C)
192 |
193 |
194 |
195 | data_pred=torch.squeeze(all_joints_seq,0).cpu().data.numpy()/1000 # in meters
196 | data_gt=torch.squeeze(sequences_gt,0).cpu().data.numpy()/1000
197 |
198 | i = random.randint(1,256)
199 |
200 | data_pred = data_pred [i]
201 | data_gt = data_gt [i]
202 |
203 | #print (data_gt.shape,data_pred.shape)
204 |
205 | fig = plt.figure()
206 |             ax = fig.add_subplot(projection='3d')  # Axes3D(fig) is deprecated in recent Matplotlib
207 | ax.view_init(elev=20, azim=-40)
208 | vals = np.zeros((32, 3)) # or joints_to_consider
209 | gt_plots=[]
210 | pred_plots=[]
211 |
212 | gt_plots=create_pose(ax,gt_plots,vals,pred=False,update=False)
213 | pred_plots=create_pose(ax,pred_plots,vals,pred=True,update=False)
214 |
215 | ax.set_xlabel("x")
216 | ax.set_ylabel("y")
217 | ax.set_zlabel("z")
218 | ax.legend(loc='lower left')
219 |
220 |
221 |
222 | ax.set_xlim3d([-1, 1.5])
223 | ax.set_xlabel('X')
224 |
225 | ax.set_ylim3d([-1, 1.5])
226 | ax.set_ylabel('Y')
227 |
228 | ax.set_zlim3d([0.0, 1.5])
229 | ax.set_zlabel('Z')
230 | ax.set_title('loss in mm is: '+str(round(loss.item(),4))+' for action : '+str(action)+' for '+str(output_n)+' frames')
231 |
232 | line_anim = animation.FuncAnimation(fig, update, output_n, fargs=(data_gt,data_pred,gt_plots,pred_plots,
233 | fig,ax),interval=70, blit=False)
234 | plt.show()
235 |
236 |             line_anim.save('./visualizations/pred{}/human_viz{}.gif'.format(output_n, i), writer='pillow')
237 |
238 |
239 | if cnt==n_viz-1:
240 | break
241 |
242 |
243 |
244 |
245 |
--------------------------------------------------------------------------------
/h36m/mlp_mixer.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import torch.nn.functional as F
4 |
5 |
6 | class SELayer(nn.Module):
7 | def __init__(self, c, r=4, use_max_pooling=False):
8 | super().__init__()
9 | self.squeeze = nn.AdaptiveAvgPool1d(1) if not use_max_pooling else nn.AdaptiveMaxPool1d(1)
10 | self.excitation = nn.Sequential(
11 | nn.Linear(c, c // r, bias=False),
12 | nn.ReLU(inplace=True),
13 | nn.Linear(c // r, c, bias=False),
14 | nn.Sigmoid()
15 | )
16 |
17 |
18 | def forward(self, x):
19 | bs, s, h = x.shape
20 | y = self.squeeze(x).view(bs, s)
21 | y = self.excitation(y).view(bs, s, 1)
22 | return x * y.expand_as(x)
23 |
24 |
25 |
26 | def mish(x):
27 | return (x*torch.tanh(F.softplus(x)))
28 |
29 |
30 |
31 |
32 | class MlpBlock(nn.Module):
33 | def __init__(self, mlp_hidden_dim, mlp_input_dim, mlp_bn_dim, activation='gelu', regularization=0, initialization='none'):
34 | super().__init__()
35 | self.mlp_hidden_dim = mlp_hidden_dim
36 | self.mlp_input_dim = mlp_input_dim
37 | self.mlp_bn_dim = mlp_bn_dim
38 | #self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_input_dim)
39 | self.fc1 = nn.Linear(self.mlp_input_dim, self.mlp_hidden_dim)
40 | self.fc2 = nn.Linear(self.mlp_hidden_dim, self.mlp_input_dim)
41 | if regularization > 0.0:
42 | self.reg1 = nn.Dropout(regularization)
43 | self.reg2 = nn.Dropout(regularization)
44 | elif regularization == -1.0:
45 | self.reg1 = nn.BatchNorm1d(self.mlp_bn_dim)
46 | self.reg2 = nn.BatchNorm1d(self.mlp_bn_dim)
47 | else:
48 | self.reg1 = None
49 | self.reg2 = None
50 |
51 | if activation == 'gelu':
52 | self.act1 = nn.GELU()
53 | elif activation == 'mish':
54 | self.act1 = mish #nn.Mish()
55 | else:
56 | raise ValueError('Unknown activation function type: %s'%activation)
57 |
58 |
59 |
60 | def forward(self, x):
61 | x = self.fc1(x)
62 | x = self.act1(x)
63 | if self.reg1 is not None:
64 | x = self.reg1(x)
65 | x = self.fc2(x)
66 | if self.reg2 is not None:
67 | x = self.reg2(x)
68 |
69 | return x
70 |
71 |
72 |
73 | class MixerBlock(nn.Module):
74 | def __init__(self, tokens_mlp_dim, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
75 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
76 | super().__init__()
77 | self.tokens_mlp_dim = tokens_mlp_dim
78 | self.channels_mlp_dim = channels_mlp_dim
79 | self.seq_len = seq_len
80 | self.hidden_dim = hidden_dim # out channels of the conv
81 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization)
82 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization)
83 | self.use_se = use_se
84 | if self.use_se:
85 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
86 |
87 | self.LN1 = nn.LayerNorm(self.hidden_dim)
88 | self.LN2 = nn.LayerNorm(self.hidden_dim)
89 |
90 |
91 |
92 | def forward(self, x):
93 |         # shape x [256, 8, 512] [bs, patches/time_steps, channels]
94 | y = self.LN1(x)
95 |
96 | y = y.transpose(1, 2)
97 | y = self.mlp_block_token_mixing(y)
98 | y = y.transpose(1, 2)
99 |
100 | if self.use_se:
101 | y = self.se(y)
102 | x = x + y
103 |
104 | y = self.LN2(x)
105 | y = self.mlp_block_channel_mixing(y)
106 |
107 | if self.use_se:
108 | y = self.se(y)
109 |
110 | return x + y
111 |
112 |
113 |
114 | class MixerBlock_Channel(nn.Module):
115 | def __init__(self, channels_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
116 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
117 | super().__init__()
118 | self.channels_mlp_dim = channels_mlp_dim
119 | self.seq_len = seq_len
120 | self.hidden_dim = hidden_dim # out channels of the conv
121 | self.mlp_block_channel_mixing = MlpBlock(self.channels_mlp_dim, self.hidden_dim, self.seq_len, activation=activation, regularization=regularization, initialization=initialization)
122 | self.use_se = use_se
123 | if self.use_se:
124 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
125 |
126 |
127 | self.LN2 = nn.LayerNorm(self.hidden_dim)
128 |
129 | #self.act1 = nn.GELU()
130 |
131 | def forward(self, x):
132 | # shape x [256, 8, 512] [bs, patches/time_steps, channels]
133 | y = x
134 |
135 | if self.use_se:
136 | y = self.se(y)
137 | x = x + y
138 | y = self.LN2(x)
139 | y = self.mlp_block_channel_mixing(y)
140 | if self.use_se:
141 | y = self.se(y)
142 |
143 | return x + y
144 |
145 |
146 |
147 | class MixerBlock_Token(nn.Module):
148 | def __init__(self, tokens_mlp_dim, seq_len, hidden_dim, activation='gelu', regularization=0,
149 | initialization='none', r_se=4, use_max_pooling=False, use_se=True):
150 | super().__init__()
151 | self.tokens_mlp_dim = tokens_mlp_dim
152 |
153 | self.seq_len = seq_len
154 | self.hidden_dim = hidden_dim # out channels of the conv
155 | self.mlp_block_token_mixing = MlpBlock(self.tokens_mlp_dim, self.seq_len, self.hidden_dim, activation=activation, regularization=regularization, initialization=initialization)
156 |
157 | self.use_se = use_se
158 |
159 | if self.use_se:
160 | self.se = SELayer(self.seq_len, r=r_se, use_max_pooling=use_max_pooling)
161 |
162 | self.LN1 = nn.LayerNorm(self.hidden_dim)
163 |
164 |
165 | def forward(self, x):
166 | # shape x [256, 8, 512] [bs, patches/time_steps, channels]
167 | y = self.LN1(x)
168 | y = y.transpose(1, 2)
169 | y = self.mlp_block_token_mixing(y)
170 | y = y.transpose(1, 2)
171 |
172 | if self.use_se:
173 | y = self.se(y)
174 | x = x + y
175 |
176 |         return x  # the residual x + y was already formed above; returning x + y would add y twice
177 |
178 |
179 |
180 | class MlpMixer(nn.Module):
181 | def __init__(self, num_classes, num_blocks, hidden_dim, tokens_mlp_dim,
182 | channels_mlp_dim, seq_len,pred_len, activation='gelu',
183 | mlp_block_type='normal',regularization=0, input_size=51,
184 | initialization='none', r_se=4, use_max_pooling=False,
185 | use_se=False):
186 |
187 | super().__init__()
188 | self.num_classes = num_classes
189 | self.num_blocks = num_blocks
190 | self.hidden_dim = hidden_dim
191 | self.seq_len = seq_len
192 | self.tokens_mlp_dim = tokens_mlp_dim
193 | self.channels_mlp_dim = channels_mlp_dim
194 |         self.input_size = input_size  # varies with the number of joints
195 | self.conv = nn.Conv1d(1, self.hidden_dim, (1, self.input_size), stride=1)
196 | self.activation = activation
197 |
198 |
199 |         self.channel_only = False  # set True for the channel-mixing-only variant
200 |         self.token_only = False  # set True for the token-mixing-only variant
201 |
202 |
203 |
204 | if self.channel_only:
205 |
206 | self.Mixer_Block = nn.ModuleList (MixerBlock_Channel(self.channels_mlp_dim,self.seq_len, self.hidden_dim,
207 | activation=self.activation, regularization=regularization, initialization=initialization,
208 | r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se)
209 | for _ in range(num_blocks))
210 |
211 |
212 |         elif self.token_only:
213 |
214 | self.Mixer_Block = nn.ModuleList(MixerBlock_Token(self.tokens_mlp_dim, self.seq_len, self.hidden_dim,
215 | activation=self.activation, regularization=regularization, initialization=initialization,
216 | r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se)
217 | for _ in range(num_blocks))
218 |
219 | else:
220 |
221 | self.Mixer_Block = nn.ModuleList(MixerBlock(self.tokens_mlp_dim, self.channels_mlp_dim,
222 | self.seq_len, self.hidden_dim, activation=self.activation,
223 | regularization=regularization, initialization=initialization,
224 | r_se=r_se, use_max_pooling=use_max_pooling, use_se=use_se)
225 | for _ in range(num_blocks))
226 |
227 |
228 | self.LN = nn.LayerNorm(self.hidden_dim)
229 |
230 | self.fc_out = nn.Linear(self.hidden_dim, self.num_classes)
231 |
232 | self.pred_len = pred_len
233 | self.conv_out = nn.Conv1d(self.seq_len, self.pred_len, 1, stride=1)
234 |
235 |
236 |
237 |
238 | def forward(self, x):
239 | x = x.unsqueeze(1)
240 | y = self.conv(x)
241 | y = y.squeeze(dim=3).transpose(1, 2)
242 |
243 | # [256, 8, 512] [bs, patches/time_steps, channels]
244 | for mb in self.Mixer_Block:
245 | y = mb(y)
246 | y = self.LN(y)
247 |
248 | out = self.fc_out(self.conv_out(y))
249 |
250 | return out
251 |
--------------------------------------------------------------------------------
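
For orientation, a minimal smoke test of the model above (not part of the repository), using the mpjpe defaults from the H3.6M scripts below: 10 input frames, 25 output frames, 66 pose dimensions. The (1, input_size) convolution lifts each frame to hidden_dim channels, the MixerBlocks alternate token (time) and channel mixing, and conv_out/fc_out map back to pred_len frames of pose coordinates.

```python
import torch
from mlp_mixer import MlpMixer  # assumes h36m/ is on the import path

model = MlpMixer(num_classes=66, num_blocks=4, hidden_dim=50,
                 tokens_mlp_dim=20, channels_mlp_dim=50,
                 seq_len=10, pred_len=25, activation='mish',
                 regularization=0.1, input_size=66, r_se=8, use_se=True)
model.eval()

# Note: the (1, input_size) kernel passed to nn.Conv1d relies on the PyTorch
# version pinned in requirements.txt; recent PyTorch releases are stricter
# about Conv1d input dimensions.
x = torch.randn(2, 10, 66)   # (batch, input frames, 22 joints * 3 coords)
with torch.no_grad():
    y = model(x)
print(y.shape)               # expected: torch.Size([2, 25, 66])
```
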
/h36m/test_mixer_h36m.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | from datasets.dataset_h36m import H36M_Dataset
4 | from datasets.dataset_h36m_ang import H36M_Dataset_Angle
5 | from utils.data_utils import define_actions
6 | from torch.utils.data import DataLoader
7 | from mlp_mixer import MlpMixer
8 | import matplotlib.pyplot as plt
9 | import torch.optim as optim
10 | import numpy as np
11 | import argparse
12 | from utils.utils_mixer import delta_2_gt, mpjpe_error, euler_error
13 | from tqdm import tqdm
14 | from torch.utils.tensorboard import SummaryWriter
15 |
16 |
17 | def test_pretrained(model,args):
18 |
19 | N = 0
20 | eval_frame = [1, 3, 7, 9, 13, 17, 21, 24]
21 |
22 | t_3d = np.zeros(len(eval_frame))
23 |
24 | t_3d_all = []
25 |
26 | model.eval()
27 | accum_loss=0
28 | n_batches=0 # number of batches for all the sequences
29 | actions=define_actions(args.actions_to_consider)
30 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25,
31 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
32 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68,
33 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])
34 | # joints at same loc
35 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
36 | index_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
37 | joint_equal = np.array([13, 19, 22, 13, 27, 30])
38 | index_to_equal = np.concatenate((joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))
39 |
40 | idx_eval = 7
41 |
42 |
43 | for action in actions:
44 | running_loss=0
45 | n=0
46 | dataset_test = H36M_Dataset(args.data_dir,args.input_n,args.output_n,args.skip_rate, split=2,actions=[action])
47 |
48 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test, shuffle=False, num_workers=0, pin_memory=True)
49 | for cnt,batch in enumerate(test_loader):
50 | with torch.no_grad():
51 |
52 | batch=batch.to(args.device)
53 | batch_dim=batch.shape[0]
54 | n+=batch_dim
55 |
56 |
57 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:]
58 | all_joints_seq_gt=batch.clone()[:, args.input_n:args.input_n+args.output_n,:]
59 |
60 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used))
61 |
62 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n, dim_used].view(-1,args.output_n,args.pose_dim)
63 |
64 | if args.delta_x :
65 | sequences_all = torch.cat((sequences_train, sequences_gt), 1)
66 | sequences_all_delta =[sequences_all[:,1,:] - sequences_all[:,0,:]]
67 | for i in range(args.input_n+args.output_n-1):
68 | sequences_all_delta.append(sequences_all[:,i+1,:] - sequences_all[:,i,:])
69 |
70 | sequences_all_delta = torch.stack((sequences_all_delta)).permute(1,0,2)
71 | sequences_train_delta = sequences_all_delta[:, 0:args.input_n, :]
72 | sequences_predict=model(sequences_train_delta)
73 | sequences_predict = delta_2_gt (sequences_predict,sequences_train[:,-1,:])
74 | loss=mpjpe_error(sequences_predict,sequences_gt)
75 |
76 |
77 |
78 | sequences_gt_3d = sequences_gt.reshape (sequences_gt.shape[0],sequences_gt.shape[1],-1,3)
79 | sequences_predict_3d = sequences_predict.reshape (sequences_predict.shape[0],sequences_predict.shape[1],-1,3)
80 |
81 | #print (sequences_gt.shape)
82 |
83 | for k in np.arange(0, len(eval_frame)):
84 | j = eval_frame[k]
85 | t_3d[k] += torch.mean(torch.norm(sequences_gt_3d[:, j, :, :].contiguous().view(-1, 3) - sequences_predict_3d[:, j, :, :].contiguous().view(-1, 3), 2, 1)).item() * n
86 |
87 |
88 | N += n
89 |
90 |
91 |
92 | else:
93 | sequences_predict=model(sequences_train)
94 | loss=mpjpe_error(sequences_predict,sequences_gt)
95 |
96 |
97 |
98 | all_joints_seq[:,:,dim_used] = sequences_predict
99 | all_joints_seq[:,:,index_to_ignore] = all_joints_seq[:,:,index_to_equal]
100 |
101 |
102 | all_joints_seq_gt[:,:,dim_used] = sequences_gt
103 | all_joints_seq_gt[:,:,index_to_ignore] = all_joints_seq_gt[:,:,index_to_equal]
104 |
105 |
106 | loss=mpjpe_error(all_joints_seq.view(-1,args.output_n,32,3),all_joints_seq_gt.view(-1,args.output_n,32,3))
107 |
108 |
109 |
110 | running_loss+=loss*batch_dim
111 | accum_loss+=loss*batch_dim
112 |
113 |             print('loss on the test subject for action '+str(action)+' is: '+str(running_loss/n))
114 | n_batches+=n
115 |
116 | t_3d_all.append (t_3d[idx_eval]/N)
117 |
118 | print('overall average loss in mm is: '+str(accum_loss/n_batches))
119 |
120 |
121 |
122 | print('overall final loss in mm is: ',np.mean(t_3d_all))
123 |
124 |
125 |
126 |
127 | if __name__ == '__main__':
128 |     parser = argparse.ArgumentParser(add_help=False)  # common parameters
129 |     parser.add_argument('--data_dir', type=str, default='../data_h36m/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)')
130 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames")
131 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames")
132 |     parser.add_argument('--skip_rate', type=int, default=1, choices=[1, 5], help='rate of frames to skip; use 1 for H36M or 5 for AMASS/3DPW')
133 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader')
134 |     parser.add_argument('--root', default='./runs', type=str, help='root path for the logging')
135 |
136 | parser.add_argument('--activation', default='mish', type=str, required=False) # 'mish', 'gelu'
137 | parser.add_argument('--r_se', default=8, type=int, required=False)
138 |
139 | parser.add_argument('--n_epochs', default=50, type=int, required=False)
140 |     parser.add_argument('--batch_size', default=50, type=int, required=False)
141 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False)
142 | parser.add_argument('--pin_memory', default=False, type=bool, required=False)
143 | parser.add_argument('--loader_workers', default=4, type=int, required=False)
144 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False)
145 | parser.add_argument('--dev', default='cuda:0', type=str, required=False)
146 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, hee_normal, hee_uniform')
147 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False)
148 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma')
149 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs')
150 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients')
151 | parser.add_argument('--model_path', type=str, default='./checkpoints/h36m_3d_25frames_ckpt', help='directory with the models checkpoints ')
152 |     parser.add_argument('--actions_to_consider', default='all', help='Actions to consider. Choose either "all" or a list of actions')
153 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set')
154 |     parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)')
155 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle'])
156 | parser.add_argument('--device', type=str, default='cuda:0', choices=['cuda:0', 'cpu'])
157 |
158 |
159 |
160 |
161 |
162 | args = parser.parse_args()
163 |
164 | if args.loss_type == 'mpjpe':
165 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe
166 | parser_mpjpe.add_argument('--hidden_dim', default=50, type=int, required=False)
167 | parser_mpjpe.add_argument('--num_blocks', default=4, type=int, required=False)
168 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False)
169 | parser_mpjpe.add_argument('--channels_mlp_dim', default=50, type=int, required=False)
170 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False)
171 | parser_mpjpe.add_argument('--pose_dim', default=66, type=int, required=False)
172 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference between 2 frames')
173 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False)
174 | args = parser_mpjpe.parse_args()
175 |
176 | elif args.loss_type == 'angle':
177 | parser_angle = argparse.ArgumentParser(parents=[parser]) # Parameters for angle
178 | parser_angle.add_argument('--hidden_dim', default=60, type=int, required=False)
179 | parser_angle.add_argument('--num_blocks', default=3, type=int, required=False)
180 | parser_angle.add_argument('--tokens_mlp_dim', default=40, type=int, required=False)
181 | parser_angle.add_argument('--channels_mlp_dim', default=60, type=int, required=False)
182 | parser_angle.add_argument('--regularization', default=0.0, type=float, required=False)
183 | parser_angle.add_argument('--pose_dim', default=48, type=int, required=False)
184 | parser_angle.add_argument('--lr', default=1e-02, type=float, required=False)
185 | args = parser_angle.parse_args()
186 |
187 |     if args.loss_type == 'angle' and getattr(args, 'delta_x', False):  # --delta_x exists only on the mpjpe parser
188 |         raise ValueError('Delta_x and loss type angle cannot be used together.')
189 |
190 | print(args)
191 |
192 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks,
193 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim,
194 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n,
195 | pred_len=args.output_n, activation=args.activation,
196 | mlp_block_type='normal', regularization=args.regularization,
197 | input_size=args.pose_dim, initialization='none', r_se=args.r_se,
198 | use_max_pooling=False, use_se=True)
199 |
200 | model = model.to(args.dev)
201 |
202 |
203 |     model.load_state_dict(torch.load(args.model_path, map_location=args.dev))
204 |
205 | print('total number of parameters of the network is: ' +
206 | str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
207 |
208 |
209 |
210 |
211 | test_pretrained(model, args)
212 |
213 |
214 |
215 |
--------------------------------------------------------------------------------
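
A note on the --delta_x path used in the script above (and in the training script below): the model consumes frame-to-frame differences instead of absolute poses, with the first difference repeated once so the delta sequence keeps the original length, and delta_2_gt (defined in h36m/utils/utils_mixer.py, which is not part of this listing) converts the predicted differences back to absolute poses. A round-trip sketch, under the assumption that delta_2_gt is a cumulative sum anchored at the last observed frame:

```python
import torch

input_n, output_n = 10, 25
seq = torch.randn(2, input_n + output_n, 66)    # absolute poses

# Encoding, exactly as built in the loops above (the first delta appears twice):
deltas = [seq[:, 1, :] - seq[:, 0, :]]
for i in range(input_n + output_n - 1):
    deltas.append(seq[:, i + 1, :] - seq[:, i, :])
deltas = torch.stack(deltas).permute(1, 0, 2)   # (batch, 35, 66)

# Hypothetical decoder, consistent with how delta_2_gt is called above:
def delta_2_gt_sketch(pred_deltas, last_obs):
    return torch.cumsum(pred_deltas, dim=1) + last_obs.unsqueeze(1)

recon = delta_2_gt_sketch(deltas[:, input_n:, :], seq[:, input_n - 1, :])
assert torch.allclose(recon, seq[:, input_n:, :], atol=1e-5)
```
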
/h36m/train_mixer_h36m.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | from datasets.dataset_h36m import H36M_Dataset
4 | from datasets.dataset_h36m_ang import H36M_Dataset_Angle
5 | from utils.data_utils import define_actions
6 | from torch.utils.data import DataLoader
7 | from mlp_mixer import MlpMixer
8 | import torch.optim as optim
9 | import numpy as np
10 | import argparse
11 | from utils.utils_mixer import delta_2_gt, mpjpe_error, euler_error
12 | from tqdm import tqdm
13 | from torch.utils.tensorboard import SummaryWriter
14 |
15 |
16 | def get_log_dir(out_dir):
17 | dirs = [x[0] for x in os.walk(out_dir)]
18 |     if len(dirs) < 2:
19 | log_dir = os.path.join(out_dir, 'exp0')
20 | os.mkdir(log_dir)
21 | else:
22 | log_dir = os.path.join(out_dir, 'exp%i'%(len(dirs)-1))
23 | os.mkdir(log_dir)
24 |
25 | return log_dir
26 |
27 |
28 | def train(model, model_name, args):
29 |
30 | log_dir = get_log_dir(args.root)
31 | tb_writer = SummaryWriter(log_dir=log_dir)
32 | print('Save data of the run in: %s'%log_dir)
33 |
34 | device = args.dev
35 |
36 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-05)
37 |
38 | if args.use_scheduler:
39 | scheduler = optim.lr_scheduler.MultiStepLR(
40 | optimizer, milestones=args.milestones, gamma=args.gamma)
41 |
42 | train_loss, val_loss, test_loss = [], [], []
43 |
44 | if args.loss_type == 'mpjpe':
45 | dataset = H36M_Dataset(args.data_dir, args.input_n,
46 | args.output_n, args.skip_rate, split=0)
47 | vald_dataset = H36M_Dataset(args.data_dir, args.input_n,
48 | args.output_n, args.skip_rate, split=1)
49 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25,
50 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
51 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68,
52 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])
53 |
54 | elif args.loss_type == 'angle':
55 | dataset = H36M_Dataset_Angle(args.data_dir, args.input_n, args.output_n,
56 | args.skip_rate, split=0)
57 | vald_dataset = H36M_Dataset_Angle(args.data_dir, args.input_n,
58 | args.output_n, args.skip_rate, split=1)
59 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42,
60 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85,
61 | 86])
62 |
63 | print('>>> Training dataset length: {:d}'.format(dataset.__len__()))
64 | print('>>> Validation dataset length: {:d}'.format(vald_dataset.__len__()))
65 |
66 | data_loader = DataLoader(dataset, batch_size=args.batch_size, shuffle=True,
67 | num_workers=args.num_worker, pin_memory=True)
68 | vald_loader = DataLoader(vald_dataset, batch_size=args.batch_size,
69 | shuffle=True, num_workers=args.num_worker, pin_memory=True)
70 |
71 |
72 | for epoch in range(args.n_epochs):
73 | print('Run epoch: %i'%epoch)
74 | running_loss = 0
75 | n = 0
76 | model.train()
77 | for cnt, batch in tqdm(enumerate(data_loader), total=len(data_loader)):
78 | batch = batch.to(device)
79 | batch_dim = batch.shape[0]
80 | n += batch_dim
81 |
82 | if args.loss_type == 'mpjpe':
83 | sequences_train = batch[:, 0:args.input_n, dim_used].view(
84 | -1, args.input_n, args.pose_dim)
85 | sequences_gt = batch[:, args.input_n:args.input_n +
86 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim)
87 | elif args.loss_type == 'angle':
88 | sequences_train=batch[:, 0:args.input_n, dim_used].view(
89 | -1,args.input_n,len(dim_used))
90 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n, dim_used]
91 |
92 | optimizer.zero_grad()
93 |
94 |             if getattr(args, 'delta_x', False):  # --delta_x exists only on the mpjpe parser
95 | sequences_all = torch.cat((sequences_train, sequences_gt), 1)
96 | sequences_all_delta = [
97 | sequences_all[:, 1, :] - sequences_all[:, 0, :]]
98 | for i in range(args.input_n+args.output_n-1):
99 | sequences_all_delta.append(
100 | sequences_all[:, i+1, :] - sequences_all[:, i, :])
101 |
102 | sequences_all_delta = torch.stack(
103 | (sequences_all_delta)).permute(1, 0, 2)
104 | sequences_train_delta = sequences_all_delta[:,
105 | 0:args.input_n, :]
106 | sequences_predict = model(sequences_train_delta)
107 | sequences_predict = delta_2_gt(
108 | sequences_predict, sequences_train[:, -1, :])
109 | loss = mpjpe_error(sequences_predict, sequences_gt)
110 |
111 | elif args.loss_type == 'mpjpe':
112 | sequences_train = sequences_train/1000
113 | sequences_predict = model(sequences_train)
114 | loss = mpjpe_error(sequences_predict, sequences_gt)
115 |
116 | elif args.loss_type == 'angle':
117 | sequences_predict=model(sequences_train)
118 | loss=torch.mean(torch.sum(torch.abs(sequences_predict.reshape(-1,args.output_n,len(dim_used)) - sequences_gt), dim=2).view(-1))
119 |
120 |
121 | loss.backward()
122 | if args.clip_grad is not None:
123 | torch.nn.utils.clip_grad_norm_(
124 | model.parameters(), args.clip_grad)
125 |
126 | optimizer.step()
127 |
128 | running_loss += loss*batch_dim
129 |
130 | train_loss.append(running_loss.detach().cpu()/n)
131 | model.eval()
132 | with torch.no_grad():
133 | running_loss = 0
134 | n = 0
135 | for cnt, batch in enumerate(vald_loader):
136 | batch = batch.to(device)
137 | batch_dim = batch.shape[0]
138 | n += batch_dim
139 |
140 | if args.loss_type == 'mpjpe':
141 | sequences_train = batch[:, 0:args.input_n, dim_used].view(
142 | -1, args.input_n, args.pose_dim)
143 | sequences_gt = batch[:, args.input_n:args.input_n +
144 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim)
145 | elif args.loss_type == 'angle':
146 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used))
147 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n,:]
148 |
149 |
150 |                 if getattr(args, 'delta_x', False):
151 | sequences_all = torch.cat(
152 | (sequences_train, sequences_gt), 1)
153 | sequences_all_delta = [
154 | sequences_all[:, 1, :] - sequences_all[:, 0, :]]
155 | for i in range(args.input_n+args.output_n-1):
156 | sequences_all_delta.append(
157 | sequences_all[:, i+1, :] - sequences_all[:, i, :])
158 |
159 | sequences_all_delta = torch.stack(
160 | (sequences_all_delta)).permute(1, 0, 2)
161 | sequences_train_delta = sequences_all_delta[:,
162 | 0:args.input_n, :]
163 | sequences_predict = model(sequences_train_delta)
164 | sequences_predict = delta_2_gt(
165 | sequences_predict, sequences_train[:, -1, :])
166 | loss = mpjpe_error(sequences_predict, sequences_gt)
167 |
168 | elif args.loss_type == 'mpjpe':
169 | sequences_train = sequences_train/1000
170 | sequences_predict = model(sequences_train)
171 | loss = mpjpe_error(sequences_predict, sequences_gt)
172 |
173 | elif args.loss_type == 'angle':
174 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:]
175 | sequences_predict=model(sequences_train)
176 | all_joints_seq[:,:,dim_used] = sequences_predict
177 | loss = euler_error(all_joints_seq,sequences_gt)
178 |
179 | running_loss += loss*batch_dim
180 | val_loss.append(running_loss.detach().cpu()/n)
181 | if args.use_scheduler:
182 | scheduler.step()
183 |
184 | if args.loss_type == 'mpjpe':
185 | test_loss.append(test_mpjpe(model, args))
186 | elif args.loss_type == 'angle':
187 | test_loss.append(test_angle(model, args))
188 |
189 | tb_writer.add_scalar('loss/train', train_loss[-1].item(), epoch)
190 | tb_writer.add_scalar('loss/val', val_loss[-1].item(), epoch)
191 | tb_writer.add_scalar('loss/test', test_loss[-1].item(), epoch)
192 |
193 | torch.save(model.state_dict(), os.path.join(log_dir, 'model.pt'))
194 | # TODO write something to save the best model
195 | if (epoch+1)%1==0:
196 | print('----saving model-----')
197 | torch.save(model.state_dict(),os.path.join(args.model_path,model_name))
198 |
199 |
200 | def test_mpjpe(model, args):
201 |
202 | device = args.dev
203 | model.eval()
204 | accum_loss = 0
205 | n_batches = 0 # number of batches for all the sequences
206 | actions = define_actions(args.actions_to_consider)
207 | if args.loss_type == 'mpjpe':
208 | dim_used = np.array([6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25,
209 | 26, 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45,
210 | 46, 47, 51, 52, 53, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68,
211 | 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92])
212 | elif args.loss_type == 'angle':
213 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36,
214 | 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 51, 52, 53, 54, 55,
215 | 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85, 86])
216 | # joints at same loc
217 | joint_to_ignore = np.array([16, 20, 23, 24, 28, 31])
218 | index_to_ignore = np.concatenate(
219 | (joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
220 | joint_equal = np.array([13, 19, 22, 13, 27, 30])
221 | index_to_equal = np.concatenate(
222 | (joint_equal * 3, joint_equal * 3 + 1, joint_equal * 3 + 2))
223 |
224 | for action in actions:
225 | running_loss = 0
226 | n = 0
227 | if args.loss_type == 'mpjpe':
228 | dataset_test = H36M_Dataset(args.data_dir, args.input_n,
229 | args.output_n, args.skip_rate, split=2, actions=[action])
230 | elif args.loss_type == 'angle':
231 | dataset_test = H36M_Dataset_Angle(args.data_dir, args.input_n,
232 | args.output_n, args.skip_rate, split=2, actions=[action])
233 | print('>>> Test dataset length: {:d}'.format(dataset_test.__len__()))
234 |
235 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test,
236 | shuffle=False, num_workers=0, pin_memory=True)
237 | for cnt, batch in enumerate(test_loader):
238 | with torch.no_grad():
239 |
240 | batch = batch.to(device)
241 | batch_dim = batch.shape[0]
242 | n += batch_dim
243 |
244 | all_joints_seq = batch.clone(
245 | )[:, args.input_n:args.input_n+args.output_n, :]
246 | all_joints_seq_gt = batch.clone(
247 | )[:, args.input_n:args.input_n+args.output_n, :]
248 |
249 | sequences_train = batch[:, 0:args.input_n,
250 | dim_used].view(-1, args.input_n, len(dim_used))
251 |
252 | sequences_gt = batch[:, args.input_n:args.input_n +
253 | args.output_n, dim_used].view(-1, args.output_n, args.pose_dim)
254 |
255 | if args.delta_x:
256 | sequences_all = torch.cat(
257 | (sequences_train, sequences_gt), 1)
258 | sequences_all_delta = [
259 | sequences_all[:, 1, :] - sequences_all[:, 0, :]]
260 | for i in range(args.input_n+args.output_n-1):
261 | sequences_all_delta.append(
262 | sequences_all[:, i+1, :] - sequences_all[:, i, :])
263 |
264 | sequences_all_delta = torch.stack(
265 | (sequences_all_delta)).permute(1, 0, 2)
266 | sequences_train_delta = sequences_all_delta[:,
267 | 0:args.input_n, :]
268 | sequences_predict = model(sequences_train_delta)
269 | sequences_predict = delta_2_gt(
270 | sequences_predict, sequences_train[:, -1, :])
271 | loss = mpjpe_error(sequences_predict, sequences_gt)
272 |
273 | else:
274 | sequences_train = sequences_train/1000
275 | sequences_predict = model(sequences_train)
276 | loss = mpjpe_error(sequences_predict, sequences_gt)
277 |
278 | all_joints_seq[:, :, dim_used] = sequences_predict
279 | all_joints_seq[:, :,
280 | index_to_ignore] = all_joints_seq[:, :, index_to_equal]
281 |
282 | all_joints_seq_gt[:, :, dim_used] = sequences_gt
283 | all_joints_seq_gt[:, :,
284 | index_to_ignore] = all_joints_seq_gt[:, :, index_to_equal]
285 |
286 | loss = mpjpe_error(all_joints_seq.view(-1, args.output_n, 32, 3),
287 | all_joints_seq_gt.view(-1, args.output_n, 32, 3))
288 |
289 | running_loss += loss*batch_dim
290 | accum_loss += loss*batch_dim
291 |
292 | n_batches += n
293 | print('overall average loss in mm is: %f'%(accum_loss/n_batches))
294 | return accum_loss/n_batches
295 |
296 |
297 | def test_angle(model, args):
298 |
299 | device = args.dev
300 | model.eval()
301 | accum_loss=0
302 | n_batches=0 # number of batches for all the sequences
303 | actions=define_actions(args.actions_to_consider)
304 | dim_used = np.array([6, 7, 8, 9, 12, 13, 14, 15, 21, 22, 23, 24, 27, 28, 29, 30, 36, 37, 38, 39, 40, 41, 42,
305 | 43, 44, 45, 46, 47, 51, 52, 53, 54, 55, 56, 57, 60, 61, 62, 75, 76, 77, 78, 79, 80, 81, 84, 85,
306 | 86])
307 |
308 | for action in actions:
309 | running_loss=0
310 | n=0
311 | dataset_test = H36M_Dataset_Angle(args.data_dir,args.input_n,args.output_n,args.skip_rate, split=2,actions=[action])
312 | #print('>>> Test dataset length: {:d}'.format(dataset_test.__len__()))
313 |
314 | test_loader = DataLoader(dataset_test, batch_size=args.batch_size_test, shuffle=False, num_workers=0, pin_memory=True)
315 | for cnt, batch in enumerate(test_loader):
316 | with torch.no_grad():
317 |
318 | batch=batch.to(device)
319 | batch_dim=batch.shape[0]
320 | n+=batch_dim
321 |
322 | all_joints_seq=batch.clone()[:, args.input_n:args.input_n+args.output_n,:]
323 |
324 | sequences_train=batch[:, 0:args.input_n, dim_used].view(-1,args.input_n,len(dim_used))
325 | sequences_gt=batch[:, args.input_n:args.input_n+args.output_n, :]
326 |
327 | sequences_predict=model(sequences_train)
328 | all_joints_seq[:,:,dim_used] = sequences_predict
329 | loss=euler_error(all_joints_seq,sequences_gt)
330 |
331 | running_loss+=loss*batch_dim
332 | accum_loss+=loss*batch_dim
333 |
334 | n_batches+=n
335 | print('overall average loss in euler angle is: '+str(accum_loss/n_batches))
336 |
337 | return accum_loss/n_batches
338 |
339 |
340 | if __name__ == '__main__':
341 |     parser = argparse.ArgumentParser(add_help=False)  # common parameters
342 |     parser.add_argument('--data_dir', type=str, default='../data_h36m/', help='path to the unzipped dataset directories (H36M/AMASS/3DPW)')
343 | parser.add_argument('--input_n', type=int, default=10, help="number of model's input frames")
344 | parser.add_argument('--output_n', type=int, default=25, help="number of model's output frames")
345 |     parser.add_argument('--skip_rate', type=int, default=5, choices=[1, 5], help='rate of frames to skip; use 1 for H36M or 5 for AMASS/3DPW')
346 | parser.add_argument('--num_worker', default=4, type=int, help='number of workers in the dataloader')
347 |     parser.add_argument('--root', default='./runs', type=str, help='root path for the logging')
348 |
349 | parser.add_argument('--activation', default='mish', type=str, required=False)
350 | parser.add_argument('--r_se', default=8, type=int, required=False)
351 |
352 | parser.add_argument('--n_epochs', default=50, type=int, required=False)
353 | parser.add_argument('--batch_size', default=50, type=int, required=False)
354 | parser.add_argument('--loader_shuffle', default=True, type=bool, required=False)
355 | parser.add_argument('--pin_memory', default=False, type=bool, required=False)
356 | parser.add_argument('--loader_workers', default=4, type=int, required=False)
357 | parser.add_argument('--load_checkpoint', default=False, type=bool, required=False)
358 | parser.add_argument('--dev', default='cuda:0', type=str, required=False)
359 | parser.add_argument('--initialization', type=str, default='none', help='none, glorot_normal, glorot_uniform, hee_normal, hee_uniform')
360 | parser.add_argument('--use_scheduler', default=True, type=bool, required=False)
361 | parser.add_argument('--milestones', type=list, default=[15, 25, 35, 40], help='the epochs after which the learning rate is adjusted by gamma')
362 | parser.add_argument('--gamma', type=float, default=0.1, help='gamma correction to the learning rate, after reaching the milestone epochs')
363 | parser.add_argument('--clip_grad', type=float, default=None, help='select max norm to clip gradients')
364 | parser.add_argument('--model_path', type=str, default='./checkpoints/h36m', help='directory with the models checkpoints ')
365 |     parser.add_argument('--actions_to_consider', default='all', help='Actions to consider. Choose either "all" or a list of actions')
366 | parser.add_argument('--batch_size_test', type=int, default=256, help='batch size for the test set')
367 |     parser.add_argument('--visualize_from', type=str, default='test', choices=['train', 'val', 'test'], help='choose data split to visualize from (train-val-test)')
368 | parser.add_argument('--loss_type', type=str, default='mpjpe', choices=['mpjpe', 'angle'])
369 |
370 | args = parser.parse_args()
371 |
372 | if args.loss_type == 'mpjpe':
373 | parser_mpjpe = argparse.ArgumentParser(parents=[parser]) # Parameters for mpjpe
374 | parser_mpjpe.add_argument('--hidden_dim', default=50, type=int, required=False)
375 | parser_mpjpe.add_argument('--num_blocks', default=4, type=int, required=False)
376 | parser_mpjpe.add_argument('--tokens_mlp_dim', default=20, type=int, required=False)
377 | parser_mpjpe.add_argument('--channels_mlp_dim', default=50, type=int, required=False)
378 | parser_mpjpe.add_argument('--regularization', default=0.1, type=float, required=False)
379 | parser_mpjpe.add_argument('--pose_dim', default=66, type=int, required=False)
380 | parser_mpjpe.add_argument('--delta_x', type=bool, default=True, help='predicting the difference between 2 frames')
381 | parser_mpjpe.add_argument('--lr', default=0.001, type=float, required=False)
382 | args = parser_mpjpe.parse_args()
383 |
384 | elif args.loss_type == 'angle':
385 | parser_angle = argparse.ArgumentParser(parents=[parser]) # Parameters for angle
386 | parser_angle.add_argument('--hidden_dim', default=60, type=int, required=False)
387 | parser_angle.add_argument('--num_blocks', default=3, type=int, required=False)
388 | parser_angle.add_argument('--tokens_mlp_dim', default=40, type=int, required=False)
389 | parser_angle.add_argument('--channels_mlp_dim', default=60, type=int, required=False)
390 | parser_angle.add_argument('--regularization', default=0.0, type=float, required=False)
391 | parser_angle.add_argument('--pose_dim', default=48, type=int, required=False)
392 | parser_angle.add_argument('--lr', default=1e-02, type=float, required=False)
393 | args = parser_angle.parse_args()
394 |
395 |     if args.loss_type == 'angle' and getattr(args, 'delta_x', False):  # --delta_x exists only on the mpjpe parser
396 |         raise ValueError('Delta_x and loss type angle cannot be used together.')
397 |
398 | print(args)
399 |
400 | model = MlpMixer(num_classes=args.pose_dim, num_blocks=args.num_blocks,
401 | hidden_dim=args.hidden_dim, tokens_mlp_dim=args.tokens_mlp_dim,
402 | channels_mlp_dim=args.channels_mlp_dim, seq_len=args.input_n,
403 | pred_len=args.output_n, activation=args.activation,
404 | mlp_block_type='normal', regularization=args.regularization,
405 | input_size=args.pose_dim, initialization='none', r_se=args.r_se,
406 | use_max_pooling=False, use_se=True)
407 |
408 | model = model.to(args.dev)
409 |
410 | print('total number of parameters of the network is: ' +
411 | str(sum(p.numel() for p in model.parameters() if p.requires_grad)))
412 |
413 | model_name = 'h36_3d_'+str(args.output_n)+'frames_ckpt'
414 |
415 | train(model, model_name, args)
416 | test_mpjpe(model, args)
417 |
--------------------------------------------------------------------------------
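
For reference, the mpjpe_error used throughout these scripts is imported from h36m/utils/utils_mixer.py, which is not part of this listing. A minimal sketch consistent with the inline metric computed in test_pretrained above (mean Euclidean distance between predicted and ground-truth 3D joints, averaged over all joints and frames):

```python
import torch

# Hedged sketch only; the actual mpjpe_error lives in h36m/utils/utils_mixer.py.
def mpjpe_error_sketch(pred, gt):
    # pred, gt: (batch, T, V, C) or (batch, T, V*C) with C == 3
    pred = pred.contiguous().view(-1, 3)
    gt = gt.contiguous().view(-1, 3)
    return torch.mean(torch.norm(gt - pred, 2, 1))

pred = torch.randn(2, 25, 32, 3)
gt = torch.randn(2, 25, 32, 3)
print(mpjpe_error_sketch(pred, gt))   # scalar tensor, in the input's units (mm for H3.6M)
```
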
/h36m/utils/data_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
3 | import numpy as np
4 | from six.moves import xrange # pylint: disable=redefined-builtin
5 | import torch
6 | # from torch.autograd.variable import Variable
7 | import os
8 | import utils.forward_kinematics as forward_kinematics
9 |
10 |
11 | def rotmat2euler(R):
12 | """
13 | Converts a rotation matrix to Euler angles
14 | Matlab port to python for evaluation purposes
15 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1
16 |
17 | Args
18 | R: a 3x3 rotation matrix
19 | Returns
20 | eul: a 3x1 Euler angle representation of R
21 | """
22 | if R[0, 2] == 1 or R[0, 2] == -1:
23 | # special case
24 | E3 = 0 # set arbitrarily
25 |         dlta = np.arctan2(R[0, 1], R[0, 2])
26 | 
27 |         if R[0, 2] == -1:
28 |             E2 = np.pi / 2
29 |             E1 = E3 + dlta
30 |         else:
31 |             E2 = -np.pi / 2
32 |             E1 = -E3 + dlta
33 |
34 | else:
35 | E2 = -np.arcsin(R[0, 2])
36 | E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2))
37 | E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2))
38 |
39 |     eul = np.array([E1, E2, E3])
40 | return eul
41 |
42 |
43 | def rotmat2quat(R):
44 | """
45 | Converts a rotation matrix to a quaternion
46 | Matlab port to python for evaluation purposes
47 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4
48 |
49 | Args
50 | R: 3x3 rotation matrix
51 | Returns
52 | q: 1x4 quaternion
53 | """
54 |     rotdiff = R - R.T
55 | 
56 |     r = np.zeros(3)
57 |     r[0] = -rotdiff[1, 2]
58 |     r[1] = rotdiff[0, 2]
59 |     r[2] = -rotdiff[0, 1]
60 |     sintheta = np.linalg.norm(r) / 2
61 |     r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps)
62 | 
63 |     costheta = (np.trace(R) - 1) / 2
64 | 
65 |     theta = np.arctan2(sintheta, costheta)
66 |
67 | q = np.zeros(4)
68 | q[0] = np.cos(theta / 2)
69 | q[1:] = r0 * np.sin(theta / 2)
70 | return q
71 |
72 |
73 | def rotmat2expmap(R):
74 |     return quat2expmap(rotmat2quat(R))
75 |
76 |
77 | def expmap2rotmat(r):
78 | """
79 | Converts an exponential map angle to a rotation matrix
80 | Matlab port to python for evaluation purposes
81 | I believe this is also called Rodrigues' formula
82 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m
83 |
84 | Args
85 | r: 1x3 exponential map
86 | Returns
87 | R: 3x3 rotation matrix
88 | """
89 | theta = np.linalg.norm(r)
90 | r0 = np.divide(r, theta + np.finfo(np.float32).eps)
91 | r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3)
92 | r0x = r0x - r0x.T
93 |     R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * r0x.dot(r0x)
94 | return R
95 |
96 |
97 | def quat2expmap(q):
98 | """
99 | Converts a quaternion to an exponential map
100 | Matlab port to python for evaluation purposes
101 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1
102 |
103 | Args
104 | q: 1x4 quaternion
105 | Returns
106 | r: 1x3 exponential map
107 | Raises
108 | ValueError if the l2 norm of the quaternion is not close to 1
109 | """
110 |     if np.abs(np.linalg.norm(q) - 1) > 1e-3:
111 |         raise ValueError("quat2expmap: input quaternion is not norm 1")
112 |
113 | sinhalftheta = np.linalg.norm(q[1:])
114 | coshalftheta = q[0]
115 |
116 |     r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps))
117 | theta = 2 * np.arctan2(sinhalftheta, coshalftheta)
118 | theta = np.mod(theta + 2 * np.pi, 2 * np.pi)
119 |
120 | if theta > np.pi:
121 | theta = 2 * np.pi - theta
122 | r0 = -r0
123 |
124 | r = r0 * theta
125 | return r
126 |
127 |
128 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot):
129 | """Borrowed from SRNN code. Reads a csv file and returns a float32 matrix.
130 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12
131 |
132 | Args
133 | normalizedData: nxd matrix with normalized data
134 | data_mean: vector of mean used to normalize the data
135 | data_std: vector of standard deviation used to normalize the data
136 | dimensions_to_ignore: vector with dimensions not used by the model
137 | actions: list of strings with the encoded actions
138 | one_hot: whether the data comes with one-hot encoding
139 | Returns
140 |         origData: the data mapped back to its original, un-normalized representation
141 | """
142 | T = normalizedData.shape[0]
143 | D = data_mean.shape[0]
144 |
145 | origData = np.zeros((T, D), dtype=np.float32)
146 | dimensions_to_use = []
147 | for i in range(D):
148 | if i in dimensions_to_ignore:
149 | continue
150 | dimensions_to_use.append(i)
151 | dimensions_to_use = np.array(dimensions_to_use)
152 |
153 | if one_hot:
154 | origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)]
155 | else:
156 | origData[:, dimensions_to_use] = normalizedData
157 |
158 |     # potentially inefficient, but only done once per experiment
159 | stdMat = data_std.reshape((1, D))
160 | stdMat = np.repeat(stdMat, T, axis=0)
161 | meanMat = data_mean.reshape((1, D))
162 | meanMat = np.repeat(meanMat, T, axis=0)
163 | origData = np.multiply(origData, stdMat) + meanMat
164 | return origData
165 |
166 |
167 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot):
168 | """
169 |     Converts the output of the neural network to a format that is easier to
170 |     manipulate, e.g. for conversion to other formats or for visualization
171 |
172 | Args
173 | poses: The output from the TF model. A list with (seq_length) entries,
174 | each with a (batch_size, dim) output
175 | Returns
176 | poses_out: A tensor of size (batch_size, seq_length, dim) output. Each
177 | batch is an n-by-d sequence of poses.
178 | """
179 | seq_len = len(poses)
180 | if seq_len == 0:
181 | return []
182 |
183 | batch_size, dim = poses[0].shape
184 |
185 | poses_out = np.concatenate(poses)
186 | poses_out = np.reshape(poses_out, (seq_len, batch_size, dim))
187 | poses_out = np.transpose(poses_out, [1, 0, 2])
188 |
189 | poses_out_list = []
190 | for i in xrange(poses_out.shape[0]):
191 | poses_out_list.append(
192 | unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot))
193 |
194 | return poses_out_list
195 |
196 |
197 | def readCSVasFloat(filename):
198 | """
199 | Borrowed from SRNN code. Reads a csv and returns a float matrix.
200 | https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34
201 |
202 | Args
203 | filename: string. Path to the csv file
204 | Returns
205 | returnArray: the read data in a float32 matrix
206 | """
207 | returnArray = []
208 | lines = open(filename).readlines()
209 | for line in lines:
210 | line = line.strip().split(',')
211 | if len(line) > 0:
212 | returnArray.append(np.array([np.float32(x) for x in line]))
213 |
214 | returnArray = np.array(returnArray)
215 | return returnArray
216 |
217 |
218 | def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot):
219 | """
220 | Normalize input data by removing unused dimensions, subtracting the mean and
221 | dividing by the standard deviation
222 |
223 | Args
224 | data: nx99 matrix with data to normalize
225 | data_mean: vector of mean used to normalize the data
226 | data_std: vector of standard deviation used to normalize the data
227 | dim_to_use: vector with dimensions used by the model
228 | actions: list of strings with the encoded actions
229 | one_hot: whether the data comes with one-hot encoding
230 | Returns
231 | data_out: the passed data matrix, but normalized
232 | """
233 | data_out = {}
234 | nactions = len(actions)
235 |
236 | if not one_hot:
237 | # No one-hot encoding... no need to do anything special
238 | for key in data.keys():
239 | data_out[key] = np.divide((data[key] - data_mean), data_std)
240 | data_out[key] = data_out[key][:, dim_to_use]
241 |
242 | else:
243 | # TODO hard-coding 99 dimensions for un-normalized human poses
244 | for key in data.keys():
245 | data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std)
246 | data_out[key] = data_out[key][:, dim_to_use]
247 | data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:]))
248 |
249 | return data_out
250 |
251 |
252 | def normalization_stats(completeData):
253 | """"
254 | Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore.
255 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
256 |
257 | Args
258 | completeData: nx99 matrix with data to normalize
259 | Returns
260 | data_mean: vector of mean used to normalize the data
261 | data_std: vector of standard deviation used to normalize the data
262 | dimensions_to_ignore: vector with dimensions not used by the model
263 | dimensions_to_use: vector with dimensions used by the model
264 | """
265 | data_mean = np.mean(completeData, axis=0)
266 | data_std = np.std(completeData, axis=0)
267 |
268 | dimensions_to_ignore = []
269 | dimensions_to_use = []
270 |
271 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
272 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
273 |
274 | data_std[dimensions_to_ignore] = 1.0
275 |
276 | return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
277 |
278 |
279 | def define_actions(action):
280 | """
281 | Define the list of actions we are using.
282 |
283 | Args
284 | action: String with the passed action. Could be "all"
285 | Returns
286 | actions: List of strings of actions
287 | Raises
288 | ValueError if the action is not included in H3.6M
289 | """
290 |
291 | actions = ["walking", "eating", "smoking", "discussion", "directions",
292 | "greeting", "phoning", "posing", "purchases", "sitting",
293 | "sittingdown", "takingphoto", "waiting", "walkingdog",
294 | "walkingtogether"]
295 | if action in actions:
296 | return [action]
297 |
298 | if action == "all":
299 | return actions
300 |
301 | if action == "all_srnn":
302 | return ["walking", "eating", "smoking", "discussion"]
303 |
304 | raise (ValueError, "Unrecognized action: %d" % action)
305 |
306 |
307 | """all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction"""
308 |
309 |
310 | def define_actions_cmu(action):
311 | """
312 | Define the list of actions we are using.
313 |
314 | Args
315 | action: String with the passed action. Could be "all"
316 | Returns
317 | actions: List of strings of actions
318 | Raises
319 |       ValueError if the action is not included in the CMU mocap dataset
320 | """
321 |
322 | actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking",
323 | "washwindow"]
324 | if action in actions:
325 | return [action]
326 |
327 | if action == "all":
328 | return actions
329 |
330 |     raise ValueError("Unrecognized action: %s" % action)
331 |
332 |
333 | def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
334 | seq_len = input_n + output_n
335 | nactions = len(actions)
336 | sampled_seq = []
337 | complete_seq = []
338 | for action_idx in np.arange(nactions):
339 | action = actions[action_idx]
340 | path = '{}/{}'.format(path_to_dataset, action)
341 | count = 0
342 | for _ in os.listdir(path):
343 | count = count + 1
344 | for examp_index in np.arange(count):
345 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
346 | action_sequence = readCSVasFloat(filename)
347 | n, d = action_sequence.shape
348 | even_list = range(0, n, 2)
349 | the_sequence = np.array(action_sequence[even_list, :])
350 | num_frames = len(the_sequence)
351 | if not is_test:
352 | fs = np.arange(0, num_frames - seq_len + 1)
353 | fs_sel = fs
354 | for i in np.arange(seq_len - 1):
355 | fs_sel = np.vstack((fs_sel, fs + i + 1))
356 | fs_sel = fs_sel.transpose()
357 | seq_sel = the_sequence[fs_sel, :]
358 | if len(sampled_seq) == 0:
359 | sampled_seq = seq_sel
360 | complete_seq = the_sequence
361 | else:
362 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
363 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
364 | else:
365 | source_seq_len = 50
366 | target_seq_len = 25
367 | total_frames = source_seq_len + target_seq_len
368 | batch_size = 8
369 | SEED = 1234567890
370 | rng = np.random.RandomState(SEED)
371 | for _ in range(batch_size):
372 | idx = rng.randint(0, num_frames - total_frames)
373 | seq_sel = the_sequence[
374 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :]
375 | seq_sel = np.expand_dims(seq_sel, axis=0)
376 | if len(sampled_seq) == 0:
377 | sampled_seq = seq_sel
378 | complete_seq = the_sequence
379 | else:
380 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
381 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
382 |
383 | if not is_test:
384 | data_std = np.std(complete_seq, axis=0)
385 | data_mean = np.mean(complete_seq, axis=0)
386 |
387 | dimensions_to_ignore = []
388 | dimensions_to_use = []
389 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
390 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
391 | data_std[dimensions_to_ignore] = 1.0
392 | data_mean[dimensions_to_ignore] = 0.0
393 |
394 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
395 |
396 |
397 | def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
398 | seq_len = input_n + output_n
399 | nactions = len(actions)
400 | sampled_seq = []
401 | complete_seq = []
402 | for action_idx in np.arange(nactions):
403 | action = actions[action_idx]
404 | path = '{}/{}'.format(path_to_dataset, action)
405 | count = 0
406 | for _ in os.listdir(path):
407 | count = count + 1
408 | for examp_index in np.arange(count):
409 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
410 | action_sequence = readCSVasFloat(filename)
411 | n, d = action_sequence.shape
412 | exptmps = torch.from_numpy(action_sequence).float().cuda()
413 | xyz = expmap2xyz_torch_cmu(exptmps)
414 | xyz = xyz.view(-1, 38 * 3)
415 | xyz = xyz.cpu().data.numpy()
416 | action_sequence = xyz
417 |
418 | even_list = range(0, n, 2)
419 | the_sequence = np.array(action_sequence[even_list, :])
420 | num_frames = len(the_sequence)
421 | if not is_test:
422 | fs = np.arange(0, num_frames - seq_len + 1)
423 | fs_sel = fs
424 | for i in np.arange(seq_len - 1):
425 | fs_sel = np.vstack((fs_sel, fs + i + 1))
426 | fs_sel = fs_sel.transpose()
427 | seq_sel = the_sequence[fs_sel, :]
428 | if len(sampled_seq) == 0:
429 | sampled_seq = seq_sel
430 | complete_seq = the_sequence
431 | else:
432 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
433 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
434 | else:
435 | source_seq_len = 50
436 | target_seq_len = 25
437 | total_frames = source_seq_len + target_seq_len
438 | batch_size = 8
439 | SEED = 1234567890
440 | rng = np.random.RandomState(SEED)
441 | for _ in range(batch_size):
442 | idx = rng.randint(0, num_frames - total_frames)
443 | seq_sel = the_sequence[
444 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :]
445 | seq_sel = np.expand_dims(seq_sel, axis=0)
446 | if len(sampled_seq) == 0:
447 | sampled_seq = seq_sel
448 | complete_seq = the_sequence
449 | else:
450 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
451 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
452 |
453 | if not is_test:
454 | data_std = np.std(complete_seq, axis=0)
455 | data_mean = np.mean(complete_seq, axis=0)
456 |
457 | joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36])
458 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
459 | dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore)
460 |
461 | data_std[dimensions_to_ignore] = 1.0
462 | data_mean[dimensions_to_ignore] = 0.0
463 |
464 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
465 |
466 |
467 | def rotmat2euler_torch(R):
468 | """
469 | Converts a rotation matrix to euler angles
470 | batch pytorch version ported from the corresponding numpy method above
471 |
472 | :param R:N*3*3
473 | :return: N*3
474 | """
475 | n = R.data.shape[0]
476 | eul = torch.zeros(n, 3).float().cuda()
477 | idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist()
478 | idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist()
479 | if len(idx_spec1) > 0:
480 | R_spec1 = R[idx_spec1, :, :]
481 | eul_spec1 = torch.zeros(len(idx_spec1), 3).float().cuda()
482 | eul_spec1[:, 2] = 0
483 | eul_spec1[:, 1] = -np.pi / 2
484 | delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2])
485 | eul_spec1[:, 0] = delta
486 | eul[idx_spec1, :] = eul_spec1
487 |
488 | if len(idx_spec2) > 0:
489 | R_spec2 = R[idx_spec2, :, :]
490 | eul_spec2 = torch.zeros(len(idx_spec2), 3).float().cuda()
491 | eul_spec2[:, 2] = 0
492 | eul_spec2[:, 1] = np.pi / 2
493 | delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2])
494 | eul_spec2[:, 0] = delta
495 | eul[idx_spec2] = eul_spec2
496 |
497 | idx_remain = np.arange(0, n)
498 | idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist()
499 | if len(idx_remain) > 0:
500 | R_remain = R[idx_remain, :, :]
501 | eul_remain = torch.zeros(len(idx_remain), 3).float().cuda()
502 | eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2])
503 | eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]),
504 | R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1]))
505 | eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]),
506 | R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1]))
507 | eul[idx_remain, :] = eul_remain
508 |
509 | return eul
510 |
511 |
512 | def rotmat2quat_torch(R):
513 | """
514 | Converts a rotation matrix to quaternion
515 | batch pytorch version ported from the corresponding numpy method above
516 | :param R: N * 3 * 3
517 | :return: N * 4
518 | """
519 | rotdiff = R - R.transpose(1, 2)
520 | r = torch.zeros_like(rotdiff[:, 0])
521 | r[:, 0] = -rotdiff[:, 1, 2]
522 | r[:, 1] = rotdiff[:, 0, 2]
523 | r[:, 2] = -rotdiff[:, 0, 1]
524 | r_norm = torch.norm(r, dim=1)
525 | sintheta = r_norm / 2
526 | r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001)
527 | t1 = R[:, 0, 0]
528 | t2 = R[:, 1, 1]
529 | t3 = R[:, 2, 2]
530 | costheta = (t1 + t2 + t3 - 1) / 2
531 | theta = torch.atan2(sintheta, costheta)
532 | q = torch.zeros(R.shape[0], 4).float().cuda()
533 | q[:, 0] = torch.cos(theta / 2)
534 | q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3))
535 |
536 | return q
537 |
538 |
539 | def expmap2quat_torch(exp):
540 | """
541 | Converts expmap to quaternion
542 | batch pytorch version ported from the corresponding numpy method above
543 |     :param exp: N*3
544 | :return: N*4
545 | """
546 | theta = torch.norm(exp, p=2, dim=1).unsqueeze(1)
547 | v = torch.div(exp, theta.repeat(1, 3) + 0.0000001)
548 | sinhalf = torch.sin(theta / 2)
549 | coshalf = torch.cos(theta / 2)
550 | q1 = torch.mul(v, sinhalf.repeat(1, 3))
551 | q = torch.cat((coshalf, q1), dim=1)
552 | return q
553 |
554 |
555 | def expmap2rotmat_torch(r):
556 | """
557 | Converts an exponential map to a rotation matrix
558 | batched PyTorch version ported from the corresponding method above
559 | :param r: N*3
560 | :return: N*3*3
561 | """
562 | theta = torch.norm(r, 2, 1)
563 | r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001)
564 | r1 = torch.zeros_like(r0).repeat(1, 3)
565 | r1[:, 1] = -r0[:, 2]
566 | r1[:, 2] = r0[:, 1]
567 | r1[:, 5] = -r0[:, 0]
568 | r1 = r1.view(-1, 3, 3)
569 | r1 = r1 - r1.transpose(1, 2)
570 | n = r1.data.shape[0]
571 | R = torch.eye(3, 3).repeat(n, 1, 1).float().cuda() + torch.mul(
572 | torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul(
573 | (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1))
574 | return R
575 |
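# Quick property check for expmap2rotmat_torch (sketch; assumes a CUDA device,
# as above): any output R must be a valid rotation matrix, i.e. R @ R^T = I
# and det(R) = 1 up to numerical tolerance.
def _check_expmap2rotmat_torch(n=4):
    r = torch.randn(n, 3).cuda()
    R = expmap2rotmat_torch(r)
    eye = torch.eye(3).cuda().unsqueeze(0).repeat(n, 1, 1)
    assert torch.allclose(torch.matmul(R, R.transpose(1, 2)), eye, atol=1e-4)
    assert torch.allclose(torch.det(R), torch.ones(n).cuda(), atol=1e-4)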
576 |
577 | def expmap2xyz_torch(expmap):
578 | """
579 | Converts exponential maps to 3D joint locations via forward kinematics
580 | :param expmap: N*99
581 | :return: N*32*3
582 | """
583 | parent, offset, rotInd, expmapInd = forward_kinematics._some_variables()
584 | xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd)
585 | return xyz
586 |
587 |
588 | def get_dct_matrix(N):
589 | dct_m = np.eye(N)
590 | for k in np.arange(N):
591 | for i in np.arange(N):
592 | w = np.sqrt(2 / N)
593 | if k == 0:
594 | w = np.sqrt(1 / N)
595 | dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N)
596 | idct_m = np.linalg.inv(dct_m)
597 | return dct_m, idct_m
598 |
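# get_dct_matrix builds the orthonormal DCT-II basis,
#     dct_m[k, i] = w_k * cos(pi * (i + 1/2) * k / N),
# with w_0 = sqrt(1/N) and w_k = sqrt(2/N) otherwise, so idct_m is its exact
# inverse. A minimal numerical check (sketch, not part of the pipeline):
def _check_dct_matrix(N=10):
    dct_m, idct_m = get_dct_matrix(N)
    assert np.allclose(idct_m.dot(dct_m), np.eye(N), atol=1e-8)
    x = np.random.randn(N)  # a toy 1-D trajectory
    assert np.allclose(idct_m.dot(dct_m.dot(x)), x, atol=1e-8)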
599 |
600 | def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10):
601 | """
602 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
603 |
604 | which originally comes from the SRNN code,
605 | in order to find the same action indices as in SRNN:
606 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325
607 | """
608 |
609 | # Used a fixed dummy seed, following
610 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29
611 | SEED = 1234567890
612 | rng = np.random.RandomState(SEED)
613 |
614 | T1 = frame_num1 - 150
615 | T2 = frame_num2 - 150 # seq_len
616 | idxo1 = None
617 | idxo2 = None
618 | for _ in np.arange(0, 128):
619 | idx_ran1 = rng.randint(16, T1)
620 | idx_ran2 = rng.randint(16, T2)
621 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len)
622 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len)
623 | if idxo1 is None:
624 | idxo1 = idxs1
625 | idxo2 = idxs2
626 | else:
627 | idxo1 = np.vstack((idxo1, idxs1))
628 | idxo2 = np.vstack((idxo2, idxs2))
629 | return idxo1, idxo2
630 |
631 |
632 | def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10):
633 | """
634 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
635 |
636 | which originally comes from the SRNN code,
637 | in order to find the same action indices as in SRNN:
638 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325
639 | """
640 |
641 | # Used a fixed dummy seed, following
642 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29
643 | SEED = 1234567890
644 | rng = np.random.RandomState(SEED)
645 |
646 | T1 = frame_num1 - 150
647 | T2 = frame_num2 - 150 # seq_len
648 | idxo1 = None
649 | idxo2 = None
650 | for _ in np.arange(0, 4):
651 | idx_ran1 = rng.randint(16, T1)
652 | idx_ran2 = rng.randint(16, T2)
655 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len)
656 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len)
657 | if idxo1 is None:
658 | idxo1 = idxs1
659 | idxo2 = idxs2
660 | else:
661 | idxo1 = np.vstack((idxo1, idxs1))
662 | idxo2 = np.vstack((idxo2, idxs2))
663 | return idxo1, idxo2
664 |
--------------------------------------------------------------------------------
/h36m/utils/forward_kinematics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.autograd.variable import Variable
4 | import utils.data_utils as data_utils
5 |
6 |
7 | def fkl(angles, parent, offset, rotInd, expmapInd):
8 | """
9 | Convert joint angles and bone lengths into the 3d points of a person.
10 |
11 | adapted from
12 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14
13 |
14 | which was originally based on expmap2xyz.m, available at
15 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m
16 | Args
17 | angles: 99-long vector with 3d position and 3d joint angles in expmap format
18 | parent: 32-long vector with parent-child relationships in the kinematic tree
19 | offset: 96-long vector with bone lengths
20 | rotInd: 32-long list with indices into angles
21 | expmapInd: 32-long list with indices into expmap angles
22 | Returns
23 | xyz: 32x3 3d points that represent a person in 3d space
24 | """
25 |
26 | assert len(angles) == 99
27 |
28 | # Structure that indicates parents for each joint
29 | njoints = 32
30 | xyzStruct = [dict() for x in range(njoints)]
31 |
32 | for i in np.arange(njoints):
33 |
34 | # if not rotInd[i]: # If the list is empty
35 | # xangle, yangle, zangle = 0, 0, 0
36 | # else:
37 | # xangle = angles[rotInd[i][0] - 1]
38 | # yangle = angles[rotInd[i][1] - 1]
39 | # zangle = angles[rotInd[i][2] - 1]
40 | if i == 0:
41 | xangle = angles[0]
42 | yangle = angles[1]
43 | zangle = angles[2]
44 | thisPosition = np.array([xangle, yangle, zangle])
45 | else:
46 | thisPosition = np.array([0, 0, 0])
47 |
48 | r = angles[expmapInd[i]]
49 |
50 | thisRotation = data_utils.expmap2rotmat(r)
51 |
52 | if parent[i] == -1: # Root node
53 | xyzStruct[i]['rotation'] = thisRotation
54 | xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition
55 | else:
56 | xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \
57 | xyzStruct[parent[i]]['xyz']
58 | xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation'])
59 |
60 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)]
61 | xyz = np.array(xyz).squeeze()
62 | # xyz = xyz[:, [0, 2, 1]]
63 | # xyz = xyz[:,[2,0,1]]
64 |
65 | return xyz
66 |
67 |
68 | def _some_variables():
69 | """
70 | borrowed from
71 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100
72 |
73 | We define some variables that are useful to run the kinematic tree
74 |
75 | Args
76 | None
77 | Returns
78 | parent: 32-long vector with parent-child relationships in the kinematic tree
79 | offset: 96-long vector with bone lengths
80 | rotInd: 32-long list with indices into angles
81 | expmapInd: 32-long list with indices into expmap angles
82 | """
83 |
84 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13,
85 | 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1
86 |
87 | offset = np.array(
88 | [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000,
89 | -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000,
90 | 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426,
91 | 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000,
92 | 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681,
93 | 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000,
94 | 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000,
95 | 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924,
96 | 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888,
97 | 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000])
98 | offset = offset.reshape(-1, 3)
99 |
100 | rotInd = [[5, 6, 4],
101 | [8, 9, 7],
102 | [11, 12, 10],
103 | [14, 15, 13],
104 | [17, 18, 16],
105 | [],
106 | [20, 21, 19],
107 | [23, 24, 22],
108 | [26, 27, 25],
109 | [29, 30, 28],
110 | [],
111 | [32, 33, 31],
112 | [35, 36, 34],
113 | [38, 39, 37],
114 | [41, 42, 40],
115 | [],
116 | [44, 45, 43],
117 | [47, 48, 46],
118 | [50, 51, 49],
119 | [53, 54, 52],
120 | [56, 57, 55],
121 | [],
122 | [59, 60, 58],
123 | [],
124 | [62, 63, 61],
125 | [65, 66, 64],
126 | [68, 69, 67],
127 | [71, 72, 70],
128 | [74, 75, 73],
129 | [],
130 | [77, 78, 76],
131 | []]
132 |
133 | expmapInd = np.split(np.arange(4, 100) - 1, 32)
134 |
135 | return parent, offset, rotInd, expmapInd
136 |
137 |
138 | def _some_variables_cmu():
139 | """
140 | We define some variables that are useful to run the kinematic tree
141 |
142 | Args
143 | None
144 | Returns
145 | parent: 38-long vector with parent-child relationships in the kinematic tree
146 | offset: 114-long vector with bone lengths
147 | posInd: 38-long list with indices into the position dimensions
148 | expmapInd: 38-long list with indices into expmap angles
149 | """
150 |
151 | parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16,
152 | 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1
153 |
154 | offset = 70 * np.array(
155 | [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000,
156 | 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0,
157 | 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000,
158 | -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000,
159 | 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000,
160 | 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000,
161 | 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000,
162 | 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000,
163 | 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0,
164 | 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000,
165 | -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0,
166 | -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000])
167 | offset = offset.reshape(-1, 3)
168 |
169 | rotInd = [[6, 5, 4],
170 | [9, 8, 7],
171 | [12, 11, 10],
172 | [15, 14, 13],
173 | [18, 17, 16],
174 | [21, 20, 19],
175 | [],
176 | [24, 23, 22],
177 | [27, 26, 25],
178 | [30, 29, 28],
179 | [33, 32, 31],
180 | [36, 35, 34],
181 | [],
182 | [39, 38, 37],
183 | [42, 41, 40],
184 | [45, 44, 43],
185 | [48, 47, 46],
186 | [51, 50, 49],
187 | [54, 53, 52],
188 | [],
189 | [57, 56, 55],
190 | [60, 59, 58],
191 | [63, 62, 61],
192 | [66, 65, 64],
193 | [69, 68, 67],
194 | [72, 71, 70],
195 | [],
196 | [75, 74, 73],
197 | [],
198 | [78, 77, 76],
199 | [81, 80, 79],
200 | [84, 83, 82],
201 | [87, 86, 85],
202 | [90, 89, 88],
203 | [93, 92, 91],
204 | [],
205 | [96, 95, 94],
206 | []]
207 | posInd = []
208 | for ii in np.arange(38):
209 | if ii == 0:
210 | posInd.append([1, 2, 3])
211 | else:
212 | posInd.append([])
213 |
214 | expmapInd = np.split(np.arange(4, 118) - 1, 38)
215 |
216 | return parent, offset, posInd, expmapInd
217 |
218 |
219 | def fkl_torch(angles, parent, offset, rotInd, expmapInd):
220 | """
221 | pytorch version of fkl.
222 |
223 | convert joint angles to joint locations
224 | batch pytorch version of the fkl() method above
225 | :param angles: N*99
226 | :param parent:
227 | :param offset:
228 | :param rotInd:
229 | :param expmapInd:
230 | :return: N*joint_n*3
231 | """
232 | n = angles.data.shape[0]
233 | j_n = offset.shape[0]
234 | p3d = Variable(torch.from_numpy(offset)).float().cuda().unsqueeze(0).repeat(n, 1, 1)
235 | angles = angles[:, 3:].contiguous().view(-1, 3)
236 | R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3)
237 | for i in np.arange(1, j_n):
238 | if parent[i] > 0:
239 | R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone()
240 | p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :]
241 | return p3d
242 |
243 |
244 | def main():
245 | # Load all the data
246 | parent, offset, rotInd, expmapInd = _some_variables()
247 |
248 | # numpy implementation
249 | # with h5py.File('samples.h5', 'r') as h5f:
250 | # expmap_gt = h5f['expmap/gt/walking_0'][:]
251 | # expmap_pred = h5f['expmap/preds/walking_0'][:]
252 | expmap_pred = np.array(
253 | [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215,
254 | -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000,
255 | -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000,
256 | 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
257 | 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127,
258 | 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378,
259 | -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570,
260 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
261 | -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218,
262 | -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000,
263 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
264 | ])
265 | expmap_gt = np.array(
266 | [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248,
267 | -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000,
268 | -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000,
269 | 0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
270 | 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383,
271 | 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597,
272 | -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608,
273 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
274 | -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309,
275 | -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000,
276 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
277 | ])
278 | xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd)
279 | xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd)
280 |
281 | exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda()
282 | xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd)
283 | xyz = xyz.cpu().data.numpy()
284 | print(xyz)
285 |
286 |
287 | if __name__ == '__main__':
288 | main()
289 |
--------------------------------------------------------------------------------
/h36m/utils/utils_mixer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | from random import randint
4 | from utils.data_utils import rotmat2euler_torch, expmap2rotmat_torch
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 |
10 | def criterion_cos(input_f, target_f):
11 | cos = nn.CosineSimilarity(dim=2, eps=1e-6)
12 | return cos(input_f, target_f)
13 |
14 |
15 | def criterion_cos2(input_f, target_f):
16 | cos = nn.CosineSimilarity(dim=1, eps=1e-6)
17 | return cos(input_f, target_f)
18 |
19 |
20 |
21 | def mpjpe_error(batch_pred, batch_gt):
22 |
23 | batch_pred = batch_pred.contiguous().view(-1, 3)
24 | batch_gt = batch_gt.contiguous().view(-1, 3)
25 |
26 | return torch.mean(torch.norm(batch_gt - batch_pred, 2, 1))
27 |
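# mpjpe_error above is the Mean Per Joint Position Error: both tensors are
# flattened to (-1, 3) and the mean Euclidean distance over all joints and
# frames is returned. A toy check (sketch, not part of the pipeline):
def _check_mpjpe_error():
    gt = torch.zeros(2, 5, 3)                  # batch of 2, 5 joints, xyz
    pred = gt + torch.tensor([3.0, 4.0, 0.0])  # every joint off by 5 units
    assert torch.isclose(mpjpe_error(pred, gt), torch.tensor(5.0))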
28 |
29 | def euler_error(ang_pred, ang_gt):
30 | # only for 32 joints
31 | dim_full_len = ang_gt.shape[2]
32 |
33 | pred_expmap = ang_pred.contiguous().view(-1, dim_full_len).view(-1, 3)
34 | targ_expmap = ang_gt.contiguous().view(-1, dim_full_len).view(-1, 3)
35 |
36 | pred_eul = rotmat2euler_torch(expmap2rotmat_torch(pred_expmap))
37 | pred_eul = pred_eul.view(-1, dim_full_len)
38 |
39 | targ_eul = rotmat2euler_torch(expmap2rotmat_torch(targ_expmap))
40 | targ_eul = targ_eul.view(-1, dim_full_len)
41 | mean_errors = torch.mean(torch.norm(pred_eul - targ_eul, 2, 1))
42 |
43 | return mean_errors
44 |
45 |
46 | def get_dct_matrix(N):
47 | dct_m = np.eye(N)
48 | for k in np.arange(N):
49 | for i in np.arange(N):
50 | w = np.sqrt(2 / N)
51 | if k == 0:
52 | w = np.sqrt(1 / N)
53 | dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N)
54 | idct_m = np.linalg.inv(dct_m)
55 | return dct_m, idct_m
56 |
57 |
58 |
59 | def get_dct_in(input_seq):
60 |
61 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
62 |
63 | dct_used = input_seq.shape[1]
64 | dct_m_in, _ = get_dct_matrix(dct_used)
65 |
66 | dct_m_in = torch.from_numpy(dct_m_in.astype('float32')).to(device)
67 |
68 | input_dct_seq = torch.matmul(dct_m_in[:, 0:dct_used], input_seq)
69 |
70 | return input_dct_seq
71 |
72 |
73 | def get_dct_out(input_seq):
74 |
75 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
76 |
77 | dct_used = input_seq.shape[1]
78 | _, idct_m_in = get_dct_matrix(dct_used)
79 |
80 | idct_m_in = torch.from_numpy(idct_m_in.astype('float32')).to(device)
81 |
82 | input_dct_seq = torch.matmul(idct_m_in[:, 0:dct_used], input_seq)
83 |
84 | return input_dct_seq
85 |
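# get_dct_in and get_dct_out apply the forward and inverse DCT along dim 1,
# so chaining them should reproduce the input. A minimal round-trip check
# (sketch; runs on CPU or GPU, matching the device logic of the helpers above):
def _check_dct_roundtrip():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    x = torch.randn(8, 35, 66, device=device)  # e.g. batch, frames, pose dims
    assert torch.allclose(get_dct_out(get_dct_in(x)), x, atol=1e-4)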
86 |
87 |
88 |
135 | def delta_2_gt(prediction, last_timestep):
136 | # accumulate per-frame displacements into absolute poses,
137 | # starting from the last observed frame
138 | prediction = prediction.clone()
139 | prediction[:, 0, :] = prediction[:, 0, :] + last_timestep
140 | for i in range(prediction.shape[1] - 1):
141 | prediction[:, i + 1, :] = prediction[:, i + 1, :] + prediction[:, i, :]
142 |
143 | return prediction
146 |
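# delta_2_gt integrates per-frame displacements into absolute poses, which is
# equivalent to a cumulative sum offset by the last observed frame. A quick
# equivalence check (sketch, with arbitrary toy shapes):
def _check_delta_2_gt():
    deltas = torch.randn(4, 25, 66)  # batch, future frames, pose dims
    last = torch.randn(4, 66)        # last observed frame
    expected = last.unsqueeze(1) + torch.cumsum(deltas, dim=1)
    assert torch.allclose(delta_2_gt(deltas, last), expected, atol=1e-4)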
147 |
148 |
149 |
150 | def mask_sequence(seq, mframes):
151 | # zero out `mframes` randomly chosen frames in-place (indices may repeat)
152 | x = [randint(0, seq.shape[1] - 1) for p in range(0, mframes)]
153 |
154 | for i in x:
155 | seq[:, i, :] = 0
156 |
157 | return seq
158 |
159 |
160 |
161 | def mask_joints(seq, mjoints):
162 | # zero out `mjoints` randomly chosen joints, 3 consecutive coordinates each
163 | # (assumes the 66-dimensional, 22-joint pose used in this repo); returns a copy
164 | seq_masked = seq.clone()
165 | x = [random.randrange(0, 66, 3) for p in range(0, mjoints)]
166 |
167 | for i in x:
168 | seq_masked[:, :, i] = 0
169 | seq_masked[:, :, i + 1] = 0
170 | seq_masked[:, :, i + 2] = 0
171 |
172 | return seq_masked
173 |
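# Usage sketch for the masking augmentations above (not part of the pipeline;
# toy shapes): both are typically applied to the input sequence as a form of
# data augmentation during training.
def _demo_masking():
    seq = torch.randn(8, 10, 66)  # batch, frames, pose dims
    seq_frames_masked = mask_sequence(seq.clone(), 2)
    seq_joints_masked = mask_joints(seq, 3)
    return seq_frames_masked, seq_joints_masked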
174 |
175 |
176 |
177 |
178 |
179 |
180 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cycler==0.10.0
2 | kiwisolver==1.3.2
3 | matplotlib==3.4.3
4 | numpy==1.21.2
5 | Pillow==8.3.2
6 | pip==21.2.4
7 | pyparsing==2.4.7
8 | python-dateutil==2.8.2
9 | setuptools==58.1.0
10 | six==1.16.0
11 | torch==1.9.1
12 | typing-extensions==3.10.0.2
13 | wheel==0.37.0
14 |
15 |
--------------------------------------------------------------------------------
/utils/ang2joint.py:
--------------------------------------------------------------------------------
1 |
2 |
3 | import torch
4 |
5 | '''
6 | https://github.com/wei-mao-2019/HisRepItself/blob/master/utils/ang2joint.py
7 | '''
8 |
9 | def ang2joint(p3d0, pose,
10 | parent={0: -1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7, 11: 8, 12: 9, 13: 9, 14: 9,
11 | 15: 12, 16: 13, 17: 14, 18: 16, 19: 17, 20: 18, 21: 19, 22: 20, 23: 21}):
12 | """
13 | :param p3d0: [batch_size, joint_num, 3]
14 | :param pose: [batch_size, joint_num, 3]
15 | :param parent:
16 | :return:
17 | """
21 | batch_num = p3d0.shape[0]
30 | jnum = len(parent.keys())
34 | J = p3d0
35 | R_cube_big = rodrigues(pose.contiguous().view(-1, 1, 3)).reshape(batch_num, -1, 3, 3)
36 | results = []
37 | results.append(
38 | with_zeros(torch.cat((R_cube_big[:, 0], torch.reshape(J[:, 0, :], (-1, 3, 1))), dim=2))
39 | )
41 | for i in range(1, jnum):
42 | results.append(
43 | torch.matmul(
44 | results[parent[i]],
45 | with_zeros(
46 | torch.cat(
47 | (R_cube_big[:, i], torch.reshape(J[:, i, :] - J[:, parent[i], :], (-1, 3, 1))),
48 | dim=2
49 | )
50 | )
51 | )
52 | )
53 |
54 | stacked = torch.stack(results, dim=1)
55 | J_transformed = stacked[:, :, :3, 3]
56 | return J_transformed
57 |
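# Minimal shape-level sketch of ang2joint (not part of the pipeline): p3d0
# holds a rest pose for the 24 SMPL joints and pose the per-joint axis-angle
# rotations. In the actual AMASS pipeline both come from the dataset and from
# utils/body_models/smpl_skeleton.npz, not from random tensors as here.
def _demo_ang2joint(batch=2, jnum=24):
    p3d0 = torch.randn(batch, jnum, 3)
    pose = torch.randn(batch, jnum, 3)
    joints = ang2joint(p3d0, pose)  # [batch, jnum, 3] joint positions
    assert joints.shape == (batch, jnum, 3)
    return joints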
58 |
62 | def rodrigues(r):
63 | """
64 | Rodrigues' rotation formula that turns an axis-angle tensor into rotation
65 | matrices in a batched manner.
66 | Parameter:
67 | ----------
68 | r: Axis-angle rotation tensor of shape [batch_size * angle_num, 1, 3].
69 | Return:
70 | -------
71 | Rotation matrix of shape [batch_size * angle_num, 3, 3].
72 | """
73 | eps = r.clone().normal_(std=1e-8)
74 | theta = torch.norm(r + eps, dim=(1, 2), keepdim=True)
76 | theta_dim = theta.shape[0]
77 | r_hat = r / theta
78 | cos = torch.cos(theta)
79 | z_stick = torch.zeros(theta_dim, dtype=torch.float).to(r.device)
80 | m = torch.stack(
81 | (z_stick, -r_hat[:, 0, 2], r_hat[:, 0, 1], r_hat[:, 0, 2], z_stick,
82 | -r_hat[:, 0, 0], -r_hat[:, 0, 1], r_hat[:, 0, 0], z_stick), dim=1)
83 | m = torch.reshape(m, (-1, 3, 3))
84 | i_cube = (torch.eye(3, dtype=torch.float).unsqueeze(dim=0) + torch.zeros((theta_dim, 3, 3), dtype=torch.float)).to(r.device)
85 | A = r_hat.permute(0, 2, 1)
86 | dot = torch.matmul(A, r_hat)
87 | R = cos * i_cube + (1 - cos) * dot + torch.sin(theta) * m
88 | return R
89 |
90 |
94 | def with_zeros(x):
95 | """
96 | Append a [0, 0, 0, 1] row to a batch of [3, 4] tensors.
97 | Parameter:
98 | ---------
99 | x: Tensor of shape [batch_size, 3, 4] to be appended.
100 | Return:
101 | ------
102 | Tensor of shape [batch_size, 4, 4] after appending.
103 | """
104 | ones = torch.tensor(
105 | [[[0.0, 0.0, 0.0, 1.0]]], dtype=torch.float
106 | ).expand(x.shape[0], -1, -1).to(x.device)
107 | ret = torch.cat((x, ones), dim=1)
108 | return ret
109 |
110 |
111 | def pack(x):
112 | """
113 | Prepend zero tensors of shape [4, 3] to a batch of [4, 1] tensors.
114 | Parameter:
115 | ----------
116 | x: A tensor of shape [batch_size, joint_num, 4, 1]
117 | Return:
118 | ------
119 | A tensor of shape [batch_size, joint_num, 4, 4] after concatenation.
120 | """
121 | zeros43 = torch.zeros(
122 | (x.shape[0], x.shape[1], 4, 3), dtype=torch.float).to(x.device)
123 | ret = torch.cat((zeros43, x), dim=3)
124 | return ret
125 |
126 |
127 |
--------------------------------------------------------------------------------
/utils/body_models/smpl_skeleton.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MotionMLP/MotionMixer/91327c3c3a455d398bd097fa300385bafa80a835/utils/body_models/smpl_skeleton.npz
--------------------------------------------------------------------------------
/utils/data_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: utf-8 -*-
4 | import numpy as np
5 | from six.moves import xrange # pylint: disable=redefined-builtin
6 | import torch
7 | # from torch.autograd.variable import Variable
8 | import os
9 | from utils import forward_kinematics
10 |
11 |
12 | def rotmat2euler(R):
13 | """
14 | Converts a rotation matrix to Euler angles
15 | Matlab port to python for evaluation purposes
16 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/RotMat2Euler.m#L1
17 | Args
18 | R: a 3x3 rotation matrix
19 | Returns
20 | eul: a 3x1 Euler angle representation of R
21 | """
22 | if R[0, 2] == 1 or R[0, 2] == -1:
23 | # special case
24 | E3 = 0 # set arbitrarily
25 | dlta = np.arctan2(R[0, 1], R[0, 2])
26 |
27 | if R[0, 2] == -1:
28 | E2 = np.pi / 2
29 | E1 = E3 + dlta
30 | else:
31 | E2 = -np.pi / 2
32 | E1 = -E3 + dlta
33 |
34 | else:
35 | E2 = -np.arcsin(R[0, 2])
36 | E1 = np.arctan2(R[1, 2] / np.cos(E2), R[2, 2] / np.cos(E2))
37 | E3 = np.arctan2(R[0, 1] / np.cos(E2), R[0, 0] / np.cos(E2))
38 |
39 | eul = np.array([E1, E2, E3])
40 | return eul
41 |
42 |
43 | def rotmat2quat(R):
44 | """
45 | Converts a rotation matrix to a quaternion
46 | Matlab port to python for evaluation purposes
47 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/rotmat2quat.m#L4
48 | Args
49 | R: 3x3 rotation matrix
50 | Returns
51 | q: 1x4 quaternion
52 | """
53 | rotdiff = R - R.T
54 |
55 | r = np.zeros(3)
56 | r[0] = -rotdiff[1, 2]
57 | r[1] = rotdiff[0, 2]
58 | r[2] = -rotdiff[0, 1]
59 | sintheta = np.linalg.norm(r) / 2
60 | r0 = np.divide(r, np.linalg.norm(r) + np.finfo(np.float32).eps)
61 |
62 | costheta = (np.trace(R) - 1) / 2
63 |
64 | theta = np.arctan2(sintheta, costheta)
65 |
66 | q = np.zeros(4)
67 | q[0] = np.cos(theta / 2)
68 | q[1:] = r0 * np.sin(theta / 2)
69 | return q
70 |
71 |
72 | def rotmat2expmap(R):
73 | return quat2expmap(rotmat2quat(R))
74 |
75 |
76 | def expmap2rotmat(r):
77 | """
78 | Converts an exponential map angle to a rotation matrix
79 | Matlab port to python for evaluation purposes
80 | I believe this is also called Rodrigues' formula
81 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/expmap2rotmat.m
82 | Args
83 | r: 1x3 exponential map
84 | Returns
85 | R: 3x3 rotation matrix
86 | """
87 | theta = np.linalg.norm(r)
88 | r0 = np.divide(r, theta + np.finfo(np.float32).eps)
89 | r0x = np.array([0, -r0[2], r0[1], 0, 0, -r0[0], 0, 0, 0]).reshape(3, 3)
90 | r0x = r0x - r0x.T
91 | R = np.eye(3, 3) + np.sin(theta) * r0x + (1 - np.cos(theta)) * r0x.dot(r0x)
92 | return R
93 |
94 |
95 | def quat2expmap(q):
96 | """
97 | Converts a quaternion to an exponential map
98 | Matlab port to python for evaluation purposes
99 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/quat2expmap.m#L1
100 | Args
101 | q: 1x4 quaternion
102 | Returns
103 | r: 1x3 exponential map
104 | Raises
105 | ValueError if the l2 norm of the quaternion is not close to 1
106 | """
107 | if np.abs(np.linalg.norm(q) - 1) > 1e-3:
108 | raise ValueError("quat2expmap: input quaternion is not norm 1")
109 |
110 | sinhalftheta = np.linalg.norm(q[1:])
111 | coshalftheta = q[0]
112 |
113 | r0 = np.divide(q[1:], (np.linalg.norm(q[1:]) + np.finfo(np.float32).eps))
114 | theta = 2 * np.arctan2(sinhalftheta, coshalftheta)
115 | theta = np.mod(theta + 2 * np.pi, 2 * np.pi)
116 |
117 | if theta > np.pi:
118 | theta = 2 * np.pi - theta
119 | r0 = -r0
120 |
121 | r = r0 * theta
122 | return r
123 |
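# Round-trip sanity check for the conversions above (sketch, not part of the
# pipeline): an exponential map converted to a rotation matrix, then to a
# quaternion, then back should reproduce the original vector, provided the
# rotation angle lies in [0, pi).
def _check_expmap_roundtrip():
    r = np.array([0.3, -0.2, 0.5])  # angle |r| ~ 0.62 rad < pi
    r_back = quat2expmap(rotmat2quat(expmap2rotmat(r)))
    assert np.allclose(r, r_back, atol=1e-6)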
124 |
125 | def unNormalizeData(normalizedData, data_mean, data_std, dimensions_to_ignore, actions, one_hot):
126 | """Borrowed from SRNN code. Reads a csv file and returns a float32 matrix.
127 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/generateMotionData.py#L12
128 | Args
129 | normalizedData: nxd matrix with normalized data
130 | data_mean: vector of mean used to normalize the data
131 | data_std: vector of standard deviation used to normalize the data
132 | dimensions_to_ignore: vector with dimensions not used by the model
133 | actions: list of strings with the encoded actions
134 | one_hot: whether the data comes with one-hot encoding
135 | Returns
136 | origData: the data mapped back to its original, un-normalized representation
137 | """
138 | T = normalizedData.shape[0]
139 | D = data_mean.shape[0]
140 |
141 | origData = np.zeros((T, D), dtype=np.float32)
142 | dimensions_to_use = []
143 | for i in range(D):
144 | if i in dimensions_to_ignore:
145 | continue
146 | dimensions_to_use.append(i)
147 | dimensions_to_use = np.array(dimensions_to_use)
148 |
149 | if one_hot:
150 | origData[:, dimensions_to_use] = normalizedData[:, :-len(actions)]
151 | else:
152 | origData[:, dimensions_to_use] = normalizedData
153 |
154 | # potentially inefficient, but only done once per experiment
155 | stdMat = data_std.reshape((1, D))
156 | stdMat = np.repeat(stdMat, T, axis=0)
157 | meanMat = data_mean.reshape((1, D))
158 | meanMat = np.repeat(meanMat, T, axis=0)
159 | origData = np.multiply(origData, stdMat) + meanMat
160 | return origData
161 |
162 |
163 | def revert_output_format(poses, data_mean, data_std, dim_to_ignore, actions, one_hot):
164 | """
165 | Converts the output of the neural network to a format that is easier to
166 | manipulate, e.g. for conversion to another format or for visualization
167 | Args
168 | poses: The output from the TF model. A list with (seq_length) entries,
169 | each with a (batch_size, dim) output
170 | Returns
171 | poses_out: A tensor of size (batch_size, seq_length, dim) output. Each
172 | batch is an n-by-d sequence of poses.
173 | """
174 | seq_len = len(poses)
175 | if seq_len == 0:
176 | return []
177 |
178 | batch_size, dim = poses[0].shape
179 |
180 | poses_out = np.concatenate(poses)
181 | poses_out = np.reshape(poses_out, (seq_len, batch_size, dim))
182 | poses_out = np.transpose(poses_out, [1, 0, 2])
183 |
184 | poses_out_list = []
185 | for i in xrange(poses_out.shape[0]):
186 | poses_out_list.append(
187 | unNormalizeData(poses_out[i, :, :], data_mean, data_std, dim_to_ignore, actions, one_hot))
188 |
189 | return poses_out_list
190 |
191 |
192 | def readCSVasFloat(filename):
193 | """
194 | Borrowed from SRNN code. Reads a csv and returns a float matrix.
195 | https://github.com/asheshjain399/NeuralModels/blob/master/neuralmodels/utils.py#L34
196 | Args
197 | filename: string. Path to the csv file
198 | Returns
199 | returnArray: the read data in a float32 matrix
200 | """
201 | returnArray = []
202 | lines = open(filename).readlines()
203 | for line in lines:
204 | line = line.strip().split(',')
205 | if len(line) > 0:
206 | returnArray.append(np.array([np.float32(x) for x in line]))
207 |
208 | returnArray = np.array(returnArray)
209 | return returnArray
210 |
211 |
212 | def normalize_data(data, data_mean, data_std, dim_to_use, actions, one_hot):
213 | """
214 | Normalize input data by removing unused dimensions, subtracting the mean and
215 | dividing by the standard deviation
216 | Args
217 | data: nx99 matrix with data to normalize
218 | data_mean: vector of mean used to normalize the data
219 | data_std: vector of standard deviation used to normalize the data
220 | dim_to_use: vector with dimensions used by the model
221 | actions: list of strings with the encoded actions
222 | one_hot: whether the data comes with one-hot encoding
223 | Returns
224 | data_out: the passed data matrix, but normalized
225 | """
226 | data_out = {}
227 | nactions = len(actions)
228 |
229 | if not one_hot:
230 | # No one-hot encoding... no need to do anything special
231 | for key in data.keys():
232 | data_out[key] = np.divide((data[key] - data_mean), data_std)
233 | data_out[key] = data_out[key][:, dim_to_use]
234 |
235 | else:
236 | # TODO hard-coding 99 dimensions for un-normalized human poses
237 | for key in data.keys():
238 | data_out[key] = np.divide((data[key][:, 0:99] - data_mean), data_std)
239 | data_out[key] = data_out[key][:, dim_to_use]
240 | data_out[key] = np.hstack((data_out[key], data[key][:, -nactions:]))
241 |
242 | return data_out
243 |
244 |
245 | def normalization_stats(completeData):
246 | """"
247 | Also borrowed for SRNN code. Computes mean, stdev and dimensions to ignore.
248 | https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/CRFProblems/H3.6m/processdata.py#L33
249 | Args
250 | completeData: nx99 matrix with data to normalize
251 | Returns
252 | data_mean: vector of mean used to normalize the data
253 | data_std: vector of standard deviation used to normalize the data
254 | dimensions_to_ignore: vector with dimensions not used by the model
255 | dimensions_to_use: vector with dimensions used by the model
256 | """
257 | data_mean = np.mean(completeData, axis=0)
258 | data_std = np.std(completeData, axis=0)
259 |
260 | dimensions_to_ignore = []
261 | dimensions_to_use = []
262 |
263 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
264 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
265 |
266 | data_std[dimensions_to_ignore] = 1.0
267 |
268 | return data_mean, data_std, dimensions_to_ignore, dimensions_to_use
269 |
270 |
271 | def define_actions(action):
272 | """
273 | Define the list of actions we are using.
274 | Args
275 | action: String with the passed action. Could be "all"
276 | Returns
277 | actions: List of strings of actions
278 | Raises
279 | ValueError if the action is not included in H3.6M
280 | """
281 |
282 | actions = ["walking", "eating", "smoking", "discussion", "directions",
283 | "greeting", "phoning", "posing", "purchases", "sitting",
284 | "sittingdown", "takingphoto", "waiting", "walkingdog",
285 | "walkingtogether"]
286 | if action in actions:
287 | return [action]
288 |
289 | if action == "all":
290 | return actions
291 |
292 | if action == "all_srnn":
293 | return ["walking", "eating", "smoking", "discussion"]
294 |
295 | raise ValueError("Unrecognized action: %s" % action)
296 |
297 |
298 | """all methods above are borrowed from https://github.com/una-dinosauria/human-motion-prediction"""
299 |
300 |
301 | def define_actions_cmu(action):
302 | """
303 | Define the list of actions we are using.
304 | Args
305 | action: String with the passed action. Could be "all"
306 | Returns
307 | actions: List of strings of actions
308 | Raises
309 | ValueError if the action is not included in the CMU mocap set
310 | """
311 |
312 | actions = ["basketball", "basketball_signal", "directing_traffic", "jumping", "running", "soccer", "walking",
313 | "washwindow"]
314 | if action in actions:
315 | return [action]
316 |
317 | if action == "all":
318 | return actions
319 |
320 | raise ValueError("Unrecognized action: %s" % action)
321 |
322 |
323 | def load_data_cmu(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
324 | seq_len = input_n + output_n
325 | nactions = len(actions)
326 | sampled_seq = []
327 | complete_seq = []
328 | for action_idx in np.arange(nactions):
329 | action = actions[action_idx]
330 | path = '{}/{}'.format(path_to_dataset, action)
331 | count = 0
332 | for _ in os.listdir(path):
333 | count = count + 1
334 | for examp_index in np.arange(count):
335 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
336 | action_sequence = readCSVasFloat(filename)
337 | n, d = action_sequence.shape
338 | even_list = range(0, n, 2)
339 | the_sequence = np.array(action_sequence[even_list, :])
340 | num_frames = len(the_sequence)
341 | if not is_test:
342 | fs = np.arange(0, num_frames - seq_len + 1)
343 | fs_sel = fs
344 | for i in np.arange(seq_len - 1):
345 | fs_sel = np.vstack((fs_sel, fs + i + 1))
346 | fs_sel = fs_sel.transpose()
347 | seq_sel = the_sequence[fs_sel, :]
348 | if len(sampled_seq) == 0:
349 | sampled_seq = seq_sel
350 | complete_seq = the_sequence
351 | else:
352 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
353 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
354 | else:
355 | source_seq_len = 50
356 | target_seq_len = 25
357 | total_frames = source_seq_len + target_seq_len
358 | batch_size = 8
359 | SEED = 1234567890
360 | rng = np.random.RandomState(SEED)
361 | for _ in range(batch_size):
362 | idx = rng.randint(0, num_frames - total_frames)
363 | seq_sel = the_sequence[
364 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :]
365 | seq_sel = np.expand_dims(seq_sel, axis=0)
366 | if len(sampled_seq) == 0:
367 | sampled_seq = seq_sel
368 | complete_seq = the_sequence
369 | else:
370 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
371 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
372 |
373 | if not is_test:
374 | data_std = np.std(complete_seq, axis=0)
375 | data_mean = np.mean(complete_seq, axis=0)
376 |
377 | dimensions_to_ignore = []
378 | dimensions_to_use = []
379 | dimensions_to_ignore.extend(list(np.where(data_std < 1e-4)[0]))
380 | dimensions_to_use.extend(list(np.where(data_std >= 1e-4)[0]))
381 | data_std[dimensions_to_ignore] = 1.0
382 | data_mean[dimensions_to_ignore] = 0.0
383 |
384 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
385 |
386 |
387 | def load_data_cmu_3d(path_to_dataset, actions, input_n, output_n, data_std=0, data_mean=0, is_test=False):
388 | seq_len = input_n + output_n
389 | nactions = len(actions)
390 | sampled_seq = []
391 | complete_seq = []
392 | for action_idx in np.arange(nactions):
393 | action = actions[action_idx]
394 | path = '{}/{}'.format(path_to_dataset, action)
395 | count = 0
396 | for _ in os.listdir(path):
397 | count = count + 1
398 | for examp_index in np.arange(count):
399 | filename = '{}/{}/{}_{}.txt'.format(path_to_dataset, action, action, examp_index + 1)
400 | action_sequence = readCSVasFloat(filename)
401 | n, d = action_sequence.shape
402 | exptmps = torch.from_numpy(action_sequence).float().cuda()
403 | xyz = expmap2xyz_torch_cmu(exptmps)
404 | xyz = xyz.view(-1, 38 * 3)
405 | xyz = xyz.cpu().data.numpy()
406 | action_sequence = xyz
407 |
408 | even_list = range(0, n, 2)
409 | the_sequence = np.array(action_sequence[even_list, :])
410 | num_frames = len(the_sequence)
411 | if not is_test:
412 | fs = np.arange(0, num_frames - seq_len + 1)
413 | fs_sel = fs
414 | for i in np.arange(seq_len - 1):
415 | fs_sel = np.vstack((fs_sel, fs + i + 1))
416 | fs_sel = fs_sel.transpose()
417 | seq_sel = the_sequence[fs_sel, :]
418 | if len(sampled_seq) == 0:
419 | sampled_seq = seq_sel
420 | complete_seq = the_sequence
421 | else:
422 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
423 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
424 | else:
425 | source_seq_len = 50
426 | target_seq_len = 25
427 | total_frames = source_seq_len + target_seq_len
428 | batch_size = 8
429 | SEED = 1234567890
430 | rng = np.random.RandomState(SEED)
431 | for _ in range(batch_size):
432 | idx = rng.randint(0, num_frames - total_frames)
433 | seq_sel = the_sequence[
434 | idx + (source_seq_len - input_n):(idx + source_seq_len + output_n), :]
435 | seq_sel = np.expand_dims(seq_sel, axis=0)
436 | if len(sampled_seq) == 0:
437 | sampled_seq = seq_sel
438 | complete_seq = the_sequence
439 | else:
440 | sampled_seq = np.concatenate((sampled_seq, seq_sel), axis=0)
441 | complete_seq = np.append(complete_seq, the_sequence, axis=0)
442 |
443 | if not is_test:
444 | data_std = np.std(complete_seq, axis=0)
445 | data_mean = np.mean(complete_seq, axis=0)
446 |
447 | joint_to_ignore = np.array([0, 1, 2, 7, 8, 13, 16, 20, 29, 24, 27, 33, 36])
448 | dimensions_to_ignore = np.concatenate((joint_to_ignore * 3, joint_to_ignore * 3 + 1, joint_to_ignore * 3 + 2))
449 | dimensions_to_use = np.setdiff1d(np.arange(complete_seq.shape[1]), dimensions_to_ignore)
450 |
451 | data_std[dimensions_to_ignore] = 1.0
452 | data_mean[dimensions_to_ignore] = 0.0
453 |
454 | return sampled_seq, dimensions_to_ignore, dimensions_to_use, data_mean, data_std
455 |
456 |
457 | def rotmat2euler_torch(R):
458 | """
459 | Converts a rotation matrix to Euler angles
460 | batched PyTorch version ported from the corresponding numpy method above
461 | :param R: N*3*3
462 | :return: N*3
463 | """
464 | n = R.data.shape[0]
465 | eul = torch.zeros(n, 3).float().cuda()
466 | idx_spec1 = (R[:, 0, 2] == 1).nonzero().cpu().data.numpy().reshape(-1).tolist()
467 | idx_spec2 = (R[:, 0, 2] == -1).nonzero().cpu().data.numpy().reshape(-1).tolist()
468 | if len(idx_spec1) > 0:
469 | R_spec1 = R[idx_spec1, :, :]
470 | eul_spec1 = torch.zeros(len(idx_spec1), 3).float().cuda()
471 | eul_spec1[:, 2] = 0
472 | eul_spec1[:, 1] = -np.pi / 2
473 | delta = torch.atan2(R_spec1[:, 0, 1], R_spec1[:, 0, 2])
474 | eul_spec1[:, 0] = delta
475 | eul[idx_spec1, :] = eul_spec1
476 |
477 | if len(idx_spec2) > 0:
478 | R_spec2 = R[idx_spec2, :, :]
479 | eul_spec2 = torch.zeros(len(idx_spec2), 3).float().cuda()
480 | eul_spec2[:, 2] = 0
481 | eul_spec2[:, 1] = np.pi / 2
482 | delta = torch.atan2(R_spec2[:, 0, 1], R_spec2[:, 0, 2])
483 | eul_spec2[:, 0] = delta
484 | eul[idx_spec2] = eul_spec2
485 |
486 | idx_remain = np.arange(0, n)
487 | idx_remain = np.setdiff1d(np.setdiff1d(idx_remain, idx_spec1), idx_spec2).tolist()
488 | if len(idx_remain) > 0:
489 | R_remain = R[idx_remain, :, :]
490 | eul_remain = torch.zeros(len(idx_remain), 3).float().cuda()
491 | eul_remain[:, 1] = -torch.asin(R_remain[:, 0, 2])
492 | eul_remain[:, 0] = torch.atan2(R_remain[:, 1, 2] / torch.cos(eul_remain[:, 1]),
493 | R_remain[:, 2, 2] / torch.cos(eul_remain[:, 1]))
494 | eul_remain[:, 2] = torch.atan2(R_remain[:, 0, 1] / torch.cos(eul_remain[:, 1]),
495 | R_remain[:, 0, 0] / torch.cos(eul_remain[:, 1]))
496 | eul[idx_remain, :] = eul_remain
497 |
498 | return eul
499 |
500 |
501 | def rotmat2quat_torch(R):
502 | """
503 | Converts a rotation matrix to a quaternion
504 | batched PyTorch version ported from the corresponding numpy method above
505 | :param R: N * 3 * 3
506 | :return: N * 4
507 | """
508 | rotdiff = R - R.transpose(1, 2)
509 | r = torch.zeros_like(rotdiff[:, 0])
510 | r[:, 0] = -rotdiff[:, 1, 2]
511 | r[:, 1] = rotdiff[:, 0, 2]
512 | r[:, 2] = -rotdiff[:, 0, 1]
513 | r_norm = torch.norm(r, dim=1)
514 | sintheta = r_norm / 2
515 | r0 = torch.div(r, r_norm.unsqueeze(1).repeat(1, 3) + 0.00000001)
516 | t1 = R[:, 0, 0]
517 | t2 = R[:, 1, 1]
518 | t3 = R[:, 2, 2]
519 | costheta = (t1 + t2 + t3 - 1) / 2
520 | theta = torch.atan2(sintheta, costheta)
521 | q = torch.zeros(R.shape[0], 4).float().cuda()
522 | q[:, 0] = torch.cos(theta / 2)
523 | q[:, 1:] = torch.mul(r0, torch.sin(theta / 2).unsqueeze(1).repeat(1, 3))
524 |
525 | return q
526 |
527 |
528 | def expmap2quat_torch(exp):
529 | """
530 | Converts an exponential map to a quaternion
531 | batched PyTorch version ported from the corresponding numpy method above
532 | :param exp: N*3
533 | :return: N*4
534 | """
535 | theta = torch.norm(exp, p=2, dim=1).unsqueeze(1)
536 | v = torch.div(exp, theta.repeat(1, 3) + 0.0000001)
537 | sinhalf = torch.sin(theta / 2)
538 | coshalf = torch.cos(theta / 2)
539 | q1 = torch.mul(v, sinhalf.repeat(1, 3))
540 | q = torch.cat((coshalf, q1), dim=1)
541 | return q
542 |
543 |
544 | def expmap2rotmat_torch(r):
545 | """
546 | Converts an exponential map to a rotation matrix
547 | batched PyTorch version ported from the corresponding method above
548 | :param r: N*3
549 | :return: N*3*3
550 | """
551 | theta = torch.norm(r, 2, 1)
552 | r0 = torch.div(r, theta.unsqueeze(1).repeat(1, 3) + 0.0000001)
553 | r1 = torch.zeros_like(r0).repeat(1, 3)
554 | r1[:, 1] = -r0[:, 2]
555 | r1[:, 2] = r0[:, 1]
556 | r1[:, 5] = -r0[:, 0]
557 | r1 = r1.view(-1, 3, 3)
558 | r1 = r1 - r1.transpose(1, 2)
559 | n = r1.data.shape[0]
560 | R = torch.eye(3, 3).repeat(n, 1, 1).float().cuda() + torch.mul(
561 | torch.sin(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3), r1) + torch.mul(
562 | (1 - torch.cos(theta).unsqueeze(1).repeat(1, 9).view(-1, 3, 3)), torch.matmul(r1, r1))
563 | return R
564 |
565 |
566 | def expmap2xyz_torch(expmap):
567 | """
568 | Converts exponential maps to 3D joint locations via forward kinematics
569 | :param expmap: N*99
570 | :return: N*32*3
571 | """
572 | parent, offset, rotInd, expmapInd = forward_kinematics._some_variables()
573 | xyz = forward_kinematics.fkl_torch(expmap, parent, offset, rotInd, expmapInd)
574 | return xyz
575 |
576 |
577 | def get_dct_matrix(N):
578 | dct_m = np.eye(N)
579 | for k in np.arange(N):
580 | for i in np.arange(N):
581 | w = np.sqrt(2 / N)
582 | if k == 0:
583 | w = np.sqrt(1 / N)
584 | dct_m[k, i] = w * np.cos(np.pi * (i + 1 / 2) * k / N)
585 | idct_m = np.linalg.inv(dct_m)
586 | return dct_m, idct_m
587 |
588 |
589 | def find_indices_256(frame_num1, frame_num2, seq_len, input_n=10):
590 | """
591 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
592 | which originally comes from the SRNN code,
593 | in order to find the same action indices as in SRNN:
594 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325
595 | """
596 |
597 | # Used a fixed dummy seed, following
598 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29
599 | SEED = 1234567890
600 | rng = np.random.RandomState(SEED)
601 |
602 | T1 = frame_num1 - 150
603 | T2 = frame_num2 - 150 # seq_len
604 | idxo1 = None
605 | idxo2 = None
606 | for _ in np.arange(0, 128):
607 | idx_ran1 = rng.randint(16, T1)
608 | idx_ran2 = rng.randint(16, T2)
609 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len)
610 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len)
611 | if idxo1 is None:
612 | idxo1 = idxs1
613 | idxo2 = idxs2
614 | else:
615 | idxo1 = np.vstack((idxo1, idxs1))
616 | idxo2 = np.vstack((idxo2, idxs2))
617 | return idxo1, idxo2
618 |
619 |
620 | def find_indices_srnn(frame_num1, frame_num2, seq_len, input_n=10):
621 | """
622 | Adapted from https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/seq2seq_model.py#L478
623 | which originally comes from the SRNN code,
624 | in order to find the same action indices as in SRNN:
625 | https://github.com/asheshjain399/RNNexp/blob/master/structural_rnn/CRFProblems/H3.6m/processdata.py#L325
626 | """
627 |
628 | # Used a fixed dummy seed, following
629 | # https://github.com/asheshjain399/RNNexp/blob/srnn/structural_rnn/forecastTrajectories.py#L29
630 | SEED = 1234567890
631 | rng = np.random.RandomState(SEED)
632 |
633 | T1 = frame_num1 - 150
634 | T2 = frame_num2 - 150 # seq_len
635 | idxo1 = None
636 | idxo2 = None
637 | for _ in np.arange(0, 4):
638 | idx_ran1 = rng.randint(16, T1)
639 | idx_ran2 = rng.randint(16, T2)
642 | idxs1 = np.arange(idx_ran1 + 50 - input_n, idx_ran1 + 50 - input_n + seq_len)
643 | idxs2 = np.arange(idx_ran2 + 50 - input_n, idx_ran2 + 50 - input_n + seq_len)
644 | if idxo1 is None:
645 | idxo1 = idxs1
646 | idxo2 = idxs2
647 | else:
648 | idxo1 = np.vstack((idxo1, idxs1))
649 | idxo2 = np.vstack((idxo2, idxs2))
650 | return idxo1, idxo2
651 |
652 |
--------------------------------------------------------------------------------
/utils/forward_kinematics.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import torch
4 | from torch.autograd.variable import Variable
5 | from utils import data_utils
6 |
7 |
8 | def fkl(angles, parent, offset, rotInd, expmapInd):
9 | """
10 | Convert joint angles and bone lengths into the 3d points of a person.
11 | adapted from
12 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L14
13 | which was originally based on expmap2xyz.m, available at
14 | https://github.com/asheshjain399/RNNexp/blob/7fc5a53292dc0f232867beb66c3a9ef845d705cb/structural_rnn/CRFProblems/H3.6m/mhmublv/Motion/exp2xyz.m
15 | Args
16 | angles: 99-long vector with 3d position and 3d joint angles in expmap format
17 | parent: 32-long vector with parent-child relationships in the kinematic tree
18 | offset: 96-long vector with bone lengths
19 | rotInd: 32-long list with indices into angles
20 | expmapInd: 32-long list with indices into expmap angles
21 | Returns
22 | xyz: 32x3 3d points that represent a person in 3d space
23 | """
24 |
25 | assert len(angles) == 99
26 |
27 | # Structure that indicates parents for each joint
28 | njoints = 32
29 | xyzStruct = [dict() for x in range(njoints)]
30 |
31 | for i in np.arange(njoints):
32 |
33 | # if not rotInd[i]: # If the list is empty
34 | # xangle, yangle, zangle = 0, 0, 0
35 | # else:
36 | # xangle = angles[rotInd[i][0] - 1]
37 | # yangle = angles[rotInd[i][1] - 1]
38 | # zangle = angles[rotInd[i][2] - 1]
39 | if i == 0:
40 | xangle = angles[0]
41 | yangle = angles[1]
42 | zangle = angles[2]
43 | thisPosition = np.array([xangle, yangle, zangle])
44 | else:
45 | thisPosition = np.array([0, 0, 0])
46 |
47 | r = angles[expmapInd[i]]
48 |
49 | thisRotation = data_utils.expmap2rotmat(r)
50 |
51 | if parent[i] == -1: # Root node
52 | xyzStruct[i]['rotation'] = thisRotation
53 | xyzStruct[i]['xyz'] = np.reshape(offset[i, :], (1, 3)) + thisPosition
54 | else:
55 | xyzStruct[i]['xyz'] = (offset[i, :] + thisPosition).dot(xyzStruct[parent[i]]['rotation']) + \
56 | xyzStruct[parent[i]]['xyz']
57 | xyzStruct[i]['rotation'] = thisRotation.dot(xyzStruct[parent[i]]['rotation'])
58 |
59 | xyz = [xyzStruct[i]['xyz'] for i in range(njoints)]
60 | xyz = np.array(xyz).squeeze()
61 | # xyz = xyz[:, [0, 2, 1]]
62 | # xyz = xyz[:,[2,0,1]]
63 |
64 | return xyz
65 |
66 |
67 | def _some_variables():
68 | """
69 | borrowed from
70 | https://github.com/una-dinosauria/human-motion-prediction/blob/master/src/forward_kinematics.py#L100
71 | We define some variables that are useful to run the kinematic tree
72 | Args
73 | None
74 | Returns
75 | parent: 32-long vector with parent-child relationships in the kinematic tree
76 | offset: 96-long vector with bone lengths
77 | rotInd: 32-long list with indices into angles
78 | expmapInd: 32-long list with indices into expmap angles
79 | """
80 |
81 | parent = np.array([0, 1, 2, 3, 4, 5, 1, 7, 8, 9, 10, 1, 12, 13, 14, 15, 13,
82 | 17, 18, 19, 20, 21, 20, 23, 13, 25, 26, 27, 28, 29, 28, 31]) - 1
83 |
84 | offset = np.array(
85 | [0.000000, 0.000000, 0.000000, -132.948591, 0.000000, 0.000000, 0.000000, -442.894612, 0.000000, 0.000000,
86 | -454.206447, 0.000000, 0.000000, 0.000000, 162.767078, 0.000000, 0.000000, 74.999437, 132.948826, 0.000000,
87 | 0.000000, 0.000000, -442.894413, 0.000000, 0.000000, -454.206590, 0.000000, 0.000000, 0.000000, 162.767426,
88 | 0.000000, 0.000000, 74.999948, 0.000000, 0.100000, 0.000000, 0.000000, 233.383263, 0.000000, 0.000000,
89 | 257.077681, 0.000000, 0.000000, 121.134938, 0.000000, 0.000000, 115.002227, 0.000000, 0.000000, 257.077681,
90 | 0.000000, 0.000000, 151.034226, 0.000000, 0.000000, 278.882773, 0.000000, 0.000000, 251.733451, 0.000000,
91 | 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999627, 0.000000, 100.000188, 0.000000, 0.000000,
92 | 0.000000, 0.000000, 0.000000, 257.077681, 0.000000, 0.000000, 151.031437, 0.000000, 0.000000, 278.892924,
93 | 0.000000, 0.000000, 251.728680, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 99.999888,
94 | 0.000000, 137.499922, 0.000000, 0.000000, 0.000000, 0.000000])
95 | offset = offset.reshape(-1, 3)
96 |
97 | rotInd = [[5, 6, 4],
98 | [8, 9, 7],
99 | [11, 12, 10],
100 | [14, 15, 13],
101 | [17, 18, 16],
102 | [],
103 | [20, 21, 19],
104 | [23, 24, 22],
105 | [26, 27, 25],
106 | [29, 30, 28],
107 | [],
108 | [32, 33, 31],
109 | [35, 36, 34],
110 | [38, 39, 37],
111 | [41, 42, 40],
112 | [],
113 | [44, 45, 43],
114 | [47, 48, 46],
115 | [50, 51, 49],
116 | [53, 54, 52],
117 | [56, 57, 55],
118 | [],
119 | [59, 60, 58],
120 | [],
121 | [62, 63, 61],
122 | [65, 66, 64],
123 | [68, 69, 67],
124 | [71, 72, 70],
125 | [74, 75, 73],
126 | [],
127 | [77, 78, 76],
128 | []]
129 |
130 | expmapInd = np.split(np.arange(4, 100) - 1, 32)
131 |
132 | return parent, offset, rotInd, expmapInd
133 |
134 |
135 | def _some_variables_cmu():
136 | """
137 | We define some variables that are useful to run the kinematic tree
138 | Args
139 | None
140 | Returns
141 | parent: 38-long vector with parent-child relationships in the kinematic tree
142 | offset: 114-long vector with bone lengths
143 | posInd: 38-long list with indices into the position dimensions
144 | expmapInd: 38-long list with indices into expmap angles
145 | """
146 |
147 | parent = np.array([0, 1, 2, 3, 4, 5, 6, 1, 8, 9, 10, 11, 12, 1, 14, 15, 16, 17, 18, 19, 16,
148 | 21, 22, 23, 24, 25, 26, 24, 28, 16, 30, 31, 32, 33, 34, 35, 33, 37]) - 1
149 |
150 | offset = 70 * np.array(
151 | [0, 0, 0, 0, 0, 0, 1.65674000000000, -1.80282000000000, 0.624770000000000, 2.59720000000000, -7.13576000000000,
152 | 0, 2.49236000000000, -6.84770000000000, 0, 0.197040000000000, -0.541360000000000, 2.14581000000000, 0, 0,
153 | 1.11249000000000, 0, 0, 0, -1.61070000000000, -1.80282000000000, 0.624760000000000, -2.59502000000000,
154 | -7.12977000000000, 0, -2.46780000000000, -6.78024000000000, 0, -0.230240000000000, -0.632580000000000,
155 | 2.13368000000000, 0, 0, 1.11569000000000, 0, 0, 0, 0.0196100000000000, 2.05450000000000, -0.141120000000000,
156 | 0.0102100000000000, 2.06436000000000, -0.0592100000000000, 0, 0, 0, 0.00713000000000000, 1.56711000000000,
157 | 0.149680000000000, 0.0342900000000000, 1.56041000000000, -0.100060000000000, 0.0130500000000000,
158 | 1.62560000000000, -0.0526500000000000, 0, 0, 0, 3.54205000000000, 0.904360000000000, -0.173640000000000,
159 | 4.86513000000000, 0, 0, 3.35554000000000, 0, 0, 0, 0, 0, 0.661170000000000, 0, 0, 0.533060000000000, 0, 0, 0,
160 | 0, 0, 0.541200000000000, 0, 0.541200000000000, 0, 0, 0, -3.49802000000000, 0.759940000000000,
161 | -0.326160000000000, -5.02649000000000, 0, 0, -3.36431000000000, 0, 0, 0, 0, 0, -0.730410000000000, 0, 0,
162 | -0.588870000000000, 0, 0, 0, 0, 0, -0.597860000000000, 0, 0.597860000000000])
163 | offset = offset.reshape(-1, 3)
164 |
165 | rotInd = [[6, 5, 4],
166 | [9, 8, 7],
167 | [12, 11, 10],
168 | [15, 14, 13],
169 | [18, 17, 16],
170 | [21, 20, 19],
171 | [],
172 | [24, 23, 22],
173 | [27, 26, 25],
174 | [30, 29, 28],
175 | [33, 32, 31],
176 | [36, 35, 34],
177 | [],
178 | [39, 38, 37],
179 | [42, 41, 40],
180 | [45, 44, 43],
181 | [48, 47, 46],
182 | [51, 50, 49],
183 | [54, 53, 52],
184 | [],
185 | [57, 56, 55],
186 | [60, 59, 58],
187 | [63, 62, 61],
188 | [66, 65, 64],
189 | [69, 68, 67],
190 | [72, 71, 70],
191 | [],
192 | [75, 74, 73],
193 | [],
194 | [78, 77, 76],
195 | [81, 80, 79],
196 | [84, 83, 82],
197 | [87, 86, 85],
198 | [90, 89, 88],
199 | [93, 92, 91],
200 | [],
201 | [96, 95, 94],
202 | []]
203 |     posInd = []  # position-channel indices: only the root joint carries translation channels
204 |     for ii in np.arange(38):
205 |         if ii == 0:
206 |             posInd.append([1, 2, 3])
207 |         else:
208 |             posInd.append([])
209 |
210 | expmapInd = np.split(np.arange(4, 118) - 1, 38)
211 |
212 | return parent, offset, posInd, expmapInd
213 |
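# Editor's illustration, not part of the original file: `expmapInd` slices the
# flat angle vector into three exponential-map channels per joint. For H3.6M
# the vector is 99-dim (3 root-translation values + 32 joints * 3), for CMU it
# is 117-dim (3 + 38 * 3). The helper name `_joint_expmap` is an assumption.
def _joint_expmap(angles_vec, expmapInd, joint):
    """Return the 3 expmap values of `joint` from a flat angle vector."""
    return angles_vec[expmapInd[joint]]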
214 |
215 | def fkl_torch(angles, parent, offset, rotInd, expmapInd):
216 |     """
217 |     Batch PyTorch version of the fkl() method above:
218 |     converts exponential-map joint angles to 3D joint
219 |     locations by walking the kinematic tree.
220 |     :param angles: N*99 tensor of expmap angles
221 |     :param parent: per-joint parent indices of the kinematic tree
222 |     :param offset: joint_n*3 array of rest-pose bone offsets
223 |     :param rotInd: per-joint indices into the angle vector (unused here)
224 |     :param expmapInd: per-joint indices into the expmap angles (unused here)
225 |     :return: N*joint_n*3 tensor of joint positions
226 |     """
227 |     n = angles.data.shape[0]
228 |     j_n = offset.shape[0]  # number of joints
229 |     p3d = Variable(torch.from_numpy(offset)).float().cuda().unsqueeze(0).repeat(n, 1, 1)  # rest-pose offsets per batch element (assumes a CUDA device)
230 |     angles = angles[:, 3:].contiguous().view(-1, 3)  # drop the 3 root-translation channels
231 |     R = data_utils.expmap2rotmat_torch(angles).view(n, j_n, 3, 3)  # per-joint local rotation matrices
232 |     for i in np.arange(1, j_n):
233 |         if parent[i] > 0:  # children of the root keep their rest offsets (root rotation is ignored)
234 |             R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone()  # accumulate rotations along the chain
235 |             p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :]  # rotate bone offset, attach to parent; row 0 is safe since all rows share the same offsets
236 |     return p3d
237 |
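# Editor's sketch, not part of the original file: fkl_torch() above hard-codes
# .cuda(), so it fails on CPU-only machines. A device-agnostic variant under
# the same conventions could look like this, assuming
# data_utils.expmap2rotmat_torch runs on whatever device its input lives on.
# The name `fkl_torch_any_device` is an assumption introduced here; rotInd and
# expmapInd are kept (unused) only for drop-in signature parity.
def fkl_torch_any_device(angles, parent, offset, rotInd, expmapInd):
    device = angles.device
    n = angles.shape[0]
    j_n = offset.shape[0]
    # Rest-pose offsets, repeated once per batch element, on the input's device
    p3d = torch.from_numpy(offset).float().to(device).unsqueeze(0).repeat(n, 1, 1)
    R = data_utils.expmap2rotmat_torch(
        angles[:, 3:].contiguous().view(-1, 3)).view(n, j_n, 3, 3)
    for i in np.arange(1, j_n):
        if parent[i] > 0:
            R[:, i, :, :] = torch.matmul(R[:, i, :, :], R[:, parent[i], :, :]).clone()
            p3d[:, i, :] = torch.matmul(p3d[0, i, :], R[:, parent[i], :, :]) + p3d[:, parent[i], :]
    return p3d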
238 |
239 | def main():
240 |     # Load the H3.6M skeleton variables
241 |     parent, offset, rotInd, expmapInd = _some_variables()
242 |
243 |     # numpy implementation: the sample poses below are hard-coded; they could also be read from an h5 file:
244 |     # with h5py.File('samples.h5', 'r') as h5f:
245 |     #     expmap_gt = h5f['expmap/gt/walking_0'][:]
246 |     #     expmap_pred = h5f['expmap/preds/walking_0'][:]
247 | expmap_pred = np.array(
248 | [0.0000000, 0.0000000, 0.0000000, -0.0000001, -0.0000000, -0.0000002, 0.3978439, -0.4166636, 0.1027215,
249 | -0.7767256, -0.0000000, -0.0000000, 0.1704115, 0.3078358, -0.1861640, 0.3330379, -0.0000000, -0.0000000,
250 | -0.0000000, -0.0000000, -0.0000000, 0.0679339, 0.2255526, 0.2394881, -0.0989492, -0.0000000, -0.0000000,
251 | 0.0677801, -0.3607298, 0.0503249, 0.1819232, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
252 | 0.3236777, -0.0476493, -0.0651256, -0.3150051, -0.0665669, 0.3188994, -0.5980227, -0.1190833, -0.3017127,
253 | 1.2270271, -0.1010960, 0.2072986, -0.0000000, -0.0000000, -0.0000000, -0.2578378, -0.0125206, 2.0266378,
254 | -0.3701521, 0.0199115, 0.5594162, -0.4625384, -0.0000000, -0.0000000, 0.1653314, -0.3952765, -0.1731570,
255 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
256 | -0.0000000, -0.0000000, -0.0000000, 2.7825687, -1.4196042, -0.0936858, -1.0348599, -2.7419815, 0.4518218,
257 | -0.3902033, -0.0000000, -0.0000000, 0.0597317, 0.0547002, 0.0445105, -0.0000000, -0.0000000, -0.0000000,
258 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
259 | ])
260 | expmap_gt = np.array(
261 | [0.2240568, -0.0276901, -0.7433901, 0.0004407, -0.0020624, 0.0002131, 0.3974636, -0.4157083, 0.1030248,
262 | -0.7762963, -0.0000000, -0.0000000, 0.1697988, 0.3087364, -0.1863863, 0.3327336, -0.0000000, -0.0000000,
263 | -0.0000000, -0.0000000, -0.0000000, 0.0689423, 0.2282812, 0.2395958, -0.0998311, -0.0000000, -0.0000000,
264 | 0.0672752, -0.3615943, 0.0505299, 0.1816492, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
265 | 0.3223563, -0.0481131, -0.0659720, -0.3145134, -0.0656419, 0.3206626, -0.5979006, -0.1181534, -0.3033383,
266 | 1.2269648, -0.1011873, 0.2057794, -0.0000000, -0.0000000, -0.0000000, -0.2590978, -0.0141497, 2.0271597,
267 | -0.3699318, 0.0128547, 0.5556172, -0.4714990, -0.0000000, -0.0000000, 0.1603251, -0.4157299, -0.1667608,
268 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000,
269 | -0.0000000, -0.0000000, -0.0000000, 2.7811005, -1.4192915, -0.0932141, -1.0294687, -2.7323222, 0.4542309,
270 | -0.4048152, -0.0000000, -0.0000000, 0.0568960, 0.0525994, 0.0493068, -0.0000000, -0.0000000, -0.0000000,
271 | -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000, -0.0000000
272 | ])
273 |     xyz1 = fkl(expmap_pred, parent, offset, rotInd, expmapInd)  # numpy reference implementation
274 |     xyz2 = fkl(expmap_gt, parent, offset, rotInd, expmapInd)
275 |
276 |     exp1 = Variable(torch.from_numpy(np.vstack((expmap_pred, expmap_gt))).float()).cuda()
277 |     xyz = fkl_torch(exp1, parent, offset, rotInd, expmapInd)  # batched PyTorch version
278 |     xyz = xyz.cpu().data.numpy()
279 |     print(xyz)  # shape (2, 32, 3): one 32-joint pose per stacked expmap vector
280 |
281 |
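# Editor's sketch, not part of the original file: a common way to compare the
# two poses computed in main() is the mean per-joint position error (MPJPE),
# here in the skeleton's native units (millimetres for the H3.6M offsets).
# The helper name `_mpjpe` is an assumption introduced for illustration.
def _mpjpe(xyz_a, xyz_b):
    """Mean Euclidean distance over joints between two (joint_n, 3) arrays."""
    return float(np.mean(np.linalg.norm(xyz_a - xyz_b, axis=-1)))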
282 | if __name__ == '__main__':
283 | main()
284 |
--------------------------------------------------------------------------------