├── .gitignore
├── LICENSE
├── README.md
├── preprocess_dataset.md
├── src
│   ├── H36M_dataset.py
│   ├── SMPL_pytorch.py
│   ├── Surreal_GlobalRotationCorrection.py
│   ├── Surreal_dataset.py
│   ├── UP3D_dataset.py
│   ├── __init__.py
│   ├── eval_metric.py
│   ├── models.py
│   └── utils.py
└── train
    └── train_surreal.py

/.gitignore:
--------------------------------------------------------------------------------
1 | */*.pyc
2 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Yudhik Agrawal
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HumanMeshNet: Polygonal Mesh Recovery of Humans
 2 | Abbhinav Venkat, Chaitanya Patel, Yudhik Agrawal, Avinash Sharma
 3 | 
 4 | ICCV 2019 Workshop on 3D Reconstruction in the Wild (3DRW 2019)
 5 | 
 6 | [ArXiv Link](https://arxiv.org/abs/1908.06544)
 7 | 
 8 | [Project Page](http://cvit.iiit.ac.in/research/projects/cvit-projects/surface-reconstruction)
 9 | 
10 | ### Citation
11 | ```
12 | @InProceedings{Venkat_2019_ICCV_Workshops,
13 |   author = {Venkat, Abbhinav and Patel, Chaitanya and Agrawal, Yudhik and Sharma, Avinash},
14 |   title = {HumanMeshNet: Polygonal Mesh Recovery of Humans},
15 |   booktitle = {The IEEE International Conference on Computer Vision (ICCV) Workshops},
16 |   month = {Oct},
17 |   year = {2019}
18 | }
19 | ```
20 | 
--------------------------------------------------------------------------------
/preprocess_dataset.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yudhik11/HumanMeshNet/7cfae2e654b27feca9d02abdad20028acc9584bb/preprocess_dataset.md
--------------------------------------------------------------------------------
/src/H36M_dataset.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.utils.data as data
 3 | 
 4 | import torchvision.transforms as transforms
 5 | 
 6 | import scipy.io
 7 | from PIL import Image
 8 | import os
 9 | 
10 | import numpy as np
11 | import cv2
12 | 
13 | from .utils import smart_padding, smart_padding_depth, smart_padding_iuv
14 | 
15 | 
16 | class H36MDataset_S9_seq(data.Dataset):
17 |     def __init__(self, prefix):
18 | 
super(H36MDataset_S9_seq, self).__init__() 19 | 20 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train/" 21 | LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_gt3ds.mat" 22 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_pose.mat" 23 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_shape.mat" 24 | # dense pose dir 25 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/s9_train_segm" 26 | self.iuv_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/s9_train_IUV" 27 | # invalid denspose output list 28 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/s9_train.txt" 29 | 30 | # get invalid image names 31 | with open(invalid_path) as f: 32 | self.invalid_images = f.readlines() 33 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 34 | 35 | self.tsfm = transforms.Compose([ 36 | transforms.Lambda(lambda img : smart_padding(img)), 37 | transforms.Resize(224), 38 | transforms.ToTensor(), 39 | ]) 40 | 41 | self.pose_images = sorted(os.listdir(self.root_dir)) 42 | #self.length = len(self.pose_images) 43 | 44 | self.seq_idxs = [i for i, pm in enumerate(self.pose_images) if pm.startswith(prefix)] 45 | self.length = len(self.seq_idxs) 46 | print("Length:", self.length) 47 | 48 | self.landmarks = torch.tensor(scipy.io.loadmat(LM_matfile)['gt3ds'], dtype=torch.float) 49 | self.thetas = torch.tensor(scipy.io.loadmat(theta_matfile)['poses'], dtype=torch.float) 50 | self.betas = torch.tensor(scipy.io.loadmat(beta_matfile)['shapes'], dtype=torch.float) 51 | 52 | 53 | def __len__(self): 54 | return self.length 55 | 56 | def get_name(self, idx): 57 | return self.pose_images[idx] 58 | 59 | def __getitem__(self, idx): 60 | idx = self.seq_idxs[idx] 61 | 62 | # load IUV image 63 | img_name = self.pose_images[idx] 64 | if img_name in self.invalid_images: 65 | idx = idx-1 66 | img_name = self.pose_images[idx] 67 | 68 | seg_name = img_name[:-4] + "_IUV.mat" 69 | 70 | seg_path = os.path.join(self.dp_root_dir, seg_name) 71 | seg = scipy.io.loadmat(seg_path)['segm'] 72 | seg = smart_padding_depth(seg) 73 | seg = cv2.resize(seg, (224, 224)) # resize to 224 74 | seg = torch.tensor(seg).unsqueeze(0).float() 75 | 76 | iuv_path = os.path.join(self.iuv_root_dir, seg_name) 77 | iuv = scipy.io.loadmat(iuv_path)['segm'] 78 | iuv = smart_padding_iuv(iuv) 79 | iuv = cv2.resize(iuv, (224, 224)) # resize to 224 80 | iuv = np.transpose(iuv, (2, 0, 1)) 81 | iuv = torch.tensor(iuv).unsqueeze(0).float() 82 | 83 | img_name = self.pose_images[idx] 84 | img_path = os.path.join(self.root_dir, img_name) 85 | img = Image.open(img_path).convert('RGB') 86 | img = self.tsfm(img) 87 | 88 | return img, seg, iuv, self.thetas[idx], self.betas[idx], self.landmarks[idx].view(-1, 3) 89 | 90 | 91 | class H36MDatasetFull(data.Dataset): 92 | def __init__(self, dstype): 93 | super(H36MDatasetFull, self).__init__() 94 | 95 | if dstype == "train": 96 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/total_images_train/" 97 | LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/total_train_gt3ds.mat" 98 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/total_train_pose.mat" 99 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/total_train_shape.mat" 100 | # dense pose dir 101 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/total_train/" 102 | 103 | elif dstype == "val": 104 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/" 105 | 
LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/" 106 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/" 107 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/" 108 | # dense pose dir 109 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/" 110 | 111 | self.tsfm = transforms.Compose([ 112 | transforms.Lambda(lambda img : smart_padding(img)), 113 | transforms.Resize(224), 114 | transforms.ToTensor(), 115 | ]) 116 | 117 | if dstype == "val": 118 | self.pose_images = sorted(os.listdir(os.path.join(self.root_dir, "images_train") )) 119 | self.pose_images = [os.path.join("images_train", pim) for pim in self.pose_images] 120 | self.ids = [0]*len(self.pose_images) 121 | self.pose_images.extend( [os.path.join("images_val", pim) for pim in \ 122 | sorted(os.listdir(os.path.join(self.root_dir, "images_val") )) ] ) 123 | self.ids.extend( [1]*(len(self.pose_images) - len(self.ids)) ) 124 | 125 | elif dstype == "train": 126 | self.pose_images = sorted(os.listdir(self.root_dir)) 127 | 128 | self.length = len(self.pose_images) 129 | 130 | self.dstype = dstype 131 | if dstype == "train": 132 | self.landmarks = torch.tensor(scipy.io.loadmat(LM_matfile)['gt3ds'], dtype=torch.float) 133 | self.thetas = torch.tensor(scipy.io.loadmat(theta_matfile)['poses'], dtype=torch.float) 134 | self.betas = torch.tensor(scipy.io.loadmat(beta_matfile)['shapes'], dtype=torch.float) 135 | elif dstype == "val": 136 | self.landmarks1 = torch.tensor(scipy.io.loadmat(os.path.join(LM_matfile, "train_gt3ds.mat") )['gt3ds'], dtype=torch.float) 137 | self.thetas1 = torch.tensor(scipy.io.loadmat(os.path.join(theta_matfile, "train_pose.mat"))['poses'], dtype=torch.float) 138 | self.betas1 = torch.tensor(scipy.io.loadmat(os.path.join(beta_matfile, "train_shape.mat"))['shapes'], dtype=torch.float) 139 | 140 | self.landmarks2 = torch.tensor(scipy.io.loadmat(os.path.join(LM_matfile, "val_gt3ds.mat") )['gt3ds'], dtype=torch.float) 141 | self.thetas2 = torch.tensor(scipy.io.loadmat(os.path.join(theta_matfile, "val_pose.mat"))['poses'], dtype=torch.float) 142 | self.betas2 = torch.tensor(scipy.io.loadmat(os.path.join(beta_matfile, "val_shape.mat"))['shapes'], dtype=torch.float) 143 | 144 | self.landmarks = torch.cat((self.landmarks1, self.landmarks2), dim=0) 145 | self.thetas = torch.cat((self.thetas1, self.thetas2), dim=0) 146 | self.betas = torch.cat((self.betas1, self.betas2), dim=0) 147 | 148 | def __len__(self): 149 | return self.length 150 | 151 | def __getitem__(self, idx): 152 | 153 | # load IUV image 154 | img_name = self.pose_images[idx] 155 | 156 | if self.dstype == "train": 157 | seg_name = img_name[:-4] + "_IUV.mat" 158 | else: 159 | if self.ids[idx] == 0: 160 | seg_name = os.path.join("train", img_name.split("/")[-1][:-4] + "_IUV.mat") 161 | else: 162 | seg_name = os.path.join("val", img_name.split("/")[-1][:-4] + "_IUV.mat") 163 | seg_path = os.path.join(self.dp_root_dir, seg_name) 164 | 165 | while not os.path.exists(seg_path): 166 | idx = np.random.randint(self.length) 167 | img_name = self.pose_images[idx] 168 | 169 | if self.dstype == "train": 170 | seg_name = img_name[:-4] + "_IUV.mat" 171 | else: 172 | if self.ids[idx] == 0: 173 | seg_name = os.path.join("train", img_name.split("/")[-1][:-4] + "_IUV.mat") 174 | else: 175 | seg_name = os.path.join("val", img_name.split("/")[-1][:-4] + "_IUV.mat") 176 | seg_path = os.path.join(self.dp_root_dir, seg_name) 177 | 178 | seg = scipy.io.loadmat(seg_path)['segm'] 179 | seg = smart_padding_depth(seg) 180 | seg = 
cv2.resize(seg, (224, 224)) # resize to 224 181 | seg = torch.tensor(seg).unsqueeze(0).float() 182 | 183 | img_name = self.pose_images[idx] 184 | img_path = os.path.join(self.root_dir, img_name) 185 | img = Image.open(img_path).convert('RGB') 186 | img = self.tsfm(img) 187 | 188 | return img, seg, self.thetas[idx], self.betas[idx], self.landmarks[idx].view(-1, 3) 189 | 190 | 191 | 192 | class H36MDataset_S9(data.Dataset): 193 | def __init__(self, dstype): 194 | super(H36MDataset_S9, self).__init__() 195 | 196 | if dstype == "train" or dstype == "val": 197 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train/" 198 | LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_gt3ds.mat" 199 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_pose.mat" 200 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/s9_train_shape.mat" 201 | # dense pose dir 202 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/s9_train_segm" 203 | # invalid denspose output list 204 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/s9_train.txt" 205 | 206 | # get invalid image names 207 | with open(invalid_path) as f: 208 | self.invalid_images = f.readlines() 209 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 210 | 211 | self.tsfm = transforms.Compose([ 212 | transforms.Lambda(lambda img : smart_padding(img)), 213 | transforms.Resize(224), 214 | transforms.ToTensor(), 215 | ]) 216 | 217 | self.pose_images = sorted(os.listdir(self.root_dir)) 218 | self.length = len(self.pose_images) 219 | 220 | self.landmarks = torch.tensor(scipy.io.loadmat(LM_matfile)['gt3ds'], dtype=torch.float) 221 | self.thetas = torch.tensor(scipy.io.loadmat(theta_matfile)['poses'], dtype=torch.float) 222 | self.betas = torch.tensor(scipy.io.loadmat(beta_matfile)['shapes'], dtype=torch.float) 223 | 224 | 225 | def __len__(self): 226 | return self.length 227 | 228 | def get_name(self, idx): 229 | return self.pose_images[idx] 230 | 231 | def __getitem__(self, idx): 232 | 233 | # load IUV image 234 | img_name = self.pose_images[idx] 235 | if img_name in self.invalid_images: 236 | idx = 0 237 | img_name = self.pose_images[idx] 238 | 239 | seg_name = img_name[:-4] + "_IUV.mat" 240 | 241 | seg_path = os.path.join(self.dp_root_dir, seg_name) 242 | seg = scipy.io.loadmat(seg_path)['segm'] 243 | seg = smart_padding_depth(seg) 244 | seg = cv2.resize(seg, (224, 224)) # resize to 224 245 | seg = torch.tensor(seg).unsqueeze(0).float() 246 | 247 | 248 | img_name = self.pose_images[idx] 249 | img_path = os.path.join(self.root_dir, img_name) 250 | img = Image.open(img_path).convert('RGB') 251 | img = self.tsfm(img) 252 | 253 | return img, seg, self.thetas[idx], self.betas[idx], self.landmarks[idx].view(-1, 3) 254 | 255 | 256 | 257 | class H36MDatasetTest(data.Dataset): 258 | def __init__(self, dstype): 259 | super(H36MDatasetTest, self).__init__() 260 | 261 | if dstype == "train": 262 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/images_train" 263 | LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/train_gt3ds.mat" 264 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/train_pose.mat" 265 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/train_shape.mat" 266 | # dense pose dir 267 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/train/" 268 | # invalid denspose output list 269 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/train.txt" 270 | 271 | elif 
dstype == "val": 272 | self.root_dir = "/home/abbhinav/h36_smpl/hmr/src/datasets/images_val" 273 | LM_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/val_gt3ds.mat" 274 | theta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/val_pose.mat" 275 | beta_matfile = "/home/abbhinav/h36_smpl/hmr/src/datasets/val_shape.mat" 276 | # dense pose dir 277 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/val/" 278 | # invalid denspose output list 279 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m_smpl/val.txt" 280 | 281 | # get invalid image names 282 | with open(invalid_path) as f: 283 | self.invalid_images = f.readlines() 284 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 285 | 286 | self.tsfm = transforms.Compose([ 287 | transforms.Lambda(lambda img : smart_padding(img)), 288 | transforms.Resize(224), 289 | transforms.ToTensor(), 290 | ]) 291 | 292 | self.pose_images = sorted(os.listdir(self.root_dir)) 293 | self.length = len(self.pose_images) 294 | 295 | self.landmarks = torch.tensor(scipy.io.loadmat(LM_matfile)['gt3ds'], dtype=torch.float) 296 | self.thetas = torch.tensor(scipy.io.loadmat(theta_matfile)['poses'], dtype=torch.float) 297 | self.betas = torch.tensor(scipy.io.loadmat(beta_matfile)['shapes'], dtype=torch.float) 298 | 299 | 300 | def __len__(self): 301 | return self.length 302 | 303 | def get_name(self, idx): 304 | return self.pose_images[idx] 305 | 306 | def __getitem__(self, idx): 307 | 308 | # load IUV image 309 | img_name = self.pose_images[idx] 310 | if img_name in self.invalid_images: 311 | idx = 0 312 | img_name = self.pose_images[idx] 313 | 314 | seg_name = img_name[:-4] + "_IUV.mat" 315 | 316 | seg_path = os.path.join(self.dp_root_dir, seg_name) 317 | seg = scipy.io.loadmat(seg_path)['segm'] 318 | seg = smart_padding_depth(seg) 319 | seg = cv2.resize(seg, (224, 224)) # resize to 224 320 | seg = torch.tensor(seg).unsqueeze(0).float() 321 | 322 | 323 | img_name = self.pose_images[idx] 324 | img_path = os.path.join(self.root_dir, img_name) 325 | img = Image.open(img_path).convert('RGB') 326 | img = self.tsfm(img) 327 | 328 | return img, seg, self.thetas[idx], self.betas[idx], self.landmarks[idx].view(-1, 3) 329 | 330 | 331 | 332 | class H36MDataset_DPSMRGB(data.Dataset): 333 | def __init__(self, dstype): 334 | super(H36MDataset_DPSMRGB, self).__init__() 335 | 336 | if dstype == "train": 337 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/train" 338 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_train_joints.mat" 339 | # dense pose dir 340 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train/" 341 | # invalid denspose output list 342 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train.txt" 343 | 344 | elif dstype == "val": 345 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/test" 346 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_test_joints.mat" 347 | # dense pose dir 348 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test/" 349 | # invalid denspose output list 350 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test.txt" 351 | 352 | # get invalid image names 353 | with open(invalid_path) as f: 354 | self.invalid_images = f.readlines() 355 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 356 | 357 | self.tsfm = transforms.Compose([ 358 | transforms.Lambda(lambda img : smart_padding(img)), 359 | transforms.Resize(224), 360 | transforms.ToTensor(), 
361 | ]) 362 | 363 | self.pose_images = sorted(os.listdir(self.root_dir)) 364 | self.length = len(self.pose_images) 365 | 366 | self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'], dtype=torch.float) 367 | 368 | 369 | def __len__(self): 370 | return self.length 371 | 372 | def __getitem__(self, idx): 373 | 374 | """ 375 | img_name = self.pose_images[idx] 376 | img_path = os.path.join(self.root_dir, img_name) 377 | img = scipy.io.loadmat(img_path)['tmp'] 378 | """ 379 | # load IUV image 380 | img_name = self.pose_images[idx] 381 | if img_name in self.invalid_images: 382 | idx = 0 383 | img_name = self.pose_images[idx] 384 | 385 | img_path = os.path.join(self.root_dir, img_name) 386 | img = Image.open(img_path).convert('RGB') 387 | img = self.tsfm(img).float() 388 | 389 | segs_name = img_name[:-4] + "_IUV.mat" 390 | segs_path = os.path.join(self.dp_root_dir, segs_name) 391 | segs = scipy.io.loadmat(segs_path)['segm'] 392 | segs = smart_padding_depth(segs) # smart pad to make square 393 | segs = cv2.resize(segs, (224, 224)) # resize to 224 394 | segs = torch.tensor(segs).unsqueeze(0).float() 395 | 396 | joints = self.joints[idx].view(-1, 3) / 1000.0 397 | joints = joints - joints[6].unsqueeze(0) 398 | 399 | return img, segs, joints 400 | 401 | 402 | 403 | class H36MDataset(data.Dataset): 404 | def __init__(self, dstype): 405 | super(H36MDataset, self).__init__() 406 | 407 | if dstype == "train": 408 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/train" 409 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_train_joints.mat" 410 | # dense pose dir 411 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train/" 412 | # invalid denspose output list 413 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train.txt" 414 | 415 | elif dstype == "val": 416 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/test" 417 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_test_joints.mat" 418 | # dense pose dir 419 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test/" 420 | # invalid denspose output list 421 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test.txt" 422 | 423 | # get invalid image names 424 | with open(invalid_path) as f: 425 | self.invalid_images = f.readlines() 426 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 427 | 428 | self.tsfm = transforms.Compose([ 429 | transforms.Lambda(lambda img : smart_padding_depth(img)), 430 | transforms.Resize(224), 431 | transforms.ToTensor(), 432 | ]) 433 | 434 | self.pose_images = sorted(os.listdir(self.root_dir)) 435 | self.length = len(self.pose_images) 436 | 437 | self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'], dtype=torch.float) 438 | 439 | 440 | def __len__(self): 441 | return self.length 442 | 443 | def __getitem__(self, idx): 444 | 445 | """ 446 | img_name = self.pose_images[idx] 447 | img_path = os.path.join(self.root_dir, img_name) 448 | img = scipy.io.loadmat(img_path)['tmp'] 449 | """ 450 | # load IUV image 451 | img_name = self.pose_images[idx] 452 | if img_name in self.invalid_images: 453 | idx = 0 454 | img_name = self.pose_images[idx] 455 | img_name = img_name[:-4] + "_IUV.mat" 456 | 457 | img_path = os.path.join(self.dp_root_dir, img_name) 458 | img = scipy.io.loadmat(img_path)['segm'] 459 | 460 | img = smart_padding_depth(img) # smart pad to make square 461 | img = cv2.resize(img, (224, 224)) # resize to 224 462 | img = torch.tensor(img).unsqueeze(0) 463 | 464 
| joints = self.joints[idx].view(-1, 3) / 1000.0 465 | joints = joints - joints[6].unsqueeze(0) 466 | 467 | return img.float(), joints 468 | 469 | 470 | 471 | class H36MDataset_4C(data.Dataset): 472 | def __init__(self, dstype): 473 | super(H36MDataset_4C, self).__init__() 474 | 475 | if dstype == "train": 476 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/train" 477 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_train_joints.mat" 478 | # dense pose dir 479 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train/" 480 | # invalid denspose output list 481 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train.txt" 482 | 483 | elif dstype == "val": 484 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/test" 485 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_test_joints.mat" 486 | # dense pose dir 487 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test/" 488 | # invalid denspose output list 489 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test.txt" 490 | 491 | # get invalid image names 492 | with open(invalid_path) as f: 493 | self.invalid_images = f.readlines() 494 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 495 | 496 | self.tsfm = transforms.Compose([ 497 | transforms.Lambda(lambda img : smart_padding(img)), 498 | transforms.Resize(224), 499 | transforms.ToTensor(), 500 | ]) 501 | 502 | self.pose_images = sorted(os.listdir(self.root_dir)) 503 | self.length = len(self.pose_images) 504 | 505 | self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'], dtype=torch.float) 506 | 507 | 508 | def __len__(self): 509 | return self.length 510 | 511 | def __getitem__(self, idx): 512 | 513 | """ 514 | img_name = self.pose_images[idx] 515 | img_path = os.path.join(self.root_dir, img_name) 516 | img = scipy.io.loadmat(img_path)['tmp'] 517 | """ 518 | # load IUV image 519 | img_name = self.pose_images[idx] 520 | if img_name in self.invalid_images: 521 | idx = 0 522 | img_name = self.pose_images[idx] 523 | 524 | ori_img_name = img_name 525 | ori_img_path = os.path.join(self.root_dir, ori_img_name) 526 | ori_img = Image.open(ori_img_path).convert('RGB') 527 | ori_img = self.tsfm(ori_img).float() 528 | 529 | img_name = img_name[:-4] + "_IUV.mat" 530 | img_path = os.path.join(self.dp_root_dir, img_name) 531 | img = scipy.io.loadmat(img_path)['segm'] 532 | img = smart_padding_depth(img) # smart pad to make square 533 | img = cv2.resize(img, (224, 224)) # resize to 224 534 | img = torch.tensor(img).unsqueeze(0).float() 535 | 536 | combined_img = torch.cat((ori_img, img), dim=0) 537 | 538 | joints = self.joints[idx].view(-1, 3) / 1000.0 539 | joints = joints - joints[6].unsqueeze(0) 540 | 541 | return combined_img, joints 542 | 543 | 544 | 545 | class H36MDatasetCJ(data.Dataset): 546 | def __init__(self, dstype): 547 | super(H36MDatasetCJ, self).__init__() 548 | 549 | if dstype == "train": 550 | self.root_dir = "/media/HDD_2TB/himansh/new_human3.6m/train" 551 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_train_joints.mat" 552 | clustermatfile_j = "/media/HDD_2TB/himansh/new_human3.6m/part_clusters/full_body/cluster_train_500.mat" 553 | # dense pose dir 554 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train/" 555 | # invalid denspose output list 556 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/train.txt" 557 | 558 | elif dstype == "val": 559 | self.root_dir = 
"/media/HDD_2TB/himansh/new_human3.6m/test" 560 | jointsmatfile = "/media/HDD_2TB/himansh/new_human3.6m/new_test_joints.mat" 561 | clustermatfile_j = "/media/HDD_2TB/himansh/new_human3.6m/part_clusters/full_body/cluster_test_500.mat" 562 | # dense pose dir 563 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test/" 564 | # invalid denspose output list 565 | invalid_path = "/home/saketh/Densepose/densepose/DensePoseData/h36m/test.txt" 566 | 567 | # get invalid image names 568 | with open(invalid_path) as f: 569 | self.invalid_images = f.readlines() 570 | self.invalid_images = [ii.strip() for ii in self.invalid_images] 571 | 572 | self.tsfm = transforms.Compose([ 573 | transforms.Lambda(lambda img : smart_padding_depth(img)), 574 | transforms.Resize(224), 575 | transforms.ToTensor(), 576 | ]) 577 | 578 | self.pose_images = sorted(os.listdir(self.root_dir)) 579 | self.length = len(self.pose_images) 580 | 581 | self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'], dtype=torch.float) 582 | 583 | dct = scipy.io.loadmat(clustermatfile_j) 584 | 585 | if dstype == "train": 586 | self.cluster_labels_j = torch.tensor(dct['idx_joint'].astype(np.int), dtype=torch.long).squeeze() - 1 587 | elif dstype == "val": 588 | self.cluster_labels_j = torch.tensor(dct['idx_centers'].astype(np.int), dtype=torch.long).squeeze() - 1 589 | 590 | 591 | def __len__(self): 592 | return self.length 593 | 594 | def __getitem__(self, idx): 595 | 596 | """ 597 | img_name = self.pose_images[idx] 598 | img_path = os.path.join(self.root_dir, img_name) 599 | img = scipy.io.loadmat(img_path)['tmp'] 600 | """ 601 | # load IUV image 602 | img_name = self.pose_images[idx] 603 | if img_name in self.invalid_images: 604 | idx = 0 605 | img_name = self.pose_images[idx] 606 | img_name = img_name[:-4] + "_IUV.mat" 607 | 608 | img_path = os.path.join(self.dp_root_dir, img_name) 609 | img = scipy.io.loadmat(img_path)['segm'] 610 | 611 | img = smart_padding_depth(img) # smart pad to make square 612 | img = cv2.resize(img, (224, 224)) # resize to 224 613 | img = torch.tensor(img).unsqueeze(0) 614 | 615 | joints = self.joints[idx].view(-1, 3) / 1000.0 616 | joints = joints - joints[6].unsqueeze(0) 617 | 618 | return img.float(), joints, self.cluster_labels_j[idx] 619 | 620 | -------------------------------------------------------------------------------- /src/SMPL_pytorch.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import pickle 5 | import numpy as np 6 | from torch.autograd import Variable 7 | 8 | def batch_rodrigues(theta): 9 | #theta N x 3 10 | batch_size = theta.shape[0] 11 | l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1) 12 | angle = torch.unsqueeze(l1norm, -1) 13 | normalized = torch.div(theta, angle) 14 | angle = angle * 0.5 15 | v_cos = torch.cos(angle) 16 | v_sin = torch.sin(angle) 17 | quat = torch.cat([v_cos, v_sin * normalized], dim = 1) 18 | 19 | return quat2mat(quat) 20 | 21 | def quat2mat(quat): 22 | """Convert quaternion coefficients to rotation matrix. 
23 | Args: 24 | quat: size = [B, 4] 4 <===>(w, x, y, z) 25 | Returns: 26 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] 27 | """ 28 | norm_quat = quat 29 | norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True) 30 | w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3] 31 | 32 | B = quat.size(0) 33 | 34 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) 35 | wx, wy, wz = w*x, w*y, w*z 36 | xy, xz, yz = x*y, x*z, y*z 37 | 38 | rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz, 39 | 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx, 40 | 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3) 41 | return rotMat 42 | 43 | def batch_global_rigid_transformation(Rs, Js, parent, device, rotate_base = False): 44 | N = Rs.shape[0] 45 | if rotate_base: 46 | np_rot_x = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype = np.float) 47 | np_rot_x = np.reshape(np.tile(np_rot_x, [N, 1]), [N, 3, 3]) 48 | rot_x = Variable(torch.from_numpy(np_rot_x).float()).to(device) 49 | root_rotation = torch.matmul(Rs[:, 0, :, :], rot_x) 50 | else: 51 | root_rotation = Rs[:, 0, :, :] 52 | Js = torch.unsqueeze(Js, -1) 53 | 54 | def make_A(R, t): 55 | R_homo = F.pad(R, [0, 0, 0, 1, 0, 0]) 56 | t_homo = torch.cat([t, Variable(torch.ones(N, 1, 1)).to(device)], dim = 1) 57 | return torch.cat([R_homo, t_homo], 2) 58 | 59 | A0 = make_A(root_rotation, Js[:, 0]) 60 | results = [A0] 61 | 62 | for i in range(1, parent.shape[0]): 63 | j_here = Js[:, i] - Js[:, parent[i]] 64 | A_here = make_A(Rs[:, i], j_here) 65 | res_here = torch.matmul(results[parent[i]], A_here) 66 | results.append(res_here) 67 | 68 | results = torch.stack(results, dim = 1) 69 | 70 | new_J = results[:, :, :3, 3] 71 | Js_w0 = torch.cat([Js, Variable(torch.zeros(N, 24, 1, 1)).to(device)], dim = 2) 72 | init_bone = torch.matmul(results, Js_w0) 73 | init_bone = F.pad(init_bone, [3, 0, 0, 0, 0, 0, 0, 0]) 74 | A = results - init_bone 75 | 76 | return new_J, A 77 | 78 | class SMPL(nn.Module): 79 | def __init__(self, model_path = '../../model_smpl_male.pkl'): 80 | super(SMPL, self).__init__() 81 | 82 | 83 | self.model_path = model_path 84 | with open(model_path, 'rb') as f: 85 | # model = pickle.load(f, encoding='latin1') 86 | model = pickle.load(f) 87 | # for key in model.keys(): 88 | # print(key, "\t\t", type(model[key])) 89 | 90 | np_v_template = np.array(model['v_template'], dtype = np.float) 91 | self.register_buffer('v_template', torch.from_numpy(np_v_template).float()) 92 | self.size = [np_v_template.shape[0], 3] 93 | 94 | np_shapedirs = np.array(model['shapedirs'], dtype = np.float) 95 | self.num_betas = np_shapedirs.shape[-1] 96 | np_shapedirs = np.reshape(np_shapedirs, [-1, self.num_betas]).T 97 | self.register_buffer('shapedirs', torch.from_numpy(np_shapedirs).float()) 98 | 99 | np_J_regressor = np.array(model['J_regressor'].toarray().T, dtype = np.float) 100 | self.register_buffer('J_regressor', torch.from_numpy(np_J_regressor).float()) 101 | 102 | np_faces = np.array(model['f'], dtype = np.float) 103 | self.register_buffer('faces', torch.from_numpy(np_faces).float()) 104 | 105 | np_posedirs = np.array(model['posedirs'], dtype = np.float) 106 | num_pose_basis = np_posedirs.shape[-1] 107 | np_posedirs = np.reshape(np_posedirs, [-1, num_pose_basis]).T 108 | self.register_buffer('posedirs', torch.from_numpy(np_posedirs).float()) 109 | 110 | self.parents = np.array(model['kintree_table'])[0].astype(np.int32) 111 | 112 | np_weights = np.array(model['weights'], dtype = np.float) 113 
| 114 | vertex_count = np_weights.shape[0] 115 | vertex_component = np_weights.shape[1] 116 | 117 | # Maximum possible size of the batch 118 | batch_size = 256 119 | np_weights = np.tile(np_weights, (batch_size, 1)) 120 | self.register_buffer('weight', torch.from_numpy(np_weights).float().reshape(-1, vertex_count, vertex_component)) 121 | 122 | self.register_buffer('e3', torch.eye(3).float()) 123 | 124 | self.cur_device = None 125 | 126 | 127 | self.SMPL_to_H36M_indices16 = torch.tensor([11, 5, 2, 1, 4, 10, 0, 6, 12, 12, 21, 19, 17, 16, 18, 20]) 128 | self.SMPL_to_H36M_indices14 = torch.tensor([11, 5, 2, 1, 4, 10, 12, 12, 21, 19, 17, 16, 18, 20]) 129 | self.head_index = 410 130 | 131 | 132 | def forward(self, beta, theta, get_skin = False): 133 | if not self.cur_device: 134 | device = theta.device 135 | self.cur_device = torch.device(device.type, device.index) 136 | num_batch = beta.shape[0] 137 | 138 | v_shaped = torch.matmul(beta, self.shapedirs).view(-1, self.size[0], self.size[1]) + self.v_template 139 | # print(v_shaped.shape) 140 | Jx = torch.matmul(v_shaped[:, :, 0], self.J_regressor) 141 | Jy = torch.matmul(v_shaped[:, :, 1], self.J_regressor) 142 | Jz = torch.matmul(v_shaped[:, :, 2], self.J_regressor) 143 | J = torch.stack([Jx, Jy, Jz], dim = 2) 144 | # print("theta : ", theta.size()) 145 | Rs = batch_rodrigues(theta.view(-1, 3)).view(-1, 24, 3, 3) 146 | pose_feature = (Rs[:, 1:, :, :]).sub(1.0, self.e3).view(-1, 207) 147 | v_posed = torch.matmul(pose_feature, self.posedirs).view(-1, self.size[0], self.size[1]) + v_shaped 148 | self.J_transformed, A = batch_global_rigid_transformation(Rs, J, self.parents, self.cur_device, rotate_base = False) 149 | 150 | weight = self.weight[:num_batch] 151 | W = weight.view(num_batch, -1, 24) 152 | T = torch.matmul(W, A.view(num_batch, 24, 16)).view(num_batch, -1, 4, 4) 153 | 154 | v_posed_homo = torch.cat([v_posed, torch.ones(num_batch, v_posed.shape[1], 1, device = self.cur_device)], dim = 2) 155 | v_homo = torch.matmul(T, torch.unsqueeze(v_posed_homo, -1)) 156 | 157 | verts = v_homo[:, :, :3, 0] 158 | 159 | joint_x = torch.matmul(verts[:, :, 0], self.J_regressor) 160 | joint_y = torch.matmul(verts[:, :, 1], self.J_regressor) 161 | joint_z = torch.matmul(verts[:, :, 2], self.J_regressor) 162 | # joint_x = torch.matmul(verts[:, :, 0], self.joint_regressor) 163 | # joint_y = torch.matmul(verts[:, :, 1], self.joint_regressor) 164 | # joint_z = torch.matmul(verts[:, :, 2], self.joint_regressor) 165 | 166 | joints = torch.stack([joint_x, joint_y, joint_z], dim = 2) 167 | # print("joints", joints.size()) 168 | if get_skin: 169 | return verts, joints, Rs 170 | else: 171 | return joints 172 | 173 | 174 | def H36M_forward(self, beta, theta, get_skin=False): 175 | 176 | verts, joints, Rs = self.forward(beta, theta, get_skin=True) 177 | new_joints = joints[:, self.SMPL_to_H36M_indices14, :] 178 | new_joints[:, 7, :] = verts[:, self.head_index, :] 179 | 180 | if get_skin: 181 | return verts, new_joints, Rs 182 | else: 183 | return new_joints 184 | 185 | def H36M_forward_old(self, beta, theta, get_skin=False): 186 | 187 | verts, joints, Rs = self.forward(beta, theta, get_skin=True) 188 | new_joints = joints[:, self.SMPL_to_H36M_indices16, :] 189 | new_joints[:, 9, :] = verts[:, self.head_index, :] 190 | 191 | if get_skin: 192 | return verts, new_joints, Rs 193 | else: 194 | return new_joints 195 | -------------------------------------------------------------------------------- /src/Surreal_GlobalRotationCorrection.py: 
--------------------------------------------------------------------------------
 1 | import transforms3d
 2 | import math
 3 | import numpy as np
 4 | import scipy.io as sio
 5 | 
 6 | # Returns intrinsic camera matrix
 7 | # Parameters are hard-coded since all SURREAL images use the same.
 8 | def get_intrinsic():
 9 |     # These are set in Blender (datageneration/main_part1.py)
10 |     res_x_px = 320  # *scn.render.resolution_x
11 |     res_y_px = 240  # *scn.render.resolution_y
12 |     f_mm = 60  # *cam_ob.data.lens
13 |     sensor_w_mm = 32  # *cam_ob.data.sensor_width
14 |     sensor_h_mm = sensor_w_mm * res_y_px / res_x_px  # *cam_ob.data.sensor_height (function of others)
15 | 
16 |     scale = 1  # *scn.render.resolution_percentage/100
17 |     skew = 0  # only use rectangular pixels
18 |     pixel_aspect_ratio = 1
19 | 
20 |     # From similar triangles:
21 |     # sensor_width_in_mm / resolution_x_in_pix = focal_length_x_in_mm / focal_length_x_in_pix
22 |     fx_px = f_mm * res_x_px * scale / sensor_w_mm
23 |     fy_px = f_mm * res_y_px * scale * pixel_aspect_ratio / sensor_h_mm
24 | 
25 |     # Center of the image
26 |     u = res_x_px * scale / 2
27 |     v = res_y_px * scale / 2
28 | 
29 |     # Intrinsic camera matrix
30 |     K = np.array([[fx_px, skew, u], [0, fy_px, v], [0, 0, 1]])
31 |     return K
32 | 
33 | 
34 | # Returns extrinsic camera matrix
35 | # T : translation vector from Blender (*cam_ob.location)
36 | # RT: extrinsic computer vision camera matrix
37 | # Script based on https://blender.stackexchange.com/questions/38009/3x4-camera-matrix-from-blender-camera
38 | def get_extrinsic(T):
39 |     # Take the first 3 columns of the matrix_world in Blender and transpose.
40 |     # This is hard-coded since all images in SURREAL use the same.
41 |     R_world2bcam = np.array([[0, 0, 1], [0, -1, 0], [-1, 0, 0]]).transpose()
42 |     # *cam_ob.matrix_world = Matrix(((0., 0., 1, params['camera_distance']),
43 |     #                                (0., -1, 0., -1.0),
44 |     #                                (-1., 0., 0., 0.),
45 |     #                                (0.0, 0.0, 0.0, 1.0)))
46 | 
47 |     # Convert camera location to translation vector used in coordinate changes
48 |     T_world2bcam = -1 * np.dot(R_world2bcam, T)
49 | 
50 |     # Following is needed to convert Blender camera to computer vision camera
51 |     R_bcam2cv = np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
52 | 
53 |     # Build the coordinate transform matrix from world to computer vision camera
54 |     R_world2cv = np.dot(R_bcam2cv, R_world2bcam)
55 |     T_world2cv = np.dot(R_bcam2cv, T_world2bcam)
56 | 
57 |     # Put into 3x4 matrix
58 |     RT = np.concatenate([R_world2cv, T_world2cv], axis=1)
59 |     return RT, R_world2cv, T_world2cv
60 | 
61 | 
62 | def rotateBody(RzBody, pelvisRotVec):
63 |     angle = np.linalg.norm(pelvisRotVec)
64 |     Rpelvis = transforms3d.axangles.axangle2mat(pelvisRotVec / angle, angle)
65 |     globRotMat = np.dot(RzBody, Rpelvis)
66 |     R90 = transforms3d.euler.euler2mat(np.pi / 2, 0, 0)
67 |     globRotAx, globRotAngle = transforms3d.axangles.mat2axangle(np.dot(R90, globRotMat))
68 |     globRotVec = globRotAx * globRotAngle
69 |     return globRotVec
70 | 
71 | 
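# Illustrative sketch (added for this dump; not in the original script): K and
# RT from the helpers above combine into the usual pinhole projection
# x = K [R|T] X. The camera location and the 3D point below are made-up
# values, not taken from a real info.mat.
def project_point_example():
    K = get_intrinsic()
    RT, R_cv, T_cv = get_extrinsic(np.array([[8.0], [-1.0], [1.0]]))  # assumed camLoc
    X_h = np.array([0.0, 0.0, 1.0, 1.0])  # homogeneous world point (assumed)
    x = K.dot(RT.dot(X_h))                # 3x4 extrinsic, then 3x3 intrinsics
    return x[0] / x[2], x[1] / x[2]       # perspective divide -> (u, v) pixels
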
72 | # provide info path
73 | # we need camLoc and zrot from it to correct global rotation
74 | info_path = \
75 |     "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/train/run0/01_01/01_01_c0001_info.mat"
76 | fno = 0  # frame number
77 | 
78 | info = sio.loadmat(info_path)
79 | zrot = info['zrot'][0][0]
80 | RzBody = np.array(((math.cos(zrot), -math.sin(zrot), 0),
81 |                    (math.sin(zrot), math.cos(zrot), 0),
82 |                    (0, 0, 1)))
83 | 
84 | intrinsic = get_intrinsic()
85 | extrinsic, R, T = get_extrinsic(info['camLoc'])
86 | 
87 | # change global rotation to align wrt camera
88 | pose = info['pose'][:, fno]
89 | pose[0:3] = rotateBody(RzBody, pose[0:3])
90 | 
91 | shape = info['shape'][:, fno]
--------------------------------------------------------------------------------
/src/Surreal_dataset.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.utils.data as data
 3 | 
 4 | import torchvision.transforms as transforms
 5 | 
 6 | import scipy.io
 7 | from PIL import Image
 8 | import os
 9 | 
10 | import numpy as np
11 | import cv2
12 | 
13 | from .utils import smart_padding, smart_padding_depth, flip_smpl
14 | 
15 | """
16 | Dataloaders for direct surface regression
17 | """
18 | 
19 | 
20 | class SurrealBothSMRGBCrop_FlipCorrect(data.Dataset):
21 |     def __init__(self, dstype):
22 |         super(SurrealBothSMRGBCrop_FlipCorrect, self).__init__()
23 | 
24 |         if dstype == "train":
25 |             self.gtsm_root_dir = \
26 |                 "/media/SSD_150/abbhinav/body3/data/segm_train_run0/"
27 |             posematfile = "/media/SSD_150/abbhinav/body3/data/poses_gr/train_pose_matrix.mat"
28 |             shapematfile = \
29 |                 "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/shapes/train_shape_matrix.mat"
30 |             jointsmatfile = "/media/SSD_150/abbhinav/body3/data/joints_gr/train_joint_matrix.mat"
31 |             self.rgb_root_dir = "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/images_train_run0"
32 |             # dense pose dir
33 |             self.dp_root_dir = \
34 |                 "/home/saketh/Densepose/densepose/DensePoseData/surreal/train"
35 |             # invalid densepose output list
36 |             invalid_path = \
37 |                 "/home/saketh/Densepose/densepose/DensePoseData/surreal/train.txt"
38 | 
39 | 
40 |         elif dstype == "val":
41 |             self.gtsm_root_dir = \
42 |                 "/media/SSD_150/abbhinav/body3/data/segm_val_run0/"
43 |             posematfile = "/media/SSD_150/abbhinav/body3/data/poses_gr/val_pose_matrix.mat"
44 |             shapematfile = \
45 |                 "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/shapes/val_shape_matrix.mat"
46 |             jointsmatfile = "/media/SSD_150/abbhinav/body3/data/joints_gr/val_joint_matrix.mat"
47 |             self.rgb_root_dir = "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/images_val_run0"
48 |             # dense pose dir
49 |             self.dp_root_dir = \
50 |                 "/home/saketh/Densepose/densepose/DensePoseData/surreal/val"
51 |             # invalid densepose output list
52 |             invalid_path = \
53 |                 "/home/saketh/Densepose/densepose/DensePoseData/surreal/val.txt"
54 | 
55 | 
56 |         self.tsfm = transforms.Compose([
57 |             transforms.Lambda(lambda img : smart_padding(img)),
58 |             transforms.Resize(224),
59 |             transforms.ToTensor(),
60 |         ])
61 | 
62 |         self.rgb_images = sorted(os.listdir(self.rgb_root_dir))
63 | 
64 |         # self.length = len(self.pose_images)
65 |         self.length = len(self.rgb_images) // 5
66 | 
67 |         self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'].T, dtype=torch.float)
68 |         self.pose_params = torch.tensor(scipy.io.loadmat(posematfile)['poses'].T, dtype=torch.float)
69 |         self.shape_params = torch.tensor(scipy.io.loadmat(shapematfile)['shapes'].T, dtype=torch.float)
70 | 
71 | 
72 |         # get invalid image names
73 |         with open(invalid_path) as f:
74 |             self.invalid_images = f.readlines()
75 |         self.invalid_images = [ii.strip().split(".")[0]+".jpg" for ii in self.invalid_images]
76 |         # store valid indices to sample in case of invalid
77 |         subsampled_images = self.rgb_images[::5]
78 |         self.valid_indices = [i for i, pi in enumerate(subsampled_images) if pi not in self.invalid_images]
79 |         self.invalid_indices = [i for i, pi in
enumerate(subsampled_images) if pi in self.invalid_images] 80 | 81 | def __len__(self): 82 | return self.length 83 | 84 | def _crop_tight(self, img, segs, dpsegs): 85 | nz = np.nonzero(segs) 86 | if len(nz[0]) == 0: 87 | return img, segs, dpsegs 88 | miny, maxy = nz[0].min(), nz[0].max() 89 | minx, maxx = nz[1].min(), nz[1].max() 90 | img = img.crop((minx, miny, maxx+1, maxy+1)) 91 | segs = segs[miny:maxy+1, minx:maxx+1] 92 | dpsegs = dpsegs[miny:maxy+1, minx:maxx+1] 93 | return img, segs, dpsegs 94 | 95 | def __getitem__(self, idx): 96 | 97 | # if invalid then sample a random valid 98 | if idx in self.invalid_indices: 99 | past = idx 100 | idx = self.valid_indices[np.random.randint(len(self.valid_indices)) ] 101 | 102 | # get original index 103 | idx = idx*5 104 | 105 | # load seg image 106 | seg_name = self.rgb_images[idx][:-4] + ".mat" 107 | seg_path = os.path.join(self.gtsm_root_dir, seg_name) 108 | segs = scipy.io.loadmat(seg_path)['tmp'] 109 | 110 | # load original image 111 | img_name = self.rgb_images[idx] 112 | img_path = os.path.join(self.rgb_root_dir, img_name) 113 | img = Image.open(img_path).convert('RGB') 114 | 115 | # load IUV image 116 | dpseg_name = self.rgb_images[idx][:-4] + "_IUV.mat" 117 | dpseg_path = os.path.join(self.dp_root_dir, dpseg_name) 118 | dpsegs = scipy.io.loadmat(dpseg_path)['segm'] 119 | 120 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 121 | segs = np.flip(segs, 1) 122 | dpsegs = np.flip(dpsegs, 1) 123 | 124 | img, segs, dpsegs = self._crop_tight(img, segs, dpsegs) 125 | 126 | segs = smart_padding_depth(segs) # smart pad to make square 127 | segs = cv2.resize(segs, (224, 224)) # resize to 224 128 | segs = torch.tensor(segs).unsqueeze(0).float() 129 | 130 | dpsegs = smart_padding_depth(dpsegs) # smart pad to make square 131 | dpsegs = cv2.resize(dpsegs, (224, 224)) # resize to 224 132 | dpsegs = torch.tensor(dpsegs).unsqueeze(0).float() 133 | 134 | 135 | img = self.tsfm(img).float() 136 | theta = flip_smpl(self.pose_params[idx]) 137 | 138 | return img, segs, dpsegs, theta, self.shape_params[idx], self.joints[idx].view(-1, 3) 139 | 140 | 141 | class SurrealGTSMRGBCrop_FlipCorrect(data.Dataset): 142 | def __init__(self, dstype): 143 | super(SurrealGTSMRGBCrop_FlipCorrect, self).__init__() 144 | 145 | if dstype == "train": 146 | self.gtsm_root_dir = \ 147 | "/media/SSD_150/abbhinav/body3/data/segm_train_run0/" 148 | posematfile = "/media/SSD_150/abbhinav/body3/data/poses_gr/train_pose_matrix.mat" 149 | shapematfile = \ 150 | "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/shapes/train_shape_matrix.mat" 151 | jointsmatfile = "/media/SSD_150/abbhinav/body3/data/joints_gr/train_joint_matrix.mat" 152 | self.rgb_root_dir = "/home/abbhinav/body3/data/images_train_run0" 153 | 154 | elif dstype == "val": 155 | self.gtsm_root_dir = \ 156 | "/media/SSD_150/abbhinav/body3/data/segm_val_run0/" 157 | posematfile = "/media/SSD_150/abbhinav/body3/data/poses_gr/val_pose_matrix.mat" 158 | shapematfile = \ 159 | "/media/HDD_2TB/sourabh/surreal_complete/surreal/download/dump/SURREAL/data/cmu/shapes/val_shape_matrix.mat" 160 | jointsmatfile = "/media/SSD_150/abbhinav/body3/data/joints_gr/val_joint_matrix.mat" 161 | self.rgb_root_dir = "/home/abbhinav/body3/data/images_val_run0" 162 | 163 | self.tsfm = transforms.Compose([ 164 | transforms.Lambda(lambda img : smart_padding(img)), 165 | transforms.Resize(224), 166 | transforms.ToTensor(), 167 | ]) 168 | 169 | self.rgb_images = sorted(os.listdir(self.rgb_root_dir)) 170 | 171 | # self.length = 
len(self.pose_images) 172 | self.length = len(self.rgb_images) // 5 173 | 174 | self.joints = torch.tensor(scipy.io.loadmat(jointsmatfile)['joints'].T, dtype=torch.float) 175 | self.pose_params = torch.tensor(scipy.io.loadmat(posematfile)['poses'].T, dtype=torch.float) 176 | self.shape_params = torch.tensor(scipy.io.loadmat(shapematfile)['shapes'].T, dtype=torch.float) 177 | 178 | def __len__(self): 179 | return self.length 180 | 181 | def _crop_tight(self, img, segs): 182 | nz = np.nonzero(segs) 183 | if len(nz[0]) == 0: 184 | return img, segs 185 | miny, maxy = nz[0].min(), nz[0].max() 186 | minx, maxx = nz[1].min(), nz[1].max() 187 | img = img.crop((minx, miny, maxx+1, maxy+1)) 188 | segs = segs[miny:maxy+1, minx:maxx+1] 189 | return img, segs 190 | 191 | def __getitem__(self, idx): 192 | 193 | # get original index 194 | idx = idx*5 195 | 196 | # load seg image 197 | seg_name = self.rgb_images[idx][:-4] + ".mat" 198 | seg_path = os.path.join(self.gtsm_root_dir, seg_name) 199 | segs = scipy.io.loadmat(seg_path)['tmp'] 200 | 201 | # load original image 202 | img_name = self.rgb_images[idx] 203 | img_path = os.path.join(self.rgb_root_dir, img_name) 204 | img = Image.open(img_path).convert('RGB') 205 | 206 | # flip image and segs 207 | img = img.transpose(Image.FLIP_LEFT_RIGHT) 208 | segs = np.flip(segs, 1) 209 | 210 | img, segs = self._crop_tight(img, segs) 211 | 212 | segs = smart_padding_depth(segs) # smart pad to make square 213 | segs = cv2.resize(segs, (224, 224)) # resize to 224 214 | segs = torch.tensor(segs).unsqueeze(0).float() 215 | 216 | img = self.tsfm(img).float() 217 | 218 | theta = flip_smpl(self.pose_params[idx]) 219 | 220 | return img, segs, theta, self.shape_params[idx], self.joints[idx].view(-1, 3) 221 | 222 | 223 | def get_img_name(self, idx): 224 | return self.rgb_images[5*idx] 225 | -------------------------------------------------------------------------------- /src/UP3D_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | 4 | import torchvision.transforms as transforms 5 | 6 | import scipy.io 7 | from PIL import Image 8 | import os 9 | 10 | from .utils import smart_padding, smart_padding_depth 11 | import numpy as np 12 | import cv2 13 | import pickle as pkl 14 | 15 | 16 | class UP3DDatasetGTSMRGB(data.Dataset): 17 | def __init__(self, dstype): 18 | super(UP3DDatasetGTSMRGB, self).__init__() 19 | 20 | self.root_dir = "/media/HDD_2TB/yudhik/up-3d/up-3d/" 21 | 22 | if dstype == "train": 23 | listfile = "/media/HDD_2TB/yudhik/up-3d/up-3d/train.txt" 24 | elif dstype == "val": 25 | listfile = "/media/HDD_2TB/yudhik/up-3d/up-3d/val.txt" 26 | 27 | with open(listfile, "r") as f: 28 | self.img_names = f.readlines() 29 | 30 | self.img_names = [nm.strip()[1:] for nm in self.img_names] 31 | self.length = len(self.img_names) 32 | 33 | self.tsfm = transforms.Compose([ 34 | transforms.Lambda(lambda img : smart_padding(img)), 35 | transforms.Resize(224), 36 | transforms.ToTensor(), 37 | ]) 38 | 39 | 40 | def __len__(self): 41 | return self.length 42 | 43 | def get_image_name(self, idx): 44 | return self.img_names[idx] 45 | 46 | def __getitem__(self, idx): 47 | 48 | img_name = self.img_names[idx] 49 | seg_name = img_name[:-9] + "render_light.png" 50 | 51 | crop_name = img_name[:5] + "_fit_crop_info.txt" 52 | pkl_name = img_name[:5] + "_body.pkl" 53 | 54 | with open(os.path.join(self.root_dir, crop_name) ) as f: 55 | crop_params = f.readlines()[0] 56 | crop_params = [int(ii) for ii in 
crop_params.strip().split()] 57 | 58 | img_path = os.path.join(self.root_dir, img_name) 59 | img = Image.open(img_path).convert('RGB') 60 | img = img.crop((crop_params[4], crop_params[2], crop_params[5], crop_params[3])) 61 | img = self.tsfm(img).float() 62 | 63 | seg_path = os.path.join(self.root_dir, seg_name) 64 | seg = Image.open(seg_path).convert('RGB') 65 | seg = seg.crop((crop_params[4], crop_params[2], crop_params[5], crop_params[3])) 66 | seg = self.tsfm(seg).float() 67 | 68 | datapkl = pkl.load(open( os.path.join(self.root_dir, pkl_name), "rb" ), encoding='latin1') 69 | beta = torch.tensor(datapkl['betas'], dtype=torch.float) 70 | theta = torch.tensor(datapkl['pose'], dtype=torch.float) 71 | 72 | return img, seg, theta, beta 73 | 74 | 75 | 76 | class UP3DDatasetDPSMRGB(data.Dataset): 77 | def __init__(self, dstype): 78 | super(UP3DDatasetDPSMRGB, self).__init__() 79 | 80 | self.root_dir = "/media/HDD_2TB/yudhik/up-3d/up-3d/" 81 | self.dp_root_dir = "/home/saketh/Densepose/densepose/DensePoseData/up3d" 82 | 83 | if dstype == "train": 84 | listfile = "/media/HDD_2TB/yudhik/up-3d/up-3d/train.txt" 85 | elif dstype == "val": 86 | listfile = "/media/HDD_2TB/yudhik/up-3d/up-3d/val.txt" 87 | 88 | with open(listfile, "r") as f: 89 | self.img_names = f.readlines() 90 | 91 | self.img_names = [nm.strip()[1:] for nm in self.img_names] 92 | self.length = len(self.img_names) 93 | 94 | self.tsfm = transforms.Compose([ 95 | transforms.Lambda(lambda img : smart_padding(img)), 96 | transforms.Resize(224), 97 | transforms.ToTensor(), 98 | ]) 99 | 100 | 101 | def __len__(self): 102 | return self.length 103 | 104 | def __getitem__(self, idx): 105 | 106 | img_name = self.img_names[idx] 107 | iuv_mat_name = img_name.split(".")[0] + "_IUV.mat" 108 | 109 | if not os.path.exists( os.path.join(self.dp_root_dir, iuv_mat_name) ): 110 | idx = 0 111 | img_name = self.img_names[idx] 112 | iuv_mat_name = img_name.split(".")[0] + "_IUV.mat" 113 | 114 | crop_name = img_name[:5] + "_fit_crop_info.txt" 115 | pkl_name = img_name[:5] + "_body.pkl" 116 | 117 | with open(os.path.join(self.root_dir, crop_name) ) as f: 118 | crop_params = f.readlines()[0] 119 | crop_params = [int(ii) for ii in crop_params.strip().split()] 120 | 121 | img_path = os.path.join(self.root_dir, img_name) 122 | img = Image.open(img_path).convert('RGB') 123 | img = img.crop((crop_params[4], crop_params[2], crop_params[5], crop_params[3])) 124 | img = self.tsfm(img).float() 125 | 126 | segmask = scipy.io.loadmat(os.path.join(self.dp_root_dir, iuv_mat_name) )['segm'] 127 | segmask = segmask[crop_params[2]:crop_params[3], crop_params[4]:crop_params[5] ] 128 | segmask = smart_padding_depth(segmask) 129 | segmask = cv2.resize(segmask, (224, 224)) 130 | segmask = torch.tensor(segmask).unsqueeze(0).float() 131 | 132 | datapkl = pkl.load(open( os.path.join(self.root_dir, pkl_name), "rb" ), encoding='latin1') 133 | 134 | beta = torch.tensor(datapkl['betas'], dtype=torch.float) 135 | theta = torch.tensor(datapkl['pose'], dtype=torch.float) 136 | 137 | return img, segmask, theta, beta 138 | 139 | def get_name(self, idx): 140 | return self.img_names[idx] 141 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yudhik11/HumanMeshNet/7cfae2e654b27feca9d02abdad20028acc9584bb/src/__init__.py -------------------------------------------------------------------------------- /src/eval_metric.py: 
--------------------------------------------------------------------------------
 1 | from scipy.spatial import procrustes
 2 | import numpy as np
 3 | 
 4 | 
 5 | def procrustes_my(X, Y):
 6 |     Xnew, Ynew, d = procrustes(X, Y)
 7 |     return Xnew, Ynew
 8 | 
 9 | def MPJPE(X, Y):
10 |     return np.sqrt(((X - Y)**2).sum(1)).mean()
11 | 
12 | 
13 | def procrustes_hmr(S1, S2):
14 |     '''
15 |     Computes a similarity transform (sR, t) that takes
16 |     a set of 3D points S1 (3 x N) closest to a set of 3D points S2,
17 |     where R is a 3x3 rotation matrix, t 3x1 translation, s scale.
18 |     i.e. solves the orthogonal Procrustes problem.
19 |     '''
20 |     transposed = False
21 |     if S1.shape[0] != 3 and S1.shape[0] != 2:
22 |         S1 = S1.T
23 |         S2 = S2.T
24 |         transposed = True
25 |     assert(S2.shape[1] == S1.shape[1])
26 | 
27 |     # 1. Remove mean.
28 |     mu1 = S1.mean(axis=1, keepdims=True)
29 |     mu2 = S2.mean(axis=1, keepdims=True)
30 |     X1 = S1 - mu1
31 |     X2 = S2 - mu2
32 | 
33 |     # 2. Compute variance of X1 used for scale.
34 |     var1 = np.sum(X1**2)
35 | 
36 |     # 3. The outer product of X1 and X2.
37 |     K = X1.dot(X2.T)
38 | 
39 |     # 4. Solution that maximizes trace(R'K) is R=U*V', where U, V are
40 |     # singular vectors of K.
41 |     U, s, Vh = np.linalg.svd(K)
42 |     V = Vh.T
43 |     # Construct Z that fixes the orientation of R to get det(R)=1.
44 |     Z = np.eye(U.shape[0])
45 |     Z[-1, -1] *= np.sign(np.linalg.det(U.dot(V.T)))
46 |     # Construct R.
47 |     R = V.dot(Z.dot(U.T))
48 | 
49 |     # 5. Recover scale.
50 |     scale = np.trace(R.dot(K)) / var1
51 | 
52 |     # 6. Recover translation.
53 |     t = mu2 - scale*(R.dot(mu1))
54 | 
55 |     # 7. Error:
56 |     S1_hat = scale*R.dot(S1) + t
57 | 
58 |     if transposed:
59 |         S1_hat = S1_hat.T
60 |         S2 = S2.T
61 | 
62 |     return S1_hat, S2
63 | 
--------------------------------------------------------------------------------
/src/models.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | import scipy.io
 5 | from src.SMPL_pytorch import SMPL
 6 | 
 7 | from torchvision.models import resnet50, resnet18
 8 | 
 9 | class MyResnet18(nn.Module):
10 |     def __init__(self, num_out, pretrained=False):
11 |         super(MyResnet18, self).__init__()
12 | 
13 |         self.num_out = num_out
14 | 
15 |         self.resnet = resnet18(pretrained=pretrained)
16 |         self.lrelu = nn.LeakyReLU()
17 |         self.myfc = nn.Linear(1000, self.num_out)
18 | 
19 |     def forward(self, x):
20 |         x = self.resnet(x)
21 |         x = self.lrelu(x)
22 |         x = self.myfc(x)
23 |         return x
24 | 
25 | class MyFCNet2(nn.Module):
26 |     def __init__(self, num_inp, num_out):
27 |         super(MyFCNet2, self).__init__()
28 |         self.fc1 = nn.Linear(num_inp, 1024)
29 |         self.fc2 = nn.Linear(1024, 1024)
30 |         self.fc3 = nn.Linear(1024, 1024)
31 |         self.fc4 = nn.Linear(1024, num_out)
32 | 
33 |     def forward(self, x):
34 |         x = F.leaky_relu(self.fc1(x))
35 |         x = F.leaky_relu(self.fc2(x))
36 |         x = F.leaky_relu(self.fc3(x))
37 |         x = self.fc4(x)
38 |         return x
39 | 
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | import torchvision.transforms.functional as TVF
 5 | import torch
 6 | 
 7 | def get_joints_from_surf(verts, smpl):
 8 |     joint_x = torch.matmul(verts[:, :, 0], smpl.J_regressor)
 9 |     joint_y = torch.matmul(verts[:, :, 1], smpl.J_regressor)
10 |     joint_z = torch.matmul(verts[:, :, 2], smpl.J_regressor)
11 | 
12 |     joints = torch.stack([joint_x, joint_y, 
joint_z], dim = 2) 13 | return joints 14 | 15 | def SMPLJ_to_H36MJ16(joints, verts): 16 | SMPL_to_H36M_indices16 = torch.tensor([11, 5, 2, 1, 4, 10, 0, 6, 12, 12, 21, 19, 17, 17 | 16, 18, 20]).to(verts.device) 18 | head_index = 410 19 | 20 | new_joints = joints[:, SMPL_to_H36M_indices16, :] 21 | new_joints[:, 9, :] = verts[:, head_index, :] 22 | return new_joints 23 | 24 | def SMPLJ_to_H36MJ14(joints, verts): 25 | SMPL_to_H36M_indices14 = torch.tensor([11, 5, 2, 1, 4, 10, 12, 12, 21, 19, 17, 26 | 16, 18, 20]).to(verts.device) 27 | head_index = 410 28 | 29 | new_joints = joints[:, SMPL_to_H36M_indices14, :] 30 | new_joints[:, 7, :] = verts[:, head_index, :] 31 | return new_joints 32 | 33 | def flip_smpl(theta): 34 | theta = theta.reshape(-1, 3) 35 | indices = [0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 19, 18, 21, 20, 23, 22] 36 | theta = theta[indices] 37 | theta[:, 1:3] = theta[:, 1:3]*-1 38 | return theta.reshape(-1) 39 | 40 | 41 | def get_num_correct_class(pred, gt): 42 | """ 43 | Get correct number of predictions from predicted class affinities and groundtruth labels 44 | """ 45 | 46 | return (pred.argmax(1) == gt).sum() 47 | 48 | 49 | def read_verts(inp_mesh_path): 50 | verts = [] 51 | with open(inp_mesh_path, "r") as f: 52 | lines = f.readlines() 53 | for l in lines: 54 | l = l.strip().split() 55 | if l[0] == "v": 56 | verts.append(list(map(float, l[1:4]))) 57 | 58 | verts = np.array(verts) 59 | verts = torch.tensor(verts) 60 | return verts 61 | 62 | def show(img): 63 | """ 64 | Show torch image 65 | """ 66 | 67 | npimg = img.numpy() 68 | plt.imshow(np.transpose(npimg, (1,2,0)), interpolation='nearest') 69 | 70 | def show1C(img): 71 | """ 72 | Show 1C image 73 | """ 74 | plt.imshow(img, interpolation='nearest') 75 | 76 | 77 | 78 | def smart_padding(img): 79 | """ 80 | Smart padding of PIL image to pad it in minimal way to make it square 81 | Input : H*W*3 82 | Output : K*K*3 where K=max(H,W) 83 | """ 84 | desired_size = max(img.size[0], img.size[1]) 85 | 86 | delta_width = desired_size - img.size[0] 87 | delta_height = desired_size - img.size[1] 88 | pad_width = delta_width //2 89 | pad_height = delta_height //2 90 | return TVF.pad(img, (pad_width, pad_height, delta_width-pad_width, delta_height-pad_height)) 91 | 92 | 93 | def smart_padding_iuv(img): 94 | """ 95 | Smart padding of numpy image to pad it in minimal way to make it square 96 | Input : H*W*3 97 | Output : K*K*3 where K=max(H,W) 98 | """ 99 | desired_size = max(img.shape[0], img.shape[1]) 100 | 101 | delta_width = desired_size - img.shape[0] 102 | delta_height = desired_size - img.shape[1] 103 | pad_width = delta_width //2 104 | pad_height = delta_height //2 105 | return np.pad(img, [(0, 0), (pad_width, delta_width-pad_width), (pad_height, delta_height-pad_height)], 'constant') 106 | 107 | 108 | 109 | def smart_padding_depth(img): 110 | """ 111 | Smart padding of numpy depth image to pad it in minimal way to make it square 112 | Input : H*W 113 | Output : K*K where K=max(H,W) 114 | """ 115 | desired_size = max(img.shape[0], img.shape[1]) 116 | 117 | delta_width = desired_size - img.shape[0] 118 | delta_height = desired_size - img.shape[1] 119 | pad_width = delta_width //2 120 | pad_height = delta_height //2 121 | 122 | return np.pad(img, [(pad_width, delta_width-pad_width), (pad_height, delta_height-pad_height)], 'constant') 123 | 124 | 125 | def get_regularization_matrix(smpl): 126 | N = 6890 127 | regu = np.zeros((6890, 6890)) 128 | for a, b, c in smpl.faces.cpu().numpy().astype(np.long): 129 | regu[a, 
def get_regularization_matrix(smpl):
    """Build a row-normalised vertex-adjacency matrix from the SMPL faces.

    Multiplying vertex coordinates by this matrix replaces every vertex with
    the mean of its one-ring neighbours, smoothing the predicted surface.
    """
    N = 6890
    regu = np.zeros((N, N))
    # np.long was removed from numpy; use np.int64.
    for a, b, c in smpl.faces.cpu().numpy().astype(np.int64):
        regu[a, b] = 1
        regu[a, c] = 1
        regu[b, a] = 1
        regu[b, c] = 1
        regu[c, a] = 1
        regu[c, b] = 1

    degree = regu.sum(1)[:, np.newaxis]
    final_regu = regu / degree

    # Transposed because regularize_mesh right-multiplies by this matrix.
    return torch.tensor(final_regu.T, dtype=torch.float)


def regularize_mesh(surf, regu):
    """Smooth a batch of meshes by averaging each vertex with its neighbours."""
    inp_shape = surf.shape
    surf = surf.view(-1, 6890, 3)
    surf_x = torch.matmul(surf[:, :, 0], regu).unsqueeze(2)
    surf_y = torch.matmul(surf[:, :, 1], regu).unsqueeze(2)
    surf_z = torch.matmul(surf[:, :, 2], regu).unsqueeze(2)

    final_surf = torch.cat((surf_x, surf_y, surf_z), dim=2)
    return final_surf.view(inp_shape)


def orthographic_projection(X, camera, dataset='surreal'):
    """Perform orthographic projection of 3D points X using the camera parameters
    Args:
        X: size = [B, N, 3]
        camera: size = [B, 3] -- (scale, translation_x, translation_y)
    Returns:
        Projected 2D points -- size = [B, N, 2]
    """
    camera = camera.view(-1, 1, 3)
    X = X.view(-1, 24, 3)
    if dataset == 'surreal':
        # For SURREAL the image plane is spanned by the (z, y) axes.
        X_trans = torch.zeros_like(X[:, :, 1:])
        X_trans[:, :, 0] = X[:, :, 2]
        X_trans[:, :, 1] = X[:, :, 1]
        X_trans += camera[:, :, 1:]
    else:
        X_trans = X[:, :, :2] + camera[:, :, 1:]
    shape = X_trans.shape
    X_2d = (camera[:, :, 0] * X_trans.view(shape[0], -1)).view(shape)
    return X_2d


def visualise_keypoints(gt_keypoints2d, img=None):
    """Scatter-plot 2D keypoints, optionally over an image."""
    if img is None:
        plt.scatter(gt_keypoints2d[:, 0].cpu().numpy(), gt_keypoints2d[:, 1].cpu().numpy())
        plt.gca().set_aspect('equal', adjustable='box')
        plt.show()
    else:
        # Keypoints are normalised to [0, 1]; scale them to the 224px image.
        gt_keypoints2d = gt_keypoints2d.clone().cpu() * 224
        npimg = img.numpy()
        gt_keypoints2d = gt_keypoints2d.view(24, 2)
        plt.imshow(np.transpose(npimg, (1, 2, 0)), interpolation='nearest')
        plt.plot(gt_keypoints2d[:, 0].numpy(), gt_keypoints2d[:, 1].numpy(), 'o')
        plt.show()
--------------------------------------------------------------------------------
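The regulariser above amounts to one step of uniform (one-ring average) Laplacian smoothing. A minimal self-contained sketch of the same construction on a toy tetrahedron mesh, assuming only numpy and torch (the 4-vertex faces are hypothetical, not SMPL's):

```python
import numpy as np
import torch

# Toy mesh: a tetrahedron (4 vertices, 4 triangular faces).
faces = np.array([[0, 1, 2], [0, 1, 3], [0, 2, 3], [1, 2, 3]])
N = 4

# Same construction as get_regularization_matrix, with N = 4 instead of 6890.
regu = np.zeros((N, N))
for a, b, c in faces:
    regu[a, b] = regu[a, c] = 1
    regu[b, a] = regu[b, c] = 1
    regu[c, a] = regu[c, b] = 1
regu = torch.tensor((regu / regu.sum(1, keepdims=True)).T, dtype=torch.float)

# One smoothing step per coordinate, as in regularize_mesh.
verts = torch.rand(1, N, 3)
smoothed_x = torch.matmul(verts[:, :, 0], regu)

# In a tetrahedron every vertex is adjacent to all others, so each smoothed
# coordinate is the mean of the other three vertices' coordinates.
expected = (verts[:, :, 0].sum(1, keepdim=True) - verts[:, :, 0]) / 3
assert torch.allclose(smoothed_x, expected, atol=1e-6)
```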
/train/train_surreal.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data.dataloader import DataLoader
from torchvision.models import resnet18

import torch.nn.functional as F
import torch.nn as nn

import logging
import os
import sys
import numpy as np

sys.path.append(os.path.abspath("../"))

from src import utils

from src.models import MyResnet18, MyFCNet2
from src.Surreal_dataset import SurrealBothSMRGBCrop_FlipCorrect
from src.eval_metric import procrustes_hmr, MPJPE
from src.SMPL_pytorch import SMPL

from matplotlib import pyplot as plt

device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

num_workers = 6
batch_size = 64
learning_rate = 0.0001

save_model_path = "../../logs/DPSMRGB_Surf/"
if not os.path.exists(save_model_path):
    os.makedirs(save_model_path)  # makedirs also creates missing parent directories
logfile_path = "../../logs/DPSMRGB_Surf.log"

log_freq = 500
validate_freq = 1000
save_freq = 3000


logging.basicConfig(filename=logfile_path, filemode='a', level=logging.INFO, format='%(asctime)s => %(message)s')
logging.info(torch.__version__)
logging.info(device)
logging.info("------------------------------------------------------------------------")


class MyDRNetwork(torch.nn.Module):
    """Two-branch network: one ResNet-18 over the body-part segmentation and
    one over the RGB crop; the concatenated features regress both the SMPL
    surface (6890 x 3) and the joints (24 x 3)."""

    def __init__(self):
        super(MyDRNetwork, self).__init__()

        # Segmentation branch: replace conv1 so it accepts a 1-channel input.
        self.SM_cnn_s = resnet18(pretrained=True)
        self.SM_cnn_s.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,
                                        bias=False)

        self.RGB_cnn_s = resnet18(pretrained=True)

        self.R_fc_surface = MyFCNet2(2*1000, 6890*3)
        self.R_fc_joints = MyFCNet2(2*1000, 24*3)

    def forward(self, rgbs, segs):
        # Disabled prior-prediction variant (C_net / cluster_centers are not
        # defined in this class):
        # with torch.no_grad():
        #     prob = self.C_net(x)
        #     prob = F.softmax(prob, dim=1)
        #     idxs = prob.argmax(dim=1)
        #     prior = self.cluster_centers[idxs].to(prob.device)

        feat_segs_s = self.SM_cnn_s(segs)
        feat_rgbs_s = self.RGB_cnn_s(rgbs)
        y = torch.cat((feat_segs_s, feat_rgbs_s), dim=1)
        surf = self.R_fc_surface(y)
        joints = self.R_fc_joints(y)

        return joints, surf


train_dataset = SurrealBothSMRGBCrop_FlipCorrect("train")
val_dataset = SurrealBothSMRGBCrop_FlipCorrect("val")

train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size, num_workers=num_workers, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=batch_size, num_workers=num_workers)


model = MyDRNetwork().to(device)
smpl = SMPL().to(device)

criterion1 = torch.nn.MSELoss().to(device)
criterion2 = torch.nn.CrossEntropyLoss().to(device)  # unused in this script

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

reg_mat = utils.get_regularization_matrix(smpl=smpl).to(device)


start_epoch = 0
num_epochs = 50


def validate():
    model.eval()
    val_loss = 0.0
    mpjpe_j = 0.0
    mpjpe_j_pa = 0.0
    mpjpe_s = 0.0
    mpjpe_s_pa = 0.0
    mpjpe_js = 0.0
    mpjpe_js_pa = 0.0

    with torch.no_grad():
        for i, (imgs, _, dpsegs, gt_theta, gt_beta, gt_joints) in enumerate(val_dataloader):
            imgs = imgs.to(device)
            dpsegs = dpsegs.to(device)
            gt_theta = gt_theta.to(device)
            gt_beta = gt_beta.to(device)

            # Ground-truth surface and joints from the SMPL layer.
            #zero_shape = torch.zeros(imgs.size(0), 10).to(device)
            gt_S, gt_J, _ = smpl(gt_beta, gt_theta, get_skin=True)

            out_J, out_S = model(imgs, dpsegs)
            out_S = utils.regularize_mesh(regu=reg_mat, surf=out_S)

            out_S = out_S.view(gt_S.shape)
            out_J = out_J.view(gt_J.shape)

            # Joints regressed from the predicted surface, for the JS metrics.
            out_JS = utils.get_joints_from_surf(out_S, smpl)

            loss = criterion1(out_S, gt_S) + 100*criterion1(out_J, gt_J)

            val_loss += loss.item()

            out_J = out_J.cpu().numpy()
            gt_J = gt_J.cpu().numpy()

            out_S = out_S.cpu().numpy()
            gt_S = gt_S.cpu().numpy()
            out_JS = out_JS.cpu().numpy()

            for j in range(imgs.size(0)):
                mpjpe_j += MPJPE(out_J[j], gt_J[j])
                oj, gj = procrustes_hmr(out_J[j], gt_J[j])
                mpjpe_j_pa += MPJPE(oj, gj)

                mpjpe_s += MPJPE(out_S[j], gt_S[j])
                # Renamed from os/gs to avoid shadowing the os module.
                o_s, g_s = procrustes_hmr(out_S[j], gt_S[j])
                mpjpe_s_pa += MPJPE(o_s, g_s)

                mpjpe_js += MPJPE(out_JS[j], gt_J[j])
                oj, gj = procrustes_hmr(out_JS[j], gt_J[j])
                mpjpe_js_pa += MPJPE(oj, gj)

    logging.info("Validation Loss : {:0.6f} | MPJPE_J : {:0.6f} | MPJPE_J_PA : {:0.6f}"
                 " | MPJPE_S : {:0.6f} | MPJPE_S_PA : {:0.6f} | MPJPE_JS : {:0.6f} | MPJPE_JS_PA : {:0.6f}".format(
                     val_loss / len(val_dataloader),
                     mpjpe_j / len(val_dataset),
                     mpjpe_j_pa / len(val_dataset),
                     mpjpe_s / len(val_dataset),
                     mpjpe_s_pa / len(val_dataset),
                     mpjpe_js / len(val_dataset),
                     mpjpe_js_pa / len(val_dataset),
                 ))

    model.train()
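# --- Editor's note (illustrative, not in the original file) ---
# MPJPE is the mean per-joint Euclidean distance between prediction and
# ground truth. The *_PA variants first rigidly align the prediction to the
# ground truth via Procrustes analysis, so they measure articulation error
# independent of global rotation, translation and scale:
# >>> err = MPJPE(out_J[0], gt_J[0])              # raw error on (24, 3) arrays
# >>> p_a, g_a = procrustes_hmr(out_J[0], gt_J[0])  # rigidly aligned copies
# >>> MPJPE(p_a, g_a)                             # typically <= err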
def train(epoch):
    model.train()
    total_iters = len(train_dataloader)

    total_loss = 0.0
    running_loss = 0.0

    for i, (imgs, _, dpsegs, gt_theta, gt_beta, gt_joints) in enumerate(train_dataloader):
        imgs = imgs.to(device)
        dpsegs = dpsegs.to(device)
        gt_theta = gt_theta.to(device)
        gt_beta = gt_beta.to(device)

        # Ground-truth surface and joints from the SMPL layer.
        #zero_shape = torch.zeros(imgs.size(0), 10).to(device)
        gt_S, gt_J, _ = smpl(gt_beta, gt_theta, get_skin=True)

        out_J, out_S = model(imgs, dpsegs)
        out_S = utils.regularize_mesh(surf=out_S, regu=reg_mat)

        out_S = out_S.view(gt_S.shape)
        out_J = out_J.view(gt_J.shape)

        # Computed for consistency with validate(); not used in the loss.
        out_JS = utils.get_joints_from_surf(out_S, smpl)

        optimizer.zero_grad()

        # Surface loss plus a heavily weighted joint loss.
        loss = criterion1(out_S, gt_S) + 100*criterion1(out_J, gt_J)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        total_loss += loss.item()

        iters = epoch * total_iters + i + 1
        if iters % log_freq == 0:
            logging.info("Epoch {:02d} [{:05d}/{:05d}] Loss : {:.6f}".format(
                epoch, i, total_iters, running_loss/log_freq
            ))
            running_loss = 0.0
        if iters % validate_freq == 0:
            validate()
        if iters % save_freq == 0:
            torch.save(model.state_dict(), os.path.join(save_model_path, 'e{}-i{}.ckpt'.format(epoch, i)))

    logging.info("Epoch {} finished. Total training loss : {:0.6f}".format(epoch, total_loss))
    validate()
    torch.save(model.state_dict(), os.path.join(save_model_path, 'e{}-i{}.ckpt'.format(epoch, i)))


for epoch in range(start_epoch, num_epochs):
    logging.info("Epoch {} started".format(epoch))
    train(epoch)
--------------------------------------------------------------------------------
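For completeness, a minimal inference sketch under stated assumptions: the checkpoint filename is hypothetical, `MyDRNetwork` is assumed to be importable or redefined from train_surreal.py above, and `rgb`/`seg` stand in for a preprocessed 224x224 RGB crop and 1-channel part-segmentation map as produced by the dataset classes.

```python
import torch

# Hypothetical checkpoint path; the e{epoch}-i{iter}.ckpt naming matches the
# saves in train_surreal.py above.
ckpt = "../../logs/DPSMRGB_Surf/e49-i1000.ckpt"

model = MyDRNetwork()
model.load_state_dict(torch.load(ckpt, map_location="cpu"))
model.eval()

# Dummy stand-ins for one preprocessed sample (batch of 1).
rgb = torch.rand(1, 3, 224, 224)  # RGB crop after smart_padding + Resize
seg = torch.rand(1, 1, 224, 224)  # body-part segmentation map

with torch.no_grad():
    joints, surf = model(rgb, seg)

joints = joints.view(-1, 24, 3)   # predicted SMPL joint positions
verts = surf.view(-1, 6890, 3)    # predicted SMPL mesh vertices
print(joints.shape, verts.shape)
```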