├── common ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── vis.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── dir_utils.cpython-36.pyc │ │ └── pose_utils.cpython-36.pyc │ ├── dir_utils.py │ ├── vis.py │ └── pose_utils.py ├── __pycache__ │ ├── base.cpython-36.pyc │ ├── logger.cpython-36.pyc │ ├── timer.cpython-36.pyc │ └── loss_recorder.cpython-36.pyc ├── nets │ ├── __pycache__ │ │ ├── loss.cpython-36.pyc │ │ ├── resnet.cpython-36.pyc │ │ └── balanced_parallel.cpython-36.pyc │ ├── loss.py │ ├── resnet.py │ └── balanced_parallel.py ├── timer.py ├── logger.py └── base.py ├── assets ├── 1.png └── 2.png ├── main ├── __pycache__ │ ├── model.cpython-36.pyc │ └── config.cpython-36.pyc ├── config.py ├── model.py ├── train.py └── test.py ├── data ├── __pycache__ │ └── dataset.cpython-36.pyc ├── MPII │ ├── __pycache__ │ │ └── MPII.cpython-36.pyc │ └── MPII.py ├── Human36M │ ├── __pycache__ │ │ └── Human36M.cpython-36.pyc │ └── Human36M.py └── dataset.py ├── tool └── preprocess_h36m.m └── README.md /common/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/1.png -------------------------------------------------------------------------------- /assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/2.png -------------------------------------------------------------------------------- /main/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/main/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/base.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/base.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/timer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/timer.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/data/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- /common/nets/__pycache__/balanced_parallel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/nets/__pycache__/balanced_parallel.cpython-36.pyc -------------------------------------------------------------------------------- /common/utils/dir_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | def make_folder(folder_name): 5 | if not os.path.exists(folder_name): 6 | os.makedirs(folder_name) 7 | 8 | def add_pypath(path): 9 | if path not in sys.path: 10 | sys.path.insert(0, path) 11 | 12 | -------------------------------------------------------------------------------- /common/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | self.warm_up = 0 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | if self.warm_up < 10: 28 | self.warm_up += 1 29 | return self.diff 30 | else: 31 | self.total_time += self.diff 32 | self.calls += 1 33 | self.average_time = self.total_time / self.calls 34 | 35 | if average: 36 | return self.average_time 37 | else: 38 | return self.diff 39 | -------------------------------------------------------------------------------- /common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | OK = '\033[92m' 5 | WARNING = '\033[93m' 6 | FAIL = '\033[91m' 7 | END = '\033[0m' 8 | 9 | PINK = '\033[95m' 10 | BLUE = '\033[94m' 11 | GREEN = OK 12 | RED = FAIL 13 | WHITE = END 14 | YELLOW = WARNING 15 | 16 | class colorlogger(): 17 | def __init__(self, log_dir, log_name='train_logs.txt'): 18 | # set log 19 | self._logger = logging.getLogger(log_name) 20 | self._logger.setLevel(logging.INFO) 21 | log_file = os.path.join(log_dir, log_name) 22 | if not os.path.exists(log_dir): 23 | os.makedirs(log_dir) 24 | file_log = logging.FileHandler(log_file, mode='a') 25 | file_log.setLevel(logging.INFO) 26 | console_log = logging.StreamHandler() 27 | console_log.setLevel(logging.INFO) 28 | formatter = logging.Formatter( 29 | "{}%(asctime)s{} %(message)s".format(GREEN, END), 30 | "%m-%d %H:%M:%S") 31 | file_log.setFormatter(formatter) 32 | console_log.setFormatter(formatter) 33 | self._logger.addHandler(file_log) 34 | self._logger.addHandler(console_log) 35 | 36 | def debug(self, msg): 37 | self._logger.debug(str(msg)) 38 | 39 | def info(self, msg): 40 | self._logger.info(str(msg)) 41 | 42 | def warning(self, msg): 43 | self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) 44 | 45 | def critical(self, msg): 46 | self._logger.critical(RED + 'CRI: ' + str(msg) + END) 47 | 
48 | def error(self, msg): 49 | self._logger.error(RED + 'ERR: ' + str(msg) + END) 50 | 51 | -------------------------------------------------------------------------------- /common/nets/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from utils.pose_utils import pixel2cam, warp_coord_to_original 6 | from config import cfg 7 | 8 | def _assert_no_grad(tensor): 9 | assert not tensor.requires_grad, \ 10 | "nn criterions don't compute the gradient w.r.t. targets - please " \ 11 | "mark these tensors as not requiring gradients" 12 | 13 | def soft_argmax(heatmaps, joint_num): 14 | assert isinstance(heatmaps, torch.Tensor) 15 | 16 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim*cfg.output_shape[0]*cfg.output_shape[1])) 17 | heatmaps = F.softmax(heatmaps, 2) 18 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim, cfg.output_shape[0], cfg.output_shape[1])) 19 | 20 | accu_x = heatmaps.sum(dim=(2,3)) 21 | accu_y = heatmaps.sum(dim=(2,4)) 22 | accu_z = heatmaps.sum(dim=(3,4)) 23 | 24 | accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[1]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0] 25 | accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[0]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0] 26 | accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,cfg.depth_dim+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0] 27 | 28 | accu_x = accu_x.sum(dim=2, keepdim=True) -1 29 | accu_y = accu_y.sum(dim=2, keepdim=True) -1 30 | accu_z = accu_z.sum(dim=2, keepdim=True) -1 31 | 32 | coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2) 33 | 34 | return coord_out 35 | 36 | class JointLocationLoss(nn.Module): 37 | def __init__(self): 38 | super(JointLocationLoss, self).__init__() 39 | 40 | def forward(self, heatmap_out, gt_coord, gt_vis, gt_have_depth): 41 | 42 | joint_num = gt_coord.shape[1] 43 | coord_out = soft_argmax(heatmap_out, joint_num) 44 | 45 | _assert_no_grad(gt_coord) 46 | _assert_no_grad(gt_vis) 47 | _assert_no_grad(gt_have_depth) 48 | 49 | loss = torch.abs(coord_out - gt_coord) * gt_vis 50 | loss = (loss[:,:,0] + loss[:,:,1] + loss[:,:,2] * gt_have_depth)/3. 51 | 52 | return loss.mean() 53 | 54 | -------------------------------------------------------------------------------- /main/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import numpy as np 5 | 6 | class Config: 7 | 8 | ## dataset 9 | trainset = ['Human36M', 'MPII'] # Human36M, MPII. !!Note that 0th db is reference db!! 
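# (The 0th entry also fixes the joint set the network is built for:
#  Trainer._make_batch_generator in common/base.py reads
#  trainset_loader.joint_num[0] to size the model output.)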
10 | testset = 'Human36M' # Human36M, MPII 11 | 12 | ## directory 13 | cur_dir = osp.dirname(os.path.abspath(__file__)) 14 | root_dir = osp.join(cur_dir, '..') 15 | data_dir = osp.join(root_dir, 'data') 16 | output_dir = osp.join(root_dir, 'output') 17 | model_dir = osp.join(output_dir, 'model_dump') 18 | vis_dir = osp.join(output_dir, 'vis') 19 | log_dir = osp.join(output_dir, 'log') 20 | result_dir = osp.join(output_dir, 'result') 21 | 22 | ## model setting 23 | resnet_type = 50 # 18, 34, 50, 101, 152 24 | 25 | ## input, output 26 | input_shape = (256, 256) 27 | output_shape = (input_shape[0]//4, input_shape[1]//4) 28 | depth_dim = 64 29 | bbox_3d_shape = (2000, 2000, 2000) # depth, height, width 30 | pixel_mean = (0.485, 0.456, 0.406) 31 | pixel_std = (0.229, 0.224, 0.225) 32 | 33 | ## training config 34 | lr_dec_epoch = [15, 17] 35 | end_epoch = 20 36 | lr = 1e-3 37 | lr_dec_factor = 0.1 38 | optimizer = 'adam' 39 | weight_decay = 1e-5 40 | batch_size = 32 41 | 42 | ## testing config 43 | test_batch_size = 32 44 | flip_test = True 45 | 46 | ## others 47 | num_thread = 20 #8 48 | gpu_ids = '0' 49 | num_gpus = 1 50 | continue_train = False 51 | 52 | def set_args(self, gpu_ids, continue_train=False): 53 | self.gpu_ids = gpu_ids 54 | self.num_gpus = len(self.gpu_ids.split(',')) 55 | self.continue_train = continue_train 56 | os.environ["CUDA_VISIBLE_DEVICES"] = self.gpu_ids 57 | print('>>> Using GPU: {}'.format(self.gpu_ids)) 58 | 59 | cfg = Config() 60 | 61 | sys.path.insert(0, osp.join(cfg.root_dir, 'common')) 62 | from utils.dir_utils import add_pypath, make_folder 63 | add_pypath(osp.join(cfg.data_dir)) 64 | for i in range(len(cfg.trainset)): 65 | add_pypath(osp.join(cfg.data_dir, cfg.trainset[i])) 66 | add_pypath(osp.join(cfg.data_dir, cfg.testset)) 67 | make_folder(cfg.model_dir) 68 | make_folder(cfg.vis_dir) 69 | make_folder(cfg.log_dir) 70 | make_folder(cfg.result_dir) 71 | 72 | -------------------------------------------------------------------------------- /main/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from nets.resnet import ResNetBackbone 3 | from config import cfg 4 | 5 | class HeadNet(nn.Module): 6 | 7 | def __init__(self, joint_num): 8 | self.inplanes = 2048 9 | self.outplanes = 256 10 | 11 | super(HeadNet, self).__init__() 12 | 13 | self.deconv_layers = self._make_deconv_layer(3) 14 | self.final_layer = nn.Conv2d( 15 | in_channels=self.inplanes, 16 | out_channels=joint_num * cfg.depth_dim, 17 | kernel_size=1, 18 | stride=1, 19 | padding=0 20 | ) 21 | 22 | def _make_deconv_layer(self, num_layers): 23 | layers = [] 24 | for i in range(num_layers): 25 | layers.append( 26 | nn.ConvTranspose2d( 27 | in_channels=self.inplanes, 28 | out_channels=self.outplanes, 29 | kernel_size=4, 30 | stride=2, 31 | padding=1, 32 | output_padding=0, 33 | bias=False)) 34 | layers.append(nn.BatchNorm2d(self.outplanes)) 35 | layers.append(nn.ReLU(inplace=True)) 36 | self.inplanes = self.outplanes 37 | 38 | return nn.Sequential(*layers) 39 | 40 | def forward(self, x): 41 | x = self.deconv_layers(x) 42 | x = self.final_layer(x) 43 | 44 | return x 45 | 46 | def init_weights(self): 47 | for name, m in self.deconv_layers.named_modules(): 48 | if isinstance(m, nn.ConvTranspose2d): 49 | nn.init.normal_(m.weight, std=0.001) 50 | elif isinstance(m, nn.BatchNorm2d): 51 | nn.init.constant_(m.weight, 1) 52 | nn.init.constant_(m.bias, 0) 53 | for m in self.final_layer.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | 
nn.init.normal_(m.weight, std=0.001) 56 | nn.init.constant_(m.bias, 0) 57 | 58 | class ResPoseNet(nn.Module): 59 | def __init__(self, backbone, head): 60 | super(ResPoseNet, self).__init__() 61 | self.backbone = backbone 62 | self.head = head 63 | 64 | def forward(self, x): 65 | x = self.backbone(x) 66 | x = self.head(x) 67 | return x 68 | 69 | def get_pose_net(cfg, is_train, joint_num): 70 | 71 | backbone = ResNetBackbone(cfg.resnet_type) 72 | head_net = HeadNet(joint_num) 73 | if is_train: 74 | backbone.init_weights() 75 | head_net.init_weights() 76 | 77 | model = ResPoseNet(backbone, head_net) 78 | return model 79 | 80 | -------------------------------------------------------------------------------- /common/utils/vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | from config import cfg 8 | 9 | def vis_keypoints(img, kps, kps_lines, kp_thresh=0.4, alpha=1): 10 | 11 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 12 | cmap = plt.get_cmap('rainbow') 13 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 14 | colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] 15 | 16 | # Perform the drawing on a copy of the image, to allow for blending. 17 | kp_mask = np.copy(img) 18 | 19 | # Draw the keypoints. 20 | for l in range(len(kps_lines)): 21 | i1 = kps_lines[l][0] 22 | i2 = kps_lines[l][1] 23 | p1 = kps[0, i1].astype(np.int32), kps[1, i1].astype(np.int32) 24 | p2 = kps[0, i2].astype(np.int32), kps[1, i2].astype(np.int32) 25 | if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: 26 | cv2.line( 27 | kp_mask, p1, p2, 28 | color=colors[l], thickness=2, lineType=cv2.LINE_AA) 29 | if kps[2, i1] > kp_thresh: 30 | cv2.circle( 31 | kp_mask, p1, 32 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 33 | if kps[2, i2] > kp_thresh: 34 | cv2.circle( 35 | kp_mask, p2, 36 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 37 | 38 | # Blend the keypoints. 39 | return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) 40 | 41 | def vis_3d_skeleton(kpt_3d, kpt_3d_vis, kps_lines, filename=None): 42 | 43 | fig = plt.figure() 44 | ax = fig.add_subplot(111, projection='3d') 45 | 46 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 
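    # Note: the plotting below passes (x, z, -y) to matplotlib, i.e. depth is
    # drawn on the horizontal 'Z' axis and the negated image-vertical axis (-y)
    # points upward, so the skeleton is displayed upright.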
47 | cmap = plt.get_cmap('rainbow') 48 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 49 | colors = [np.array((c[2], c[1], c[0])) for c in colors] 50 | 51 | for l in range(len(kps_lines)): 52 | i1 = kps_lines[l][0] 53 | i2 = kps_lines[l][1] 54 | x = np.array([kpt_3d[i1,0], kpt_3d[i2,0]]) 55 | y = np.array([kpt_3d[i1,1], kpt_3d[i2,1]]) 56 | z = np.array([kpt_3d[i1,2], kpt_3d[i2,2]]) 57 | 58 | if kpt_3d_vis[i1,0] > 0 and kpt_3d_vis[i2,0] > 0: 59 | ax.plot(x, z, -y, c=colors[l], linewidth=2) 60 | if kpt_3d_vis[i1,0] > 0: 61 | ax.scatter(kpt_3d[i1,0], kpt_3d[i1,2], -kpt_3d[i1,1], c=colors[l], marker='o') 62 | if kpt_3d_vis[i2,0] > 0: 63 | ax.scatter(kpt_3d[i2,0], kpt_3d[i2,2], -kpt_3d[i2,1], c=colors[l], marker='o') 64 | 65 | x_r = np.array([0, cfg.input_shape[1]], dtype=np.float32) 66 | y_r = np.array([0, cfg.input_shape[0]], dtype=np.float32) 67 | z_r = np.array([0, 1], dtype=np.float32) 68 | 69 | if filename is None: 70 | ax.set_title('3D vis') 71 | else: 72 | ax.set_title(filename) 73 | 74 | ax.set_xlabel('X Label') 75 | ax.set_ylabel('Z Label') 76 | ax.set_zlabel('Y Label') 77 | #ax.set_xlim([0,cfg.input_shape[1]]) 78 | #ax.set_ylim([0,1]) 79 | #ax.set_zlim([-cfg.input_shape[0],0]) 80 | ax.legend() 81 | 82 | plt.show() 83 | cv2.waitKey(0) 84 | 85 | -------------------------------------------------------------------------------- /common/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torchvision.models.resnet import BasicBlock, Bottleneck 3 | from torchvision.models.resnet import model_zoo, model_urls 4 | 5 | class ResNetBackbone(nn.Module): 6 | 7 | def __init__(self, resnet_type): 8 | 9 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 10 | 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 11 | 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 12 | 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 13 | 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} 14 | block, layers, channels, name = resnet_spec[resnet_type] 15 | 16 | self.name = name 17 | self.inplanes = 64 18 | super(ResNetBackbone, self).__init__() 19 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 20 | bias=False) 21 | self.bn1 = nn.BatchNorm2d(64) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 24 | self.layer1 = self._make_layer(block, 64, layers[0]) 25 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 26 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 27 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 28 | 29 | for m in self.modules(): 30 | if isinstance(m, nn.Conv2d): 31 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 32 | nn.init.normal_(m.weight, mean=0, std=0.001) 33 | elif isinstance(m, nn.BatchNorm2d): 34 | nn.init.constant_(m.weight, 1) 35 | nn.init.constant_(m.bias, 0) 36 | 37 | def _make_layer(self, block, planes, blocks, stride=1): 38 | downsample = None 39 | if stride != 1 or self.inplanes != planes * block.expansion: 40 | downsample = nn.Sequential( 41 | nn.Conv2d(self.inplanes, planes * block.expansion, 42 | kernel_size=1, stride=stride, bias=False), 43 | nn.BatchNorm2d(planes * block.expansion), 44 | ) 45 | 46 | layers = [] 47 | layers.append(block(self.inplanes, planes, stride, downsample)) 48 | self.inplanes = planes * 
block.expansion 49 | for i in range(1, blocks): 50 | layers.append(block(self.inplanes, planes)) 51 | 52 | return nn.Sequential(*layers) 53 | 54 | def forward(self, x): 55 | x = self.conv1(x) 56 | x = self.bn1(x) 57 | x = self.relu(x) 58 | x = self.maxpool(x) 59 | 60 | x = self.layer1(x) 61 | x = self.layer2(x) 62 | x = self.layer3(x) 63 | x = self.layer4(x) 64 | 65 | return x 66 | 67 | def init_weights(self): 68 | org_resnet = model_zoo.load_url(model_urls[self.name]) 69 | # drop orginal resnet fc layer, add 'None' in case of no fc layer, that will raise error 70 | org_resnet.pop('fc.weight', None) 71 | org_resnet.pop('fc.bias', None) 72 | self.load_state_dict(org_resnet) 73 | print("Initialize resnet from model zoo") 74 | 75 | 76 | -------------------------------------------------------------------------------- /main/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from config import cfg 3 | from base import Trainer 4 | import torch.backends.cudnn as cudnn 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 9 | parser.add_argument('--continue', dest='continue_train', action='store_true') 10 | args = parser.parse_args() 11 | 12 | if not args.gpu_ids: 13 | assert 0, print("Input GPU ids") 14 | 15 | if '-' in args.gpu_ids: 16 | gpus = args.gpu_ids.split('-') 17 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 18 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 19 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 20 | 21 | return args 22 | 23 | def main(): 24 | 25 | # argument parse and create log 26 | args = parse_args() 27 | cfg.set_args(args.gpu_ids, args.continue_train) 28 | cudnn.fastest = True 29 | cudnn.benchmark = True 30 | cudnn.deterministic = False 31 | cudnn.enabled = True 32 | 33 | trainer = Trainer(cfg) 34 | trainer._make_batch_generator() 35 | trainer._make_model() 36 | 37 | # train 38 | for epoch in range(trainer.start_epoch, cfg.end_epoch): 39 | trainer.scheduler.step() 40 | trainer.tot_timer.tic() 41 | trainer.read_timer.tic() 42 | 43 | for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator): 44 | trainer.read_timer.toc() 45 | trainer.gpu_timer.tic() 46 | 47 | trainer.optimizer.zero_grad() 48 | 49 | input_img = input_img.cuda() 50 | joint_img = joint_img.cuda() 51 | joint_vis = joint_vis.cuda() 52 | joints_have_depth = joints_have_depth.cuda() 53 | 54 | 55 | # forward 56 | heatmap_out = trainer.model(input_img) 57 | 58 | # backward 59 | JointLocationLoss = trainer.JointLocationLoss(heatmap_out, joint_img, joint_vis, joints_have_depth) 60 | 61 | loss = JointLocationLoss 62 | 63 | loss.backward() 64 | trainer.optimizer.step() 65 | 66 | trainer.gpu_timer.toc() 67 | 68 | screen = [ 69 | 'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch), 70 | 'lr: %g' % (trainer.scheduler.get_lr()[0]), 71 | 'speed: %.2f(%.2fs r%.2f)s/itr' % ( 72 | trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time), 73 | '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. 
* trainer.itr_per_epoch), 74 | '%s: %.4f' % ('loss_loc', JointLocationLoss.detach()), 75 | ] 76 | trainer.logger.info(' '.join(screen)) 77 | 78 | trainer.tot_timer.toc() 79 | trainer.tot_timer.tic() 80 | trainer.read_timer.tic() 81 | 82 | trainer.save_model({ 83 | 'epoch': epoch, 84 | 'network': trainer.model.state_dict(), 85 | 'optimizer': trainer.optimizer.state_dict(), 86 | 'scheduler': trainer.scheduler.state_dict(), 87 | }, epoch) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /main/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import argparse 4 | from tqdm import tqdm 5 | import numpy as np 6 | import cv2 7 | from config import cfg 8 | import torch 9 | from base import Tester 10 | from torch.nn.parallel.scatter_gather import gather 11 | from nets.loss import soft_argmax 12 | from utils.vis import vis_keypoints 13 | from utils.pose_utils import flip 14 | import torch.backends.cudnn as cudnn 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 19 | parser.add_argument('--test_epoch', type=str, dest='test_epoch') 20 | args = parser.parse_args() 21 | 22 | # test gpus 23 | if not args.gpu_ids: 24 | args.gpu_ids = str(np.argmin(mem_info())) 25 | 26 | if '-' in args.gpu_ids: 27 | gpus = args.gpu_ids.split('-') 28 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 29 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 30 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 31 | 32 | assert args.test_epoch, 'Test epoch is required.' 33 | return args 34 | 35 | def main(): 36 | 37 | args = parse_args() 38 | cfg.set_args(args.gpu_ids) 39 | cudnn.fastest = True 40 | cudnn.benchmark = True 41 | cudnn.deterministic = False 42 | cudnn.enabled = True 43 | 44 | tester = Tester(cfg, args.test_epoch) 45 | tester._make_batch_generator() 46 | tester._make_model() 47 | 48 | preds = [] 49 | 50 | with torch.no_grad(): 51 | for itr, input_img in enumerate(tqdm(tester.batch_generator)): 52 | 53 | input_img = input_img.cuda() 54 | 55 | # forward 56 | heatmap_out = tester.model(input_img) 57 | if cfg.num_gpus > 1: 58 | heatmap_out = gather(heatmap_out,0) 59 | coord_out = soft_argmax(heatmap_out, tester.joint_num) 60 | 61 | if cfg.flip_test: 62 | flipped_input_img = flip(input_img, dims=3) 63 | flipped_heatmap_out = tester.model(flipped_input_img) 64 | if cfg.num_gpus > 1: 65 | flipped_heatmap_out = gather(flipped_heatmap_out,0) 66 | flipped_coord_out = soft_argmax(flipped_heatmap_out, tester.joint_num) 67 | flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1 68 | for pair in tester.flip_pairs: 69 | flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone() 70 | coord_out = (coord_out + flipped_coord_out)/2. 
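                # Flip-test summary: the input is mirrored along the width axis,
                # run through the network again, the predicted x coordinates are
                # mirrored back (cfg.output_shape[1] - x - 1), left/right joint
                # pairs are swapped, and the result is averaged with the
                # prediction on the original image.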
71 | 72 | vis = False 73 | if vis: 74 | filename = str(itr) 75 | tmpimg = input_img[0].cpu().numpy() 76 | tmpimg = tmpimg * cfg.pixel_std.reshape(3,1,1) + cfg.pixel_mean.reshape(3,1,1) 77 | tmpimg = tmpimg.astype(np.uint8) 78 | tmpimg = tmpimg[::-1, :, :] 79 | tmpimg = np.transpose(tmpimg,(1,2,0)).copy() 80 | tmpkps = np.zeros((3,tester.joint_num)) 81 | tmpkps[:2,:] = coord_out[0,:,:2].transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0] 82 | tmpkps[2,:] = 1 83 | tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton) 84 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_output.jpg'), tmpimg) 85 | 86 | coord_out = coord_out.cpu().numpy() 87 | preds.append(coord_out) 88 | 89 | # evaluate 90 | preds = np.concatenate(preds, axis=0) 91 | tester._evaluate(preds, cfg.result_dir) 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /data/MPII/MPII.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import json 5 | from pycocotools.coco import COCO 6 | from config import cfg 7 | 8 | class MPII: 9 | 10 | def __init__(self, data_split): 11 | self.data_split = data_split 12 | self.img_dir = osp.join('..', 'data', 'MPII') 13 | self.train_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'train.json') 14 | self.test_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'test.json') 15 | self.joint_num = 16 16 | self.joints_name = ('R_Ankle', 'R_Knee', 'R_Hip', 'L_Hip', 'L_Knee', 'L_Ankle', 'Pelvis', 'Thorax', 'Neck', 'Head', 'R_Wrist', 'R_Elbow', 'R_Shoulder', 'L_Shoulder', 'L_Elbow', 'L_Wrist') 17 | self.flip_pairs = ( (0, 5), (1, 4), (2, 3), (10, 15), (11, 14), (12, 13) ) 18 | self.skeleton = ( (0, 1), (1, 2), (2, 6), (7, 12), (12, 11), (11, 10), (5, 4), (4, 3), (3, 6), (7, 13), (13, 14), (14, 15), (6, 7), (7, 8), (8, 9) ) 19 | self.lr_skeleton = ( ((0,0),(0,0)) ) # dumy value 20 | 21 | self.joints_have_depth = False 22 | 23 | def load_data(self): 24 | 25 | if self.data_split == 'train': 26 | db = COCO(self.train_annot_path) 27 | elif self.data_split == 'test': 28 | db = COCO(self.test_annot_path) 29 | else: 30 | print('Unknown data subset') 31 | assert 0 32 | 33 | data = [] 34 | for aid in db.anns.keys(): 35 | ann = db.anns[aid] 36 | 37 | if (ann['image_id'] not in db.imgs) or ann['iscrowd'] or (ann['num_keypoints'] == 0): 38 | continue 39 | 40 | # sanitize bboxes 41 | x, y, w, h = ann['bbox'] 42 | img = db.loadImgs(ann['image_id'])[0] 43 | width, height = img['width'], img['height'] 44 | x1 = np.max((0, x)) 45 | y1 = np.max((0, y)) 46 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 47 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 48 | if ann['area'] > 0 and x2 >= x1 and y2 >= y1: 49 | bbox = np.array([x1, y1, x2-x1, y2-y1]) 50 | else: 51 | continue 52 | 53 | # aspect ratio preserving bbox 54 | w = bbox[2] 55 | h = bbox[3] 56 | c_x = bbox[0] + w/2. 57 | c_y = bbox[1] + h/2. 58 | aspect_ratio = cfg.input_shape[1]/cfg.input_shape[0] 59 | if w > aspect_ratio * h: 60 | h = w / aspect_ratio 61 | elif w < aspect_ratio * h: 62 | w = h * aspect_ratio 63 | bbox[2] = w#*1.25 64 | bbox[3] = h#*1.25 65 | bbox[0] = c_x - bbox[2]/2. 66 | bbox[1] = c_y - bbox[3]/2. 
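            # The box is only enlarged, never shrunk: one side is stretched so
            # that the width:height ratio equals cfg.input_shape[1]:cfg.input_shape[0]
            # while the center stays fixed, so cropping with this box and resizing
            # to the network input does not distort the person.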
67 | 68 | # joints and vis 69 | if self.data_split == 'train': 70 | joint_img = np.array(ann['keypoints']).reshape(self.joint_num,3) 71 | joint_vis = joint_img[:,2].copy().reshape(-1,1) 72 | joint_img[:,2] = 0 73 | else: 74 | joint_img = np.zeros((self.joint_num, 3), dtype=np.float) 75 | joint_vis = np.zeros((self.joint_num, 1), dtype=np.float) 76 | 77 | imgname = db.imgs[ann['image_id']]['file_name'] 78 | img_path = osp.join(self.img_dir, imgname) 79 | data.append({ 80 | 'img_path': img_path, 81 | 'bbox': bbox, 82 | 'joint_img': joint_img, # [org_img_x, org_img_y, 0] 83 | 'joint_cam': np.ones(joint_img.shape), # dummy value 84 | 'joint_vis': joint_vis, 85 | 'center_cam': np.ones(3), # dummy value 86 | 'f': np.ones(2), # dummy value 87 | 'c': np.ones(2) # dummy value 88 | }) 89 | 90 | return data 91 | 92 | def evaluate(self, preds, result_dir): 93 | print('MPII evaluation not supported') 94 | 95 | 96 | -------------------------------------------------------------------------------- /common/utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from config import cfg 4 | import copy 5 | 6 | def cam2pixel(cam_coord, f, c): 7 | 8 | x = cam_coord[..., 0] / cam_coord[..., 2] * f[0] + c[0] 9 | y = cam_coord[..., 1] / cam_coord[..., 2] * f[1] + c[1] 10 | z = cam_coord[..., 2] 11 | 12 | return x,y,z 13 | 14 | def pixel2cam(pixel_coord, f, c): 15 | 16 | x = (pixel_coord[..., 0] - c[0]) / f[0] * pixel_coord[..., 2] 17 | y = (pixel_coord[..., 1] - c[1]) / f[1] * pixel_coord[..., 2] 18 | z = pixel_coord[..., 2] 19 | 20 | return x,y,z 21 | 22 | def rigid_transform_3D(A, B): 23 | centroid_A = np.mean(A, axis = 0) 24 | centroid_B = np.mean(B, axis = 0) 25 | H = np.dot(np.transpose(A - centroid_A), B - centroid_B) 26 | U, s, V = np.linalg.svd(H) 27 | R = np.dot(np.transpose(V), np.transpose(U)) 28 | if np.linalg.det(R) < 0: 29 | V[2] = -V[2] 30 | R = np.dot(np.transpose(V), np.transpose(U)) 31 | t = -np.dot(R, np.transpose(centroid_A)) + np.transpose(centroid_B) 32 | return R, t 33 | 34 | def rigid_align(A, B): 35 | R, t = rigid_transform_3D(A, B) 36 | A2 = np.transpose(np.dot(R, np.transpose(A))) + t 37 | return A2 38 | 39 | def process_world_coordinate(joint_world, root_idx, joint_num, R, T, f, c): 40 | 41 | # project world coordinates to image space 42 | joint_cam = np.zeros((joint_num, 3)) 43 | for i in range(joint_num): 44 | joint_cam[i] = np.dot(R, joint_world[i] - T) 45 | center_cam = joint_cam[root_idx] 46 | 47 | # Subtract center depth 48 | joint_img = np.zeros((joint_num, 3)) 49 | joint_img[:, 0], joint_img[:, 1], joint_img[:, 2] = cam2pixel(joint_cam, f, c) 50 | joint_img[:, 2] = joint_img[:, 2] - center_cam[2] 51 | joint_vis = np.ones((joint_num,1)) 52 | 53 | ## bbox3d 54 | # build 3D bounding box centered on center_cam, sized with bbox_3d_shape 55 | bbox3d_lt = center_cam - np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 56 | bbox3d_rb = center_cam + np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 57 | 58 | # back-project 3D BBox to 2D image 59 | bbox2d_l, bbox2d_t, _ = cam2pixel(bbox3d_lt, f, c) 60 | bbox2d_r, bbox2d_b, _ = cam2pixel(bbox3d_rb, f, c) 61 | bbox = np.array([bbox2d_l, bbox2d_t, bbox2d_r-bbox2d_l+1, bbox2d_b-bbox2d_t+1]) 62 | 63 | return joint_img, joint_cam, joint_vis, center_cam, bbox 64 | 65 | def warp_coord_to_original(joint_out, bbox, center_cam): 66 | 67 | # joint_out: output from soft-argmax 68 | x = joint_out[:, 0] / cfg.output_shape[1] * bbox[2] 
+ bbox[0] 69 | y = joint_out[:, 1] / cfg.output_shape[0] * bbox[3] + bbox[1] 70 | z = (joint_out[:, 2] / cfg.depth_dim * 2. - 1.) * (cfg.bbox_3d_shape[0]/2.) + center_cam[2] 71 | 72 | return x, y, z 73 | 74 | def fliplr_joints(_joints, width, matched_parts): 75 | """ 76 | flip coords 77 | joints: numpy array, nJoints * dim, dim == 2 [x, y] or dim == 3 [x, y, z] 78 | width: image width 79 | matched_parts: list of pairs 80 | """ 81 | joints = _joints.copy() 82 | # Flip horizontal 83 | joints[:, 0] = width - joints[:, 0] - 1 84 | 85 | # Change left-right parts 86 | for pair in matched_parts: 87 | joints[pair[0], :], joints[pair[1], :] = joints[pair[1], :], joints[pair[0], :].copy() 88 | 89 | return joints 90 | 91 | def multi_meshgrid(*args): 92 | """ 93 | Creates a meshgrid from possibly many 94 | elements (instead of only 2). 95 | Returns a nd tensor with as many dimensions 96 | as there are arguments 97 | """ 98 | args = list(args) 99 | template = [1 for _ in args] 100 | for i in range(len(args)): 101 | n = args[i].shape[0] 102 | template_copy = template.copy() 103 | template_copy[i] = n 104 | args[i] = args[i].view(*template_copy) 105 | # there will be some broadcast magic going on 106 | return tuple(args) 107 | 108 | 109 | def flip(tensor, dims): 110 | if not isinstance(dims, (tuple, list)): 111 | dims = [dims] 112 | indices = [torch.arange(tensor.shape[dim] - 1, -1, -1, 113 | dtype=torch.int64) for dim in dims] 114 | multi_indices = multi_meshgrid(*indices) 115 | final_indices = [slice(i) for i in tensor.shape] 116 | for i, dim in enumerate(dims): 117 | final_indices[dim] = multi_indices[i] 118 | flipped = tensor[final_indices] 119 | assert flipped.device == tensor.device 120 | assert flipped.requires_grad == tensor.requires_grad 121 | return flipped 122 | 123 | -------------------------------------------------------------------------------- /tool/preprocess_h36m.m: -------------------------------------------------------------------------------- 1 | % Preprocess human3.6m dataset 2 | % Place this file to the Release-v1.1 folder and run it 3 | 4 | function preprocess_h36m() 5 | 6 | close all; 7 | %clear; 8 | %clc; 9 | 10 | addpaths; 11 | 12 | %-------------------------------------------------------------------------- 13 | % PARAMETERS 14 | 15 | % Subject (1, 5, 6, 7, 8, 9, 11) 16 | SUBJECT = [1 5 6 7 8 9 11]; 17 | 18 | % Action (2 ~ 16) 19 | ACTION = 2:16; 20 | 21 | % Subaction (1 ~ 2) 22 | SUBACTION = 1:2; 23 | 24 | % Camera (1 ~ 4) 25 | CAMERA = 1:4; 26 | 27 | num_joint = 17; 28 | root_dir = 'SET_YOUR_OUTPUT_DIRECTORY'; %you have to set your output directory 29 | 30 | % if rgb sequence is declared in the loop, it causes stuck (do not know 31 | % reason) 32 | rgb_sequence = cell(1,100000000); 33 | COUNT = 1; 34 | %-------------------------------------------------------------------------- 35 | % MAIN LOOP 36 | % For each subject, action, subaction, and camera.. 37 | for subject = SUBJECT 38 | for action = ACTION 39 | for subaction = SUBACTION 40 | for camera = CAMERA 41 | 42 | fprintf('Processing subject %d, action %d, subaction %d, camera %d..\n', ... 43 | subject, action, subaction, camera); 44 | 45 | save_dir = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d', ... 
46 | root_dir, subject, action, subaction, camera); 47 | if ~exist(save_dir, 'dir') 48 | mkdir(save_dir); 49 | end 50 | 51 | if (subject==11) && (action==2) && (subaction==2) && (camera==1) 52 | fprintf('There is an error in subject 11, action 2, subaction 2, and camera 1\n'); 53 | continue; 54 | end 55 | 56 | % Select sequence 57 | Sequence = H36MSequence(subject, action, subaction, camera); 58 | 59 | % Get 3D pose and 2D pose 60 | Features{1} = H36MPose3DPositionsFeature(); % 3D world coordinates 61 | Features{1}.Part = 'body'; % Only consider 17 joints 62 | Features{2} = H36MPose3DPositionsFeature('Monocular', true); % 3D camera coordinates 63 | Features{2}.Part = 'body'; % Only consider 17 joints 64 | Features{3} = H36MPose2DPositionsFeature(); % 2D image coordinates 65 | Features{3}.Part = 'body'; % Only consider 17 joints 66 | F = H36MComputeFeatures(Sequence, Features); 67 | num_frame = Sequence.NumFrames; 68 | pose3d_world = reshape(F{1}, num_frame, 3, num_joint); 69 | pose3d = reshape(F{2}, num_frame, 3, num_joint); 70 | pose2d = reshape(F{3}, num_frame, 2, num_joint); 71 | 72 | % Camera (in global coordinate) 73 | Camera = Sequence.getCamera(); 74 | 75 | % Sanity check 76 | if false 77 | R = Camera.R; % rotation matrix 78 | T = Camera.T'; % origin of the world coord system 79 | K = [Camera.f(1) 0 Camera.c(1); 80 | 0 Camera.f(2) Camera.c(2); 81 | 0 0 1]; % f: focal length, c: principal points 82 | error = 0; 83 | for i = 1:num_frame 84 | X = squeeze(pose3d_global(i,:,:)); 85 | x = squeeze(pose2d(i,:,:)); 86 | px = K*R*(X-T); 87 | px = px ./ px(3,:); 88 | px = px(1:2,:); 89 | error = error + mean(sqrt(sum((px-x).^2, 1))); 90 | end 91 | error = error / num_frame; 92 | fprintf('reprojection error = %.2f (pixels)\n', error); 93 | keyboard; 94 | end 95 | 96 | %% Image, bounding box for each sampled frame 97 | fprintf('Load RGB video: '); 98 | rgb_extractor = H36MRGBVideoFeature(); 99 | rgb_sequence{COUNT} = rgb_extractor.serializer(Sequence); 100 | fprintf('Done!!\n'); 101 | img_height = zeros(num_frame,1); 102 | img_width = zeros(num_frame,1); 103 | 104 | % For each frame, 105 | for i = 1:num_frame 106 | if mod(i,100) == 1 107 | fprintf('.'); 108 | end 109 | 110 | % Save image 111 | % Get data 112 | img = rgb_sequence{COUNT}.getFrame(i); 113 | [h, w, c] = size(img); 114 | img_height(i) = h; 115 | img_width(i) = w; 116 | img_name = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d_%06d.jpg', ... 
117 | save_dir, subject, action, subaction, camera, i); 118 | imwrite(img, img_name); 119 | 120 | end 121 | 122 | COUNT = COUNT + 1; 123 | 124 | % Save data 125 | pose3d_world = permute(pose3d_world,[1,3,2]); % world coordinate 3D keypoint coordinates 126 | R = Camera.R; % rotation matrix 127 | T = Camera.T; % origin of the world coord system 128 | f = Camera.f; % focal length 129 | c = Camera.c; % principal points 130 | filename = sprintf('%s/h36m_meta.mat', save_dir); 131 | save(filename, 'pose3d_world', 'f', 'c', 'R', 'T', 'img_height', 'img_width'); 132 | 133 | fprintf('\n'); 134 | 135 | end 136 | end 137 | end 138 | end 139 | 140 | end 141 | 142 | -------------------------------------------------------------------------------- /common/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import math 4 | import time 5 | import glob 6 | import abc 7 | from torch.utils.data import DataLoader 8 | import torch.optim 9 | import torchvision.transforms as transforms 10 | 11 | from config import cfg 12 | from dataset import DatasetLoader 13 | from timer import Timer 14 | from logger import colorlogger 15 | from nets.balanced_parallel import DataParallelModel, DataParallelCriterion 16 | from model import get_pose_net 17 | from nets import loss 18 | 19 | # dynamic dataset import 20 | for i in range(len(cfg.trainset)): 21 | exec('from ' + cfg.trainset[i] + ' import ' + cfg.trainset[i]) 22 | exec('from ' + cfg.testset + ' import ' + cfg.testset) 23 | 24 | class Base(object): 25 | __metaclass__ = abc.ABCMeta 26 | 27 | def __init__(self, cfg, log_name='logs.txt'): 28 | 29 | self.cfg = cfg 30 | self.cur_epoch = 0 31 | 32 | # timer 33 | self.tot_timer = Timer() 34 | self.gpu_timer = Timer() 35 | self.read_timer = Timer() 36 | 37 | # logger 38 | self.logger = colorlogger(cfg.log_dir, log_name=log_name) 39 | 40 | @abc.abstractmethod 41 | def _make_batch_generator(self): 42 | return 43 | 44 | @abc.abstractmethod 45 | def _make_model(self): 46 | return 47 | 48 | def save_model(self, state, epoch): 49 | file_path = osp.join(self.cfg.model_dir,'snapshot_{}.pth.tar'.format(str(epoch))) 50 | torch.save(state, file_path) 51 | self.logger.info("Write snapshot into {}".format(file_path)) 52 | 53 | def load_model(self, model, optimizer, scheduler): 54 | model_file_list = glob.glob(osp.join(self.cfg.model_dir,'*.pth.tar')) 55 | cur_epoch = max([int(file_name[file_name.find('snapshot_') + 9 : file_name.find('.pth.tar')]) for file_name in model_file_list]) 56 | ckpt = torch.load(osp.join(self.cfg.model_dir, 'snapshot_' + str(cur_epoch) + '.pth.tar')) 57 | start_epoch = ckpt['epoch'] + 1 58 | model.load_state_dict(ckpt['network']) 59 | optimizer.load_state_dict(ckpt['optimizer']) 60 | scheduler.load_state_dict(ckpt['scheduler']) 61 | 62 | return start_epoch, model, optimizer, scheduler 63 | 64 | 65 | class Trainer(Base): 66 | 67 | def __init__(self, cfg): 68 | self.JointLocationLoss = DataParallelCriterion(loss.JointLocationLoss()) 69 | super(Trainer, self).__init__(cfg, log_name = 'train_logs.txt') 70 | 71 | def get_optimizer(self, optimizer_name, model): 72 | if optimizer_name == 'adam': 73 | optimizer = torch.optim.Adam(model.parameters(), lr=self.cfg.lr) 74 | elif optimizer_name == 'sgd': 75 | optimizer = torch.optim.SGD(model.parameters(), lr=self.cfg.lr, momentum=self.cfg.momentum, weight_decay=self.cfg.wd) 76 | else: 77 | print("Error! 
Unknown optimizer name: ", optimizer_name) 78 | assert 0 79 | 80 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.cfg.lr_dec_epoch, gamma=self.cfg.lr_dec_factor) 81 | return optimizer, scheduler 82 | 83 | def _make_batch_generator(self): 84 | # data load and construct batch generator 85 | self.logger.info("Creating dataset...") 86 | trainset_list = [] 87 | for i in range(len(self.cfg.trainset)): 88 | trainset_list.append(eval(self.cfg.trainset[i])("train")) 89 | trainset_loader = DatasetLoader(trainset_list, True, transforms.Compose([\ 90 | transforms.ToTensor(), 91 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 92 | )) 93 | batch_generator = DataLoader(dataset=trainset_loader, batch_size=self.cfg.num_gpus*self.cfg.batch_size, shuffle=True, num_workers=self.cfg.num_thread, pin_memory=True) 94 | 95 | self.joint_num = trainset_loader.joint_num[0] 96 | self.itr_per_epoch = math.ceil(trainset_loader.__len__() / cfg.num_gpus / cfg.batch_size) 97 | self.batch_generator = batch_generator 98 | 99 | def _make_model(self): 100 | # prepare network 101 | self.logger.info("Creating graph and optimizer...") 102 | model = get_pose_net(self.cfg, True, self.joint_num) 103 | model = DataParallelModel(model).cuda() 104 | optimizer, scheduler = self.get_optimizer(self.cfg.optimizer, model) 105 | if self.cfg.continue_train: 106 | start_epoch, model, optimizer, scheduler = self.load_model(model, optimizer, scheduler) 107 | else: 108 | start_epoch = 0 109 | model.train() 110 | 111 | self.start_epoch = start_epoch 112 | self.model = model 113 | self.optimizer = optimizer 114 | self.scheduler = scheduler 115 | 116 | class Tester(Base): 117 | 118 | def __init__(self, cfg, test_epoch): 119 | self.coord_out = loss.soft_argmax 120 | self.test_epoch = int(test_epoch) 121 | super(Tester, self).__init__(cfg, log_name = 'test_logs.txt') 122 | 123 | def _make_batch_generator(self): 124 | # data load and construct batch generator 125 | self.logger.info("Creating dataset...") 126 | testset = eval(self.cfg.testset)("test") 127 | testset_loader = DatasetLoader(testset, False, transforms.Compose([\ 128 | transforms.ToTensor(), 129 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 130 | )) 131 | batch_generator = DataLoader(dataset=testset_loader, batch_size=self.cfg.num_gpus*self.cfg.test_batch_size, shuffle=False, num_workers=self.cfg.num_thread, pin_memory=True) 132 | 133 | self.testset = testset 134 | self.joint_num = testset_loader.joint_num 135 | self.skeleton = testset_loader.skeleton 136 | self.flip_pairs = testset.flip_pairs 137 | self.tot_sample_num = testset_loader.__len__() 138 | self.batch_generator = batch_generator 139 | 140 | def _make_model(self): 141 | 142 | model_path = os.path.join(self.cfg.model_dir, 'snapshot_%d.pth.tar' % self.test_epoch) 143 | assert os.path.exists(model_path), 'Cannot find model at ' + model_path 144 | self.logger.info('Load checkpoint from {}'.format(model_path)) 145 | 146 | # prepare network 147 | self.logger.info("Creating graph...") 148 | model = get_pose_net(self.cfg, False, self.joint_num) 149 | model = DataParallelModel(model).cuda() 150 | ckpt = torch.load(model_path) 151 | model.load_state_dict(ckpt['network']) 152 | model.eval() 153 | 154 | self.model = model 155 | 156 | def _evaluate(self, preds, result_save_path): 157 | self.testset.evaluate(preds, result_save_path) 158 | 159 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation 2 |

3 | 4 |

5 | 
6 | ## Introduction
7 | 
8 | This repo is a **[PyTorch](https://pytorch.org/)** implementation of **[Integral Human Pose Regression (ECCV 2018)](https://arxiv.org/abs/1711.08229)** by MSRA for **3D human pose estimation** from a single RGB image.
9 | 
10 | **What this repo provides:**
11 | * A [PyTorch](https://pytorch.org/) implementation of [Integral Human Pose Regression](https://arxiv.org/abs/1711.08229).
12 | * Flexible and simple code.
13 | * Dataset pre-processing code for the **[MPII](http://human-pose.mpi-inf.mpg.de/)** and **[Human3.6M](http://vision.imar.ro/human3.6m/description.php)** datasets.
14 | 
15 | ## Dependencies
16 | * [PyTorch](https://pytorch.org/)
17 | * [CUDA](https://developer.nvidia.com/cuda-downloads)
18 | * [cuDNN](https://developer.nvidia.com/cudnn)
19 | * [Anaconda](https://www.anaconda.com/download/)
20 | * [COCO API](https://github.com/cocodataset/cocoapi)
21 | 
22 | This code is tested under Ubuntu 16.04 with CUDA 9.0, cuDNN 7.1, and two NVIDIA 1080Ti GPUs.
23 | 
24 | Python 3.6.5 with Anaconda 3 and PyTorch 1.0.0 is used for development.
25 | 
26 | ## Directory
27 | 
28 | ### Root
29 | The `${POSE_ROOT}` directory is organized as below.
30 | ```
31 | ${POSE_ROOT}
32 | |-- data
33 | |-- common
34 | |-- main
35 | |-- tool
36 | `-- output
37 | ```
38 | * `data` contains data loading code and soft links to the image and annotation directories.
39 | * `common` contains the core code of the 3D human pose estimation system.
40 | * `main` contains high-level code for training and testing the network.
41 | * `tool` contains the Human3.6M dataset preprocessing code.
42 | * `output` contains logs, trained models, visualized outputs, and test results.
43 | 
44 | ### Data
45 | You need to follow the directory structure of `data` shown below.
46 | ```
47 | ${POSE_ROOT}
48 | |-- data
49 | |-- |-- MPII
50 | | `-- |-- annotations
51 | | | |-- train.json
52 | | | `-- test.json
53 | | `-- images
54 | | |-- 000001163.jpg
55 | | |-- 000003072.jpg
56 | |-- |-- Human36M
57 | | `-- |-- data
58 | | | |-- s_01_act_02_subact_01_ca_01
59 | | | |-- s_01_act_02_subact_01_ca_02
60 | ```
61 | * In `tool`, run `preprocess_h36m.m` to preprocess the Human3.6M dataset. It converts the videos to images and saves metadata for each frame. `data` in `Human36M` contains the preprocessed data.
62 | * Use the MPII dataset preprocessing code in my [TF-SimpleHumanPose](https://github.com/mks0601/TF-SimpleHumanPose) repo.
63 | * You can change the default directory structure of `data` by modifying `$DATASET_NAME.py` in each dataset folder.
64 | 
65 | ### Output
66 | You need to follow the directory structure of the `output` folder shown below.
67 | ```
68 | ${POSE_ROOT}
69 | |-- output
70 | |-- |-- log
71 | |-- |-- model_dump
72 | |-- |-- result
73 | `-- |-- vis
74 | ```
75 | * Creating the `output` folder as a soft link instead of a regular folder is recommended, since it can require a large amount of storage.
76 | * The `log` folder contains the training log file.
77 | * The `model_dump` folder contains saved checkpoints for each epoch.
78 | * The `result` folder contains the final estimation files generated in the testing stage.
79 | * The `vis` folder contains visualized results.
80 | * You can change the default directory structure of `output` by modifying `main/config.py`.
81 | 
82 | ## Running code
83 | ### Start
84 | * In `main/config.py`, you can change model settings, including the datasets to use, the network backbone, the input size, and so on.
85 | 
86 | ### Train
87 | In the `main` folder, set the training set in `config.py`.
Note that `trainset` must be `list` type and `0th` dataset is the reference dataset. 88 | 89 | In the `main` folder, run 90 | ```bash 91 | python train.py --gpu 0-1 92 | ``` 93 | to train the network on the GPU 0,1. 94 | 95 | If you want to continue experiment, run 96 | ```bash 97 | python train.py --gpu 0-1 --continue 98 | ``` 99 | `--gpu 0,1` can be used instead of `--gpu 0-1`. 100 | 101 | ### Test 102 | In the `main` folder, set testing set in `config.py`. Note that `testset` must be `str` type. 103 | 104 | Place trained model at the `output/model_dump/`. 105 | 106 | In the `main` folder, run 107 | ```bash 108 | python test.py --gpu 0-1 --test_epoch 16 109 | ``` 110 | to test the network on the GPU 0,1 with 16th epoch trained model. `--gpu 0,1` can be used instead of `--gpu 0-1`. 111 | 112 | ## Results 113 | Here I report the performance of the model from this repo and [the original paper](https://arxiv.org/abs/1711.08229). Also, I provide pre-trained 3d human pose estimation models. 114 | 115 | ### Results on Human3.6M dataset 116 | The tables below are PA MPJPE and MPJPE on Human3.6M dataset. Provided `config.py` file is used to achieve below results. It's currently slightly worse than the performance of the original paper, however I'm trying to achieve the same performance. I think training schedule has to be changed. 117 | 118 | #### Protocol 2 (training subjects: 1,5,6,7,8, testing subjects: 9, 11), PA MPJPE 119 | The PA MPJPEs of the paper are from protocol 1, however, note that protocol 2 uses smaller training set. 120 | 121 | | Methods | Dir. | Dis. | Eat | Gre. | Phon. | Pose | Pur. | Sit. | Sit D. | Smo. | Phot. | Wait | Walk | Walk D. | Walk P. | Avg | 122 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 123 | | my repo | 39.0 | 38.6 | 44.1 | 42.5 | 40.6 | 35.3 | 38.2 | 49.9 | 59.4 | 41.00 | 46.1 | 37.6 | 30.3 | 40.8 | 35.5 | 41.5 | 124 | | [original paper](https://arxiv.org/abs/1711.08229) | 36.9 | 36.2 | 40.6 | 40.4 | 41.9 | 34.9 | 35.7 | 50.1 | 59.4 | 40.4 | 44.9 | 39.0 | 30.8 | 39.8 | 36.7 | 40.6 | 125 | 126 | #### Protocol 2 (training subjects: 1,5,6,7,8, testing subjects: 9, 11), MPJPE 127 | | Methods | Dir. | Dis. | Eat | Gre. | Phon. | Pose | Pur. | Sit. | Sit D. | Smo. | Phot. | Wait | Walk | Walk D. | Walk P. | Avg | 128 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 129 | | my repo | 50.8 | 52.3 | 54.8 | 57.9 | 52.8 | 47.0 | 52.1 | 62.0 | 73.7 | 52.6 | 58.3 | 50.4 | 40.9 | 54.1 | 45.1 | 53.9 | 130 | | [original paper](https://arxiv.org/abs/1711.08229) | 47.5 | 47.7 | 49.5 | 50.2 | 51.4 | 43.8 | 46.4 | 58.9 | 65.7 | 49.4 | 55.8 | 47.8 | 38.9 | 49.0 | 43.8 | 49.6 | 131 | 132 | * Pre-trained model of protocol 2 [[model](https://github.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/releases/download/1.0/snapshot_16.pth.tar)] 133 | 134 | ## Troubleshooting 135 | If you get an extremely large error, disable cudnn for batch normalization. This typically occurs in low version of PyTorch. 
136 | 137 | ``` 138 | # PYTORCH=/path/to/pytorch 139 | # for pytorch v0.4.0 140 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 141 | # for pytorch v0.4.1 142 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 143 | ``` 144 | 145 | ## Acknowledgement 146 | This repo is largely modified from [Original PyTorch repo of IntegralHumanPose](https://github.com/JimmySuen/integral-human-pose). 147 | 148 | ## Reference 149 | [1] Sun, Xiao and Xiao, Bin and Liang, Shuang and Wei, Yichen. "Integral human pose regression". ECCV 2018. 150 | -------------------------------------------------------------------------------- /common/nets/balanced_parallel.py: -------------------------------------------------------------------------------- 1 | """Encoding Data Parallel""" 2 | import threading 3 | import functools 4 | import torch 5 | from torch.autograd import Variable, Function 6 | import torch.cuda.comm as comm 7 | from torch.nn.parallel.data_parallel import DataParallel 8 | from torch.nn.parallel.parallel_apply import get_a_var 9 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 10 | 11 | torch_ver = torch.__version__[:3] 12 | 13 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 14 | 'patch_replication_callback'] 15 | 16 | def allreduce(*inputs): 17 | """Cross GPU all reduce autograd operation for calculate mean and 18 | variance in SyncBN. 19 | """ 20 | return AllReduce.apply(*inputs) 21 | 22 | 23 | class AllReduce(Function): 24 | @staticmethod 25 | def forward(ctx, num_inputs, *inputs): 26 | ctx.num_inputs = num_inputs 27 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 28 | inputs = [inputs[i:i + num_inputs] 29 | for i in range(0, len(inputs), num_inputs)] 30 | # sort before reduce sum 31 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 32 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 33 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 34 | return tuple([t for tensors in outputs for t in tensors]) 35 | 36 | @staticmethod 37 | def backward(ctx, *inputs): 38 | inputs = [i.data for i in inputs] 39 | inputs = [inputs[i:i + ctx.num_inputs] 40 | for i in range(0, len(inputs), ctx.num_inputs)] 41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 43 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 44 | 45 | 46 | class Reduce(Function): 47 | @staticmethod 48 | def forward(ctx, *inputs): 49 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 50 | inputs = sorted(inputs, key=lambda i: i.get_device()) 51 | return comm.reduce_add(inputs) 52 | 53 | @staticmethod 54 | def backward(ctx, gradOutput): 55 | return Broadcast.apply(ctx.target_gpus, gradOutput) 56 | 57 | 58 | class DataParallelModel(DataParallel): 59 | """Implements data parallelism at the module level. 60 | 61 | This container parallelizes the application of the given module by 62 | splitting the input across the specified devices by chunking in the 63 | batch dimension. 64 | In the forward pass, the module is replicated on each device, 65 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 66 | Note that the outputs are not gathered, please use compatible 67 | :class:`encoding.parallel.DataParallelCriterion`. 
68 | 69 | The batch size should be larger than the number of GPUs used. It should 70 | also be an integer multiple of the number of GPUs so that each chunk is 71 | the same size (so that each GPU processes the same number of samples). 72 | 73 | Args: 74 | module: module to be parallelized 75 | device_ids: CUDA devices (default: all devices) 76 | 77 | Reference: 78 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 79 | Amit Agrawal. Context Encoding for Semantic Segmentation. 80 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 81 | 82 | Example:: 83 | 84 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 85 | >>> y = net(x) 86 | """ 87 | def gather(self, outputs, output_device): 88 | return outputs 89 | 90 | def replicate(self, module, device_ids): 91 | modules = super(DataParallelModel, self).replicate(module, device_ids) 92 | execute_replication_callbacks(modules) 93 | return modules 94 | 95 | 96 | 97 | class DataParallelCriterion(DataParallel): 98 | """ 99 | Calculate the loss on multiple GPUs, which balances the memory usage for 100 | semantic segmentation. 101 | 102 | The targets are split across the specified devices by chunking in 103 | the batch dimension. Please use it together with :class:`encoding.parallel.DataParallelModel`. 104 | 105 | Reference: 106 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 107 | Amit Agrawal. Context Encoding for Semantic Segmentation. 108 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 109 | 110 | Example:: 111 | 112 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 113 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 114 | >>> y = net(x) 115 | >>> loss = criterion(y, target) 116 | """ 117 | def forward(self, inputs, *targets, **kwargs): 118 | # inputs should already be scattered 119 | # scattering the targets instead 120 | # if not self.device_ids: 121 | # return self.module(inputs, *targets, **kwargs) 122 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 123 | if len(self.device_ids) == 1: 124 | return self.module(inputs, *targets[0]) 125 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 126 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 127 | return Reduce.apply(*outputs) / len(outputs) 128 | 129 | #return self.gather(outputs, self.output_device).mean() 130 | 131 | 132 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 133 | assert len(modules) == len(inputs) 134 | assert len(targets) == len(inputs) 135 | if kwargs_tup: 136 | assert len(modules) == len(kwargs_tup) 137 | else: 138 | kwargs_tup = ({},) * len(modules) 139 | if devices is not None: 140 | assert len(modules) == len(devices) 141 | else: 142 | devices = [None] * len(modules) 143 | 144 | lock = threading.Lock() 145 | results = {} 146 | if torch_ver != "0.3": 147 | grad_enabled = torch.is_grad_enabled() 148 | 149 | def _worker(i, module, input, target, kwargs, device=None): 150 | if torch_ver != "0.3": 151 | torch.set_grad_enabled(grad_enabled) 152 | if device is None: 153 | device = get_a_var(input).get_device() 154 | try: 155 | with torch.cuda.device(device): 156 | output = module(input, *target) 157 | with lock: 158 | results[i] = output 159 | except Exception as e: 160 | with lock: 161 | results[i] = e 162 | 163 | if len(modules) > 1: 164 | threads =
[threading.Thread(target=_worker, 165 | args=(i, module, input, target, 166 | kwargs, device),) 167 | for i, (module, input, target, kwargs, device) in 168 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 169 | 170 | for thread in threads: 171 | thread.start() 172 | for thread in threads: 173 | thread.join() 174 | else: 175 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0]) 176 | 177 | outputs = [] 178 | for i in range(len(inputs)): 179 | output = results[i] 180 | if isinstance(output, Exception): 181 | raise output 182 | outputs.append(output) 183 | return outputs 184 | 185 | 186 | ########################################################################### 187 | # Adapted from Synchronized-BatchNorm-PyTorch. 188 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 189 | # 190 | class CallbackContext(object): 191 | pass 192 | 193 | 194 | def execute_replication_callbacks(modules): 195 | """ 196 | Execute a replication callback `__data_parallel_replicate__` on each module created 197 | by the original replication. 198 | 199 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 200 | 201 | Note that, as all modules are isomorphic, we assign each sub-module a context 202 | (shared among multiple copies of this module on different devices). 203 | Through this context, different copies can share some information. 204 | 205 | We guarantee that the callback on the master copy (the first copy) will be called ahead 206 | of calling the callback of any slave copies. 207 | """ 208 | master_copy = modules[0] 209 | nr_modules = len(list(master_copy.modules())) 210 | ctxs = [CallbackContext() for _ in range(nr_modules)] 211 | 212 | for i, module in enumerate(modules): 213 | for j, m in enumerate(module.modules()): 214 | if hasattr(m, '__data_parallel_replicate__'): 215 | m.__data_parallel_replicate__(ctxs[j], i) 216 | 217 | 218 | def patch_replication_callback(data_parallel): 219 | """ 220 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 221 | Useful when you have a customized `DataParallel` implementation.
222 | 223 | Examples: 224 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 225 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 226 | > patch_replication_callback(sync_bn) 227 | # this is equivalent to 228 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 229 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 230 | """ 231 | 232 | assert isinstance(data_parallel, DataParallel) 233 | 234 | old_replicate = data_parallel.replicate 235 | 236 | @functools.wraps(old_replicate) 237 | def new_replicate(module, device_ids): 238 | modules = old_replicate(module, device_ids) 239 | execute_replication_callbacks(modules) 240 | return modules 241 | 242 | data_parallel.replicate = new_replicate -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import cv2 5 | import random 6 | import time 7 | import torch 8 | import copy 9 | from torch.utils.data.dataset import Dataset 10 | from utils.vis import vis_keypoints, vis_3d_skeleton 11 | from utils.pose_utils import fliplr_joints 12 | from config import cfg 13 | 14 | class DatasetLoader(Dataset): 15 | def __init__(self, db, is_train, transform): 16 | 17 | if isinstance(db, list): 18 | self.multiple_db = True 19 | self.db = [d.load_data() for d in db] 20 | self.joints_name = [d.joints_name for d in db] 21 | self.joint_num = [d.joint_num for d in db] 22 | self.skeleton = [d.skeleton for d in db] 23 | self.lr_skeleton = [d.lr_skeleton for d in db] 24 | self.flip_pairs = [d.flip_pairs for d in db] 25 | self.joints_have_depth = [d.joints_have_depth for d in db] 26 | else: 27 | self.multiple_db = False 28 | self.db = db.load_data() 29 | self.joint_num = db.joint_num 30 | self.skeleton = db.skeleton 31 | self.lr_skeleton = db.lr_skeleton 32 | self.flip_pairs = db.flip_pairs 33 | self.joints_have_depth = db.joints_have_depth 34 | 35 | self.transform = transform 36 | self.is_train = is_train 37 | 38 | if self.is_train: 39 | self.do_augment = True 40 | else: 41 | self.do_augment = False 42 | 43 | def __getitem__(self, index): 44 | 45 | if self.multiple_db: 46 | db_idx = index // max([len(db) for db in self.db]) 47 | 48 | joint_num = self.joint_num[db_idx] 49 | skeleton = self.skeleton[db_idx] 50 | lr_skeleton = self.lr_skeleton[0] 51 | flip_pairs = self.flip_pairs[db_idx] 52 | joints_have_depth = self.joints_have_depth[db_idx] 53 | 54 | ref_joints_name = self.joints_name[0] 55 | joints_name = self.joints_name[db_idx] 56 | 57 | item_idx = index % max([len(db) for db in self.db]) % len(self.db[db_idx]) 58 | data = copy.deepcopy(self.db[db_idx][item_idx]) 59 | 60 | else: 61 | joint_num = self.joint_num 62 | skeleton = self.skeleton 63 | lr_skeleton = self.lr_skeleton 64 | flip_pairs = self.flip_pairs 65 | joints_have_depth = self.joints_have_depth 66 | 67 | data = copy.deepcopy(self.db[index]) 68 | 69 | bbox = data['bbox'] 70 | joint_img = data['joint_img'] 71 | joint_vis = data['joint_vis'] 72 | 73 | # 1. load image 74 | cvimg = cv2.imread(data['img_path'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 75 | if not isinstance(cvimg, np.ndarray): 76 | raise IOError("Fail to read %s" % data['img_path']) 77 | img_height, img_width, img_channels = cvimg.shape 78 | 79 | # 2. 
get augmentation params 80 | if self.do_augment: 81 | scale, rot, do_flip, color_scale = get_aug_config() 82 | else: 83 | scale, rot, do_flip, color_scale = 1.0, 0, False, [1.0, 1.0, 1.0] 84 | 85 | # 3. crop patch from img and perform data augmentation (flip, scale, rot, color scale) 86 | img_patch, trans = generate_patch_image(cvimg, bbox, do_flip, scale, rot) 87 | for i in range(img_channels): 88 | img_patch[:, :, i] = np.clip(img_patch[:, :, i] * color_scale[i], 0, 255) 89 | 90 | # 4. generate patch joint ground truth 91 | # flip joints and apply Affine Transform on joints 92 | if do_flip: 93 | joint_img[:, 0] = img_width - joint_img[:, 0] - 1 94 | for pair in flip_pairs: 95 | joint_img[pair[0], :], joint_img[pair[1], :] = joint_img[pair[1], :], joint_img[pair[0], :].copy() 96 | joint_vis[pair[0], :], joint_vis[pair[1], :] = joint_vis[pair[1], :], joint_vis[pair[0], :].copy() 97 | 98 | for i in range(len(joint_img)): 99 | joint_img[i, 0:2] = trans_point2d(joint_img[i, 0:2], trans) 100 | joint_img[i, 2] /= (cfg.bbox_3d_shape[0]/2. * scale) # expect depth lies in -bbox_3d_shape[0]/2 ~ bbox_3d_shape[0]/2 -> -1.0 ~ 1.0 101 | joint_img[i, 2] = (joint_img[i,2] + 1.0)/2. # 0~1 normalize 102 | joint_vis[i] *= ( 103 | (joint_img[i,0] >= 0) & \ 104 | (joint_img[i,0] < cfg.input_shape[1]) & \ 105 | (joint_img[i,1] >= 0) & \ 106 | (joint_img[i,1] < cfg.input_shape[0]) & \ 107 | (joint_img[i,2] >= 0) & \ 108 | (joint_img[i,2] < 1) 109 | ) 110 | 111 | vis = False 112 | if vis: 113 | filename = str(random.randrange(1,500)) 114 | tmpimg = img_patch.copy().astype(np.uint8) 115 | tmpkps = np.zeros((3,joint_num)) 116 | tmpkps[:2,:] = joint_img[:,:2].transpose(1,0) 117 | tmpkps[2,:] = joint_vis[:,0] 118 | tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton) 119 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_gt.jpg'), tmpimg) 120 | 121 | vis = False 122 | if vis: 123 | vis_3d_skeleton(joint_img, joint_vis, skeleton, filename) 124 | 125 | # change coordinates to output space 126 | joint_img[:, 0] = joint_img[:, 0] / cfg.input_shape[1] * cfg.output_shape[1] 127 | joint_img[:, 1] = joint_img[:, 1] / cfg.input_shape[0] * cfg.output_shape[0] 128 | joint_img[:, 2] = joint_img[:, 2] * cfg.depth_dim 129 | 130 | # change joint coord, vis to reference dataset. 
0th db is reference dataset 131 | if self.multiple_db: 132 | joint_img = transform_joint_to_other_db(joint_img, joints_name, ref_joints_name) 133 | joint_vis = transform_joint_to_other_db(joint_vis, joints_name, ref_joints_name) 134 | 135 | if self.is_train: 136 | img_patch = self.transform(img_patch) 137 | joint_img = joint_img.astype(np.float32) 138 | joint_vis = (joint_vis > 0).astype(np.float32) 139 | joints_have_depth = np.array([joints_have_depth]).astype(np.float32) 140 | 141 | return img_patch, joint_img, joint_vis, joints_have_depth 142 | else: 143 | img_patch = self.transform(img_patch) 144 | return img_patch 145 | 146 | def __len__(self): 147 | if self.multiple_db: 148 | return max([len(db) for db in self.db]) * len(self.db) 149 | else: 150 | return len(self.db) 151 | 152 | # helper functions 153 | def transform_joint_to_other_db(src_joint, src_name, dst_name): 154 | 155 | src_joint_num = len(src_name) 156 | dst_joint_num = len(dst_name) 157 | 158 | new_joint = np.zeros(((dst_joint_num,) + src_joint.shape[1:])) 159 | 160 | for src_idx in range(len(src_name)): 161 | name = src_name[src_idx] 162 | if name in dst_name: 163 | dst_idx = dst_name.index(name) 164 | new_joint[dst_idx] = src_joint[src_idx] 165 | 166 | return new_joint 167 | 168 | def get_aug_config(): 169 | 170 | scale_factor = 0.25 171 | rot_factor = 30 172 | color_factor = 0.2 173 | 174 | scale = np.clip(np.random.randn(), -1.0, 1.0) * scale_factor + 1.0 175 | rot = np.clip(np.random.randn(), -2.0, 176 | 2.0) * rot_factor if random.random() <= 0.6 else 0 177 | do_flip = random.random() <= 0.5 178 | c_up = 1.0 + color_factor 179 | c_low = 1.0 - color_factor 180 | color_scale = [random.uniform(c_low, c_up), random.uniform(c_low, c_up), random.uniform(c_low, c_up)] 181 | 182 | return scale, rot, do_flip, color_scale 183 | 184 | 185 | def generate_patch_image(cvimg, bbox, do_flip, scale, rot): 186 | img = cvimg.copy() 187 | img_height, img_width, img_channels = img.shape 188 | 189 | bb_c_x = float(bbox[0] + 0.5*bbox[2]) 190 | bb_c_y = float(bbox[1] + 0.5*bbox[3]) 191 | bb_width = float(bbox[2]) 192 | bb_height = float(bbox[3]) 193 | 194 | if do_flip: 195 | img = img[:, ::-1, :] 196 | bb_c_x = img_width - bb_c_x - 1 197 | 198 | trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height, cfg.input_shape[1], cfg.input_shape[0], scale, rot, inv=False) 199 | img_patch = cv2.warpAffine(img, trans, (int(cfg.input_shape[1]), int(cfg.input_shape[0])), flags=cv2.INTER_LINEAR) 200 | 201 | img_patch = img_patch[:,:,::-1].copy() 202 | img_patch = img_patch.astype(np.float32) 203 | 204 | return img_patch, trans 205 | 206 | def rotate_2d(pt_2d, rot_rad): 207 | x = pt_2d[0] 208 | y = pt_2d[1] 209 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 210 | xx = x * cs - y * sn 211 | yy = x * sn + y * cs 212 | return np.array([xx, yy], dtype=np.float32) 213 | 214 | def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False): 215 | # augment size with scale 216 | src_w = src_width * scale 217 | src_h = src_height * scale 218 | src_center = np.array([c_x, c_y], dtype=np.float32) 219 | # augment rotation 220 | rot_rad = np.pi * rot / 180 221 | src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad) 222 | src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad) 223 | 224 | dst_w = dst_width 225 | dst_h = dst_height 226 | dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32) 227 | dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32) 
228 | dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32) 229 | 230 | src = np.zeros((3, 2), dtype=np.float32) 231 | src[0, :] = src_center 232 | src[1, :] = src_center + src_downdir 233 | src[2, :] = src_center + src_rightdir 234 | 235 | dst = np.zeros((3, 2), dtype=np.float32) 236 | dst[0, :] = dst_center 237 | dst[1, :] = dst_center + dst_downdir 238 | dst[2, :] = dst_center + dst_rightdir 239 | 240 | if inv: 241 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 242 | else: 243 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 244 | 245 | return trans 246 | 247 | def trans_point2d(pt_2d, trans): 248 | src_pt = np.array([pt_2d[0], pt_2d[1], 1.]).T 249 | dst_pt = np.dot(trans, src_pt) 250 | return dst_pt[0:2] 251 | 252 | 253 | -------------------------------------------------------------------------------- /data/Human36M/Human36M.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import scipy.io as sio 4 | import numpy as np 5 | from config import cfg 6 | from utils.pose_utils import pixel2cam, rigid_align, process_world_coordinate, warp_coord_to_original 7 | import cv2 8 | import random 9 | from utils.vis import vis_keypoints, vis_3d_skeleton 10 | 11 | class Human36M: 12 | def __init__(self, data_split): 13 | self.data_split = data_split 14 | self.data_dir = osp.join('..', 'data', 'Human36M', 'data') 15 | self.subsampling = self.get_subsampling_ratio(data_split) 16 | self.joint_num = 18 17 | self.joints_name = ('Pelvis', 'R_Hip', 'R_Knee', 'R_Ankle', 'L_Hip', 'L_Knee', 'L_Ankle', 'Torso', 'Neck', 'Nose', 'Head', 'L_Shoulder', 'L_Elbow', 'L_Wrist', 'R_Shoulder', 'R_Elbow', 'R_Wrist', 'Thorax') 18 | self.flip_pairs = ( (1, 4), (2, 5), (3, 6), (14, 11), (15, 12), (16, 13) ) 19 | self.skeleton = ( (0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6) ) 20 | self.lr_skeleton = ( ((8,11),(8,14)), ((11,12),(14,15)), ((12,13),(15,16)), ((0,1),(0,4)), ((1,2),(4,5)), ((2,3),(5,6)) ) 21 | self.eval_joint = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) 22 | self.joints_have_depth = True 23 | 24 | self.action_idx = (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) 25 | self.subaction_idx = (1, 2) 26 | self.camera_idx = (1, 2, 3, 4) 27 | self.action_name = ['Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning', 'Posing', 'Purchases', 'Sitting', 'SittingDown', 'Smoking', 'Photo', 'Waiting', 'Walking', 'WalkDog', 'WalkTogether'] 28 | 29 | self.root_idx = self.joints_name.index('Pelvis') 30 | self.lshoulder_idx = self.joints_name.index('L_Shoulder') 31 | self.rshoulder_idx = self.joints_name.index('R_Shoulder') 32 | 33 | def get_subsampling_ratio(self, data_split): 34 | 35 | if data_split == 'train': 36 | return 5 37 | elif data_split == 'test': 38 | return 64 39 | else: 40 | assert 0, print('Unknown subset') 41 | 42 | def load_h36m_annot_file(self, annot_file): 43 | 44 | data = sio.loadmat(annot_file) 45 | joint_world = data['pose3d_world'] # 3D world coordinates of keypoints 46 | R = data['R'] # extrinsic 47 | T = np.reshape(data['T'],(3)) # extrinsic 48 | f = np.reshape(data['f'],(-1)) # focal legnth 49 | c = np.reshape(data['c'],(-1)) # principal points 50 | img_heights = np.reshape(data['img_height'],(-1)) 51 | img_widths = np.reshape(data['img_width'],(-1)) 52 | 53 | # add thorax 54 | thorax = (joint_world[:, self.lshoulder_idx, :] + joint_world[:, self.rshoulder_idx, :]) * 
0.5 55 | thorax = thorax.reshape((thorax.shape[0], 1, thorax.shape[1])) 56 | joint_world = np.concatenate((joint_world, thorax), axis=1) 57 | 58 | return joint_world, R, T, f, c, img_widths, img_heights 59 | 60 | def _H36FolderName(self, subject_id, act_id, subact_id, camera_id): 61 | return "s_%02d_act_%02d_subact_%02d_ca_%02d" % \ 62 | (subject_id, act_id, subact_id, camera_id) 63 | 64 | def _H36ImageName(self, folder_name, frame_id): 65 | return "%s_%06d.jpg" % (folder_name, frame_id + 1) 66 | 67 | def _AllHuman36Folders(self, subject_list): 68 | folders = [] 69 | for i in subject_list: 70 | for j in self.action_idx: 71 | for m in self.subaction_idx: 72 | for n in self.camera_idx: 73 | folders.append(self._H36FolderName(i, j, m, n)) 74 | return folders 75 | 76 | def _sample_dataset(self, data_split): 77 | if data_split == 'train': 78 | folders = self._AllHuman36Folders([1, 5, 6, 7, 8]) 79 | elif data_split == 'test': 80 | folders = self._AllHuman36Folders([9, 11]) 81 | else: 82 | print("Unknown subset") 83 | assert 0 84 | 85 | return folders 86 | 87 | def load_data(self): 88 | 89 | folders = self._sample_dataset(self.data_split) 90 | data = [] 91 | for folder in folders: 92 | 93 | if folder == 's_11_act_02_subact_02_ca_01': 94 | continue 95 | 96 | folder_dir = osp.join(self.data_dir, folder) 97 | 98 | # load ground truth 99 | joint_world, R, T, f, c, img_widths, img_heights = self.load_h36m_annot_file(osp.join(folder_dir, 'h36m_meta.mat')) 100 | img_num = np.shape(joint_world)[0] 101 | 102 | for n in range(0, img_num, self.subsampling): 103 | 104 | img_path = osp.join(folder_dir, self._H36ImageName(folder, n)) 105 | joint_img, joint_cam, joint_vis, center_cam, bbox = process_world_coordinate(joint_world[n], self.root_idx, self.joint_num, R, T, f, c) 106 | 107 | img_width = img_widths[n] 108 | img_height = img_heights[n] 109 | 110 | data.append({ 111 | 'img_path': img_path, 112 | 'bbox': bbox, 113 | 'joint_img': joint_img, # [org_img_x, org_img_y, depth - root_depth] 114 | 'joint_cam': joint_cam, # [X, Y, Z] in camera coordinate 115 | 'joint_vis': joint_vis, 116 | 'center_cam': center_cam, # [X, Y, Z] in camera coordinate 117 | 'f': f, 118 | 'c': c 119 | }) 120 | 121 | return data 122 | 123 | def evaluate(self, preds, result_dir): 124 | 125 | print() 126 | print('Evaluation start...') 127 | 128 | gts = self.load_data() 129 | 130 | assert len(gts) == len(preds) 131 | 132 | sample_num = len(gts) 133 | joint_num = self.joint_num 134 | 135 | p1_error = np.zeros((sample_num, joint_num, 3)) # PA MPJPE (protocol #1 metric) 136 | p2_error = np.zeros((sample_num, joint_num, 3)) # MPJPE (protocol #2 metroc) 137 | p1_error_action = [ [] for _ in range(len(self.action_idx)) ] # PA MPJPE for each action 138 | p2_error_action = [ [] for _ in range(len(self.action_idx)) ] # MPJPE error for each action 139 | pred_to_save = [] 140 | for n in range(sample_num): 141 | 142 | gt = gts[n] 143 | f = gt['f'] 144 | c = gt['c'] 145 | bbox = gt['bbox'] 146 | gt_3d_center = gt['center_cam'] 147 | gt_3d_kpt = gt['joint_cam'] 148 | gt_vis = gt['joint_vis'].copy() 149 | 150 | # restore coordinates to original space 151 | pre_2d_kpt = preds[n].copy() 152 | pre_2d_kpt[:,0], pre_2d_kpt[:,1], pre_2d_kpt[:,2] = warp_coord_to_original(pre_2d_kpt, bbox, gt_3d_center) 153 | 154 | vis = False 155 | if vis: 156 | cvimg = cv2.imread(gt['img_path'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 157 | filename = str(random.randrange(1,500)) 158 | tmpimg = cvimg.copy().astype(np.uint8) 159 | tmpkps = np.zeros((3,joint_num)) 160 
| tmpkps[0,:], tmpkps[1,:] = pre_2d_kpt[:,0], pre_2d_kpt[:,1] 161 | tmpkps[2,:] = 1 162 | tmpimg = vis_keypoints(tmpimg, tmpkps, self.skeleton) 163 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_output.jpg'), tmpimg) 164 | 165 | # back project to camera coordinate system 166 | pre_3d_kpt = np.zeros((joint_num,3)) 167 | pre_3d_kpt[:,0], pre_3d_kpt[:,1], pre_3d_kpt[:,2] = pixel2cam(pre_2d_kpt, f, c) 168 | 169 | vis = False 170 | if vis: 171 | vis_3d_skeleton(pre_3d_kpt, gt_vis, self.skeleton, filename) 172 | 173 | # root joint alignment 174 | pre_3d_kpt = pre_3d_kpt - pre_3d_kpt[self.root_idx] 175 | gt_3d_kpt = gt_3d_kpt - gt_3d_kpt[self.root_idx] 176 | 177 | # rigid alignment for PA MPJPE (protocol #1) 178 | pre_3d_kpt_align = rigid_align(pre_3d_kpt, gt_3d_kpt) 179 | 180 | # prediction save 181 | pred_to_save.append({'pred': pre_3d_kpt, 182 | 'align_pred': pre_3d_kpt_align, 183 | 'gt': gt_3d_kpt}) 184 | 185 | # error save 186 | p1_error[n] = np.power(pre_3d_kpt_align - gt_3d_kpt,2) # PA MPJPE (protocol #1) 187 | p2_error[n] = np.power(pre_3d_kpt - gt_3d_kpt,2) # MPJPE (protocol #2) 188 | 189 | img_name = gt['img_path'] 190 | action_idx = int(img_name[img_name.find('act')+4:img_name.find('act')+6]) - 2 191 | p1_error_action[action_idx].append(p1_error[n].copy()) 192 | p2_error_action[action_idx].append(p2_error[n].copy()) 193 | 194 | 195 | # total error calculate 196 | p1_error = np.take(p1_error, self.eval_joint, axis=1) 197 | p2_error = np.take(p2_error, self.eval_joint, axis=1) 198 | p1_error = np.mean(np.power(np.sum(p1_error,axis=2),0.5)) 199 | p2_error = np.mean(np.power(np.sum(p2_error,axis=2),0.5)) 200 | 201 | p1_eval_summary = 'Protocol #1 error (PA MPJPE) >> %.2f' % (p1_error) 202 | p2_eval_summary = 'Protocol #2 error (MPJPE) >> %.2f' % (p2_error) 203 | print() 204 | print(p1_eval_summary) 205 | print(p2_eval_summary) 206 | 207 | # error for each action calculate 208 | p1_action_eval_summary = 'Protocol #1 error (PA MPJPE) for each action: \n' 209 | for i in range(len(p1_error_action)): 210 | err = np.array(p1_error_action[i]) 211 | err = np.take(err, self.eval_joint, axis=1) 212 | err = np.mean(np.power(np.sum(err,axis=2),0.5)) 213 | 214 | action_name = self.action_name[i] 215 | p1_action_eval_summary += (action_name + ': %.2f\n' % err) 216 | 217 | 218 | p2_action_eval_summary = 'Protocol #2 error (MPJPE) for each action: \n' 219 | for i in range(len(p2_error_action)): 220 | err = np.array(p2_error_action[i]) 221 | err = np.take(err, self.eval_joint, axis=1) 222 | err = np.mean(np.power(np.sum(err,axis=2),0.5)) 223 | 224 | action_name = self.action_name[i] 225 | p2_action_eval_summary += (action_name + ': %.2f\n' % err) 226 | print() 227 | print(p1_action_eval_summary) 228 | print(p2_action_eval_summary) 229 | 230 | # result save 231 | f_pred_3d_kpt = open(osp.join(result_dir, 'pred_3d_kpt.txt'), 'w') 232 | f_pred_3d_kpt_align = open(osp.join(result_dir, 'pred_3d_kpt_align.txt'), 'w') 233 | f_gt_3d_kpt = open(osp.join(result_dir, 'gt_3d_kpt.txt'), 'w') 234 | for i in range(len(pred_to_save)): 235 | for j in range(joint_num): 236 | for k in range(3): 237 | f_pred_3d_kpt.write('%.3f ' % pred_to_save[i]['pred'][j][k]) 238 | f_pred_3d_kpt_align.write('%.3f ' % pred_to_save[i]['align_pred'][j][k]) 239 | f_gt_3d_kpt.write('%.3f ' % pred_to_save[i]['gt'][j][k]) 240 | f_pred_3d_kpt.write('\n') 241 | f_pred_3d_kpt_align.write('\n') 242 | f_gt_3d_kpt.write('\n') 243 | f_pred_3d_kpt.close() 244 | f_pred_3d_kpt_align.close() 245 | f_gt_3d_kpt.close() 246 | 247 | f_eval_result = 
open(osp.join(result_dir, 'eval_result.txt'), 'w') 248 | f_eval_result.write(p1_eval_summary) 249 | f_eval_result.write('\n') 250 | f_eval_result.write(p2_eval_summary) 251 | f_eval_result.write('\n') 252 | f_eval_result.write(p1_action_eval_summary) 253 | f_eval_result.write('\n') 254 | f_eval_result.write(p2_action_eval_summary) 255 | f_eval_result.write('\n') 256 | f_eval_result.close() 257 | 258 | 259 | 260 | 261 | --------------------------------------------------------------------------------