├── common
│   ├── utils
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── vis.cpython-36.pyc
│   │   │   ├── __init__.cpython-36.pyc
│   │   │   ├── dir_utils.cpython-36.pyc
│   │   │   └── pose_utils.cpython-36.pyc
│   │   ├── dir_utils.py
│   │   ├── vis.py
│   │   └── pose_utils.py
│   ├── __pycache__
│   │   ├── base.cpython-36.pyc
│   │   ├── logger.cpython-36.pyc
│   │   ├── timer.cpython-36.pyc
│   │   └── loss_recorder.cpython-36.pyc
│   ├── nets
│   │   ├── __pycache__
│   │   │   ├── loss.cpython-36.pyc
│   │   │   ├── resnet.cpython-36.pyc
│   │   │   └── balanced_parallel.cpython-36.pyc
│   │   ├── loss.py
│   │   ├── resnet.py
│   │   └── balanced_parallel.py
│   ├── timer.py
│   ├── logger.py
│   └── base.py
├── assets
│   ├── 1.png
│   └── 2.png
├── main
│   ├── __pycache__
│   │   ├── model.cpython-36.pyc
│   │   └── config.cpython-36.pyc
│   ├── config.py
│   ├── model.py
│   ├── train.py
│   └── test.py
├── data
│   ├── __pycache__
│   │   └── dataset.cpython-36.pyc
│   ├── MPII
│   │   ├── __pycache__
│   │   │   └── MPII.cpython-36.pyc
│   │   └── MPII.py
│   ├── Human36M
│   │   ├── __pycache__
│   │   │   └── Human36M.cpython-36.pyc
│   │   └── Human36M.py
│   └── dataset.py
├── tool
│   └── preprocess_h36m.m
└── README.md

/common/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/assets/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/1.png
--------------------------------------------------------------------------------
/assets/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/2.png
--------------------------------------------------------------------------------
/main/__pycache__/model.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/main/__pycache__/model.cpython-36.pyc
--------------------------------------------------------------------------------
/common/__pycache__/base.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/base.cpython-36.pyc
--------------------------------------------------------------------------------
/common/__pycache__/logger.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/logger.cpython-36.pyc
--------------------------------------------------------------------------------
/common/__pycache__/timer.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/timer.cpython-36.pyc
--------------------------------------------------------------------------------
/data/__pycache__/dataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/data/__pycache__/dataset.cpython-36.pyc
--------------------------------------------------------------------------------
/main/__pycache__/config.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/main/__pycache__/config.cpython-36.pyc
--------------------------------------------------------------------------------
/data/MPII/__pycache__/MPII.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/data/MPII/__pycache__/MPII.cpython-36.pyc
--------------------------------------------------------------------------------
/common/nets/__pycache__/loss.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/nets/__pycache__/loss.cpython-36.pyc
--------------------------------------------------------------------------------
/common/nets/__pycache__/resnet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/nets/__pycache__/resnet.cpython-36.pyc
--------------------------------------------------------------------------------
/common/utils/__pycache__/vis.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/utils/__pycache__/vis.cpython-36.pyc
--------------------------------------------------------------------------------
/common/__pycache__/loss_recorder.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/loss_recorder.cpython-36.pyc
--------------------------------------------------------------------------------
/common/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/common/utils/__pycache__/dir_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/utils/__pycache__/dir_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/common/utils/__pycache__/pose_utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/utils/__pycache__/pose_utils.cpython-36.pyc
--------------------------------------------------------------------------------
/data/Human36M/__pycache__/Human36M.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/data/Human36M/__pycache__/Human36M.cpython-36.pyc
-------------------------------------------------------------------------------- /common/nets/__pycache__/balanced_parallel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/nets/__pycache__/balanced_parallel.cpython-36.pyc -------------------------------------------------------------------------------- /common/utils/dir_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | def make_folder(folder_name): 5 | if not os.path.exists(folder_name): 6 | os.makedirs(folder_name) 7 | 8 | def add_pypath(path): 9 | if path not in sys.path: 10 | sys.path.insert(0, path) 11 | 12 | -------------------------------------------------------------------------------- /common/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | self.warm_up = 0 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | if self.warm_up < 10: 28 | self.warm_up += 1 29 | return self.diff 30 | else: 31 | self.total_time += self.diff 32 | self.calls += 1 33 | self.average_time = self.total_time / self.calls 34 | 35 | if average: 36 | return self.average_time 37 | else: 38 | return self.diff 39 | -------------------------------------------------------------------------------- /common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | OK = '\033[92m' 5 | WARNING = '\033[93m' 6 | FAIL = '\033[91m' 7 | END = '\033[0m' 8 | 9 | PINK = '\033[95m' 10 | BLUE = '\033[94m' 11 | GREEN = OK 12 | RED = FAIL 13 | WHITE = END 14 | YELLOW = WARNING 15 | 16 | class colorlogger(): 17 | def __init__(self, log_dir, log_name='train_logs.txt'): 18 | # set log 19 | self._logger = logging.getLogger(log_name) 20 | self._logger.setLevel(logging.INFO) 21 | log_file = os.path.join(log_dir, log_name) 22 | if not os.path.exists(log_dir): 23 | os.makedirs(log_dir) 24 | file_log = logging.FileHandler(log_file, mode='a') 25 | file_log.setLevel(logging.INFO) 26 | console_log = logging.StreamHandler() 27 | console_log.setLevel(logging.INFO) 28 | formatter = logging.Formatter( 29 | "{}%(asctime)s{} %(message)s".format(GREEN, END), 30 | "%m-%d %H:%M:%S") 31 | file_log.setFormatter(formatter) 32 | console_log.setFormatter(formatter) 33 | self._logger.addHandler(file_log) 34 | self._logger.addHandler(console_log) 35 | 36 | def debug(self, msg): 37 | self._logger.debug(str(msg)) 38 | 39 | def info(self, msg): 40 | self._logger.info(str(msg)) 41 | 42 | def warning(self, msg): 43 | self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) 44 | 45 | def critical(self, msg): 46 | self._logger.critical(RED + 'CRI: ' + str(msg) + END) 47 | 
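# Illustrative usage note (not part of the original logger.py): colorlogger writes each record
# to both <log_dir>/<log_name> and the console, prefixed with a green timestamp, e.g.
#   logger = colorlogger(cfg.log_dir, log_name='train_logs.txt')
#   logger.info('Creating dataset...')
#   logger.warning('missing annotation')   # rendered in yellow with a 'WRN: ' prefix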
48 | def error(self, msg): 49 | self._logger.error(RED + 'ERR: ' + str(msg) + END) 50 | 51 | -------------------------------------------------------------------------------- /common/nets/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from utils.pose_utils import pixel2cam, warp_coord_to_original 6 | from config import cfg 7 | 8 | def _assert_no_grad(tensor): 9 | assert not tensor.requires_grad, \ 10 | "nn criterions don't compute the gradient w.r.t. targets - please " \ 11 | "mark these tensors as not requiring gradients" 12 | 13 | def soft_argmax(heatmaps, joint_num): 14 | assert isinstance(heatmaps, torch.Tensor) 15 | 16 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim*cfg.output_shape[0]*cfg.output_shape[1])) 17 | heatmaps = F.softmax(heatmaps, 2) 18 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim, cfg.output_shape[0], cfg.output_shape[1])) 19 | 20 | accu_x = heatmaps.sum(dim=(2,3)) 21 | accu_y = heatmaps.sum(dim=(2,4)) 22 | accu_z = heatmaps.sum(dim=(3,4)) 23 | 24 | accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[1]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0] 25 | accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[0]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0] 26 | accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,cfg.depth_dim+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0] 27 | 28 | accu_x = accu_x.sum(dim=2, keepdim=True) -1 29 | accu_y = accu_y.sum(dim=2, keepdim=True) -1 30 | accu_z = accu_z.sum(dim=2, keepdim=True) -1 31 | 32 | coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2) 33 | 34 | return coord_out 35 | 36 | class JointLocationLoss(nn.Module): 37 | def __init__(self): 38 | super(JointLocationLoss, self).__init__() 39 | 40 | def forward(self, heatmap_out, gt_coord, gt_vis, gt_have_depth): 41 | 42 | joint_num = gt_coord.shape[1] 43 | coord_out = soft_argmax(heatmap_out, joint_num) 44 | 45 | _assert_no_grad(gt_coord) 46 | _assert_no_grad(gt_vis) 47 | _assert_no_grad(gt_have_depth) 48 | 49 | loss = torch.abs(coord_out - gt_coord) * gt_vis 50 | loss = (loss[:,:,0] + loss[:,:,1] + loss[:,:,2] * gt_have_depth)/3. 51 | 52 | return loss.mean() 53 | 54 | -------------------------------------------------------------------------------- /main/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import numpy as np 5 | 6 | class Config: 7 | 8 | ## dataset 9 | trainset = ['Human36M', 'MPII'] # Human36M, MPII. !!Note that 0th db is reference db!! 
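# Illustrative note on common/nets/loss.py above (not part of config.py): JointLocationLoss is an
# L1 distance on the soft-argmax coordinates, masked per joint by gt_vis; the z term is further
# gated by gt_have_depth, so 2D-only datasets such as MPII supervise only x and y.
# For one joint with gt_vis = 1 and gt_have_depth = 0:
#   pred = (30.0, 40.0, 12.0), gt = (32.0, 41.0, 0.0)
#   per-joint loss = (|30 - 32| + |40 - 41| + |12 - 0| * 0) / 3 = 1.0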
10 | testset = 'Human36M' # Human36M, MPII 11 | 12 | ## directory 13 | cur_dir = osp.dirname(os.path.abspath(__file__)) 14 | root_dir = osp.join(cur_dir, '..') 15 | data_dir = osp.join(root_dir, 'data') 16 | output_dir = osp.join(root_dir, 'output') 17 | model_dir = osp.join(output_dir, 'model_dump') 18 | vis_dir = osp.join(output_dir, 'vis') 19 | log_dir = osp.join(output_dir, 'log') 20 | result_dir = osp.join(output_dir, 'result') 21 | 22 | ## model setting 23 | resnet_type = 50 # 18, 34, 50, 101, 152 24 | 25 | ## input, output 26 | input_shape = (256, 256) 27 | output_shape = (input_shape[0]//4, input_shape[1]//4) 28 | depth_dim = 64 29 | bbox_3d_shape = (2000, 2000, 2000) # depth, height, width 30 | pixel_mean = (0.485, 0.456, 0.406) 31 | pixel_std = (0.229, 0.224, 0.225) 32 | 33 | ## training config 34 | lr_dec_epoch = [15, 17] 35 | end_epoch = 20 36 | lr = 1e-3 37 | lr_dec_factor = 0.1 38 | optimizer = 'adam' 39 | weight_decay = 1e-5 40 | batch_size = 32 41 | 42 | ## testing config 43 | test_batch_size = 32 44 | flip_test = True 45 | 46 | ## others 47 | num_thread = 20 #8 48 | gpu_ids = '0' 49 | num_gpus = 1 50 | continue_train = False 51 | 52 | def set_args(self, gpu_ids, continue_train=False): 53 | self.gpu_ids = gpu_ids 54 | self.num_gpus = len(self.gpu_ids.split(',')) 55 | self.continue_train = continue_train 56 | os.environ["CUDA_VISIBLE_DEVICES"] = self.gpu_ids 57 | print('>>> Using GPU: {}'.format(self.gpu_ids)) 58 | 59 | cfg = Config() 60 | 61 | sys.path.insert(0, osp.join(cfg.root_dir, 'common')) 62 | from utils.dir_utils import add_pypath, make_folder 63 | add_pypath(osp.join(cfg.data_dir)) 64 | for i in range(len(cfg.trainset)): 65 | add_pypath(osp.join(cfg.data_dir, cfg.trainset[i])) 66 | add_pypath(osp.join(cfg.data_dir, cfg.testset)) 67 | make_folder(cfg.model_dir) 68 | make_folder(cfg.vis_dir) 69 | make_folder(cfg.log_dir) 70 | make_folder(cfg.result_dir) 71 | 72 | -------------------------------------------------------------------------------- /main/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from nets.resnet import ResNetBackbone 3 | from config import cfg 4 | 5 | class HeadNet(nn.Module): 6 | 7 | def __init__(self, joint_num): 8 | self.inplanes = 2048 9 | self.outplanes = 256 10 | 11 | super(HeadNet, self).__init__() 12 | 13 | self.deconv_layers = self._make_deconv_layer(3) 14 | self.final_layer = nn.Conv2d( 15 | in_channels=self.inplanes, 16 | out_channels=joint_num * cfg.depth_dim, 17 | kernel_size=1, 18 | stride=1, 19 | padding=0 20 | ) 21 | 22 | def _make_deconv_layer(self, num_layers): 23 | layers = [] 24 | for i in range(num_layers): 25 | layers.append( 26 | nn.ConvTranspose2d( 27 | in_channels=self.inplanes, 28 | out_channels=self.outplanes, 29 | kernel_size=4, 30 | stride=2, 31 | padding=1, 32 | output_padding=0, 33 | bias=False)) 34 | layers.append(nn.BatchNorm2d(self.outplanes)) 35 | layers.append(nn.ReLU(inplace=True)) 36 | self.inplanes = self.outplanes 37 | 38 | return nn.Sequential(*layers) 39 | 40 | def forward(self, x): 41 | x = self.deconv_layers(x) 42 | x = self.final_layer(x) 43 | 44 | return x 45 | 46 | def init_weights(self): 47 | for name, m in self.deconv_layers.named_modules(): 48 | if isinstance(m, nn.ConvTranspose2d): 49 | nn.init.normal_(m.weight, std=0.001) 50 | elif isinstance(m, nn.BatchNorm2d): 51 | nn.init.constant_(m.weight, 1) 52 | nn.init.constant_(m.bias, 0) 53 | for m in self.final_layer.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | 
nn.init.normal_(m.weight, std=0.001) 56 | nn.init.constant_(m.bias, 0) 57 | 58 | class ResPoseNet(nn.Module): 59 | def __init__(self, backbone, head): 60 | super(ResPoseNet, self).__init__() 61 | self.backbone = backbone 62 | self.head = head 63 | 64 | def forward(self, x): 65 | x = self.backbone(x) 66 | x = self.head(x) 67 | return x 68 | 69 | def get_pose_net(cfg, is_train, joint_num): 70 | 71 | backbone = ResNetBackbone(cfg.resnet_type) 72 | head_net = HeadNet(joint_num) 73 | if is_train: 74 | backbone.init_weights() 75 | head_net.init_weights() 76 | 77 | model = ResPoseNet(backbone, head_net) 78 | return model 79 | 80 | -------------------------------------------------------------------------------- /common/utils/vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | from config import cfg 8 | 9 | def vis_keypoints(img, kps, kps_lines, kp_thresh=0.4, alpha=1): 10 | 11 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 12 | cmap = plt.get_cmap('rainbow') 13 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 14 | colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] 15 | 16 | # Perform the drawing on a copy of the image, to allow for blending. 17 | kp_mask = np.copy(img) 18 | 19 | # Draw the keypoints. 20 | for l in range(len(kps_lines)): 21 | i1 = kps_lines[l][0] 22 | i2 = kps_lines[l][1] 23 | p1 = kps[0, i1].astype(np.int32), kps[1, i1].astype(np.int32) 24 | p2 = kps[0, i2].astype(np.int32), kps[1, i2].astype(np.int32) 25 | if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: 26 | cv2.line( 27 | kp_mask, p1, p2, 28 | color=colors[l], thickness=2, lineType=cv2.LINE_AA) 29 | if kps[2, i1] > kp_thresh: 30 | cv2.circle( 31 | kp_mask, p1, 32 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 33 | if kps[2, i2] > kp_thresh: 34 | cv2.circle( 35 | kp_mask, p2, 36 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 37 | 38 | # Blend the keypoints. 39 | return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) 40 | 41 | def vis_3d_skeleton(kpt_3d, kpt_3d_vis, kps_lines, filename=None): 42 | 43 | fig = plt.figure() 44 | ax = fig.add_subplot(111, projection='3d') 45 | 46 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 
47 | cmap = plt.get_cmap('rainbow') 48 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 49 | colors = [np.array((c[2], c[1], c[0])) for c in colors] 50 | 51 | for l in range(len(kps_lines)): 52 | i1 = kps_lines[l][0] 53 | i2 = kps_lines[l][1] 54 | x = np.array([kpt_3d[i1,0], kpt_3d[i2,0]]) 55 | y = np.array([kpt_3d[i1,1], kpt_3d[i2,1]]) 56 | z = np.array([kpt_3d[i1,2], kpt_3d[i2,2]]) 57 | 58 | if kpt_3d_vis[i1,0] > 0 and kpt_3d_vis[i2,0] > 0: 59 | ax.plot(x, z, -y, c=colors[l], linewidth=2) 60 | if kpt_3d_vis[i1,0] > 0: 61 | ax.scatter(kpt_3d[i1,0], kpt_3d[i1,2], -kpt_3d[i1,1], c=colors[l], marker='o') 62 | if kpt_3d_vis[i2,0] > 0: 63 | ax.scatter(kpt_3d[i2,0], kpt_3d[i2,2], -kpt_3d[i2,1], c=colors[l], marker='o') 64 | 65 | x_r = np.array([0, cfg.input_shape[1]], dtype=np.float32) 66 | y_r = np.array([0, cfg.input_shape[0]], dtype=np.float32) 67 | z_r = np.array([0, 1], dtype=np.float32) 68 | 69 | if filename is None: 70 | ax.set_title('3D vis') 71 | else: 72 | ax.set_title(filename) 73 | 74 | ax.set_xlabel('X Label') 75 | ax.set_ylabel('Z Label') 76 | ax.set_zlabel('Y Label') 77 | #ax.set_xlim([0,cfg.input_shape[1]]) 78 | #ax.set_ylim([0,1]) 79 | #ax.set_zlim([-cfg.input_shape[0],0]) 80 | ax.legend() 81 | 82 | plt.show() 83 | cv2.waitKey(0) 84 | 85 | -------------------------------------------------------------------------------- /common/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torchvision.models.resnet import BasicBlock, Bottleneck 3 | from torchvision.models.resnet import model_zoo, model_urls 4 | 5 | class ResNetBackbone(nn.Module): 6 | 7 | def __init__(self, resnet_type): 8 | 9 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 10 | 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 11 | 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 12 | 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 13 | 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} 14 | block, layers, channels, name = resnet_spec[resnet_type] 15 | 16 | self.name = name 17 | self.inplanes = 64 18 | super(ResNetBackbone, self).__init__() 19 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 20 | bias=False) 21 | self.bn1 = nn.BatchNorm2d(64) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 24 | self.layer1 = self._make_layer(block, 64, layers[0]) 25 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 26 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 27 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 28 | 29 | for m in self.modules(): 30 | if isinstance(m, nn.Conv2d): 31 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 32 | nn.init.normal_(m.weight, mean=0, std=0.001) 33 | elif isinstance(m, nn.BatchNorm2d): 34 | nn.init.constant_(m.weight, 1) 35 | nn.init.constant_(m.bias, 0) 36 | 37 | def _make_layer(self, block, planes, blocks, stride=1): 38 | downsample = None 39 | if stride != 1 or self.inplanes != planes * block.expansion: 40 | downsample = nn.Sequential( 41 | nn.Conv2d(self.inplanes, planes * block.expansion, 42 | kernel_size=1, stride=stride, bias=False), 43 | nn.BatchNorm2d(planes * block.expansion), 44 | ) 45 | 46 | layers = [] 47 | layers.append(block(self.inplanes, planes, stride, downsample)) 48 | self.inplanes = planes * 
block.expansion 49 | for i in range(1, blocks): 50 | layers.append(block(self.inplanes, planes)) 51 | 52 | return nn.Sequential(*layers) 53 | 54 | def forward(self, x): 55 | x = self.conv1(x) 56 | x = self.bn1(x) 57 | x = self.relu(x) 58 | x = self.maxpool(x) 59 | 60 | x = self.layer1(x) 61 | x = self.layer2(x) 62 | x = self.layer3(x) 63 | x = self.layer4(x) 64 | 65 | return x 66 | 67 | def init_weights(self): 68 | org_resnet = model_zoo.load_url(model_urls[self.name]) 69 | # drop orginal resnet fc layer, add 'None' in case of no fc layer, that will raise error 70 | org_resnet.pop('fc.weight', None) 71 | org_resnet.pop('fc.bias', None) 72 | self.load_state_dict(org_resnet) 73 | print("Initialize resnet from model zoo") 74 | 75 | 76 | -------------------------------------------------------------------------------- /main/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from config import cfg 3 | from base import Trainer 4 | import torch.backends.cudnn as cudnn 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 9 | parser.add_argument('--continue', dest='continue_train', action='store_true') 10 | args = parser.parse_args() 11 | 12 | if not args.gpu_ids: 13 | assert 0, print("Input GPU ids") 14 | 15 | if '-' in args.gpu_ids: 16 | gpus = args.gpu_ids.split('-') 17 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 18 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 19 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 20 | 21 | return args 22 | 23 | def main(): 24 | 25 | # argument parse and create log 26 | args = parse_args() 27 | cfg.set_args(args.gpu_ids, args.continue_train) 28 | cudnn.fastest = True 29 | cudnn.benchmark = True 30 | cudnn.deterministic = False 31 | cudnn.enabled = True 32 | 33 | trainer = Trainer(cfg) 34 | trainer._make_batch_generator() 35 | trainer._make_model() 36 | 37 | # train 38 | for epoch in range(trainer.start_epoch, cfg.end_epoch): 39 | trainer.scheduler.step() 40 | trainer.tot_timer.tic() 41 | trainer.read_timer.tic() 42 | 43 | for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator): 44 | trainer.read_timer.toc() 45 | trainer.gpu_timer.tic() 46 | 47 | trainer.optimizer.zero_grad() 48 | 49 | input_img = input_img.cuda() 50 | joint_img = joint_img.cuda() 51 | joint_vis = joint_vis.cuda() 52 | joints_have_depth = joints_have_depth.cuda() 53 | 54 | 55 | # forward 56 | heatmap_out = trainer.model(input_img) 57 | 58 | # backward 59 | JointLocationLoss = trainer.JointLocationLoss(heatmap_out, joint_img, joint_vis, joints_have_depth) 60 | 61 | loss = JointLocationLoss 62 | 63 | loss.backward() 64 | trainer.optimizer.step() 65 | 66 | trainer.gpu_timer.toc() 67 | 68 | screen = [ 69 | 'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch), 70 | 'lr: %g' % (trainer.scheduler.get_lr()[0]), 71 | 'speed: %.2f(%.2fs r%.2f)s/itr' % ( 72 | trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time), 73 | '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. 
* trainer.itr_per_epoch), 74 | '%s: %.4f' % ('loss_loc', JointLocationLoss.detach()), 75 | ] 76 | trainer.logger.info(' '.join(screen)) 77 | 78 | trainer.tot_timer.toc() 79 | trainer.tot_timer.tic() 80 | trainer.read_timer.tic() 81 | 82 | trainer.save_model({ 83 | 'epoch': epoch, 84 | 'network': trainer.model.state_dict(), 85 | 'optimizer': trainer.optimizer.state_dict(), 86 | 'scheduler': trainer.scheduler.state_dict(), 87 | }, epoch) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /main/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import argparse 4 | from tqdm import tqdm 5 | import numpy as np 6 | import cv2 7 | from config import cfg 8 | import torch 9 | from base import Tester 10 | from torch.nn.parallel.scatter_gather import gather 11 | from nets.loss import soft_argmax 12 | from utils.vis import vis_keypoints 13 | from utils.pose_utils import flip 14 | import torch.backends.cudnn as cudnn 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 19 | parser.add_argument('--test_epoch', type=str, dest='test_epoch') 20 | args = parser.parse_args() 21 | 22 | # test gpus 23 | if not args.gpu_ids: 24 | args.gpu_ids = str(np.argmin(mem_info())) 25 | 26 | if '-' in args.gpu_ids: 27 | gpus = args.gpu_ids.split('-') 28 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 29 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 30 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 31 | 32 | assert args.test_epoch, 'Test epoch is required.' 33 | return args 34 | 35 | def main(): 36 | 37 | args = parse_args() 38 | cfg.set_args(args.gpu_ids) 39 | cudnn.fastest = True 40 | cudnn.benchmark = True 41 | cudnn.deterministic = False 42 | cudnn.enabled = True 43 | 44 | tester = Tester(cfg, args.test_epoch) 45 | tester._make_batch_generator() 46 | tester._make_model() 47 | 48 | preds = [] 49 | 50 | with torch.no_grad(): 51 | for itr, input_img in enumerate(tqdm(tester.batch_generator)): 52 | 53 | input_img = input_img.cuda() 54 | 55 | # forward 56 | heatmap_out = tester.model(input_img) 57 | if cfg.num_gpus > 1: 58 | heatmap_out = gather(heatmap_out,0) 59 | coord_out = soft_argmax(heatmap_out, tester.joint_num) 60 | 61 | if cfg.flip_test: 62 | flipped_input_img = flip(input_img, dims=3) 63 | flipped_heatmap_out = tester.model(flipped_input_img) 64 | if cfg.num_gpus > 1: 65 | flipped_heatmap_out = gather(flipped_heatmap_out,0) 66 | flipped_coord_out = soft_argmax(flipped_heatmap_out, tester.joint_num) 67 | flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1 68 | for pair in tester.flip_pairs: 69 | flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone() 70 | coord_out = (coord_out + flipped_coord_out)/2. 
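# Illustrative note (not part of the original test.py): the flip test above mirrors the input,
# un-mirrors the predicted x with x' = cfg.output_shape[1] - x - 1, swaps left/right joint pairs
# back, and averages with the un-flipped prediction. With an output width of 64, a prediction at
# x = 10 on the flipped image corresponds to x = 64 - 10 - 1 = 53 in the original orientation.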
71 | 72 | vis = False 73 | if vis: 74 | filename = str(itr) 75 | tmpimg = input_img[0].cpu().numpy() 76 | tmpimg = tmpimg * cfg.pixel_std.reshape(3,1,1) + cfg.pixel_mean.reshape(3,1,1) 77 | tmpimg = tmpimg.astype(np.uint8) 78 | tmpimg = tmpimg[::-1, :, :] 79 | tmpimg = np.transpose(tmpimg,(1,2,0)).copy() 80 | tmpkps = np.zeros((3,tester.joint_num)) 81 | tmpkps[:2,:] = coord_out[0,:,:2].transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0] 82 | tmpkps[2,:] = 1 83 | tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton) 84 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_output.jpg'), tmpimg) 85 | 86 | coord_out = coord_out.cpu().numpy() 87 | preds.append(coord_out) 88 | 89 | # evaluate 90 | preds = np.concatenate(preds, axis=0) 91 | tester._evaluate(preds, cfg.result_dir) 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /data/MPII/MPII.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import json 5 | from pycocotools.coco import COCO 6 | from config import cfg 7 | 8 | class MPII: 9 | 10 | def __init__(self, data_split): 11 | self.data_split = data_split 12 | self.img_dir = osp.join('..', 'data', 'MPII') 13 | self.train_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'train.json') 14 | self.test_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'test.json') 15 | self.joint_num = 16 16 | self.joints_name = ('R_Ankle', 'R_Knee', 'R_Hip', 'L_Hip', 'L_Knee', 'L_Ankle', 'Pelvis', 'Thorax', 'Neck', 'Head', 'R_Wrist', 'R_Elbow', 'R_Shoulder', 'L_Shoulder', 'L_Elbow', 'L_Wrist') 17 | self.flip_pairs = ( (0, 5), (1, 4), (2, 3), (10, 15), (11, 14), (12, 13) ) 18 | self.skeleton = ( (0, 1), (1, 2), (2, 6), (7, 12), (12, 11), (11, 10), (5, 4), (4, 3), (3, 6), (7, 13), (13, 14), (14, 15), (6, 7), (7, 8), (8, 9) ) 19 | self.lr_skeleton = ( ((0,0),(0,0)) ) # dumy value 20 | 21 | self.joints_have_depth = False 22 | 23 | def load_data(self): 24 | 25 | if self.data_split == 'train': 26 | db = COCO(self.train_annot_path) 27 | elif self.data_split == 'test': 28 | db = COCO(self.test_annot_path) 29 | else: 30 | print('Unknown data subset') 31 | assert 0 32 | 33 | data = [] 34 | for aid in db.anns.keys(): 35 | ann = db.anns[aid] 36 | 37 | if (ann['image_id'] not in db.imgs) or ann['iscrowd'] or (ann['num_keypoints'] == 0): 38 | continue 39 | 40 | # sanitize bboxes 41 | x, y, w, h = ann['bbox'] 42 | img = db.loadImgs(ann['image_id'])[0] 43 | width, height = img['width'], img['height'] 44 | x1 = np.max((0, x)) 45 | y1 = np.max((0, y)) 46 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 47 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 48 | if ann['area'] > 0 and x2 >= x1 and y2 >= y1: 49 | bbox = np.array([x1, y1, x2-x1, y2-y1]) 50 | else: 51 | continue 52 | 53 | # aspect ratio preserving bbox 54 | w = bbox[2] 55 | h = bbox[3] 56 | c_x = bbox[0] + w/2. 57 | c_y = bbox[1] + h/2. 58 | aspect_ratio = cfg.input_shape[1]/cfg.input_shape[0] 59 | if w > aspect_ratio * h: 60 | h = w / aspect_ratio 61 | elif w < aspect_ratio * h: 62 | w = h * aspect_ratio 63 | bbox[2] = w#*1.25 64 | bbox[3] = h#*1.25 65 | bbox[0] = c_x - bbox[2]/2. 66 | bbox[1] = c_y - bbox[3]/2. 
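# Illustrative note (not part of the original MPII.py): the block above only grows the shorter
# side so the box matches cfg.input_shape's aspect ratio (1.0 for the 256x256 input) while
# keeping the same center. A 120x60 box becomes 120x120, and a 60x120 box also becomes 120x120.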
67 | 68 | # joints and vis 69 | if self.data_split == 'train': 70 | joint_img = np.array(ann['keypoints']).reshape(self.joint_num,3) 71 | joint_vis = joint_img[:,2].copy().reshape(-1,1) 72 | joint_img[:,2] = 0 73 | else: 74 | joint_img = np.zeros((self.joint_num, 3), dtype=np.float) 75 | joint_vis = np.zeros((self.joint_num, 1), dtype=np.float) 76 | 77 | imgname = db.imgs[ann['image_id']]['file_name'] 78 | img_path = osp.join(self.img_dir, imgname) 79 | data.append({ 80 | 'img_path': img_path, 81 | 'bbox': bbox, 82 | 'joint_img': joint_img, # [org_img_x, org_img_y, 0] 83 | 'joint_cam': np.ones(joint_img.shape), # dummy value 84 | 'joint_vis': joint_vis, 85 | 'center_cam': np.ones(3), # dummy value 86 | 'f': np.ones(2), # dummy value 87 | 'c': np.ones(2) # dummy value 88 | }) 89 | 90 | return data 91 | 92 | def evaluate(self, preds, result_dir): 93 | print('MPII evaluation not supported') 94 | 95 | 96 | -------------------------------------------------------------------------------- /common/utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from config import cfg 4 | import copy 5 | 6 | def cam2pixel(cam_coord, f, c): 7 | 8 | x = cam_coord[..., 0] / cam_coord[..., 2] * f[0] + c[0] 9 | y = cam_coord[..., 1] / cam_coord[..., 2] * f[1] + c[1] 10 | z = cam_coord[..., 2] 11 | 12 | return x,y,z 13 | 14 | def pixel2cam(pixel_coord, f, c): 15 | 16 | x = (pixel_coord[..., 0] - c[0]) / f[0] * pixel_coord[..., 2] 17 | y = (pixel_coord[..., 1] - c[1]) / f[1] * pixel_coord[..., 2] 18 | z = pixel_coord[..., 2] 19 | 20 | return x,y,z 21 | 22 | def rigid_transform_3D(A, B): 23 | centroid_A = np.mean(A, axis = 0) 24 | centroid_B = np.mean(B, axis = 0) 25 | H = np.dot(np.transpose(A - centroid_A), B - centroid_B) 26 | U, s, V = np.linalg.svd(H) 27 | R = np.dot(np.transpose(V), np.transpose(U)) 28 | if np.linalg.det(R) < 0: 29 | V[2] = -V[2] 30 | R = np.dot(np.transpose(V), np.transpose(U)) 31 | t = -np.dot(R, np.transpose(centroid_A)) + np.transpose(centroid_B) 32 | return R, t 33 | 34 | def rigid_align(A, B): 35 | R, t = rigid_transform_3D(A, B) 36 | A2 = np.transpose(np.dot(R, np.transpose(A))) + t 37 | return A2 38 | 39 | def process_world_coordinate(joint_world, root_idx, joint_num, R, T, f, c): 40 | 41 | # project world coordinates to image space 42 | joint_cam = np.zeros((joint_num, 3)) 43 | for i in range(joint_num): 44 | joint_cam[i] = np.dot(R, joint_world[i] - T) 45 | center_cam = joint_cam[root_idx] 46 | 47 | # Subtract center depth 48 | joint_img = np.zeros((joint_num, 3)) 49 | joint_img[:, 0], joint_img[:, 1], joint_img[:, 2] = cam2pixel(joint_cam, f, c) 50 | joint_img[:, 2] = joint_img[:, 2] - center_cam[2] 51 | joint_vis = np.ones((joint_num,1)) 52 | 53 | ## bbox3d 54 | # build 3D bounding box centered on center_cam, sized with bbox_3d_shape 55 | bbox3d_lt = center_cam - np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 56 | bbox3d_rb = center_cam + np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 57 | 58 | # back-project 3D BBox to 2D image 59 | bbox2d_l, bbox2d_t, _ = cam2pixel(bbox3d_lt, f, c) 60 | bbox2d_r, bbox2d_b, _ = cam2pixel(bbox3d_rb, f, c) 61 | bbox = np.array([bbox2d_l, bbox2d_t, bbox2d_r-bbox2d_l+1, bbox2d_b-bbox2d_t+1]) 62 | 63 | return joint_img, joint_cam, joint_vis, center_cam, bbox 64 | 65 | def warp_coord_to_original(joint_out, bbox, center_cam): 66 | 67 | # joint_out: output from soft-argmax 68 | x = joint_out[:, 0] / cfg.output_shape[1] * bbox[2] 
+ bbox[0] 69 | y = joint_out[:, 1] / cfg.output_shape[0] * bbox[3] + bbox[1] 70 | z = (joint_out[:, 2] / cfg.depth_dim * 2. - 1.) * (cfg.bbox_3d_shape[0]/2.) + center_cam[2] 71 | 72 | return x, y, z 73 | 74 | def fliplr_joints(_joints, width, matched_parts): 75 | """ 76 | flip coords 77 | joints: numpy array, nJoints * dim, dim == 2 [x, y] or dim == 3 [x, y, z] 78 | width: image width 79 | matched_parts: list of pairs 80 | """ 81 | joints = _joints.copy() 82 | # Flip horizontal 83 | joints[:, 0] = width - joints[:, 0] - 1 84 | 85 | # Change left-right parts 86 | for pair in matched_parts: 87 | joints[pair[0], :], joints[pair[1], :] = joints[pair[1], :], joints[pair[0], :].copy() 88 | 89 | return joints 90 | 91 | def multi_meshgrid(*args): 92 | """ 93 | Creates a meshgrid from possibly many 94 | elements (instead of only 2). 95 | Returns a nd tensor with as many dimensions 96 | as there are arguments 97 | """ 98 | args = list(args) 99 | template = [1 for _ in args] 100 | for i in range(len(args)): 101 | n = args[i].shape[0] 102 | template_copy = template.copy() 103 | template_copy[i] = n 104 | args[i] = args[i].view(*template_copy) 105 | # there will be some broadcast magic going on 106 | return tuple(args) 107 | 108 | 109 | def flip(tensor, dims): 110 | if not isinstance(dims, (tuple, list)): 111 | dims = [dims] 112 | indices = [torch.arange(tensor.shape[dim] - 1, -1, -1, 113 | dtype=torch.int64) for dim in dims] 114 | multi_indices = multi_meshgrid(*indices) 115 | final_indices = [slice(i) for i in tensor.shape] 116 | for i, dim in enumerate(dims): 117 | final_indices[dim] = multi_indices[i] 118 | flipped = tensor[final_indices] 119 | assert flipped.device == tensor.device 120 | assert flipped.requires_grad == tensor.requires_grad 121 | return flipped 122 | 123 | -------------------------------------------------------------------------------- /tool/preprocess_h36m.m: -------------------------------------------------------------------------------- 1 | % Preprocess human3.6m dataset 2 | % Place this file to the Release-v1.1 folder and run it 3 | 4 | function preprocess_h36m() 5 | 6 | close all; 7 | %clear; 8 | %clc; 9 | 10 | addpaths; 11 | 12 | %-------------------------------------------------------------------------- 13 | % PARAMETERS 14 | 15 | % Subject (1, 5, 6, 7, 8, 9, 11) 16 | SUBJECT = [1 5 6 7 8 9 11]; 17 | 18 | % Action (2 ~ 16) 19 | ACTION = 2:16; 20 | 21 | % Subaction (1 ~ 2) 22 | SUBACTION = 1:2; 23 | 24 | % Camera (1 ~ 4) 25 | CAMERA = 1:4; 26 | 27 | num_joint = 17; 28 | root_dir = 'SET_YOUR_OUTPUT_DIRECTORY'; %you have to set your output directory 29 | 30 | % if rgb sequence is declared in the loop, it causes stuck (do not know 31 | % reason) 32 | rgb_sequence = cell(1,100000000); 33 | COUNT = 1; 34 | %-------------------------------------------------------------------------- 35 | % MAIN LOOP 36 | % For each subject, action, subaction, and camera.. 37 | for subject = SUBJECT 38 | for action = ACTION 39 | for subaction = SUBACTION 40 | for camera = CAMERA 41 | 42 | fprintf('Processing subject %d, action %d, subaction %d, camera %d..\n', ... 43 | subject, action, subaction, camera); 44 | 45 | save_dir = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d', ... 
46 | root_dir, subject, action, subaction, camera); 47 | if ~exist(save_dir, 'dir') 48 | mkdir(save_dir); 49 | end 50 | 51 | if (subject==11) && (action==2) && (subaction==2) && (camera==1) 52 | fprintf('There is an error in subject 11, action 2, subaction 2, and camera 1\n'); 53 | continue; 54 | end 55 | 56 | % Select sequence 57 | Sequence = H36MSequence(subject, action, subaction, camera); 58 | 59 | % Get 3D pose and 2D pose 60 | Features{1} = H36MPose3DPositionsFeature(); % 3D world coordinates 61 | Features{1}.Part = 'body'; % Only consider 17 joints 62 | Features{2} = H36MPose3DPositionsFeature('Monocular', true); % 3D camera coordinates 63 | Features{2}.Part = 'body'; % Only consider 17 joints 64 | Features{3} = H36MPose2DPositionsFeature(); % 2D image coordinates 65 | Features{3}.Part = 'body'; % Only consider 17 joints 66 | F = H36MComputeFeatures(Sequence, Features); 67 | num_frame = Sequence.NumFrames; 68 | pose3d_world = reshape(F{1}, num_frame, 3, num_joint); 69 | pose3d = reshape(F{2}, num_frame, 3, num_joint); 70 | pose2d = reshape(F{3}, num_frame, 2, num_joint); 71 | 72 | % Camera (in global coordinate) 73 | Camera = Sequence.getCamera(); 74 | 75 | % Sanity check 76 | if false 77 | R = Camera.R; % rotation matrix 78 | T = Camera.T'; % origin of the world coord system 79 | K = [Camera.f(1) 0 Camera.c(1); 80 | 0 Camera.f(2) Camera.c(2); 81 | 0 0 1]; % f: focal length, c: principal points 82 | error = 0; 83 | for i = 1:num_frame 84 | X = squeeze(pose3d_global(i,:,:)); 85 | x = squeeze(pose2d(i,:,:)); 86 | px = K*R*(X-T); 87 | px = px ./ px(3,:); 88 | px = px(1:2,:); 89 | error = error + mean(sqrt(sum((px-x).^2, 1))); 90 | end 91 | error = error / num_frame; 92 | fprintf('reprojection error = %.2f (pixels)\n', error); 93 | keyboard; 94 | end 95 | 96 | %% Image, bounding box for each sampled frame 97 | fprintf('Load RGB video: '); 98 | rgb_extractor = H36MRGBVideoFeature(); 99 | rgb_sequence{COUNT} = rgb_extractor.serializer(Sequence); 100 | fprintf('Done!!\n'); 101 | img_height = zeros(num_frame,1); 102 | img_width = zeros(num_frame,1); 103 | 104 | % For each frame, 105 | for i = 1:num_frame 106 | if mod(i,100) == 1 107 | fprintf('.'); 108 | end 109 | 110 | % Save image 111 | % Get data 112 | img = rgb_sequence{COUNT}.getFrame(i); 113 | [h, w, c] = size(img); 114 | img_height(i) = h; 115 | img_width(i) = w; 116 | img_name = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d_%06d.jpg', ... 
117 | save_dir, subject, action, subaction, camera, i); 118 | imwrite(img, img_name); 119 | 120 | end 121 | 122 | COUNT = COUNT + 1; 123 | 124 | % Save data 125 | pose3d_world = permute(pose3d_world,[1,3,2]); % world coordinate 3D keypoint coordinates 126 | R = Camera.R; % rotation matrix 127 | T = Camera.T; % origin of the world coord system 128 | f = Camera.f; % focal length 129 | c = Camera.c; % principal points 130 | filename = sprintf('%s/h36m_meta.mat', save_dir); 131 | save(filename, 'pose3d_world', 'f', 'c', 'R', 'T', 'img_height', 'img_width'); 132 | 133 | fprintf('\n'); 134 | 135 | end 136 | end 137 | end 138 | end 139 | 140 | end 141 | 142 | -------------------------------------------------------------------------------- /common/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import math 4 | import time 5 | import glob 6 | import abc 7 | from torch.utils.data import DataLoader 8 | import torch.optim 9 | import torchvision.transforms as transforms 10 | 11 | from config import cfg 12 | from dataset import DatasetLoader 13 | from timer import Timer 14 | from logger import colorlogger 15 | from nets.balanced_parallel import DataParallelModel, DataParallelCriterion 16 | from model import get_pose_net 17 | from nets import loss 18 | 19 | # dynamic dataset import 20 | for i in range(len(cfg.trainset)): 21 | exec('from ' + cfg.trainset[i] + ' import ' + cfg.trainset[i]) 22 | exec('from ' + cfg.testset + ' import ' + cfg.testset) 23 | 24 | class Base(object): 25 | __metaclass__ = abc.ABCMeta 26 | 27 | def __init__(self, cfg, log_name='logs.txt'): 28 | 29 | self.cfg = cfg 30 | self.cur_epoch = 0 31 | 32 | # timer 33 | self.tot_timer = Timer() 34 | self.gpu_timer = Timer() 35 | self.read_timer = Timer() 36 | 37 | # logger 38 | self.logger = colorlogger(cfg.log_dir, log_name=log_name) 39 | 40 | @abc.abstractmethod 41 | def _make_batch_generator(self): 42 | return 43 | 44 | @abc.abstractmethod 45 | def _make_model(self): 46 | return 47 | 48 | def save_model(self, state, epoch): 49 | file_path = osp.join(self.cfg.model_dir,'snapshot_{}.pth.tar'.format(str(epoch))) 50 | torch.save(state, file_path) 51 | self.logger.info("Write snapshot into {}".format(file_path)) 52 | 53 | def load_model(self, model, optimizer, scheduler): 54 | model_file_list = glob.glob(osp.join(self.cfg.model_dir,'*.pth.tar')) 55 | cur_epoch = max([int(file_name[file_name.find('snapshot_') + 9 : file_name.find('.pth.tar')]) for file_name in model_file_list]) 56 | ckpt = torch.load(osp.join(self.cfg.model_dir, 'snapshot_' + str(cur_epoch) + '.pth.tar')) 57 | start_epoch = ckpt['epoch'] + 1 58 | model.load_state_dict(ckpt['network']) 59 | optimizer.load_state_dict(ckpt['optimizer']) 60 | scheduler.load_state_dict(ckpt['scheduler']) 61 | 62 | return start_epoch, model, optimizer, scheduler 63 | 64 | 65 | class Trainer(Base): 66 | 67 | def __init__(self, cfg): 68 | self.JointLocationLoss = DataParallelCriterion(loss.JointLocationLoss()) 69 | super(Trainer, self).__init__(cfg, log_name = 'train_logs.txt') 70 | 71 | def get_optimizer(self, optimizer_name, model): 72 | if optimizer_name == 'adam': 73 | optimizer = torch.optim.Adam(model.parameters(), lr=self.cfg.lr) 74 | elif optimizer_name == 'sgd': 75 | optimizer = torch.optim.SGD(model.parameters(), lr=self.cfg.lr, momentum=self.cfg.momentum, weight_decay=self.cfg.wd) 76 | else: 77 | print("Error! 
Unknown optimizer name: ", optimizer_name) 78 | assert 0 79 | 80 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.cfg.lr_dec_epoch, gamma=self.cfg.lr_dec_factor) 81 | return optimizer, scheduler 82 | 83 | def _make_batch_generator(self): 84 | # data load and construct batch generator 85 | self.logger.info("Creating dataset...") 86 | trainset_list = [] 87 | for i in range(len(self.cfg.trainset)): 88 | trainset_list.append(eval(self.cfg.trainset[i])("train")) 89 | trainset_loader = DatasetLoader(trainset_list, True, transforms.Compose([\ 90 | transforms.ToTensor(), 91 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 92 | )) 93 | batch_generator = DataLoader(dataset=trainset_loader, batch_size=self.cfg.num_gpus*self.cfg.batch_size, shuffle=True, num_workers=self.cfg.num_thread, pin_memory=True) 94 | 95 | self.joint_num = trainset_loader.joint_num[0] 96 | self.itr_per_epoch = math.ceil(trainset_loader.__len__() / cfg.num_gpus / cfg.batch_size) 97 | self.batch_generator = batch_generator 98 | 99 | def _make_model(self): 100 | # prepare network 101 | self.logger.info("Creating graph and optimizer...") 102 | model = get_pose_net(self.cfg, True, self.joint_num) 103 | model = DataParallelModel(model).cuda() 104 | optimizer, scheduler = self.get_optimizer(self.cfg.optimizer, model) 105 | if self.cfg.continue_train: 106 | start_epoch, model, optimizer, scheduler = self.load_model(model, optimizer, scheduler) 107 | else: 108 | start_epoch = 0 109 | model.train() 110 | 111 | self.start_epoch = start_epoch 112 | self.model = model 113 | self.optimizer = optimizer 114 | self.scheduler = scheduler 115 | 116 | class Tester(Base): 117 | 118 | def __init__(self, cfg, test_epoch): 119 | self.coord_out = loss.soft_argmax 120 | self.test_epoch = int(test_epoch) 121 | super(Tester, self).__init__(cfg, log_name = 'test_logs.txt') 122 | 123 | def _make_batch_generator(self): 124 | # data load and construct batch generator 125 | self.logger.info("Creating dataset...") 126 | testset = eval(self.cfg.testset)("test") 127 | testset_loader = DatasetLoader(testset, False, transforms.Compose([\ 128 | transforms.ToTensor(), 129 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 130 | )) 131 | batch_generator = DataLoader(dataset=testset_loader, batch_size=self.cfg.num_gpus*self.cfg.test_batch_size, shuffle=False, num_workers=self.cfg.num_thread, pin_memory=True) 132 | 133 | self.testset = testset 134 | self.joint_num = testset_loader.joint_num 135 | self.skeleton = testset_loader.skeleton 136 | self.flip_pairs = testset.flip_pairs 137 | self.tot_sample_num = testset_loader.__len__() 138 | self.batch_generator = batch_generator 139 | 140 | def _make_model(self): 141 | 142 | model_path = os.path.join(self.cfg.model_dir, 'snapshot_%d.pth.tar' % self.test_epoch) 143 | assert os.path.exists(model_path), 'Cannot find model at ' + model_path 144 | self.logger.info('Load checkpoint from {}'.format(model_path)) 145 | 146 | # prepare network 147 | self.logger.info("Creating graph...") 148 | model = get_pose_net(self.cfg, False, self.joint_num) 149 | model = DataParallelModel(model).cuda() 150 | ckpt = torch.load(model_path) 151 | model.load_state_dict(ckpt['network']) 152 | model.eval() 153 | 154 | self.model = model 155 | 156 | def _evaluate(self, preds, result_save_path): 157 | self.testset.evaluate(preds, result_save_path) 158 | 159 | -------------------------------------------------------------------------------- /README.md: 
--------------------------------------------------------------------------------
1 | # Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation
2 | 
3 | 
4 | 
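The core operation in this codebase is the differentiable soft-argmax in `common/nets/loss.py`: the predicted heatmap volume is softmax-normalized and each joint coordinate is read off as the expected index, so coordinates can be trained end to end with an L1 loss. A minimal NumPy sketch of that idea (shapes and variable names here are illustrative, not the repository's API):

```python
import numpy as np

depth, height, width = 64, 64, 64              # cfg.depth_dim and cfg.output_shape in main/config.py
heat = np.random.randn(depth, height, width)   # raw heatmap volume for a single joint

prob = np.exp(heat - heat.max())
prob /= prob.sum()                             # softmax over every voxel

zs, ys, xs = np.meshgrid(np.arange(depth), np.arange(height), np.arange(width), indexing='ij')
x = (prob * xs).sum()                          # expected x index, a float in [0, width - 1]
y = (prob * ys).sum()
z = (prob * zs).sum()
print(x, y, z)
```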
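A minimal forward-pass sketch, assuming the original dependency versions (the PyTorch 0.4-era `torchvision` whose `model_zoo`/`model_urls` are imported by `common/nets/resnet.py`) and `main/` as the working directory; the joint count and the random input below are placeholders, not part of the repository's API:

```python
import torch
from config import cfg              # main/config.py; importing it also puts common/ and data/ on sys.path
from model import get_pose_net      # main/model.py

joint_num = 16                      # placeholder, e.g. MPII's 16 joints (data/MPII/MPII.py)
net = get_pose_net(cfg, is_train=False, joint_num=joint_num)

img = torch.randn(1, 3, *cfg.input_shape)   # (N, 3, 256, 256)
heatmaps = net(img)                         # (N, joint_num * cfg.depth_dim, 64, 64)
# soft_argmax in common/nets/loss.py (written for CUDA tensors) reduces this volume
# to per-joint (x, y, z) coordinates of shape (N, joint_num, 3).
```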