├── common ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── vis.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── dir_utils.cpython-36.pyc │ │ └── pose_utils.cpython-36.pyc │ ├── dir_utils.py │ ├── vis.py │ └── pose_utils.py ├── __pycache__ │ ├── base.cpython-36.pyc │ ├── logger.cpython-36.pyc │ ├── timer.cpython-36.pyc │ └── loss_recorder.cpython-36.pyc ├── nets │ ├── __pycache__ │ │ ├── loss.cpython-36.pyc │ │ ├── resnet.cpython-36.pyc │ │ └── balanced_parallel.cpython-36.pyc │ ├── loss.py │ ├── resnet.py │ └── balanced_parallel.py ├── timer.py ├── logger.py └── base.py ├── assets ├── 1.png └── 2.png ├── main ├── __pycache__ │ ├── model.cpython-36.pyc │ └── config.cpython-36.pyc ├── config.py ├── model.py ├── train.py └── test.py ├── data ├── __pycache__ │ └── dataset.cpython-36.pyc ├── MPII │ ├── __pycache__ │ │ └── MPII.cpython-36.pyc │ └── MPII.py ├── Human36M │ ├── __pycache__ │ │ └── Human36M.cpython-36.pyc │ └── Human36M.py └── dataset.py ├── tool └── preprocess_h36m.m └── README.md /common/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /assets/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/1.png -------------------------------------------------------------------------------- /assets/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/assets/2.png -------------------------------------------------------------------------------- /main/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/main/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/base.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/base.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- /common/__pycache__/timer.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/__pycache__/timer.cpython-36.pyc -------------------------------------------------------------------------------- /data/__pycache__/dataset.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/data/__pycache__/dataset.cpython-36.pyc -------------------------------------------------------------------------------- 
-------------------------------------------------------------------------------- /common/nets/__pycache__/balanced_parallel.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/HEAD/common/nets/__pycache__/balanced_parallel.cpython-36.pyc -------------------------------------------------------------------------------- /common/utils/dir_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | def make_folder(folder_name): 5 | if not os.path.exists(folder_name): 6 | os.makedirs(folder_name) 7 | 8 | def add_pypath(path): 9 | if path not in sys.path: 10 | sys.path.insert(0, path) 11 | 12 | -------------------------------------------------------------------------------- /common/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | self.warm_up = 0 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | if self.warm_up < 10: 28 | self.warm_up += 1 29 | return self.diff 30 | else: 31 | self.total_time += self.diff 32 | self.calls += 1 33 | self.average_time = self.total_time / self.calls 34 | 35 | if average: 36 | return self.average_time 37 | else: 38 | return self.diff 39 | -------------------------------------------------------------------------------- /common/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | OK = '\033[92m' 5 | WARNING = '\033[93m' 6 | FAIL = '\033[91m' 7 | END = '\033[0m' 8 | 9 | PINK = '\033[95m' 10 | BLUE = '\033[94m' 11 | GREEN = OK 12 | RED = FAIL 13 | WHITE = END 14 | YELLOW = WARNING 15 | 16 | class colorlogger(): 17 | def __init__(self, log_dir, log_name='train_logs.txt'): 18 | # set log 19 | self._logger = logging.getLogger(log_name) 20 | self._logger.setLevel(logging.INFO) 21 | log_file = os.path.join(log_dir, log_name) 22 | if not os.path.exists(log_dir): 23 | os.makedirs(log_dir) 24 | file_log = logging.FileHandler(log_file, mode='a') 25 | file_log.setLevel(logging.INFO) 26 | console_log = logging.StreamHandler() 27 | console_log.setLevel(logging.INFO) 28 | formatter = logging.Formatter( 29 | "{}%(asctime)s{} %(message)s".format(GREEN, END), 30 | "%m-%d %H:%M:%S") 31 | file_log.setFormatter(formatter) 32 | console_log.setFormatter(formatter) 33 | self._logger.addHandler(file_log) 34 | self._logger.addHandler(console_log) 35 | 36 | def debug(self, msg): 37 | self._logger.debug(str(msg)) 38 | 39 | def info(self, msg): 40 | self._logger.info(str(msg)) 41 | 42 | def warning(self, msg): 43 | self._logger.warning(WARNING + 'WRN: ' + str(msg) + END) 44 | 45 | def critical(self, msg): 46 | self._logger.critical(RED + 'CRI: ' + str(msg) + END) 47 | 
48 | def error(self, msg): 49 | self._logger.error(RED + 'ERR: ' + str(msg) + END) 50 | 51 | -------------------------------------------------------------------------------- /common/nets/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | from utils.pose_utils import pixel2cam, warp_coord_to_original 6 | from config import cfg 7 | 8 | def _assert_no_grad(tensor): 9 | assert not tensor.requires_grad, \ 10 | "nn criterions don't compute the gradient w.r.t. targets - please " \ 11 | "mark these tensors as not requiring gradients" 12 | 13 | def soft_argmax(heatmaps, joint_num): 14 | assert isinstance(heatmaps, torch.Tensor) 15 | 16 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim*cfg.output_shape[0]*cfg.output_shape[1])) 17 | heatmaps = F.softmax(heatmaps, 2) 18 | heatmaps = heatmaps.reshape((-1, joint_num, cfg.depth_dim, cfg.output_shape[0], cfg.output_shape[1])) 19 | 20 | accu_x = heatmaps.sum(dim=(2,3)) 21 | accu_y = heatmaps.sum(dim=(2,4)) 22 | accu_z = heatmaps.sum(dim=(3,4)) 23 | 24 | accu_x = accu_x * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[1]+1).type(torch.cuda.FloatTensor), devices=[accu_x.device.index])[0] 25 | accu_y = accu_y * torch.cuda.comm.broadcast(torch.arange(1,cfg.output_shape[0]+1).type(torch.cuda.FloatTensor), devices=[accu_y.device.index])[0] 26 | accu_z = accu_z * torch.cuda.comm.broadcast(torch.arange(1,cfg.depth_dim+1).type(torch.cuda.FloatTensor), devices=[accu_z.device.index])[0] 27 | 28 | accu_x = accu_x.sum(dim=2, keepdim=True) -1 29 | accu_y = accu_y.sum(dim=2, keepdim=True) -1 30 | accu_z = accu_z.sum(dim=2, keepdim=True) -1 31 | 32 | coord_out = torch.cat((accu_x, accu_y, accu_z), dim=2) 33 | 34 | return coord_out 35 | 36 | class JointLocationLoss(nn.Module): 37 | def __init__(self): 38 | super(JointLocationLoss, self).__init__() 39 | 40 | def forward(self, heatmap_out, gt_coord, gt_vis, gt_have_depth): 41 | 42 | joint_num = gt_coord.shape[1] 43 | coord_out = soft_argmax(heatmap_out, joint_num) 44 | 45 | _assert_no_grad(gt_coord) 46 | _assert_no_grad(gt_vis) 47 | _assert_no_grad(gt_have_depth) 48 | 49 | loss = torch.abs(coord_out - gt_coord) * gt_vis 50 | loss = (loss[:,:,0] + loss[:,:,1] + loss[:,:,2] * gt_have_depth)/3. 51 | 52 | return loss.mean() 53 | 54 | -------------------------------------------------------------------------------- /main/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import numpy as np 5 | 6 | class Config: 7 | 8 | ## dataset 9 | trainset = ['Human36M', 'MPII'] # Human36M, MPII. !!Note that 0th db is reference db!! 
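# (The 0th entry also fixes the joint set the network is built for:
#  Trainer._make_batch_generator in common/base.py reads
#  trainset_loader.joint_num[0] to size the model output.)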
10 | testset = 'Human36M' # Human36M, MPII 11 | 12 | ## directory 13 | cur_dir = osp.dirname(os.path.abspath(__file__)) 14 | root_dir = osp.join(cur_dir, '..') 15 | data_dir = osp.join(root_dir, 'data') 16 | output_dir = osp.join(root_dir, 'output') 17 | model_dir = osp.join(output_dir, 'model_dump') 18 | vis_dir = osp.join(output_dir, 'vis') 19 | log_dir = osp.join(output_dir, 'log') 20 | result_dir = osp.join(output_dir, 'result') 21 | 22 | ## model setting 23 | resnet_type = 50 # 18, 34, 50, 101, 152 24 | 25 | ## input, output 26 | input_shape = (256, 256) 27 | output_shape = (input_shape[0]//4, input_shape[1]//4) 28 | depth_dim = 64 29 | bbox_3d_shape = (2000, 2000, 2000) # depth, height, width 30 | pixel_mean = (0.485, 0.456, 0.406) 31 | pixel_std = (0.229, 0.224, 0.225) 32 | 33 | ## training config 34 | lr_dec_epoch = [15, 17] 35 | end_epoch = 20 36 | lr = 1e-3 37 | lr_dec_factor = 0.1 38 | optimizer = 'adam' 39 | weight_decay = 1e-5 40 | batch_size = 32 41 | 42 | ## testing config 43 | test_batch_size = 32 44 | flip_test = True 45 | 46 | ## others 47 | num_thread = 20 #8 48 | gpu_ids = '0' 49 | num_gpus = 1 50 | continue_train = False 51 | 52 | def set_args(self, gpu_ids, continue_train=False): 53 | self.gpu_ids = gpu_ids 54 | self.num_gpus = len(self.gpu_ids.split(',')) 55 | self.continue_train = continue_train 56 | os.environ["CUDA_VISIBLE_DEVICES"] = self.gpu_ids 57 | print('>>> Using GPU: {}'.format(self.gpu_ids)) 58 | 59 | cfg = Config() 60 | 61 | sys.path.insert(0, osp.join(cfg.root_dir, 'common')) 62 | from utils.dir_utils import add_pypath, make_folder 63 | add_pypath(osp.join(cfg.data_dir)) 64 | for i in range(len(cfg.trainset)): 65 | add_pypath(osp.join(cfg.data_dir, cfg.trainset[i])) 66 | add_pypath(osp.join(cfg.data_dir, cfg.testset)) 67 | make_folder(cfg.model_dir) 68 | make_folder(cfg.vis_dir) 69 | make_folder(cfg.log_dir) 70 | make_folder(cfg.result_dir) 71 | 72 | -------------------------------------------------------------------------------- /main/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from nets.resnet import ResNetBackbone 3 | from config import cfg 4 | 5 | class HeadNet(nn.Module): 6 | 7 | def __init__(self, joint_num): 8 | self.inplanes = 2048 9 | self.outplanes = 256 10 | 11 | super(HeadNet, self).__init__() 12 | 13 | self.deconv_layers = self._make_deconv_layer(3) 14 | self.final_layer = nn.Conv2d( 15 | in_channels=self.inplanes, 16 | out_channels=joint_num * cfg.depth_dim, 17 | kernel_size=1, 18 | stride=1, 19 | padding=0 20 | ) 21 | 22 | def _make_deconv_layer(self, num_layers): 23 | layers = [] 24 | for i in range(num_layers): 25 | layers.append( 26 | nn.ConvTranspose2d( 27 | in_channels=self.inplanes, 28 | out_channels=self.outplanes, 29 | kernel_size=4, 30 | stride=2, 31 | padding=1, 32 | output_padding=0, 33 | bias=False)) 34 | layers.append(nn.BatchNorm2d(self.outplanes)) 35 | layers.append(nn.ReLU(inplace=True)) 36 | self.inplanes = self.outplanes 37 | 38 | return nn.Sequential(*layers) 39 | 40 | def forward(self, x): 41 | x = self.deconv_layers(x) 42 | x = self.final_layer(x) 43 | 44 | return x 45 | 46 | def init_weights(self): 47 | for name, m in self.deconv_layers.named_modules(): 48 | if isinstance(m, nn.ConvTranspose2d): 49 | nn.init.normal_(m.weight, std=0.001) 50 | elif isinstance(m, nn.BatchNorm2d): 51 | nn.init.constant_(m.weight, 1) 52 | nn.init.constant_(m.bias, 0) 53 | for m in self.final_layer.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | 
nn.init.normal_(m.weight, std=0.001) 56 | nn.init.constant_(m.bias, 0) 57 | 58 | class ResPoseNet(nn.Module): 59 | def __init__(self, backbone, head): 60 | super(ResPoseNet, self).__init__() 61 | self.backbone = backbone 62 | self.head = head 63 | 64 | def forward(self, x): 65 | x = self.backbone(x) 66 | x = self.head(x) 67 | return x 68 | 69 | def get_pose_net(cfg, is_train, joint_num): 70 | 71 | backbone = ResNetBackbone(cfg.resnet_type) 72 | head_net = HeadNet(joint_num) 73 | if is_train: 74 | backbone.init_weights() 75 | head_net.init_weights() 76 | 77 | model = ResPoseNet(backbone, head_net) 78 | return model 79 | 80 | -------------------------------------------------------------------------------- /common/utils/vis.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | from mpl_toolkits.mplot3d import Axes3D 5 | import matplotlib.pyplot as plt 6 | import matplotlib as mpl 7 | from config import cfg 8 | 9 | def vis_keypoints(img, kps, kps_lines, kp_thresh=0.4, alpha=1): 10 | 11 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 12 | cmap = plt.get_cmap('rainbow') 13 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 14 | colors = [(c[2] * 255, c[1] * 255, c[0] * 255) for c in colors] 15 | 16 | # Perform the drawing on a copy of the image, to allow for blending. 17 | kp_mask = np.copy(img) 18 | 19 | # Draw the keypoints. 20 | for l in range(len(kps_lines)): 21 | i1 = kps_lines[l][0] 22 | i2 = kps_lines[l][1] 23 | p1 = kps[0, i1].astype(np.int32), kps[1, i1].astype(np.int32) 24 | p2 = kps[0, i2].astype(np.int32), kps[1, i2].astype(np.int32) 25 | if kps[2, i1] > kp_thresh and kps[2, i2] > kp_thresh: 26 | cv2.line( 27 | kp_mask, p1, p2, 28 | color=colors[l], thickness=2, lineType=cv2.LINE_AA) 29 | if kps[2, i1] > kp_thresh: 30 | cv2.circle( 31 | kp_mask, p1, 32 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 33 | if kps[2, i2] > kp_thresh: 34 | cv2.circle( 35 | kp_mask, p2, 36 | radius=3, color=colors[l], thickness=-1, lineType=cv2.LINE_AA) 37 | 38 | # Blend the keypoints. 39 | return cv2.addWeighted(img, 1.0 - alpha, kp_mask, alpha, 0) 40 | 41 | def vis_3d_skeleton(kpt_3d, kpt_3d_vis, kps_lines, filename=None): 42 | 43 | fig = plt.figure() 44 | ax = fig.add_subplot(111, projection='3d') 45 | 46 | # Convert from plt 0-1 RGBA colors to 0-255 BGR colors for opencv. 
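    # Note: the plotting below passes (x, z, -y) to matplotlib, i.e. depth is
    # drawn on the horizontal 'Z' axis and the negated image-vertical axis (-y)
    # points upward, so the skeleton is displayed upright.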
47 | cmap = plt.get_cmap('rainbow') 48 | colors = [cmap(i) for i in np.linspace(0, 1, len(kps_lines) + 2)] 49 | colors = [np.array((c[2], c[1], c[0])) for c in colors] 50 | 51 | for l in range(len(kps_lines)): 52 | i1 = kps_lines[l][0] 53 | i2 = kps_lines[l][1] 54 | x = np.array([kpt_3d[i1,0], kpt_3d[i2,0]]) 55 | y = np.array([kpt_3d[i1,1], kpt_3d[i2,1]]) 56 | z = np.array([kpt_3d[i1,2], kpt_3d[i2,2]]) 57 | 58 | if kpt_3d_vis[i1,0] > 0 and kpt_3d_vis[i2,0] > 0: 59 | ax.plot(x, z, -y, c=colors[l], linewidth=2) 60 | if kpt_3d_vis[i1,0] > 0: 61 | ax.scatter(kpt_3d[i1,0], kpt_3d[i1,2], -kpt_3d[i1,1], c=colors[l], marker='o') 62 | if kpt_3d_vis[i2,0] > 0: 63 | ax.scatter(kpt_3d[i2,0], kpt_3d[i2,2], -kpt_3d[i2,1], c=colors[l], marker='o') 64 | 65 | x_r = np.array([0, cfg.input_shape[1]], dtype=np.float32) 66 | y_r = np.array([0, cfg.input_shape[0]], dtype=np.float32) 67 | z_r = np.array([0, 1], dtype=np.float32) 68 | 69 | if filename is None: 70 | ax.set_title('3D vis') 71 | else: 72 | ax.set_title(filename) 73 | 74 | ax.set_xlabel('X Label') 75 | ax.set_ylabel('Z Label') 76 | ax.set_zlabel('Y Label') 77 | #ax.set_xlim([0,cfg.input_shape[1]]) 78 | #ax.set_ylim([0,1]) 79 | #ax.set_zlim([-cfg.input_shape[0],0]) 80 | ax.legend() 81 | 82 | plt.show() 83 | cv2.waitKey(0) 84 | 85 | -------------------------------------------------------------------------------- /common/nets/resnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torchvision.models.resnet import BasicBlock, Bottleneck 3 | from torchvision.models.resnet import model_zoo, model_urls 4 | 5 | class ResNetBackbone(nn.Module): 6 | 7 | def __init__(self, resnet_type): 8 | 9 | resnet_spec = {18: (BasicBlock, [2, 2, 2, 2], [64, 64, 128, 256, 512], 'resnet18'), 10 | 34: (BasicBlock, [3, 4, 6, 3], [64, 64, 128, 256, 512], 'resnet34'), 11 | 50: (Bottleneck, [3, 4, 6, 3], [64, 256, 512, 1024, 2048], 'resnet50'), 12 | 101: (Bottleneck, [3, 4, 23, 3], [64, 256, 512, 1024, 2048], 'resnet101'), 13 | 152: (Bottleneck, [3, 8, 36, 3], [64, 256, 512, 1024, 2048], 'resnet152')} 14 | block, layers, channels, name = resnet_spec[resnet_type] 15 | 16 | self.name = name 17 | self.inplanes = 64 18 | super(ResNetBackbone, self).__init__() 19 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 20 | bias=False) 21 | self.bn1 = nn.BatchNorm2d(64) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 24 | self.layer1 = self._make_layer(block, 64, layers[0]) 25 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 26 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 27 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 28 | 29 | for m in self.modules(): 30 | if isinstance(m, nn.Conv2d): 31 | # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 32 | nn.init.normal_(m.weight, mean=0, std=0.001) 33 | elif isinstance(m, nn.BatchNorm2d): 34 | nn.init.constant_(m.weight, 1) 35 | nn.init.constant_(m.bias, 0) 36 | 37 | def _make_layer(self, block, planes, blocks, stride=1): 38 | downsample = None 39 | if stride != 1 or self.inplanes != planes * block.expansion: 40 | downsample = nn.Sequential( 41 | nn.Conv2d(self.inplanes, planes * block.expansion, 42 | kernel_size=1, stride=stride, bias=False), 43 | nn.BatchNorm2d(planes * block.expansion), 44 | ) 45 | 46 | layers = [] 47 | layers.append(block(self.inplanes, planes, stride, downsample)) 48 | self.inplanes = planes * 
block.expansion 49 | for i in range(1, blocks): 50 | layers.append(block(self.inplanes, planes)) 51 | 52 | return nn.Sequential(*layers) 53 | 54 | def forward(self, x): 55 | x = self.conv1(x) 56 | x = self.bn1(x) 57 | x = self.relu(x) 58 | x = self.maxpool(x) 59 | 60 | x = self.layer1(x) 61 | x = self.layer2(x) 62 | x = self.layer3(x) 63 | x = self.layer4(x) 64 | 65 | return x 66 | 67 | def init_weights(self): 68 | org_resnet = model_zoo.load_url(model_urls[self.name]) 69 | # drop orginal resnet fc layer, add 'None' in case of no fc layer, that will raise error 70 | org_resnet.pop('fc.weight', None) 71 | org_resnet.pop('fc.bias', None) 72 | self.load_state_dict(org_resnet) 73 | print("Initialize resnet from model zoo") 74 | 75 | 76 | -------------------------------------------------------------------------------- /main/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from config import cfg 3 | from base import Trainer 4 | import torch.backends.cudnn as cudnn 5 | 6 | def parse_args(): 7 | parser = argparse.ArgumentParser() 8 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 9 | parser.add_argument('--continue', dest='continue_train', action='store_true') 10 | args = parser.parse_args() 11 | 12 | if not args.gpu_ids: 13 | assert 0, print("Input GPU ids") 14 | 15 | if '-' in args.gpu_ids: 16 | gpus = args.gpu_ids.split('-') 17 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 18 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 19 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 20 | 21 | return args 22 | 23 | def main(): 24 | 25 | # argument parse and create log 26 | args = parse_args() 27 | cfg.set_args(args.gpu_ids, args.continue_train) 28 | cudnn.fastest = True 29 | cudnn.benchmark = True 30 | cudnn.deterministic = False 31 | cudnn.enabled = True 32 | 33 | trainer = Trainer(cfg) 34 | trainer._make_batch_generator() 35 | trainer._make_model() 36 | 37 | # train 38 | for epoch in range(trainer.start_epoch, cfg.end_epoch): 39 | trainer.scheduler.step() 40 | trainer.tot_timer.tic() 41 | trainer.read_timer.tic() 42 | 43 | for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator): 44 | trainer.read_timer.toc() 45 | trainer.gpu_timer.tic() 46 | 47 | trainer.optimizer.zero_grad() 48 | 49 | input_img = input_img.cuda() 50 | joint_img = joint_img.cuda() 51 | joint_vis = joint_vis.cuda() 52 | joints_have_depth = joints_have_depth.cuda() 53 | 54 | 55 | # forward 56 | heatmap_out = trainer.model(input_img) 57 | 58 | # backward 59 | JointLocationLoss = trainer.JointLocationLoss(heatmap_out, joint_img, joint_vis, joints_have_depth) 60 | 61 | loss = JointLocationLoss 62 | 63 | loss.backward() 64 | trainer.optimizer.step() 65 | 66 | trainer.gpu_timer.toc() 67 | 68 | screen = [ 69 | 'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch), 70 | 'lr: %g' % (trainer.scheduler.get_lr()[0]), 71 | 'speed: %.2f(%.2fs r%.2f)s/itr' % ( 72 | trainer.tot_timer.average_time, trainer.gpu_timer.average_time, trainer.read_timer.average_time), 73 | '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. 
* trainer.itr_per_epoch), 74 | '%s: %.4f' % ('loss_loc', JointLocationLoss.detach()), 75 | ] 76 | trainer.logger.info(' '.join(screen)) 77 | 78 | trainer.tot_timer.toc() 79 | trainer.tot_timer.tic() 80 | trainer.read_timer.tic() 81 | 82 | trainer.save_model({ 83 | 'epoch': epoch, 84 | 'network': trainer.model.state_dict(), 85 | 'optimizer': trainer.optimizer.state_dict(), 86 | 'scheduler': trainer.scheduler.state_dict(), 87 | }, epoch) 88 | 89 | 90 | if __name__ == "__main__": 91 | main() 92 | -------------------------------------------------------------------------------- /main/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import argparse 4 | from tqdm import tqdm 5 | import numpy as np 6 | import cv2 7 | from config import cfg 8 | import torch 9 | from base import Tester 10 | from torch.nn.parallel.scatter_gather import gather 11 | from nets.loss import soft_argmax 12 | from utils.vis import vis_keypoints 13 | from utils.pose_utils import flip 14 | import torch.backends.cudnn as cudnn 15 | 16 | def parse_args(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('--gpu', type=str, dest='gpu_ids') 19 | parser.add_argument('--test_epoch', type=str, dest='test_epoch') 20 | args = parser.parse_args() 21 | 22 | # test gpus 23 | if not args.gpu_ids: 24 | args.gpu_ids = str(np.argmin(mem_info())) 25 | 26 | if '-' in args.gpu_ids: 27 | gpus = args.gpu_ids.split('-') 28 | gpus[0] = 0 if not gpus[0].isdigit() else int(gpus[0]) 29 | gpus[1] = len(mem_info()) if not gpus[1].isdigit() else int(gpus[1]) + 1 30 | args.gpu_ids = ','.join(map(lambda x: str(x), list(range(*gpus)))) 31 | 32 | assert args.test_epoch, 'Test epoch is required.' 33 | return args 34 | 35 | def main(): 36 | 37 | args = parse_args() 38 | cfg.set_args(args.gpu_ids) 39 | cudnn.fastest = True 40 | cudnn.benchmark = True 41 | cudnn.deterministic = False 42 | cudnn.enabled = True 43 | 44 | tester = Tester(cfg, args.test_epoch) 45 | tester._make_batch_generator() 46 | tester._make_model() 47 | 48 | preds = [] 49 | 50 | with torch.no_grad(): 51 | for itr, input_img in enumerate(tqdm(tester.batch_generator)): 52 | 53 | input_img = input_img.cuda() 54 | 55 | # forward 56 | heatmap_out = tester.model(input_img) 57 | if cfg.num_gpus > 1: 58 | heatmap_out = gather(heatmap_out,0) 59 | coord_out = soft_argmax(heatmap_out, tester.joint_num) 60 | 61 | if cfg.flip_test: 62 | flipped_input_img = flip(input_img, dims=3) 63 | flipped_heatmap_out = tester.model(flipped_input_img) 64 | if cfg.num_gpus > 1: 65 | flipped_heatmap_out = gather(flipped_heatmap_out,0) 66 | flipped_coord_out = soft_argmax(flipped_heatmap_out, tester.joint_num) 67 | flipped_coord_out[:, :, 0] = cfg.output_shape[1] - flipped_coord_out[:, :, 0] - 1 68 | for pair in tester.flip_pairs: 69 | flipped_coord_out[:, pair[0], :], flipped_coord_out[:, pair[1], :] = flipped_coord_out[:, pair[1], :].clone(), flipped_coord_out[:, pair[0], :].clone() 70 | coord_out = (coord_out + flipped_coord_out)/2. 
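                # Flip-test summary: the input is mirrored along the width axis,
                # run through the network again, the predicted x coordinates are
                # mirrored back (cfg.output_shape[1] - x - 1), left/right joint
                # pairs are swapped, and the result is averaged with the
                # prediction on the original image.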
71 | 72 | vis = False 73 | if vis: 74 | filename = str(itr) 75 | tmpimg = input_img[0].cpu().numpy() 76 | tmpimg = tmpimg * cfg.pixel_std.reshape(3,1,1) + cfg.pixel_mean.reshape(3,1,1) 77 | tmpimg = tmpimg.astype(np.uint8) 78 | tmpimg = tmpimg[::-1, :, :] 79 | tmpimg = np.transpose(tmpimg,(1,2,0)).copy() 80 | tmpkps = np.zeros((3,tester.joint_num)) 81 | tmpkps[:2,:] = coord_out[0,:,:2].transpose(1,0) / cfg.output_shape[0] * cfg.input_shape[0] 82 | tmpkps[2,:] = 1 83 | tmpimg = vis_keypoints(tmpimg, tmpkps, tester.skeleton) 84 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_output.jpg'), tmpimg) 85 | 86 | coord_out = coord_out.cpu().numpy() 87 | preds.append(coord_out) 88 | 89 | # evaluate 90 | preds = np.concatenate(preds, axis=0) 91 | tester._evaluate(preds, cfg.result_dir) 92 | 93 | if __name__ == "__main__": 94 | main() 95 | -------------------------------------------------------------------------------- /data/MPII/MPII.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import json 5 | from pycocotools.coco import COCO 6 | from config import cfg 7 | 8 | class MPII: 9 | 10 | def __init__(self, data_split): 11 | self.data_split = data_split 12 | self.img_dir = osp.join('..', 'data', 'MPII') 13 | self.train_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'train.json') 14 | self.test_annot_path = osp.join('..', 'data', 'MPII', 'annotations', 'test.json') 15 | self.joint_num = 16 16 | self.joints_name = ('R_Ankle', 'R_Knee', 'R_Hip', 'L_Hip', 'L_Knee', 'L_Ankle', 'Pelvis', 'Thorax', 'Neck', 'Head', 'R_Wrist', 'R_Elbow', 'R_Shoulder', 'L_Shoulder', 'L_Elbow', 'L_Wrist') 17 | self.flip_pairs = ( (0, 5), (1, 4), (2, 3), (10, 15), (11, 14), (12, 13) ) 18 | self.skeleton = ( (0, 1), (1, 2), (2, 6), (7, 12), (12, 11), (11, 10), (5, 4), (4, 3), (3, 6), (7, 13), (13, 14), (14, 15), (6, 7), (7, 8), (8, 9) ) 19 | self.lr_skeleton = ( ((0,0),(0,0)) ) # dumy value 20 | 21 | self.joints_have_depth = False 22 | 23 | def load_data(self): 24 | 25 | if self.data_split == 'train': 26 | db = COCO(self.train_annot_path) 27 | elif self.data_split == 'test': 28 | db = COCO(self.test_annot_path) 29 | else: 30 | print('Unknown data subset') 31 | assert 0 32 | 33 | data = [] 34 | for aid in db.anns.keys(): 35 | ann = db.anns[aid] 36 | 37 | if (ann['image_id'] not in db.imgs) or ann['iscrowd'] or (ann['num_keypoints'] == 0): 38 | continue 39 | 40 | # sanitize bboxes 41 | x, y, w, h = ann['bbox'] 42 | img = db.loadImgs(ann['image_id'])[0] 43 | width, height = img['width'], img['height'] 44 | x1 = np.max((0, x)) 45 | y1 = np.max((0, y)) 46 | x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) 47 | y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) 48 | if ann['area'] > 0 and x2 >= x1 and y2 >= y1: 49 | bbox = np.array([x1, y1, x2-x1, y2-y1]) 50 | else: 51 | continue 52 | 53 | # aspect ratio preserving bbox 54 | w = bbox[2] 55 | h = bbox[3] 56 | c_x = bbox[0] + w/2. 57 | c_y = bbox[1] + h/2. 58 | aspect_ratio = cfg.input_shape[1]/cfg.input_shape[0] 59 | if w > aspect_ratio * h: 60 | h = w / aspect_ratio 61 | elif w < aspect_ratio * h: 62 | w = h * aspect_ratio 63 | bbox[2] = w#*1.25 64 | bbox[3] = h#*1.25 65 | bbox[0] = c_x - bbox[2]/2. 66 | bbox[1] = c_y - bbox[3]/2. 
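            # The box is only enlarged, never shrunk: one side is stretched so
            # that the width:height ratio equals cfg.input_shape[1]:cfg.input_shape[0]
            # while the center stays fixed, so cropping with this box and resizing
            # to the network input does not distort the person.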
67 | 68 | # joints and vis 69 | if self.data_split == 'train': 70 | joint_img = np.array(ann['keypoints']).reshape(self.joint_num,3) 71 | joint_vis = joint_img[:,2].copy().reshape(-1,1) 72 | joint_img[:,2] = 0 73 | else: 74 | joint_img = np.zeros((self.joint_num, 3), dtype=np.float) 75 | joint_vis = np.zeros((self.joint_num, 1), dtype=np.float) 76 | 77 | imgname = db.imgs[ann['image_id']]['file_name'] 78 | img_path = osp.join(self.img_dir, imgname) 79 | data.append({ 80 | 'img_path': img_path, 81 | 'bbox': bbox, 82 | 'joint_img': joint_img, # [org_img_x, org_img_y, 0] 83 | 'joint_cam': np.ones(joint_img.shape), # dummy value 84 | 'joint_vis': joint_vis, 85 | 'center_cam': np.ones(3), # dummy value 86 | 'f': np.ones(2), # dummy value 87 | 'c': np.ones(2) # dummy value 88 | }) 89 | 90 | return data 91 | 92 | def evaluate(self, preds, result_dir): 93 | print('MPII evaluation not supported') 94 | 95 | 96 | -------------------------------------------------------------------------------- /common/utils/pose_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from config import cfg 4 | import copy 5 | 6 | def cam2pixel(cam_coord, f, c): 7 | 8 | x = cam_coord[..., 0] / cam_coord[..., 2] * f[0] + c[0] 9 | y = cam_coord[..., 1] / cam_coord[..., 2] * f[1] + c[1] 10 | z = cam_coord[..., 2] 11 | 12 | return x,y,z 13 | 14 | def pixel2cam(pixel_coord, f, c): 15 | 16 | x = (pixel_coord[..., 0] - c[0]) / f[0] * pixel_coord[..., 2] 17 | y = (pixel_coord[..., 1] - c[1]) / f[1] * pixel_coord[..., 2] 18 | z = pixel_coord[..., 2] 19 | 20 | return x,y,z 21 | 22 | def rigid_transform_3D(A, B): 23 | centroid_A = np.mean(A, axis = 0) 24 | centroid_B = np.mean(B, axis = 0) 25 | H = np.dot(np.transpose(A - centroid_A), B - centroid_B) 26 | U, s, V = np.linalg.svd(H) 27 | R = np.dot(np.transpose(V), np.transpose(U)) 28 | if np.linalg.det(R) < 0: 29 | V[2] = -V[2] 30 | R = np.dot(np.transpose(V), np.transpose(U)) 31 | t = -np.dot(R, np.transpose(centroid_A)) + np.transpose(centroid_B) 32 | return R, t 33 | 34 | def rigid_align(A, B): 35 | R, t = rigid_transform_3D(A, B) 36 | A2 = np.transpose(np.dot(R, np.transpose(A))) + t 37 | return A2 38 | 39 | def process_world_coordinate(joint_world, root_idx, joint_num, R, T, f, c): 40 | 41 | # project world coordinates to image space 42 | joint_cam = np.zeros((joint_num, 3)) 43 | for i in range(joint_num): 44 | joint_cam[i] = np.dot(R, joint_world[i] - T) 45 | center_cam = joint_cam[root_idx] 46 | 47 | # Subtract center depth 48 | joint_img = np.zeros((joint_num, 3)) 49 | joint_img[:, 0], joint_img[:, 1], joint_img[:, 2] = cam2pixel(joint_cam, f, c) 50 | joint_img[:, 2] = joint_img[:, 2] - center_cam[2] 51 | joint_vis = np.ones((joint_num,1)) 52 | 53 | ## bbox3d 54 | # build 3D bounding box centered on center_cam, sized with bbox_3d_shape 55 | bbox3d_lt = center_cam - np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 56 | bbox3d_rb = center_cam + np.array([cfg.bbox_3d_shape[2] / 2, cfg.bbox_3d_shape[1] / 2, 0]) 57 | 58 | # back-project 3D BBox to 2D image 59 | bbox2d_l, bbox2d_t, _ = cam2pixel(bbox3d_lt, f, c) 60 | bbox2d_r, bbox2d_b, _ = cam2pixel(bbox3d_rb, f, c) 61 | bbox = np.array([bbox2d_l, bbox2d_t, bbox2d_r-bbox2d_l+1, bbox2d_b-bbox2d_t+1]) 62 | 63 | return joint_img, joint_cam, joint_vis, center_cam, bbox 64 | 65 | def warp_coord_to_original(joint_out, bbox, center_cam): 66 | 67 | # joint_out: output from soft-argmax 68 | x = joint_out[:, 0] / cfg.output_shape[1] * bbox[2] 
+ bbox[0] 69 | y = joint_out[:, 1] / cfg.output_shape[0] * bbox[3] + bbox[1] 70 | z = (joint_out[:, 2] / cfg.depth_dim * 2. - 1.) * (cfg.bbox_3d_shape[0]/2.) + center_cam[2] 71 | 72 | return x, y, z 73 | 74 | def fliplr_joints(_joints, width, matched_parts): 75 | """ 76 | flip coords 77 | joints: numpy array, nJoints * dim, dim == 2 [x, y] or dim == 3 [x, y, z] 78 | width: image width 79 | matched_parts: list of pairs 80 | """ 81 | joints = _joints.copy() 82 | # Flip horizontal 83 | joints[:, 0] = width - joints[:, 0] - 1 84 | 85 | # Change left-right parts 86 | for pair in matched_parts: 87 | joints[pair[0], :], joints[pair[1], :] = joints[pair[1], :], joints[pair[0], :].copy() 88 | 89 | return joints 90 | 91 | def multi_meshgrid(*args): 92 | """ 93 | Creates a meshgrid from possibly many 94 | elements (instead of only 2). 95 | Returns a nd tensor with as many dimensions 96 | as there are arguments 97 | """ 98 | args = list(args) 99 | template = [1 for _ in args] 100 | for i in range(len(args)): 101 | n = args[i].shape[0] 102 | template_copy = template.copy() 103 | template_copy[i] = n 104 | args[i] = args[i].view(*template_copy) 105 | # there will be some broadcast magic going on 106 | return tuple(args) 107 | 108 | 109 | def flip(tensor, dims): 110 | if not isinstance(dims, (tuple, list)): 111 | dims = [dims] 112 | indices = [torch.arange(tensor.shape[dim] - 1, -1, -1, 113 | dtype=torch.int64) for dim in dims] 114 | multi_indices = multi_meshgrid(*indices) 115 | final_indices = [slice(i) for i in tensor.shape] 116 | for i, dim in enumerate(dims): 117 | final_indices[dim] = multi_indices[i] 118 | flipped = tensor[final_indices] 119 | assert flipped.device == tensor.device 120 | assert flipped.requires_grad == tensor.requires_grad 121 | return flipped 122 | 123 | -------------------------------------------------------------------------------- /tool/preprocess_h36m.m: -------------------------------------------------------------------------------- 1 | % Preprocess human3.6m dataset 2 | % Place this file to the Release-v1.1 folder and run it 3 | 4 | function preprocess_h36m() 5 | 6 | close all; 7 | %clear; 8 | %clc; 9 | 10 | addpaths; 11 | 12 | %-------------------------------------------------------------------------- 13 | % PARAMETERS 14 | 15 | % Subject (1, 5, 6, 7, 8, 9, 11) 16 | SUBJECT = [1 5 6 7 8 9 11]; 17 | 18 | % Action (2 ~ 16) 19 | ACTION = 2:16; 20 | 21 | % Subaction (1 ~ 2) 22 | SUBACTION = 1:2; 23 | 24 | % Camera (1 ~ 4) 25 | CAMERA = 1:4; 26 | 27 | num_joint = 17; 28 | root_dir = 'SET_YOUR_OUTPUT_DIRECTORY'; %you have to set your output directory 29 | 30 | % if rgb sequence is declared in the loop, it causes stuck (do not know 31 | % reason) 32 | rgb_sequence = cell(1,100000000); 33 | COUNT = 1; 34 | %-------------------------------------------------------------------------- 35 | % MAIN LOOP 36 | % For each subject, action, subaction, and camera.. 37 | for subject = SUBJECT 38 | for action = ACTION 39 | for subaction = SUBACTION 40 | for camera = CAMERA 41 | 42 | fprintf('Processing subject %d, action %d, subaction %d, camera %d..\n', ... 43 | subject, action, subaction, camera); 44 | 45 | save_dir = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d', ... 
46 | root_dir, subject, action, subaction, camera); 47 | if ~exist(save_dir, 'dir') 48 | mkdir(save_dir); 49 | end 50 | 51 | if (subject==11) && (action==2) && (subaction==2) && (camera==1) 52 | fprintf('There is an error in subject 11, action 2, subaction 2, and camera 1\n'); 53 | continue; 54 | end 55 | 56 | % Select sequence 57 | Sequence = H36MSequence(subject, action, subaction, camera); 58 | 59 | % Get 3D pose and 2D pose 60 | Features{1} = H36MPose3DPositionsFeature(); % 3D world coordinates 61 | Features{1}.Part = 'body'; % Only consider 17 joints 62 | Features{2} = H36MPose3DPositionsFeature('Monocular', true); % 3D camera coordinates 63 | Features{2}.Part = 'body'; % Only consider 17 joints 64 | Features{3} = H36MPose2DPositionsFeature(); % 2D image coordinates 65 | Features{3}.Part = 'body'; % Only consider 17 joints 66 | F = H36MComputeFeatures(Sequence, Features); 67 | num_frame = Sequence.NumFrames; 68 | pose3d_world = reshape(F{1}, num_frame, 3, num_joint); 69 | pose3d = reshape(F{2}, num_frame, 3, num_joint); 70 | pose2d = reshape(F{3}, num_frame, 2, num_joint); 71 | 72 | % Camera (in global coordinate) 73 | Camera = Sequence.getCamera(); 74 | 75 | % Sanity check 76 | if false 77 | R = Camera.R; % rotation matrix 78 | T = Camera.T'; % origin of the world coord system 79 | K = [Camera.f(1) 0 Camera.c(1); 80 | 0 Camera.f(2) Camera.c(2); 81 | 0 0 1]; % f: focal length, c: principal points 82 | error = 0; 83 | for i = 1:num_frame 84 | X = squeeze(pose3d_global(i,:,:)); 85 | x = squeeze(pose2d(i,:,:)); 86 | px = K*R*(X-T); 87 | px = px ./ px(3,:); 88 | px = px(1:2,:); 89 | error = error + mean(sqrt(sum((px-x).^2, 1))); 90 | end 91 | error = error / num_frame; 92 | fprintf('reprojection error = %.2f (pixels)\n', error); 93 | keyboard; 94 | end 95 | 96 | %% Image, bounding box for each sampled frame 97 | fprintf('Load RGB video: '); 98 | rgb_extractor = H36MRGBVideoFeature(); 99 | rgb_sequence{COUNT} = rgb_extractor.serializer(Sequence); 100 | fprintf('Done!!\n'); 101 | img_height = zeros(num_frame,1); 102 | img_width = zeros(num_frame,1); 103 | 104 | % For each frame, 105 | for i = 1:num_frame 106 | if mod(i,100) == 1 107 | fprintf('.'); 108 | end 109 | 110 | % Save image 111 | % Get data 112 | img = rgb_sequence{COUNT}.getFrame(i); 113 | [h, w, c] = size(img); 114 | img_height(i) = h; 115 | img_width(i) = w; 116 | img_name = sprintf('%s/s_%02d_act_%02d_subact_%02d_ca_%02d_%06d.jpg', ... 
117 | save_dir, subject, action, subaction, camera, i); 118 | imwrite(img, img_name); 119 | 120 | end 121 | 122 | COUNT = COUNT + 1; 123 | 124 | % Save data 125 | pose3d_world = permute(pose3d_world,[1,3,2]); % world coordinate 3D keypoint coordinates 126 | R = Camera.R; % rotation matrix 127 | T = Camera.T; % origin of the world coord system 128 | f = Camera.f; % focal length 129 | c = Camera.c; % principal points 130 | filename = sprintf('%s/h36m_meta.mat', save_dir); 131 | save(filename, 'pose3d_world', 'f', 'c', 'R', 'T', 'img_height', 'img_width'); 132 | 133 | fprintf('\n'); 134 | 135 | end 136 | end 137 | end 138 | end 139 | 140 | end 141 | 142 | -------------------------------------------------------------------------------- /common/base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import math 4 | import time 5 | import glob 6 | import abc 7 | from torch.utils.data import DataLoader 8 | import torch.optim 9 | import torchvision.transforms as transforms 10 | 11 | from config import cfg 12 | from dataset import DatasetLoader 13 | from timer import Timer 14 | from logger import colorlogger 15 | from nets.balanced_parallel import DataParallelModel, DataParallelCriterion 16 | from model import get_pose_net 17 | from nets import loss 18 | 19 | # dynamic dataset import 20 | for i in range(len(cfg.trainset)): 21 | exec('from ' + cfg.trainset[i] + ' import ' + cfg.trainset[i]) 22 | exec('from ' + cfg.testset + ' import ' + cfg.testset) 23 | 24 | class Base(object): 25 | __metaclass__ = abc.ABCMeta 26 | 27 | def __init__(self, cfg, log_name='logs.txt'): 28 | 29 | self.cfg = cfg 30 | self.cur_epoch = 0 31 | 32 | # timer 33 | self.tot_timer = Timer() 34 | self.gpu_timer = Timer() 35 | self.read_timer = Timer() 36 | 37 | # logger 38 | self.logger = colorlogger(cfg.log_dir, log_name=log_name) 39 | 40 | @abc.abstractmethod 41 | def _make_batch_generator(self): 42 | return 43 | 44 | @abc.abstractmethod 45 | def _make_model(self): 46 | return 47 | 48 | def save_model(self, state, epoch): 49 | file_path = osp.join(self.cfg.model_dir,'snapshot_{}.pth.tar'.format(str(epoch))) 50 | torch.save(state, file_path) 51 | self.logger.info("Write snapshot into {}".format(file_path)) 52 | 53 | def load_model(self, model, optimizer, scheduler): 54 | model_file_list = glob.glob(osp.join(self.cfg.model_dir,'*.pth.tar')) 55 | cur_epoch = max([int(file_name[file_name.find('snapshot_') + 9 : file_name.find('.pth.tar')]) for file_name in model_file_list]) 56 | ckpt = torch.load(osp.join(self.cfg.model_dir, 'snapshot_' + str(cur_epoch) + '.pth.tar')) 57 | start_epoch = ckpt['epoch'] + 1 58 | model.load_state_dict(ckpt['network']) 59 | optimizer.load_state_dict(ckpt['optimizer']) 60 | scheduler.load_state_dict(ckpt['scheduler']) 61 | 62 | return start_epoch, model, optimizer, scheduler 63 | 64 | 65 | class Trainer(Base): 66 | 67 | def __init__(self, cfg): 68 | self.JointLocationLoss = DataParallelCriterion(loss.JointLocationLoss()) 69 | super(Trainer, self).__init__(cfg, log_name = 'train_logs.txt') 70 | 71 | def get_optimizer(self, optimizer_name, model): 72 | if optimizer_name == 'adam': 73 | optimizer = torch.optim.Adam(model.parameters(), lr=self.cfg.lr) 74 | elif optimizer_name == 'sgd': 75 | optimizer = torch.optim.SGD(model.parameters(), lr=self.cfg.lr, momentum=self.cfg.momentum, weight_decay=self.cfg.wd) 76 | else: 77 | print("Error! 
Unknown optimizer name: ", optimizer_name) 78 | assert 0 79 | 80 | scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.cfg.lr_dec_epoch, gamma=self.cfg.lr_dec_factor) 81 | return optimizer, scheduler 82 | 83 | def _make_batch_generator(self): 84 | # data load and construct batch generator 85 | self.logger.info("Creating dataset...") 86 | trainset_list = [] 87 | for i in range(len(self.cfg.trainset)): 88 | trainset_list.append(eval(self.cfg.trainset[i])("train")) 89 | trainset_loader = DatasetLoader(trainset_list, True, transforms.Compose([\ 90 | transforms.ToTensor(), 91 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 92 | )) 93 | batch_generator = DataLoader(dataset=trainset_loader, batch_size=self.cfg.num_gpus*self.cfg.batch_size, shuffle=True, num_workers=self.cfg.num_thread, pin_memory=True) 94 | 95 | self.joint_num = trainset_loader.joint_num[0] 96 | self.itr_per_epoch = math.ceil(trainset_loader.__len__() / cfg.num_gpus / cfg.batch_size) 97 | self.batch_generator = batch_generator 98 | 99 | def _make_model(self): 100 | # prepare network 101 | self.logger.info("Creating graph and optimizer...") 102 | model = get_pose_net(self.cfg, True, self.joint_num) 103 | model = DataParallelModel(model).cuda() 104 | optimizer, scheduler = self.get_optimizer(self.cfg.optimizer, model) 105 | if self.cfg.continue_train: 106 | start_epoch, model, optimizer, scheduler = self.load_model(model, optimizer, scheduler) 107 | else: 108 | start_epoch = 0 109 | model.train() 110 | 111 | self.start_epoch = start_epoch 112 | self.model = model 113 | self.optimizer = optimizer 114 | self.scheduler = scheduler 115 | 116 | class Tester(Base): 117 | 118 | def __init__(self, cfg, test_epoch): 119 | self.coord_out = loss.soft_argmax 120 | self.test_epoch = int(test_epoch) 121 | super(Tester, self).__init__(cfg, log_name = 'test_logs.txt') 122 | 123 | def _make_batch_generator(self): 124 | # data load and construct batch generator 125 | self.logger.info("Creating dataset...") 126 | testset = eval(self.cfg.testset)("test") 127 | testset_loader = DatasetLoader(testset, False, transforms.Compose([\ 128 | transforms.ToTensor(), 129 | transforms.Normalize(mean=cfg.pixel_mean, std=cfg.pixel_std)]\ 130 | )) 131 | batch_generator = DataLoader(dataset=testset_loader, batch_size=self.cfg.num_gpus*self.cfg.test_batch_size, shuffle=False, num_workers=self.cfg.num_thread, pin_memory=True) 132 | 133 | self.testset = testset 134 | self.joint_num = testset_loader.joint_num 135 | self.skeleton = testset_loader.skeleton 136 | self.flip_pairs = testset.flip_pairs 137 | self.tot_sample_num = testset_loader.__len__() 138 | self.batch_generator = batch_generator 139 | 140 | def _make_model(self): 141 | 142 | model_path = os.path.join(self.cfg.model_dir, 'snapshot_%d.pth.tar' % self.test_epoch) 143 | assert os.path.exists(model_path), 'Cannot find model at ' + model_path 144 | self.logger.info('Load checkpoint from {}'.format(model_path)) 145 | 146 | # prepare network 147 | self.logger.info("Creating graph...") 148 | model = get_pose_net(self.cfg, False, self.joint_num) 149 | model = DataParallelModel(model).cuda() 150 | ckpt = torch.load(model_path) 151 | model.load_state_dict(ckpt['network']) 152 | model.eval() 153 | 154 | self.model = model 155 | 156 | def _evaluate(self, preds, result_save_path): 157 | self.testset.evaluate(preds, result_save_path) 158 | 159 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation 2 |

3 | 4 |

5 | 
6 | ## Introduction
7 | 
8 | This repo is a **[PyTorch](https://pytorch.org/)** implementation of **[Integral Human Pose Regression (ECCV 2018)](https://arxiv.org/abs/1711.08229)** by MSRA for **3D human pose estimation** from a single RGB image.
9 | 
10 | **What this repo provides:**
11 | * A [PyTorch](https://pytorch.org/) implementation of [Integral Human Pose Regression](https://arxiv.org/abs/1711.08229).
12 | * Flexible and simple code.
13 | * Dataset pre-processing code for the **[MPII](http://human-pose.mpi-inf.mpg.de/)** and **[Human3.6M](http://vision.imar.ro/human3.6m/description.php)** datasets.
14 | 
15 | ## Dependencies
16 | * [PyTorch](https://pytorch.org/)
17 | * [CUDA](https://developer.nvidia.com/cuda-downloads)
18 | * [cuDNN](https://developer.nvidia.com/cudnn)
19 | * [Anaconda](https://www.anaconda.com/download/)
20 | * [COCO API](https://github.com/cocodataset/cocoapi)
21 | 
22 | This code is tested under Ubuntu 16.04 with CUDA 9.0, cuDNN 7.1, and two NVIDIA 1080Ti GPUs.
23 | 
24 | Python 3.6.5 with Anaconda 3 and PyTorch 1.0.0 is used for development.
25 | 
26 | ## Directory
27 | 
28 | ### Root
29 | The `${POSE_ROOT}` directory is organized as below.
30 | ```
31 | ${POSE_ROOT}
32 | |-- data
33 | |-- common
34 | |-- main
35 | |-- tool
36 | `-- output
37 | ```
38 | * `data` contains data loading code and soft links to the image and annotation directories.
39 | * `common` contains the core code of the 3D human pose estimation system.
40 | * `main` contains high-level code for training and testing the network.
41 | * `tool` contains the Human3.6M dataset preprocessing code.
42 | * `output` contains logs, trained models, visualized outputs, and test results.
43 | 
44 | ### Data
45 | You need to follow the directory structure of `data` shown below.
46 | ```
47 | ${POSE_ROOT}
48 | |-- data
49 | |-- |-- MPII
50 | | `-- |-- annotations
51 | | | |-- train.json
52 | | | `-- test.json
53 | | `-- images
54 | | |-- 000001163.jpg
55 | | |-- 000003072.jpg
56 | |-- |-- Human36M
57 | | `-- |-- data
58 | | | |-- s_01_act_02_subact_01_ca_01
59 | | | |-- s_01_act_02_subact_01_ca_02
60 | ```
61 | * In `tool`, run `preprocess_h36m.m` to preprocess the Human3.6M dataset. It converts the videos to images and saves metadata for each frame. `data` in `Human36M` contains the preprocessed data.
62 | * Use the MPII dataset preprocessing code in my [TF-SimpleHumanPose](https://github.com/mks0601/TF-SimpleHumanPose) repo.
63 | * You can change the default directory structure of `data` by modifying `$DATASET_NAME.py` in each dataset folder.
64 | 
65 | ### Output
66 | You need to follow the directory structure of the `output` folder shown below.
67 | ```
68 | ${POSE_ROOT}
69 | |-- output
70 | |-- |-- log
71 | |-- |-- model_dump
72 | |-- |-- result
73 | `-- |-- vis
74 | ```
75 | * Creating the `output` folder as a soft link instead of a regular folder is recommended, since it can require a large amount of storage.
76 | * The `log` folder contains the training log file.
77 | * The `model_dump` folder contains saved checkpoints for each epoch.
78 | * The `result` folder contains the final estimation files generated in the testing stage.
79 | * The `vis` folder contains visualized results.
80 | * You can change the default directory structure of `output` by modifying `main/config.py`.
81 | 
82 | ## Running code
83 | ### Start
84 | * In `main/config.py`, you can change model settings, including the datasets to use, the network backbone, the input size, and so on.
85 | 
86 | ### Train
87 | In the `main` folder, set the training set in `config.py`.
Note that `trainset` must be `list` type and `0th` dataset is the reference dataset. 88 | 89 | In the `main` folder, run 90 | ```bash 91 | python train.py --gpu 0-1 92 | ``` 93 | to train the network on the GPU 0,1. 94 | 95 | If you want to continue experiment, run 96 | ```bash 97 | python train.py --gpu 0-1 --continue 98 | ``` 99 | `--gpu 0,1` can be used instead of `--gpu 0-1`. 100 | 101 | ### Test 102 | In the `main` folder, set testing set in `config.py`. Note that `testset` must be `str` type. 103 | 104 | Place trained model at the `output/model_dump/`. 105 | 106 | In the `main` folder, run 107 | ```bash 108 | python test.py --gpu 0-1 --test_epoch 16 109 | ``` 110 | to test the network on the GPU 0,1 with 16th epoch trained model. `--gpu 0,1` can be used instead of `--gpu 0-1`. 111 | 112 | ## Results 113 | Here I report the performance of the model from this repo and [the original paper](https://arxiv.org/abs/1711.08229). Also, I provide pre-trained 3d human pose estimation models. 114 | 115 | ### Results on Human3.6M dataset 116 | The tables below are PA MPJPE and MPJPE on Human3.6M dataset. Provided `config.py` file is used to achieve below results. It's currently slightly worse than the performance of the original paper, however I'm trying to achieve the same performance. I think training schedule has to be changed. 117 | 118 | #### Protocol 2 (training subjects: 1,5,6,7,8, testing subjects: 9, 11), PA MPJPE 119 | The PA MPJPEs of the paper are from protocol 1, however, note that protocol 2 uses smaller training set. 120 | 121 | | Methods | Dir. | Dis. | Eat | Gre. | Phon. | Pose | Pur. | Sit. | Sit D. | Smo. | Phot. | Wait | Walk | Walk D. | Walk P. | Avg | 122 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 123 | | my repo | 39.0 | 38.6 | 44.1 | 42.5 | 40.6 | 35.3 | 38.2 | 49.9 | 59.4 | 41.00 | 46.1 | 37.6 | 30.3 | 40.8 | 35.5 | 41.5 | 124 | | [original paper](https://arxiv.org/abs/1711.08229) | 36.9 | 36.2 | 40.6 | 40.4 | 41.9 | 34.9 | 35.7 | 50.1 | 59.4 | 40.4 | 44.9 | 39.0 | 30.8 | 39.8 | 36.7 | 40.6 | 125 | 126 | #### Protocol 2 (training subjects: 1,5,6,7,8, testing subjects: 9, 11), MPJPE 127 | | Methods | Dir. | Dis. | Eat | Gre. | Phon. | Pose | Pur. | Sit. | Sit D. | Smo. | Phot. | Wait | Walk | Walk D. | Walk P. | Avg | 128 | |:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| 129 | | my repo | 50.8 | 52.3 | 54.8 | 57.9 | 52.8 | 47.0 | 52.1 | 62.0 | 73.7 | 52.6 | 58.3 | 50.4 | 40.9 | 54.1 | 45.1 | 53.9 | 130 | | [original paper](https://arxiv.org/abs/1711.08229) | 47.5 | 47.7 | 49.5 | 50.2 | 51.4 | 43.8 | 46.4 | 58.9 | 65.7 | 49.4 | 55.8 | 47.8 | 38.9 | 49.0 | 43.8 | 49.6 | 131 | 132 | * Pre-trained model of protocol 2 [[model](https://github.com/mks0601/Integral-Human-Pose-Regression-for-3D-Human-Pose-Estimation/releases/download/1.0/snapshot_16.pth.tar)] 133 | 134 | ## Troubleshooting 135 | If you get an extremely large error, disable cudnn for batch normalization. This typically occurs in low version of PyTorch. 
136 | 137 | ``` 138 | # PYTORCH=/path/to/pytorch 139 | # for pytorch v0.4.0 140 | sed -i "1194s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 141 | # for pytorch v0.4.1 142 | sed -i "1254s/torch\.backends\.cudnn\.enabled/False/g" ${PYTORCH}/torch/nn/functional.py 143 | ``` 144 | 145 | ## Acknowledgement 146 | This repo is largely modified from [Original PyTorch repo of IntegralHumanPose](https://github.com/JimmySuen/integral-human-pose). 147 | 148 | ## Reference 149 | [1] Sun, Xiao and Xiao, Bin and Liang, Shuang and Wei, Yichen. "Integral human pose regression". ECCV 2018. 150 | -------------------------------------------------------------------------------- /common/nets/balanced_parallel.py: -------------------------------------------------------------------------------- 1 | """Encoding Data Parallel""" 2 | import threading 3 | import functools 4 | import torch 5 | from torch.autograd import Variable, Function 6 | import torch.cuda.comm as comm 7 | from torch.nn.parallel.data_parallel import DataParallel 8 | from torch.nn.parallel.parallel_apply import get_a_var 9 | from torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast 10 | 11 | torch_ver = torch.__version__[:3] 12 | 13 | __all__ = ['allreduce', 'DataParallelModel', 'DataParallelCriterion', 14 | 'patch_replication_callback'] 15 | 16 | def allreduce(*inputs): 17 | """Cross GPU all reduce autograd operation for calculate mean and 18 | variance in SyncBN. 19 | """ 20 | return AllReduce.apply(*inputs) 21 | 22 | 23 | class AllReduce(Function): 24 | @staticmethod 25 | def forward(ctx, num_inputs, *inputs): 26 | ctx.num_inputs = num_inputs 27 | ctx.target_gpus = [inputs[i].get_device() for i in range(0, len(inputs), num_inputs)] 28 | inputs = [inputs[i:i + num_inputs] 29 | for i in range(0, len(inputs), num_inputs)] 30 | # sort before reduce sum 31 | inputs = sorted(inputs, key=lambda i: i[0].get_device()) 32 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 33 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 34 | return tuple([t for tensors in outputs for t in tensors]) 35 | 36 | @staticmethod 37 | def backward(ctx, *inputs): 38 | inputs = [i.data for i in inputs] 39 | inputs = [inputs[i:i + ctx.num_inputs] 40 | for i in range(0, len(inputs), ctx.num_inputs)] 41 | results = comm.reduce_add_coalesced(inputs, ctx.target_gpus[0]) 42 | outputs = comm.broadcast_coalesced(results, ctx.target_gpus) 43 | return (None,) + tuple([Variable(t) for tensors in outputs for t in tensors]) 44 | 45 | 46 | class Reduce(Function): 47 | @staticmethod 48 | def forward(ctx, *inputs): 49 | ctx.target_gpus = [inputs[i].get_device() for i in range(len(inputs))] 50 | inputs = sorted(inputs, key=lambda i: i.get_device()) 51 | return comm.reduce_add(inputs) 52 | 53 | @staticmethod 54 | def backward(ctx, gradOutput): 55 | return Broadcast.apply(ctx.target_gpus, gradOutput) 56 | 57 | 58 | class DataParallelModel(DataParallel): 59 | """Implements data parallelism at the module level. 60 | 61 | This container parallelizes the application of the given module by 62 | splitting the input across the specified devices by chunking in the 63 | batch dimension. 64 | In the forward pass, the module is replicated on each device, 65 | and each replica handles a portion of the input. During the backwards pass, gradients from each replica are summed into the original module. 66 | Note that the outputs are not gathered, please use compatible 67 | :class:`encoding.parallel.DataParallelCriterion`. 
68 | 69 | The batch size should be larger than the number of GPUs used. It should 70 | also be an integer multiple of the number of GPUs so that each chunk is 71 | the same size (so that each GPU processes the same number of samples). 72 | 73 | Args: 74 | module: module to be parallelized 75 | device_ids: CUDA devices (default: all devices) 76 | 77 | Reference: 78 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 79 | Amit Agrawal. Context Encoding for Semantic Segmentation. 80 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 81 | 82 | Example:: 83 | 84 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 85 | >>> y = net(x) 86 | """ 87 | def gather(self, outputs, output_device): 88 | return outputs 89 | 90 | def replicate(self, module, device_ids): 91 | modules = super(DataParallelModel, self).replicate(module, device_ids) 92 | execute_replication_callbacks(modules) 93 | return modules 94 | 95 | 96 | 97 | class DataParallelCriterion(DataParallel): 98 | """ 99 | Calculate the loss on multiple GPUs, which balances the memory usage for 100 | semantic segmentation. 101 | 102 | The targets are split across the specified devices by chunking in 103 | the batch dimension. Please use it together with :class:`encoding.parallel.DataParallelModel`. 104 | 105 | Reference: 106 | Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, 107 | Amit Agrawal. Context Encoding for Semantic Segmentation. 108 | *The IEEE Conference on Computer Vision and Pattern Recognition (CVPR) 2018* 109 | 110 | Example:: 111 | 112 | >>> net = encoding.nn.DataParallelModel(model, device_ids=[0, 1, 2]) 113 | >>> criterion = encoding.nn.DataParallelCriterion(criterion, device_ids=[0, 1, 2]) 114 | >>> y = net(x) 115 | >>> loss = criterion(y, target) 116 | """ 117 | def forward(self, inputs, *targets, **kwargs): 118 | # inputs should already be scattered 119 | # scattering the targets instead 120 | # if not self.device_ids: 121 | # return self.module(inputs, *targets, **kwargs) 122 | targets, kwargs = self.scatter(targets, kwargs, self.device_ids) 123 | if len(self.device_ids) == 1: 124 | return self.module(inputs, *targets[0]) 125 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 126 | outputs = _criterion_parallel_apply(replicas, inputs, targets, kwargs) 127 | return Reduce.apply(*outputs) / len(outputs) 128 | 129 | #return self.gather(outputs, self.output_device).mean() 130 | 131 | 132 | def _criterion_parallel_apply(modules, inputs, targets, kwargs_tup=None, devices=None): 133 | assert len(modules) == len(inputs) 134 | assert len(targets) == len(inputs) 135 | if kwargs_tup: 136 | assert len(modules) == len(kwargs_tup) 137 | else: 138 | kwargs_tup = ({},) * len(modules) 139 | if devices is not None: 140 | assert len(modules) == len(devices) 141 | else: 142 | devices = [None] * len(modules) 143 | 144 | lock = threading.Lock() 145 | results = {} 146 | if torch_ver != "0.3": 147 | grad_enabled = torch.is_grad_enabled() 148 | 149 | def _worker(i, module, input, target, kwargs, device=None): 150 | if torch_ver != "0.3": 151 | torch.set_grad_enabled(grad_enabled) 152 | if device is None: 153 | device = get_a_var(input).get_device() 154 | try: 155 | with torch.cuda.device(device): 156 | output = module(input, *target) 157 | with lock: 158 | results[i] = output 159 | except Exception as e: 160 | with lock: 161 | results[i] = e 162 | 163 | if len(modules) > 1: 164 | threads =
[threading.Thread(target=_worker, 165 | args=(i, module, input, target, 166 | kwargs, device),) 167 | for i, (module, input, target, kwargs, device) in 168 | enumerate(zip(modules, inputs, targets, kwargs_tup, devices))] 169 | 170 | for thread in threads: 171 | thread.start() 172 | for thread in threads: 173 | thread.join() 174 | else: 175 | _worker(0, modules[0], inputs[0], targets[0], kwargs_tup[0], devices[0]) 176 | 177 | outputs = [] 178 | for i in range(len(inputs)): 179 | output = results[i] 180 | if isinstance(output, Exception): 181 | raise output 182 | outputs.append(output) 183 | return outputs 184 | 185 | 186 | ########################################################################### 187 | # Adapted from Synchronized-BatchNorm-PyTorch. 188 | # https://github.com/vacancy/Synchronized-BatchNorm-PyTorch 189 | # 190 | class CallbackContext(object): 191 | pass 192 | 193 | 194 | def execute_replication_callbacks(modules): 195 | """ 196 | Execute a replication callback `__data_parallel_replicate__` on each module created 197 | by the original replication. 198 | 199 | The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)` 200 | 201 | Note that, as all modules are isomorphic, we assign each sub-module a context 202 | (shared among multiple copies of this module on different devices). 203 | Through this context, different copies can share some information. 204 | 205 | We guarantee that the callback on the master copy (the first copy) will be called ahead 206 | of calling the callback of any slave copies. 207 | """ 208 | master_copy = modules[0] 209 | nr_modules = len(list(master_copy.modules())) 210 | ctxs = [CallbackContext() for _ in range(nr_modules)] 211 | 212 | for i, module in enumerate(modules): 213 | for j, m in enumerate(module.modules()): 214 | if hasattr(m, '__data_parallel_replicate__'): 215 | m.__data_parallel_replicate__(ctxs[j], i) 216 | 217 | 218 | def patch_replication_callback(data_parallel): 219 | """ 220 | Monkey-patch an existing `DataParallel` object. Add the replication callback. 221 | Useful when you have a customized `DataParallel` implementation.
222 | 223 | Examples: 224 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 225 | > sync_bn = DataParallel(sync_bn, device_ids=[0, 1]) 226 | > patch_replication_callback(sync_bn) 227 | # this is equivalent to 228 | > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False) 229 | > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1]) 230 | """ 231 | 232 | assert isinstance(data_parallel, DataParallel) 233 | 234 | old_replicate = data_parallel.replicate 235 | 236 | @functools.wraps(old_replicate) 237 | def new_replicate(module, device_ids): 238 | modules = old_replicate(module, device_ids) 239 | execute_replication_callbacks(modules) 240 | return modules 241 | 242 | data_parallel.replicate = new_replicate -------------------------------------------------------------------------------- /data/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | import cv2 5 | import random 6 | import time 7 | import torch 8 | import copy 9 | from torch.utils.data.dataset import Dataset 10 | from utils.vis import vis_keypoints, vis_3d_skeleton 11 | from utils.pose_utils import fliplr_joints 12 | from config import cfg 13 | 14 | class DatasetLoader(Dataset): 15 | def __init__(self, db, is_train, transform): 16 | 17 | if isinstance(db, list): 18 | self.multiple_db = True 19 | self.db = [d.load_data() for d in db] 20 | self.joints_name = [d.joints_name for d in db] 21 | self.joint_num = [d.joint_num for d in db] 22 | self.skeleton = [d.skeleton for d in db] 23 | self.lr_skeleton = [d.lr_skeleton for d in db] 24 | self.flip_pairs = [d.flip_pairs for d in db] 25 | self.joints_have_depth = [d.joints_have_depth for d in db] 26 | else: 27 | self.multiple_db = False 28 | self.db = db.load_data() 29 | self.joint_num = db.joint_num 30 | self.skeleton = db.skeleton 31 | self.lr_skeleton = db.lr_skeleton 32 | self.flip_pairs = db.flip_pairs 33 | self.joints_have_depth = db.joints_have_depth 34 | 35 | self.transform = transform 36 | self.is_train = is_train 37 | 38 | if self.is_train: 39 | self.do_augment = True 40 | else: 41 | self.do_augment = False 42 | 43 | def __getitem__(self, index): 44 | 45 | if self.multiple_db: 46 | db_idx = index // max([len(db) for db in self.db]) 47 | 48 | joint_num = self.joint_num[db_idx] 49 | skeleton = self.skeleton[db_idx] 50 | lr_skeleton = self.lr_skeleton[0] 51 | flip_pairs = self.flip_pairs[db_idx] 52 | joints_have_depth = self.joints_have_depth[db_idx] 53 | 54 | ref_joints_name = self.joints_name[0] 55 | joints_name = self.joints_name[db_idx] 56 | 57 | item_idx = index % max([len(db) for db in self.db]) % len(self.db[db_idx]) 58 | data = copy.deepcopy(self.db[db_idx][item_idx]) 59 | 60 | else: 61 | joint_num = self.joint_num 62 | skeleton = self.skeleton 63 | lr_skeleton = self.lr_skeleton 64 | flip_pairs = self.flip_pairs 65 | joints_have_depth = self.joints_have_depth 66 | 67 | data = copy.deepcopy(self.db[index]) 68 | 69 | bbox = data['bbox'] 70 | joint_img = data['joint_img'] 71 | joint_vis = data['joint_vis'] 72 | 73 | # 1. load image 74 | cvimg = cv2.imread(data['img_path'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 75 | if not isinstance(cvimg, np.ndarray): 76 | raise IOError("Fail to read %s" % data['img_path']) 77 | img_height, img_width, img_channels = cvimg.shape 78 | 79 | # 2. 
get augmentation params 80 | if self.do_augment: 81 | scale, rot, do_flip, color_scale = get_aug_config() 82 | else: 83 | scale, rot, do_flip, color_scale = 1.0, 0, False, [1.0, 1.0, 1.0] 84 | 85 | # 3. crop patch from img and perform data augmentation (flip, scale, rot, color scale) 86 | img_patch, trans = generate_patch_image(cvimg, bbox, do_flip, scale, rot) 87 | for i in range(img_channels): 88 | img_patch[:, :, i] = np.clip(img_patch[:, :, i] * color_scale[i], 0, 255) 89 | 90 | # 4. generate patch joint ground truth 91 | # flip joints and apply Affine Transform on joints 92 | if do_flip: 93 | joint_img[:, 0] = img_width - joint_img[:, 0] - 1 94 | for pair in flip_pairs: 95 | joint_img[pair[0], :], joint_img[pair[1], :] = joint_img[pair[1], :], joint_img[pair[0], :].copy() 96 | joint_vis[pair[0], :], joint_vis[pair[1], :] = joint_vis[pair[1], :], joint_vis[pair[0], :].copy() 97 | 98 | for i in range(len(joint_img)): 99 | joint_img[i, 0:2] = trans_point2d(joint_img[i, 0:2], trans) 100 | joint_img[i, 2] /= (cfg.bbox_3d_shape[0]/2. * scale) # expect depth lies in -bbox_3d_shape[0]/2 ~ bbox_3d_shape[0]/2 -> -1.0 ~ 1.0 101 | joint_img[i, 2] = (joint_img[i,2] + 1.0)/2. # 0~1 normalize 102 | joint_vis[i] *= ( 103 | (joint_img[i,0] >= 0) & \ 104 | (joint_img[i,0] < cfg.input_shape[1]) & \ 105 | (joint_img[i,1] >= 0) & \ 106 | (joint_img[i,1] < cfg.input_shape[0]) & \ 107 | (joint_img[i,2] >= 0) & \ 108 | (joint_img[i,2] < 1) 109 | ) 110 | 111 | vis = False 112 | if vis: 113 | filename = str(random.randrange(1,500)) 114 | tmpimg = img_patch.copy().astype(np.uint8) 115 | tmpkps = np.zeros((3,joint_num)) 116 | tmpkps[:2,:] = joint_img[:,:2].transpose(1,0) 117 | tmpkps[2,:] = joint_vis[:,0] 118 | tmpimg = vis_keypoints(tmpimg, tmpkps, skeleton) 119 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_gt.jpg'), tmpimg) 120 | 121 | vis = False 122 | if vis: 123 | vis_3d_skeleton(joint_img, joint_vis, skeleton, filename) 124 | 125 | # change coordinates to output space 126 | joint_img[:, 0] = joint_img[:, 0] / cfg.input_shape[1] * cfg.output_shape[1] 127 | joint_img[:, 1] = joint_img[:, 1] / cfg.input_shape[0] * cfg.output_shape[0] 128 | joint_img[:, 2] = joint_img[:, 2] * cfg.depth_dim 129 | 130 | # change joint coord, vis to reference dataset. 
0th db is reference dataset 131 | if self.multiple_db: 132 | joint_img = transform_joint_to_other_db(joint_img, joints_name, ref_joints_name) 133 | joint_vis = transform_joint_to_other_db(joint_vis, joints_name, ref_joints_name) 134 | 135 | if self.is_train: 136 | img_patch = self.transform(img_patch) 137 | joint_img = joint_img.astype(np.float32) 138 | joint_vis = (joint_vis > 0).astype(np.float32) 139 | joints_have_depth = np.array([joints_have_depth]).astype(np.float32) 140 | 141 | return img_patch, joint_img, joint_vis, joints_have_depth 142 | else: 143 | img_patch = self.transform(img_patch) 144 | return img_patch 145 | 146 | def __len__(self): 147 | if self.multiple_db: 148 | return max([len(db) for db in self.db]) * len(self.db) 149 | else: 150 | return len(self.db) 151 | 152 | # helper functions 153 | def transform_joint_to_other_db(src_joint, src_name, dst_name): 154 | 155 | src_joint_num = len(src_name) 156 | dst_joint_num = len(dst_name) 157 | 158 | new_joint = np.zeros(((dst_joint_num,) + src_joint.shape[1:])) 159 | 160 | for src_idx in range(len(src_name)): 161 | name = src_name[src_idx] 162 | if name in dst_name: 163 | dst_idx = dst_name.index(name) 164 | new_joint[dst_idx] = src_joint[src_idx] 165 | 166 | return new_joint 167 | 168 | def get_aug_config(): 169 | 170 | scale_factor = 0.25 171 | rot_factor = 30 172 | color_factor = 0.2 173 | 174 | scale = np.clip(np.random.randn(), -1.0, 1.0) * scale_factor + 1.0 175 | rot = np.clip(np.random.randn(), -2.0, 176 | 2.0) * rot_factor if random.random() <= 0.6 else 0 177 | do_flip = random.random() <= 0.5 178 | c_up = 1.0 + color_factor 179 | c_low = 1.0 - color_factor 180 | color_scale = [random.uniform(c_low, c_up), random.uniform(c_low, c_up), random.uniform(c_low, c_up)] 181 | 182 | return scale, rot, do_flip, color_scale 183 | 184 | 185 | def generate_patch_image(cvimg, bbox, do_flip, scale, rot): 186 | img = cvimg.copy() 187 | img_height, img_width, img_channels = img.shape 188 | 189 | bb_c_x = float(bbox[0] + 0.5*bbox[2]) 190 | bb_c_y = float(bbox[1] + 0.5*bbox[3]) 191 | bb_width = float(bbox[2]) 192 | bb_height = float(bbox[3]) 193 | 194 | if do_flip: 195 | img = img[:, ::-1, :] 196 | bb_c_x = img_width - bb_c_x - 1 197 | 198 | trans = gen_trans_from_patch_cv(bb_c_x, bb_c_y, bb_width, bb_height, cfg.input_shape[1], cfg.input_shape[0], scale, rot, inv=False) 199 | img_patch = cv2.warpAffine(img, trans, (int(cfg.input_shape[1]), int(cfg.input_shape[0])), flags=cv2.INTER_LINEAR) 200 | 201 | img_patch = img_patch[:,:,::-1].copy() 202 | img_patch = img_patch.astype(np.float32) 203 | 204 | return img_patch, trans 205 | 206 | def rotate_2d(pt_2d, rot_rad): 207 | x = pt_2d[0] 208 | y = pt_2d[1] 209 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 210 | xx = x * cs - y * sn 211 | yy = x * sn + y * cs 212 | return np.array([xx, yy], dtype=np.float32) 213 | 214 | def gen_trans_from_patch_cv(c_x, c_y, src_width, src_height, dst_width, dst_height, scale, rot, inv=False): 215 | # augment size with scale 216 | src_w = src_width * scale 217 | src_h = src_height * scale 218 | src_center = np.array([c_x, c_y], dtype=np.float32) 219 | # augment rotation 220 | rot_rad = np.pi * rot / 180 221 | src_downdir = rotate_2d(np.array([0, src_h * 0.5], dtype=np.float32), rot_rad) 222 | src_rightdir = rotate_2d(np.array([src_w * 0.5, 0], dtype=np.float32), rot_rad) 223 | 224 | dst_w = dst_width 225 | dst_h = dst_height 226 | dst_center = np.array([dst_w * 0.5, dst_h * 0.5], dtype=np.float32) 227 | dst_downdir = np.array([0, dst_h * 0.5], dtype=np.float32) 
228 | dst_rightdir = np.array([dst_w * 0.5, 0], dtype=np.float32) 229 | 230 | src = np.zeros((3, 2), dtype=np.float32) 231 | src[0, :] = src_center 232 | src[1, :] = src_center + src_downdir 233 | src[2, :] = src_center + src_rightdir 234 | 235 | dst = np.zeros((3, 2), dtype=np.float32) 236 | dst[0, :] = dst_center 237 | dst[1, :] = dst_center + dst_downdir 238 | dst[2, :] = dst_center + dst_rightdir 239 | 240 | if inv: 241 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 242 | else: 243 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 244 | 245 | return trans 246 | 247 | def trans_point2d(pt_2d, trans): 248 | src_pt = np.array([pt_2d[0], pt_2d[1], 1.]).T 249 | dst_pt = np.dot(trans, src_pt) 250 | return dst_pt[0:2] 251 | 252 | 253 | -------------------------------------------------------------------------------- /data/Human36M/Human36M.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import scipy.io as sio 4 | import numpy as np 5 | from config import cfg 6 | from utils.pose_utils import pixel2cam, rigid_align, process_world_coordinate, warp_coord_to_original 7 | import cv2 8 | import random 9 | from utils.vis import vis_keypoints, vis_3d_skeleton 10 | 11 | class Human36M: 12 | def __init__(self, data_split): 13 | self.data_split = data_split 14 | self.data_dir = osp.join('..', 'data', 'Human36M', 'data') 15 | self.subsampling = self.get_subsampling_ratio(data_split) 16 | self.joint_num = 18 17 | self.joints_name = ('Pelvis', 'R_Hip', 'R_Knee', 'R_Ankle', 'L_Hip', 'L_Knee', 'L_Ankle', 'Torso', 'Neck', 'Nose', 'Head', 'L_Shoulder', 'L_Elbow', 'L_Wrist', 'R_Shoulder', 'R_Elbow', 'R_Wrist', 'Thorax') 18 | self.flip_pairs = ( (1, 4), (2, 5), (3, 6), (14, 11), (15, 12), (16, 13) ) 19 | self.skeleton = ( (0, 7), (7, 8), (8, 9), (9, 10), (8, 11), (11, 12), (12, 13), (8, 14), (14, 15), (15, 16), (0, 1), (1, 2), (2, 3), (0, 4), (4, 5), (5, 6) ) 20 | self.lr_skeleton = ( ((8,11),(8,14)), ((11,12),(14,15)), ((12,13),(15,16)), ((0,1),(0,4)), ((1,2),(4,5)), ((2,3),(5,6)) ) 21 | self.eval_joint = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) 22 | self.joints_have_depth = True 23 | 24 | self.action_idx = (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) 25 | self.subaction_idx = (1, 2) 26 | self.camera_idx = (1, 2, 3, 4) 27 | self.action_name = ['Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning', 'Posing', 'Purchases', 'Sitting', 'SittingDown', 'Smoking', 'Photo', 'Waiting', 'Walking', 'WalkDog', 'WalkTogether'] 28 | 29 | self.root_idx = self.joints_name.index('Pelvis') 30 | self.lshoulder_idx = self.joints_name.index('L_Shoulder') 31 | self.rshoulder_idx = self.joints_name.index('R_Shoulder') 32 | 33 | def get_subsampling_ratio(self, data_split): 34 | 35 | if data_split == 'train': 36 | return 5 37 | elif data_split == 'test': 38 | return 64 39 | else: 40 | assert 0, print('Unknown subset') 41 | 42 | def load_h36m_annot_file(self, annot_file): 43 | 44 | data = sio.loadmat(annot_file) 45 | joint_world = data['pose3d_world'] # 3D world coordinates of keypoints 46 | R = data['R'] # extrinsic 47 | T = np.reshape(data['T'],(3)) # extrinsic 48 | f = np.reshape(data['f'],(-1)) # focal legnth 49 | c = np.reshape(data['c'],(-1)) # principal points 50 | img_heights = np.reshape(data['img_height'],(-1)) 51 | img_widths = np.reshape(data['img_width'],(-1)) 52 | 53 | # add thorax 54 | thorax = (joint_world[:, self.lshoulder_idx, :] + joint_world[:, self.rshoulder_idx, :]) * 
0.5 55 | thorax = thorax.reshape((thorax.shape[0], 1, thorax.shape[1])) 56 | joint_world = np.concatenate((joint_world, thorax), axis=1) 57 | 58 | return joint_world, R, T, f, c, img_widths, img_heights 59 | 60 | def _H36FolderName(self, subject_id, act_id, subact_id, camera_id): 61 | return "s_%02d_act_%02d_subact_%02d_ca_%02d" % \ 62 | (subject_id, act_id, subact_id, camera_id) 63 | 64 | def _H36ImageName(self, folder_name, frame_id): 65 | return "%s_%06d.jpg" % (folder_name, frame_id + 1) 66 | 67 | def _AllHuman36Folders(self, subject_list): 68 | folders = [] 69 | for i in subject_list: 70 | for j in self.action_idx: 71 | for m in self.subaction_idx: 72 | for n in self.camera_idx: 73 | folders.append(self._H36FolderName(i, j, m, n)) 74 | return folders 75 | 76 | def _sample_dataset(self, data_split): 77 | if data_split == 'train': 78 | folders = self._AllHuman36Folders([1, 5, 6, 7, 8]) 79 | elif data_split == 'test': 80 | folders = self._AllHuman36Folders([9, 11]) 81 | else: 82 | print("Unknown subset") 83 | assert 0 84 | 85 | return folders 86 | 87 | def load_data(self): 88 | 89 | folders = self._sample_dataset(self.data_split) 90 | data = [] 91 | for folder in folders: 92 | 93 | if folder == 's_11_act_02_subact_02_ca_01': 94 | continue 95 | 96 | folder_dir = osp.join(self.data_dir, folder) 97 | 98 | # load ground truth 99 | joint_world, R, T, f, c, img_widths, img_heights = self.load_h36m_annot_file(osp.join(folder_dir, 'h36m_meta.mat')) 100 | img_num = np.shape(joint_world)[0] 101 | 102 | for n in range(0, img_num, self.subsampling): 103 | 104 | img_path = osp.join(folder_dir, self._H36ImageName(folder, n)) 105 | joint_img, joint_cam, joint_vis, center_cam, bbox = process_world_coordinate(joint_world[n], self.root_idx, self.joint_num, R, T, f, c) 106 | 107 | img_width = img_widths[n] 108 | img_height = img_heights[n] 109 | 110 | data.append({ 111 | 'img_path': img_path, 112 | 'bbox': bbox, 113 | 'joint_img': joint_img, # [org_img_x, org_img_y, depth - root_depth] 114 | 'joint_cam': joint_cam, # [X, Y, Z] in camera coordinate 115 | 'joint_vis': joint_vis, 116 | 'center_cam': center_cam, # [X, Y, Z] in camera coordinate 117 | 'f': f, 118 | 'c': c 119 | }) 120 | 121 | return data 122 | 123 | def evaluate(self, preds, result_dir): 124 | 125 | print() 126 | print('Evaluation start...') 127 | 128 | gts = self.load_data() 129 | 130 | assert len(gts) == len(preds) 131 | 132 | sample_num = len(gts) 133 | joint_num = self.joint_num 134 | 135 | p1_error = np.zeros((sample_num, joint_num, 3)) # PA MPJPE (protocol #1 metric) 136 | p2_error = np.zeros((sample_num, joint_num, 3)) # MPJPE (protocol #2 metroc) 137 | p1_error_action = [ [] for _ in range(len(self.action_idx)) ] # PA MPJPE for each action 138 | p2_error_action = [ [] for _ in range(len(self.action_idx)) ] # MPJPE error for each action 139 | pred_to_save = [] 140 | for n in range(sample_num): 141 | 142 | gt = gts[n] 143 | f = gt['f'] 144 | c = gt['c'] 145 | bbox = gt['bbox'] 146 | gt_3d_center = gt['center_cam'] 147 | gt_3d_kpt = gt['joint_cam'] 148 | gt_vis = gt['joint_vis'].copy() 149 | 150 | # restore coordinates to original space 151 | pre_2d_kpt = preds[n].copy() 152 | pre_2d_kpt[:,0], pre_2d_kpt[:,1], pre_2d_kpt[:,2] = warp_coord_to_original(pre_2d_kpt, bbox, gt_3d_center) 153 | 154 | vis = False 155 | if vis: 156 | cvimg = cv2.imread(gt['img_path'], cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION) 157 | filename = str(random.randrange(1,500)) 158 | tmpimg = cvimg.copy().astype(np.uint8) 159 | tmpkps = np.zeros((3,joint_num)) 160 
| tmpkps[0,:], tmpkps[1,:] = pre_2d_kpt[:,0], pre_2d_kpt[:,1] 161 | tmpkps[2,:] = 1 162 | tmpimg = vis_keypoints(tmpimg, tmpkps, self.skeleton) 163 | cv2.imwrite(osp.join(cfg.vis_dir, filename + '_output.jpg'), tmpimg) 164 | 165 | # back project to camera coordinate system 166 | pre_3d_kpt = np.zeros((joint_num,3)) 167 | pre_3d_kpt[:,0], pre_3d_kpt[:,1], pre_3d_kpt[:,2] = pixel2cam(pre_2d_kpt, f, c) 168 | 169 | vis = False 170 | if vis: 171 | vis_3d_skeleton(pre_3d_kpt, gt_vis, self.skeleton, filename) 172 | 173 | # root joint alignment 174 | pre_3d_kpt = pre_3d_kpt - pre_3d_kpt[self.root_idx] 175 | gt_3d_kpt = gt_3d_kpt - gt_3d_kpt[self.root_idx] 176 | 177 | # rigid alignment for PA MPJPE (protocol #1) 178 | pre_3d_kpt_align = rigid_align(pre_3d_kpt, gt_3d_kpt) 179 | 180 | # prediction save 181 | pred_to_save.append({'pred': pre_3d_kpt, 182 | 'align_pred': pre_3d_kpt_align, 183 | 'gt': gt_3d_kpt}) 184 | 185 | # error save 186 | p1_error[n] = np.power(pre_3d_kpt_align - gt_3d_kpt,2) # PA MPJPE (protocol #1) 187 | p2_error[n] = np.power(pre_3d_kpt - gt_3d_kpt,2) # MPJPE (protocol #2) 188 | 189 | img_name = gt['img_path'] 190 | action_idx = int(img_name[img_name.find('act')+4:img_name.find('act')+6]) - 2 191 | p1_error_action[action_idx].append(p1_error[n].copy()) 192 | p2_error_action[action_idx].append(p2_error[n].copy()) 193 | 194 | 195 | # total error calculate 196 | p1_error = np.take(p1_error, self.eval_joint, axis=1) 197 | p2_error = np.take(p2_error, self.eval_joint, axis=1) 198 | p1_error = np.mean(np.power(np.sum(p1_error,axis=2),0.5)) 199 | p2_error = np.mean(np.power(np.sum(p2_error,axis=2),0.5)) 200 | 201 | p1_eval_summary = 'Protocol #1 error (PA MPJPE) >> %.2f' % (p1_error) 202 | p2_eval_summary = 'Protocol #2 error (MPJPE) >> %.2f' % (p2_error) 203 | print() 204 | print(p1_eval_summary) 205 | print(p2_eval_summary) 206 | 207 | # error for each action calculate 208 | p1_action_eval_summary = 'Protocol #1 error (PA MPJPE) for each action: \n' 209 | for i in range(len(p1_error_action)): 210 | err = np.array(p1_error_action[i]) 211 | err = np.take(err, self.eval_joint, axis=1) 212 | err = np.mean(np.power(np.sum(err,axis=2),0.5)) 213 | 214 | action_name = self.action_name[i] 215 | p1_action_eval_summary += (action_name + ': %.2f\n' % err) 216 | 217 | 218 | p2_action_eval_summary = 'Protocol #2 error (MPJPE) for each action: \n' 219 | for i in range(len(p2_error_action)): 220 | err = np.array(p2_error_action[i]) 221 | err = np.take(err, self.eval_joint, axis=1) 222 | err = np.mean(np.power(np.sum(err,axis=2),0.5)) 223 | 224 | action_name = self.action_name[i] 225 | p2_action_eval_summary += (action_name + ': %.2f\n' % err) 226 | print() 227 | print(p1_action_eval_summary) 228 | print(p2_action_eval_summary) 229 | 230 | # result save 231 | f_pred_3d_kpt = open(osp.join(result_dir, 'pred_3d_kpt.txt'), 'w') 232 | f_pred_3d_kpt_align = open(osp.join(result_dir, 'pred_3d_kpt_align.txt'), 'w') 233 | f_gt_3d_kpt = open(osp.join(result_dir, 'gt_3d_kpt.txt'), 'w') 234 | for i in range(len(pred_to_save)): 235 | for j in range(joint_num): 236 | for k in range(3): 237 | f_pred_3d_kpt.write('%.3f ' % pred_to_save[i]['pred'][j][k]) 238 | f_pred_3d_kpt_align.write('%.3f ' % pred_to_save[i]['align_pred'][j][k]) 239 | f_gt_3d_kpt.write('%.3f ' % pred_to_save[i]['gt'][j][k]) 240 | f_pred_3d_kpt.write('\n') 241 | f_pred_3d_kpt_align.write('\n') 242 | f_gt_3d_kpt.write('\n') 243 | f_pred_3d_kpt.close() 244 | f_pred_3d_kpt_align.close() 245 | f_gt_3d_kpt.close() 246 | 247 | f_eval_result = 
open(osp.join(result_dir, 'eval_result.txt'), 'w') 248 | f_eval_result.write(p1_eval_summary) 249 | f_eval_result.write('\n') 250 | f_eval_result.write(p2_eval_summary) 251 | f_eval_result.write('\n') 252 | f_eval_result.write(p1_action_eval_summary) 253 | f_eval_result.write('\n') 254 | f_eval_result.write(p2_action_eval_summary) 255 | f_eval_result.write('\n') 256 | f_eval_result.close() 257 | 258 | 259 | 260 | 261 | --------------------------------------------------------------------------------