├── nets ├── __init__.py └── ResNet.py ├── utils ├── __init__.py ├── general.py ├── mano_utils.py ├── img_util.py ├── rendering.py └── plot_util.py ├── ckpt └── put_checkpoints_here ├── .gitignore ├── mano_models └── put_MANO_RIGHT_pkl_here ├── manopth ├── __init__.py ├── rotproj.py ├── tensutils.py ├── posemapper.py ├── argutils.py ├── demo.py ├── rot6d.py ├── load_util.py ├── rodrigues_layer.py └── manolayer.py ├── rgb_variants ├── 0007 │ ├── cam3 │ │ └── 00000015_2.jpg │ └── cam4 │ │ ├── 00000000_3.jpg │ │ ├── 00000016_5.jpg │ │ ├── 00000017_4.jpg │ │ └── 00000018_3.jpg ├── 0011 │ ├── cam3 │ │ ├── 00000005_6.jpg │ │ └── 00000012_5.jpg │ └── cam4 │ │ ├── 00000004_0.jpg │ │ ├── 00000005_1.jpg │ │ ├── 00000011_1.jpg │ │ └── 00000012_2.jpg └── 0108 │ ├── cam3 │ ├── 00000026_2.jpg │ └── 00000026_5.jpg │ └── cam4 │ ├── 00000007_0.jpg │ ├── 00000017_0.jpg │ ├── 00000017_1.jpg │ ├── 00000017_5.jpg │ ├── 00000017_6.jpg │ ├── 00000017_7.jpg │ ├── 00000017_8.jpg │ ├── 00000019_0.jpg │ └── 00000026_3.jpg ├── run_moco_fw.py ├── run_moco_qualitative_embedding.py ├── README.md ├── run_hand_shape_fw.py ├── show_dataset.py └── DatasetUnsupervisedMV.py /nets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ckpt/put_checkpoints_here: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /mano_models/put_MANO_RIGHT_pkl_here: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /manopth/__init__.py: -------------------------------------------------------------------------------- 1 | name = 'manopth' 2 | -------------------------------------------------------------------------------- /rgb_variants/0007/cam3/00000015_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam3/00000015_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000000_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000000_3.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000016_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000016_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000017_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000017_4.jpg -------------------------------------------------------------------------------- 
/rgb_variants/0007/cam4/00000018_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000018_3.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam3/00000005_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam3/00000005_6.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam3/00000012_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam3/00000012_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000004_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000004_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000005_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000005_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000011_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000011_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000012_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000012_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam3/00000026_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam3/00000026_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam3/00000026_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam3/00000026_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000007_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000007_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_6.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_7.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_8.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000019_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000019_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000026_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000026_3.jpg -------------------------------------------------------------------------------- /manopth/rotproj.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def batch_rotprojs(batches_rotmats): 5 | proj_rotmats = [] 6 | for batch_idx, batch_rotmats in enumerate(batches_rotmats): 7 | proj_batch_rotmats = [] 8 | for rot_idx, rotmat in enumerate(batch_rotmats): 9 | # GPU implementation of svd is VERY slow 10 | # ~ 2 10^-3 per hit vs 5 10^-5 on cpu 11 | U, S, V = rotmat.cpu().svd() 12 | rotmat = torch.matmul(U, V.transpose(0, 1)) 13 | orth_det = rotmat.det() 14 | # Remove reflection 15 | if orth_det < 0: 16 | rotmat[:, 2] = -1 * rotmat[:, 2] 17 | 18 | rotmat = rotmat.cuda() 19 | proj_batch_rotmats.append(rotmat) 20 | proj_rotmats.append(torch.stack(proj_batch_rotmats)) 21 | return torch.stack(proj_rotmats) 22 | -------------------------------------------------------------------------------- /run_moco_fw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from nets.ResNet import resnet50 8 | 9 | 10 | class ModelWrap: 11 | def __init__(self): 12 | model = resnet50(pretrained=False, head_type='embed') 13 | model.cuda() 14 | model.eval() 15 | 16 | state_dict = torch.load('ckpt/model_moco.pth') 17 | model.load_state_dict(state_dict) 18 | 19 | self.model = model 20 | self.base_path = "./rgb_variants/" 21 | 22 | def run(self, image_file): 23 | img = cv2.imread(os.path.join(self.base_path, image_file)) 24 | img = 
cv2.resize(img, (224, 224)) 25 | 26 | trafo = lambda x: np.transpose(x[:, :, ::-1], [2, 0, 1]).astype(np.float32) / 255.0 - 0.5 27 | img_t = trafo(img) 28 | batch = torch.Tensor(np.stack([img_t], 0)).cuda() 29 | embed = self.model(batch) 30 | embed = embed.detach().cpu().numpy() 31 | 32 | return embed 33 | 34 | 35 | if __name__ == '__main__': 36 | 37 | m = ModelWrap() 38 | f1 = '0007/cam4/00000016_5.jpg' 39 | f2 = '0007/cam4/00000017_4.jpg' 40 | embed = m.run(f1), m.run(f2) 41 | 42 | def cossim(x, y): 43 | ip = np.sum(np.multiply(x, y)) 44 | n1 = np.linalg.norm(x, 2) 45 | n2 = np.linalg.norm(y, 2) 46 | return ip / (n1*n2) 47 | 48 | print('score', cossim(embed[0], embed[1])) 49 | -------------------------------------------------------------------------------- /manopth/tensutils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from manopth import rodrigues_layer 4 | 5 | 6 | def th_posemap_axisang(pose_vectors): 7 | rot_nb = int(pose_vectors.shape[1] / 3) 8 | pose_vec_reshaped = pose_vectors.contiguous().view(-1, 3) 9 | rot_mats = rodrigues_layer.batch_rodrigues(pose_vec_reshaped) 10 | rot_mats = rot_mats.view(pose_vectors.shape[0], rot_nb * 9) 11 | pose_maps = subtract_flat_id(rot_mats) 12 | return pose_maps, rot_mats 13 | 14 | 15 | def th_with_zeros(tensor): 16 | batch_size = tensor.shape[0] 17 | padding = tensor.new([0.0, 0.0, 0.0, 1.0]) 18 | padding.requires_grad = False 19 | 20 | concat_list = [tensor, padding.view(1, 1, 4).repeat(batch_size, 1, 1)] 21 | cat_res = torch.cat(concat_list, 1) 22 | return cat_res 23 | 24 | 25 | def th_pack(tensor): 26 | batch_size = tensor.shape[0] 27 | padding = tensor.new_zeros((batch_size, 4, 3)) 28 | padding.requires_grad = False 29 | pack_list = [padding, tensor] 30 | pack_res = torch.cat(pack_list, 2) 31 | return pack_res 32 | 33 | 34 | def subtract_flat_id(rot_mats): 35 | # Subtracts identity as a flattened tensor 36 | rot_nb = int(rot_mats.shape[1] / 9) 37 | id_flat = torch.eye( 38 | 3, dtype=rot_mats.dtype, device=rot_mats.device).view(1, 9).repeat( 39 | rot_mats.shape[0], rot_nb) 40 | # id_flat.requires_grad = False 41 | results = rot_mats - id_flat 42 | return results 43 | 44 | 45 | def make_list(tensor): 46 | # type: (List[int]) -> List[int] 47 | return tensor 48 | -------------------------------------------------------------------------------- /manopth/posemapper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only. 4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 
18 | 19 | ''' 20 | 21 | 22 | import chumpy as ch 23 | import numpy as np 24 | import cv2 25 | 26 | 27 | class Rodrigues(ch.Ch): 28 | dterms = 'rt' 29 | 30 | def compute_r(self): 31 | return cv2.Rodrigues(self.rt.r)[0] 32 | 33 | def compute_dr_wrt(self, wrt): 34 | if wrt is self.rt: 35 | return cv2.Rodrigues(self.rt.r)[1].T 36 | 37 | 38 | def lrotmin(p): 39 | if isinstance(p, np.ndarray): 40 | p = p.ravel()[3:] 41 | return np.concatenate( 42 | [(cv2.Rodrigues(np.array(pp))[0] - np.eye(3)).ravel() 43 | for pp in p.reshape((-1, 3))]).ravel() 44 | if p.ndim != 2 or p.shape[1] != 3: 45 | p = p.reshape((-1, 3)) 46 | p = p[1:] 47 | return ch.concatenate([(Rodrigues(pp) - ch.eye(3)).ravel() 48 | for pp in p]).ravel() 49 | 50 | 51 | def posemap(s): 52 | if s == 'lrotmin': 53 | return lrotmin 54 | else: 55 | raise Exception('Unknown posemapping: %s' % (str(s), )) 56 | -------------------------------------------------------------------------------- /manopth/argutils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import pickle 4 | import subprocess 5 | import sys 6 | 7 | 8 | def print_args(args): 9 | opts = vars(args) 10 | print('======= Options ========') 11 | for k, v in sorted(opts.items()): 12 | print('{}: {}'.format(k, v)) 13 | print('========================') 14 | 15 | 16 | def save_args(args, save_folder, opt_prefix='opt', verbose=True): 17 | opts = vars(args) 18 | # Create checkpoint folder 19 | if not os.path.exists(save_folder): 20 | os.makedirs(save_folder, exist_ok=True) 21 | 22 | # Save options 23 | opt_filename = '{}.txt'.format(opt_prefix) 24 | opt_path = os.path.join(save_folder, opt_filename) 25 | with open(opt_path, 'a') as opt_file: 26 | opt_file.write('====== Options ======\n') 27 | for k, v in sorted(opts.items()): 28 | opt_file.write( 29 | '{option}: {value}\n'.format(option=str(k), value=str(v))) 30 | opt_file.write('=====================\n') 31 | opt_file.write('launched {} at {}\n'.format( 32 | str(sys.argv[0]), str(datetime.datetime.now()))) 33 | 34 | # Add git info 35 | label = subprocess.check_output(["git", "describe", 36 | "--always"]).strip() 37 | if subprocess.call( 38 | ["git", "branch"], 39 | stderr=subprocess.STDOUT, 40 | stdout=open(os.devnull, 'w')) == 0: 41 | opt_file.write('=== Git info ====\n') 42 | opt_file.write('{}\n'.format(label)) 43 | commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']) 44 | opt_file.write('commit : {}\n'.format(commit.strip())) 45 | 46 | opt_picklename = '{}.pkl'.format(opt_prefix) 47 | opt_picklepath = os.path.join(save_folder, opt_picklename) 48 | with open(opt_picklepath, 'wb') as opt_file: 49 | pickle.dump(opts, opt_file) 50 | if verbose: 51 | print('Saved options to {}'.format(opt_path)) 52 | -------------------------------------------------------------------------------- /manopth/demo.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | from mpl_toolkits.mplot3d import Axes3D 3 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection 4 | import numpy as np 5 | import torch 6 | 7 | from manopth.manolayer import ManoLayer 8 | 9 | 10 | def generate_random_hand(batch_size=1, ncomps=6, mano_root='mano/models'): 11 | nfull_comps = ncomps + 3 # Add global orientation dims to PCA 12 | random_pcapose = torch.rand(batch_size, nfull_comps) 13 | mano_layer = ManoLayer(mano_root=mano_root) 14 | verts, joints = mano_layer(random_pcapose) 15 | return {'verts': verts, 'joints': joints, 
'faces': mano_layer.th_faces} 16 | 17 | 18 | def display_hand(hand_info, mano_faces=None, ax=None, alpha=0.2, batch_idx=0, show=True): 19 | """ 20 | Displays hand batch_idx in batch of hand_info, hand_info as returned by 21 | generate_random_hand 22 | """ 23 | if ax is None: 24 | fig = plt.figure() 25 | ax = fig.add_subplot(111, projection='3d') 26 | verts, joints = hand_info['verts'][batch_idx], hand_info['joints'][ 27 | batch_idx] 28 | if mano_faces is None: 29 | ax.scatter(verts[:, 0], verts[:, 1], verts[:, 2], alpha=0.1) 30 | else: 31 | mesh = Poly3DCollection(verts[mano_faces], alpha=alpha) 32 | face_color = (141 / 255, 184 / 255, 226 / 255) 33 | edge_color = (50 / 255, 50 / 255, 50 / 255) 34 | mesh.set_edgecolor(edge_color) 35 | mesh.set_facecolor(face_color) 36 | ax.add_collection3d(mesh) 37 | ax.scatter(joints[:, 0], joints[:, 1], joints[:, 2], color='r') 38 | cam_equal_aspect_3d(ax, verts.numpy()) 39 | if show: 40 | plt.show() 41 | 42 | 43 | def cam_equal_aspect_3d(ax, verts, flip_x=False): 44 | """ 45 | Centers view on cuboid containing hand and flips y and z axis 46 | and fixes azimuth 47 | """ 48 | extents = np.stack([verts.min(0), verts.max(0)], axis=1) 49 | sz = extents[:, 1] - extents[:, 0] 50 | centers = np.mean(extents, axis=1) 51 | maxsize = max(abs(sz)) 52 | r = maxsize / 2 53 | if flip_x: 54 | ax.set_xlim(centers[0] + r, centers[0] - r) 55 | else: 56 | ax.set_xlim(centers[0] - r, centers[0] + r) 57 | # Invert y and z axis 58 | ax.set_ylim(centers[1] + r, centers[1] - r) 59 | ax.set_zlim(centers[2] + r, centers[2] - r) 60 | -------------------------------------------------------------------------------- /manopth/rot6d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def compute_rotation_matrix_from_ortho6d(poses): 5 | """ 6 | Code from 7 | https://github.com/papagina/RotationContinuity 8 | On the Continuity of Rotation Representations in Neural Networks 9 | Zhou et al. CVPR19 10 | https://zhouyisjtu.github.io/project_rotation/rotation.html 11 | """ 12 | x_raw = poses[:, 0:3] # batch*3 13 | y_raw = poses[:, 3:6] # batch*3 14 | 15 | x = normalize_vector(x_raw) # batch*3 16 | z = cross_product(x, y_raw) # batch*3 17 | z = normalize_vector(z) # batch*3 18 | y = cross_product(z, x) # batch*3 19 | 20 | x = x.view(-1, 3, 1) 21 | y = y.view(-1, 3, 1) 22 | z = z.view(-1, 3, 1) 23 | matrix = torch.cat((x, y, z), 2) # batch*3*3 24 | return matrix 25 | 26 | def robust_compute_rotation_matrix_from_ortho6d(poses): 27 | """ 28 | Instead of making 2nd vector orthogonal to first 29 | create a base that takes into account the two predicted 30 | directions equally 31 | """ 32 | x_raw = poses[:, 0:3] # batch*3 33 | y_raw = poses[:, 3:6] # batch*3 34 | 35 | x = normalize_vector(x_raw) # batch*3 36 | y = normalize_vector(y_raw) # batch*3 37 | middle = normalize_vector(x + y) 38 | orthmid = normalize_vector(x - y) 39 | x = normalize_vector(middle + orthmid) 40 | y = normalize_vector(middle - orthmid) 41 | # Their scalar product should be small ! 42 | # assert torch.einsum("ij,ij->i", [x, y]).abs().max() < 0.00001 43 | z = normalize_vector(cross_product(x, y)) 44 | 45 | x = x.view(-1, 3, 1) 46 | y = y.view(-1, 3, 1) 47 | z = z.view(-1, 3, 1) 48 | matrix = torch.cat((x, y, z), 2) # batch*3*3 49 | # Check for reflection in matrix ! 
If found, flip last vector TODO 50 | assert (torch.stack([torch.det(mat) for mat in matrix ])< 0).sum() == 0 51 | return matrix 52 | 53 | 54 | def normalize_vector(v): 55 | batch = v.shape[0] 56 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 57 | v_mag = torch.max(v_mag, v.new([1e-8])) 58 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 59 | v = v/v_mag 60 | return v 61 | 62 | 63 | def cross_product(u, v): 64 | batch = u.shape[0] 65 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 66 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 67 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 68 | 69 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) 70 | 71 | return out 72 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | import gzip, pickle 2 | import numpy as np 3 | import json 4 | import os 5 | import cv2 6 | 7 | def get_dataset_path(): 8 | return "/misc/lmbraid18/zimmermc/datasets/FreiHAND_full/" 9 | 10 | def load_ckpt(model, pretrained_dict): 11 | model_dict = model.state_dict() 12 | overlap_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} # only keys that are in the model 13 | overlap_dict = {k: v for k, v in overlap_dict.items() if np.all(v.shape == model_dict[k].shape)} # only when the shape matches 14 | 15 | if len(model_dict) != len(overlap_dict): 16 | print('Missing/Not Matching weights:') 17 | for k, v in model_dict.items(): 18 | if k not in overlap_dict.keys(): 19 | print(k, 'model:', v.shape) 20 | print(f'Given {len(pretrained_dict)} weights for {len(model_dict)} model weights. Loaded {len(overlap_dict)} matching weights!') 21 | if len(overlap_dict) == 0: 22 | for k, v in pretrained_dict.items(): 23 | print('pretrained content', k, v.shape) 24 | for k, v in model_dict.items(): 25 | print('model', k, v.shape) 26 | raise Exception('No weights were loaded. This indicates an error.') 27 | 28 | model_dict.update(overlap_dict) 29 | model.load_state_dict(model_dict) 30 | 31 | 32 | class NumpyEncoder(json.JSONEncoder): 33 | def default(self, obj): 34 | if isinstance(obj, np.ndarray): 35 | return obj.tolist() 36 | 37 | if isinstance(obj, np.int32): 38 | return int(obj) 39 | if isinstance(obj, np.float32): 40 | return float(obj) 41 | 42 | if isinstance(obj, np.int64): 43 | return int(obj) 44 | if isinstance(obj, np.float64): 45 | return float(obj) 46 | return json.JSONEncoder.default(self, obj) 47 | 48 | 49 | def json_dump(file_name, data, pretty_format=False, overwrite=True, verbose=False): 50 | msg = 'File already exists and should not be overwritten: %s' % file_name 51 | assert not os.path.exists(file_name) or overwrite, msg 52 | 53 | with open(file_name, 'w') as fo: 54 | if pretty_format: 55 | json.dump(data, fo, cls=NumpyEncoder, sort_keys=True, indent=4) 56 | else: 57 | json.dump(data, fo, cls=NumpyEncoder) 58 | 59 | if verbose: 60 | print('Dumped %d entries to file %s' % (len(data), file_name)) 61 | 62 | 63 | def json_load(file_name): 64 | with open(file_name, 'r') as fi: 65 | data = json.load(fi) 66 | return data 67 | 68 | 69 | -------------------------------------------------------------------------------- /manopth/load_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only.
4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 18 | 19 | ''' 20 | import numpy as np 21 | import pickle 22 | import chumpy as ch 23 | from chumpy.ch import MatVecMult 24 | from .posemapper import posemap 25 | 26 | 27 | def ready_arguments(fname_or_dict, posekey4vposed='pose'): 28 | 29 | if not isinstance(fname_or_dict, dict): 30 | dd = pickle.load(open(fname_or_dict, 'rb'), encoding='latin1') 31 | # dd = pickle.load(open(fname_or_dict, 'rb')) 32 | else: 33 | dd = fname_or_dict 34 | 35 | want_shapemodel = 'shapedirs' in dd 36 | nposeparms = dd['kintree_table'].shape[1] * 3 37 | 38 | if 'trans' not in dd: 39 | dd['trans'] = np.zeros(3) 40 | if 'pose' not in dd: 41 | dd['pose'] = np.zeros(nposeparms) 42 | if 'shapedirs' in dd and 'betas' not in dd: 43 | dd['betas'] = np.zeros(dd['shapedirs'].shape[-1]) 44 | 45 | for s in [ 46 | 'v_template', 'weights', 'posedirs', 'pose', 'trans', 'shapedirs', 47 | 'betas', 'J' 48 | ]: 49 | if (s in dd) and not hasattr(dd[s], 'dterms'): 50 | dd[s] = ch.array(dd[s]) 51 | 52 | assert (posekey4vposed in dd) 53 | if want_shapemodel: 54 | dd['v_shaped'] = dd['shapedirs'].dot(dd['betas']) + dd['v_template'] 55 | v_shaped = dd['v_shaped'] 56 | J_tmpx = MatVecMult(dd['J_regressor'], v_shaped[:, 0]) 57 | J_tmpy = MatVecMult(dd['J_regressor'], v_shaped[:, 1]) 58 | J_tmpz = MatVecMult(dd['J_regressor'], v_shaped[:, 2]) 59 | dd['J'] = ch.vstack((J_tmpx, J_tmpy, J_tmpz)).T 60 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 61 | dd['v_posed'] = v_shaped + dd['posedirs'].dot(pose_map_res) 62 | else: 63 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 64 | dd_add = dd['posedirs'].dot(pose_map_res) 65 | dd['v_posed'] = dd['v_template'] + dd_add 66 | 67 | return dd 68 | 69 | 70 | -------------------------------------------------------------------------------- /run_moco_qualitative_embedding.py: -------------------------------------------------------------------------------- 1 | """ Script to produce the data for Figure 3 of the paper. """ 2 | import numpy as np 3 | import cv2, os 4 | import matplotlib.pyplot as plt 5 | from run_moco_fw import ModelWrap 6 | 7 | 8 | m = ModelWrap() 9 | data_path = m.base_path 10 | 11 | cossim = lambda x, y: np.sum(x*y)/np.linalg.norm(x, 2)/np.linalg.norm(y, 2) 12 | 13 | 14 | def show(path1, path2, save_to=None): 15 | print("show('%s', '%s')" % (path1, path2)) 16 | 17 | s = cossim(m.run(path1), m.run(path2)) 18 | 19 | img1 = cv2.imread(os.path.join(data_path, path1)) 20 | img2 = cv2.imread(os.path.join(data_path, path2)) 21 | if save_to is not None: 22 | cv2.imwrite(save_to + '_0.png', img1) 23 | cv2.imwrite(save_to + '_1.png', img2) 24 | with open(save_to + '_s.txt', 'w') as fo: 25 | fo.write('%f' % s) 26 | 27 | fig, ax = plt.subplots(1, 2) 28 | ax[0].imshow(img1[:, :, ::-1]) 29 | ax[1].imshow(img2[:, :, ::-1]) 30 | ax[1].set_title('score = %.3f' % s) 31 | plt.show() 32 | 33 | 34 | # 1. 
Show that pairs of the same sample with different backgrounds are encoded the same 35 | for i in (0, 5, 7): 36 | show('0108/cam4/00000017_%d.jpg' % i, '0108/cam4/00000017_%d.jpg' % (i+1), 37 | save_to='./moco_vis_ex/same_sample_diff_bg/%02d' % i) 38 | 39 | # 2. Show similar poses are encoded similarly 40 | i = 2 41 | show('0007/cam4/00000016_5.jpg', '0007/cam4/00000017_4.jpg', 42 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 43 | i = 3 44 | show('0011/cam4/00000004_0.jpg', '0011/cam4/00000005_1.jpg', 45 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 46 | i = 4 47 | show('0011/cam4/00000011_1.jpg', '0011/cam4/00000012_2.jpg', 48 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 49 | 50 | 51 | # 3. Different views are encoded similarly 52 | i = 0 53 | show('0108/cam4/00000026_3.jpg', '0108/cam3/00000026_5.jpg', 54 | save_to='./moco_vis_ex/diff_view/%02d' % i) 55 | i = 1 56 | show('0011/cam4/00000012_2.jpg', '0011/cam3/00000012_5.jpg', 57 | save_to='./moco_vis_ex/diff_view/%02d' % i) 58 | i = 3 59 | show('0011/cam4/00000005_1.jpg', '0011/cam3/00000005_6.jpg', 60 | save_to='./moco_vis_ex/diff_view/%02d' % i) 61 | 62 | 63 | # 4. Different poses are encoded differently 64 | i = 0 65 | show('0007/cam4/00000000_3.jpg', '0007/cam4/00000018_3.jpg', 66 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 67 | i = 1 68 | show('0108/cam4/00000007_0.jpg', '0108/cam4/00000019_0.jpg', 69 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 70 | i = 3 71 | show('0007/cam3/00000015_2.jpg', '0108/cam3/00000026_2.jpg', 72 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 73 | -------------------------------------------------------------------------------- /manopth/rodrigues_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This part reuses code from https://github.com/MandyMo/pytorch_HMR/blob/master/src/util.py 3 | which is part of a PyTorch port of SMPL. 4 | Thanks to Zhang Xiong (MandyMo) for making this great code available on github ! 5 | """ 6 | 7 | import argparse 8 | from torch.autograd import gradcheck 9 | import torch 10 | from torch.autograd import Variable 11 | 12 | from manopth import argutils 13 | 14 | 15 | def quat2mat(quat): 16 | """Convert quaternion coefficients to rotation matrix.
17 | Args: 18 | quat: size = [batch_size, 4] 4 <===>(w, x, y, z) 19 | Returns: 20 | Rotation matrix corresponding to the quaternion -- size = [batch_size, 3, 3] 21 | """ 22 | norm_quat = quat 23 | norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True) 24 | w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:, 25 | 2], norm_quat[:, 26 | 3] 27 | 28 | batch_size = quat.size(0) 29 | 30 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) 31 | wx, wy, wz = w * x, w * y, w * z 32 | xy, xz, yz = x * y, x * z, y * z 33 | 34 | rotMat = torch.stack([ 35 | w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy, 36 | w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz, 37 | w2 - x2 - y2 + z2 38 | ], 39 | dim=1).view(batch_size, 3, 3) 40 | return rotMat 41 | 42 | 43 | def batch_rodrigues(axisang): 44 | #axisang N x 3 45 | axisang_norm = torch.norm(axisang + 1e-8, p=2, dim=1) 46 | angle = torch.unsqueeze(axisang_norm, -1) 47 | axisang_normalized = torch.div(axisang, angle) 48 | angle = angle * 0.5 49 | v_cos = torch.cos(angle) 50 | v_sin = torch.sin(angle) 51 | quat = torch.cat([v_cos, v_sin * axisang_normalized], dim=1) 52 | rot_mat = quat2mat(quat) 53 | rot_mat = rot_mat.view(rot_mat.shape[0], 9) 54 | return rot_mat 55 | 56 | 57 | def th_get_axis_angle(vector): 58 | angle = torch.norm(vector, 2, 1) 59 | axes = vector / angle.unsqueeze(1) 60 | return axes, angle 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--batch_size', default=1, type=int) 66 | parser.add_argument('--cuda', action='store_true') 67 | args = parser.parse_args() 68 | 69 | argutils.print_args(args) 70 | 71 | n_components = 6 72 | rot = 3 73 | inputs = torch.rand(args.batch_size, rot) 74 | inputs_var = Variable(inputs.double(), requires_grad=True) 75 | if args.cuda: 76 | inputs = inputs.cuda() 77 | # outputs = batch_rodrigues(inputs) 78 | test_function = gradcheck(batch_rodrigues, (inputs_var, )) 79 | print('batch test passed !') 80 | 81 | inputs = torch.rand(rot) 82 | inputs_var = Variable(inputs.double(), requires_grad=True) 83 | test_function = gradcheck(th_cv2_rod_sub_id.apply, (inputs_var, )) 84 | print('th_cv2_rod test passed') 85 | 86 | inputs = torch.rand(rot) 87 | inputs_var = Variable(inputs.double(), requires_grad=True) 88 | test_th = gradcheck(th_cv2_rod.apply, (inputs_var, )) 89 | print('th_cv2_rod_id test passed !') 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HanCo Dataset & Contrastive Representation Learning for Hand Shape Estimation 2 | Code in conjunction with the publication: *Contrastive Representation Learning for Hand Shape Estimation*. 3 | 4 | This repository contains code for inference of both networks: 5 | The one obtained from self-supervised contrastive pre-training and the network trained supervisedly for hand pose estimation. 6 | Additionally, we provide examples how to work with the HanCo dataset and release the pytorch Dataset that was used during our pre-training experiments. 7 | This dataset is an extension of the [FreiHand](https://lmb.informatik.uni-freiburg.de/projects/freihand) dataset. 8 | 9 | Visit our [project page](https://lmb.informatik.uni-freiburg.de/projects/contra-hand/) for additional information. 
10 | 11 | 12 | # Requirements 13 | 14 | ### Python environment 15 | 16 | conda create -n contra-hand python=3.6 17 | conda activate contra-hand 18 | conda install -c pytorch pytorch=1.6.0 torchvision cudatoolkit=10.2 19 | conda install -c conda-forge -c fvcore fvcore transforms3d 20 | pip install pytorch3d transforms3d tqdm pytorch-lightning imgaug open3d matplotlib 21 | pip install git+https://github.com/hassony2/chumpy.git 22 | 23 | 24 | ### Hand Pose Dataset 25 | 26 | You either need the [full HanCo dataset](https://lmb.informatik.uni-freiburg.de/resources/datasets/HanCo.en.html) or the small [tester data sample](https://lmb.informatik.uni-freiburg.de/data/HanCo/HanCo_tester.zip) (recommended). 27 | 28 | ### Random Background Images 29 | 30 | As the hand pose dataset contains green screen images, randomized backgrounds can be used. For our dataset we used 2195 images from Flickr. As these were not all licensed in a permissive manner, we provide a set of background images to use with the dataset. 31 | These can be found [here](https://lmb.informatik.uni-freiburg.de/data/HanCo/HanCo_rnd_backgrounds.zip). 32 | 33 | 34 | ### MANO model 35 | 36 | Our supervised training code uses the MANO hand model, which you need to acquire separately due to licensing regulations: https://mano.is.tue.mpg.de 37 | 38 | In order for our code to work, copy *MANO_RIGHT.pkl* from the MANO website to *contra-hand/mano_models/MANO_RIGHT.pkl*. 39 | 40 | We also build on top of the great PyTorch implementation of MANO provided by [Yana Hasson et al.](https://github.com/hassony2/manopth), which was modified by us and is already contained in this repository. 41 | 42 | 43 | ### Trained models 44 | 45 | We release both the MoCo pretrained model and the shape estimation network that was derived from it. 46 | 47 | To get the trained models, download and unpack them locally: 48 | 49 | 50 | curl https://lmb.informatik.uni-freiburg.de/data/HanCo/contra-hand-ckpt.zip -o contra-hand-ckpt.zip && unzip contra-hand-ckpt.zip 51 | 52 | 53 | # Code 54 | 55 | This repository contains scripts that facilitate using the HanCo dataset and building on the results from our publication. 56 | 57 | ### Show dataset 58 | 59 | You will need to download the HanCo dataset (or at least the tester). 60 | This script gives you some examples of how to work with the dataset: 61 | 62 | python show_dataset.py 63 | 64 | 65 | ### Use our MoCo trained model 66 | 67 | 68 | There is a simple script that calculates the cosine similarity score for two hard-coded examples: 69 | 70 | python run_moco_fw.py 71 | 72 | 73 | There is also the script we used to create the respective figure in our paper: 74 | 75 | python run_moco_qualitative_embedding.py 76 | 77 | ### Self-Supervised Training with MoCo 78 | 79 | We provide a torch data loader that can be used as a drop-in replacement for MoCo training. 80 | The data loader can be found in `DatasetUnsupervisedMV.py`. It has boolean 81 | options that control how the data is provided: `cross_bg`, `cross_camera`, and 82 | `cross_time`. The `get_dataset` function also shows the pre-processing that we use, which is 83 | slightly different from the standard MoCo pre-processing.
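As an illustration, below is a minimal sketch of how the loader could be plugged into a MoCo-style training script. The augmentation values, flag settings and dataset path are assumptions for the example rather than the exact configuration we used; see `get_dataset` in `DatasetUnsupervisedMV.py` for the actual pre-processing. A plain single-view transform is passed, since the loader samples the second view of each pair itself according to the `cross_*` flags.

    import torch
    import torchvision.transforms as transforms
    from DatasetUnsupervisedMV import DatasetUnsupervisedMultiview

    # single-view augmentation applied to each sampled image (values are illustrative)
    transform = transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
    ])

    # '/path/to/HanCo' is a placeholder for the dataset root
    dataset = DatasetUnsupervisedMultiview(root='/path/to/HanCo', transform=transform,
                                           cross_bg=True, cross_camera=True, cross_time=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True,
                                         num_workers=16, pin_memory=True, drop_last=True)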
84 | 85 | ### Use our MANO prediction model 86 | 87 | The following script allows to run inference on an example image: 88 | 89 | run_hand_shape_fw.py 90 | 91 | 92 | -------------------------------------------------------------------------------- /utils/mano_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | np.cat = np.concatenate 5 | torch.transpose = lambda x, y: x.permute(y) 6 | 7 | 8 | def apply_scaling(theta): 9 | poses, shapes, cams = slice_theta(theta) 10 | 11 | poses_scaled = 1.0 * poses 12 | shapes_scaled = 0.5 * shapes 13 | root = cams[:, :2] # estimated root im image coords 14 | scale = cams[:, -1:] # estimated shape scale 15 | 16 | root = 14.0 * root + 112.0 17 | scale = 125.0 * scale + 730.0 18 | cams_scaled = torch.cat([root, scale], -1) 19 | 20 | theta_scaled = torch.cat([poses_scaled, shapes_scaled, cams_scaled], -1) 21 | return theta_scaled 22 | 23 | 24 | def slice_theta(theta): 25 | """ Slice vector of all hand shape parameters into sematically meaningful parts. 26 | """ 27 | return theta[:, :48], theta[:, 48:58], theta[:, -3:] 28 | 29 | 30 | def slice_cams(cams): 31 | """ 32 | Returns translation in uv and scale. 33 | """ 34 | return cams[:, :2], cams[:, -1:] 35 | 36 | 37 | def project(xyz, K, fw=torch): 38 | """ Project points into the camera. """ 39 | uv = fw.matmul(xyz, fw.transpose(K, [0, 2, 1])) 40 | uv = uv[:, :, :2] / uv[:, :, -1:] 41 | return uv 42 | 43 | 44 | def unproject(points2d, K, z=None, K_is_inv=False, fw=torch): 45 | """ Unproject a 2D point of camera K to distance z. 46 | """ 47 | batch = K.shape[0] 48 | points2d = fw.reshape(points2d, [batch, -1, 2]) 49 | points2d_h = fw.cat([points2d, fw.ones_like(points2d[:, :, :1])], -1) # homogeneous 50 | 51 | if K_is_inv: 52 | K_inv = K 53 | else: 54 | if fw == torch: 55 | K_inv = fw.inverse(K) 56 | else: 57 | K_inv = fw.linalg.inv(K) 58 | 59 | points3D = fw.matmul(points2d_h, fw.transpose(K_inv, [0, 2, 1])) # 3d point corresponding to the estimate image point where the root should go to 60 | if z is not None: 61 | z = fw.reshape(z, [batch, -1, 1]) 62 | points3D = points3D * z 63 | return points3D 64 | 65 | 66 | def trafoPoints(xyz, M, fw=torch): 67 | """ Transforms points into another coordinate frame. """ 68 | xyz_h = fw.cat([xyz, fw.ones_like(xyz[:, :, :1])], 2) 69 | xyz_cam = fw.matmul(xyz_h, fw.transpose(M, [0, 2, 1])) 70 | xyz_cam = xyz_cam[:, :, :3] / xyz_cam[:, :, -1:] 71 | return xyz_cam 72 | 73 | 74 | def calc_global_translation(trans_uv, scale, K, fw=torch): 75 | """ Calculate global translation from uv position and scale. 76 | """ 77 | scale = fw.reshape(scale, [-1, 1, 1]) 78 | z = 0.5 * (K[:, :1, :1] + K[:, 1:2, 1:2]) / scale # calculate root depth from scale 79 | 80 | # calculate and apply global translation 81 | global_t = unproject(trans_uv, K, z, fw=fw) # unprojection of the estimated mano root using the estimated depth 82 | return global_t, z 83 | 84 | 85 | def calc_global_translation_from_theta(theta, K, fw=torch): 86 | """ Calculate global translation from uv position and scale. 87 | """ 88 | _, _, cams = slice_theta(theta) 89 | trans_uv, scale = slice_cams(cams) 90 | return calc_global_translation(trans_uv, scale, K, fw=fw) 91 | 92 | 93 | def pred_to_mano(theta, K, fw=torch): 94 | """ Convert predicted theta into MANO parameters. 
95 | """ 96 | poses, shapes, cams = slice_theta(theta) 97 | trans_uv, scale = slice_cams(cams) 98 | global_t, _ = calc_global_translation(trans_uv, scale, K, fw=fw) 99 | return poses, shapes, global_t 100 | 101 | 102 | def mano_to_vector(poses, shapes, global_t, K, fw=torch): 103 | """ Given the semantic parts of the mano shape model, create a parameter vector out of it (which will be estimated by networks) 104 | 105 | poses and global_t must already be in the cameras 3D coordinate frame. 106 | """ 107 | # project 3D point into cam 108 | trans_uv = project(global_t, K, fw=fw) 109 | 110 | # find scale = focal_length / depth 111 | scale = 0.5*(K[:, 0, 0] + K[:, 1, 1])[:, None] / global_t[:, :, -1] 112 | 113 | # assemble cams 114 | cams = fw.cat([trans_uv[:, 0], scale], -1) 115 | 116 | # assemble theta 117 | theta = fw.cat([poses, shapes, cams], -1) 118 | return theta 119 | -------------------------------------------------------------------------------- /utils/img_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def downsample(img, target_size, K=None, borderValue=0.0): 6 | """ 7 | img, HxWxC image 8 | target_size, shape in (height, width) 9 | K, camera intrinsic matrix 10 | """ 11 | f_y = float(target_size[0]) / img.shape[0] 12 | f_x = float(target_size[1]) / img.shape[1] 13 | 14 | # how to account for crop in intrinsics 15 | M = np.array([[f_x, 0.0, 0.0], 16 | [0.0, f_y, 0.0], 17 | [0.0, 0.0, 1.0]]) 18 | 19 | img_c = cv2.warpAffine(img, M[:2, :], 20 | (target_size[0], target_size[1]), 21 | borderValue=borderValue) 22 | if K is None: 23 | return img_c 24 | K_c = np.matmul(M, K) 25 | return img_c, K_c 26 | 27 | 28 | def random_crop(img, K=None, 29 | f_trans=0.05, # percent of the image size 30 | f_scale_min=0.8, f_scale_max=1.0, # percent of the 31 | target_size=128, borderValue=0.0): 32 | center = np.array([img.shape[1], img.shape[0]], dtype=np.float32) / 2.0 33 | size = np.array([img.shape[1], img.shape[0]], dtype=np.float32) 34 | 35 | # random translation 36 | f = np.random.rand(2, ) * 2 * f_trans - f_trans 37 | trans_uv = f*size 38 | 39 | # random scaling 40 | f = np.random.rand() * (f_scale_max - f_scale_min) + f_scale_min 41 | 42 | trans_uv -= center*(1.0-f) # translation of the image center due to scaling 43 | 44 | f *= 224.0 / target_size 45 | 46 | # how to account for crop in intrinsics 47 | M = np.array([[1.0 / f, 0.0, trans_uv[0] / f], 48 | [0.0, 1.0 / f, trans_uv[1] / f], 49 | [0.0, 0.0, 1.0]]) 50 | 51 | img_c = cv2.warpAffine(img, M[:2, :], (target_size, target_size), borderValue=borderValue) 52 | if K is None: 53 | return img_c 54 | K_c = np.matmul(M, K) 55 | return img_c, K_c 56 | 57 | 58 | def crop(img, center, size, K=None, target_size=128, borderValue=0.0, scale_values=False): 59 | size = np.max(size)*np.ones_like(size) 60 | size = (size/2.0).round().astype(np.int32) # this cant be a float 61 | 62 | # create crop image 63 | borderValue = np.array(borderValue).astype(img.dtype) 64 | img_crop = borderValue * np.ones((2*size[0], 2*size[1], img.shape[2]), 65 | dtype=img.dtype) # after mean subtraction 127.5 will be zero 66 | 67 | # figure out where we would like to crop (can exceed image dimensions) 68 | start_t = (center - size).round().astype(np.int32) 69 | end_t = start_t + 2*size 70 | 71 | # check if there is actually anything to be cropped (sometimes crop is completely out of the image). 
72 | do_crop = True 73 | 74 | # sanity check the crop values (sometime the crop is completely outside the image) 75 | if np.any(np.logical_or(end_t < 0, start_t > np.array(img.shape[:2]) - 1)): 76 | print('WARNING: Crop is completely outside image bounds!', center, img.shape) 77 | do_crop = False 78 | 79 | # check image boundaries: Where can we crop? 80 | start = np.maximum(start_t, 0) 81 | end = np.minimum(end_t, np.array(img.shape[:2]) - 1) 82 | 83 | # check discrepancy 84 | crop_start = start - start_t 85 | crop_end = 2*size - (end_t - end) 86 | 87 | if do_crop: 88 | img_crop[crop_start[0]:crop_end[0], crop_start[1]:crop_end[1], :] = img[start[0]:end[0], start[1]:end[1], :] 89 | offset = start - crop_start 90 | 91 | scale = (end - start) / np.array([target_size, target_size], dtype=np.float32) 92 | img_crop = cv2.resize(img_crop, (target_size, target_size)) 93 | 94 | if scale_values: 95 | # makes sense if the image is a flow 96 | img_crop[:, :, 0] /= scale[1] 97 | img_crop[:, :, 1] /= scale[0] 98 | 99 | if K is not None: 100 | # how to account for crop in intrinsics 101 | A = np.array([[1.0/scale[1], 0.0, -offset[1]/scale[1]], 102 | [0.0, 1.0/scale[0], -offset[0]/scale[0]], 103 | [0.0, 0.0, 1.0]]) 104 | return img_crop, np.matmul(A, K.copy()) 105 | return img_crop 106 | 107 | -------------------------------------------------------------------------------- /utils/rendering.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | import transforms3d as t3d 5 | import pickle 6 | from manopth.manolayer import ManoLayer 7 | 8 | from pytorch3d.io import load_objs_as_meshes 9 | from pytorch3d.structures import Meshes 10 | from pytorch3d.renderer import ( 11 | PerspectiveCameras, 12 | PointLights, 13 | RasterizationSettings, 14 | MeshRenderer, 15 | MeshRasterizer, 16 | HardGouraudShader, 17 | SoftPhongShader, 18 | TexturesVertex, 19 | TexturesUV, 20 | BlendParams 21 | ) 22 | 23 | 24 | def render_verts_faces(verts, faces, 25 | K, M_obj2cam, img_shape, 26 | verts_color=None, device='cuda', 27 | segmentation=True, render_size=200): 28 | if verts_color is None: 29 | verts_color = np.array([205/255., 205/255., 205/255.], dtype=np.float32) 30 | verts_color = torch.Tensor(verts_color).to(device) 31 | 32 | # Load obj file 33 | verts_feat = torch.ones_like(verts) * verts_color 34 | verts, faces = verts.to(device), faces.to(device) 35 | verts_feat = verts_feat.to(device) 36 | tex = TexturesVertex(verts_features=verts_feat) 37 | mesh = Meshes(verts.to(device), 38 | faces.to(device), 39 | tex) 40 | 41 | # Convert coordinate frames: pytorch3d X left, Y up --> CV X right, Y down 42 | # Rotate 180deg around z axis 43 | M_corr = np.eye(4) 44 | M_corr[:3, :3] = t3d.euler.euler2mat(0.0, .0, np.pi) 45 | M_obj2cam = np.matmul(M_corr, M_obj2cam) 46 | 47 | # setup camera 48 | focal = np.stack([K[:, 0, 0], K[:, 1, 1]], -1) 49 | pp = np.stack([K[:, 0, 2], K[:, 1, 2]], -1) 50 | img_shape = np.stack([img_shape[:, 1], img_shape[:, 0]], -1) 51 | R = np.transpose(M_obj2cam[:, :3, :3], [0, 2, 1]) 52 | T = M_obj2cam[:, :3, 3] 53 | cameras = PerspectiveCameras(focal_length=focal, 54 | principal_point=pp, 55 | R=R, 56 | T=T, 57 | image_size=img_shape, 58 | device=device) 59 | 60 | raster_settings = RasterizationSettings( 61 | image_size=render_size, 62 | blur_radius=0.0, 63 | faces_per_pixel=1, 64 | ) 65 | 66 | rasterizer = MeshRasterizer( 67 | cameras=cameras, 68 | raster_settings=raster_settings 69 | ) 70 | 71 | if 
segmentation: 72 | lights = PointLights(location=((1, 1, 0), ), 73 | ambient_color=((1.0, 1.0, 1.0),), 74 | diffuse_color=((0.0, 0.0, 0.0),), 75 | specular_color=((0.1, 0.1, 0.1),), 76 | device=device) 77 | 78 | shader = HardGouraudShader( 79 | device=device, 80 | cameras=cameras, 81 | lights=lights, 82 | blend_params=BlendParams(background_color=(.0, .0, .0)) 83 | ) 84 | else: 85 | 86 | d = 0.3 # diffuse 87 | a = 1.0-d # ambient 88 | lights = PointLights(location=((1, 1, 0),), 89 | diffuse_color=((d, d, d),), 90 | ambient_color=((a, a, a),), 91 | specular_color=((0.1, 0.1, 0.1),), 92 | device=device) 93 | 94 | shader = SoftPhongShader( 95 | device=device, 96 | cameras=cameras, 97 | lights=lights, 98 | blend_params=BlendParams(background_color=((.0, .0, .0),)) 99 | ) 100 | 101 | renderer = MeshRenderer( 102 | rasterizer=rasterizer, 103 | shader=shader 104 | ) 105 | 106 | fragments = rasterizer(mesh) 107 | 108 | image = renderer(mesh) 109 | im_out, dep_out = list(), list() 110 | for i, (w, h) in enumerate(img_shape): 111 | im_out.append( 112 | F.interpolate( 113 | image[i:i+1, :, :, :3].permute([0, 3, 1, 2]), 114 | (h, w)) 115 | ) 116 | dep_out.append( 117 | F.interpolate( 118 | fragments.zbuf[i:i+1, :, :, :1].permute([0, 3, 1, 2]), 119 | (h, w)) 120 | ) 121 | return im_out, dep_out 122 | 123 | 124 | -------------------------------------------------------------------------------- /run_hand_shape_fw.py: -------------------------------------------------------------------------------- 1 | """ Run forward pass on trained hand shape estimation network. """ 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from collections import defaultdict 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import glob 9 | import numpy as np 10 | import cv2 11 | import time, os, json 12 | import matplotlib.pyplot as plt 13 | from tqdm import tqdm 14 | 15 | from utils.plot_util import draw_hand 16 | from utils.rendering import render_verts_faces 17 | from manopth.manolayer import ManoLayer 18 | from nets.ResNet import resnet50 19 | 20 | 21 | from utils.img_util import downsample 22 | from utils.mano_utils import apply_scaling, pred_to_mano, project 23 | from utils.general import load_ckpt, json_load 24 | 25 | 26 | class ManoPredictor(nn.Module): 27 | def __init__(self): 28 | super().__init__() 29 | self.model = resnet50(pretrained=False, head_type='mano') 30 | self.mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, center_idx=9) 31 | 32 | def forward(self, image_np, K_np, device='cpu'): 33 | assert image_np.shape == (224, 224, 3), 'Image shape mismatch.' 
34 | img = np.transpose(image_np[:, :, ::-1], [2, 0, 1]).astype(np.float32) / 255.0 - 0.5 35 | img = np.expand_dims(img, 0) 36 | theta_p = self.model( 37 | torch.Tensor(img).to(device) 38 | ) 39 | theta_p = apply_scaling(theta_p) 40 | poses, shapes, global_t = pred_to_mano(theta_p, 41 | torch.Tensor(K_np[None]).to(device) 42 | ) 43 | verts_p, xyz_p = self.mano(poses, shapes, global_t) 44 | uv_p = project(xyz_p, torch.Tensor(K_np[None]).to(device)) 45 | 46 | verts_p_np = verts_p.detach().cpu().numpy()[0] 47 | xyz_p_np = xyz_p.detach().cpu().numpy()[0] 48 | uv_p_np = uv_p.detach().cpu().numpy()[0] 49 | 50 | img_shape = np.array([[image_np.shape[0], image_np.shape[1]]]) 51 | mask_p, _ = render_verts_faces(verts_p, 52 | self.mano.th_faces[None], 53 | K_np[None], np.eye(4)[None], img_shape) 54 | mask_np = mask_p[0].detach().cpu().numpy()[0].transpose([1, 2, 0]) 55 | return verts_p_np, xyz_p_np, uv_p_np, mask_np 56 | 57 | 58 | def main(): 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument('hanco_path', type=str, help='Path to where HanCo dataset is stored.') 61 | parser.add_argument('--sid', type=int, help='Sequence ID.', default=110) 62 | parser.add_argument('--cid', type=int, help='Camera ID.', default=3) 63 | parser.add_argument('--fid', type=int, help='Frame ID.', default=0) 64 | args = parser.parse_args() 65 | 66 | assert os.path.exists(args.hanco_path), 'Path to HanCo not found.' 67 | assert os.path.isdir(args.hanco_path), 'Path to HanCo doesnt seem to be a directory.' 68 | 69 | 70 | img_path = os.path.join(args.hanco_path, f'rgb/{args.sid:04d}/cam{args.cid}/{args.fid:08d}.jpg') 71 | calib_path = os.path.join(args.hanco_path, f'calib/{args.sid:04d}/{args.fid:08d}.json') 72 | 73 | assert os.path.exists(img_path), f'Image not found: {img_path}' 74 | assert os.path.exists(calib_path), f'Calibration not found: {calib_path}' 75 | 76 | img = cv2.imread(img_path) 77 | K = np.array(json_load(calib_path)['K'][3]) 78 | 79 | # Load network 80 | model = ManoPredictor() 81 | state_dict = torch.load('ckpt/model_mano.pth') 82 | model.load_state_dict(state_dict, strict=False) 83 | model.cuda() 84 | model.eval() 85 | 86 | # forward pass 87 | with torch.no_grad(): 88 | verts_xyz_p, joints_xyz_p, joints_uv_p, mask_p = model.forward(img, K, 'cuda') 89 | 90 | # vis rgb image with predicted skeleton 91 | img_vis = draw_hand(img.copy(), joints_uv_p, kp_style=(2, 1), order='uv', img_order='bgr') 92 | 93 | # vis rendered mask with predicted skeleton 94 | mask_p = np.clip(mask_p*255, 0, 255).astype(np.uint8) 95 | mask_vis = draw_hand(mask_p.copy(), joints_uv_p, kp_style=(2, 1), order='uv', img_order='bgr') 96 | 97 | fig, ax = plt.subplots(1, 2) 98 | ax[0].imshow(img_vis[:, :, ::-1]), ax[0].set_title('rgb+pred skel') 99 | ax[1].imshow(mask_vis[:, :, ::-1]), ax[1].set_title('pred shape+skel') 100 | plt.show() 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /utils/plot_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, unicode_literals 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def draw_hand(image, coords_hw, vis=None, color_fixed=None, linewidth=3, order='hw', img_order='rgb', 7 | draw_kp=True, kp_style=None): 8 | """ Inpaints a hand stick figure into a matplotlib figure. 
""" 9 | if kp_style is None: 10 | kp_style = (5, 3) 11 | 12 | image = np.squeeze(image) 13 | if len(image.shape) == 2: 14 | image = np.expand_dims(image, 2) 15 | s = image.shape 16 | assert len(s) == 3, "This only works for single images." 17 | 18 | convert_to_uint8 = False 19 | if s[2] == 1: 20 | # grayscale case 21 | image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4) 22 | image = np.tile(image, [1, 1, 3]) 23 | pass 24 | elif s[2] == 3: 25 | # RGB case 26 | if image.dtype == np.uint8: 27 | convert_to_uint8 = True 28 | image = image.astype('float32') / 255.0 29 | elif image.dtype == np.float32: 30 | # convert to gray image 31 | image = np.mean(image, axis=2) 32 | image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4) 33 | image = np.expand_dims(image, 2) 34 | image = np.tile(image, [1, 1, 3]) 35 | else: 36 | assert 0, "Unknown image dimensions." 37 | 38 | if order == 'uv': 39 | coords_hw = coords_hw[:, ::-1] 40 | 41 | colors = np.array([[0.4, 0.4, 0.4], 42 | [0.4, 0.0, 0.0], 43 | [0.6, 0.0, 0.0], 44 | [0.8, 0.0, 0.0], 45 | [1.0, 0.0, 0.0], 46 | [0.4, 0.4, 0.0], 47 | [0.6, 0.6, 0.0], 48 | [0.8, 0.8, 0.0], 49 | [1.0, 1.0, 0.0], 50 | [0.0, 0.4, 0.2], 51 | [0.0, 0.6, 0.3], 52 | [0.0, 0.8, 0.4], 53 | [0.0, 1.0, 0.5], 54 | [0.0, 0.2, 0.4], 55 | [0.0, 0.3, 0.6], 56 | [0.0, 0.4, 0.8], 57 | [0.0, 0.5, 1.0], 58 | [0.4, 0.0, 0.4], 59 | [0.6, 0.0, 0.6], 60 | [0.7, 0.0, 0.8], 61 | [1.0, 0.0, 1.0]]) 62 | 63 | if img_order == 'rgb': 64 | colors = colors[:, ::-1] 65 | 66 | # define connections and colors of the bones 67 | bones = [((0, 1), colors[1, :]), 68 | ((1, 2), colors[2, :]), 69 | ((2, 3), colors[3, :]), 70 | ((3, 4), colors[4, :]), 71 | 72 | ((0, 5), colors[5, :]), 73 | ((5, 6), colors[6, :]), 74 | ((6, 7), colors[7, :]), 75 | ((7, 8), colors[8, :]), 76 | 77 | ((0, 9), colors[9, :]), 78 | ((9, 10), colors[10, :]), 79 | ((10, 11), colors[11, :]), 80 | ((11, 12), colors[12, :]), 81 | 82 | ((0, 13), colors[13, :]), 83 | ((13, 14), colors[14, :]), 84 | ((14, 15), colors[15, :]), 85 | ((15, 16), colors[16, :]), 86 | 87 | ((0, 17), colors[17, :]), 88 | ((17, 18), colors[18, :]), 89 | ((18, 19), colors[19, :]), 90 | ((19, 20), colors[20, :])] 91 | 92 | color_map = {'k': np.array([0.0, 0.0, 0.0]), 93 | 'w': np.array([1.0, 1.0, 1.0]), 94 | 'b': np.array([0.0, 0.0, 1.0]), 95 | 'g': np.array([0.0, 1.0, 0.0]), 96 | 'r': np.array([1.0, 0.0, 0.0]), 97 | 'm': np.array([1.0, 1.0, 0.0]), 98 | 'c': np.array([0.0, 1.0, 1.0])} 99 | 100 | if vis is None: 101 | vis = np.ones_like(coords_hw[:, 0]) == 1.0 102 | 103 | for connection, color in bones: 104 | if (vis[connection[0]] == False) or (vis[connection[1]] == False): 105 | continue 106 | 107 | coord1 = coords_hw[connection[0], :].astype(np.int32) 108 | coord2 = coords_hw[connection[1], :].astype(np.int32) 109 | 110 | if (coord1[0] < 1) or (coord1[0] >= s[0]) or (coord1[1] < 1) or (coord1[1] >= s[1]): 111 | continue 112 | if (coord2[0] < 1) or (coord2[0] >= s[0]) or (coord2[1] < 1) or (coord2[1] >= s[1]): 113 | continue 114 | 115 | if color_fixed is None: 116 | cv2.line(image, (coord1[1], coord1[0]), (coord2[1], coord2[0]), color, thickness=linewidth) 117 | else: 118 | c = color_map.get(color_fixed, np.array([1.0, 1.0, 1.0])) 119 | cv2.line(image, (coord1[1], coord1[0]), (coord2[1], coord2[0]), c, thickness=linewidth) 120 | 121 | if draw_kp: 122 | coords_hw = coords_hw.astype(np.int32) 123 | for i in range(21): 124 | if vis[i]: 125 | # cv2.circle(img, center, radius, color, thickness) 126 | image = cv2.circle(image, (coords_hw[i, 
1], coords_hw[i, 0]), 127 | radius=kp_style[0], color=colors[i, :], thickness=kp_style[1]) 128 | 129 | if convert_to_uint8: 130 | image = (image * 255).astype('uint8') 131 | 132 | return image 133 | 134 | -------------------------------------------------------------------------------- /show_dataset.py: -------------------------------------------------------------------------------- 1 | """ Iterate HanCo dataset and show how to work with data. """ 2 | import os, argparse, json 3 | import numpy as np 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | from utils.plot_util import draw_hand 9 | 10 | def example_meta_data(args): 11 | meta_file = os.path.join(args.hanco_path, 'meta.json') 12 | with open(meta_file, 'r') as fi: 13 | meta_data = json.load(fi) 14 | print(type(meta_data)) # Its a dict 15 | print(meta_data.keys()) # Its keys are: 'is_train', 'subject_id', 'is_valid', 'object_id', 'has_fit' 16 | 17 | for k, v in meta_data.items(): 18 | print(k, type(v), len(v), v[0][:3], v[-1][:3]) # these are all lists of length 1518 (= one entry for each sequence), each entry is another list representing the frames of the sequence 19 | 20 | # is_train: bool, True if recorded with green screen background 21 | # subject_id: int, Unique identifier for the human performer 22 | # is_valid: bool, True if there is a validated MANO shape fit 23 | # object_id: int, Unique identifier for the object used. None for sequences w/o object interaction 24 | # has_fit: bool, True if there is a MANO shape fit. Potentially, not validated 25 | 26 | 27 | def example_show_data(args, sid): 28 | """ 29 | sid: Sequence id: int, in [0, 1517] 30 | """ 31 | meta_file = os.path.join(args.hanco_path, 'meta.json') 32 | with open(meta_file, 'r') as fi: 33 | meta_data = json.load(fi) 34 | 35 | print(f"\nShowing sequence {sid} with {len(meta_data['is_train'][sid])} frames.") 36 | 37 | # iterate frames of this sequence 38 | for fid in range(len(meta_data['is_train'])): 39 | print(f"fid={fid},\n" 40 | f"is_train={meta_data['is_train'][sid][fid]},\n" 41 | f"subject_id={meta_data['subject_id'][sid][fid]},\n" 42 | f"is_valid={meta_data['is_valid'][sid][fid]},\n" 43 | f"object_id={meta_data['object_id'][sid][fid]},\n" 44 | f"has_fit={meta_data['has_fit'][sid][fid]}") 45 | rgb_list = list() 46 | for cid in range(8): # iterate cameras 47 | rgb_path = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 48 | rgb_list.append( 49 | cv2.imread(rgb_path)[:, :, ::-1] 50 | ) 51 | 52 | # show 53 | fig, ax = plt.subplots(1, 8) 54 | for j, img in enumerate(rgb_list): 55 | ax[j].imshow(img) 56 | ax[j].set_xticks([], []) 57 | ax[j].set_yticks([], []) 58 | plt.show() 59 | 60 | if fid > 3: 61 | # we deliberately stop showing after some samples 62 | break 63 | 64 | def example_show_keypoints(args, sid, fid, cid): 65 | # load image 66 | image_file = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 67 | img = cv2.imread(image_file)[:, :, ::-1] 68 | 69 | # load keypoints 70 | kp_data_file = os.path.join(args.hanco_path, f'xyz/{sid:04d}/{fid:08d}.json') 71 | with open(kp_data_file, 'r') as fi: 72 | kp_xyz = np.array(json.load(fi)) 73 | print('kp_xyz', kp_xyz.shape, kp_xyz.dtype) # 21x3, np.float64, world coordinates 74 | 75 | # load calibration 76 | calib_file = os.path.join(args.hanco_path, f'calib/{sid:04d}/{fid:08d}.json') 77 | with open(calib_file, 'r') as fi: 78 | calib = json.load(fi) 79 | 80 | # project points 81 | M_w2cam = np.array(calib['M'])[cid] 82 | K = np.array(calib['K'])[cid] 83 | kp_xyz_cam = 
np.matmul(kp_xyz, M_w2cam[:3, :3].T) + M_w2cam[:3, 3][None] # in camera coordinates 84 | kp_xyz_cam = kp_xyz_cam / kp_xyz_cam[:, -1:] 85 | kp_uv = np.matmul(kp_xyz_cam, K.T) 86 | kp_uv = kp_uv[:, :2] / kp_uv[:, -1:] 87 | 88 | # show 89 | img = draw_hand(img, kp_uv, order='uv', img_order='rgb') 90 | 91 | fig = plt.figure() 92 | ax = fig.add_subplot(111) 93 | ax.imshow(img) 94 | plt.show() 95 | 96 | 97 | def example_show_shape(args, sid, fid, cid): 98 | import torch 99 | from manopth.manolayer import ManoLayer 100 | from utils.mano_utils import pred_to_mano, project, trafoPoints 101 | from utils.rendering import render_verts_faces 102 | 103 | # load image 104 | image_file = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 105 | img = cv2.imread(image_file)[:, :, ::-1] 106 | 107 | # load calibration 108 | calib_file = os.path.join(args.hanco_path, f'calib/{sid:04d}/{fid:08d}.json') 109 | with open(calib_file, 'r') as fi: 110 | calib = json.load(fi) 111 | 112 | # load shape in world space 113 | kp_data_file = os.path.join(args.hanco_path, f'shape/{sid:04d}/{fid:08d}.json') 114 | with open(kp_data_file, 'r') as fi: 115 | mano_w = json.load(fi) 116 | for k, v in mano_w.items(): 117 | print(k, np.array(v).shape) # a dict of pose, shape and global_t 118 | 119 | # load shape in camera space 120 | kp_data_file = os.path.join(args.hanco_path, f'shape/{sid:04d}/cam{cid}/{fid:08d}.json') 121 | with open(kp_data_file, 'r') as fi: 122 | mano_cam = np.array(json.load(fi))[None] 123 | print('mano_vec', mano_cam.shape) # parameter vector 124 | pose_cam, shape_cam, global_t_cam = pred_to_mano(mano_cam, np.array(calib['K'])[cid][None], fw=np) 125 | 126 | # render shape masks 127 | def render_hand(poses, shapes, global_t, img_shape, K, M=None, center_idx=None): 128 | if M is None: 129 | M = np.eye(4) 130 | 131 | mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, center_idx=center_idx) 132 | 133 | verts, xyz = mano(poses, shapes, global_t) 134 | uv = project(trafoPoints(xyz, torch.Tensor(M)[None]), torch.Tensor(K)[None]) 135 | mask, _ = render_verts_faces(verts, 136 | mano.th_faces[None], 137 | K[None], M[None], img_shape[None], device='cpu') 138 | 139 | 140 | mask = mask[0].detach().cpu().numpy()[0] 141 | uv = uv.detach().cpu().numpy()[0] 142 | return mask, uv 143 | 144 | mask1, uv1 = render_hand(torch.Tensor(mano_w['poses']), 145 | torch.Tensor(mano_w['shapes']), 146 | torch.Tensor(mano_w['global_t']), 147 | np.array(img.shape[:2]), 148 | np.array(calib['K'][cid]), 149 | np.array(calib['M'][cid])) 150 | 151 | mask2, uv2 = render_hand(torch.Tensor(pose_cam), 152 | torch.Tensor(shape_cam), 153 | torch.Tensor(global_t_cam), 154 | np.array(img.shape[:2]), 155 | np.array(calib['K'][cid]), 156 | center_idx=9) 157 | 158 | # show 159 | img1 = draw_hand(img, uv1, order='uv', img_order='rgb') 160 | img2 = draw_hand(img, uv2, order='uv', img_order='rgb') 161 | 162 | fig = plt.figure() 163 | ax1 = fig.add_subplot(121) 164 | ax2 = fig.add_subplot(122) 165 | ax1.imshow(img1) 166 | ax1.imshow(mask1[0, :, :], alpha=0.5) 167 | ax2.imshow(img2) 168 | ax2.imshow(mask2[0, :, :], alpha=0.5) 169 | plt.show() 170 | 171 | 172 | if __name__ == '__main__': 173 | parser = argparse.ArgumentParser() 174 | parser.add_argument('hanco_path', type=str, help='Path to where HanCo dataset is stored.') 175 | args = parser.parse_args() 176 | 177 | assert os.path.exists(args.hanco_path), 'Path to HanCo not found.' 178 | assert os.path.isdir(args.hanco_path), 'Path to HanCo doesnt seem to be a directory.' 
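    # A minimal sketch of the camera projection applied in example_show_keypoints above
    # (variable names here are illustrative): keypoints are stored in world coordinates
    # and are mapped to pixels with the per-camera extrinsics M (world -> camera) and
    # the pinhole intrinsics K:
    #   xyz_cam = xyz_world @ M[:3, :3].T + M[:3, 3]    # rigid transform into the camera frame
    #   uvw = (xyz_cam / xyz_cam[:, -1:]) @ K.T         # normalise by depth, apply intrinsics
    #   uv  = uvw[:, :2] / uvw[:, -1:]                  # homogeneous -> pixel coordinates (u, v)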
179 | 180 | 181 | # Example1: Meta data 182 | example_meta_data(args) 183 | 184 | # Example2: Read/Show all images of one sequence 185 | example_show_data(args, 110) 186 | 187 | # Example3: Show keypoints, calibration, camera projection 188 | example_show_keypoints(args, sid=110, fid=24, cid=3) 189 | 190 | # Example4: Render MANO shape, show 191 | example_show_shape(args, sid=110, fid=24, cid=3) 192 | -------------------------------------------------------------------------------- /DatasetUnsupervisedMV.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torch.utils.data import Dataset 8 | import torchvision.transforms as transforms 9 | from scipy.ndimage.morphology import binary_erosion 10 | from utils.general import get_dataset_path, json_load 11 | 12 | 13 | def mix(fg_img, mask_fg, bg_img, do_smoothing, do_erosion): 14 | """ Mix fg and bg image. Keep the fg where mask_fg is True. """ 15 | assert bg_img.shape == fg_img.shape 16 | fg_img = fg_img.copy() 17 | mask_fg = mask_fg.copy() 18 | bg_img = bg_img.copy() 19 | 20 | if len(mask_fg.shape) == 2: 21 | mask_fg = np.expand_dims(mask_fg, -1) 22 | 23 | if do_erosion: 24 | mask_fg = binary_erosion(mask_fg, structure=np.ones((5, 5, 1)) ) 25 | 26 | mask_fg = mask_fg.astype(np.float32) 27 | 28 | if do_smoothing: 29 | mask_fg = gaussian_filter(mask_fg, sigma=0.5) 30 | 31 | merged = (mask_fg * fg_img + (1.0 - mask_fg) * bg_img).astype(np.uint8) 32 | return merged 33 | 34 | 35 | class DatasetUnsupervisedMultiview(Dataset): 36 | def __init__(self, root=None, transform=None, cross_camera=False, 37 | cross_time=False, cross_bg=False): 38 | print("Starting to load multiview data.") 39 | if root is None: 40 | self.base_path = get_dataset_path() 41 | else: 42 | self.base_path = root 43 | self.cross_camera = cross_camera 44 | self.cross_time = cross_time 45 | self.cross_bg = cross_bg 46 | 47 | self.subsets = ['gs', 'merged', 'homo', 'color_auto'] # 'color_sample'] 48 | #self.subsets = ['gs', ] 49 | 50 | if self.cross_bg: 51 | self.subsets = ['mask_hand'] 52 | 53 | self.camsets = { 54 | # neighboring # opposing 55 | 0: [1, 4, 7, 0], # [3] 56 | 1: [0, 2, 6, 1], # [5] 57 | 2: [1, 3, 4, 2], # [7] 58 | 3: [2, 5, 6, 3], # [0] 59 | 4: [0, 2, 5, 4], # [6] 60 | 5: [3, 4, 7, 5], # [1] 61 | 6: [1, 3, 7, 6], # [4] 62 | 7: [0, 5, 6, 7], # [2] 63 | } # for each cam which cams are considered good partners 64 | 65 | self.timeset = (-1, 0, 1) 66 | # load meta info file 67 | self.meta_info = json_load(os.path.join(self.base_path, 'meta.json')) 68 | self.dataset = json_load(os.path.join(self.base_path, 'index_mv_unsup_weak.json')) 69 | 70 | random.shuffle(self.dataset) 71 | self.size = len(self.dataset) 72 | 73 | print("Using dataset: ", self.base_path) 74 | print("cross_camera", cross_camera, "size", len(self.camsets[0])) 75 | print("cross_time", cross_time, "size", len(self.timeset)) 76 | print("cross_bg", cross_bg) 77 | print('Sampling from subsets', self.subsets) 78 | print('Sampling from %d time steps' % self.size) 79 | 80 | assert transform is not None 81 | #assert not isinstance(transform, moco_loader.TwoCropsTransform) 82 | self.transform = transform 83 | 84 | 85 | def __len__(self): 86 | return self.size * 8 87 | 88 | def __getitem__(self, idx): 89 | sid, fid, K_list, M_list = self.dataset[idx % self.size] 90 | # roll for a random camera 91 | cid1 = random.randint(0, 7) 92 | 93 | if 
self.cross_camera: 94 | cid2 = random.choice(self.camsets[cid1]) 95 | else: 96 | cid2 = cid1 97 | 98 | fid1 = fid 99 | if self.cross_time: 100 | s_max = len(self.meta_info['is_train'][sid])-1 101 | fid2 = min(max(0, fid + random.choice(self.timeset)), s_max) 102 | else: 103 | fid2 = fid 104 | 105 | if self.meta_info['is_train'][sid][fid]: 106 | subset1 = random.choice(self.subsets) 107 | subset2 = random.choice(self.subsets) 108 | else: 109 | subset1 = 'test' 110 | subset2 = 'test' 111 | 112 | try: 113 | # read the frame 114 | sample1 = self.read(sid, fid1, cid1, subset1) 115 | sample2 = self.read(sid, fid2, cid2, subset2) 116 | 117 | if self.transform is not None: 118 | sample1 = self.transform(sample1) 119 | sample2 = self.transform(sample2) 120 | return (sample1, sample2), 0 121 | except FileNotFoundError as e: 122 | # print(e) 123 | return self.__getitem__(idx) 124 | 125 | 126 | def read(self, sid, fid, cid, subset): 127 | if subset == 'mask_hand': 128 | return self.read_rnd_background(sid, fid, cid, subset) 129 | 130 | if subset == 'gs' or subset == 'test': 131 | img_path = 'rgb/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 132 | else: 133 | img_path = 'rgb_%s/%04d/cam%d/%08d.jpg' % (subset, sid, cid, fid) 134 | 135 | # read samples 136 | path = os.path.join(self.base_path, img_path) 137 | with open(path, 'rb') as f: 138 | img = Image.open(f) 139 | return img.convert('RGB') 140 | 141 | 142 | def read_rnd_background(self, sid, fid, cid, subset): 143 | # sample rnd background 144 | base_path = '/misc/lmbraid18/zimmermc/' 145 | rid = random.randint(0, 1230) 146 | bg_image_new_path = os.path.join(base_path, 'background_subtraction/background_examples/bg_new/%05d.jpg' % rid) 147 | bg_img_new = Image.open(bg_image_new_path) 148 | 149 | mask_path = 'mask_hand/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 150 | mask_path = os.path.join(self.base_path, mask_path) 151 | mask_fg = Image.open(mask_path) 152 | 153 | img_path = 'rgb/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 154 | img_path = os.path.join(self.base_path, img_path) 155 | fg_img = Image.open(img_path) 156 | 157 | 158 | bg_img_new = np.asarray(bg_img_new.resize(fg_img.size)) 159 | fg_img = np.asarray(fg_img) 160 | mask_fg = (np.asarray(mask_fg) / 255.)[:, :, None] 161 | 162 | merged = mix(fg_img, mask_fg, bg_img_new, do_smoothing=True, do_erosion=True) 163 | 164 | return Image.fromarray(merged) 165 | 166 | 167 | def get_dataset(batch_size): 168 | normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], 169 | std=[1.0, 1.0, 1.0]) 170 | 171 | img_size = 112 # running with 224 resolution did not improve results 172 | print("Warning: Un-comment augmentations for training") 173 | 174 | # these are the agumentations as we use for our moco pre-training 175 | # please un-comment the gaussian blue and normalization before training 176 | augmentation = [ 177 | transforms.RandomAffine(10), 178 | transforms.RandomResizedCrop(img_size, scale=(0.2, 1.)), 179 | transforms.RandomApply([ 180 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) # not strengthened 181 | ], p=0.8), 182 | transforms.RandomGrayscale(p=0.2), 183 | #transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.5), 184 | transforms.RandomHorizontalFlip(), 185 | transforms.ToTensor(), 186 | #normalize 187 | ] 188 | 189 | dataset = DatasetUnsupervisedMultiview(None, transforms.Compose(augmentation), 190 | cross_camera=False, 191 | cross_time=False, 192 | cross_bg=False) 193 | 194 | return torch.utils.data.DataLoader(dataset, 195 | batch_size=batch_size, 196 | shuffle=True, 197 | num_workers=8) 198 | 199 
| 200 | if __name__ == '__main__': 201 | batch_size = 3 202 | d = get_dataset(batch_size) 203 | 204 | for sample in d: 205 | data, label = sample 206 | for i in range(batch_size): 207 | img = data[0][i].numpy().transpose(1, 2, 0) 208 | img_aug = data[1][i].numpy().transpose(1, 2, 0) 209 | 210 | fig, ax = plt.subplots(1,2) 211 | ax[0].imshow(img) 212 | ax[1].imshow(img_aug) 213 | plt.show() 214 | -------------------------------------------------------------------------------- /nets/ResNet.py: -------------------------------------------------------------------------------- 1 | """ From: https://github.com/chenxi116/DeepLabv3.pytorch/blob/046818d755f91169dbad141362b98178dd685447/deeplab.py """ 2 | import torch 3 | import torch.nn as nn 4 | import math 5 | import numpy as np 6 | import torch.utils.model_zoo as model_zoo 7 | from torch.nn import functional as F 8 | 9 | 10 | model_urls = { 11 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 12 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 13 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 14 | } 15 | 16 | class Conv2d(nn.Conv2d): 17 | 18 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 19 | padding=0, dilation=1, groups=1, bias=True): 20 | super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, 21 | padding, dilation, groups, bias) 22 | 23 | def forward(self, x): 24 | # return super(Conv2d, self).forward(x) 25 | weight = self.weight 26 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, 27 | keepdim=True).mean(dim=3, keepdim=True) 28 | weight = weight - weight_mean 29 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 30 | weight = weight / std.expand_as(weight) 31 | return F.conv2d(x, weight, self.bias, self.stride, 32 | self.padding, self.dilation, self.groups) 33 | 34 | 35 | class Bottleneck(nn.Module): 36 | expansion = 4 37 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1, conv=None, norm=None): 38 | super(Bottleneck, self).__init__() 39 | self.conv1 = conv(inplanes, planes, kernel_size=1, bias=False) 40 | self.bn1 = norm(planes) 41 | self.conv2 = conv(planes, planes, kernel_size=3, stride=stride, 42 | dilation=dilation, padding=dilation, bias=False) 43 | self.bn2 = norm(planes) 44 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, bias=False) 45 | self.bn3 = norm(planes * self.expansion) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.downsample = downsample 48 | self.stride = stride 49 | 50 | def forward(self, x): 51 | residual = x 52 | 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv2(out) 58 | out = self.bn2(out) 59 | out = self.relu(out) 60 | 61 | out = self.conv3(out) 62 | out = self.bn3(out) 63 | 64 | if self.downsample is not None: 65 | residual = self.downsample(x) 66 | 67 | out += residual 68 | out = self.relu(out) 69 | 70 | return out 71 | 72 | 73 | class ResNet(nn.Module): 74 | def __init__(self, block, layers, num_groups=None, weight_std=False, beta=False, 75 | drop_prob=0.5, head_type=None): 76 | self.inplanes = 64 77 | self.drop_prob = drop_prob 78 | if head_type is None: 79 | head_type = 'mano' 80 | self.head_type = head_type 81 | self.norm = lambda planes, momentum=0.05: nn.BatchNorm2d(planes, momentum=momentum) if num_groups is None else nn.GroupNorm(num_groups, planes) 82 | self.conv = Conv2d if weight_std else nn.Conv2d 83 | 84 | super(ResNet, self).__init__() 85 | 
if not beta: 86 | self.conv1 = self.conv(3, 64, kernel_size=7, stride=2, padding=3, 87 | bias=False) 88 | else: 89 | self.conv1 = nn.Sequential( 90 | self.conv(3, 64, 3, stride=2, padding=1, bias=False), 91 | self.conv(64, 64, 3, stride=1, padding=1, bias=False), 92 | self.conv(64, 64, 3, stride=1, padding=1, bias=False)) 93 | self.bn1 = self.norm(64) 94 | self.relu = nn.ReLU(inplace=True) 95 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 96 | self.layer1 = self._make_layer(block, 64, layers[0]) 97 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 98 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 99 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, 100 | dilation=2) 101 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 102 | 103 | if self.head_type == 'mano': 104 | self.fc1 = nn.Linear(512 * block.expansion, 2048) 105 | self.fc2 = nn.Linear(2048, 2048) 106 | self.fc3 = nn.Linear(2048, 61) 107 | 108 | elif self.head_type == 'embed': 109 | self.fc = nn.Linear(512 * block.expansion, 2048) 110 | self.fc2 = nn.Linear(2048, 128) 111 | 112 | else: 113 | raise NotImplementedError 114 | 115 | for m in self.modules(): 116 | if isinstance(m, self.conv): 117 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 118 | m.weight.data.normal_(0, math.sqrt(2. / n)) 119 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.GroupNorm): 120 | m.weight.data.fill_(1) 121 | m.bias.data.zero_() 122 | 123 | if 'mano' in self.head_type: 124 | torch.nn.init.xavier_normal_(self.fc3.weight, 0.01) 125 | torch.nn.init.uniform_(self.fc3.bias, -0.01, 0.01) 126 | 127 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 128 | downsample = None 129 | if stride != 1 or dilation != 1 or self.inplanes != planes * block.expansion: 130 | downsample = nn.Sequential( 131 | self.conv(self.inplanes, planes * block.expansion, 132 | kernel_size=1, stride=stride, dilation=max(1, dilation/2), bias=False), 133 | self.norm(planes * block.expansion), 134 | ) 135 | 136 | layers = [] 137 | layers.append(block(self.inplanes, planes, stride, downsample, dilation=max(1, dilation/2), conv=self.conv, norm=self.norm)) 138 | self.inplanes = planes * block.expansion 139 | for i in range(1, blocks): 140 | layers.append(block(self.inplanes, planes, dilation=dilation, conv=self.conv, norm=self.norm)) 141 | 142 | return nn.Sequential(*layers) 143 | 144 | def forward(self, x): 145 | x = self.conv1(x) 146 | x = self.bn1(x) 147 | x = self.relu(x) 148 | x = self.maxpool(x) 149 | 150 | x = self.layer1(x) 151 | x = self.layer2(x) 152 | x = self.layer3(x) 153 | x = self.layer4(x) 154 | x_feat = x 155 | 156 | x = self.avgpool(x) 157 | x = torch.flatten(x, 1) 158 | 159 | if self.head_type == 'mano': 160 | x = self.fc1(x) 161 | x = F.relu(x) 162 | x = F.dropout(x, self.drop_prob, self.training) 163 | 164 | x = self.fc2(x) 165 | x = F.relu(x) 166 | x = F.dropout(x, self.drop_prob, self.training) 167 | 168 | x = self.fc3(x) 169 | 170 | elif self.head_type == 'embed': 171 | x = self.fc(x) 172 | x = F.dropout(x, self.drop_prob, self.training) 173 | x = F.relu(x) 174 | x = self.fc2(x) 175 | return x 176 | 177 | 178 | def resnet50(pretrained=False, **kwargs): 179 | """Constructs a ResNet-50 model. 
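    Compared to the standard torchvision ResNet-50, this variant optionally uses
    weight-standardised convolutions (the Conv2d subclass above) and GroupNorm,
    keeps layer4 at stride 1 with dilation 2, and ends in either a 61-dim MANO
    parameter head or a 128-dim embedding head.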
180 | 181 | Args: 182 | pretrained (bool): If True, returns a model pre-trained on ImageNet 183 | """ 184 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 185 | if pretrained: 186 | model_dict = model.state_dict() 187 | pretrained_dict = model_zoo.load_url(model_urls['resnet50']) 188 | overlap_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} # only keys that are in the model 189 | overlap_dict = {k: v for k, v in overlap_dict.items() if np.all(v.shape == model_dict[k].shape)} # only when the shape matches 190 | model_dict.update(overlap_dict) 191 | model.load_state_dict(model_dict) 192 | print('Loaded %d weights from the pretrained snapshot.' % len(overlap_dict)) 193 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 194 | return model 195 | -------------------------------------------------------------------------------- /manopth/manolayer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | from torch.nn import Module 6 | 7 | from manopth.load_util import ready_arguments 8 | from manopth import rodrigues_layer, rotproj, rot6d 9 | from manopth.tensutils import (th_posemap_axisang, th_with_zeros, th_pack, 10 | subtract_flat_id, make_list) 11 | 12 | class ManoLayer(Module): 13 | __constants__ = [ 14 | 'use_pca', 'rot', 'ncomps', 'ncomps', 'kintree_parents', 'check', 15 | 'side', 'center_idx', 'joint_rot_mode' 16 | ] 17 | 18 | def __init__(self, 19 | center_idx=None, 20 | flat_hand_mean=True, 21 | ncomps=6, 22 | side='right', 23 | mano_root='mano_models', 24 | use_pca=True, 25 | root_rot_mode='axisang', 26 | joint_rot_mode='axisang', 27 | robust_rot=False): 28 | """ 29 | Args: 30 | center_idx: If number, then this is considered to be the keypoint 31 | index to center of. Defaults to None in which case centering 32 | is skipped. 33 | flat_hand_mean: if True, (0, 0, 0, ...) pose coefficients match 34 | flat hand, else match average hand pose 35 | mano_root: path to MANO pkl files for left and right hand 36 | ncomps: number of PCA components form pose space (<45) 37 | side: 'right' or 'left' 38 | use_pca: Use PCA decomposition for pose space. 
39 | joint_rot_mode: 'axisang' or 'rotmat', ignored if use_pca 40 | """ 41 | super().__init__() 42 | self.center_idx = center_idx 43 | self.robust_rot = robust_rot 44 | if root_rot_mode == 'axisang': 45 | self.rot = 3 46 | else: 47 | self.rot = 6 48 | self.flat_hand_mean = flat_hand_mean 49 | self.side = side 50 | self.use_pca = use_pca 51 | self.joint_rot_mode = joint_rot_mode 52 | self.root_rot_mode = root_rot_mode 53 | if use_pca: 54 | self.ncomps = ncomps 55 | else: 56 | self.ncomps = 45 57 | 58 | if side == 'right': 59 | self.mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 60 | elif side == 'left': 61 | self.mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 62 | 63 | smpl_data = ready_arguments(self.mano_path) 64 | 65 | hands_components = smpl_data['hands_components'] 66 | 67 | self.smpl_data = smpl_data 68 | 69 | self.register_buffer('th_betas', 70 | torch.Tensor(np.array(smpl_data['betas'].r)).unsqueeze(0)) 71 | self.register_buffer('th_shapedirs', 72 | torch.Tensor(smpl_data['shapedirs'].r.copy())) 73 | self.register_buffer('th_posedirs', 74 | torch.Tensor(smpl_data['posedirs'].r.copy())) 75 | self.register_buffer( 76 | 'th_v_template', 77 | torch.Tensor(smpl_data['v_template'].r.copy()).unsqueeze(0)) 78 | self.register_buffer( 79 | 'th_J_regressor', 80 | torch.Tensor(np.array(smpl_data['J_regressor'].toarray()))) 81 | self.register_buffer('th_weights', 82 | torch.Tensor(smpl_data['weights'].r.copy())) 83 | self.register_buffer('th_faces', 84 | torch.Tensor(smpl_data['f'].astype(np.int32)).long()) 85 | 86 | # Get hand mean 87 | hands_mean = np.zeros(hands_components.shape[1] 88 | ) if flat_hand_mean else smpl_data['hands_mean'] 89 | hands_mean = hands_mean.copy() 90 | th_hands_mean = torch.Tensor(hands_mean).unsqueeze(0) 91 | if self.use_pca or self.joint_rot_mode == 'axisang': 92 | # Save as axis-angle 93 | self.register_buffer('th_hands_mean', th_hands_mean) 94 | selected_components = hands_components[:ncomps] 95 | self.register_buffer('th_comps', torch.Tensor(hands_components)) 96 | self.register_buffer('th_selected_comps', 97 | torch.Tensor(selected_components)) 98 | else: 99 | th_hands_mean_rotmat = rodrigues_layer.batch_rodrigues( 100 | th_hands_mean.view(15, 3)).reshape(15, 3, 3) 101 | self.register_buffer('th_hands_mean_rotmat', th_hands_mean_rotmat) 102 | 103 | # Kinematic chain params 104 | self.kintree_table = smpl_data['kintree_table'] 105 | parents = list(self.kintree_table[0].tolist()) 106 | self.kintree_parents = parents 107 | 108 | def forward(self, 109 | th_pose_coeffs, 110 | th_betas=torch.zeros(1), 111 | th_trans=torch.zeros(1), 112 | root_palm=torch.Tensor([0]), 113 | share_betas=torch.Tensor([0]), 114 | ): 115 | """ 116 | Args: 117 | th_trans (Tensor (batch_size x ncomps)): if provided, applies trans to joints and vertices 118 | th_betas (Tensor (batch_size x 10)): if provided, uses given shape parameters for hand shape 119 | else centers on root joint (9th joint) 120 | root_palm: return palm as hand root instead of wrist 121 | """ 122 | 123 | batch_size = th_pose_coeffs.shape[0] 124 | # Get axis angle from PCA components and coefficients 125 | if self.use_pca or self.joint_rot_mode == 'axisang': 126 | # Remove global rot coeffs 127 | th_hand_pose_coeffs = th_pose_coeffs[:, self.rot:self.rot + 128 | self.ncomps] 129 | if self.use_pca: 130 | # PCA components --> axis angles 131 | th_full_hand_pose = th_hand_pose_coeffs.mm(self.th_selected_comps) 132 | else: 133 | th_full_hand_pose = th_hand_pose_coeffs 134 | 135 | # Concatenate back global rot 136 | 
th_full_pose = torch.cat([ 137 | th_pose_coeffs[:, :self.rot], 138 | self.th_hands_mean + th_full_hand_pose 139 | ], 1) 140 | if self.root_rot_mode == 'axisang': 141 | # compute rotation matrixes from axis-angle while skipping global rotation 142 | th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose) 143 | root_rot = th_rot_map[:, :9].view(batch_size, 3, 3) 144 | th_rot_map = th_rot_map[:, 9:] 145 | th_pose_map = th_pose_map[:, 9:] 146 | else: 147 | # th_posemap offsets by 3, so add offset or 3 to get to self.rot=6 148 | th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose[:, 6:]) 149 | if self.robust_rot: 150 | root_rot = rot6d.robust_compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) 151 | else: 152 | root_rot = rot6d.compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) 153 | else: 154 | assert th_pose_coeffs.dim() == 4, ( 155 | 'When not self.use_pca, ' 156 | 'th_pose_coeffs should have 4 dims, got {}'.format( 157 | th_pose_coeffs.dim())) 158 | assert th_pose_coeffs.shape[2:4] == (3, 3), ( 159 | 'When not self.use_pca, th_pose_coeffs have 3x3 matrix for two' 160 | 'last dims, got {}'.format(th_pose_coeffs.shape[2:4])) 161 | th_pose_rots = rotproj.batch_rotprojs(th_pose_coeffs) 162 | th_rot_map = th_pose_rots[:, 1:].view(batch_size, -1) 163 | th_pose_map = subtract_flat_id(th_rot_map) 164 | root_rot = th_pose_rots[:, 0] 165 | 166 | # Full axis angle representation with root joint 167 | if th_betas is None or th_betas.numel() == 1: 168 | th_v_shaped = torch.matmul(self.th_shapedirs, 169 | self.th_betas.transpose(1, 0)).permute( 170 | 2, 0, 1) + self.th_v_template 171 | th_j = torch.matmul(self.th_J_regressor, th_v_shaped).repeat( 172 | batch_size, 1, 1) 173 | 174 | else: 175 | if share_betas: 176 | th_betas = th_betas.mean(0, keepdim=True).expand(th_betas.shape[0], 10) 177 | th_v_shaped = torch.matmul(self.th_shapedirs, 178 | th_betas.transpose(1, 0)).permute( 179 | 2, 0, 1) + self.th_v_template 180 | th_j = torch.matmul(self.th_J_regressor, th_v_shaped) 181 | # th_pose_map should have shape 20x135 182 | 183 | th_v_posed = th_v_shaped + torch.matmul( 184 | self.th_posedirs, th_pose_map.transpose(0, 1)).permute(2, 0, 1) 185 | # Final T pose with transformation done ! 
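        # The remainder of forward(): chain the per-joint rotations along the kinematic
        # tree (wrist -> proximal -> middle -> distal per finger) into global 4x4 joint
        # transforms, skin the posed template vertices against these transforms with the
        # blend weights (linear blend skinning), append finger-tip vertices as extra
        # keypoints, reorder to the 21-joint convention used by the plotting utilities,
        # and finally apply the optional centering and the global translation th_trans.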
186 | 187 | # Global rigid transformation 188 | 189 | root_j = th_j[:, 0, :].contiguous().view(batch_size, 3, 1) 190 | root_trans = th_with_zeros(torch.cat([root_rot, root_j], 2)) 191 | 192 | all_rots = th_rot_map.view(th_rot_map.shape[0], 15, 3, 3) 193 | lev1_idxs = [1, 4, 7, 10, 13] 194 | lev2_idxs = [2, 5, 8, 11, 14] 195 | lev3_idxs = [3, 6, 9, 12, 15] 196 | lev1_rots = all_rots[:, [idx - 1 for idx in lev1_idxs]] 197 | lev2_rots = all_rots[:, [idx - 1 for idx in lev2_idxs]] 198 | lev3_rots = all_rots[:, [idx - 1 for idx in lev3_idxs]] 199 | lev1_j = th_j[:, lev1_idxs] 200 | lev2_j = th_j[:, lev2_idxs] 201 | lev3_j = th_j[:, lev3_idxs] 202 | 203 | # From base to tips 204 | # Get lev1 results 205 | all_transforms = [root_trans.unsqueeze(1)] 206 | lev1_j_rel = lev1_j - root_j.transpose(1, 2) 207 | lev1_rel_transform_flt = th_with_zeros(torch.cat([lev1_rots, lev1_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 208 | root_trans_flt = root_trans.unsqueeze(1).repeat(1, 5, 1, 1).view(root_trans.shape[0] * 5, 4, 4) 209 | lev1_flt = torch.matmul(root_trans_flt, lev1_rel_transform_flt) 210 | all_transforms.append(lev1_flt.view(all_rots.shape[0], 5, 4, 4)) 211 | 212 | # Get lev2 results 213 | lev2_j_rel = lev2_j - lev1_j 214 | lev2_rel_transform_flt = th_with_zeros(torch.cat([lev2_rots, lev2_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 215 | lev2_flt = torch.matmul(lev1_flt, lev2_rel_transform_flt) 216 | all_transforms.append(lev2_flt.view(all_rots.shape[0], 5, 4, 4)) 217 | 218 | # Get lev3 results 219 | lev3_j_rel = lev3_j - lev2_j 220 | lev3_rel_transform_flt = th_with_zeros(torch.cat([lev3_rots, lev3_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 221 | lev3_flt = torch.matmul(lev2_flt, lev3_rel_transform_flt) 222 | all_transforms.append(lev3_flt.view(all_rots.shape[0], 5, 4, 4)) 223 | 224 | reorder_idxs = [0, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 10, 15] 225 | th_results = torch.cat(all_transforms, 1)[:, reorder_idxs] 226 | th_results_global = th_results 227 | 228 | joint_js = torch.cat([th_j, th_j.new_zeros(th_j.shape[0], 16, 1)], 2) 229 | tmp2 = torch.matmul(th_results, joint_js.unsqueeze(3)) 230 | th_results2 = (th_results - torch.cat([tmp2.new_zeros(*tmp2.shape[:2], 4, 3), tmp2], 3)).permute(0, 2, 3, 1) 231 | 232 | th_T = torch.matmul(th_results2, self.th_weights.transpose(0, 1)) 233 | 234 | th_rest_shape_h = torch.cat([ 235 | th_v_posed.transpose(2, 1), 236 | torch.ones((batch_size, 1, th_v_posed.shape[1]), 237 | dtype=th_T.dtype, 238 | device=th_T.device), 239 | ], 1) 240 | 241 | th_verts = (th_T * th_rest_shape_h.unsqueeze(1)).sum(2).transpose(2, 1) 242 | th_verts = th_verts[:, :, :3] 243 | th_jtr = th_results_global[:, :, :3, 3] 244 | # In addition to MANO reference joints we sample vertices on each finger 245 | # to serve as finger tips 246 | if self.side == 'right': 247 | tips = th_verts[:, [745, 317, 444, 556, 673]] 248 | else: 249 | tips = th_verts[:, [745, 317, 445, 556, 673]] 250 | if bool(root_palm): 251 | palm = (th_verts[:, 95] + th_verts[:, 22]).unsqueeze(1) / 2 252 | th_jtr = torch.cat([palm, th_jtr[:, 1:]], 1) 253 | th_jtr = torch.cat([th_jtr, tips], 1) 254 | 255 | # Reorder joints to match visualization utilities 256 | th_jtr = th_jtr[:, [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]] 257 | 258 | # Possibly center on a certain keypoint 259 | if self.center_idx is not None: 260 | center_joint = th_jtr[:, self.center_idx].unsqueeze(1) 261 | th_jtr = th_jtr - center_joint 262 | th_verts = th_verts - center_joint 263 | th_jtr = th_jtr + th_trans 264 | th_verts = 
th_verts + th_trans 265 | 266 | return th_verts, th_jtr 267 | 268 | def calc_alphas(self, pose): 269 | """ Project a flat 45-dim hand pose onto the selected MANO PCA components and return the per-component coefficients (alphas). """ 270 | if self.flat_hand_mean: 271 | pose = pose - self.th_hands_mean.squeeze(0) 272 | 273 | alphas = list() 274 | for comp in self.th_selected_comps: 275 | v = torch.sqrt(torch.sum(torch.square(comp)) + 1e-8) 276 | alphas.append( torch.sum(torch.mul(comp, pose))/v/v ) 277 | 278 | return torch.stack(alphas) 279 | --------------------------------------------------------------------------------
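A minimal usage sketch for the ManoLayer defined above (assuming MANO_RIGHT.pkl has been placed in mano_models/ and using the flat, non-PCA parameterisation as in show_dataset.example_show_shape; all values below are placeholders):

import torch
from manopth.manolayer import ManoLayer

# use_pca=False: the 45 joint angles are given directly instead of PCA coefficients
mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, mano_root='mano_models')

batch_size = 1
pose = torch.zeros(batch_size, 3 + 45)    # global axis-angle rotation + 45 joint angles
betas = torch.zeros(batch_size, 10)       # MANO shape parameters
trans = torch.zeros(batch_size, 3)        # global translation

verts, joints = mano(pose, betas, trans)  # verts: (1, 778, 3), joints: (1, 21, 3)
print(verts.shape, joints.shape)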