├── nets ├── __init__.py └── ResNet.py ├── utils ├── __init__.py ├── general.py ├── mano_utils.py ├── img_util.py ├── rendering.py └── plot_util.py ├── ckpt └── put_checkpoints_here ├── .gitignore ├── mano_models └── put_MANO_RIGHT_pkl_here ├── manopth ├── __init__.py ├── rotproj.py ├── tensutils.py ├── posemapper.py ├── argutils.py ├── demo.py ├── rot6d.py ├── load_util.py ├── rodrigues_layer.py └── manolayer.py ├── rgb_variants ├── 0007 │ ├── cam3 │ │ └── 00000015_2.jpg │ └── cam4 │ │ ├── 00000000_3.jpg │ │ ├── 00000016_5.jpg │ │ ├── 00000017_4.jpg │ │ └── 00000018_3.jpg ├── 0011 │ ├── cam3 │ │ ├── 00000005_6.jpg │ │ └── 00000012_5.jpg │ └── cam4 │ │ ├── 00000004_0.jpg │ │ ├── 00000005_1.jpg │ │ ├── 00000011_1.jpg │ │ └── 00000012_2.jpg └── 0108 │ ├── cam3 │ ├── 00000026_2.jpg │ └── 00000026_5.jpg │ └── cam4 │ ├── 00000007_0.jpg │ ├── 00000017_0.jpg │ ├── 00000017_1.jpg │ ├── 00000017_5.jpg │ ├── 00000017_6.jpg │ ├── 00000017_7.jpg │ ├── 00000017_8.jpg │ ├── 00000019_0.jpg │ └── 00000026_3.jpg ├── run_moco_fw.py ├── run_moco_qualitative_embedding.py ├── README.md ├── run_hand_shape_fw.py ├── show_dataset.py └── DatasetUnsupervisedMV.py /nets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ckpt/put_checkpoints_here: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__ 3 | -------------------------------------------------------------------------------- /mano_models/put_MANO_RIGHT_pkl_here: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /manopth/__init__.py: -------------------------------------------------------------------------------- 1 | name = 'manopth' 2 | -------------------------------------------------------------------------------- /rgb_variants/0007/cam3/00000015_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam3/00000015_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000000_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000000_3.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000016_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000016_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0007/cam4/00000017_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000017_4.jpg -------------------------------------------------------------------------------- 
/rgb_variants/0007/cam4/00000018_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0007/cam4/00000018_3.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam3/00000005_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam3/00000005_6.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam3/00000012_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam3/00000012_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000004_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000004_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000005_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000005_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000011_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000011_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0011/cam4/00000012_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0011/cam4/00000012_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam3/00000026_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam3/00000026_2.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam3/00000026_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam3/00000026_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000007_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000007_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_1.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_5.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_6.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_7.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000017_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000017_8.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000019_0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000019_0.jpg -------------------------------------------------------------------------------- /rgb_variants/0108/cam4/00000026_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lmb-freiburg/contra-hand/HEAD/rgb_variants/0108/cam4/00000026_3.jpg -------------------------------------------------------------------------------- /manopth/rotproj.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def batch_rotprojs(batches_rotmats): 5 | proj_rotmats = [] 6 | for batch_idx, batch_rotmats in enumerate(batches_rotmats): 7 | proj_batch_rotmats = [] 8 | for rot_idx, rotmat in enumerate(batch_rotmats): 9 | # GPU implementation of svd is VERY slow 10 | # ~ 2 10^-3 per hit vs 5 10^-5 on cpu 11 | U, S, V = rotmat.cpu().svd() 12 | rotmat = torch.matmul(U, V.transpose(0, 1)) 13 | orth_det = rotmat.det() 14 | # Remove reflection 15 | if orth_det < 0: 16 | rotmat[:, 2] = -1 * rotmat[:, 2] 17 | 18 | rotmat = rotmat.cuda() 19 | proj_batch_rotmats.append(rotmat) 20 | proj_rotmats.append(torch.stack(proj_batch_rotmats)) 21 | return torch.stack(proj_rotmats) 22 | -------------------------------------------------------------------------------- /run_moco_fw.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | 7 | from nets.ResNet import resnet50 8 | 9 | 10 | class ModelWrap: 11 | def __init__(self): 12 | model = resnet50(pretrained=False, head_type='embed') 13 | model.cuda() 14 | model.eval() 15 | 16 | state_dict = torch.load('ckpt/model_moco.pth') 17 | model.load_state_dict(state_dict) 18 | 19 | self.model = model 20 | self.base_path = "./rgb_variants/" 21 | 22 | def run(self, image_file): 23 | img = cv2.imread(os.path.join(self.base_path, image_file)) 24 | img = 
cv2.resize(img, (224, 224)) 25 | 26 | trafo = lambda x: np.transpose(x[:, :, ::-1], [2, 0, 1]).astype(np.float32) / 255.0 - 0.5 27 | img_t = trafo(img) 28 | batch = torch.Tensor(np.stack([img_t], 0)).cuda() 29 | embed = self.model(batch) 30 | embed = embed.detach().cpu().numpy() 31 | 32 | return embed 33 | 34 | 35 | if __name__ == '__main__': 36 | 37 | m = ModelWrap() 38 | f1 = '0007/cam4/00000016_5.jpg' 39 | f2 = '0007/cam4/00000017_4.jpg' 40 | embed = m.run(f1), m.run(f2) 41 | 42 | def cossim(x, y): 43 | ip = np.sum(np.multiply(x, y)) 44 | n1 = np.linalg.norm(x, 2) 45 | n2 = np.linalg.norm(y, 2) 46 | return ip / (n1*n2) 47 | 48 | print('score', cossim(embed[0], embed[1])) 49 | -------------------------------------------------------------------------------- /manopth/tensutils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from manopth import rodrigues_layer 4 | 5 | 6 | def th_posemap_axisang(pose_vectors): 7 | rot_nb = int(pose_vectors.shape[1] / 3) 8 | pose_vec_reshaped = pose_vectors.contiguous().view(-1, 3) 9 | rot_mats = rodrigues_layer.batch_rodrigues(pose_vec_reshaped) 10 | rot_mats = rot_mats.view(pose_vectors.shape[0], rot_nb * 9) 11 | pose_maps = subtract_flat_id(rot_mats) 12 | return pose_maps, rot_mats 13 | 14 | 15 | def th_with_zeros(tensor): 16 | batch_size = tensor.shape[0] 17 | padding = tensor.new([0.0, 0.0, 0.0, 1.0]) 18 | padding.requires_grad = False 19 | 20 | concat_list = [tensor, padding.view(1, 1, 4).repeat(batch_size, 1, 1)] 21 | cat_res = torch.cat(concat_list, 1) 22 | return cat_res 23 | 24 | 25 | def th_pack(tensor): 26 | batch_size = tensor.shape[0] 27 | padding = tensor.new_zeros((batch_size, 4, 3)) 28 | padding.requires_grad = False 29 | pack_list = [padding, tensor] 30 | pack_res = torch.cat(pack_list, 2) 31 | return pack_res 32 | 33 | 34 | def subtract_flat_id(rot_mats): 35 | # Subtracts identity as a flattened tensor 36 | rot_nb = int(rot_mats.shape[1] / 9) 37 | id_flat = torch.eye( 38 | 3, dtype=rot_mats.dtype, device=rot_mats.device).view(1, 9).repeat( 39 | rot_mats.shape[0], rot_nb) 40 | # id_flat.requires_grad = False 41 | results = rot_mats - id_flat 42 | return results 43 | 44 | 45 | def make_list(tensor): 46 | # type: (List[int]) -> List[int] 47 | return tensor 48 | -------------------------------------------------------------------------------- /manopth/posemapper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only. 4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 
18 | 19 | ''' 20 | 21 | 22 | import chumpy as ch 23 | import numpy as np 24 | import cv2 25 | 26 | 27 | class Rodrigues(ch.Ch): 28 | dterms = 'rt' 29 | 30 | def compute_r(self): 31 | return cv2.Rodrigues(self.rt.r)[0] 32 | 33 | def compute_dr_wrt(self, wrt): 34 | if wrt is self.rt: 35 | return cv2.Rodrigues(self.rt.r)[1].T 36 | 37 | 38 | def lrotmin(p): 39 | if isinstance(p, np.ndarray): 40 | p = p.ravel()[3:] 41 | return np.concatenate( 42 | [(cv2.Rodrigues(np.array(pp))[0] - np.eye(3)).ravel() 43 | for pp in p.reshape((-1, 3))]).ravel() 44 | if p.ndim != 2 or p.shape[1] != 3: 45 | p = p.reshape((-1, 3)) 46 | p = p[1:] 47 | return ch.concatenate([(Rodrigues(pp) - ch.eye(3)).ravel() 48 | for pp in p]).ravel() 49 | 50 | 51 | def posemap(s): 52 | if s == 'lrotmin': 53 | return lrotmin 54 | else: 55 | raise Exception('Unknown posemapping: %s' % (str(s), )) 56 | -------------------------------------------------------------------------------- /manopth/argutils.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | import pickle 4 | import subprocess 5 | import sys 6 | 7 | 8 | def print_args(args): 9 | opts = vars(args) 10 | print('======= Options ========') 11 | for k, v in sorted(opts.items()): 12 | print('{}: {}'.format(k, v)) 13 | print('========================') 14 | 15 | 16 | def save_args(args, save_folder, opt_prefix='opt', verbose=True): 17 | opts = vars(args) 18 | # Create checkpoint folder 19 | if not os.path.exists(save_folder): 20 | os.makedirs(save_folder, exist_ok=True) 21 | 22 | # Save options 23 | opt_filename = '{}.txt'.format(opt_prefix) 24 | opt_path = os.path.join(save_folder, opt_filename) 25 | with open(opt_path, 'a') as opt_file: 26 | opt_file.write('====== Options ======\n') 27 | for k, v in sorted(opts.items()): 28 | opt_file.write( 29 | '{option}: {value}\n'.format(option=str(k), value=str(v))) 30 | opt_file.write('=====================\n') 31 | opt_file.write('launched {} at {}\n'.format( 32 | str(sys.argv[0]), str(datetime.datetime.now()))) 33 | 34 | # Add git info 35 | label = subprocess.check_output(["git", "describe", 36 | "--always"]).strip() 37 | if subprocess.call( 38 | ["git", "branch"], 39 | stderr=subprocess.STDOUT, 40 | stdout=open(os.devnull, 'w')) == 0: 41 | opt_file.write('=== Git info ====\n') 42 | opt_file.write('{}\n'.format(label)) 43 | commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']) 44 | opt_file.write('commit : {}\n'.format(commit.strip())) 45 | 46 | opt_picklename = '{}.pkl'.format(opt_prefix) 47 | opt_picklepath = os.path.join(save_folder, opt_picklename) 48 | with open(opt_picklepath, 'wb') as opt_file: 49 | pickle.dump(opts, opt_file) 50 | if verbose: 51 | print('Saved options to {}'.format(opt_path)) 52 | -------------------------------------------------------------------------------- /manopth/demo.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | from mpl_toolkits.mplot3d import Axes3D 3 | from mpl_toolkits.mplot3d.art3d import Poly3DCollection 4 | import numpy as np 5 | import torch 6 | 7 | from manopth.manolayer import ManoLayer 8 | 9 | 10 | def generate_random_hand(batch_size=1, ncomps=6, mano_root='mano/models'): 11 | nfull_comps = ncomps + 3 # Add global orientation dims to PCA 12 | random_pcapose = torch.rand(batch_size, nfull_comps) 13 | mano_layer = ManoLayer(mano_root=mano_root) 14 | verts, joints = mano_layer(random_pcapose) 15 | return {'verts': verts, 'joints': joints, 
'faces': mano_layer.th_faces} 16 | 17 | 18 | def display_hand(hand_info, mano_faces=None, ax=None, alpha=0.2, batch_idx=0, show=True): 19 | """ 20 | Displays hand batch_idx in batch of hand_info, hand_info as returned by 21 | generate_random_hand 22 | """ 23 | if ax is None: 24 | fig = plt.figure() 25 | ax = fig.add_subplot(111, projection='3d') 26 | verts, joints = hand_info['verts'][batch_idx], hand_info['joints'][ 27 | batch_idx] 28 | if mano_faces is None: 29 | ax.scatter(verts[:, 0], verts[:, 1], verts[:, 2], alpha=0.1) 30 | else: 31 | mesh = Poly3DCollection(verts[mano_faces], alpha=alpha) 32 | face_color = (141 / 255, 184 / 255, 226 / 255) 33 | edge_color = (50 / 255, 50 / 255, 50 / 255) 34 | mesh.set_edgecolor(edge_color) 35 | mesh.set_facecolor(face_color) 36 | ax.add_collection3d(mesh) 37 | ax.scatter(joints[:, 0], joints[:, 1], joints[:, 2], color='r') 38 | cam_equal_aspect_3d(ax, verts.numpy()) 39 | if show: 40 | plt.show() 41 | 42 | 43 | def cam_equal_aspect_3d(ax, verts, flip_x=False): 44 | """ 45 | Centers view on cuboid containing hand and flips y and z axis 46 | and fixes azimuth 47 | """ 48 | extents = np.stack([verts.min(0), verts.max(0)], axis=1) 49 | sz = extents[:, 1] - extents[:, 0] 50 | centers = np.mean(extents, axis=1) 51 | maxsize = max(abs(sz)) 52 | r = maxsize / 2 53 | if flip_x: 54 | ax.set_xlim(centers[0] + r, centers[0] - r) 55 | else: 56 | ax.set_xlim(centers[0] - r, centers[0] + r) 57 | # Invert y and z axis 58 | ax.set_ylim(centers[1] + r, centers[1] - r) 59 | ax.set_zlim(centers[2] + r, centers[2] - r) 60 | -------------------------------------------------------------------------------- /manopth/rot6d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def compute_rotation_matrix_from_ortho6d(poses): 5 | """ 6 | Code from 7 | https://github.com/papagina/RotationContinuity 8 | On the Continuity of Rotation Representations in Neural Networks 9 | Zhou et al. CVPR19 10 | https://zhouyisjtu.github.io/project_rotation/rotation.html 11 | """ 12 | x_raw = poses[:, 0:3] # batch*3 13 | y_raw = poses[:, 3:6] # batch*3 14 | 15 | x = normalize_vector(x_raw) # batch*3 16 | z = cross_product(x, y_raw) # batch*3 17 | z = normalize_vector(z) # batch*3 18 | y = cross_product(z, x) # batch*3 19 | 20 | x = x.view(-1, 3, 1) 21 | y = y.view(-1, 3, 1) 22 | z = z.view(-1, 3, 1) 23 | matrix = torch.cat((x, y, z), 2) # batch*3*3 24 | return matrix 25 | 26 | def robust_compute_rotation_matrix_from_ortho6d(poses): 27 | """ 28 | Instead of making 2nd vector orthogonal to first 29 | create a base that takes into account the two predicted 30 | directions equally 31 | """ 32 | x_raw = poses[:, 0:3] # batch*3 33 | y_raw = poses[:, 3:6] # batch*3 34 | 35 | x = normalize_vector(x_raw) # batch*3 36 | y = normalize_vector(y_raw) # batch*3 37 | middle = normalize_vector(x + y) 38 | orthmid = normalize_vector(x - y) 39 | x = normalize_vector(middle + orthmid) 40 | y = normalize_vector(middle - orthmid) 41 | # Their scalar product should be small ! 42 | # assert torch.einsum("ij,ij->i", [x, y]).abs().max() < 0.00001 43 | z = normalize_vector(cross_product(x, y)) 44 | 45 | x = x.view(-1, 3, 1) 46 | y = y.view(-1, 3, 1) 47 | z = z.view(-1, 3, 1) 48 | matrix = torch.cat((x, y, z), 2) # batch*3*3 49 | # Check for reflection in matrix ! 
If found, flip last vector TODO 50 | assert (torch.stack([torch.det(mat) for mat in matrix ])< 0).sum() == 0 51 | return matrix 52 | 53 | 54 | def normalize_vector(v): 55 | batch = v.shape[0] 56 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 57 | v_mag = torch.max(v_mag, v.new([1e-8])) 58 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 59 | v = v/v_mag 60 | return v 61 | 62 | 63 | def cross_product(u, v): 64 | batch = u.shape[0] 65 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 66 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 67 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 68 | 69 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) 70 | 71 | return out 72 | -------------------------------------------------------------------------------- /utils/general.py: -------------------------------------------------------------------------------- 1 | import gzip, pickle 2 | import numpy as np 3 | import json 4 | import os 5 | import cv2 6 | 7 | def get_dataset_path(): 8 | return "/misc/lmbraid18/zimmermc/datasets/FreiHAND_full/" 9 | 10 | def load_ckpt(model, pretrained_dict): 11 | model_dict = model.state_dict() 12 | overlap_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} # only keys that are in the model 13 | overlap_dict = {k: v for k, v in overlap_dict.items() if np.all(v.shape == model_dict[k].shape)} # only when the shape matches 14 | 15 | if len(model_dict) != len(overlap_dict): 16 | print('Missing/Not Matching weights:') 17 | for k, v in model_dict.items(): 18 | if k not in overlap_dict.keys(): 19 | print(k, 'model:', v.shape) 20 | print(f'Given {len(pretrained_dict)} weights for {len(model_dict)} model weights. Loaded {len(overlap_dict)} matching weights!') 21 | if len(overlap_dict) == 0: 22 | for k, v in pretrained_dict.items(): 23 | print('pretrained content', k, v.shape) 24 | for k, v in model_dict.items(): 25 | print('model', k, v.shape) 26 | raise Exception('No weights were loaded. This indicates an error.') 27 | 28 | model_dict.update(overlap_dict) 29 | model.load_state_dict(model_dict) 30 | 31 | 32 | class NumpyEncoder(json.JSONEncoder): 33 | def default(self, obj): 34 | if isinstance(obj, np.ndarray): 35 | return obj.tolist() 36 | 37 | if isinstance(obj, np.int32): 38 | return int(obj) 39 | if isinstance(obj, np.float32): 40 | return float(obj) 41 | 42 | if isinstance(obj, np.int64): 43 | return int(obj) 44 | if isinstance(obj, np.float64): 45 | return float(obj) 46 | return json.JSONEncoder.default(self, obj) 47 | 48 | 49 | def json_dump(file_name, data, pretty_format=False, overwrite=True, verbose=False): 50 | msg = 'File already exists and should not be overwritten: %s' % file_name 51 | assert not os.path.exists(file_name) or overwrite, msg 52 | 53 | with open(file_name, 'w') as fo: 54 | if pretty_format: 55 | json.dump(data, fo, cls=NumpyEncoder, sort_keys=True, indent=4) 56 | else: 57 | json.dump(data, fo, cls=NumpyEncoder) 58 | 59 | if verbose: 60 | print('Dumped %d entries to file %s' % (len(data), file_name)) 61 | 62 | 63 | def json_load(file_name): 64 | with open(file_name, 'r') as fi: 65 | data = json.load(fi) 66 | return data 67 | 68 | 69 | -------------------------------------------------------------------------------- /manopth/load_util.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Copyright 2017 Javier Romero, Dimitrios Tzionas, Michael J Black and the Max Planck Gesellschaft. All rights reserved. 3 | This software is provided for research purposes only.
4 | By using this software you agree to the terms of the MANO/SMPL+H Model license here http://mano.is.tue.mpg.de/license 5 | 6 | More information about MANO/SMPL+H is available at http://mano.is.tue.mpg.de. 7 | For comments or questions, please email us at: mano@tue.mpg.de 8 | 9 | 10 | About this file: 11 | ================ 12 | This file defines a wrapper for the loading functions of the MANO model. 13 | 14 | Modules included: 15 | - load_model: 16 | loads the MANO model from a given file location (i.e. a .pkl file location), 17 | or a dictionary object. 18 | 19 | ''' 20 | import numpy as np 21 | import pickle 22 | import chumpy as ch 23 | from chumpy.ch import MatVecMult 24 | from .posemapper import posemap 25 | 26 | 27 | def ready_arguments(fname_or_dict, posekey4vposed='pose'): 28 | 29 | if not isinstance(fname_or_dict, dict): 30 | dd = pickle.load(open(fname_or_dict, 'rb'), encoding='latin1') 31 | # dd = pickle.load(open(fname_or_dict, 'rb')) 32 | else: 33 | dd = fname_or_dict 34 | 35 | want_shapemodel = 'shapedirs' in dd 36 | nposeparms = dd['kintree_table'].shape[1] * 3 37 | 38 | if 'trans' not in dd: 39 | dd['trans'] = np.zeros(3) 40 | if 'pose' not in dd: 41 | dd['pose'] = np.zeros(nposeparms) 42 | if 'shapedirs' in dd and 'betas' not in dd: 43 | dd['betas'] = np.zeros(dd['shapedirs'].shape[-1]) 44 | 45 | for s in [ 46 | 'v_template', 'weights', 'posedirs', 'pose', 'trans', 'shapedirs', 47 | 'betas', 'J' 48 | ]: 49 | if (s in dd) and not hasattr(dd[s], 'dterms'): 50 | dd[s] = ch.array(dd[s]) 51 | 52 | assert (posekey4vposed in dd) 53 | if want_shapemodel: 54 | dd['v_shaped'] = dd['shapedirs'].dot(dd['betas']) + dd['v_template'] 55 | v_shaped = dd['v_shaped'] 56 | J_tmpx = MatVecMult(dd['J_regressor'], v_shaped[:, 0]) 57 | J_tmpy = MatVecMult(dd['J_regressor'], v_shaped[:, 1]) 58 | J_tmpz = MatVecMult(dd['J_regressor'], v_shaped[:, 2]) 59 | dd['J'] = ch.vstack((J_tmpx, J_tmpy, J_tmpz)).T 60 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 61 | dd['v_posed'] = v_shaped + dd['posedirs'].dot(pose_map_res) 62 | else: 63 | pose_map_res = posemap(dd['bs_type'])(dd[posekey4vposed]) 64 | dd_add = dd['posedirs'].dot(pose_map_res) 65 | dd['v_posed'] = dd['v_template'] + dd_add 66 | 67 | return dd 68 | 69 | 70 | -------------------------------------------------------------------------------- /run_moco_qualitative_embedding.py: -------------------------------------------------------------------------------- 1 | """ Script to produce the data for Figure 3 of the paper. """ 2 | import numpy as np 3 | import cv2, os 4 | import matplotlib.pyplot as plt 5 | from run_moco_fw import ModelWrap 6 | 7 | 8 | m = ModelWrap() 9 | data_path = m.base_path 10 | 11 | cossim = lambda x, y: np.sum(x*y)/np.linalg.norm(x, 2)/np.linalg.norm(y, 2) 12 | 13 | 14 | def show(path1, path2, save_to=None): 15 | print("show('%s', '%s')" % (path1, path2)) 16 | 17 | s = cossim(m.run(path1), m.run(path2)) 18 | 19 | img1 = cv2.imread(os.path.join(data_path, path1)) 20 | img2 = cv2.imread(os.path.join(data_path, path2)) 21 | if save_to is not None: 22 | cv2.imwrite(save_to + '_0.png', img1) 23 | cv2.imwrite(save_to + '_1.png', img2) 24 | with open(save_to + '_s.txt', 'w') as fo: 25 | fo.write('%f' % s) 26 | 27 | fig, ax = plt.subplots(1, 2) 28 | ax[0].imshow(img1[:, :, ::-1]) 29 | ax[1].imshow(img2[:, :, ::-1]) 30 | ax[1].set_title('score = %.3f' % s) 31 | plt.show() 32 | 33 | 34 | # 1. 
Show that pairs of the same sample with different backgrounds are encoded the same 35 | for i in (0, 5, 7): 36 | show('0108/cam4/00000017_%d.jpg' % i, '0108/cam4/00000017_%d.jpg' % (i+1), 37 | save_to='./moco_vis_ex/same_sample_diff_bg/%02d' % i) 38 | 39 | # 2. Show similar poses are encoded similarly 40 | i = 2 41 | show('0007/cam4/00000016_5.jpg', '0007/cam4/00000017_4.jpg', 42 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 43 | i = 3 44 | show('0011/cam4/00000004_0.jpg', '0011/cam4/00000005_1.jpg', 45 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 46 | i = 4 47 | show('0011/cam4/00000011_1.jpg', '0011/cam4/00000012_2.jpg', 48 | save_to='./moco_vis_ex/similar_poses/%02d' % i) 49 | 50 | 51 | # 3. Different views are encoded similarly 52 | i = 0 53 | show('0108/cam4/00000026_3.jpg', '0108/cam3/00000026_5.jpg', 54 | save_to='./moco_vis_ex/diff_view/%02d' % i) 55 | i = 1 56 | show('0011/cam4/00000012_2.jpg', '0011/cam3/00000012_5.jpg', 57 | save_to='./moco_vis_ex/diff_view/%02d' % i) 58 | i = 3 59 | show('0011/cam4/00000005_1.jpg', '0011/cam3/00000005_6.jpg', 60 | save_to='./moco_vis_ex/diff_view/%02d' % i) 61 | 62 | 63 | # 4. Different poses are encoded differently 64 | i = 0 65 | show('0007/cam4/00000000_3.jpg', '0007/cam4/00000018_3.jpg', 66 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 67 | i = 1 68 | show('0108/cam4/00000007_0.jpg', '0108/cam4/00000019_0.jpg', 69 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 70 | i = 3 71 | show('0007/cam3/00000015_2.jpg', '0108/cam3/00000026_2.jpg', 72 | save_to='./moco_vis_ex/diff_poses/%02d' % i) 73 | -------------------------------------------------------------------------------- /manopth/rodrigues_layer.py: -------------------------------------------------------------------------------- 1 | """ 2 | This part reuses code from https://github.com/MandyMo/pytorch_HMR/blob/master/src/util.py 3 | which is part of a PyTorch port of SMPL. 4 | Thanks to Zhang Xiong (MandyMo) for making this great code available on github ! 5 | """ 6 | 7 | import argparse 8 | from torch.autograd import gradcheck 9 | import torch 10 | from torch.autograd import Variable 11 | 12 | from manopth import argutils 13 | 14 | 15 | def quat2mat(quat): 16 | """Convert quaternion coefficients to rotation matrix.
17 | Args: 18 | quat: size = [batch_size, 4] 4 <===>(w, x, y, z) 19 | Returns: 20 | Rotation matrix corresponding to the quaternion -- size = [batch_size, 3, 3] 21 | """ 22 | norm_quat = quat 23 | norm_quat = norm_quat / norm_quat.norm(p=2, dim=1, keepdim=True) 24 | w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:, 25 | 2], norm_quat[:, 26 | 3] 27 | 28 | batch_size = quat.size(0) 29 | 30 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) 31 | wx, wy, wz = w * x, w * y, w * z 32 | xy, xz, yz = x * y, x * z, y * z 33 | 34 | rotMat = torch.stack([ 35 | w2 + x2 - y2 - z2, 2 * xy - 2 * wz, 2 * wy + 2 * xz, 2 * wz + 2 * xy, 36 | w2 - x2 + y2 - z2, 2 * yz - 2 * wx, 2 * xz - 2 * wy, 2 * wx + 2 * yz, 37 | w2 - x2 - y2 + z2 38 | ], 39 | dim=1).view(batch_size, 3, 3) 40 | return rotMat 41 | 42 | 43 | def batch_rodrigues(axisang): 44 | #axisang N x 3 45 | axisang_norm = torch.norm(axisang + 1e-8, p=2, dim=1) 46 | angle = torch.unsqueeze(axisang_norm, -1) 47 | axisang_normalized = torch.div(axisang, angle) 48 | angle = angle * 0.5 49 | v_cos = torch.cos(angle) 50 | v_sin = torch.sin(angle) 51 | quat = torch.cat([v_cos, v_sin * axisang_normalized], dim=1) 52 | rot_mat = quat2mat(quat) 53 | rot_mat = rot_mat.view(rot_mat.shape[0], 9) 54 | return rot_mat 55 | 56 | 57 | def th_get_axis_angle(vector): 58 | angle = torch.norm(vector, 2, 1) 59 | axes = vector / angle.unsqueeze(1) 60 | return axes, angle 61 | 62 | 63 | if __name__ == '__main__': 64 | parser = argparse.ArgumentParser() 65 | parser.add_argument('--batch_size', default=1, type=int) 66 | parser.add_argument('--cuda', action='store_true') 67 | args = parser.parse_args() 68 | 69 | argutils.print_args(args) 70 | 71 | n_components = 6 72 | rot = 3 73 | inputs = torch.rand(args.batch_size, rot) 74 | inputs_var = Variable(inputs.double(), requires_grad=True) 75 | if args.cuda: 76 | inputs = inputs.cuda() 77 | # outputs = batch_rodrigues(inputs) 78 | test_function = gradcheck(batch_rodrigues, (inputs_var, )) 79 | print('batch test passed !') 80 | 81 | inputs = torch.rand(rot) 82 | inputs_var = Variable(inputs.double(), requires_grad=True) 83 | test_function = gradcheck(th_cv2_rod_sub_id.apply, (inputs_var, )) 84 | print('th_cv2_rod test passed') 85 | 86 | inputs = torch.rand(rot) 87 | inputs_var = Variable(inputs.double(), requires_grad=True) 88 | test_th = gradcheck(th_cv2_rod.apply, (inputs_var, )) 89 | print('th_cv2_rod_id test passed !') 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HanCo Dataset & Contrastive Representation Learning for Hand Shape Estimation 2 | Code in conjunction with the publication: *Contrastive Representation Learning for Hand Shape Estimation*. 3 | 4 | This repository contains code for inference of both networks: 5 | The one obtained from self-supervised contrastive pre-training and the network trained supervisedly for hand pose estimation. 6 | Additionally, we provide examples how to work with the HanCo dataset and release the pytorch Dataset that was used during our pre-training experiments. 7 | This dataset is an extension of the [FreiHand](https://lmb.informatik.uni-freiburg.de/projects/freihand) dataset. 8 | 9 | Visit our [project page](https://lmb.informatik.uni-freiburg.de/projects/contra-hand/) for additional information. 
10 | 11 | 12 | # Requirements 13 | 14 | ### Python environment 15 | 16 | conda create -n contra-hand python=3.6 17 | conda activate contra-hand 18 | conda install -c pytorch pytorch=1.6.0 torchvision cudatoolkit=10.2 19 | conda install -c conda-forge -c fvcore fvcore transforms3d 20 | pip install pytorch3d transforms3d tqdm pytorch-lightning imgaug open3d matplotlib 21 | pip install git+https://github.com/hassony2/chumpy.git 22 | 23 | 24 | ### Hand Pose Dataset 25 | 26 | You either need the [full HanCo dataset](https://lmb.informatik.uni-freiburg.de/resources/datasets/HanCo.en.html) or the small [tester data sample](https://lmb.informatik.uni-freiburg.de/data/HanCo/HanCo_tester.zip) (recommended). 27 | 28 | ### Random Background Images 29 | 30 | As the hand pose dataset contains green screen images, randomized backgrounds can be used. For our dataset we used 2195 images from Flickr. As these were not all licensed in a permissive manner, we provide a set of background images to use with the dataset. 31 | These can be found [here](https://lmb.informatik.uni-freiburg.de/data/HanCo/HanCo_rnd_backgrounds.zip). 32 | 33 | 34 | ### MANO model 35 | 36 | Our supervised training code uses the MANO hand model, which you need to acquire separately due to licensing regulations: https://mano.is.tue.mpg.de 37 | 38 | In order for our code to work, copy *MANO_RIGHT.pkl* from the MANO website to *contra-hand/mano_models/MANO_RIGHT.pkl*. 39 | 40 | We also build on top of the great PyTorch implementation of MANO provided by [Yana Hasson et al.](https://github.com/hassony2/manopth), which was modified by us and is already contained in this repository. 41 | 42 | 43 | ### Trained models 44 | 45 | We release both the MoCo pretrained model and the shape estimation network that was derived from it. 46 | 47 | To get the trained models, download and unpack them locally: 48 | 49 | 50 | curl https://lmb.informatik.uni-freiburg.de/data/HanCo/contra-hand-ckpt.zip -o contra-hand-ckpt.zip && unzip contra-hand-ckpt.zip 51 | 52 | 53 | # Code 54 | 55 | This repository contains scripts that facilitate using the HanCo dataset and building on the results from our publication. 56 | 57 | ### Show dataset 58 | 59 | You will need to download the HanCo dataset (or at least the tester). 60 | This script gives you some examples of how to work with the dataset: 61 | 62 | python show_dataset.py 63 | 64 | 65 | ### Use our MoCo trained model 66 | 67 | 68 | There is a simple script that calculates the cosine similarity score for two hard-coded examples: 69 | 70 | python run_moco_fw.py 71 | 72 | 73 | There is also the script we used to create the respective figure in our paper: 74 | 75 | python run_moco_qualitative_embedding.py 76 | 77 | ### Self-Supervised Training with MoCo 78 | 79 | We provide a torch data loader that can be used as a drop-in replacement for MoCo training. 80 | The data loader can be found in `DatasetUnsupervisedMV.py`. It has boolean 81 | options that control how the data is provided: `cross_bg`, `cross_camera`, and 82 | `cross_time`. The `get_dataset` function also shows the pre-processing that we use, which is 83 | slightly different from the standard MoCo pre-processing.
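As an illustration, below is a minimal sketch of how the loader could be plugged into a MoCo-style training script. The augmentation values, flag settings and dataset path are assumptions for the example rather than the exact configuration we used; see `get_dataset` in `DatasetUnsupervisedMV.py` for the actual pre-processing. A plain single-view transform is passed, since the loader samples the second view of each pair itself according to the `cross_*` flags.

    import torch
    import torchvision.transforms as transforms
    from DatasetUnsupervisedMV import DatasetUnsupervisedMultiview

    # single-view augmentation applied to each sampled image (values are illustrative)
    transform = transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.6, 1.0)),
        transforms.ColorJitter(0.4, 0.4, 0.4, 0.1),
        transforms.RandomGrayscale(p=0.2),
        transforms.ToTensor(),
    ])

    # '/path/to/HanCo' is a placeholder for the dataset root
    dataset = DatasetUnsupervisedMultiview(root='/path/to/HanCo', transform=transform,
                                           cross_bg=True, cross_camera=True, cross_time=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=256, shuffle=True,
                                         num_workers=16, pin_memory=True, drop_last=True)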
84 | 85 | ### Use our MANO prediction model 86 | 87 | The following script allows to run inference on an example image: 88 | 89 | run_hand_shape_fw.py 90 | 91 | 92 | -------------------------------------------------------------------------------- /utils/mano_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | np.cat = np.concatenate 5 | torch.transpose = lambda x, y: x.permute(y) 6 | 7 | 8 | def apply_scaling(theta): 9 | poses, shapes, cams = slice_theta(theta) 10 | 11 | poses_scaled = 1.0 * poses 12 | shapes_scaled = 0.5 * shapes 13 | root = cams[:, :2] # estimated root im image coords 14 | scale = cams[:, -1:] # estimated shape scale 15 | 16 | root = 14.0 * root + 112.0 17 | scale = 125.0 * scale + 730.0 18 | cams_scaled = torch.cat([root, scale], -1) 19 | 20 | theta_scaled = torch.cat([poses_scaled, shapes_scaled, cams_scaled], -1) 21 | return theta_scaled 22 | 23 | 24 | def slice_theta(theta): 25 | """ Slice vector of all hand shape parameters into sematically meaningful parts. 26 | """ 27 | return theta[:, :48], theta[:, 48:58], theta[:, -3:] 28 | 29 | 30 | def slice_cams(cams): 31 | """ 32 | Returns translation in uv and scale. 33 | """ 34 | return cams[:, :2], cams[:, -1:] 35 | 36 | 37 | def project(xyz, K, fw=torch): 38 | """ Project points into the camera. """ 39 | uv = fw.matmul(xyz, fw.transpose(K, [0, 2, 1])) 40 | uv = uv[:, :, :2] / uv[:, :, -1:] 41 | return uv 42 | 43 | 44 | def unproject(points2d, K, z=None, K_is_inv=False, fw=torch): 45 | """ Unproject a 2D point of camera K to distance z. 46 | """ 47 | batch = K.shape[0] 48 | points2d = fw.reshape(points2d, [batch, -1, 2]) 49 | points2d_h = fw.cat([points2d, fw.ones_like(points2d[:, :, :1])], -1) # homogeneous 50 | 51 | if K_is_inv: 52 | K_inv = K 53 | else: 54 | if fw == torch: 55 | K_inv = fw.inverse(K) 56 | else: 57 | K_inv = fw.linalg.inv(K) 58 | 59 | points3D = fw.matmul(points2d_h, fw.transpose(K_inv, [0, 2, 1])) # 3d point corresponding to the estimate image point where the root should go to 60 | if z is not None: 61 | z = fw.reshape(z, [batch, -1, 1]) 62 | points3D = points3D * z 63 | return points3D 64 | 65 | 66 | def trafoPoints(xyz, M, fw=torch): 67 | """ Transforms points into another coordinate frame. """ 68 | xyz_h = fw.cat([xyz, fw.ones_like(xyz[:, :, :1])], 2) 69 | xyz_cam = fw.matmul(xyz_h, fw.transpose(M, [0, 2, 1])) 70 | xyz_cam = xyz_cam[:, :, :3] / xyz_cam[:, :, -1:] 71 | return xyz_cam 72 | 73 | 74 | def calc_global_translation(trans_uv, scale, K, fw=torch): 75 | """ Calculate global translation from uv position and scale. 76 | """ 77 | scale = fw.reshape(scale, [-1, 1, 1]) 78 | z = 0.5 * (K[:, :1, :1] + K[:, 1:2, 1:2]) / scale # calculate root depth from scale 79 | 80 | # calculate and apply global translation 81 | global_t = unproject(trans_uv, K, z, fw=fw) # unprojection of the estimated mano root using the estimated depth 82 | return global_t, z 83 | 84 | 85 | def calc_global_translation_from_theta(theta, K, fw=torch): 86 | """ Calculate global translation from uv position and scale. 87 | """ 88 | _, _, cams = slice_theta(theta) 89 | trans_uv, scale = slice_cams(cams) 90 | return calc_global_translation(trans_uv, scale, K, fw=fw) 91 | 92 | 93 | def pred_to_mano(theta, K, fw=torch): 94 | """ Convert predicted theta into MANO parameters. 
95 | """ 96 | poses, shapes, cams = slice_theta(theta) 97 | trans_uv, scale = slice_cams(cams) 98 | global_t, _ = calc_global_translation(trans_uv, scale, K, fw=fw) 99 | return poses, shapes, global_t 100 | 101 | 102 | def mano_to_vector(poses, shapes, global_t, K, fw=torch): 103 | """ Given the semantic parts of the mano shape model, create a parameter vector out of it (which will be estimated by networks) 104 | 105 | poses and global_t must already be in the cameras 3D coordinate frame. 106 | """ 107 | # project 3D point into cam 108 | trans_uv = project(global_t, K, fw=fw) 109 | 110 | # find scale = focal_length / depth 111 | scale = 0.5*(K[:, 0, 0] + K[:, 1, 1])[:, None] / global_t[:, :, -1] 112 | 113 | # assemble cams 114 | cams = fw.cat([trans_uv[:, 0], scale], -1) 115 | 116 | # assemble theta 117 | theta = fw.cat([poses, shapes, cams], -1) 118 | return theta 119 | -------------------------------------------------------------------------------- /utils/img_util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | 5 | def downsample(img, target_size, K=None, borderValue=0.0): 6 | """ 7 | img, HxWxC image 8 | target_size, shape in (height, width) 9 | K, camera intrinsic matrix 10 | """ 11 | f_y = float(target_size[0]) / img.shape[0] 12 | f_x = float(target_size[1]) / img.shape[1] 13 | 14 | # how to account for crop in intrinsics 15 | M = np.array([[f_x, 0.0, 0.0], 16 | [0.0, f_y, 0.0], 17 | [0.0, 0.0, 1.0]]) 18 | 19 | img_c = cv2.warpAffine(img, M[:2, :], 20 | (target_size[0], target_size[1]), 21 | borderValue=borderValue) 22 | if K is None: 23 | return img_c 24 | K_c = np.matmul(M, K) 25 | return img_c, K_c 26 | 27 | 28 | def random_crop(img, K=None, 29 | f_trans=0.05, # percent of the image size 30 | f_scale_min=0.8, f_scale_max=1.0, # percent of the 31 | target_size=128, borderValue=0.0): 32 | center = np.array([img.shape[1], img.shape[0]], dtype=np.float32) / 2.0 33 | size = np.array([img.shape[1], img.shape[0]], dtype=np.float32) 34 | 35 | # random translation 36 | f = np.random.rand(2, ) * 2 * f_trans - f_trans 37 | trans_uv = f*size 38 | 39 | # random scaling 40 | f = np.random.rand() * (f_scale_max - f_scale_min) + f_scale_min 41 | 42 | trans_uv -= center*(1.0-f) # translation of the image center due to scaling 43 | 44 | f *= 224.0 / target_size 45 | 46 | # how to account for crop in intrinsics 47 | M = np.array([[1.0 / f, 0.0, trans_uv[0] / f], 48 | [0.0, 1.0 / f, trans_uv[1] / f], 49 | [0.0, 0.0, 1.0]]) 50 | 51 | img_c = cv2.warpAffine(img, M[:2, :], (target_size, target_size), borderValue=borderValue) 52 | if K is None: 53 | return img_c 54 | K_c = np.matmul(M, K) 55 | return img_c, K_c 56 | 57 | 58 | def crop(img, center, size, K=None, target_size=128, borderValue=0.0, scale_values=False): 59 | size = np.max(size)*np.ones_like(size) 60 | size = (size/2.0).round().astype(np.int32) # this cant be a float 61 | 62 | # create crop image 63 | borderValue = np.array(borderValue).astype(img.dtype) 64 | img_crop = borderValue * np.ones((2*size[0], 2*size[1], img.shape[2]), 65 | dtype=img.dtype) # after mean subtraction 127.5 will be zero 66 | 67 | # figure out where we would like to crop (can exceed image dimensions) 68 | start_t = (center - size).round().astype(np.int32) 69 | end_t = start_t + 2*size 70 | 71 | # check if there is actually anything to be cropped (sometimes crop is completely out of the image). 
72 | do_crop = True 73 | 74 | # sanity check the crop values (sometime the crop is completely outside the image) 75 | if np.any(np.logical_or(end_t < 0, start_t > np.array(img.shape[:2]) - 1)): 76 | print('WARNING: Crop is completely outside image bounds!', center, img.shape) 77 | do_crop = False 78 | 79 | # check image boundaries: Where can we crop? 80 | start = np.maximum(start_t, 0) 81 | end = np.minimum(end_t, np.array(img.shape[:2]) - 1) 82 | 83 | # check discrepancy 84 | crop_start = start - start_t 85 | crop_end = 2*size - (end_t - end) 86 | 87 | if do_crop: 88 | img_crop[crop_start[0]:crop_end[0], crop_start[1]:crop_end[1], :] = img[start[0]:end[0], start[1]:end[1], :] 89 | offset = start - crop_start 90 | 91 | scale = (end - start) / np.array([target_size, target_size], dtype=np.float32) 92 | img_crop = cv2.resize(img_crop, (target_size, target_size)) 93 | 94 | if scale_values: 95 | # makes sense if the image is a flow 96 | img_crop[:, :, 0] /= scale[1] 97 | img_crop[:, :, 1] /= scale[0] 98 | 99 | if K is not None: 100 | # how to account for crop in intrinsics 101 | A = np.array([[1.0/scale[1], 0.0, -offset[1]/scale[1]], 102 | [0.0, 1.0/scale[0], -offset[0]/scale[0]], 103 | [0.0, 0.0, 1.0]]) 104 | return img_crop, np.matmul(A, K.copy()) 105 | return img_crop 106 | 107 | -------------------------------------------------------------------------------- /utils/rendering.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import numpy as np 4 | import transforms3d as t3d 5 | import pickle 6 | from manopth.manolayer import ManoLayer 7 | 8 | from pytorch3d.io import load_objs_as_meshes 9 | from pytorch3d.structures import Meshes 10 | from pytorch3d.renderer import ( 11 | PerspectiveCameras, 12 | PointLights, 13 | RasterizationSettings, 14 | MeshRenderer, 15 | MeshRasterizer, 16 | HardGouraudShader, 17 | SoftPhongShader, 18 | TexturesVertex, 19 | TexturesUV, 20 | BlendParams 21 | ) 22 | 23 | 24 | def render_verts_faces(verts, faces, 25 | K, M_obj2cam, img_shape, 26 | verts_color=None, device='cuda', 27 | segmentation=True, render_size=200): 28 | if verts_color is None: 29 | verts_color = np.array([205/255., 205/255., 205/255.], dtype=np.float32) 30 | verts_color = torch.Tensor(verts_color).to(device) 31 | 32 | # Load obj file 33 | verts_feat = torch.ones_like(verts) * verts_color 34 | verts, faces = verts.to(device), faces.to(device) 35 | verts_feat = verts_feat.to(device) 36 | tex = TexturesVertex(verts_features=verts_feat) 37 | mesh = Meshes(verts.to(device), 38 | faces.to(device), 39 | tex) 40 | 41 | # Convert coordinate frames: pytorch3d X left, Y up --> CV X right, Y down 42 | # Rotate 180deg around z axis 43 | M_corr = np.eye(4) 44 | M_corr[:3, :3] = t3d.euler.euler2mat(0.0, .0, np.pi) 45 | M_obj2cam = np.matmul(M_corr, M_obj2cam) 46 | 47 | # setup camera 48 | focal = np.stack([K[:, 0, 0], K[:, 1, 1]], -1) 49 | pp = np.stack([K[:, 0, 2], K[:, 1, 2]], -1) 50 | img_shape = np.stack([img_shape[:, 1], img_shape[:, 0]], -1) 51 | R = np.transpose(M_obj2cam[:, :3, :3], [0, 2, 1]) 52 | T = M_obj2cam[:, :3, 3] 53 | cameras = PerspectiveCameras(focal_length=focal, 54 | principal_point=pp, 55 | R=R, 56 | T=T, 57 | image_size=img_shape, 58 | device=device) 59 | 60 | raster_settings = RasterizationSettings( 61 | image_size=render_size, 62 | blur_radius=0.0, 63 | faces_per_pixel=1, 64 | ) 65 | 66 | rasterizer = MeshRasterizer( 67 | cameras=cameras, 68 | raster_settings=raster_settings 69 | ) 70 | 71 | if 
segmentation: 72 | lights = PointLights(location=((1, 1, 0), ), 73 | ambient_color=((1.0, 1.0, 1.0),), 74 | diffuse_color=((0.0, 0.0, 0.0),), 75 | specular_color=((0.1, 0.1, 0.1),), 76 | device=device) 77 | 78 | shader = HardGouraudShader( 79 | device=device, 80 | cameras=cameras, 81 | lights=lights, 82 | blend_params=BlendParams(background_color=(.0, .0, .0)) 83 | ) 84 | else: 85 | 86 | d = 0.3 # diffuse 87 | a = 1.0-d # ambient 88 | lights = PointLights(location=((1, 1, 0),), 89 | diffuse_color=((d, d, d),), 90 | ambient_color=((a, a, a),), 91 | specular_color=((0.1, 0.1, 0.1),), 92 | device=device) 93 | 94 | shader = SoftPhongShader( 95 | device=device, 96 | cameras=cameras, 97 | lights=lights, 98 | blend_params=BlendParams(background_color=((.0, .0, .0),)) 99 | ) 100 | 101 | renderer = MeshRenderer( 102 | rasterizer=rasterizer, 103 | shader=shader 104 | ) 105 | 106 | fragments = rasterizer(mesh) 107 | 108 | image = renderer(mesh) 109 | im_out, dep_out = list(), list() 110 | for i, (w, h) in enumerate(img_shape): 111 | im_out.append( 112 | F.interpolate( 113 | image[i:i+1, :, :, :3].permute([0, 3, 1, 2]), 114 | (h, w)) 115 | ) 116 | dep_out.append( 117 | F.interpolate( 118 | fragments.zbuf[i:i+1, :, :, :1].permute([0, 3, 1, 2]), 119 | (h, w)) 120 | ) 121 | return im_out, dep_out 122 | 123 | 124 | -------------------------------------------------------------------------------- /run_hand_shape_fw.py: -------------------------------------------------------------------------------- 1 | """ Run forward pass on trained hand shape estimation network. """ 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | from collections import defaultdict 5 | import argparse 6 | import torch 7 | import torch.nn as nn 8 | import glob 9 | import numpy as np 10 | import cv2 11 | import time, os, json 12 | import matplotlib.pyplot as plt 13 | from tqdm import tqdm 14 | 15 | from utils.plot_util import draw_hand 16 | from utils.rendering import render_verts_faces 17 | from manopth.manolayer import ManoLayer 18 | from nets.ResNet import resnet50 19 | 20 | 21 | from utils.img_util import downsample 22 | from utils.mano_utils import apply_scaling, pred_to_mano, project 23 | from utils.general import load_ckpt, json_load 24 | 25 | 26 | class ManoPredictor(nn.Module): 27 | def __init__(self): 28 | super().__init__() 29 | self.model = resnet50(pretrained=False, head_type='mano') 30 | self.mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, center_idx=9) 31 | 32 | def forward(self, image_np, K_np, device='cpu'): 33 | assert image_np.shape == (224, 224, 3), 'Image shape mismatch.' 
34 | img = np.transpose(image_np[:, :, ::-1], [2, 0, 1]).astype(np.float32) / 255.0 - 0.5 35 | img = np.expand_dims(img, 0) 36 | theta_p = self.model( 37 | torch.Tensor(img).to(device) 38 | ) 39 | theta_p = apply_scaling(theta_p) 40 | poses, shapes, global_t = pred_to_mano(theta_p, 41 | torch.Tensor(K_np[None]).to(device) 42 | ) 43 | verts_p, xyz_p = self.mano(poses, shapes, global_t) 44 | uv_p = project(xyz_p, torch.Tensor(K_np[None]).to(device)) 45 | 46 | verts_p_np = verts_p.detach().cpu().numpy()[0] 47 | xyz_p_np = xyz_p.detach().cpu().numpy()[0] 48 | uv_p_np = uv_p.detach().cpu().numpy()[0] 49 | 50 | img_shape = np.array([[image_np.shape[0], image_np.shape[1]]]) 51 | mask_p, _ = render_verts_faces(verts_p, 52 | self.mano.th_faces[None], 53 | K_np[None], np.eye(4)[None], img_shape) 54 | mask_np = mask_p[0].detach().cpu().numpy()[0].transpose([1, 2, 0]) 55 | return verts_p_np, xyz_p_np, uv_p_np, mask_np 56 | 57 | 58 | def main(): 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument('hanco_path', type=str, help='Path to where HanCo dataset is stored.') 61 | parser.add_argument('--sid', type=int, help='Sequence ID.', default=110) 62 | parser.add_argument('--cid', type=int, help='Camera ID.', default=3) 63 | parser.add_argument('--fid', type=int, help='Frame ID.', default=0) 64 | args = parser.parse_args() 65 | 66 | assert os.path.exists(args.hanco_path), 'Path to HanCo not found.' 67 | assert os.path.isdir(args.hanco_path), 'Path to HanCo doesnt seem to be a directory.' 68 | 69 | 70 | img_path = os.path.join(args.hanco_path, f'rgb/{args.sid:04d}/cam{args.cid}/{args.fid:08d}.jpg') 71 | calib_path = os.path.join(args.hanco_path, f'calib/{args.sid:04d}/{args.fid:08d}.json') 72 | 73 | assert os.path.exists(img_path), f'Image not found: {img_path}' 74 | assert os.path.exists(calib_path), f'Calibration not found: {calib_path}' 75 | 76 | img = cv2.imread(img_path) 77 | K = np.array(json_load(calib_path)['K'][3]) 78 | 79 | # Load network 80 | model = ManoPredictor() 81 | state_dict = torch.load('ckpt/model_mano.pth') 82 | model.load_state_dict(state_dict, strict=False) 83 | model.cuda() 84 | model.eval() 85 | 86 | # forward pass 87 | with torch.no_grad(): 88 | verts_xyz_p, joints_xyz_p, joints_uv_p, mask_p = model.forward(img, K, 'cuda') 89 | 90 | # vis rgb image with predicted skeleton 91 | img_vis = draw_hand(img.copy(), joints_uv_p, kp_style=(2, 1), order='uv', img_order='bgr') 92 | 93 | # vis rendered mask with predicted skeleton 94 | mask_p = np.clip(mask_p*255, 0, 255).astype(np.uint8) 95 | mask_vis = draw_hand(mask_p.copy(), joints_uv_p, kp_style=(2, 1), order='uv', img_order='bgr') 96 | 97 | fig, ax = plt.subplots(1, 2) 98 | ax[0].imshow(img_vis[:, :, ::-1]), ax[0].set_title('rgb+pred skel') 99 | ax[1].imshow(mask_vis[:, :, ::-1]), ax[1].set_title('pred shape+skel') 100 | plt.show() 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /utils/plot_util.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, unicode_literals 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | def draw_hand(image, coords_hw, vis=None, color_fixed=None, linewidth=3, order='hw', img_order='rgb', 7 | draw_kp=True, kp_style=None): 8 | """ Inpaints a hand stick figure into a matplotlib figure. 
""" 9 | if kp_style is None: 10 | kp_style = (5, 3) 11 | 12 | image = np.squeeze(image) 13 | if len(image.shape) == 2: 14 | image = np.expand_dims(image, 2) 15 | s = image.shape 16 | assert len(s) == 3, "This only works for single images." 17 | 18 | convert_to_uint8 = False 19 | if s[2] == 1: 20 | # grayscale case 21 | image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4) 22 | image = np.tile(image, [1, 1, 3]) 23 | pass 24 | elif s[2] == 3: 25 | # RGB case 26 | if image.dtype == np.uint8: 27 | convert_to_uint8 = True 28 | image = image.astype('float32') / 255.0 29 | elif image.dtype == np.float32: 30 | # convert to gray image 31 | image = np.mean(image, axis=2) 32 | image = (image - np.min(image)) / (np.max(image) - np.min(image) + 1e-4) 33 | image = np.expand_dims(image, 2) 34 | image = np.tile(image, [1, 1, 3]) 35 | else: 36 | assert 0, "Unknown image dimensions." 37 | 38 | if order == 'uv': 39 | coords_hw = coords_hw[:, ::-1] 40 | 41 | colors = np.array([[0.4, 0.4, 0.4], 42 | [0.4, 0.0, 0.0], 43 | [0.6, 0.0, 0.0], 44 | [0.8, 0.0, 0.0], 45 | [1.0, 0.0, 0.0], 46 | [0.4, 0.4, 0.0], 47 | [0.6, 0.6, 0.0], 48 | [0.8, 0.8, 0.0], 49 | [1.0, 1.0, 0.0], 50 | [0.0, 0.4, 0.2], 51 | [0.0, 0.6, 0.3], 52 | [0.0, 0.8, 0.4], 53 | [0.0, 1.0, 0.5], 54 | [0.0, 0.2, 0.4], 55 | [0.0, 0.3, 0.6], 56 | [0.0, 0.4, 0.8], 57 | [0.0, 0.5, 1.0], 58 | [0.4, 0.0, 0.4], 59 | [0.6, 0.0, 0.6], 60 | [0.7, 0.0, 0.8], 61 | [1.0, 0.0, 1.0]]) 62 | 63 | if img_order == 'rgb': 64 | colors = colors[:, ::-1] 65 | 66 | # define connections and colors of the bones 67 | bones = [((0, 1), colors[1, :]), 68 | ((1, 2), colors[2, :]), 69 | ((2, 3), colors[3, :]), 70 | ((3, 4), colors[4, :]), 71 | 72 | ((0, 5), colors[5, :]), 73 | ((5, 6), colors[6, :]), 74 | ((6, 7), colors[7, :]), 75 | ((7, 8), colors[8, :]), 76 | 77 | ((0, 9), colors[9, :]), 78 | ((9, 10), colors[10, :]), 79 | ((10, 11), colors[11, :]), 80 | ((11, 12), colors[12, :]), 81 | 82 | ((0, 13), colors[13, :]), 83 | ((13, 14), colors[14, :]), 84 | ((14, 15), colors[15, :]), 85 | ((15, 16), colors[16, :]), 86 | 87 | ((0, 17), colors[17, :]), 88 | ((17, 18), colors[18, :]), 89 | ((18, 19), colors[19, :]), 90 | ((19, 20), colors[20, :])] 91 | 92 | color_map = {'k': np.array([0.0, 0.0, 0.0]), 93 | 'w': np.array([1.0, 1.0, 1.0]), 94 | 'b': np.array([0.0, 0.0, 1.0]), 95 | 'g': np.array([0.0, 1.0, 0.0]), 96 | 'r': np.array([1.0, 0.0, 0.0]), 97 | 'm': np.array([1.0, 1.0, 0.0]), 98 | 'c': np.array([0.0, 1.0, 1.0])} 99 | 100 | if vis is None: 101 | vis = np.ones_like(coords_hw[:, 0]) == 1.0 102 | 103 | for connection, color in bones: 104 | if (vis[connection[0]] == False) or (vis[connection[1]] == False): 105 | continue 106 | 107 | coord1 = coords_hw[connection[0], :].astype(np.int32) 108 | coord2 = coords_hw[connection[1], :].astype(np.int32) 109 | 110 | if (coord1[0] < 1) or (coord1[0] >= s[0]) or (coord1[1] < 1) or (coord1[1] >= s[1]): 111 | continue 112 | if (coord2[0] < 1) or (coord2[0] >= s[0]) or (coord2[1] < 1) or (coord2[1] >= s[1]): 113 | continue 114 | 115 | if color_fixed is None: 116 | cv2.line(image, (coord1[1], coord1[0]), (coord2[1], coord2[0]), color, thickness=linewidth) 117 | else: 118 | c = color_map.get(color_fixed, np.array([1.0, 1.0, 1.0])) 119 | cv2.line(image, (coord1[1], coord1[0]), (coord2[1], coord2[0]), c, thickness=linewidth) 120 | 121 | if draw_kp: 122 | coords_hw = coords_hw.astype(np.int32) 123 | for i in range(21): 124 | if vis[i]: 125 | # cv2.circle(img, center, radius, color, thickness) 126 | image = cv2.circle(image, (coords_hw[i, 
1], coords_hw[i, 0]), 127 | radius=kp_style[0], color=colors[i, :], thickness=kp_style[1]) 128 | 129 | if convert_to_uint8: 130 | image = (image * 255).astype('uint8') 131 | 132 | return image 133 | 134 | -------------------------------------------------------------------------------- /show_dataset.py: -------------------------------------------------------------------------------- 1 | """ Iterate HanCo dataset and show how to work with data. """ 2 | import os, argparse, json 3 | import numpy as np 4 | import cv2 5 | import matplotlib.pyplot as plt 6 | 7 | 8 | from utils.plot_util import draw_hand 9 | 10 | def example_meta_data(args): 11 | meta_file = os.path.join(args.hanco_path, 'meta.json') 12 | with open(meta_file, 'r') as fi: 13 | meta_data = json.load(fi) 14 | print(type(meta_data)) # Its a dict 15 | print(meta_data.keys()) # Its keys are: 'is_train', 'subject_id', 'is_valid', 'object_id', 'has_fit' 16 | 17 | for k, v in meta_data.items(): 18 | print(k, type(v), len(v), v[0][:3], v[-1][:3]) # these are all lists of length 1518 (= one entry for each sequence), each entry is another list representing the frames of the sequence 19 | 20 | # is_train: bool, True if recorded with green screen background 21 | # subject_id: int, Unique identifier for the human performer 22 | # is_valid: bool, True if there is a validated MANO shape fit 23 | # object_id: int, Unique identifier for the object used. None for sequences w/o object interaction 24 | # has_fit: bool, True if there is a MANO shape fit. Potentially, not validated 25 | 26 | 27 | def example_show_data(args, sid): 28 | """ 29 | sid: Sequence id: int, in [0, 1517] 30 | """ 31 | meta_file = os.path.join(args.hanco_path, 'meta.json') 32 | with open(meta_file, 'r') as fi: 33 | meta_data = json.load(fi) 34 | 35 | print(f"\nShowing sequence {sid} with {len(meta_data['is_train'][sid])} frames.") 36 | 37 | # iterate frames of this sequence 38 | for fid in range(len(meta_data['is_train'])): 39 | print(f"fid={fid},\n" 40 | f"is_train={meta_data['is_train'][sid][fid]},\n" 41 | f"subject_id={meta_data['subject_id'][sid][fid]},\n" 42 | f"is_valid={meta_data['is_valid'][sid][fid]},\n" 43 | f"object_id={meta_data['object_id'][sid][fid]},\n" 44 | f"has_fit={meta_data['has_fit'][sid][fid]}") 45 | rgb_list = list() 46 | for cid in range(8): # iterate cameras 47 | rgb_path = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 48 | rgb_list.append( 49 | cv2.imread(rgb_path)[:, :, ::-1] 50 | ) 51 | 52 | # show 53 | fig, ax = plt.subplots(1, 8) 54 | for j, img in enumerate(rgb_list): 55 | ax[j].imshow(img) 56 | ax[j].set_xticks([], []) 57 | ax[j].set_yticks([], []) 58 | plt.show() 59 | 60 | if fid > 3: 61 | # we deliberately stop showing after some samples 62 | break 63 | 64 | def example_show_keypoints(args, sid, fid, cid): 65 | # load image 66 | image_file = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 67 | img = cv2.imread(image_file)[:, :, ::-1] 68 | 69 | # load keypoints 70 | kp_data_file = os.path.join(args.hanco_path, f'xyz/{sid:04d}/{fid:08d}.json') 71 | with open(kp_data_file, 'r') as fi: 72 | kp_xyz = np.array(json.load(fi)) 73 | print('kp_xyz', kp_xyz.shape, kp_xyz.dtype) # 21x3, np.float64, world coordinates 74 | 75 | # load calibration 76 | calib_file = os.path.join(args.hanco_path, f'calib/{sid:04d}/{fid:08d}.json') 77 | with open(calib_file, 'r') as fi: 78 | calib = json.load(fi) 79 | 80 | # project points 81 | M_w2cam = np.array(calib['M'])[cid] 82 | K = np.array(calib['K'])[cid] 83 | kp_xyz_cam = 
np.matmul(kp_xyz, M_w2cam[:3, :3].T) + M_w2cam[:3, 3][None] # in camera coordinates 84 | kp_xyz_cam = kp_xyz_cam / kp_xyz_cam[:, -1:] 85 | kp_uv = np.matmul(kp_xyz_cam, K.T) 86 | kp_uv = kp_uv[:, :2] / kp_uv[:, -1:] 87 | 88 | # show 89 | img = draw_hand(img, kp_uv, order='uv', img_order='rgb') 90 | 91 | fig = plt.figure() 92 | ax = fig.add_subplot(111) 93 | ax.imshow(img) 94 | plt.show() 95 | 96 | 97 | def example_show_shape(args, sid, fid, cid): 98 | import torch 99 | from manopth.manolayer import ManoLayer 100 | from utils.mano_utils import pred_to_mano, project, trafoPoints 101 | from utils.rendering import render_verts_faces 102 | 103 | # load image 104 | image_file = os.path.join(args.hanco_path, f'rgb/{sid:04d}/cam{cid}/{fid:08d}.jpg') 105 | img = cv2.imread(image_file)[:, :, ::-1] 106 | 107 | # load calibration 108 | calib_file = os.path.join(args.hanco_path, f'calib/{sid:04d}/{fid:08d}.json') 109 | with open(calib_file, 'r') as fi: 110 | calib = json.load(fi) 111 | 112 | # load shape in world space 113 | kp_data_file = os.path.join(args.hanco_path, f'shape/{sid:04d}/{fid:08d}.json') 114 | with open(kp_data_file, 'r') as fi: 115 | mano_w = json.load(fi) 116 | for k, v in mano_w.items(): 117 | print(k, np.array(v).shape) # a dict of pose, shape and global_t 118 | 119 | # load shape in camera space 120 | kp_data_file = os.path.join(args.hanco_path, f'shape/{sid:04d}/cam{cid}/{fid:08d}.json') 121 | with open(kp_data_file, 'r') as fi: 122 | mano_cam = np.array(json.load(fi))[None] 123 | print('mano_vec', mano_cam.shape) # parameter vector 124 | pose_cam, shape_cam, global_t_cam = pred_to_mano(mano_cam, np.array(calib['K'])[cid][None], fw=np) 125 | 126 | # render shape masks 127 | def render_hand(poses, shapes, global_t, img_shape, K, M=None, center_idx=None): 128 | if M is None: 129 | M = np.eye(4) 130 | 131 | mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, center_idx=center_idx) 132 | 133 | verts, xyz = mano(poses, shapes, global_t) 134 | uv = project(trafoPoints(xyz, torch.Tensor(M)[None]), torch.Tensor(K)[None]) 135 | mask, _ = render_verts_faces(verts, 136 | mano.th_faces[None], 137 | K[None], M[None], img_shape[None], device='cpu') 138 | 139 | 140 | mask = mask[0].detach().cpu().numpy()[0] 141 | uv = uv.detach().cpu().numpy()[0] 142 | return mask, uv 143 | 144 | mask1, uv1 = render_hand(torch.Tensor(mano_w['poses']), 145 | torch.Tensor(mano_w['shapes']), 146 | torch.Tensor(mano_w['global_t']), 147 | np.array(img.shape[:2]), 148 | np.array(calib['K'][cid]), 149 | np.array(calib['M'][cid])) 150 | 151 | mask2, uv2 = render_hand(torch.Tensor(pose_cam), 152 | torch.Tensor(shape_cam), 153 | torch.Tensor(global_t_cam), 154 | np.array(img.shape[:2]), 155 | np.array(calib['K'][cid]), 156 | center_idx=9) 157 | 158 | # show 159 | img1 = draw_hand(img, uv1, order='uv', img_order='rgb') 160 | img2 = draw_hand(img, uv2, order='uv', img_order='rgb') 161 | 162 | fig = plt.figure() 163 | ax1 = fig.add_subplot(121) 164 | ax2 = fig.add_subplot(122) 165 | ax1.imshow(img1) 166 | ax1.imshow(mask1[0, :, :], alpha=0.5) 167 | ax2.imshow(img2) 168 | ax2.imshow(mask2[0, :, :], alpha=0.5) 169 | plt.show() 170 | 171 | 172 | if __name__ == '__main__': 173 | parser = argparse.ArgumentParser() 174 | parser.add_argument('hanco_path', type=str, help='Path to where HanCo dataset is stored.') 175 | args = parser.parse_args() 176 | 177 | assert os.path.exists(args.hanco_path), 'Path to HanCo not found.' 178 | assert os.path.isdir(args.hanco_path), 'Path to HanCo doesnt seem to be a directory.' 
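    # A minimal sketch of the camera projection applied in example_show_keypoints above
    # (variable names here are illustrative): keypoints are stored in world coordinates
    # and are mapped to pixels with the per-camera extrinsics M (world -> camera) and
    # the pinhole intrinsics K:
    #   xyz_cam = xyz_world @ M[:3, :3].T + M[:3, 3]    # rigid transform into the camera frame
    #   uvw = (xyz_cam / xyz_cam[:, -1:]) @ K.T         # normalise by depth, apply intrinsics
    #   uv  = uvw[:, :2] / uvw[:, -1:]                  # homogeneous -> pixel coordinates (u, v)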
179 | 180 | 181 | # Example1: Meta data 182 | example_meta_data(args) 183 | 184 | # Example2: Read/Show all images of one sequence 185 | example_show_data(args, 110) 186 | 187 | # Example3: Show keypoints, calibration, camera projection 188 | example_show_keypoints(args, sid=110, fid=24, cid=3) 189 | 190 | # Example4: Render MANO shape, show 191 | example_show_shape(args, sid=110, fid=24, cid=3) 192 | -------------------------------------------------------------------------------- /DatasetUnsupervisedMV.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import numpy as np 4 | from PIL import Image 5 | import matplotlib.pyplot as plt 6 | import torch 7 | from torch.utils.data import Dataset 8 | import torchvision.transforms as transforms 9 | from scipy.ndimage.morphology import binary_erosion 10 | from utils.general import get_dataset_path, json_load 11 | 12 | 13 | def mix(fg_img, mask_fg, bg_img, do_smoothing, do_erosion): 14 | """ Mix fg and bg image. Keep the fg where mask_fg is True. """ 15 | assert bg_img.shape == fg_img.shape 16 | fg_img = fg_img.copy() 17 | mask_fg = mask_fg.copy() 18 | bg_img = bg_img.copy() 19 | 20 | if len(mask_fg.shape) == 2: 21 | mask_fg = np.expand_dims(mask_fg, -1) 22 | 23 | if do_erosion: 24 | mask_fg = binary_erosion(mask_fg, structure=np.ones((5, 5, 1)) ) 25 | 26 | mask_fg = mask_fg.astype(np.float32) 27 | 28 | if do_smoothing: 29 | mask_fg = gaussian_filter(mask_fg, sigma=0.5) 30 | 31 | merged = (mask_fg * fg_img + (1.0 - mask_fg) * bg_img).astype(np.uint8) 32 | return merged 33 | 34 | 35 | class DatasetUnsupervisedMultiview(Dataset): 36 | def __init__(self, root=None, transform=None, cross_camera=False, 37 | cross_time=False, cross_bg=False): 38 | print("Starting to load multiview data.") 39 | if root is None: 40 | self.base_path = get_dataset_path() 41 | else: 42 | self.base_path = root 43 | self.cross_camera = cross_camera 44 | self.cross_time = cross_time 45 | self.cross_bg = cross_bg 46 | 47 | self.subsets = ['gs', 'merged', 'homo', 'color_auto'] # 'color_sample'] 48 | #self.subsets = ['gs', ] 49 | 50 | if self.cross_bg: 51 | self.subsets = ['mask_hand'] 52 | 53 | self.camsets = { 54 | # neighboring # opposing 55 | 0: [1, 4, 7, 0], # [3] 56 | 1: [0, 2, 6, 1], # [5] 57 | 2: [1, 3, 4, 2], # [7] 58 | 3: [2, 5, 6, 3], # [0] 59 | 4: [0, 2, 5, 4], # [6] 60 | 5: [3, 4, 7, 5], # [1] 61 | 6: [1, 3, 7, 6], # [4] 62 | 7: [0, 5, 6, 7], # [2] 63 | } # for each cam which cams are considered good partners 64 | 65 | self.timeset = (-1, 0, 1) 66 | # load meta info file 67 | self.meta_info = json_load(os.path.join(self.base_path, 'meta.json')) 68 | self.dataset = json_load(os.path.join(self.base_path, 'index_mv_unsup_weak.json')) 69 | 70 | random.shuffle(self.dataset) 71 | self.size = len(self.dataset) 72 | 73 | print("Using dataset: ", self.base_path) 74 | print("cross_camera", cross_camera, "size", len(self.camsets[0])) 75 | print("cross_time", cross_time, "size", len(self.timeset)) 76 | print("cross_bg", cross_bg) 77 | print('Sampling from subsets', self.subsets) 78 | print('Sampling from %d time steps' % self.size) 79 | 80 | assert transform is not None 81 | #assert not isinstance(transform, moco_loader.TwoCropsTransform) 82 | self.transform = transform 83 | 84 | 85 | def __len__(self): 86 | return self.size * 8 87 | 88 | def __getitem__(self, idx): 89 | sid, fid, K_list, M_list = self.dataset[idx % self.size] 90 | # roll for a random camera 91 | cid1 = random.randint(0, 7) 92 | 93 | if 
self.cross_camera: 94 | cid2 = random.choice(self.camsets[cid1]) 95 | else: 96 | cid2 = cid1 97 | 98 | fid1 = fid 99 | if self.cross_time: 100 | s_max = len(self.meta_info['is_train'][sid])-1 101 | fid2 = min(max(0, fid + random.choice(self.timeset)), s_max) 102 | else: 103 | fid2 = fid 104 | 105 | if self.meta_info['is_train'][sid][fid]: 106 | subset1 = random.choice(self.subsets) 107 | subset2 = random.choice(self.subsets) 108 | else: 109 | subset1 = 'test' 110 | subset2 = 'test' 111 | 112 | try: 113 | # read the frame 114 | sample1 = self.read(sid, fid1, cid1, subset1) 115 | sample2 = self.read(sid, fid2, cid2, subset2) 116 | 117 | if self.transform is not None: 118 | sample1 = self.transform(sample1) 119 | sample2 = self.transform(sample2) 120 | return (sample1, sample2), 0 121 | except FileNotFoundError as e: 122 | # print(e) 123 | return self.__getitem__(idx) 124 | 125 | 126 | def read(self, sid, fid, cid, subset): 127 | if subset == 'mask_hand': 128 | return self.read_rnd_background(sid, fid, cid, subset) 129 | 130 | if subset == 'gs' or subset == 'test': 131 | img_path = 'rgb/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 132 | else: 133 | img_path = 'rgb_%s/%04d/cam%d/%08d.jpg' % (subset, sid, cid, fid) 134 | 135 | # read samples 136 | path = os.path.join(self.base_path, img_path) 137 | with open(path, 'rb') as f: 138 | img = Image.open(f) 139 | return img.convert('RGB') 140 | 141 | 142 | def read_rnd_background(self, sid, fid, cid, subset): 143 | # sample rnd background 144 | base_path = '/misc/lmbraid18/zimmermc/' 145 | rid = random.randint(0, 1230) 146 | bg_image_new_path = os.path.join(base_path, 'background_subtraction/background_examples/bg_new/%05d.jpg' % rid) 147 | bg_img_new = Image.open(bg_image_new_path) 148 | 149 | mask_path = 'mask_hand/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 150 | mask_path = os.path.join(self.base_path, mask_path) 151 | mask_fg = Image.open(mask_path) 152 | 153 | img_path = 'rgb/%04d/cam%d/%08d.jpg' % (sid, cid, fid) 154 | img_path = os.path.join(self.base_path, img_path) 155 | fg_img = Image.open(img_path) 156 | 157 | 158 | bg_img_new = np.asarray(bg_img_new.resize(fg_img.size)) 159 | fg_img = np.asarray(fg_img) 160 | mask_fg = (np.asarray(mask_fg) / 255.)[:, :, None] 161 | 162 | merged = mix(fg_img, mask_fg, bg_img_new, do_smoothing=True, do_erosion=True) 163 | 164 | return Image.fromarray(merged) 165 | 166 | 167 | def get_dataset(batch_size): 168 | normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5], 169 | std=[1.0, 1.0, 1.0]) 170 | 171 | img_size = 112 # running with 224 resolution did not improve results 172 | print("Warning: Un-comment augmentations for training") 173 | 174 | # these are the agumentations as we use for our moco pre-training 175 | # please un-comment the gaussian blue and normalization before training 176 | augmentation = [ 177 | transforms.RandomAffine(10), 178 | transforms.RandomResizedCrop(img_size, scale=(0.2, 1.)), 179 | transforms.RandomApply([ 180 | transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) # not strengthened 181 | ], p=0.8), 182 | transforms.RandomGrayscale(p=0.2), 183 | #transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])], p=0.5), 184 | transforms.RandomHorizontalFlip(), 185 | transforms.ToTensor(), 186 | #normalize 187 | ] 188 | 189 | dataset = DatasetUnsupervisedMultiview(None, transforms.Compose(augmentation), 190 | cross_camera=False, 191 | cross_time=False, 192 | cross_bg=False) 193 | 194 | return torch.utils.data.DataLoader(dataset, 195 | batch_size=batch_size, 196 | shuffle=True, 197 | num_workers=8) 198 | 199 
| 200 | if __name__ == '__main__': 201 | batch_size = 3 202 | d = get_dataset(batch_size) 203 | 204 | for sample in d: 205 | data, label = sample 206 | for i in range(batch_size): 207 | img = data[0][i].numpy().transpose(1, 2, 0) 208 | img_aug = data[1][i].numpy().transpose(1, 2, 0) 209 | 210 | fig, ax = plt.subplots(1,2) 211 | ax[0].imshow(img) 212 | ax[1].imshow(img_aug) 213 | plt.show() 214 | -------------------------------------------------------------------------------- /nets/ResNet.py: -------------------------------------------------------------------------------- 1 | """ From: https://github.com/chenxi116/DeepLabv3.pytorch/blob/046818d755f91169dbad141362b98178dd685447/deeplab.py """ 2 | import torch 3 | import torch.nn as nn 4 | import math 5 | import numpy as np 6 | import torch.utils.model_zoo as model_zoo 7 | from torch.nn import functional as F 8 | 9 | 10 | model_urls = { 11 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 12 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 13 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 14 | } 15 | 16 | class Conv2d(nn.Conv2d): 17 | 18 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 19 | padding=0, dilation=1, groups=1, bias=True): 20 | super(Conv2d, self).__init__(in_channels, out_channels, kernel_size, stride, 21 | padding, dilation, groups, bias) 22 | 23 | def forward(self, x): 24 | # return super(Conv2d, self).forward(x) 25 | weight = self.weight 26 | weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, 27 | keepdim=True).mean(dim=3, keepdim=True) 28 | weight = weight - weight_mean 29 | std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5 30 | weight = weight / std.expand_as(weight) 31 | return F.conv2d(x, weight, self.bias, self.stride, 32 | self.padding, self.dilation, self.groups) 33 | 34 | 35 | class Bottleneck(nn.Module): 36 | expansion = 4 37 | def __init__(self, inplanes, planes, stride=1, downsample=None, dilation=1, conv=None, norm=None): 38 | super(Bottleneck, self).__init__() 39 | self.conv1 = conv(inplanes, planes, kernel_size=1, bias=False) 40 | self.bn1 = norm(planes) 41 | self.conv2 = conv(planes, planes, kernel_size=3, stride=stride, 42 | dilation=dilation, padding=dilation, bias=False) 43 | self.bn2 = norm(planes) 44 | self.conv3 = conv(planes, planes * self.expansion, kernel_size=1, bias=False) 45 | self.bn3 = norm(planes * self.expansion) 46 | self.relu = nn.ReLU(inplace=True) 47 | self.downsample = downsample 48 | self.stride = stride 49 | 50 | def forward(self, x): 51 | residual = x 52 | 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.relu(out) 56 | 57 | out = self.conv2(out) 58 | out = self.bn2(out) 59 | out = self.relu(out) 60 | 61 | out = self.conv3(out) 62 | out = self.bn3(out) 63 | 64 | if self.downsample is not None: 65 | residual = self.downsample(x) 66 | 67 | out += residual 68 | out = self.relu(out) 69 | 70 | return out 71 | 72 | 73 | class ResNet(nn.Module): 74 | def __init__(self, block, layers, num_groups=None, weight_std=False, beta=False, 75 | drop_prob=0.5, head_type=None): 76 | self.inplanes = 64 77 | self.drop_prob = drop_prob 78 | if head_type is None: 79 | head_type = 'mano' 80 | self.head_type = head_type 81 | self.norm = lambda planes, momentum=0.05: nn.BatchNorm2d(planes, momentum=momentum) if num_groups is None else nn.GroupNorm(num_groups, planes) 82 | self.conv = Conv2d if weight_std else nn.Conv2d 83 | 84 | super(ResNet, self).__init__() 85 | 
if not beta: 86 | self.conv1 = self.conv(3, 64, kernel_size=7, stride=2, padding=3, 87 | bias=False) 88 | else: 89 | self.conv1 = nn.Sequential( 90 | self.conv(3, 64, 3, stride=2, padding=1, bias=False), 91 | self.conv(64, 64, 3, stride=1, padding=1, bias=False), 92 | self.conv(64, 64, 3, stride=1, padding=1, bias=False)) 93 | self.bn1 = self.norm(64) 94 | self.relu = nn.ReLU(inplace=True) 95 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 96 | self.layer1 = self._make_layer(block, 64, layers[0]) 97 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 98 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 99 | self.layer4 = self._make_layer(block, 512, layers[3], stride=1, 100 | dilation=2) 101 | self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 102 | 103 | if self.head_type == 'mano': 104 | self.fc1 = nn.Linear(512 * block.expansion, 2048) 105 | self.fc2 = nn.Linear(2048, 2048) 106 | self.fc3 = nn.Linear(2048, 61) 107 | 108 | elif self.head_type == 'embed': 109 | self.fc = nn.Linear(512 * block.expansion, 2048) 110 | self.fc2 = nn.Linear(2048, 128) 111 | 112 | else: 113 | raise NotImplementedError 114 | 115 | for m in self.modules(): 116 | if isinstance(m, self.conv): 117 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 118 | m.weight.data.normal_(0, math.sqrt(2. / n)) 119 | elif isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.GroupNorm): 120 | m.weight.data.fill_(1) 121 | m.bias.data.zero_() 122 | 123 | if 'mano' in self.head_type: 124 | torch.nn.init.xavier_normal_(self.fc3.weight, 0.01) 125 | torch.nn.init.uniform_(self.fc3.bias, -0.01, 0.01) 126 | 127 | def _make_layer(self, block, planes, blocks, stride=1, dilation=1): 128 | downsample = None 129 | if stride != 1 or dilation != 1 or self.inplanes != planes * block.expansion: 130 | downsample = nn.Sequential( 131 | self.conv(self.inplanes, planes * block.expansion, 132 | kernel_size=1, stride=stride, dilation=max(1, dilation/2), bias=False), 133 | self.norm(planes * block.expansion), 134 | ) 135 | 136 | layers = [] 137 | layers.append(block(self.inplanes, planes, stride, downsample, dilation=max(1, dilation/2), conv=self.conv, norm=self.norm)) 138 | self.inplanes = planes * block.expansion 139 | for i in range(1, blocks): 140 | layers.append(block(self.inplanes, planes, dilation=dilation, conv=self.conv, norm=self.norm)) 141 | 142 | return nn.Sequential(*layers) 143 | 144 | def forward(self, x): 145 | x = self.conv1(x) 146 | x = self.bn1(x) 147 | x = self.relu(x) 148 | x = self.maxpool(x) 149 | 150 | x = self.layer1(x) 151 | x = self.layer2(x) 152 | x = self.layer3(x) 153 | x = self.layer4(x) 154 | x_feat = x 155 | 156 | x = self.avgpool(x) 157 | x = torch.flatten(x, 1) 158 | 159 | if self.head_type == 'mano': 160 | x = self.fc1(x) 161 | x = F.relu(x) 162 | x = F.dropout(x, self.drop_prob, self.training) 163 | 164 | x = self.fc2(x) 165 | x = F.relu(x) 166 | x = F.dropout(x, self.drop_prob, self.training) 167 | 168 | x = self.fc3(x) 169 | 170 | elif self.head_type == 'embed': 171 | x = self.fc(x) 172 | x = F.dropout(x, self.drop_prob, self.training) 173 | x = F.relu(x) 174 | x = self.fc2(x) 175 | return x 176 | 177 | 178 | def resnet50(pretrained=False, **kwargs): 179 | """Constructs a ResNet-50 model. 
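    Compared to the standard torchvision ResNet-50, this variant optionally uses
    weight-standardised convolutions (the Conv2d subclass above) and GroupNorm,
    keeps layer4 at stride 1 with dilation 2, and ends in either a 61-dim MANO
    parameter head or a 128-dim embedding head.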
180 | 181 | Args: 182 | pretrained (bool): If True, returns a model pre-trained on ImageNet 183 | """ 184 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 185 | if pretrained: 186 | model_dict = model.state_dict() 187 | pretrained_dict = model_zoo.load_url(model_urls['resnet50']) 188 | overlap_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} # only keys that are in the model 189 | overlap_dict = {k: v for k, v in overlap_dict.items() if np.all(v.shape == model_dict[k].shape)} # only when the shape matches 190 | model_dict.update(overlap_dict) 191 | model.load_state_dict(model_dict) 192 | print('Loaded %d weights from the pretrained snapshot.' % len(overlap_dict)) 193 | # model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 194 | return model 195 | -------------------------------------------------------------------------------- /manopth/manolayer.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import torch 5 | from torch.nn import Module 6 | 7 | from manopth.load_util import ready_arguments 8 | from manopth import rodrigues_layer, rotproj, rot6d 9 | from manopth.tensutils import (th_posemap_axisang, th_with_zeros, th_pack, 10 | subtract_flat_id, make_list) 11 | 12 | class ManoLayer(Module): 13 | __constants__ = [ 14 | 'use_pca', 'rot', 'ncomps', 'ncomps', 'kintree_parents', 'check', 15 | 'side', 'center_idx', 'joint_rot_mode' 16 | ] 17 | 18 | def __init__(self, 19 | center_idx=None, 20 | flat_hand_mean=True, 21 | ncomps=6, 22 | side='right', 23 | mano_root='mano_models', 24 | use_pca=True, 25 | root_rot_mode='axisang', 26 | joint_rot_mode='axisang', 27 | robust_rot=False): 28 | """ 29 | Args: 30 | center_idx: If number, then this is considered to be the keypoint 31 | index to center of. Defaults to None in which case centering 32 | is skipped. 33 | flat_hand_mean: if True, (0, 0, 0, ...) pose coefficients match 34 | flat hand, else match average hand pose 35 | mano_root: path to MANO pkl files for left and right hand 36 | ncomps: number of PCA components form pose space (<45) 37 | side: 'right' or 'left' 38 | use_pca: Use PCA decomposition for pose space. 
39 | joint_rot_mode: 'axisang' or 'rotmat', ignored if use_pca 40 | """ 41 | super().__init__() 42 | self.center_idx = center_idx 43 | self.robust_rot = robust_rot 44 | if root_rot_mode == 'axisang': 45 | self.rot = 3 46 | else: 47 | self.rot = 6 48 | self.flat_hand_mean = flat_hand_mean 49 | self.side = side 50 | self.use_pca = use_pca 51 | self.joint_rot_mode = joint_rot_mode 52 | self.root_rot_mode = root_rot_mode 53 | if use_pca: 54 | self.ncomps = ncomps 55 | else: 56 | self.ncomps = 45 57 | 58 | if side == 'right': 59 | self.mano_path = os.path.join(mano_root, 'MANO_RIGHT.pkl') 60 | elif side == 'left': 61 | self.mano_path = os.path.join(mano_root, 'MANO_LEFT.pkl') 62 | 63 | smpl_data = ready_arguments(self.mano_path) 64 | 65 | hands_components = smpl_data['hands_components'] 66 | 67 | self.smpl_data = smpl_data 68 | 69 | self.register_buffer('th_betas', 70 | torch.Tensor(np.array(smpl_data['betas'].r)).unsqueeze(0)) 71 | self.register_buffer('th_shapedirs', 72 | torch.Tensor(smpl_data['shapedirs'].r.copy())) 73 | self.register_buffer('th_posedirs', 74 | torch.Tensor(smpl_data['posedirs'].r.copy())) 75 | self.register_buffer( 76 | 'th_v_template', 77 | torch.Tensor(smpl_data['v_template'].r.copy()).unsqueeze(0)) 78 | self.register_buffer( 79 | 'th_J_regressor', 80 | torch.Tensor(np.array(smpl_data['J_regressor'].toarray()))) 81 | self.register_buffer('th_weights', 82 | torch.Tensor(smpl_data['weights'].r.copy())) 83 | self.register_buffer('th_faces', 84 | torch.Tensor(smpl_data['f'].astype(np.int32)).long()) 85 | 86 | # Get hand mean 87 | hands_mean = np.zeros(hands_components.shape[1] 88 | ) if flat_hand_mean else smpl_data['hands_mean'] 89 | hands_mean = hands_mean.copy() 90 | th_hands_mean = torch.Tensor(hands_mean).unsqueeze(0) 91 | if self.use_pca or self.joint_rot_mode == 'axisang': 92 | # Save as axis-angle 93 | self.register_buffer('th_hands_mean', th_hands_mean) 94 | selected_components = hands_components[:ncomps] 95 | self.register_buffer('th_comps', torch.Tensor(hands_components)) 96 | self.register_buffer('th_selected_comps', 97 | torch.Tensor(selected_components)) 98 | else: 99 | th_hands_mean_rotmat = rodrigues_layer.batch_rodrigues( 100 | th_hands_mean.view(15, 3)).reshape(15, 3, 3) 101 | self.register_buffer('th_hands_mean_rotmat', th_hands_mean_rotmat) 102 | 103 | # Kinematic chain params 104 | self.kintree_table = smpl_data['kintree_table'] 105 | parents = list(self.kintree_table[0].tolist()) 106 | self.kintree_parents = parents 107 | 108 | def forward(self, 109 | th_pose_coeffs, 110 | th_betas=torch.zeros(1), 111 | th_trans=torch.zeros(1), 112 | root_palm=torch.Tensor([0]), 113 | share_betas=torch.Tensor([0]), 114 | ): 115 | """ 116 | Args: 117 | th_trans (Tensor (batch_size x ncomps)): if provided, applies trans to joints and vertices 118 | th_betas (Tensor (batch_size x 10)): if provided, uses given shape parameters for hand shape 119 | else centers on root joint (9th joint) 120 | root_palm: return palm as hand root instead of wrist 121 | """ 122 | 123 | batch_size = th_pose_coeffs.shape[0] 124 | # Get axis angle from PCA components and coefficients 125 | if self.use_pca or self.joint_rot_mode == 'axisang': 126 | # Remove global rot coeffs 127 | th_hand_pose_coeffs = th_pose_coeffs[:, self.rot:self.rot + 128 | self.ncomps] 129 | if self.use_pca: 130 | # PCA components --> axis angles 131 | th_full_hand_pose = th_hand_pose_coeffs.mm(self.th_selected_comps) 132 | else: 133 | th_full_hand_pose = th_hand_pose_coeffs 134 | 135 | # Concatenate back global rot 136 | 
th_full_pose = torch.cat([ 137 | th_pose_coeffs[:, :self.rot], 138 | self.th_hands_mean + th_full_hand_pose 139 | ], 1) 140 | if self.root_rot_mode == 'axisang': 141 | # compute rotation matrixes from axis-angle while skipping global rotation 142 | th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose) 143 | root_rot = th_rot_map[:, :9].view(batch_size, 3, 3) 144 | th_rot_map = th_rot_map[:, 9:] 145 | th_pose_map = th_pose_map[:, 9:] 146 | else: 147 | # th_posemap offsets by 3, so add offset or 3 to get to self.rot=6 148 | th_pose_map, th_rot_map = th_posemap_axisang(th_full_pose[:, 6:]) 149 | if self.robust_rot: 150 | root_rot = rot6d.robust_compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) 151 | else: 152 | root_rot = rot6d.compute_rotation_matrix_from_ortho6d(th_full_pose[:, :6]) 153 | else: 154 | assert th_pose_coeffs.dim() == 4, ( 155 | 'When not self.use_pca, ' 156 | 'th_pose_coeffs should have 4 dims, got {}'.format( 157 | th_pose_coeffs.dim())) 158 | assert th_pose_coeffs.shape[2:4] == (3, 3), ( 159 | 'When not self.use_pca, th_pose_coeffs have 3x3 matrix for two' 160 | 'last dims, got {}'.format(th_pose_coeffs.shape[2:4])) 161 | th_pose_rots = rotproj.batch_rotprojs(th_pose_coeffs) 162 | th_rot_map = th_pose_rots[:, 1:].view(batch_size, -1) 163 | th_pose_map = subtract_flat_id(th_rot_map) 164 | root_rot = th_pose_rots[:, 0] 165 | 166 | # Full axis angle representation with root joint 167 | if th_betas is None or th_betas.numel() == 1: 168 | th_v_shaped = torch.matmul(self.th_shapedirs, 169 | self.th_betas.transpose(1, 0)).permute( 170 | 2, 0, 1) + self.th_v_template 171 | th_j = torch.matmul(self.th_J_regressor, th_v_shaped).repeat( 172 | batch_size, 1, 1) 173 | 174 | else: 175 | if share_betas: 176 | th_betas = th_betas.mean(0, keepdim=True).expand(th_betas.shape[0], 10) 177 | th_v_shaped = torch.matmul(self.th_shapedirs, 178 | th_betas.transpose(1, 0)).permute( 179 | 2, 0, 1) + self.th_v_template 180 | th_j = torch.matmul(self.th_J_regressor, th_v_shaped) 181 | # th_pose_map should have shape 20x135 182 | 183 | th_v_posed = th_v_shaped + torch.matmul( 184 | self.th_posedirs, th_pose_map.transpose(0, 1)).permute(2, 0, 1) 185 | # Final T pose with transformation done ! 
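        # The remainder of forward(): chain the per-joint rotations along the kinematic
        # tree (wrist -> proximal -> middle -> distal per finger) into global 4x4 joint
        # transforms, skin the posed template vertices against these transforms with the
        # blend weights (linear blend skinning), append finger-tip vertices as extra
        # keypoints, reorder to the 21-joint convention used by the plotting utilities,
        # and finally apply the optional centering and the global translation th_trans.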
186 | 187 | # Global rigid transformation 188 | 189 | root_j = th_j[:, 0, :].contiguous().view(batch_size, 3, 1) 190 | root_trans = th_with_zeros(torch.cat([root_rot, root_j], 2)) 191 | 192 | all_rots = th_rot_map.view(th_rot_map.shape[0], 15, 3, 3) 193 | lev1_idxs = [1, 4, 7, 10, 13] 194 | lev2_idxs = [2, 5, 8, 11, 14] 195 | lev3_idxs = [3, 6, 9, 12, 15] 196 | lev1_rots = all_rots[:, [idx - 1 for idx in lev1_idxs]] 197 | lev2_rots = all_rots[:, [idx - 1 for idx in lev2_idxs]] 198 | lev3_rots = all_rots[:, [idx - 1 for idx in lev3_idxs]] 199 | lev1_j = th_j[:, lev1_idxs] 200 | lev2_j = th_j[:, lev2_idxs] 201 | lev3_j = th_j[:, lev3_idxs] 202 | 203 | # From base to tips 204 | # Get lev1 results 205 | all_transforms = [root_trans.unsqueeze(1)] 206 | lev1_j_rel = lev1_j - root_j.transpose(1, 2) 207 | lev1_rel_transform_flt = th_with_zeros(torch.cat([lev1_rots, lev1_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 208 | root_trans_flt = root_trans.unsqueeze(1).repeat(1, 5, 1, 1).view(root_trans.shape[0] * 5, 4, 4) 209 | lev1_flt = torch.matmul(root_trans_flt, lev1_rel_transform_flt) 210 | all_transforms.append(lev1_flt.view(all_rots.shape[0], 5, 4, 4)) 211 | 212 | # Get lev2 results 213 | lev2_j_rel = lev2_j - lev1_j 214 | lev2_rel_transform_flt = th_with_zeros(torch.cat([lev2_rots, lev2_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 215 | lev2_flt = torch.matmul(lev1_flt, lev2_rel_transform_flt) 216 | all_transforms.append(lev2_flt.view(all_rots.shape[0], 5, 4, 4)) 217 | 218 | # Get lev3 results 219 | lev3_j_rel = lev3_j - lev2_j 220 | lev3_rel_transform_flt = th_with_zeros(torch.cat([lev3_rots, lev3_j_rel.unsqueeze(3)], 3).view(-1, 3, 4)) 221 | lev3_flt = torch.matmul(lev2_flt, lev3_rel_transform_flt) 222 | all_transforms.append(lev3_flt.view(all_rots.shape[0], 5, 4, 4)) 223 | 224 | reorder_idxs = [0, 1, 6, 11, 2, 7, 12, 3, 8, 13, 4, 9, 14, 5, 10, 15] 225 | th_results = torch.cat(all_transforms, 1)[:, reorder_idxs] 226 | th_results_global = th_results 227 | 228 | joint_js = torch.cat([th_j, th_j.new_zeros(th_j.shape[0], 16, 1)], 2) 229 | tmp2 = torch.matmul(th_results, joint_js.unsqueeze(3)) 230 | th_results2 = (th_results - torch.cat([tmp2.new_zeros(*tmp2.shape[:2], 4, 3), tmp2], 3)).permute(0, 2, 3, 1) 231 | 232 | th_T = torch.matmul(th_results2, self.th_weights.transpose(0, 1)) 233 | 234 | th_rest_shape_h = torch.cat([ 235 | th_v_posed.transpose(2, 1), 236 | torch.ones((batch_size, 1, th_v_posed.shape[1]), 237 | dtype=th_T.dtype, 238 | device=th_T.device), 239 | ], 1) 240 | 241 | th_verts = (th_T * th_rest_shape_h.unsqueeze(1)).sum(2).transpose(2, 1) 242 | th_verts = th_verts[:, :, :3] 243 | th_jtr = th_results_global[:, :, :3, 3] 244 | # In addition to MANO reference joints we sample vertices on each finger 245 | # to serve as finger tips 246 | if self.side == 'right': 247 | tips = th_verts[:, [745, 317, 444, 556, 673]] 248 | else: 249 | tips = th_verts[:, [745, 317, 445, 556, 673]] 250 | if bool(root_palm): 251 | palm = (th_verts[:, 95] + th_verts[:, 22]).unsqueeze(1) / 2 252 | th_jtr = torch.cat([palm, th_jtr[:, 1:]], 1) 253 | th_jtr = torch.cat([th_jtr, tips], 1) 254 | 255 | # Reorder joints to match visualization utilities 256 | th_jtr = th_jtr[:, [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18, 10, 11, 12, 19, 7, 8, 9, 20]] 257 | 258 | # Possibly center on a certain keypoint 259 | if self.center_idx is not None: 260 | center_joint = th_jtr[:, self.center_idx].unsqueeze(1) 261 | th_jtr = th_jtr - center_joint 262 | th_verts = th_verts - center_joint 263 | th_jtr = th_jtr + th_trans 264 | th_verts = 
th_verts + th_trans 265 | 266 | return th_verts, th_jtr 267 | 268 | def calc_alphas(self, pose): 269 | """ Project a flat 45-dim hand pose onto the selected MANO PCA components and return the per-component coefficients (alphas). """ 270 | if self.flat_hand_mean: 271 | pose = pose - self.th_hands_mean.squeeze(0) 272 | 273 | alphas = list() 274 | for comp in self.th_selected_comps: 275 | v = torch.sqrt(torch.sum(torch.square(comp)) + 1e-8) 276 | alphas.append( torch.sum(torch.mul(comp, pose))/v/v ) 277 | 278 | return torch.stack(alphas) 279 | --------------------------------------------------------------------------------
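A minimal usage sketch for the ManoLayer defined above (assuming MANO_RIGHT.pkl has been placed in mano_models/ and using the flat, non-PCA parameterisation as in show_dataset.example_show_shape; all values below are placeholders):

import torch
from manopth.manolayer import ManoLayer

# use_pca=False: the 45 joint angles are given directly instead of PCA coefficients
mano = ManoLayer(use_pca=False, ncomps=45, flat_hand_mean=False, mano_root='mano_models')

batch_size = 1
pose = torch.zeros(batch_size, 3 + 45)    # global axis-angle rotation + 45 joint angles
betas = torch.zeros(batch_size, 10)       # MANO shape parameters
trans = torch.zeros(batch_size, 3)        # global translation

verts, joints = mano(pose, betas, trans)  # verts: (1, 778, 3), joints: (1, 21, 3)
print(verts.shape, joints.shape)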