├── handmocap └── hand_modules │ ├── __init__.py │ ├── test_options.py │ ├── h3dw_networks.py │ ├── base_options.py │ ├── resnet.py │ └── h3dw_model.py ├── bodymocap ├── models │ ├── __init__.py │ ├── hmr.py │ └── smpl.py ├── constants.py ├── body_bbox_detector.py ├── utils │ └── geometry.py └── body_mocap_api.py ├── renderer ├── shaders │ ├── simple140.fs │ ├── simple140.vs │ ├── normal140.fs │ ├── normal140.vs │ ├── geo140.vs │ ├── geo140.fs │ ├── colorgeo140.vs │ ├── colorgeo140.fs │ └── framework.py ├── image_utils.py ├── screen_free_visualizer.py ├── meshRenderer.py ├── render_utils.py ├── od_renderer.py ├── p3d_renderer.py └── denseposeRenderer.py ├── docs ├── requirements.txt ├── run_totalmocap.md ├── INSTALL.md ├── joint_order.md └── run_handmocap.md ├── scripts ├── download_sample_data.sh ├── install_pose2d.sh ├── install_frankmocap.sh ├── download_data_hand_module.sh ├── install_hand_detectors.sh └── download_data_body_module.sh ├── .gitignore ├── mocap_utils ├── compose_image.py ├── extract_frame.py ├── timer.py ├── compare_results.py ├── general_utils.py ├── geometry_utils.py └── coordconv.py ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md ├── demo ├── demo_options.py ├── demo_bodymocap.py ├── demo_handmocap.py ├── demo_frankmocap.py └── demo_visualize_prediction.py └── README.md /handmocap/hand_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bodymocap/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .hmr import hmr 2 | from .smpl import SMPL, SMPLX 3 | -------------------------------------------------------------------------------- /renderer/shaders/simple140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | in vec3 Color; 7 | 8 | void main() 9 | { 10 | FragColor = vec4(Color,1.0); 11 | } 12 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | gdown 3 | opencv-python 4 | PyOpenGL 5 | PyOpenGL_accelerate 6 | pycocotools 7 | pafy 8 | youtube-dl 9 | scipy 10 | pillow>=7.1.0 11 | easydict 12 | cython 13 | cffi 14 | msgpack 15 | pyyaml 16 | tensorboardX 17 | tqdm 18 | jinja2 19 | smplx 20 | sklearn 21 | opendr 22 | chumpy 23 | -------------------------------------------------------------------------------- /renderer/shaders/simple140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Color; 7 | 8 | out vec3 Color; 9 | 10 | uniform mat4 ModelMat; 11 | uniform mat4 PerspMat; 12 | 13 | void main() 14 | { 15 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 16 | Color = a_Color; 17 | } -------------------------------------------------------------------------------- /renderer/shaders/normal140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | in vec3 CamNormal; 7 | 8 | void main() 9 | { 10 | // FragColor = vec4(Color,1.0); 11 | vec3 cam_norm_normalized = normalize(CamNormal); 12 | vec3 rgb = (cam_norm_normalized + 1.0) / 2.0; 13 | FragColor = vec4(rgb, 1.0); 14 | } -------------------------------------------------------------------------------- /renderer/shaders/normal140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | 8 | out vec3 CamNormal; 9 | 10 | uniform mat4 ModelMat; 11 | uniform mat4 PerspMat; 12 | 13 | void main() 14 | { 15 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 16 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 17 | } -------------------------------------------------------------------------------- /scripts/download_sample_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | if [ ! -d "sample_data" ] 6 | then 7 | echo "Downloading sample videos" 8 | wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 9 | else 10 | echo "There exists sample_data already" 11 | fi 12 | echo "Done" 13 | -------------------------------------------------------------------------------- /renderer/shaders/geo140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | 8 | out vec3 CamNormal; 9 | out vec3 CamPos; 10 | 11 | uniform mat4 ModelMat; 12 | uniform mat4 PerspMat; 13 | 14 | void main() 15 | { 16 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 17 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 18 | CamPos = (ModelMat * vec4(a_Position, 1.0)).xyz; 19 | } -------------------------------------------------------------------------------- /renderer/shaders/geo140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | 7 | in vec3 CamNormal; 8 | in vec3 CamPos; 9 | 10 | void main() 11 | { 12 | vec3 light_direction = vec3(0, 0, 1); 13 | vec3 f_normal = normalize(CamNormal.xyz); 14 | vec4 specular_reflection = vec4(0.2) * pow(max(0.0, dot(reflect(-light_direction, f_normal), vec3(0, 0, -1))), 16.f); 15 | FragColor = vec4(dot(f_normal, light_direction)*vec3(1.0, 1.0, 1.0)+specular_reflection.xyz, 1.0); 16 | } -------------------------------------------------------------------------------- /renderer/shaders/colorgeo140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | in vec3 a_Color; 8 | 9 | out vec3 CamNormal; 10 | out vec3 CamPos; 11 | out vec3 Color; 12 | 13 | uniform mat4 ModelMat; 14 | uniform mat4 PerspMat; 15 | 16 | void main() 17 | { 18 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 19 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 20 | CamPos = (ModelMat * vec4(a_Position, 1.0)).xyz; 21 | 22 | //Color = vec3(1.0, 1.0, 1.0); 23 | Color = a_Color; 24 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.jpg 3 | *.png 4 | smpl/ 5 | tmp/ 6 | pretrained_models/ 7 | .vscode/ 8 | test_result/ 9 | log/ 10 | log.txt 11 | checkpoints/ 12 | evaluate_results/ 13 | *.pyc 14 | *.npy 15 | test_log.txt 16 | *.pkl 17 | pretrained/ 18 | render_result/ 19 | lightweight-human-pose-estimation.pytorch/ 20 | data/* 21 | webvideos/ 22 | samples/ 23 | *.pth 24 | *.obj 25 | data/ 26 | detectors/ 27 | data 28 | samples_all/ 29 | *.zip 30 | extra_data/ 31 | sample_data/ 32 | mocap_output/ 33 | demo.sh 34 | demo_multi.sh 35 | mocap_utils/frame_to_video.py 36 | mocap_utils/frame_to_gif.py 37 | mocap_utils/select_epick_kitchen.py 38 | -------------------------------------------------------------------------------- /renderer/shaders/colorgeo140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | 7 | in vec3 CamNormal; 8 | in vec3 CamPos; 9 | in vec3 Color; 10 | 11 | void main() 12 | { 13 | vec3 light_direction = vec3(0, 0, 1); 14 | vec3 f_normal = normalize(CamNormal.xyz); 15 | vec4 specular_reflection = vec4(0.2) * pow(max(0.0, dot(reflect(-light_direction, f_normal), vec3(0, 0, -1))), 16.f); 16 | // FragColor = vec4(dot(f_normal, light_direction)*vec3(1.0, 1.0, 1.0)+specular_reflection.xyz, 1.0); 17 | FragColor = vec4(dot(f_normal, light_direction)*Color+specular_reflection.xyz, 1.0); 18 | } -------------------------------------------------------------------------------- /scripts/install_pose2d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | mkdir -p detectors 6 | cd detectors 7 | 8 | git clone git@github.com:jhugestar/lightweight-human-pose-estimation.pytorch.git 9 | if [ ! -d lightweight-human-pose-estimation.pytorch ]; then 10 | git clone https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git 11 | fi 12 | mv lightweight-human-pose-estimation.pytorch body_pose_estimator 13 | 14 | #Download pretrained model 15 | wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth 16 | mkdir -p ../extra_data/body_module/body_pose_estimator 17 | mv *.pth ../extra_data/body_module/body_pose_estimator -------------------------------------------------------------------------------- /scripts/install_frankmocap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
4 | 5 | echo "" 6 | echo ">> Installing a third-party 2D keypoint detector" 7 | sh scripts/install_pose2d.sh 8 | 9 | echo "" 10 | echo ">> Download extra data for body module" 11 | sh scripts/download_data_body_module.sh 12 | 13 | 14 | echo "" 15 | echo ">> Installing a third-party hand detector" 16 | sh scripts/install_hand_detectors.sh 17 | 18 | 19 | echo "" 20 | echo ">> Download extra data for hand module" 21 | sh scripts/download_data_hand_module.sh 22 | 23 | echo "" 24 | if [ ! -d "sample_data" ] 25 | then 26 | echo "Downloading sample videos" 27 | wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 28 | else 29 | echo "There exists sample_data already" 30 | fi -------------------------------------------------------------------------------- /mocap_utils/compose_image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import cv2 6 | import numpy as np 7 | import general_utils as gnu 8 | 9 | 10 | def main(): 11 | in_dir = "./sample_data/images/single_person" 12 | out_dir = "./sample_data/images/multi_person" 13 | gnu.renew_dir(out_dir) 14 | 15 | all_imgs = gnu.get_all_files(in_dir, (".jpg", ".png", ".jpeg"), "full") 16 | num_img = len(all_imgs) 17 | 18 | for i in range(num_img): 19 | for j in range(num_img): 20 | img1 = cv2.imread(all_imgs[i]) 21 | img2 = cv2.imread(all_imgs[j]) 22 | img2 = cv2.resize(img2, img1.shape[:2][::-1]) 23 | res_img = np.concatenate((img1, img2), axis=1) 24 | res_img_path = osp.join(out_dir, f"{i:02d}_{j:02d}.jpg") 25 | cv2.imwrite(res_img_path, res_img) 26 | 27 | 28 | if __name__ == '__main__': 29 | main() -------------------------------------------------------------------------------- /scripts/download_data_hand_module.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | set -ex 6 | 7 | [ -d extra_data ] || mkdir extra_data 8 | cd extra_data 9 | 10 | [ -d hand_module ] || mkdir hand_module 11 | cd hand_module 12 | 13 | echo "Downloading other data" 14 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/SMPLX_HAND_INFO.pkl 15 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/mean_mano_params.pkl 16 | 17 | echo "Downloading pretrained hand model" 18 | [ -d pretrained_weights ] || mkdir pretrained_weights 19 | cd pretrained_weights 20 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/checkpoints_best/pose_shape_best.pth 21 | 22 | #Go to root directory 23 | cd ../../../ 24 | 25 | echo "Downloading sample videos" 26 | wget https://dl.fbaipublicfiles.com/eft/sample_data_frank.tar && tar -xvf sample_data_frank.tar && rm sample_data_frank.tar 27 | echo "Done" -------------------------------------------------------------------------------- /handmocap/hand_modules/test_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | # Part of the code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 4 | 5 | from .base_options import BaseOptions 6 | 7 | class TestOptions(BaseOptions): 8 | def initialize(self): 9 | BaseOptions.initialize(self) 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='-1', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--visualize_eval', action='store_true') 13 | self.parser.add_argument('--test_dataset', type=str, choices=['freihand', 'ho3d', 'stb', 'rhd', 'mtc', 'wild', 'demo'], help="which dataset to test on") 14 | self.parser.add_argument("--checkpoint_path", type=str, default=None, help="path of checkpoints used in test") 15 | self.isTrain = False 16 | -------------------------------------------------------------------------------- /scripts/install_hand_detectors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | mkdir -p detectors 6 | cd detectors 7 | 8 | pip install gdown 9 | 10 | # Install 100-DOH hand-object detectors 11 | git clone git@github.com:ddshan/hand_object_detector.git 12 | if [ ! -d hand_object_detector ]; then 13 | git clone https://github.com/ddshan/hand_object_detector 14 | fi 15 | # compile 16 | cd hand_object_detector/lib 17 | python setup.py build develop 18 | cd ../../ 19 | 20 | # Install 100-DOH hand-only detectors 21 | git clone git@github.com:ddshan/hand_detector.d2.git 22 | if [ ! -d hand_detector.d2 ]; then 23 | git clone https://github.com/ddshan/hand_detector.d2.git 24 | fi 25 | mv hand_detector.d2 hand_only_detector 26 | 27 | # downloading weights 28 | gdown https://drive.google.com/uc?id=1H2tWsZkS7tDF8q1-jdjx6V9XrK25EDbE 29 | gdown https://drive.google.com/uc?id=1OqgexNM52uxsPG3i8GuodDOJAGFsYkPg 30 | mkdir -p ../extra_data/hand_module/hand_detector 31 | mv *pth ../extra_data/hand_module/hand_detector -------------------------------------------------------------------------------- /mocap_utils/extract_frame.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import subprocess as sp 6 | import general_utils as gnu 7 | 8 | 9 | def extract_frame(video_dir, frame_dir): 10 | for file in os.listdir(video_dir): 11 | if file.endswith((".mov", ".mp4")): 12 | file_path = osp.join(video_dir, file) 13 | file_name = file[:-4] 14 | # if file_name != 'legao_02_01': continue 15 | res_dir = osp.join(frame_dir, file_name) 16 | gnu.build_dir(res_dir) 17 | command = f"ffmpeg -i {file_path} {res_dir}/{file_name}_%05d.png" 18 | command = command.split() 19 | sp.run(command) 20 | 21 | 22 | def main(): 23 | root_dir = "./sample_data/" 24 | 25 | video_dir = osp.join(root_dir, "videos") 26 | frame_dir = osp.join(root_dir, "frames") 27 | gnu.renew_dir(frame_dir) 28 | 29 | extract_frame(video_dir, frame_dir) 30 | 31 | if __name__ == '__main__': 32 | main() -------------------------------------------------------------------------------- /scripts/download_data_body_module.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
4 | 5 | set -ex 6 | 7 | mkdir -p extra_data/body_module 8 | cd extra_data/body_module 9 | 10 | echo "Downloading extra data from SPIN" 11 | wget http://visiondata.cis.upenn.edu/spin/data.tar.gz && tar -xvf data.tar.gz && rm data.tar.gz 12 | mv data data_from_spin 13 | 14 | echo "Downloading pretrained model" 15 | mkdir -p pretrained_weights 16 | cd pretrained_weights 17 | wget https://dl.fbaipublicfiles.com/eft/2020_05_31-00_50_43-best-51.749683916568756.pt 18 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/body_module/smplx-03-28-46060-w_spin_mlc3d_46582-2089_2020_03_28-21_56_16.pt 19 | cd .. 20 | 21 | echo "Downloading other data" 22 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/body_module/J_regressor_extra_smplx.npy 23 | 24 | 25 | # if [ ! -d "sample_data" ] 26 | # then 27 | # echo "Downloading sample videos" 28 | # wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 29 | # else 30 | # echo "There exists sample_data already" 31 | # fi 32 | 33 | echo "Done" 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pifuhd 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to pifuhd, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. -------------------------------------------------------------------------------- /docs/run_totalmocap.md: -------------------------------------------------------------------------------- 1 | # Whole Body Motion Capture Demo (Body + Hand) 2 | 3 | Our whole body motion capture is based on our [FrankMocap paper](https://penincillin.github.io/frank_mocap), by intergrating the output of body module and hand module. See our paper for details. 4 | 5 |
8 | 9 | ## Requirements 10 | - You should install both [body module](run_bodymocap.md) and [hand module](run_handmocap.md). 11 | 12 | 13 | ## A Quick Start 14 | - Run the following. The mocap output will be shown on your screen. 15 | ``` 16 | # Using a machine with a monitor to show output on screen 17 | # OpenGL renderer is used by default (--renderer_type opengl) 18 | # The output images are also saved in ./mocap_output 19 | python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 20 | 21 | # Screenless mode (e.g., a remote server) 22 | xvfb-run -a python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 23 | 24 | # Set --renderer_type to use other renderers 25 | python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 26 | ``` 27 | 28 | ## Run Demo with A Webcam Input 29 | - Run, 30 | ``` 31 | python -m demo.demo_frankmocap --input_path webcam 32 | 33 | # or use the OpenGL GUI renderer 34 | python -m demo.demo_frankmocap --input_path webcam --renderer_type opengl_gui 35 | ``` 36 | - See below for how to control the OpenGL GUI mode. 37 | 38 | ## Other Details 39 | - Other options are the same as for the [body module](run_bodymocap.md). 40 | 41 | ## License 42 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 43 | See the [LICENSE](LICENSE) file. 44 | -------------------------------------------------------------------------------- /mocap_utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | ############################################################################## 7 | # 8 | # Based on: 9 | # -------------------------------------------------------- 10 | # Fast R-CNN 11 | # Copyright (c) 2015 Microsoft 12 | # Licensed under The MIT License [see LICENSE for details] 13 | # Written by Ross Girshick 14 | # -------------------------------------------------------- 15 | 16 | """Timing related functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import time 24 | 25 | 26 | class Timer(object): 27 | """A simple timer.""" 28 | 29 | def __init__(self): 30 | self.reset() 31 | 32 | def tic(self): 33 | # using time.time instead of time.clock because time.clock 34 | # does not normalize for multithreading 35 | self.start_time = time.time() 36 | 37 | def toc(self, average=True, bPrint=False, title="Time"): 38 | self.diff = time.time() - self.start_time 39 | self.total_time += self.diff 40 | self.calls += 1 41 | self.average_time = self.total_time / self.calls 42 | if average: 43 | if bPrint: 44 | # print("Avg Time: {}".format(self.average_time)) 45 | print("{}: {:0.2f} sec/frame, FPS {:0.2f}".format(title, self.diff, 1.0/self.diff)) 46 | 47 | return self.average_time 48 | else: 49 | if bPrint: 50 | print("{}: {}, FPS {:0.2f}".format(title, self.diff, 1.0/self.diff)) 51 | return self.diff 52 | 53 | def reset(self): 54 | self.total_time = 0. 55 | self.calls = 0 56 | self.start_time = 0. 57 | self.diff = 0. 58 | self.average_time = 0.
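# Example usage (a minimal sketch; `frame_timer` and `process_frame` are hypothetical names):
#
#   frame_timer = Timer()
#   frame_timer.tic()                              # start timing one frame
#   process_frame()                                # the work being measured
#   frame_timer.toc(bPrint=True, title="Frame")    # prints "Frame: X.XX sec/frame, FPS Y.YY"
#                                                  # with average=True (default), toc() returns the running average time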
59 | -------------------------------------------------------------------------------- /mocap_utils/compare_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | This code is used to visually compare the results 5 | """ 6 | import os, sys, shutil 7 | import os.path as osp 8 | import ry_utils 9 | import cv2 10 | import numpy as np 11 | 12 | def check_keywords(subdir, keywords): 13 | if len(keywords) == 0: 14 | return True 15 | else: 16 | for keyword in keywords: 17 | if subdir.find(keyword)>=0: 18 | return True 19 | return False 20 | 21 | def main(): 22 | dir_list = [ 23 | 'samples/output/body/third_view_thresh_0.3_distance_2.0', 24 | 'samples/output/body/third_view_thresh_0.5_distance_1.5', 25 | 'samples/output/body/third_view_thresh_0.7_distance_1.0', 26 | ] 27 | dir1 = dir_list[0] 28 | 29 | keywords = ['cj_dance', 'body_capture'] 30 | 31 | res_dir = "samples/output/body/third_view_compare" 32 | res_dir = osp.join(res_dir, '_&&_'.join(['_'.join(item.split('/')[-1:]) for item in dir_list])) 33 | 34 | for subdir in os.listdir(dir1): 35 | if osp.isdir(osp.join(dir1, subdir)): 36 | if check_keywords(subdir, keywords): 37 | dir_path1 = osp.join(dir1, subdir) 38 | for img_name in ry_utils.get_all_files(dir_path1, ('.jpg','.png'), 'name_only'): 39 | img_list = list() 40 | #print(img_name) 41 | for dir in dir_list: 42 | dir_path = dir_path1.replace(dir1, dir) 43 | img_path = osp.join(dir_path, img_name) 44 | img = cv2.imread(img_path) 45 | img_list.append(img) 46 | if img_path.find(dir1)>=0: 47 | res_img_path = img_path.replace(dir1, res_dir) 48 | #print(img_path, osp.exists(img_path)) 49 | if any([img is None for img in img_list]): 50 | continue 51 | res_img = np.concatenate(img_list, axis=0) 52 | h, w = res_img.shape[:2] 53 | res_img = cv2.resize(res_img, (int(w*0.7), int(h*0.7))) 54 | res_img_path = res_img_path.replace('.png', '.jpg') 55 | ry_utils.make_subdir(res_img_path) 56 | cv2.imwrite(res_img_path, res_img) 57 | print(res_img_path) 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /handmocap/hand_modules/h3dw_networks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import init 6 | import functools 7 | import numpy as np 8 | from . 
import resnet 9 | 10 | def weights_init(m): 11 | classname = m.__class__.__name__ 12 | if classname.find('Conv') != -1: 13 | m.weight.data.normal_(0.0, 0.02) 14 | if hasattr(m.bias, 'data'): 15 | m.bias.data.fill_(0) 16 | elif classname.find('BatchNorm2d') != -1: 17 | m.weight.data.normal_(1.0, 0.02) 18 | m.bias.data.fill_(0) 19 | 20 | 21 | def get_norm_layer(norm_type='instance'): 22 | if norm_type == 'batch': 23 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True) 24 | elif norm_type == 'instance': 25 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False) 26 | else: 27 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 28 | return norm_layer 29 | 30 | 31 | def print_network(net): 32 | num_params = 0 33 | for param in net.parameters(): 34 | num_params += param.numel() 35 | print(net) 36 | print('Total number of parameters: %d' % num_params) 37 | 38 | 39 | def get_model(arch): 40 | if hasattr(resnet, arch): 41 | network = getattr(resnet, arch) 42 | return network(pretrained=True, num_classes=512) 43 | else: 44 | raise ValueError("Invalid Backbone Architecture") 45 | 46 | 47 | class H3DWEncoder(nn.Module): 48 | def __init__(self, opt, mean_params): 49 | super(H3DWEncoder, self).__init__() 50 | self.two_branch = opt.two_branch 51 | self.mean_params = mean_params.clone().cuda() 52 | self.opt = opt 53 | 54 | relu = nn.ReLU(inplace=False) 55 | fc2 = nn.Linear(1024, 1024) 56 | regressor = nn.Linear(1024 + opt.total_params_dim, opt.total_params_dim) 57 | 58 | feat_encoder = [relu, fc2, relu] 59 | regressor = [regressor, ] 60 | self.feat_encoder = nn.Sequential(*feat_encoder) 61 | self.regressor = nn.Sequential(*regressor) 62 | 63 | self.main_encoder = get_model(opt.main_encoder) 64 | 65 | 66 | def forward(self, main_input): 67 | main_feat = self.main_encoder(main_input) 68 | feat = self.feat_encoder(main_feat) 69 | 70 | pred_params = self.mean_params 71 | for i in range(3): 72 | input_feat = torch.cat([feat, pred_params], dim=1) 73 | output = self.regressor(input_feat) 74 | pred_params = pred_params + output 75 | return pred_params 76 | -------------------------------------------------------------------------------- /renderer/image_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import cv2 4 | import numpy as np 5 | 6 | def draw_keypoints(image, kps, color=(0,0,255), radius=5, check_exist=False): 7 | # recover color 8 | if color == 'red': 9 | color = (0, 0, 255) 10 | elif color == 'green': 11 | color = (0, 255, 0) 12 | elif color == 'blue': 13 | color = (255, 0, 0) 14 | else: 15 | assert isinstance(color, tuple) and len(color) == 3 16 | 17 | # draw keypoints 18 | res_img = image.copy() 19 | for i in range(kps.shape[0]): 20 | x, y = kps[i][:2].astype(np.int32) 21 | if check_exist: 22 | score = kps[i][2] 23 | else: 24 | score = 1.0 25 | # print(i, score) 26 | if score > 0.0: 27 | cv2.circle(res_img, (x,y), radius=radius, color=color, thickness=-1) 28 | return res_img.astype(np.uint8) 29 | 30 | 31 | def draw_bbox(image, bbox, color=(0,0,255), thickness=3): 32 | x0, y0 = int(bbox[0]), int(bbox[1]) 33 | x1, y1 = int(bbox[2]), int(bbox[3]) 34 | res_img = cv2.rectangle(image.copy(), (x0,y0), (x1,y1), color=color, thickness=thickness) 35 | return res_img.astype(np.uint8) 36 | 37 | 38 | 39 | def draw_raw_bbox(img, bboxes): 40 | img = img.copy() 41 | for bbox in bboxes: 42 | x0, y0, w, h = bbox 43 | bbox_xyxy = (x0, y0, x0+w, y0+h) 44 | img = draw_bbox(img, bbox_xyxy) 45 | return img 46 | 47 | 48 | def draw_body_bbox(img, body_bbox_list): 49 | img = img.copy() 50 | for body_bbox in body_bbox_list: 51 | if body_bbox is not None: 52 | x0, y0, w, h = body_bbox 53 | img = draw_bbox(img, (x0, y0, x0+w, y0+h)) 54 | return img 55 | 56 | 57 | def draw_arm_pose(img, body_pose_list): 58 | img = img.copy() 59 | for body_pose in body_pose_list: 60 | # left & right arm 61 | img = draw_keypoints( 62 | img, body_pose[6:8, :], radius=10, color=(255, 0, 0)) 63 | img = draw_keypoints( 64 | img, body_pose[3:5, :], radius=10, color=(0, 0, 255)) 65 | return img 66 | 67 | 68 | def draw_hand_bbox(img, hand_bbox_list): 69 | img = img.copy() 70 | for hand_bboxes in hand_bbox_list: 71 | if hand_bboxes is not None: 72 | for key in hand_bboxes: 73 | bbox = hand_bboxes[key] 74 | if bbox is not None: 75 | x0, y0, w, h = bbox 76 | bbox_new = (x0, y0, x0+w, y0+h) 77 | color = (255, 0, 0) if key == 'left_hand' else (0, 255, 0) 78 | img = draw_bbox(img, bbox_new, color=color) 79 | return img 80 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /renderer/shaders/framework.py: -------------------------------------------------------------------------------- 1 | 2 | # Mario Rosasco, 2016 3 | # adapted from framework.cpp, Copyright (C) 2010-2012 by Jason L. McKesson 4 | # This file is licensed under the MIT License. 
5 | # 6 | # NB: Unlike in the framework.cpp organization, the main loop is contained 7 | # in the tutorial files, not in this framework file. Additionally, a copy of 8 | # this module file must exist in the same directory as the tutorial files 9 | # to be imported properly. 10 | 11 | import os 12 | 13 | from OpenGL.GL import * 14 | 15 | 16 | # Function that creates and compiles shaders according to the given type (a GL enum value) and 17 | # shader program (a file containing a GLSL program). 18 | def loadShader(shaderType, shaderFile): 19 | # check if file exists, get full path name 20 | strFilename = findFileOrThrow(shaderFile) 21 | shaderData = None 22 | with open(strFilename, 'r') as f: 23 | shaderData = f.read() 24 | 25 | shader = glCreateShader(shaderType) 26 | glShaderSource(shader, shaderData) # note that this is a simpler function call than in C 27 | 28 | # This shader compilation is more explicit than the one used in 29 | # framework.cpp, which relies on a glutil wrapper function. 30 | # This is made explicit here mainly to decrease dependence on pyOpenGL 31 | # utilities and wrappers, which docs caution may change in future versions. 32 | glCompileShader(shader) 33 | 34 | status = glGetShaderiv(shader, GL_COMPILE_STATUS) 35 | if status == GL_FALSE: 36 | # Note that getting the error log is much simpler in Python than in C/C++ 37 | # and does not require explicit handling of the string buffer 38 | strInfoLog = glGetShaderInfoLog(shader) 39 | strShaderType = "" 40 | if shaderType is GL_VERTEX_SHADER: 41 | strShaderType = "vertex" 42 | elif shaderType is GL_GEOMETRY_SHADER: 43 | strShaderType = "geometry" 44 | elif shaderType is GL_FRAGMENT_SHADER: 45 | strShaderType = "fragment" 46 | 47 | print("Compilation failure for " + strShaderType + " shader:\n" + str(strInfoLog)) 48 | 49 | return shader 50 | 51 | 52 | # Function that accepts a list of shaders, compiles them, and returns a handle to the compiled program 53 | def createProgram(shaderList): 54 | program = glCreateProgram() 55 | 56 | for shader in shaderList: 57 | glAttachShader(program, shader) 58 | 59 | glLinkProgram(program) 60 | 61 | status = glGetProgramiv(program, GL_LINK_STATUS) 62 | if status == GL_FALSE: 63 | # Note that getting the error log is much simpler in Python than in C/C++ 64 | # and does not require explicit handling of the string buffer 65 | strInfoLog = glGetProgramInfoLog(program) 66 | print("Linker failure: \n" + str(strInfoLog)) 67 | 68 | for shader in shaderList: 69 | glDetachShader(program, shader) 70 | 71 | return program 72 | 73 | 74 | # Helper function to locate and open the target file (passed in as a string). 75 | # Returns the full path to the file as a string. 76 | def findFileOrThrow(strBasename): 77 | # Keep constant names in C-style convention, for readability 78 | # when comparing to C(/C++) code. 
79 | if os.path.isfile(strBasename): 80 | return strBasename 81 | 82 | LOCAL_FILE_DIR = "data" + os.sep 83 | GLOBAL_FILE_DIR = os.path.dirname(os.path.abspath(__file__)) + os.sep + "data" + os.sep 84 | 85 | strFilename = LOCAL_FILE_DIR + strBasename 86 | if os.path.isfile(strFilename): 87 | return strFilename 88 | 89 | strFilename = GLOBAL_FILE_DIR + strBasename 90 | if os.path.isfile(strFilename): 91 | return strFilename 92 | 93 | raise IOError('Could not find target file ' + strBasename) 94 | -------------------------------------------------------------------------------- /bodymocap/constants.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | FOCAL_LENGTH = 5000. 4 | IMG_RES = 224 5 | 6 | # Mean and standard deviation for normalizing input image 7 | IMG_NORM_MEAN = [0.485, 0.456, 0.406] 8 | IMG_NORM_STD = [0.229, 0.224, 0.225] 9 | 10 | """ 11 | We create a superset of joints containing the OpenPose joints together with the ones that each dataset provides. 12 | We keep a superset of 24 joints such that we include all joints from every dataset. 13 | If a dataset doesn't provide annotations for a specific joint, we simply ignore it. 14 | The joints used here are the following: 15 | """ 16 | JOINT_NAMES = [ 17 | 'OP Nose', 'OP Neck', 'OP RShoulder', #0,1,2 18 | 'OP RElbow', 'OP RWrist', 'OP LShoulder', #3,4,5 19 | 'OP LElbow', 'OP LWrist', 'OP MidHip', #6, 7,8 20 | 'OP RHip', 'OP RKnee', 'OP RAnkle', #9,10,11 21 | 'OP LHip', 'OP LKnee', 'OP LAnkle', #12,13,14 22 | 'OP REye', 'OP LEye', 'OP REar', #15,16,17 23 | 'OP LEar', 'OP LBigToe', 'OP LSmallToe', #18,19,20 24 | 'OP LHeel', 'OP RBigToe', 'OP RSmallToe', 'OP RHeel', #21, 22, 23, 24 ##Total 25 joints for openpose 25 | 'Right Ankle', 'Right Knee', 'Right Hip', #0,1,2 26 | 'Left Hip', 'Left Knee', 'Left Ankle', #3, 4, 5 27 | 'Right Wrist', 'Right Elbow', 'Right Shoulder', #6 28 | 'Left Shoulder', 'Left Elbow', 'Left Wrist', #9 29 | 'Neck (LSP)', 'Top of Head (LSP)', #12, 13 30 | 'Pelvis (MPII)', 'Thorax (MPII)', #14, 15 31 | 'Spine (H36M)', 'Jaw (H36M)', #16, 17 32 | 'Head (H36M)', 'Nose', 'Left Eye', #18, 19, 20 33 | 'Right Eye', 'Left Ear', 'Right Ear' #21,22,23 (Total 24 joints) 34 | ] 35 | 36 | # Dict containing the joints in numerical order 37 | JOINT_IDS = {JOINT_NAMES[i]: i for i in range(len(JOINT_NAMES))} 38 | 39 | # Map joints to SMPL joints 40 | JOINT_MAP = { 41 | 'OP Nose': 24, 'OP Neck': 12, 'OP RShoulder': 17, 42 | 'OP RElbow': 19, 'OP RWrist': 21, 'OP LShoulder': 16, 43 | 'OP LElbow': 18, 'OP LWrist': 20, 'OP MidHip': 0, 44 | 'OP RHip': 2, 'OP RKnee': 5, 'OP RAnkle': 8, 45 | 'OP LHip': 1, 'OP LKnee': 4, 'OP LAnkle': 7, 46 | 'OP REye': 25, 'OP LEye': 26, 'OP REar': 27, 47 | 'OP LEar': 28, 'OP LBigToe': 29, 'OP LSmallToe': 30, 48 | 'OP LHeel': 31, 'OP RBigToe': 32, 'OP RSmallToe': 33, 'OP RHeel': 34, 49 | 'Right Ankle': 8, 'Right Knee': 5, 'Right Hip': 45, 50 | 'Left Hip': 46, 'Left Knee': 4, 'Left Ankle': 7, 51 | 'Right Wrist': 21, 'Right Elbow': 19, 'Right Shoulder': 17, 52 | 'Left Shoulder': 16, 'Left Elbow': 18, 'Left Wrist': 20, 53 | 'Neck (LSP)': 47, 'Top of Head (LSP)': 48, 54 | 'Pelvis (MPII)': 49, 'Thorax (MPII)': 50, 55 | 'Spine (H36M)': 51, 'Jaw (H36M)': 52, 56 | 'Head (H36M)': 53, 'Nose': 24, 'Left Eye': 26, 57 | 'Right Eye': 25, 'Left Ear': 28, 'Right Ear': 27 58 | } 59 | 60 | # Joint selectors 61 | # Indices to get the 14 LSP joints from the 17 H36M joints 62 | H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 
13, 8, 10, 0, 7, 9] 63 | H36M_TO_J14 = H36M_TO_J17[:14] 64 | # Indices to get the 14 LSP joints from the ground truth joints 65 | J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17] 66 | J24_TO_J14 = J24_TO_J17[:14] 67 | 68 | # Permutation of SMPL pose parameters when flipping the shape 69 | SMPL_JOINTS_FLIP_PERM = [0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 19, 18, 21, 20, 23, 22] 70 | SMPL_POSE_FLIP_PERM = [] 71 | for i in SMPL_JOINTS_FLIP_PERM: 72 | SMPL_POSE_FLIP_PERM.append(3*i) 73 | SMPL_POSE_FLIP_PERM.append(3*i+1) 74 | SMPL_POSE_FLIP_PERM.append(3*i+2) 75 | # Permutation indices for the 24 ground truth joints 76 | J24_FLIP_PERM = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21, 20, 23, 22] 77 | # Permutation indices for the full set of 49 joints 78 | J49_FLIP_PERM = [0, 1, 5, 6, 7, 2, 3, 4, 8, 12, 13, 14, 9, 10, 11, 16, 15, 18, 17, 22, 23, 24, 19, 20, 21]\ 79 | + [25+i for i in J24_FLIP_PERM] 80 | 81 | 82 | -------------------------------------------------------------------------------- /renderer/screen_free_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | Renders mesh using OpenDr / Pytorch-3D for visualization. 5 | """ 6 | 7 | import sys 8 | import numpy as np 9 | import cv2 10 | import pdb 11 | from .image_utils import draw_raw_bbox, draw_hand_bbox, draw_body_bbox, draw_arm_pose 12 | 13 | # To use screen_free visualizer. Either OpenDR or Pytorch3D should be installed. 14 | g_valid_visualize = False 15 | try: 16 | from .od_renderer import OpendrRenderer 17 | g_valid_visualize = True 18 | except ImportError: 19 | print("Cannot import OpenDR Renderer") 20 | try: 21 | from .p3d_renderer import Pytorch3dRenderer 22 | g_valid_visualize = True 23 | except ImportError: 24 | print("Cannot import Pytorch3D Renderer") 25 | assert g_valid_visualize, "You should import either OpenDR or Pytorch3D" 26 | 27 | class Visualizer(object): 28 | 29 | def __init__(self, renderer_backend): 30 | colors = { 31 | # colorbline/print/copy safe: 32 | 'light_gray': [0.9, 0.9, 0.9], 33 | 'light_purple': [0.8, 0.53, 0.53], 34 | 'light_green': [166/255.0, 178/255.0, 30/255.0], 35 | 'light_blue': [0.65098039, 0.74117647, 0.85882353], 36 | } 37 | 38 | self.input_size = 1920 39 | 40 | # set-up renderer 41 | assert renderer_backend in ['opendr', 'pytorch3d'] 42 | if renderer_backend == 'opendr': 43 | self.renderer = OpendrRenderer( 44 | img_size=self.input_size, 45 | mesh_color=colors['light_purple']) 46 | else: 47 | self.renderer = Pytorch3dRenderer( 48 | img_size=self.input_size, 49 | mesh_color=colors['light_purple']) 50 | 51 | 52 | def __render_pred_verts(self, img_original, pred_mesh_list): 53 | assert max(img_original.shape) <= self.input_size, \ 54 | f"Currently, we donlt support images size larger than:{self.input_size}" 55 | 56 | res_img = img_original.copy() 57 | rend_img = np.ones((self.input_size, self.input_size, 3)) 58 | h, w = img_original.shape[:2] 59 | rend_img[:h, :w, :] = img_original 60 | 61 | for mesh in pred_mesh_list: 62 | verts = mesh['vertices'] 63 | faces = mesh['faces'] 64 | rend_img = self.renderer.render(verts, faces, rend_img) 65 | 66 | res_img = rend_img[:h, :w, :] 67 | return res_img 68 | 69 | 70 | def visualize(self, 71 | input_img, 72 | hand_bbox_list = None, 73 | body_bbox_list = None, 74 | body_pose_list = None, 75 | raw_hand_bboxes = None, 76 | pred_mesh_list = None, 77 | vis_raw_hand_bbox = True, 78 
| vis_body_pose = True, 79 | vis_hand_bbox = True, 80 | ): 81 | # init 82 | res_img = input_img.copy() 83 | 84 | # draw raw hand bboxes 85 | if raw_hand_bboxes is not None and vis_raw_hand_bbox: 86 | res_img = draw_raw_bbox(input_img, raw_hand_bboxes) 87 | # res_img = np.concatenate((res_img, raw_bbox_img), axis=1) 88 | 89 | # draw 2D Pose 90 | if body_pose_list is not None and vis_body_pose: 91 | res_img = draw_arm_pose(res_img, body_pose_list) 92 | 93 | # draw body bbox 94 | if body_bbox_list is not None: 95 | body_bbox_img = draw_body_bbox(input_img, body_bbox_list) 96 | res_img = body_bbox_img 97 | 98 | # draw hand bbox 99 | if hand_bbox_list is not None: 100 | res_img = draw_hand_bbox(res_img, hand_bbox_list) 101 | 102 | # render predicted meshes 103 | if pred_mesh_list is not None: 104 | rend_img = self.__render_pred_verts(input_img, pred_mesh_list) 105 | res_img = np.concatenate((res_img, rend_img), axis=1) 106 | # res_img = rend_img 107 | 108 | return res_img -------------------------------------------------------------------------------- /renderer/meshRenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | from OpenGL.GLUT import * 5 | from OpenGL.GLU import * 6 | from renderer.shaders.framework import * 7 | 8 | from renderer.glRenderer import glRenderer 9 | 10 | # from renderer.render_utils import ComputeNormal 11 | 12 | _glut_window = None 13 | 14 | ''' 15 | #Usage: 16 | render.set_smpl_mesh(v) #v for vertex locations in(6890,3) 17 | render.setBackgroundTexture(rawImg) #Optional BG texture 18 | render.setWindowSize(rawImg.shape[1], rawImg.shape[0]) #Optional: window size 19 | render.show_once() 20 | ''' 21 | 22 | class meshRenderer(glRenderer): 23 | 24 | def __init__(self, width=1600, height=1200, name='GL Renderer', 25 | # program_files=['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'], 26 | # program_files=['renderer/shaders/normal140.fs', 'renderer/shaders/normal140.vs'], 27 | # program_files=['renderer/shaders/geo140.fs', 'renderer/shaders/geo140.vs'], 28 | render_mode ="normal", #color, geo, normal 29 | color_size=1, ms_rate=1): 30 | 31 | self.render_mode = render_mode 32 | self.program_files ={} 33 | self.program_files['color'] = ['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'] 34 | self.program_files['normal'] = ['renderer/shaders/normal140.fs', 'renderer/shaders/normal140.vs'] 35 | self.program_files['geo'] = ['renderer/shaders/colorgeo140.fs', 'renderer/shaders/colorgeo140.vs'] 36 | 37 | glRenderer.__init__(self, width, height, name, self.program_files[render_mode], color_size, ms_rate) 38 | 39 | def setRenderMode(self, render_mode): 40 | """ 41 | Set render mode among ['color', 'normal', 'geo'] 42 | """ 43 | if self.render_mode == render_mode: 44 | return 45 | 46 | self.render_mode = render_mode 47 | self.initShaderProgram(self.program_files[render_mode]) 48 | 49 | 50 | def drawMesh(self): 51 | 52 | if self.vertex_dim is None: 53 | return 54 | # self.draw_init() 55 | 56 | glColor3f(1,1,0) 57 | glUseProgram(self.program) 58 | 59 | mvMat = glGetFloatv(GL_MODELVIEW_MATRIX) 60 | pMat = glGetFloatv(GL_PROJECTION_MATRIX) 61 | # mvpMat = pMat*mvMat 62 | 63 | self.model_view_matrix = mvMat 64 | self.projection_matrix = pMat 65 | 66 | # glUniformMatrix4fv(self.model_mat_unif, 1, GL_FALSE, self.model_view_matrix.transpose()) 67 | # glUniformMatrix4fv(self.persp_mat_unif, 1, GL_FALSE, self.projection_matrix.transpose()) 68 
| glUniformMatrix4fv(self.model_mat_unif, 1, GL_FALSE, self.model_view_matrix) 69 | glUniformMatrix4fv(self.persp_mat_unif, 1, GL_FALSE, self.projection_matrix) 70 | 71 | # Handle vertex buffer 72 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 73 | glEnableVertexAttribArray(0) 74 | glVertexAttribPointer(0, self.vertex_dim, GL_DOUBLE, GL_FALSE, 0, None) 75 | 76 | # # Handle normal buffer 77 | glBindBuffer(GL_ARRAY_BUFFER, self.normal_buffer) 78 | glEnableVertexAttribArray(1) 79 | glVertexAttribPointer(1, 3, GL_DOUBLE, GL_FALSE, 0, None) 80 | 81 | # # Handle color buffer 82 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 83 | glEnableVertexAttribArray(2) 84 | glVertexAttribPointer(2, 3, GL_DOUBLE, GL_FALSE, 0, None) 85 | 86 | 87 | if True:#self.meshindex_data: 88 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, self.index_buffer) #Note "GL_ELEMENT_ARRAY_BUFFER" instead of GL_ARRAY_BUFFER 89 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, self.meshindex_data, GL_STATIC_DRAW) 90 | 91 | # glDrawArrays(GL_TRIANGLES, 0, self.n_vertices) 92 | glDrawElements(GL_TRIANGLES, len(self.meshindex_data), GL_UNSIGNED_INT, None) #For index array (mesh face data) 93 | glDisableVertexAttribArray(0) 94 | glBindBuffer(GL_ARRAY_BUFFER, 0) 95 | 96 | glUseProgram(0) -------------------------------------------------------------------------------- /renderer/render_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | 5 | 6 | # vertices: frames x meshVerNum x 3 7 | # trifaces: facePolygonNum x 3 = 22800 x 3 8 | def ComputeNormal(vertices, trifaces): 9 | 10 | if vertices.shape[0] > 5000: 11 | print('ComputeNormal: Warning: too big to compute {0}'.format(vertices.shape) ) 12 | return 13 | 14 | #compute vertex Normals for all frames 15 | U = vertices[:,trifaces[:,1],:] - vertices[:,trifaces[:,0],:] #frames x faceNum x 3 16 | V = vertices[:,trifaces[:,2],:] - vertices[:,trifaces[:,1],:] #frames x faceNum x 3 17 | originalShape = U.shape #remember: frames x faceNum x 3 18 | 19 | U = np.reshape(U, [-1,3]) 20 | V = np.reshape(V, [-1,3]) 21 | faceNormals = np.cross(U,V) #frames x 13776 x 3 22 | from sklearn.preprocessing import normalize 23 | 24 | if np.isnan(np.max(faceNormals)): 25 | print('ComputeNormal: Warning nan is detected {0}') 26 | return 27 | faceNormals = normalize(faceNormals) 28 | 29 | faceNormals = np.reshape(faceNormals, originalShape) 30 | 31 | if False: #Slow version 32 | vertex_normals = np.zeros(vertices.shape) #(frames x 11510) x 3 33 | for fIdx, vIdx in enumerate(trifaces[:,0]): 34 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 35 | for fIdx, vIdx in enumerate(trifaces[:,1]): 36 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 37 | for fIdx, vIdx in enumerate(trifaces[:,2]): 38 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 39 | else: #Faster version 40 | # Computing vertex normals, much faster (and obscure) replacement 41 | index = np.vstack((np.ravel(trifaces), np.repeat(np.arange(len(trifaces)), 3))).T 42 | index_sorted = index[index[:,0].argsort()] 43 | vertex_normals = np.add.reduceat(faceNormals[:,index_sorted[:, 1],:][0], 44 | np.concatenate(([0], np.cumsum(np.unique(index_sorted[:, 0], 45 | return_counts=True)[1])[:-1])))[None, :] 46 | vertex_normals = vertex_normals.astype(np.float64) 47 | 48 | originalShape = vertex_normals.shape 49 | vertex_normals = np.reshape(vertex_normals, [-1,3]) 50 | vertex_normals = normalize(vertex_normals) 51 | vertex_normals = 
np.reshape(vertex_normals,originalShape) 52 | 53 | return vertex_normals 54 | 55 | 56 | 57 | def ComputeNormal_gpu(vertices, trifaces): 58 | import torch 59 | import torch.nn.functional as F 60 | 61 | if vertices.shape[0] > 5000: 62 | print('ComputeNormal: Warning: too big to compute {0}'.format(vertices.shape) ) 63 | return 64 | 65 | #compute vertex Normals for all frames 66 | #trifaces_cuda = torch.from_numpy(trifaces.astype(np.long)).cuda() 67 | vertices_cuda = torch.from_numpy(vertices.astype(np.float32)).cuda() 68 | 69 | U_cuda = vertices_cuda[:,trifaces[:,1],:] - vertices_cuda[:,trifaces[:,0],:] #frames x faceNum x 3 70 | V_cuda = vertices_cuda[:,trifaces[:,2],:] - vertices_cuda[:,trifaces[:,1],:] #frames x faceNum x 3 71 | originalShape = list(U_cuda.size()) #remember: frames x faceNum x 3 72 | 73 | U_cuda = torch.reshape(U_cuda, [-1,3])#.astype(np.float32) 74 | V_cuda = torch.reshape(V_cuda, [-1,3])#.astype(np.float32) 75 | 76 | faceNormals = U_cuda.cross(V_cuda) 77 | faceNormals = F.normalize(faceNormals,dim=1) 78 | 79 | faceNormals = torch.reshape(faceNormals, originalShape) 80 | 81 | # trifaces has duplicated vertex index, so cannot be parallazied 82 | # vertex_normals = torch.zeros(vertices.shape,dtype=torch.float32).cuda() #(frames x 11510) x 3 83 | # for fIdx, vIdx in enumerate(trifaces[:,0]): 84 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 85 | # for fIdx, vIdx in enumerate(trifaces[:,1]): 86 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 87 | # for fIdx, vIdx in enumerate(trifaces[:,2]): 88 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 89 | 90 | # Computing vertex normals, much faster (and obscure) replacement 91 | index = np.vstack((np.ravel(trifaces), np.repeat(np.arange(len(trifaces)), 3))).T 92 | index_sorted = index[index[:,0].argsort()] 93 | vertex_normals = np.add.reduceat(faceNormals[:,index_sorted[:, 1],:][0], 94 | np.concatenate(([0], np.cumsum(np.unique(index_sorted[:, 0], 95 | return_counts=True)[1])[:-1])))[None, :] 96 | vertex_normals = torch.from_numpy(vertex_normals).float().cuda() 97 | 98 | vertex_normals = F.normalize(vertex_normals,dim=2) 99 | vertex_normals = vertex_normals.data.cpu().numpy() #(batch, chunksize, dim) 100 | 101 | return vertex_normals 102 | -------------------------------------------------------------------------------- /demo/demo_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import argparse 4 | 5 | class DemoOptions(): 6 | 7 | def __init__(self): 8 | parser = argparse.ArgumentParser() 9 | 10 | # parser.add_argument('--checkpoint', required=False, default=default_checkpoint, help='Path to pretrained checkpoint') 11 | default_checkpoint_body_smpl ='./extra_data/body_module/pretrained_weights/2020_05_31-00_50_43-best-51.749683916568756.pt' 12 | parser.add_argument('--checkpoint_body_smpl', required=False, default=default_checkpoint_body_smpl, help='Path to pretrained checkpoint') 13 | default_checkpoint_body_smplx ='./extra_data/body_module/pretrained_weights/smplx-03-28-46060-w_spin_mlc3d_46582-2089_2020_03_28-21_56_16.pt' 14 | parser.add_argument('--checkpoint_body_smplx', required=False, default=default_checkpoint_body_smplx, help='Path to pretrained checkpoint') 15 | default_checkpoint_hand = "./extra_data/hand_module/pretrained_weights/pose_shape_best.pth" 16 | parser.add_argument('--checkpoint_hand', required=False, default=default_checkpoint_hand, help='Path to pretrained checkpoint') 17 | 18 | # input options 19 | parser.add_argument('--input_path', type=str, default=None, help="""Path of video, image, or a folder where image files exists""") 20 | parser.add_argument('--start_frame', type=int, default=0, help='given a sequence of frames, set the starting frame') 21 | parser.add_argument('--end_frame', type=int, default=float('inf'), help='given a sequence of frames, set the last frame') 22 | parser.add_argument('--pkl_dir', type=str, help='Path of storing pkl files that store the predicted results') 23 | parser.add_argument('--openpose_dir', type=str, help='Directory of storing the prediction of openpose prediction') 24 | 25 | # output options 26 | parser.add_argument('--out_dir', type=str, default=None, help='Folder of output images.') 27 | # parser.add_argument('--pklout', action='store_true', help='Export mocap output as pkl file') 28 | parser.add_argument('--save_bbox_output', action='store_true', help='Save the bboxes in json files (bbox_xywh format)') 29 | parser.add_argument('--save_pred_pkl', action='store_true', help='Save the predictions (bboxes, params, meshes in pkl format') 30 | parser.add_argument("--save_mesh", action='store_true', help="Save the predicted vertices and faces") 31 | parser.add_argument("--save_frame", action='store_true', help='Save the extracted frames from video input or webcam') 32 | 33 | # Other options 34 | parser.add_argument('--single_person', action='store_true', help='Reconstruct only one person in the scene with the biggest bbox') 35 | parser.add_argument('--no_display', action='store_true', help='Do not visualize output on the screen') 36 | parser.add_argument('--no_video_out', action='store_true', help='Do not merge rendered frames to video (ffmpeg)') 37 | parser.add_argument('--smpl_dir', type=str, default='./extra_data/smpl/', help='Folder where smpl files are located.') 38 | parser.add_argument('--skip', action='store_true', help='Skip there exist already processed outputs') 39 | parser.add_argument('--video_url', type=str, default=None, help='URL of YouTube video, or image.') 40 | parser.add_argument('--download', '-d', action='store_true', help='Download YouTube video first (in webvideo folder), and process it') 41 | 42 | # Body mocap specific options 43 | parser.add_argument('--use_smplx', action='store_true', help='Use SMPLX model for body mocap') 44 | 45 | # Hand mocap specific options 46 | parser.add_argument('--view_type', type=str, default='third_view', choices=['third_view', 'ego_centric'], 47 
| help = "The view type of input. It could be ego-centric (such as epic kitchen) or third view") 48 | parser.add_argument('--crop_type', type=str, default='no_crop', choices=['hand_crop', 'no_crop'], 49 | help = """ 'hand_crop' means the hand are central cropped in input. (left hand should be flipped to right). 50 | 'no_crop' means hand detection is required to obtain hand bbox""") 51 | 52 | # Whole motion capture (FrankMocap) specific options 53 | parser.add_argument('--frankmocap_fast_mode', action='store_true', help="Use fast hand detection mode for whole body motion capture (frankmocap)") 54 | 55 | # renderer 56 | parser.add_argument("--renderer_type", type=str, default="opengl", 57 | choices=['pytorch3d', 'opendr', 'opengl_gui', 'opengl'], help="type of renderer to use") 58 | 59 | self.parser = parser 60 | 61 | 62 | def parse(self): 63 | self.opt = self.parser.parse_args() 64 | return self.opt 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FrankMocap: A Strong and Easy-to-use Single View 3D Hand+Body Pose Estimator 2 | 3 | FrankMocap pursues an easy-to-use single view 3D motion capture system developed by Facebook AI Research (FAIR). FrankMocap provides state-of-the-art 3D pose estimation outputs for body, hand, and body+hands in a single system. The core objective of FrankMocap is to democratize the 3D human pose estimation technology, enabling anyone (researchers, engineers, developers, artists, and others) can easily obtain 3D motion capture outputs from videos and images. 4 | 5 | Btw, why the name FrankMocap? Our pipeline to integrate body and hand modules reminds us of [Frankenstein's monster](https://en.wikipedia.org/wiki/Frankenstein)! 6 | 7 | ### News: 8 | - [2021/08/18] Our paper has been accepted to ICCV Workshop 2021. 9 | - [2020/10/09] We have improved openGL rendering speed. It's about 40% faster. (e.g., body module: 6fps -> 11fps) 10 | 11 | ## Key Features 12 | - Body Motion Capture: 13 |

  (demo images omitted)
17 | - Hand Motion Capture
  (demo images omitted)
22 | - Egocentric Hand Motion Capture
  (demo images omitted)
27 | - Whole body Motion Capture (body + hands)
  (demo images omitted)
34 | 35 | 36 | ## Installation 37 | - See [INSTALL.md](docs/INSTALL.md) 38 | 39 | ## A Quick Start 40 | - Run body motion capture 41 | ``` 42 | # using a machine with a monitor to show output on screen 43 | python -m demo.demo_bodymocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 44 | 45 | # screenless mode (e.g., a remote server) 46 | xvfb-run -a python -m demo.demo_bodymocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 47 | ``` 48 | 49 | - Run hand motion capture 50 | ``` 51 | # using a machine with a monitor to show outputs on screen 52 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output 53 | 54 | # screenless mode (e.g., a remote server) 55 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output 56 | ``` 57 | 58 | - Run whole body motion capture 59 | ``` 60 | # using a machine with a monitor to show outputs on screen 61 | python -m demo.demo_frankmocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 62 | 63 | # screenless mode (e.g., a remote server) 64 | xvfb-run -a python -m demo.demo_frankmocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 65 | ``` 66 | - Note: 67 | - Above commands use openGL by default. If it does not work, you may try alternative renderers (pytorch3d or openDR). 68 | - See the readme of each module for details 69 | 70 | 71 | ## Joint Order 72 | - See [joint_order](docs/joint_order.md) 73 | 74 | 75 | ## Body Motion Capture Module 76 | - See [run_bodymocap](docs/run_bodymocap.md) 77 | 78 | ## Hand Motion Capture Module 79 | - See [run_handmocap](docs/run_handmocap.md) 80 | 81 | ## Whole Body Motion Capture Module (Body + Hand) 82 | - See [run_totalmocap](docs/run_totalmocap.md) 83 | 84 | ## License 85 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 86 | See the [LICENSE](LICENSE) file. 87 | 88 | ## References 89 | - FrankMocap is based on the following research outputs: 90 | ``` 91 | @InProceedings{rong2021frankmocap, 92 | title={FrankMocap: A Monocular 3D Whole-Body Pose Estimation System via Regression and Integration}, 93 | author={Rong, Yu and Shiratori, Takaaki and Joo, Hanbyul}, 94 | booktitle={IEEE International Conference on Computer Vision Workshops}, 95 | year={2021} 96 | } 97 | 98 | @article{joo2020eft, 99 | title={Exemplar Fine-Tuning for 3D Human Pose Fitting Towards In-the-Wild 3D Human Pose Estimation}, 100 | author={Joo, Hanbyul and Neverova, Natalia and Vedaldi, Andrea}, 101 | journal={3DV}, 102 | year={2021} 103 | } 104 | ``` 105 | 106 | - FrankMocap leverages many amazing open-sources shared in research community. 
107 | - [SMPL](https://smpl.is.tue.mpg.de/), [SMPLX](https://smpl-x.is.tue.mpg.de/) 108 | - [Detectron2](https://github.com/facebookresearch/detectron2) 109 | - [Pytorch3D](https://pytorch3d.org/) (for rendering) 110 | - [OpenDR](https://github.com/mattloper/opendr/wiki) (for rendering) 111 | - [SPIN](https://github.com/nkolot/SPIN) (for body module) 112 | - [100DOH](https://fouheylab.eecs.umich.edu/~dandans/projects/100DOH/) (for hand detection) 113 | - [lightweight-human-pose-estimation](https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch) (for body detection) 114 | 115 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Installing All Modules 4 | 5 | - The entire modules can be installed following the instruction below. 6 | Note that you may want to install body module only which has fewer dependencies. In this case, you may skip some steps. See below the details. 7 | 8 | - The basic installation 9 | ``` 10 | conda create -n venv_frankmocap python=3.7 11 | conda activate venv_frankmocap 12 | 13 | # Install basic dependencies 14 | sudo apt-get install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev freeglut3-dev libosmesa6-dev 15 | 16 | # Install ffmpeg 17 | sudo apt-get install ffmpeg 18 | 19 | # Install cuda 20 | # Choose versions based on your system. For example: 21 | # conda install cudatoolkit=10.1 cudnn=7.6.0 22 | 23 | # Install pytorch and torchvision 24 | conda install -c pytorch pytorch==1.6.0 torchvision cudatoolkit=10.1 25 | 26 | # Install other required libraries 27 | pip install -r docs/requirements.txt 28 | ``` 29 | 30 | - Install [Detectron2](https://github.com/facebookresearch/detectron2) (for hand module) 31 | - This is required for hand motion capture. You can skip this if you need only body module 32 | - If you followed the versions mentioned above (pytorch 1.6.0, CUDA 10.1, on Linux), you may try the following: 33 | ``` 34 | python -m pip install detectron2 -f \ 35 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html 36 | ``` 37 | - If it doesn't work, follow the instruction of [Detectron2](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md) 38 | 39 | - Install pytorch3d (optional, for pytorch3d renderering) 40 | - We use pytorch3d for an alternative rendering option. We provide other options (opengl by default) and you may skip this process. 41 | - You may try the following (pytorch 1.6.0, on Linux and Mac). 42 | ``` 43 | pip install pytorch3d 44 | ``` 45 | - If it doesn't work, follow the instruction of [Pytorch3D](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md) 46 | 47 | - Install other third-party libraries + download pretrained models and sample data 48 | - Run the following script 49 | ``` 50 | sh scripts/install_frankmocap.sh 51 | ``` 52 | 53 | - Setting SMPL/SMPL-X Models 54 | - We use SMPL and SMPL-X model as 3D pose estimation output. You have to download them from the original website. 55 | - Download SMPL Model (Neutral model: basicModel_neutral_lbs_10_207_0_v1.0.0.pkl): 56 | - Download in the original [website](http://smplify.is.tue.mpg.de/login). You need to register to download the SMPL data. 
57 | - Put the file in: ./extra_data/smpl/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl 58 | - This is only for body module 59 | 60 | - Download SMPLX Model (Neutral model: SMPLX_NEUTRAL.pkl): 61 | - You can use SMPL-X model for body mocap instead of SMPL model. 62 | - Download ```SMPLX_NEUTRAL.pkl``` in the original [SMPL website](https://smpl-x.is.tue.mpg.de/). You need to register to download the SMPLX data. 63 | - Put the ```SMPLX_NEUTRAL.pkl`` file in: ./extra_data/smpl/SMPLX_NEUTRAL.pkl 64 | - This is for hand module and whole body module 65 | 66 | ## Folder hierarchy 67 | - Once you sucessfully installed and downloaded all, you should have the following files in your directory: 68 | ``` 69 | ./extra_data/ 70 | ├── hand_module 71 | │ └── mean_mano_params.pkl 72 | │ └── SMPLX_HAND_INFO.pkl 73 | | └── pretrained_weights 74 | | | └── pose_shape_best.pth 75 | │ └── hand_detector 76 | │ └── faster_rcnn_1_8_132028.pth 77 | │ └── model_0529999.pth 78 | ├── body_module 79 | | └──body_pose_estimator 80 | | └── checkpoint_iter_370000.pth 81 | └── smpl 82 | └── basicModel_neutral_lbs_10_207_0_v1.0.0.pkl 83 | └── SMPLX_NEUTRAL.pkl 84 | 85 | ./detectors/ 86 | ├── body_pose_estimator 87 | ├── hand_object_detector 88 | └── hand_only_detector 89 | ``` 90 | 91 | ## Installing Body Module Only 92 | 93 | - The basic installation 94 | ``` 95 | conda create -n venv_frankmocap python=3.7 96 | conda activate venv_frankmocap 97 | 98 | # Install cuda 99 | # Choose versions based on your system. For example: 100 | # conda install cudatoolkit=10.1 cudnn=7.6.0 101 | 102 | # Install pytorch and torchvision 103 | conda install -c pytorch pytorch==1.6.0 torchvision cudatoolkit=10.1 104 | 105 | # Install other required libraries 106 | pip install -r docs/requirements.txt 107 | ``` 108 | 109 | - Install pytorch3d (optional, for pytorch3d renderering) 110 | - We use pytorch3d for an alternative rendering option. We provide other options (opengl by default) and you may skip this process. 111 | - You may try the following (pytorch 1.6.0, on Linux and Mac). 112 | ``` 113 | pip install pytorch3d 114 | ``` 115 | - If it doesn't work, follow the instruction of [Pytorch3D](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md) 116 | 117 | - Install 2D pose detector and download pretrained models and sample data 118 | - Install [2D keypoint detector](https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch): 119 | ``` 120 | sh scripts/install_pose2d.sh 121 | ``` 122 | - Download pretrained model and other extra data 123 | ``` 124 | sh scripts/download_data_body_module.sh 125 | ``` 126 | - Download sample data 127 | ``` 128 | sh scripts/download_sample_data.sh 129 | ``` 130 | - Setting SMPL/SMPL-X Models 131 | - You only need SMPL model. See above 132 | -------------------------------------------------------------------------------- /mocap_utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
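# Overview: small I/O and path helpers shared by the demos -- mesh export to .obj,
# directory creation/reset, recursive file listing, and pickle/json/npz wrappers.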
2 | 3 | # file to store some often use functions 4 | import os, sys, shutil 5 | import os.path as osp 6 | import multiprocessing as mp 7 | import numpy as np 8 | import cv2 9 | import pickle 10 | import json 11 | 12 | 13 | def save_mesh_to_obj(obj_path, verts, faces=None): 14 | assert isinstance(verts, np.ndarray) 15 | assert isinstance(faces, np.ndarray) 16 | 17 | with open(obj_path, 'w') as out_f: 18 | # write verts 19 | for v in verts: 20 | out_f.write(f"v {v[0]:.4f} {v[1]:.4f} {v[2]:.4f}\n") 21 | # write faces 22 | if faces is not None: 23 | faces = faces.copy() + 1 24 | for f in faces: 25 | out_f.write(f"f {f[0]} {f[1]} {f[2]}\n") 26 | 27 | 28 | def renew_dir(target_dir): 29 | if osp.exists(target_dir): 30 | shutil.rmtree(target_dir) 31 | os.makedirs(target_dir) 32 | 33 | 34 | def build_dir(target_dir): 35 | if not osp.exists(target_dir): 36 | os.makedirs(target_dir) 37 | 38 | 39 | def get_subdir(in_path): 40 | subdir_path = '/'.join(in_path.split('/')[:-1]) 41 | return subdir_path 42 | 43 | def make_subdir(in_path): 44 | subdir_path = get_subdir(in_path) 45 | build_dir(subdir_path) 46 | 47 | 48 | def update_extension(file_path, new_extension): 49 | assert new_extension[0] == '.' 50 | old_extension = '.' + file_path.split('.')[-1] 51 | new_file_path = file_path.replace(old_extension, new_extension) 52 | return new_file_path 53 | 54 | 55 | def get_all_files(in_dir, extension, path_type='full', keywords=''): 56 | assert path_type in ['full', 'relative', 'name_only'] 57 | assert isinstance(extension, str) or isinstance(extension, tuple) 58 | assert isinstance(keywords, str) 59 | 60 | all_files = list() 61 | for subdir, dirs, files in os.walk(in_dir): 62 | for file in files: 63 | if len(keywords)>0: 64 | if file.find(keywords)<0: 65 | continue 66 | if file.endswith(extension): 67 | if path_type == 'full': 68 | file_path = osp.join(subdir, file) 69 | elif path_type == 'relative': 70 | file_path = osp.join(subdir, file).replace(in_dir, '') 71 | if file_path.startswith('/'): 72 | file_path = file_path[1:] 73 | else: 74 | file_path = file 75 | all_files.append(file_path) 76 | return sorted(all_files) 77 | 78 | 79 | def remove_swp(in_dir): 80 | remove_files = list() 81 | for subdir, dirs, files in os.walk(in_dir): 82 | for file in files: 83 | if file.endswith('.swp'): 84 | full_path = osp.join(subdir,file) 85 | os.remove(full_path) 86 | 87 | 88 | def remove_pyc(in_dir): 89 | remove_files = list() 90 | for subdir, dirs, files in os.walk(in_dir): 91 | for file in files: 92 | if file.endswith('.pyc'): 93 | full_path = osp.join(subdir,file) 94 | os.remove(full_path) 95 | 96 | 97 | def md5sum(file_path): 98 | import hashlib 99 | hash_md5 = hashlib.md5() 100 | with open(file_path, 'rb') as in_f: 101 | hash_md5.update(in_f.read()) 102 | return hash_md5.hexdigest() 103 | 104 | 105 | # save data to pkl 106 | def save_pkl(res_file, data_list, protocol=-1): 107 | assert res_file.endswith(".pkl") 108 | res_file_dir = '/'.join(res_file.split('/')[:-1]) 109 | if len(res_file_dir)>0: 110 | if not osp.exists(res_file_dir): 111 | os.makedirs(res_file_dir) 112 | with open(res_file, 'wb') as out_f: 113 | if protocol==2: 114 | pickle.dump(data_list, out_f, protocol=2) 115 | else: 116 | pickle.dump(data_list, out_f) 117 | 118 | 119 | def load_pkl(pkl_file, res_list=None): 120 | assert pkl_file.endswith(".pkl") 121 | with open(pkl_file, 'rb') as in_f: 122 | try: 123 | data = pickle.load(in_f) 124 | except UnicodeDecodeError: 125 | in_f.seek(0) 126 | data = pickle.load(in_f, encoding='latin1') 127 | return data 128 
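# Illustrative round-trip of the pkl helpers above (the path is a made-up example):
#   save_pkl("./mocap_output/mocap/result.pkl", pred_output_list)
#   pred_output_list = load_pkl("./mocap_output/mocap/result.pkl")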
| 129 | 130 | def load_json(in_file): 131 | assert in_file.endswith(".json") 132 | with open(in_file, 'r') as in_f: 133 | all_data = json.load(in_f) 134 | return all_data 135 | 136 | 137 | def save_json(out_file, data): 138 | assert out_file.endswith(".json") 139 | with open(out_file, "w") as out_f: 140 | json.dump(data, out_f) 141 | 142 | 143 | def load_npz(npz_file): 144 | res_data = dict() 145 | assert npz_file.endswith(".npz") 146 | raw_data = np.load(npz_file, mmap_mode='r') 147 | for key in raw_data.files: 148 | res_data[key] = raw_data[key] 149 | return res_data 150 | 151 | 152 | def update_npz_file(npz_file, new_key, new_data): 153 | # load original data 154 | assert npz_file.endswith(".npz") 155 | raw_data = np.load(npz_file, mmap_mode='r') 156 | all_data = dict() 157 | for key in raw_data.files: 158 | all_data[key] = raw_data[key] 159 | # add new data && save 160 | all_data[new_key] = new_data 161 | np.savez(npz_file, **all_data) 162 | 163 | 164 | def analyze_path(input_path): 165 | # assume input_path is the path of a file not a directory 166 | record = input_path.split('/') 167 | input_dir = '/'.join(record[:-1]) 168 | file_name = record[-1] 169 | assert file_name.find(".")>0 170 | ext = file_name.split('.')[-1] 171 | file_basename = '.'.join(file_name.split('.')[:-1]) 172 | return input_dir, file_name, file_basename, ext -------------------------------------------------------------------------------- /bodymocap/body_bbox_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import os.path as osp 5 | import sys 6 | import numpy as np 7 | import cv2 8 | 9 | import torch 10 | import torchvision.transforms as transforms 11 | # from PIL import Image 12 | 13 | # Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py 14 | 15 | # 2D body pose estimator 16 | pose2d_estimator_path = './detectors/body_pose_estimator' 17 | sys.path.append(pose2d_estimator_path) 18 | from detectors.body_pose_estimator.pose2d_models.with_mobilenet import PoseEstimationWithMobileNet 19 | from detectors.body_pose_estimator.modules.load_state import load_state 20 | from detectors.body_pose_estimator.val import normalize, pad_width 21 | from detectors.body_pose_estimator.modules.pose import Pose, track_poses 22 | from detectors.body_pose_estimator.modules.keypoints import extract_keypoints, group_keypoints 23 | 24 | 25 | class BodyPoseEstimator(object): 26 | """ 27 | Hand Detector for third-view input. 
28 | It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git) 29 | """ 30 | def __init__(self): 31 | print("Loading Body Pose Estimator") 32 | self.__load_body_estimator() 33 | 34 | 35 | def __load_body_estimator(self): 36 | net = PoseEstimationWithMobileNet() 37 | pose2d_checkpoint = "./extra_data/body_module/body_pose_estimator/checkpoint_iter_370000.pth" 38 | checkpoint = torch.load(pose2d_checkpoint, map_location='cpu') 39 | load_state(net, checkpoint) 40 | net = net.eval() 41 | net = net.cuda() 42 | self.model = net 43 | 44 | 45 | #Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/demo.py 46 | def __infer_fast(self, img, input_height_size, stride, upsample_ratio, 47 | cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 48 | height, width, _ = img.shape 49 | scale = input_height_size / height 50 | 51 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 52 | scaled_img = normalize(scaled_img, img_mean, img_scale) 53 | min_dims = [input_height_size, max(scaled_img.shape[1], input_height_size)] 54 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 55 | 56 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() 57 | if not cpu: 58 | tensor_img = tensor_img.cuda() 59 | 60 | stages_output = self.model(tensor_img) 61 | 62 | stage2_heatmaps = stages_output[-2] 63 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 64 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 65 | 66 | stage2_pafs = stages_output[-1] 67 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 68 | pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 69 | 70 | return heatmaps, pafs, scale, pad 71 | 72 | def detect_body_pose(self, img): 73 | """ 74 | Output: 75 | current_bbox: BBOX_XYWH 76 | """ 77 | stride = 8 78 | upsample_ratio = 4 79 | orig_img = img.copy() 80 | 81 | # forward 82 | heatmaps, pafs, scale, pad = self.__infer_fast(img, 83 | input_height_size=256, stride=stride, upsample_ratio=upsample_ratio) 84 | 85 | total_keypoints_num = 0 86 | all_keypoints_by_type = [] 87 | num_keypoints = Pose.num_kpts 88 | for kpt_idx in range(num_keypoints): # 19th for bg 89 | total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 90 | 91 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 92 | for kpt_id in range(all_keypoints.shape[0]): 93 | all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 94 | all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 95 | 96 | ''' 97 | # print(len(pose_entries)) 98 | if len(pose_entries)>1: 99 | pose_entries = pose_entries[:1] 100 | print("We only support one person currently") 101 | # assert len(pose_entries) == 1, "We only support one person currently" 102 | ''' 103 | 104 | current_poses, current_bbox = list(), list() 105 | for n in range(len(pose_entries)): 106 | if len(pose_entries[n]) == 0: 107 | continue 108 | pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1 109 | for kpt_id in range(num_keypoints): 110 | if pose_entries[n][kpt_id] != -1.0: # keypoint was found 111 | pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0]) 112 
| pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1]) 113 | pose = Pose(pose_keypoints, pose_entries[n][18]) 114 | current_poses.append(pose.keypoints) 115 | current_bbox.append(np.array(pose.bbox)) 116 | 117 | # enlarge the bbox 118 | for i, bbox in enumerate(current_bbox): 119 | x, y, w, h = bbox 120 | margin = 0.05 121 | x_margin = int(w * margin) 122 | y_margin = int(h * margin) 123 | x0 = max(x-x_margin, 0) 124 | y0 = max(y-y_margin, 0) 125 | x1 = min(x+w+x_margin, orig_img.shape[1]) 126 | y1 = min(y+h+y_margin, orig_img.shape[0]) 127 | current_bbox[i] = np.array((x0, y0, x1-x0, y1-y0)).astype(np.int32) 128 | 129 | return current_poses, current_bbox -------------------------------------------------------------------------------- /docs/joint_order.md: -------------------------------------------------------------------------------- 1 | # Joint Order (Position & Rotation) 2 | 3 | ## Attention !!! 4 | The orders of joint position and joint angle are different. The details are listed below. 5 | 6 | ## Hand Joint 7 | ### Joint Position (Hand) 8 | 9 | The joint positions are converted to image space (X,Y coordinates are aligned to image, Z coordinates are rescaled accordingly.) 10 | 11 | To obtain predicted 3D hand joint position, you can use [pred_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/handmocap/hand_mocap_api.py#L222) in hand-only demo or 12 | [pred_lhand_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L186) and [pred_rhand_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L192) in body-plus-hand demo. 13 | 14 | The order of hand joint position is visualized below: 15 | 16 |

  (figure omitted: hand joint position numbering)
19 | 20 | 21 | The order of hand joint (position) is listed below: 22 | ``` 23 | 0 : Wrist 24 | 1 : Thumb_00 25 | 2 : Thumb_01 26 | 3 : Thumb_02 27 | 4 : Thumb_03 28 | 5 : Index_00 29 | 6 : Index_01 30 | 7 : Index_02 31 | 8 : Index_03 32 | 9 : Middle_00 33 | 10 : Middle_01 34 | 11 : Middle_02 35 | 12 : Middle_03 36 | 13 : Ring_00 37 | 14 : Ring_01 38 | 15 : Ring_02 39 | 16 : Ring_03 40 | 17 : Little_00 41 | 18 : Little_01 42 | 19 : Little_02 43 | 20 : Little_03 44 | ``` 45 | 46 | ### Joint Angle (Hand) 47 | To obtain predicted 3D hand joint angles (in [angle-axis format](https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation)), you can use [pred_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/handmocap/hand_mocap_api.py#L197) in hand-only demo or [pred_left_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L234) [pred_right_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L235) in body-plus-hand demo. 48 | 49 | The axis of joint angle is depicted below (right-hand rule): 50 |

  (figure omitted: hand joint rotation axes, right-hand rule)
53 | 54 | 55 | If the dimension of ```hand_pose``` is 45 (15 * 3), then the joint starts from ```Index_00```; otherwise the dimension should be 48 (16 * 3) and the joint starts from wrist (or say, hand global orientation). 56 | 57 | The order of hand joint (angle) is listed below: 58 | ``` 59 | 0 : Wrist 60 | 1 : Index_00 61 | 2 : Index_01 62 | 3 : Index_02 63 | 4 : Middle_00 64 | 5 : Middle_01 65 | 6 : Middle_02 66 | 7 : Little_00 67 | 8 : Little_01 68 | 9 : Little_02 69 | 10 : Ring_00 70 | 11 : Ring_01 71 | 12 : Ring_02 72 | 13 : Thumb_00 73 | 14 : Thumb_01 74 | 15 : Thumb_02 75 | ``` 76 | 77 | 78 | ## Body Joint 79 | ### Joint Position (Body) 80 | 81 | The joint positions are converted to image space (X,Y coordinates are aligned to image, Z coordinates are rescaled accordingly.) 82 | 83 | To obtain predicted 3D body joint position, you can use [pred_joints_img](https://github.com/facebookresearch/frankmocap/blob/44f4f6718a45baf0836c9785f02ea1d74f6f5774/bodymocap/body_mocap_api.py#L112) in body-only demo or 84 | [pred_body_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L179) in body-plus-hand demo. 85 | 86 | The order of body joint (position) is listed below: 87 | ``` 88 | 0: OP_Nose 89 | 1: OP_Neck 90 | 2: OP_R_Shoulder 91 | 3: OP_R_Elblow 92 | 4: OP_R_Wrist 93 | 5: OP_L_Shoulder 94 | 6: OP_L_Elbow 95 | 7: OP_L_Wrist 96 | 8: OP_Middle_Hip 97 | 9: OP_R_Hip 98 | 10: OP_R_Knee 99 | 11: OP_R_Ankle 100 | 12: OP_L_Hip 101 | 13: OP_L_Knee 102 | 14: OP_L_Ankle 103 | 15: OP_R_Eye 104 | 16: OP_L_Eye 105 | 17: OP R_Ear 106 | 18: OP_L_Ear 107 | 19: OP_L_Big_Toe 108 | 20: OP_L_Small_Toe 109 | 21: OP_L_Heel 110 | 22: OP_R_Big_Toe 111 | 23: OP_R_Small_Toe 112 | 24: OP_R_Heel 113 | 25: R_Ankle 114 | 26: R_Knee 115 | 27: R_Hip 116 | 28: L_Hip 117 | 29: L_Knee 118 | 30: L_Ankle 119 | 31: R_Wrist 120 | 32: R_Elbow 121 | 33: R_Shoulder 122 | 34: L_Shoulder 123 | 35: L_Elbow 124 | 36: L_Wrist 125 | 37: Neck (LSP) 126 | 38: Top of Head (LSP) 127 | 39: Pelvis (MPII) 128 | 40: Thorax (MPII) 129 | 41: Spine (H36M) 130 | 42: Jaw (H36M) 131 | 43: Head (H36M) 132 | 44: Nose 133 | 45: L_Eye 134 | 46: R_Eye 135 | 47: L_Ear 136 | 48: R_Ear 137 | ``` 138 | 139 | ### Joint Angle (Body) 140 | To obtain predicted 3D body joint angles (in [angle-axis format](https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation)), you can use [pred_body_pose](https://github.com/facebookresearch/frankmocap/blob/44f4f6718a45baf0836c9785f02ea1d74f6f5774/bodymocap/body_mocap_api.py#L115) in body-only demo or [pred_body_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L164) in body-plus-hand demo. 141 | 142 | The dimesion should be 72 (24 * 3). It is worth noting that if SMPL-X is used for body module, then the 22-th and 23-th body joint angles are invalid, we keep it for the consistent format with SMPL. 
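As a quick, minimal sketch of how these angle-axis outputs can be used (illustrative only: `pred_body_pose` and `pred_hand_pose` are assumed to be already-loaded NumPy arrays of the shapes described above, filled here with placeholder zeros), the flat vectors can be reshaped into per-joint form and converted to rotation matrices with the repo's `mocap_utils.geometry_utils.angle_axis_to_rotation_matrix`:

```
import numpy as np
from mocap_utils.geometry_utils import angle_axis_to_rotation_matrix

# Placeholder predictions: 72-dim body pose and 48-dim hand pose in angle-axis format
pred_body_pose = np.zeros(72, dtype=np.float32)
pred_hand_pose = np.zeros(48, dtype=np.float32)

body_aa = pred_body_pose.reshape(24, 3)   # 24 body joints, ordered as listed below
hand_aa = pred_hand_pose.reshape(16, 3)   # wrist (global orientation) + 15 finger joints

body_rotmat = angle_axis_to_rotation_matrix(body_aa)   # (24, 3, 3) rotation matrices
hand_rotmat = angle_axis_to_rotation_matrix(hand_aa)   # (16, 3, 3) rotation matrices
print(body_rotmat.shape, hand_rotmat.shape)
```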
143 | 144 | The order of body joint (angle) is listed below: 145 | ``` 146 | 0: Global 147 | 1: L_Hip 148 | 2: R_Hip 149 | 3: Spine_01 150 | 4: L_Knee 151 | 5: R_Knee 152 | 6: Spine_02 153 | 7: L_Ankle 154 | 8: R_Ankle 155 | 9: Spine_03 156 | 10: L_Toe 157 | 11: R_Toe 158 | 12: Neck 159 | 13: L_Collar 160 | 14: R_Collar 161 | 15: Head 162 | 16: L_Shoulder 163 | 17: R_Shoulder 164 | 18: L_Elbow 165 | 19: R_Elbow 166 | 20: L_Wrist 167 | 21: R_Wrist 168 | 22: L_Palm (Invalid for SMPL-X) 169 | 23: R_Palm (Invalid for SMPL-X) 170 | ``` 171 | 172 | The skeleton of SMPL body is depicted below, for SMPL-X body, the 22-th and 23-th body joint are invalid: 173 |

  (figure omitted: SMPL body skeleton)
176 | 177 | 178 | 179 | ## License 180 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 181 | See the [LICENSE](LICENSE) file. -------------------------------------------------------------------------------- /handmocap/hand_modules/base_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Part of the code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 4 | 5 | import argparse 6 | import os 7 | import os.path as osp 8 | import torch 9 | 10 | class BaseOptions(): 11 | def __init__(self): 12 | self.parser = argparse.ArgumentParser() 13 | self.initialized = False 14 | 15 | def initialize(self): 16 | self.parser.add_argument('--dist', action='store_true', help='whether to use distributed training') 17 | self.parser.add_argument('--local_rank', type=int, default=0) 18 | self.parser.add_argument('--batchSize', type=int, default=128, help='input batch size') 19 | self.parser.add_argument('--inputSize', type=int, default=224, help='then crop to this size') 20 | self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels') 21 | self.parser.add_argument('--gpu_ids', type=str, default='0,1', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 22 | self.parser.add_argument('--name', type=str, default='h3dw', help='name of the experiment. It decides where to store samples and models') 23 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 24 | self.parser.add_argument('--checkpoints_dir', type=str, default='/home/hjoo/dropbox/hand_yu/checkpoints', help='models are saved here') 25 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 26 | self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size') 27 | self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display') 28 | self.parser.add_argument('--display_port', type=int, default=80, help='visdom port of the web display') 29 | 30 | self.parser.add_argument('--data_root', type=str, default='', help='root dir for all the datasets') 31 | self.parser.add_argument('--freihand_anno_path', type=str, default='', help='annotation_path that stores the information of freihand dataset') 32 | self.parser.add_argument('--ho3d_anno_path', type=str, default='', help='annotation_path that stores the information of HO3D dataset') 33 | self.parser.add_argument('--mtc_anno_path', type=str, default='', help='annotation_path that stores the information of MTC (Panoptic 3D) dataset') 34 | self.parser.add_argument('--stb_anno_path', type=str, default='', help='annotation_path that stores the information of STB dataset') 35 | self.parser.add_argument('--rhd_anno_path', type=str, default='', help='annotation_path that stores the information of RHD dataset') 36 | self.parser.add_argument('--frl_anno_path', type=str, default='', help='annotation_path that stores the information of FRL dataset') 37 | self.parser.add_argument('--ganerated_anno_path', type=str, default='', help='annotation_path that stores the information of GANerated dataset') 38 | self.parser.add_argument('--demo_img_dir', type=str, default='', help='image root of demo dataset') 39 | self.parser.add_argument('--wild_img_dir', type=str, default='', help='image root of in-the-wild dataset (in-the-wild means without any annotation, 
only image)') 40 | 41 | self.parser.add_argument('--num_joints', type=int, default=21, help='number of keypoints') 42 | self.parser.add_argument('--total_params_dim', type=int, default=61, help='number of params to be estimated') 43 | self.parser.add_argument('--cam_params_dim', type=int, default=3, help='number of params to be estimated') 44 | self.parser.add_argument('--pose_params_dim', type=int, default=48, help='number of params to be estimated') 45 | self.parser.add_argument('--shape_params_dim', type=int, default=10, help='number of params to be estimated') 46 | 47 | self.parser.add_argument('--model_root', type=str, default='./extra_data', help='root dir for all the pretrained weights and pre-defined models') 48 | self.parser.add_argument('--smplx_model_file', type=str, default='./extra_data/smpl/SMPLX_NEUTRAL.pkl', help='path of pretraind smpl model') 49 | self.parser.add_argument('--smplx_hand_info_file', type=str, default='hand_module/SMPLX_HAND_INFO.pkl', help='path of smpl face') 50 | self.parser.add_argument('--mean_param_file', type=str, default='hand_module/mean_mano_params.pkl', help='path of smpl face') 51 | 52 | self.parser.add_argument('--single_branch', action='store_true', help='use only one branch, this branch could either be IUV or other format such as image') 53 | self.parser.add_argument('--two_branch', action='store_true', help='two branch input, image and another auxiliary branch, the auxiliary branch is IUV in default') 54 | self.parser.add_argument('--aux_as_main', action='store_true', help='use aux as input instead of image') 55 | self.parser.add_argument('--main_encoder', type=str, default='resnet50', help='selects model to use for major input, it is usually image') 56 | self.parser.add_argument('--aux_encoder', type=str, default='resnet18', help='selects model to use for auxiliary input, it could be IUV') 57 | 58 | self.parser.add_argument('--resize_or_crop', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]') 59 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') 60 | self.parser.add_argument('--use_hand_rotation', action='store_true', help='if specified, use ground truth hand rotation in training') 61 | self.parser.add_argument('--top_finger_joints_type', type=str, default='ave', help="use which kind of top finger joints") 62 | self.initialized = True 63 | self.initialized = True 64 | 65 | 66 | def parse(self, args=None): 67 | if not self.initialized: 68 | self.initialize() 69 | 70 | if args is None: 71 | self.opt = self.parser.parse_args() 72 | else: 73 | self.opt = self.parser.parse_args(args) 74 | # self.opt, unknown = self.parser.parse_known_args() 75 | self.opt.isTrain = self.isTrain # train or test 76 | 77 | return self.opt 78 | -------------------------------------------------------------------------------- /bodymocap/models/hmr.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torchvision.models.resnet as resnet 6 | import numpy as np 7 | import math 8 | 9 | import sys 10 | from bodymocap.utils.geometry import rot6d_to_rotmat 11 | 12 | class Bottleneck(nn.Module): 13 | """ Redefinition of Bottleneck residual block 14 | Adapted from the official PyTorch implementation 15 | """ 16 | expansion = 4 17 | 18 | def __init__(self, 
inplanes, planes, stride=1, downsample=None): 19 | super(Bottleneck, self).__init__() 20 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 26 | self.bn3 = nn.BatchNorm2d(planes * 4) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | out = self.relu(out) 41 | 42 | out = self.conv3(out) 43 | out = self.bn3(out) 44 | 45 | if self.downsample is not None: 46 | residual = self.downsample(x) 47 | 48 | out += residual 49 | out = self.relu(out) 50 | 51 | return out 52 | 53 | class HMR(nn.Module): 54 | """ SMPL Iterative Regressor with ResNet50 backbone 55 | """ 56 | 57 | def __init__(self, block, layers, smpl_mean_params): 58 | self.inplanes = 64 59 | super(HMR, self).__init__() 60 | npose = 24 * 6 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 62 | bias=False) 63 | self.bn1 = nn.BatchNorm2d(64) 64 | self.relu = nn.ReLU(inplace=True) 65 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 66 | self.layer1 = self._make_layer(block, 64, layers[0]) 67 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 68 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 69 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 70 | self.avgpool = nn.AvgPool2d(7, stride=1) 71 | self.fc1 = nn.Linear(512 * block.expansion + npose + 13, 1024) 72 | self.drop1 = nn.Dropout() 73 | self.fc2 = nn.Linear(1024, 1024) 74 | self.drop2 = nn.Dropout() 75 | self.decpose = nn.Linear(1024, npose) 76 | self.decshape = nn.Linear(1024, 10) 77 | self.deccam = nn.Linear(1024, 3) 78 | nn.init.xavier_uniform_(self.decpose.weight, gain=0.01) 79 | nn.init.xavier_uniform_(self.decshape.weight, gain=0.01) 80 | nn.init.xavier_uniform_(self.deccam.weight, gain=0.01) 81 | 82 | for m in self.modules(): 83 | if isinstance(m, nn.Conv2d): 84 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 85 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 86 | elif isinstance(m, nn.BatchNorm2d): 87 | m.weight.data.fill_(1) 88 | m.bias.data.zero_() 89 | 90 | mean_params = np.load(smpl_mean_params) 91 | init_pose = torch.from_numpy(mean_params['pose'][:]).unsqueeze(0) 92 | init_shape = torch.from_numpy(mean_params['shape'][:].astype('float32')).unsqueeze(0) 93 | init_cam = torch.from_numpy(mean_params['cam']).unsqueeze(0) 94 | self.register_buffer('init_pose', init_pose) 95 | self.register_buffer('init_shape', init_shape) 96 | self.register_buffer('init_cam', init_cam) 97 | 98 | 99 | def _make_layer(self, block, planes, blocks, stride=1): 100 | downsample = None 101 | if stride != 1 or self.inplanes != planes * block.expansion: 102 | downsample = nn.Sequential( 103 | nn.Conv2d(self.inplanes, planes * block.expansion, 104 | kernel_size=1, stride=stride, bias=False), 105 | nn.BatchNorm2d(planes * block.expansion), 106 | ) 107 | 108 | layers = [] 109 | layers.append(block(self.inplanes, planes, stride, downsample)) 110 | self.inplanes = planes * block.expansion 111 | for i in range(1, blocks): 112 | layers.append(block(self.inplanes, planes)) 113 | 114 | return nn.Sequential(*layers) 115 | 116 | 117 | def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3): 118 | 119 | batch_size = x.shape[0] 120 | 121 | if init_pose is None: 122 | init_pose = self.init_pose.expand(batch_size, -1) 123 | if init_shape is None: 124 | init_shape = self.init_shape.expand(batch_size, -1) 125 | if init_cam is None: 126 | init_cam = self.init_cam.expand(batch_size, -1) 127 | 128 | x = self.conv1(x) 129 | x = self.bn1(x) 130 | x = self.relu(x) 131 | x = self.maxpool(x) 132 | 133 | x1 = self.layer1(x) 134 | x2 = self.layer2(x1) 135 | x3 = self.layer3(x2) 136 | x4 = self.layer4(x3) 137 | 138 | xf = self.avgpool(x4) 139 | xf = xf.view(xf.size(0), -1) 140 | 141 | pred_pose = init_pose 142 | pred_shape = init_shape 143 | pred_cam = init_cam 144 | for i in range(n_iter): 145 | xc = torch.cat([xf, pred_pose, pred_shape, pred_cam],1) 146 | xc = self.fc1(xc) 147 | xc = self.drop1(xc) 148 | xc = self.fc2(xc) 149 | xc = self.drop2(xc) 150 | pred_pose = self.decpose(xc) + pred_pose 151 | pred_shape = self.decshape(xc) + pred_shape 152 | pred_cam = self.deccam(xc) + pred_cam 153 | 154 | pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3) 155 | 156 | return pred_rotmat, pred_shape, pred_cam 157 | 158 | def hmr(smpl_mean_params, pretrained=True, **kwargs): 159 | """ Constructs an HMR model with ResNet50 backbone. 160 | Args: 161 | pretrained (bool): If True, returns a model pre-trained on ImageNet 162 | """ 163 | model = HMR(Bottleneck, [3, 4, 6, 3], smpl_mean_params, **kwargs) 164 | if pretrained: 165 | resnet_imagenet = resnet.resnet50(pretrained=True) 166 | model.load_state_dict(resnet_imagenet.state_dict(),strict=False) 167 | return model 168 | 169 | -------------------------------------------------------------------------------- /mocap_utils/geometry_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
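# Overview: rotation-representation conversions (angle-axis <-> quaternion <-> rotation matrix,
# plus 6D rotations) and hand-pose flipping helpers; most functions accept either numpy arrays
# or torch tensors (rot6d_to_rotmat is torch-only).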
2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | # sys.path.append("/") 6 | import numpy as np 7 | import torch 8 | from torch.nn import functional as F 9 | import cv2 10 | import numpy.matlib as npm 11 | import mocap_utils.geometry_utils_torch as gut 12 | 13 | 14 | def flip_hand_pose(pose): 15 | pose = pose.copy() 16 | if len(pose.shape) == 1: 17 | pose = pose.reshape(-1, 3) 18 | pose[:, 1] *= -1 19 | pose[:, 2] *= -1 20 | return pose.reshape(-1,) 21 | else: 22 | assert len(pose.shape) == 2 23 | pose[:, 1] *= -1 24 | pose[:, 2] *= -1 25 | return pose 26 | 27 | 28 | def flip_hand_joints_3d(joints_3d): 29 | assert joints_3d.shape[1] == 3 30 | assert len(joints_3d.shape) == 2 31 | rot_mat = np.diag([-1, 1, 1]) 32 | return np.matmul(rot_mat, joints_3d.T).T 33 | 34 | 35 | def __quaternion_to_angle_axis_torch(quat): 36 | quat = quat.clone() 37 | if quat.dim() == 1: 38 | assert quat.size(0) == 4 39 | quat = quat.view(1, 4) 40 | angle_axis = gut.quaternion_to_angle_axis(quat)[0] 41 | elif quat.dim() == 2: 42 | assert quat.size(1) == 4 43 | angle_axis = gut.quaternion_to_angle_axis(quat) 44 | else: 45 | assert quat.dim() == 3 46 | dim0 = quat.size(0) 47 | dim1 = quat.size(1) 48 | assert quat.size(2) == 4 49 | quat = quat.view(dim0*dim1, 4) 50 | angle_axis = gut.quaternion_to_angle_axis(quat) 51 | angle_axis = angle_axis.view(dim0, dim1, 3) 52 | return angle_axis 53 | 54 | 55 | def quaternion_to_angle_axis(quaternion): 56 | quat = quaternion 57 | if isinstance(quat, torch.Tensor): 58 | return __quaternion_to_angle_axis_torch(quaternion) 59 | else: 60 | assert isinstance(quat, np.ndarray) 61 | quat_torch = torch.from_numpy(quat) 62 | angle_axis_torch = __quaternion_to_angle_axis_torch(quat_torch) 63 | return angle_axis_torch.numpy() 64 | 65 | 66 | def __angle_axis_to_quaternion_torch(aa): 67 | aa = aa.clone() 68 | if aa.dim() == 1: 69 | assert aa.size(0) == 3 70 | aa = aa.view(1, 3) 71 | quat = gut.angle_axis_to_quaternion(aa)[0] 72 | elif aa.dim() == 2: 73 | assert aa.size(1) == 3 74 | quat = gut.angle_axis_to_quaternion(aa) 75 | else: 76 | assert aa.dim() == 3 77 | dim0 = aa.size(0) 78 | dim1 = aa.size(1) 79 | assert aa.size(2) == 3 80 | aa = aa.view(dim0*dim1, 3) 81 | quat = gut.angle_axis_to_quaternion(aa) 82 | quat = quat.view(dim0, dim1, 4) 83 | return quat 84 | 85 | 86 | def angle_axis_to_quaternion(angle_axis): 87 | aa = angle_axis 88 | if isinstance(aa, torch.Tensor): 89 | return __angle_axis_to_quaternion_torch(aa) 90 | else: 91 | assert isinstance(aa, np.ndarray) 92 | aa_torch = torch.from_numpy(aa) 93 | quat_torch = __angle_axis_to_quaternion_torch(aa_torch) 94 | return quat_torch.numpy() 95 | 96 | 97 | def __angle_axis_to_rotation_matrix_torch(aa): 98 | aa = aa.clone() 99 | if aa.dim() == 1: 100 | assert aa.size(0) ==3 101 | aa = aa.view(1, 3) 102 | rotmat = gut.angle_axis_to_rotation_matrix(aa)[0][:3, :3] 103 | elif aa.dim() == 2: 104 | assert aa.size(1) == 3 105 | rotmat = gut.angle_axis_to_rotation_matrix(aa)[:, :3, :3] 106 | else: 107 | assert aa.dim() == 3 108 | dim0 = aa.size(0) 109 | dim1 = aa.size(1) 110 | assert aa.size(2) == 3 111 | aa = aa.view(dim0*dim1, 3) 112 | rotmat = gut.angle_axis_to_rotation_matrix(aa) 113 | rotmat = rotmat.view(dim0, dim1, 4, 4) 114 | rotmat = rotmat[:, :, :3, :3] 115 | return rotmat 116 | 117 | 118 | def angle_axis_to_rotation_matrix(angle_axis): 119 | aa = angle_axis 120 | if isinstance(aa, torch.Tensor): 121 | return __angle_axis_to_rotation_matrix_torch(aa) 122 | else: 123 | assert isinstance(aa, np.ndarray) 124 | aa_torch = 
torch.from_numpy(aa) 125 | rotmat_torch = __angle_axis_to_rotation_matrix_torch(aa_torch) 126 | return rotmat_torch.numpy() 127 | 128 | 129 | def __rotation_matrix_to_angle_axis_torch(rotmat): 130 | rotmat = rotmat.clone() 131 | if rotmat.dim() == 2: 132 | assert rotmat.size(0) == 3 133 | assert rotmat.size(1) == 3 134 | rotmat0 = torch.zeros((1, 3, 4)) 135 | rotmat0[0, :, :3] = rotmat 136 | rotmat0[:, 2, 3] = 1.0 137 | aa = gut.rotation_matrix_to_angle_axis(rotmat0)[0] 138 | elif rotmat.dim() == 3: 139 | dim0 = rotmat.size(0) 140 | assert rotmat.size(1) == 3 141 | assert rotmat.size(2) == 3 142 | rotmat0 = torch.zeros((dim0, 3, 4)) 143 | rotmat0[:, :, :3] = rotmat 144 | rotmat0[:, 2, 3] = 1.0 145 | aa = gut.rotation_matrix_to_angle_axis(rotmat0) 146 | else: 147 | assert rotmat.dim() == 4 148 | dim0 = rotmat.size(0) 149 | dim1 = rotmat.size(1) 150 | assert rotmat.size(2) == 3 151 | assert rotmat.size(3) == 3 152 | rotmat0 = torch.zeros((dim0*dim1, 3, 4)) 153 | rotmat0[:, :, :3] = rotmat.view(dim0*dim1, 3, 3) 154 | rotmat0[:, 2, 3] = 1.0 155 | aa = gut.rotation_matrix_to_angle_axis(rotmat0) 156 | aa = aa.view(dim0, dim1, 3) 157 | return aa 158 | 159 | 160 | def rotation_matrix_to_angle_axis(rotmat): 161 | if isinstance(rotmat, torch.Tensor): 162 | return __rotation_matrix_to_angle_axis_torch(rotmat) 163 | else: 164 | assert isinstance(rotmat, np.ndarray) 165 | rotmat_torch = torch.from_numpy(rotmat) 166 | aa_torch = __rotation_matrix_to_angle_axis_torch(rotmat_torch) 167 | return aa_torch.numpy() 168 | 169 | 170 | def rot6d_to_rotmat(x): 171 | """Convert 6D rotation representation to 3x3 rotation matrix. 172 | Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019 173 | Input: 174 | (B,6) Batch of 6-D rotation representations 175 | Output: 176 | (B,3,3) Batch of corresponding rotation matrices 177 | """ 178 | assert isinstance(x, torch.Tensor), "Current version only supports torch.tensor" 179 | 180 | x = x.view(-1,3,2) 181 | a1 = x[:, :, 0] 182 | a2 = x[:, :, 1] 183 | b1 = F.normalize(a1) 184 | b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1) 185 | b3 = torch.cross(b1, b2) 186 | return torch.stack((b1, b2, b3), dim=-1) 187 | 188 | 189 | def angle_axis_to_rot6d(aa): 190 | assert aa.dim() == 2 191 | assert aa.size(1) == 3 192 | bs = aa.size(0) 193 | 194 | rotmat = angle_axis_to_rotation_matrix(aa) 195 | rot6d = rotmat[:, :3, :2] 196 | return rot6d -------------------------------------------------------------------------------- /renderer/od_renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | Renders mesh using OpenDr / Pytorch-3d for visualization. 5 | Part of code is modified from https://github.com/akanazawa/hmr 6 | """ 7 | 8 | import sys 9 | import numpy as np 10 | import cv2 11 | import pdb 12 | from PIL import Image, ImageDraw 13 | from opendr.camera import ProjectPoints 14 | from opendr.renderer import ColoredRenderer 15 | from opendr.lighting import LambertianPointLight 16 | 17 | 18 | class OpendrRenderer(object): 19 | def __init__(self, 20 | img_size=224, 21 | mesh_color=np.array([0.5, 0.5, 0.5]),): 22 | 23 | self.w = img_size 24 | self.h = img_size 25 | self.color = mesh_color 26 | self.img_size = img_size 27 | self.flength = 500. 
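        # Default focal length; used in __call__ as cam = [flength, w/2, h/2] when no camera is given.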
28 | 29 | 30 | def render(self, verts, faces, bg_img): 31 | verts = verts.copy() 32 | faces = faces.copy() 33 | 34 | input_size = 500 35 | 36 | f = 10 37 | 38 | verts[:, 0] = (verts[:, 0] - input_size) / input_size 39 | verts[:, 1] = (verts[:, 1] - input_size) / input_size 40 | 41 | verts[:, 2] /= (5 * 112) 42 | verts[:, 2] += f 43 | 44 | cam_for_render = np.array([f, 1, 1]) * input_size 45 | 46 | rend_img = self.__call__( 47 | img=bg_img, cam=cam_for_render, 48 | verts=verts, faces=faces, color=self.color) 49 | 50 | return rend_img 51 | 52 | 53 | def __call__(self, 54 | verts, 55 | faces, 56 | cam=None, 57 | img=None, 58 | do_alpha=False, 59 | far=None, 60 | near=None, 61 | color = np.array([0, 0, 255]), 62 | img_size=None): 63 | """ 64 | cam is 3D [f, px, py] 65 | """ 66 | if img is not None: 67 | h, w = img.shape[:2] 68 | elif img_size is not None: 69 | h = img_size[0] 70 | w = img_size[1] 71 | else: 72 | h = self.h 73 | w = self.w 74 | 75 | if cam is None: 76 | cam = [self.flength, w / 2., h / 2.] 77 | 78 | use_cam = ProjectPoints( 79 | f=cam[0] * np.ones(2), 80 | rt=np.zeros(3), 81 | t=np.zeros(3), 82 | k=np.zeros(5), 83 | c=cam[1:3]) 84 | 85 | if near is None: 86 | near = np.maximum(np.min(verts[:, 2]) - 25, 0.1) 87 | if far is None: 88 | far = np.maximum(np.max(verts[:, 2]) + 25, 25) 89 | 90 | return_value = render_model( 91 | verts, 92 | faces, 93 | w, 94 | h, 95 | use_cam, 96 | do_alpha=do_alpha, 97 | img=img, 98 | far=far, 99 | near=near, 100 | color=color) 101 | 102 | imtmp = return_value 103 | image = (imtmp * 255).astype('uint8') 104 | return image 105 | 106 | 107 | 108 | def _create_renderer(w=640, 109 | h=480, 110 | rt=np.zeros(3), 111 | t=np.zeros(3), 112 | f=None, 113 | c=None, 114 | k=None, 115 | near=.5, 116 | far=10.): 117 | 118 | f = np.array([w, w]) / 2. if f is None else f 119 | c = np.array([w, h]) / 2. 
if c is None else c 120 | k = np.zeros(5) if k is None else k 121 | 122 | rn = ColoredRenderer() 123 | 124 | rn.camera = ProjectPoints(rt=rt, t=t, f=f, c=c, k=k) 125 | rn.frustum = {'near': near, 'far': far, 'height': h, 'width': w} 126 | return rn 127 | 128 | 129 | def _rotateY(points, angle): 130 | """Rotate the points by a specified angle.""" 131 | ry = np.array([[np.cos(angle), 0., np.sin(angle)], [0., 1., 0.], 132 | [-np.sin(angle), 0., np.cos(angle)]]) 133 | return np.dot(points, ry) 134 | 135 | 136 | def simple_renderer(rn, 137 | verts, 138 | faces, 139 | yrot=np.radians(70), 140 | color=np.array([0, 0, 255]) 141 | ): 142 | 143 | # Rendered model color 144 | rn.set(v=verts, f=faces, vc=color, bgcolor=np.ones(3)) 145 | albedo = rn.vc 146 | 147 | # Construct Back Light (on back right corner) 148 | rn.vc = LambertianPointLight( 149 | f=rn.f, 150 | v=rn.v, 151 | num_verts=len(rn.v), 152 | light_pos=_rotateY(np.array([-200, -100, -100]), yrot), 153 | vc=albedo, 154 | light_color=np.array([1, 1, 1])) 155 | 156 | # Construct Left Light 157 | rn.vc += LambertianPointLight( 158 | f=rn.f, 159 | v=rn.v, 160 | num_verts=len(rn.v), 161 | # light_pos=_rotateY(np.array([800, 10, 300]), yrot), 162 | light_pos=_rotateY(np.array([800, 10, 300]), yrot), 163 | vc=albedo, 164 | light_color=np.array([1, 1, 1])) 165 | 166 | # Construct Right Light 167 | rn.vc += LambertianPointLight( 168 | f=rn.f, 169 | v=rn.v, 170 | num_verts=len(rn.v), 171 | light_pos=_rotateY(np.array([-500, 500, 1000]), yrot), 172 | # light_pos=_rotateY(np.array([-500, 500, 1000]), yrot), 173 | vc=albedo, 174 | light_color=np.array([.7, .7, .7])) 175 | 176 | return rn.r 177 | 178 | 179 | def get_alpha(imtmp, bgval=1.): 180 | h, w = imtmp.shape[:2] 181 | alpha = (~np.all(imtmp == bgval, axis=2)).astype(imtmp.dtype) 182 | 183 | b_channel, g_channel, r_channel = cv2.split(imtmp) 184 | 185 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha.astype( 186 | imtmp.dtype))) 187 | return im_RGBA 188 | 189 | 190 | def append_alpha(imtmp): 191 | alpha = np.ones_like(imtmp[:, :, 0]).astype(imtmp.dtype) 192 | if np.issubdtype(imtmp.dtype, np.uint8): 193 | alpha = alpha * 255 194 | b_channel, g_channel, r_channel = cv2.split(imtmp) 195 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha)) 196 | return im_RGBA 197 | 198 | 199 | def render_model(verts, 200 | faces, 201 | w, 202 | h, 203 | cam, 204 | near=0.5, 205 | far=25, 206 | img=None, 207 | do_alpha=False, 208 | color=None): 209 | rn = _create_renderer( 210 | w=w, h=h, near=near, far=far, rt=cam.rt, t=cam.t, f=cam.f, c=cam.c) 211 | 212 | # Uses img as background, otherwise white background. 213 | if img is not None: 214 | rn.background_image = img / 255. if img.max() > 1.1 else img 215 | 216 | imtmp = simple_renderer(rn, verts, faces, color=color) 217 | 218 | # If white bg, make transparent. 219 | if img is None and do_alpha: 220 | imtmp = get_alpha(imtmp) 221 | elif img is not None and do_alpha: 222 | imtmp = append_alpha(imtmp) 223 | 224 | return imtmp -------------------------------------------------------------------------------- /demo/demo_bodymocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
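# Overview: body-only demo -- detects body bounding boxes per frame, regresses SMPL/SMPL-X
# parameters with BodyMocap, and renders/saves the results for image, video, or webcam input.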
2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | from torchvision.transforms import Normalize 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | import json 12 | import pickle 13 | from datetime import datetime 14 | 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | from bodymocap.body_bbox_detector import BodyPoseEstimator 18 | import mocap_utils.demo_utils as demo_utils 19 | import mocap_utils.general_utils as gnu 20 | from mocap_utils.timer import Timer 21 | 22 | import renderer.image_utils as imu 23 | from renderer.viewer2D import ImShow 24 | 25 | def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer): 26 | #Setup input data to handle different types of inputs 27 | input_type, input_data = demo_utils.setup_input(args) 28 | 29 | cur_frame = args.start_frame 30 | video_frame = 0 31 | timer = Timer() 32 | while True: 33 | timer.tic() 34 | # load data 35 | load_bbox = False 36 | 37 | if input_type =='image_dir': 38 | if cur_frame < len(input_data): 39 | image_path = input_data[cur_frame] 40 | img_original_bgr = cv2.imread(image_path) 41 | else: 42 | img_original_bgr = None 43 | 44 | elif input_type == 'bbox_dir': 45 | if cur_frame < len(input_data): 46 | print("Use pre-computed bounding boxes") 47 | image_path = input_data[cur_frame]['image_path'] 48 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 49 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 50 | img_original_bgr = cv2.imread(image_path) 51 | load_bbox = True 52 | else: 53 | img_original_bgr = None 54 | 55 | elif input_type == 'video': 56 | _, img_original_bgr = input_data.read() 57 | if video_frame < cur_frame: 58 | video_frame += 1 59 | continue 60 | # save the obtained video frames 61 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 62 | if img_original_bgr is not None: 63 | video_frame += 1 64 | if args.save_frame: 65 | gnu.make_subdir(image_path) 66 | cv2.imwrite(image_path, img_original_bgr) 67 | 68 | elif input_type == 'webcam': 69 | _, img_original_bgr = input_data.read() 70 | 71 | if video_frame < cur_frame: 72 | video_frame += 1 73 | continue 74 | # save the obtained video frames 75 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 76 | if img_original_bgr is not None: 77 | video_frame += 1 78 | if args.save_frame: 79 | gnu.make_subdir(image_path) 80 | cv2.imwrite(image_path, img_original_bgr) 81 | else: 82 | assert False, "Unknown input_type" 83 | 84 | cur_frame +=1 85 | if img_original_bgr is None or cur_frame > args.end_frame: 86 | break 87 | print("--------------------------------------") 88 | 89 | if load_bbox: 90 | body_pose_list = None 91 | else: 92 | body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose( 93 | img_original_bgr) 94 | hand_bbox_list = [None, ] * len(body_bbox_list) 95 | 96 | # save the obtained body & hand bbox to json file 97 | if args.save_bbox_output: 98 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 99 | 100 | if len(body_bbox_list) < 1: 101 | print(f"No body deteced: {image_path}") 102 | continue 103 | 104 | #Sort the bbox using bbox size 105 | # (to make the order as consistent as possible without tracking) 106 | bbox_size = [ (x[2] * x[3]) for x in body_bbox_list] 107 | idx_big2small = np.argsort(bbox_size)[::-1] 108 | body_bbox_list = [ body_bbox_list[i] for i in idx_big2small ] 109 | if args.single_person and len(body_bbox_list)>0: 110 | body_bbox_list = 
[body_bbox_list[0], ] 111 | 112 | # Body Pose Regression 113 | pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list) 114 | assert len(body_bbox_list) == len(pred_output_list) 115 | 116 | # extract mesh for rendering (vertices in image space and faces) from pred_output_list 117 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 118 | 119 | # visualization 120 | res_img = visualizer.visualize( 121 | img_original_bgr, 122 | pred_mesh_list = pred_mesh_list, 123 | body_bbox_list = body_bbox_list) 124 | 125 | # show result in the screen 126 | if not args.no_display: 127 | res_img = res_img.astype(np.uint8) 128 | ImShow(res_img) 129 | 130 | # save result image 131 | if args.out_dir is not None: 132 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 133 | 134 | # save predictions to pkl 135 | if args.save_pred_pkl: 136 | demo_type = 'body' 137 | demo_utils.save_pred_to_pkl( 138 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 139 | 140 | timer.toc(bPrint=True,title="Time") 141 | print(f"Processed : {image_path}") 142 | 143 | #save images as a video 144 | if not args.no_video_out and input_type in ['video', 'webcam']: 145 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 146 | 147 | if input_type =='webcam' and input_data is not None: 148 | input_data.release() 149 | cv2.destroyAllWindows() 150 | 151 | 152 | def main(): 153 | args = DemoOptions().parse() 154 | 155 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 156 | assert torch.cuda.is_available(), "Current version only supports GPU" 157 | 158 | # Set bbox detector 159 | body_bbox_detector = BodyPoseEstimator() 160 | 161 | # Set mocap regressor 162 | use_smplx = args.use_smplx 163 | checkpoint_path = args.checkpoint_body_smplx if use_smplx else args.checkpoint_body_smpl 164 | print("use_smplx", use_smplx) 165 | body_mocap = BodyMocap(checkpoint_path, args.smpl_dir, device, use_smplx) 166 | 167 | # Set Visualizer 168 | if args.renderer_type in ['pytorch3d', 'opendr']: 169 | from renderer.screen_free_visualizer import Visualizer 170 | else: 171 | from renderer.visualizer import Visualizer 172 | visualizer = Visualizer(args.renderer_type) 173 | 174 | run_body_mocap(args, body_bbox_detector, body_mocap, visualizer) 175 | 176 | 177 | if __name__ == '__main__': 178 | main() -------------------------------------------------------------------------------- /renderer/p3d_renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Part of code is modified from https://github.com/facebookresearch/pytorch3d 4 | 5 | import cv2 6 | import os 7 | import sys 8 | import torch 9 | import numpy as np 10 | 11 | from pytorch3d.structures import Meshes 12 | from pytorch3d.renderer.mesh import Textures 13 | from pytorch3d.renderer import ( 14 | PerspectiveCameras, 15 | FoVOrthographicCameras, 16 | PointLights, 17 | RasterizationSettings, 18 | MeshRenderer, 19 | BlendParams, 20 | MeshRasterizer, 21 | SoftPhongShader, 22 | ) 23 | 24 | class Pytorch3dRenderer(object): 25 | 26 | def __init__(self, img_size, mesh_color): 27 | self.device = torch.device("cuda:0") 28 | # self.render_size = 1920 29 | 30 | self.img_size = img_size 31 | 32 | # mesh color 33 | mesh_color = np.array(mesh_color)[::-1] 34 | self.mesh_color = torch.from_numpy( 35 | mesh_color.copy()).view(1, 1, 3).float().to(self.device) 36 | 37 | # renderer for large objects, such as whole body. 
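        # Three renderers at different output resolutions are created below; render() picks
        # one of them based on the projected bounding-box size of the mesh.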
38 | self.render_size_large = 700 39 | lights = PointLights( 40 | ambient_color = [[1.0, 1.0, 1.0],], 41 | diffuse_color = [[1.0, 1.0, 1.0],], 42 | device=self.device, location=[[1.0, 1.0, -30]]) 43 | self.renderer_large = self.__get_renderer(self.render_size_large, lights) 44 | 45 | # renderer for small objects, such as whole body. 46 | self.render_size_medium = 400 47 | lights = PointLights( 48 | ambient_color = [[0.5, 0.5, 0.5],], 49 | diffuse_color = [[0.5, 0.5, 0.5],], 50 | device=self.device, location=[[1.0, 1.0, -30]]) 51 | self.renderer_medium = self.__get_renderer(self.render_size_medium, lights) 52 | 53 | 54 | # renderer for small objects, such as whole body. 55 | self.render_size_small = 200 56 | lights = PointLights( 57 | ambient_color = [[0.5, 0.5, 0.5],], 58 | diffuse_color = [[0.5, 0.5, 0.5],], 59 | device=self.device, location=[[1.0, 1.0, -30]]) 60 | self.renderer_small = self.__get_renderer(self.render_size_small, lights) 61 | 62 | 63 | 64 | def __get_renderer(self, render_size, lights): 65 | 66 | cameras = FoVOrthographicCameras( 67 | device = self.device, 68 | znear=0.1, 69 | zfar=10.0, 70 | max_y=1.0, 71 | min_y=-1.0, 72 | max_x=1.0, 73 | min_x=-1.0, 74 | scale_xyz=((1.0, 1.0, 1.0),), # (1, 3) 75 | ) 76 | 77 | raster_settings = RasterizationSettings( 78 | image_size = render_size, 79 | blur_radius = 0, 80 | faces_per_pixel = 1, 81 | ) 82 | blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color = (0,0,0)) 83 | 84 | renderer = MeshRenderer( 85 | rasterizer=MeshRasterizer( 86 | cameras=cameras, 87 | raster_settings=raster_settings 88 | ), 89 | shader=SoftPhongShader( 90 | device=self.device, 91 | cameras=cameras, 92 | lights=lights, 93 | blend_params=blend_params 94 | ) 95 | ) 96 | 97 | return renderer 98 | 99 | 100 | def render(self, verts, faces, bg_img): 101 | verts = verts.copy() 102 | faces = faces.copy() 103 | 104 | # bbox for verts 105 | x0 = int(np.min(verts[:, 0])) 106 | x1 = int(np.max(verts[:, 0])) 107 | y0 = int(np.min(verts[:, 1])) 108 | y1 = int(np.max(verts[:, 1])) 109 | width = x1 - x0 110 | height = y1 - y0 111 | 112 | bbox_size = max(height, width) 113 | if bbox_size <= self.render_size_small: 114 | # print("Using small size renderer") 115 | render_size = self.render_size_small 116 | renderer = self.renderer_small 117 | else: 118 | if bbox_size <= self.render_size_medium: 119 | # print("Using medium size renderer") 120 | render_size = self.render_size_medium 121 | renderer = self.renderer_medium 122 | else: 123 | # print("Using large size renderer") 124 | render_size = self.render_size_large 125 | renderer = self.renderer_large 126 | 127 | # padding the tight bbox 128 | margin = int(max(width, height) * 0.1) 129 | x0 = max(0, x0-margin) 130 | y0 = max(0, y0-margin) 131 | x1 = min(self.img_size, x1+margin) 132 | y1 = min(self.img_size, y1+margin) 133 | 134 | # move verts to be in the bbox 135 | verts[:, 0] -= x0 136 | verts[:, 1] -= y0 137 | 138 | # normalize verts to (-1, 1) 139 | bbox_size = max(y1-y0, x1-x0) 140 | half_size = bbox_size / 2 141 | verts[:, 0] = (verts[:, 0] - half_size) / half_size 142 | verts[:, 1] = (verts[:, 1] - half_size) / half_size 143 | 144 | # the coords of pytorch-3d is (1, 1) for upper-left and (-1, -1) for lower-right 145 | # so need to multiple minus for vertices 146 | verts[:, :2] *= -1 147 | 148 | # shift verts along the z-axis 149 | verts[:, 2] /= 112 150 | verts[:, 2] += 5 151 | 152 | verts_tensor = torch.from_numpy(verts).float().unsqueeze(0).cuda() 153 | faces_tensor = 
torch.from_numpy(faces.copy()).long().unsqueeze(0).cuda() 154 | 155 | # set color 156 | mesh_color = self.mesh_color.repeat(1, verts.shape[0], 1) 157 | textures = Textures(verts_rgb = mesh_color) 158 | 159 | # rendering 160 | mesh = Meshes(verts=verts_tensor, faces=faces_tensor, textures=textures) 161 | 162 | # blending rendered mesh with background image 163 | rend_img = renderer(mesh) 164 | rend_img = rend_img[0].cpu().numpy() 165 | 166 | 167 | scale_ratio = render_size / bbox_size 168 | img_size_new = int(self.img_size * scale_ratio) 169 | bg_img_new = cv2.resize(bg_img, (img_size_new, img_size_new)) 170 | 171 | x0 = max(int(x0 * scale_ratio), 0) 172 | y0 = max(int(y0 * scale_ratio), 0) 173 | x1 = min(int(x1 * scale_ratio), img_size_new) 174 | y1 = min(int(y1 * scale_ratio), img_size_new) 175 | 176 | h0 = min(y1-y0, render_size) 177 | w0 = min(x1-x0, render_size) 178 | 179 | y1 = y0 + h0 180 | x1 = x0 + w0 181 | 182 | rend_img_new = np.zeros((img_size_new, img_size_new, 4)) 183 | rend_img_new[y0:y1, x0:x1, :] = rend_img[:h0, :w0, :] 184 | rend_img = rend_img_new 185 | 186 | alpha = rend_img[:, :, 3:4] 187 | alpha[alpha>0] = 1.0 188 | 189 | 190 | rend_img = rend_img[:, :, :3] 191 | maxColor = rend_img.max() 192 | rend_img *= 255 /maxColor #Make sure <1.0 193 | rend_img = rend_img[:, :, ::-1] 194 | 195 | res_img = alpha * rend_img + (1.0 - alpha) * bg_img_new 196 | 197 | res_img = cv2.resize(res_img, (self.img_size, self.img_size)) 198 | 199 | return res_img -------------------------------------------------------------------------------- /demo/demo_handmocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import numpy as np 6 | import cv2 7 | import json 8 | import torch 9 | from torchvision.transforms import Normalize 10 | 11 | from demo.demo_options import DemoOptions 12 | import mocap_utils.general_utils as gnu 13 | import mocap_utils.demo_utils as demo_utils 14 | 15 | from handmocap.hand_mocap_api import HandMocap 16 | from handmocap.hand_bbox_detector import HandBboxDetector 17 | 18 | import renderer.image_utils as imu 19 | from renderer.viewer2D import ImShow 20 | import time 21 | 22 | 23 | def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer): 24 | #Set up input data (images or webcam) 25 | input_type, input_data = demo_utils.setup_input(args) 26 | 27 | assert args.out_dir is not None, "Please specify output dir to store the results" 28 | cur_frame = args.start_frame 29 | video_frame = 0 30 | 31 | while True: 32 | # load data 33 | load_bbox = False 34 | 35 | if input_type =='image_dir': 36 | if cur_frame < len(input_data): 37 | image_path = input_data[cur_frame] 38 | img_original_bgr = cv2.imread(image_path) 39 | else: 40 | img_original_bgr = None 41 | 42 | elif input_type == 'bbox_dir': 43 | if cur_frame < len(input_data): 44 | print("Use pre-computed bounding boxes") 45 | image_path = input_data[cur_frame]['image_path'] 46 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 47 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 48 | img_original_bgr = cv2.imread(image_path) 49 | load_bbox = True 50 | else: 51 | img_original_bgr = None 52 | 53 | elif input_type == 'video': 54 | _, img_original_bgr = input_data.read() 55 | if video_frame < cur_frame: 56 | video_frame += 1 57 | continue 58 | # save the obtained video frames 59 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 60 | if 
img_original_bgr is not None: 61 | video_frame += 1 62 | if args.save_frame: 63 | gnu.make_subdir(image_path) 64 | cv2.imwrite(image_path, img_original_bgr) 65 | 66 | elif input_type == 'webcam': 67 | _, img_original_bgr = input_data.read() 68 | 69 | if video_frame < cur_frame: 70 | video_frame += 1 71 | continue 72 | # save the obtained video frames 73 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 74 | if img_original_bgr is not None: 75 | video_frame += 1 76 | if args.save_frame: 77 | gnu.make_subdir(image_path) 78 | cv2.imwrite(image_path, img_original_bgr) 79 | else: 80 | assert False, "Unknown input_type" 81 | 82 | cur_frame +=1 83 | if img_original_bgr is None or cur_frame > args.end_frame: 84 | break 85 | print("--------------------------------------") 86 | 87 | # bbox detection 88 | if load_bbox: 89 | body_pose_list = None 90 | raw_hand_bboxes = None 91 | elif args.crop_type == 'hand_crop': 92 | # hand already cropped, thererore, no need for detection 93 | img_h, img_w = img_original_bgr.shape[:2] 94 | body_pose_list = None 95 | raw_hand_bboxes = None 96 | hand_bbox_list = [ dict(right_hand = np.array([0, 0, img_w, img_h])) ] 97 | else: 98 | # Input images has other body part or hand not cropped. 99 | # Use hand detection model & body detector for hand detection 100 | assert args.crop_type == 'no_crop' 101 | detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy()) 102 | body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output 103 | 104 | # save the obtained body & hand bbox to json file 105 | if args.save_bbox_output: 106 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 107 | 108 | if len(hand_bbox_list) < 1: 109 | print(f"No hand deteced: {image_path}") 110 | continue 111 | 112 | # Hand Pose Regression 113 | pred_output_list = hand_mocap.regress( 114 | img_original_bgr, hand_bbox_list, add_margin=True) 115 | assert len(hand_bbox_list) == len(body_bbox_list) 116 | assert len(body_bbox_list) == len(pred_output_list) 117 | 118 | # extract mesh for rendering (vertices in image space and faces) from pred_output_list 119 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 120 | 121 | # visualize 122 | res_img = visualizer.visualize( 123 | img_original_bgr, 124 | pred_mesh_list = pred_mesh_list, 125 | hand_bbox_list = hand_bbox_list) 126 | 127 | # show result in the screen 128 | if not args.no_display: 129 | res_img = res_img.astype(np.uint8) 130 | ImShow(res_img) 131 | 132 | # save the image (we can make an option here) 133 | if args.out_dir is not None: 134 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 135 | 136 | # save predictions to pkl 137 | if args.save_pred_pkl: 138 | demo_type = 'hand' 139 | demo_utils.save_pred_to_pkl( 140 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 141 | 142 | print(f"Processed : {image_path}") 143 | 144 | #save images as a video 145 | if not args.no_video_out and input_type in ['video', 'webcam']: 146 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 147 | 148 | # When everything done, release the capture 149 | if input_type =='webcam' and input_data is not None: 150 | input_data.release() 151 | cv2.destroyAllWindows() 152 | 153 | 154 | def main(): 155 | args = DemoOptions().parse() 156 | args.use_smplx = True 157 | 158 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 159 | assert torch.cuda.is_available(), "Current version only supports GPU" 
160 | 161 | #Set Bbox detector 162 | bbox_detector = HandBboxDetector(args.view_type, device) 163 | 164 | # Set Mocap regressor 165 | hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device = device) 166 | 167 | # Set Visualizer 168 | if args.renderer_type in ['pytorch3d', 'opendr']: 169 | from renderer.screen_free_visualizer import Visualizer 170 | else: 171 | from renderer.visualizer import Visualizer 172 | visualizer = Visualizer(args.renderer_type) 173 | 174 | # run 175 | run_hand_mocap(args, bbox_detector, hand_mocap, visualizer) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /renderer/denseposeRenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | from OpenGL.GLUT import * 5 | from OpenGL.GLU import * 6 | from renderer.shaders.framework import * 7 | 8 | from renderer.glRenderer import glRenderer 9 | 10 | _glut_window = None 11 | 12 | ''' 13 | #Usage: 14 | render.set_smpl_mesh(v) #v for vertex locations in(6890,3) 15 | render.setBackgroundTexture(rawImg) #Optional BG texture 16 | render.setWindowSize(rawImg.shape[1], rawImg.shape[0]) #Optional: window size 17 | render.show_once() 18 | ''' 19 | 20 | class denseposeRenderer(glRenderer): 21 | 22 | def __init__(self, width=1600, height=1200, name='GL Renderer', 23 | program_files=['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'], color_size=1, ms_rate=1): 24 | glRenderer.__init__(self, width, height, name, program_files, color_size, ms_rate) 25 | 26 | self.densepose_info = self.loadDensepose_info() 27 | 28 | #Densepose Specific 29 | self.dp_faces = self.densepose_info['All_Faces']-1 #0~7828 30 | self.dp_vertexIndices = self.densepose_info['All_vertices']-1 #(1,7829) #Vertex orders used in denpose info. 
There are repeated vetices 31 | 32 | #DP color information 33 | dp_color_seg = self.densepose_info['All_FaceIndices'] #(13774,1) 34 | dp_color_seg = np.repeat(dp_color_seg,3,axis=1) /100.0#24.0 #(13774,3) 35 | self.dp_color_seg = np.repeat( dp_color_seg.flatten()[:,None],3,axis=1) #(41332,3) 36 | 37 | dp_color_U = self.densepose_info['All_U_norm'] #(7289,1) 38 | dp_color_U = np.repeat(dp_color_U,3,axis=1) #(13774,3) 39 | self.dp_color_U = dp_color_U[self.dp_faces.reshape([-1])] #(41332,3) 40 | 41 | dp_color_V = self.densepose_info['All_V_norm'] #(7829,3) 42 | dp_color_V = np.repeat(dp_color_V,3,axis=1) #(13774,3) 43 | self.dp_color_V = dp_color_V[self.dp_faces.reshape([-1])] #(41332,3) 44 | 45 | #make sure you have: /yourpath/renderer/densepose_uv_data/UV_Processed.mat 46 | def loadDensepose_info(self, dp_data_path= 'extra_data/densepose_uv_data/UV_Processed.mat'): 47 | 48 | #Load densepose data 49 | import scipy.io as sio 50 | densepose_info = None 51 | densepose_info = sio.loadmat(dp_data_path) #All_FaceIndices (13774), All_Faces(13774), All_U(7829), All_U_norm(7829), All_V(7829), All_V_norm (7829), All_vertices (7829) 52 | assert densepose_info is not None 53 | # All_FaceIndices - part labels for each face 54 | # All_Faces - vertex indices for each face 55 | # All_vertices - SMPL vertex IDs for all vertices (note that one SMPL vertex can be shared across parts and thus appear in faces with different part labels) 56 | # All_U - U coordinates for all vertices 57 | # All_V - V coordinates for all vertices 58 | # All_U_norm - normalization factor for U coordinates to map them to [0, 1] interval 59 | # All_V_norm - normalization factor for V coordinates to map them to [0, 1] interval 60 | # vertexColor = densepose_info['All_U_norm']*255 61 | # vertexColor = np.zeros((v.shape[1], 3)) 62 | # vertexColor[:,0] = densepose_info['All_U_norm'][:v.shape[1]].flatten() #(6890,3) 63 | # vertexColor[:,1] = densepose_info['All_V_norm'][:v.shape[1]].flatten() #(6890,3) 64 | 65 | # # faces = smplWrapper.f 66 | # v =v[0] #(6890,3) 67 | # dp_vertex = v[densepose_info['All_vertices']-1] #(1,7829,3) #Considering repeatation 68 | # faces =densepose_info['All_Faces']-1 #0~7828 69 | # # vertexColor = densepose_info['All_FaceIndices'] #(13774,1) 70 | # # vertexColor = np.repeat(vertexColor,3,axis=1) /24.0 #(13774,3) 71 | 72 | # # vertexColor = densepose_info['All_U_norm'] #(13774,1) 73 | # vertexColor = densepose_info['All_V_norm'] #(13774,1) 74 | # vertexColor = np.repeat(vertexColor,3,axis=1) 75 | 76 | # # vertexColor[vertexColor!=2]*=0 77 | # vertexColor[vertexColor==2]=24 78 | return densepose_info 79 | 80 | 81 | #vertice: (6890,3) 82 | #colormode: ['seg', 'u', 'v'] 83 | def set_mesh(self, vertices, _): 84 | 85 | if vertices.dtype != np.dtype('float64'): 86 | vertices = vertices.astype(np.float64) #Should be DOUBLE 87 | 88 | #Change the vertex and 89 | dp_vertex = vertices[self.dp_vertexIndices][0] #(7829,3) #Considering repeatation 90 | 91 | # if colormode=='seg': #segment 92 | # self.color_data = self.dp_color_seg 93 | # elif colormode=='v': 94 | # self.color_data[:,1] = self.dp_color_V 95 | # elif colormode=='u': 96 | # self.color_data = self.dp_color_U #(41322,3) 97 | # else: 98 | # assert False 99 | 100 | self.color_data = self.dp_color_U #(41322,3) 101 | self.color_data[:,1] = self.dp_color_V[:,1] 102 | self.color_data[:,2] = self.dp_color_seg[:,2] 103 | 104 | self.vertex_data = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 105 | self.vertex_dim = self.vertex_data.shape[1] 106 | self.n_vertices = 
self.vertex_data.shape[0] 107 | 108 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 109 | glBufferData(GL_ARRAY_BUFFER, self.vertex_data, GL_STATIC_DRAW) 110 | 111 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 112 | glBufferData(GL_ARRAY_BUFFER, self.color_data, GL_STATIC_DRAW) 113 | 114 | glBindBuffer(GL_ARRAY_BUFFER, 0) 115 | 116 | 117 | def add_mesh(self, vertices, _, color=None): 118 | """ 119 | Concatenate the new mesh data to self.vertex_data (as if a single giant mesh) 120 | 121 | Args: 122 | input_vertices (np.ndarray): (verNum, 3). 123 | input_faces (np.ndarray): (faceNum, 3). 124 | """ 125 | 126 | if vertices.dtype != np.dtype('float64'): 127 | vertices = vertices.astype(np.float64) #Should be DOUBLE 128 | 129 | dp_vertex = vertices[self.dp_vertexIndices][0] #(7829,3) #Considering repeatation 130 | 131 | color_data = self.dp_color_U #(41322,3) 132 | color_data[:,1] = self.dp_color_V[:,1] 133 | color_data[:,2] = self.dp_color_seg[:,2] 134 | 135 | if self.vertex_data is None: 136 | self.vertex_data = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 137 | self.color_data = color_data 138 | 139 | else: #Add the data 140 | input_vertices = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 141 | self.vertex_data = np.concatenate( (self.vertex_data, input_vertices), axis=0) #(6870,3) 142 | self.color_data = np.concatenate( (self.color_data, color_data), axis=0) #(6870,3) 143 | 144 | self.vertex_dim = self.vertex_data.shape[1] 145 | self.n_vertices = self.vertex_data.shape[0] 146 | 147 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 148 | glBufferData(GL_ARRAY_BUFFER, self.vertex_data, GL_STATIC_DRAW) 149 | 150 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 151 | glBufferData(GL_ARRAY_BUFFER, self.color_data, GL_STATIC_DRAW) 152 | 153 | 154 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, self.index_buffer) 155 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, self.meshindex_data, GL_STATIC_DRAW) 156 | 157 | glBindBuffer(GL_ARRAY_BUFFER, 0) 158 | -------------------------------------------------------------------------------- /handmocap/hand_modules/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import torch.nn as nn 7 | import math 8 | import torch.utils.model_zoo as model_zoo 9 | 10 | 11 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 12 | 'resnet152'] 13 | 14 | 15 | model_urls = { 16 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 17 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 18 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 19 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 20 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 21 | } 22 | 23 | 24 | def conv3x3(in_planes, out_planes, stride=1): 25 | """3x3 convolution with padding""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 27 | padding=1, bias=False) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | out += residual 96 | out = self.relu(out) 97 | 98 | return out 99 | 100 | 101 | class ResNet(nn.Module): 102 | 103 | def __init__(self, block, layers, num_classes=1000): 104 | self.inplanes = 64 105 | super(ResNet, self).__init__() 106 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 107 | bias=False) 108 | self.bn1 = nn.BatchNorm2d(64) 109 | self.relu = nn.ReLU(inplace=True) 110 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 111 | self.layer1 = self._make_layer(block, 64, layers[0]) 112 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 113 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 114 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 115 | self.avgpool = nn.AvgPool2d(7, stride=1) 
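# Unlike the torchvision classification ResNet, the head below is a single
# 1024-d fully-connected feature layer (fc1); forward() returns this feature
# vector (after ReLU) instead of class logits.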
116 | self.fc1 = nn.Linear(512 * block.expansion, 1024) 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 121 | elif isinstance(m, nn.BatchNorm2d): 122 | nn.init.constant_(m.weight, 1) 123 | nn.init.constant_(m.bias, 0) 124 | 125 | def _make_layer(self, block, planes, blocks, stride=1): 126 | downsample = None 127 | if stride != 1 or self.inplanes != planes * block.expansion: 128 | downsample = nn.Sequential( 129 | nn.Conv2d(self.inplanes, planes * block.expansion, 130 | kernel_size=1, stride=stride, bias=False), 131 | nn.BatchNorm2d(planes * block.expansion), 132 | ) 133 | 134 | layers = [] 135 | layers.append(block(self.inplanes, planes, stride, downsample)) 136 | self.inplanes = planes * block.expansion 137 | for i in range(1, blocks): 138 | layers.append(block(self.inplanes, planes)) 139 | 140 | return nn.Sequential(*layers) 141 | 142 | def forward(self, x): 143 | x = self.conv1(x) 144 | x = self.bn1(x) 145 | x = self.relu(x) 146 | x = self.maxpool(x) 147 | 148 | x = self.layer1(x) 149 | x = self.layer2(x) 150 | x = self.layer3(x) 151 | x = self.layer4(x) 152 | 153 | x = self.avgpool(x) 154 | x = x.view(x.size(0), -1) 155 | x = self.relu(x) 156 | 157 | x = self.fc1(x) 158 | x = self.relu(x) 159 | 160 | return x 161 | 162 | 163 | def resnet18(pretrained=False, **kwargs): 164 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 165 | if pretrained: 166 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) 167 | model.load_state_dict(pretrained_state_dict, strict=False) 168 | return model 169 | 170 | 171 | def resnet34(pretrained=False, **kwargs): 172 | """Constructs a ResNet-34 model. 173 | 174 | Args: 175 | pretrained (bool): If True, returns a model pre-trained on ImageNet 176 | """ 177 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 178 | if pretrained: 179 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) 180 | model.load_state_dict(pretrained_state_dict, strict=False) 181 | return model 182 | 183 | 184 | def resnet50(pretrained=False, **kwargs): 185 | """Constructs a ResNet-50 model. 186 | 187 | Args: 188 | pretrained (bool): If True, returns a model pre-trained on ImageNet 189 | """ 190 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 191 | if pretrained: 192 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) 193 | model.load_state_dict(pretrained_state_dict, strict=False) 194 | return model 195 | 196 | 197 | def resnet101(pretrained=False, **kwargs): 198 | """Constructs a ResNet-101 model. 199 | 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 204 | if pretrained: 205 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) 206 | model.load_state_dict(pretrained_state_dict, strict=False) 207 | return model 208 | 209 | 210 | def resnet152(pretrained=False, **kwargs): 211 | """Constructs a ResNet-152 model. 
212 | 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 217 | if pretrained: 218 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) 219 | model.load_state_dict(pretrained_state_dict, strict=False) 220 | return model 221 | -------------------------------------------------------------------------------- /mocap_utils/coordconv.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Original code from SPIN: https://github.com/nkolot/SPIN 4 | 5 | """ 6 | This file contains functions that are used to perform data augmentation. 7 | """ 8 | import sys 9 | import torch 10 | import numpy as np 11 | import scipy.misc 12 | import cv2 13 | from torchvision.transforms import Normalize 14 | 15 | # For converting coordinate between SMPL 3D coord <-> 2D bbox <-> original 2D image 16 | # data3D: (N,3), where N is number of 3D points in "smpl"'s 3D coordinate (vertex or skeleton) 17 | 18 | def convert_smpl_to_bbox(data3D, scale, trans, bAppTransFirst=False): 19 | data3D = data3D.copy() 20 | resnet_input_size_half = 224 *0.5 21 | if bAppTransFirst: # Hand model 22 | data3D[:,0:2] += trans 23 | data3D *= scale # apply scaling 24 | else: 25 | data3D *= scale # apply scaling 26 | data3D[:,0:2] += trans 27 | 28 | data3D*= resnet_input_size_half # 112 is originated from hrm's input size (224,24) 29 | # data3D[:,:2]*= resnet_input_size_half # 112 is originated from hrm's input size (224,24) 30 | return data3D 31 | 32 | 33 | def convert_bbox_to_oriIm(data3D, boxScale_o2n, bboxTopLeft, imgSizeW, imgSizeH): 34 | data3D = data3D.copy() 35 | resnet_input_size_half = 224 *0.5 36 | imgSize = np.array([imgSizeW,imgSizeH]) 37 | 38 | data3D /= boxScale_o2n 39 | 40 | if not isinstance(bboxTopLeft, np.ndarray): 41 | assert isinstance(bboxTopLeft, tuple) 42 | assert len(bboxTopLeft) == 2 43 | bboxTopLeft = np.array(bboxTopLeft) 44 | 45 | data3D[:,:2] += (bboxTopLeft + resnet_input_size_half/boxScale_o2n) 46 | 47 | return data3D 48 | 49 | 50 | def convert_smpl_to_bbox_perspective(data3D, scale_ori, trans_ori, focalLeng, scaleFactor=1.0): 51 | data3D = data3D.copy() 52 | resnet_input_size_half = 224 *0.5 53 | 54 | scale = scale_ori* resnet_input_size_half 55 | trans = trans_ori *resnet_input_size_half 56 | 57 | if False: #Weak perspective 58 | data3D *= scale #apply scaling 59 | data3D[:,0:2] += trans 60 | else: 61 | # delta = (trans - imageShape*0.5)/scale 62 | # Current projection already consider camera center during the rendering. 
63 | # Thus no need to consider principle axis 64 | delta = (trans )/scale 65 | data3D[:,0:2] +=delta 66 | 67 | newZ = focalLeng/scale 68 | deltaZ = newZ - np.mean(data3D[:,2]) 69 | data3D[:,2] +=deltaZ 70 | # data3D[:,2] +=16.471718554146534 #debug 71 | 72 | if False: #Scaling to be a certain dist from camera 73 | texture_plan_depth = 500 74 | ratio = texture_plan_depth /np.mean(data3D[:,2]) 75 | data3D *=ratio 76 | else: 77 | data3D *=scaleFactor 78 | 79 | return data3D 80 | 81 | 82 | """ Extract bbox information """ 83 | def bbox_from_openpose(openpose_file, rescale=1.2, detection_thresh=0.2): 84 | """Get center and scale for bounding box from openpose detections.""" 85 | with open(openpose_file, 'r') as f: 86 | data = json.load(f) 87 | if 'people' not in data or len(data['people'])==0: 88 | return None, None 89 | # keypoints = json.load(f)['people'][0]['pose_keypoints_2d'] 90 | keypoints = data['people'][0]['pose_keypoints_2d'] 91 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 92 | valid = keypoints[:,-1] > detection_thresh 93 | 94 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 95 | 96 | # min_pt = np.min(valid_keypoints, axis=0) 97 | # max_pt = np.max(valid_keypoints, axis=0) 98 | # bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 99 | 100 | center = valid_keypoints.mean(axis=0) 101 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 102 | # adjust bounding box tightness 103 | scale = bbox_size / 200.0 104 | scale *= rescale 105 | return center, scale#, bbox 106 | 107 | 108 | # keypoints: (Nx3) 109 | def bbox_from_keypoint2d(keypoints, rescale=1.2, detection_thresh=0.2): 110 | """ 111 | output: 112 | center: bbox center 113 | scale: scale_n2o: 224x224 -> original bbox size (max length if not a square bbox) 114 | """ 115 | # """Get center and scale for bounding box from openpose detections.""" 116 | 117 | if len(keypoints.shape)==2 and keypoints.shape[1]==2: #(X,2) 118 | valid_keypoints = keypoints 119 | else: 120 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 121 | valid = keypoints[:,-1] > detection_thresh 122 | 123 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 124 | 125 | # min_pt = np.min(valid_keypoints, axis=0) 126 | # max_pt = np.max(valid_keypoints, axis=0) 127 | # bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 128 | 129 | center = valid_keypoints.mean(axis=0) 130 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 131 | 132 | 133 | # adjust bounding box tightness 134 | scale = bbox_size / 200.0 135 | scale *= rescale 136 | return center, scale#, bbox 137 | 138 | 139 | def bbox_from_keypoints(keypoints, rescale=1.2, detection_thresh=0.2, imageHeight= None): 140 | """Get center and scale for bounding box from openpose detections.""" 141 | 142 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 143 | valid = keypoints[:,-1] > detection_thresh 144 | 145 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 146 | 147 | if len(valid_keypoints)<2: 148 | return None, None, None 149 | 150 | 151 | if False: #Should have all limbs and nose 152 | if np.sum(valid[ [ 2,3,4, 5,6,7, 9,10, 12,13,1,0] ]) <12: 153 | return None, None, None 154 | 155 | min_pt = np.min(valid_keypoints, axis=0) 156 | max_pt = np.max(valid_keypoints, axis=0) 157 | 158 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 159 | 160 | if imageHeight is not None: 161 | 162 | if valid[10]==False and valid[13] == False: # No knees ub ioeb 163 | max_pt[1] = 
min(max_pt[1] + (max_pt[1]- min_pt[1]), imageHeight ) 164 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 165 | valid_keypoints = np.vstack( (valid_keypoints, np.array(max_pt)) ) 166 | 167 | 168 | elif valid[11]==False and valid[14] == False: #No foot 169 | max_pt[1] = min(max_pt[1] + (max_pt[1]- min_pt[1])*0.2, imageHeight ) 170 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 171 | 172 | valid_keypoints = np.vstack( (valid_keypoints, np.array(max_pt)) ) 173 | 174 | 175 | center = valid_keypoints.mean(axis=0) 176 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 177 | # adjust bounding box tightness 178 | scale = bbox_size / 200.0 179 | scale *= rescale 180 | return center, scale, bbox 181 | 182 | 183 | def bbox_from_bbr(bbox_XYWH, rescale=1.2, detection_thresh=0.2, imageHeight= None): 184 | #bbr: (minX, minY, width, height) 185 | """Get center and scale for bounding box from openpose detections.""" 186 | 187 | center = bbox_XYWH[:2] + 0.5 * bbox_XYWH[2:] 188 | bbox_size = max(bbox_XYWH[2:]) 189 | # adjust bounding box tightness 190 | scale = bbox_size / 200.0 191 | scale *= rescale 192 | return center, scale#, bbox_XYWH 193 | 194 | 195 | def bbox_from_json(bbox_file): 196 | """Get center and scale of bounding box from bounding box annotations. 197 | The expected format is [top_left(x), top_left(y), width, height]. 198 | """ 199 | with open(bbox_file, 'r') as f: 200 | bbox = np.array(json.load(f)['bbox']).astype(np.float32) 201 | ul_corner = bbox[:2] 202 | center = ul_corner + 0.5 * bbox[2:] 203 | width = max(bbox[2], bbox[3]) 204 | scale = width / 200.0 205 | # make sure the bounding box is rectangular 206 | return center, scale -------------------------------------------------------------------------------- /bodymocap/utils/geometry.py: -------------------------------------------------------------------------------- 1 | 2 | # Original code from SPIN: https://github.com/nkolot/SPIN 3 | 4 | import torch 5 | from torch.nn import functional as F 6 | import numpy as np 7 | 8 | import torchgeometry 9 | 10 | """ 11 | Useful geometric operations, e.g. Perspective projection and a differentiable Rodrigues formula 12 | Parts of the code are taken from https://github.com/MandyMo/pytorch_HMR 13 | """ 14 | def batch_rodrigues(theta): 15 | """Convert axis-angle representation to rotation matrix. 16 | Args: 17 | theta: size = [B, 3] 18 | Returns: 19 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] 20 | """ 21 | l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1) 22 | angle = torch.unsqueeze(l1norm, -1) 23 | normalized = torch.div(theta, angle) 24 | angle = angle * 0.5 25 | v_cos = torch.cos(angle) 26 | v_sin = torch.sin(angle) 27 | quat = torch.cat([v_cos, v_sin * normalized], dim = 1) 28 | return quat_to_rotmat(quat) 29 | 30 | def quat_to_rotmat(quat): 31 | """Convert quaternion coefficients to rotation matrix. 
32 | Args: 33 | quat: size = [B, 4] 4 <===>(w, x, y, z) 34 | Returns: 35 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] 36 | """ 37 | norm_quat = quat 38 | norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True) 39 | w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3] 40 | 41 | B = quat.size(0) 42 | 43 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) 44 | wx, wy, wz = w*x, w*y, w*z 45 | xy, xz, yz = x*y, x*z, y*z 46 | 47 | rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz, 48 | 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx, 49 | 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3) 50 | return rotMat 51 | 52 | 53 | 54 | 55 | 56 | 57 | def cross_product(u, v): 58 | batch = u.shape[0] 59 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 60 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 61 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 62 | 63 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) 64 | 65 | return out 66 | 67 | def normalize_vector(v): 68 | batch = v.shape[0] 69 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 70 | v_mag = torch.max(v_mag, v.new([1e-8])) 71 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 72 | v = v/v_mag 73 | return v 74 | 75 | #Code from 76 | def rot6d_to_rotmat(x): 77 | """Convert 6D rotation representation to 3x3 rotation matrix. 78 | Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019 79 | Input: 80 | (B,6) Batch of 6-D rotation representations 81 | Output: 82 | (B,3,3) Batch of corresponding rotation matrices 83 | """ 84 | x = x.view(-1,3,2) 85 | a1 = x[:, :, 0] 86 | a2 = x[:, :, 1] 87 | b1 = F.normalize(a1) 88 | b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1) 89 | b3 = torch.cross(b1, b2) 90 | return torch.stack((b1, b2, b3), dim=-1) 91 | 92 | 93 | def perspective_projection(points, rotation, translation, 94 | focal_length, camera_center): 95 | """ 96 | This function computes the perspective projection of a set of points. 97 | Input: 98 | points (bs, N, 3): 3D points 99 | rotation (bs, 3, 3): Camera rotation 100 | translation (bs, 3): Camera translation 101 | focal_length (bs,) or scalar: Focal length 102 | camera_center (bs, 2): Camera center 103 | """ 104 | batch_size = points.shape[0] 105 | K = torch.zeros([batch_size, 3, 3], device=points.device) 106 | K[:,0,0] = focal_length 107 | K[:,1,1] = focal_length 108 | K[:,2,2] = 1. 109 | K[:,:-1, -1] = camera_center 110 | 111 | # Transform points 112 | points = torch.einsum('bij,bkj->bki', rotation, points) 113 | points = points + translation.unsqueeze(1) 114 | 115 | # Apply perspective distortion 116 | projected_points = points / points[:,:,-1].unsqueeze(-1) 117 | 118 | # Apply camera intrinsics 119 | projected_points = torch.einsum('bij,bkj->bki', K, projected_points) 120 | 121 | return projected_points[:, :, :-1] 122 | 123 | 124 | def estimate_translation_np(S, joints_2d, joints_conf, focal_length=5000, img_size=224): 125 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d. 
126 | Input: 127 | S: (25, 3) 3D joint locations 128 | joints: (25, 3) 2D joint locations and confidence 129 | Returns: 130 | (3,) camera translation vector 131 | """ 132 | 133 | num_joints = S.shape[0] 134 | # focal length 135 | f = np.array([focal_length,focal_length]) 136 | # optical center 137 | center = np.array([img_size/2., img_size/2.]) 138 | 139 | # transformations 140 | Z = np.reshape(np.tile(S[:,2],(2,1)).T,-1) 141 | XY = np.reshape(S[:,0:2],-1) 142 | O = np.tile(center,num_joints) 143 | F = np.tile(f,num_joints) 144 | weight2 = np.reshape(np.tile(np.sqrt(joints_conf),(2,1)).T,-1) 145 | 146 | # least squares 147 | Q = np.array([F*np.tile(np.array([1,0]),num_joints), F*np.tile(np.array([0,1]),num_joints), O-np.reshape(joints_2d,-1)]).T 148 | c = (np.reshape(joints_2d,-1)-O)*Z - F*XY 149 | 150 | # weighted least squares 151 | W = np.diagflat(weight2) 152 | Q = np.dot(W,Q) 153 | c = np.dot(W,c) 154 | 155 | # square matrix 156 | A = np.dot(Q.T,Q) 157 | b = np.dot(Q.T,c) 158 | 159 | # solution 160 | trans = np.linalg.solve(A, b) 161 | 162 | return trans 163 | 164 | 165 | def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.): 166 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d. 167 | Input: 168 | S: (B, 49, 3) 3D joint locations 169 | joints: (B, 49, 3) 2D joint locations and confidence 170 | Returns: 171 | (B, 3) camera translation vectors 172 | """ 173 | 174 | device = S.device 175 | # Use only joints 25:49 (GT joints) 176 | S = S[:, 25:, :].cpu().numpy() 177 | joints_2d = joints_2d[:, 25:, :].cpu().numpy() 178 | joints_conf = joints_2d[:, :, -1] 179 | joints_2d = joints_2d[:, :, :-1] 180 | trans = np.zeros((S.shape[0], 3), dtype=np.float32) 181 | # Find the translation for each example in the batch 182 | for i in range(S.shape[0]): 183 | S_i = S[i] 184 | joints_i = joints_2d[i] 185 | conf_i = joints_conf[i] 186 | trans[i] = estimate_translation_np(S_i, joints_i, conf_i, focal_length=focal_length, img_size=img_size) 187 | return torch.from_numpy(trans).to(device) 188 | 189 | 190 | 191 | 192 | def weakProjection_gpu(skel3D, scale, trans2D ): 193 | # if len(skel3D.shape)==1: 194 | # skel3D = np.reshape(skel3D, (-1,3)) 195 | 196 | skel3D = skel3D.view((skel3D.shape[0],-1,3)) 197 | trans2D = trans2D.view((trans2D.shape[0],1,2)) 198 | scale = scale.view((scale.shape[0],1,1)) 199 | skel3D_proj = scale* skel3D[:,:,:2] + trans2D 200 | 201 | return skel3D_proj#skel3D_proj.view((skel3D.shape[0],-1)) #(N, 19*2) o 202 | 203 | 204 | 205 | #(57) (1) (2) 206 | def weakProjection(skel3D, scale, trans2D ): 207 | 208 | skel3D_proj = scale* skel3D[:,:2] + trans2D 209 | 210 | return skel3D_proj#skel3D_proj.view((skel3D.shape[0],-1)) #(N, 19*2) o 211 | 212 | 213 | 214 | def rotmat_to_angleaxis(init_pred_rotmat): 215 | """ 216 | init_pred_rotmat: torch.tensor with (24,3,3) dimension 217 | """ 218 | device = init_pred_rotmat.device 219 | ones = torch.tensor([0,0,1], dtype=torch.float32,).view(1, 3, 1).expand(init_pred_rotmat.shape[1], -1, -1).to(device) 220 | 221 | pred_rotmat_hom = torch.cat([ init_pred_rotmat.view(-1, 3, 3),ones ], dim=-1) #24,3,4 222 | pred_aa = torchgeometry.rotation_matrix_to_angle_axis(pred_rotmat_hom).contiguous().view(1, -1) #[1,72] 223 | # tgm.rotation_matrix_to_angle_axis returns NaN for 0 rotation, so manually hack it 224 | pred_aa[torch.isnan(pred_aa)] = 0.0 #[1,72] 225 | pred_aa = pred_aa.view(1,24,3) 226 | 227 | return pred_aa 228 | 229 | 
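The geometry helpers above operate on batched tensors with the shapes given in their docstrings. The following is a minimal usage sketch, not part of the repository: the import path is assumed from the directory layout, and the tensors are dummy placeholders.
```
import torch
from bodymocap.utils.geometry import rot6d_to_rotmat, weakProjection_gpu

batch_size = 2
rot6d = torch.randn(batch_size * 24, 6)      # one 6D rotation per SMPL joint
rotmat = rot6d_to_rotmat(rot6d)              # -> (batch_size * 24, 3, 3)

joints3d = torch.randn(batch_size, 49, 3)    # placeholder 3D joints
cam_scale = torch.ones(batch_size, 1)        # weak-perspective scale
cam_trans = torch.zeros(batch_size, 2)       # weak-perspective translation (tx, ty)
joints2d = weakProjection_gpu(joints3d, cam_scale, cam_trans)   # (batch_size, 49, 2)
```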
-------------------------------------------------------------------------------- /docs/run_handmocap.md: -------------------------------------------------------------------------------- 1 | # Hand Motion Capture Demo 2 | 3 | Our hand module provides 3D hand motion capture output. We use the [HMR](https://akanazawa.github.io/hmr/) model, trained with several public hand pose datasets, which achieves SOTA performance among single-image based methods. See our [FrankMocap paper](https://penincillin.github.io/frank_mocap) for details. 4 |

6 | 7 |

8 | 9 | 10 | ## A Quick Start 11 | - Run the following commands. The mocap output will be shown on your screen. 12 | ``` 13 | # Using a machine with a monitor to show output on screen 14 | # OpenGL renderer is used by default (--renderer_type opengl) 15 | # The output images are also saved in ./mocap_output 16 | python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 17 | 18 | # Screenless mode (e.g., a remote server) 19 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 20 | 21 | # Set --renderer_type to use other renderers 22 | python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 23 | ``` 24 | 25 | ## Run Demo with a Webcam Input 26 | - Run: 27 | ``` 28 | python -m demo.demo_handmocap --input_path webcam 29 | 30 | # or using the opengl gui renderer 31 | python -m demo.demo_handmocap --input_path webcam --renderer_type opengl_gui 32 | ``` 33 | - See below for the keys that control the view in opengl gui mode 34 | 35 | ## Run Demo for Egocentric Videos 36 | - For 3D hand pose estimation in egocentric views, use `--view_type ego_centric` 37 | ``` 38 | # With a screen 39 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --view_type ego_centric 40 | 41 | # Screenless mode (e.g., a remote server) 42 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --view_type ego_centric 43 | ``` 44 | - We use a different hand detector adjusted for egocentric views, but the 3D hand pose regressor is the same. By default, the hand module assumes ```third_view``` (see the wiring sketch below). 45 |
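- For reference, `--view_type` only changes how the hand bounding-box detector is constructed; the 3D hand regressor is shared between both view types. A condensed sketch of that wiring, taken from `demo/demo_handmocap.py`:
```
bbox_detector = HandBboxDetector(args.view_type, device)   # 'third_view' or 'ego_centric'
hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device=device)
```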

46 | 47 |

48 | 49 | ## Other Renderer Options 50 | - While opengl is faster, it requires a screen connected to your machine. You may try screenless rendering or the other rendering options described below. 51 | - Screenless Opengl Rendering 52 | - If you do not have a screen attached to your machine (e.g., remote servers), use the [xvfb-run](http://manpages.ubuntu.com/manpages/trusty/man1/xvfb-run.1.html) tool 53 | ``` 54 | # The output images are also saved in ./mocap_output 55 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type opengl 56 | ``` 57 | - [Pytorch3D](https://pytorch3d.org/) 58 | - We use pytorch3d only for rendering purposes. 59 | - Run the following command to use the pytorch3d renderer 60 | ``` 61 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 62 | ``` 63 | - [OpenDR](https://github.com/mattloper/opendr/wiki) 64 | - Alternatively, run the following command to use the opendr renderer 65 | ``` 66 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type opendr 67 | ``` 68 | 69 | ## Keys for OpenGL GUI Mode 70 | - In OpenGL GUI visualization mode, you can use the mouse and keyboard to change the viewpoint. 71 | - This mode requires a screen connected to your machine. 72 | - Keys in the OpenGL 3D window 73 | - mouse-Left: view rotation 74 | - mouse-Right: view zoom changes 75 | - shift + mouse-Left: view pan 76 | - C: toggle for image view/3D free view 77 | - w: toggle wireframe/solid mesh 78 | - j: toggle skeleton visualization 79 | - R: automatically rotate views 80 | - f: toggle floor drawing 81 | - q: exit program 82 | 83 | 84 | ## Run Demo with Precomputed Bboxes 85 | - You can use precomputed bboxes without running any detectors. Save the bboxes for each image in json format. Each json should contain the input image path. 86 | - Assuming your bboxes are `/your/bbox_dir/XXX.json`: 87 | ``` 88 | python -m demo.demo_handmocap --input_path /your/bbox_dir --out_dir ./mocap_output 89 | ``` 90 | - Bbox format (json): 91 | ``` 92 | {"image_path": "xxx.jpg", "hand_bbox_list":[{"left_hand":[x,y,w,h], "right_hand":[x,y,w,h]}], "body_bbox_list":[[x,y,w,h]]} 93 | ``` 94 | - Note that the bbox format is [minX,minY,width,height] 95 | - For example: 96 | ``` 97 | {"image_path": "./sample_data/images/cj_dance_01_03_1_00075.png", "body_bbox_list": [[149, 380, 242, 565]], "hand_bbox_list": [{"left_hand": [288.9151611328125, 376.70184326171875, 39.796295166015625, 51.72357177734375], "right_hand": [234.97779846191406, 363.4115295410156, 50.28489685058594, 57.89691162109375]}]} 98 | ``` 99 | ## Options 100 | ### Input options 101 | - `--input_path webcam`: Run demo with a webcam input 102 | - `--input_path /your/path/video.mp4`: Run demo for a video file (mp4, avi, mov) 103 | - `--input_path /your/dirPath`: Run demo for a folder that contains image sequences 104 | - `--input_path /your/bboxDirPath`: Run demo for a folder that contains bbox json files. See [bbox format](https://github.com/facebookresearch/eft/blob/master/docs/README_dataformat.md#bbox-format-json) 105 | 106 | - `--view_type`: The view type of input. 
It could be ```third_view``` or```ego_centric``` 107 | 108 | 109 | ### Output options 110 | - `--out_dir ./outputdirname`: Save the output images into files 111 | - `--save_pred_pkl`: Save the pose reconstruction data (SMPL parameters and vertices) into pkl files (requires `--out_dir ./outputdirname`) 112 | - `--save_bbox_output`: Save the bbox data in json files (bbox_xywh format) (requires `--out_dir ./outputdirname`) 113 | - `--no_display`: Do not visualize output on the screen 114 | 115 | ### Other options 116 | - `--use_smplx`: Use SMPLX model for body pose estimation (instead of SMPL). This uses a different pre-trainined weights and may have different performance. 117 | - `--start_frame 100 --end_frame 200`: Specify start and end frames (e.g., 100th frame and 200th frame in this example) 118 | - `--single_person`: To enforce single person mocap (to avoid outlier bboxes). This mode chooses the biggest bbox. 119 | 120 | ## License 121 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 122 | See the [LICENSE](LICENSE) file. 123 | 124 | 176 | -------------------------------------------------------------------------------- /bodymocap/body_mocap_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import cv2 4 | import sys 5 | import torch 6 | import numpy as np 7 | import pickle 8 | from torchvision.transforms import Normalize 9 | 10 | from bodymocap.models import hmr, SMPL, SMPLX 11 | from bodymocap import constants 12 | from bodymocap.utils.imutils import crop, crop_bboxInfo, process_image_bbox, process_image_keypoints, bbox_from_keypoints 13 | from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm 14 | import mocap_utils.geometry_utils as gu 15 | 16 | 17 | class BodyMocap(object): 18 | def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'), use_smplx=False): 19 | 20 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 21 | 22 | # Load parametric model (SMPLX or SMPL) 23 | if use_smplx: 24 | smplModelPath = smpl_dir + '/SMPLX_NEUTRAL.pkl' 25 | self.smpl = SMPLX(smpl_dir, 26 | batch_size=1, 27 | num_betas = 10, 28 | use_pca = False, 29 | create_transl=False).to(self.device) 30 | self.use_smplx = True 31 | else: 32 | smplModelPath = smpl_dir + '/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl' 33 | self.smpl = SMPL(smplModelPath, batch_size=1, create_transl=False).to(self.device) 34 | self.use_smplx = False 35 | 36 | #Load pre-trained neural network 37 | SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz' 38 | self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device) 39 | checkpoint = torch.load(regressor_checkpoint) 40 | self.model_regressor.load_state_dict(checkpoint['model'], strict=False) 41 | self.model_regressor.eval() 42 | 43 | 44 | def regress(self, img_original, body_bbox_list): 45 | """ 46 | args: 47 | img_original: original raw image (BGR order by using cv2.imread) 48 | body_bbox: bounding box around the target: (minX, minY, width, height) 49 | outputs: 50 | pred_vertices_img: 51 | pred_joints_vis_img: 52 | pred_rotmat 53 | pred_betas 54 | pred_camera 55 | bbox: [bbr[0], bbr[1],bbr[0]+bbr[2], bbr[1]+bbr[3]]) 56 | bboxTopLeft: bbox top left (redundant) 57 | boxScale_o2n: bbox scaling factor (redundant) 58 | """ 59 | pred_output_list = list() 60 | 61 | for body_bbox in body_bbox_list: 62 | img, norm_img, boxScale_o2n, bboxTopLeft, bbox = 
process_image_bbox( 63 | img_original, body_bbox, input_res=constants.IMG_RES) 64 | bboxTopLeft = np.array(bboxTopLeft) 65 | 66 | # bboxTopLeft = bbox['bboxXYWH'][:2] 67 | if img is None: 68 | pred_output_list.append(None) 69 | continue 70 | 71 | with torch.no_grad(): 72 | # model forward 73 | pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device)) 74 | 75 | #Convert rot_mat to aa since hands are always in aa 76 | # pred_aa = rotmat3x3_to_angle_axis(pred_rotmat) 77 | pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda() 78 | pred_aa = pred_aa.reshape(pred_aa.shape[0], 72) 79 | smpl_output = self.smpl( 80 | betas=pred_betas, 81 | body_pose=pred_aa[:,3:], 82 | global_orient=pred_aa[:,:3], 83 | pose2rot=True) 84 | pred_vertices = smpl_output.vertices 85 | pred_joints_3d = smpl_output.joints 86 | 87 | pred_vertices = pred_vertices[0].cpu().numpy() 88 | 89 | pred_camera = pred_camera.cpu().numpy().ravel() 90 | camScale = pred_camera[0] # *1.15 91 | camTrans = pred_camera[1:] 92 | 93 | pred_output = dict() 94 | # Convert mesh to original image space (X,Y are aligned to image) 95 | # 1. SMPL -> 2D bbox 96 | # 2. 2D bbox -> original 2D image 97 | pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans) 98 | pred_vertices_img = convert_bbox_to_oriIm( 99 | pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 100 | 101 | # Convert joint to original image space (X,Y are aligned to image) 102 | pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (1,49,3) 103 | pred_joints_vis = pred_joints_3d[:,:3] # (49,3) 104 | pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans) 105 | pred_joints_vis_img = convert_bbox_to_oriIm( 106 | pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 107 | 108 | # Output 109 | pred_output['img_cropped'] = img[:, :, ::-1] 110 | pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space 111 | pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space 112 | pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space 113 | 114 | pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0]) 115 | pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72) # (1, 72) 116 | 117 | pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3) 118 | pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10) 119 | 120 | pred_output['pred_camera'] = pred_camera 121 | pred_output['bbox_top_left'] = bboxTopLeft 122 | pred_output['bbox_scale_ratio'] = boxScale_o2n 123 | pred_output['faces'] = self.smpl.faces 124 | 125 | if self.use_smplx: 126 | img_center = np.array((img_original.shape[1], img_original.shape[0]) ) * 0.5 127 | # right hand 128 | pred_joints = smpl_output.right_hand_joints[0].cpu().numpy() 129 | pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) 130 | pred_joints_img = convert_bbox_to_oriIm( 131 | pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 132 | pred_output['right_hand_joints_img_coord'] = pred_joints_img 133 | # left hand 134 | pred_joints = smpl_output.left_hand_joints[0].cpu().numpy() 135 | pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) 136 | pred_joints_img = convert_bbox_to_oriIm( 137 | pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], 
img_original.shape[0]) 138 | pred_output['left_hand_joints_img_coord'] = pred_joints_img 139 | 140 | pred_output_list.append(pred_output) 141 | 142 | return pred_output_list 143 | 144 | 145 | def get_hand_bboxes(self, pred_body_list, img_shape): 146 | """ 147 | args: 148 | pred_body_list: output of body regresion 149 | img_shape: img_height, img_width 150 | outputs: 151 | hand_bbox_list: 152 | """ 153 | hand_bbox_list = list() 154 | for pred_body in pred_body_list: 155 | hand_bbox = dict( 156 | left_hand = None, 157 | right_hand = None 158 | ) 159 | if pred_body is None: 160 | hand_bbox_list.append(hand_bbox) 161 | else: 162 | for hand_type in hand_bbox: 163 | key = f'{hand_type}_joints_img_coord' 164 | pred_joints_vis_img = pred_body[key] 165 | 166 | if pred_joints_vis_img is not None: 167 | # get initial bbox 168 | x0, x1 = np.min(pred_joints_vis_img[:, 0]), np.max(pred_joints_vis_img[:, 0]) 169 | y0, y1 = np.min(pred_joints_vis_img[:, 1]), np.max(pred_joints_vis_img[:, 1]) 170 | width, height = x1-x0, y1-y0 171 | # extend the obtained bbox 172 | margin = int(max(height, width) * 0.2) 173 | img_height, img_width = img_shape 174 | x0 = max(x0 - margin, 0) 175 | y0 = max(y0 - margin, 0) 176 | x1 = min(x1 + margin, img_width) 177 | y1 = min(y1 + margin, img_height) 178 | # result bbox in (x0, y0, w, h) format 179 | hand_bbox[hand_type] = np.array([x0, y0, x1-x0, y1-y0]) # in (x, y, w, h ) format 180 | 181 | hand_bbox_list.append(hand_bbox) 182 | 183 | return hand_bbox_list 184 | -------------------------------------------------------------------------------- /demo/demo_frankmocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | from torchvision.transforms import Normalize 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | import json 12 | import pickle 13 | 14 | ############# input parameters ############# 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | from handmocap.hand_mocap_api import HandMocap 18 | import mocap_utils.demo_utils as demo_utils 19 | import mocap_utils.general_utils as gnu 20 | from mocap_utils.timer import Timer 21 | from datetime import datetime 22 | 23 | from bodymocap.body_bbox_detector import BodyPoseEstimator 24 | from handmocap.hand_bbox_detector import HandBboxDetector 25 | from integration.copy_and_paste import integration_copy_paste 26 | 27 | import renderer.image_utils as imu 28 | from renderer.viewer2D import ImShow 29 | 30 | 31 | def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person): 32 | # (to make the order as consistent as possible without tracking) 33 | bbox_size = [ (x[2] * x[3]) for x in body_bbox_list] 34 | idx_big2small = np.argsort(bbox_size)[::-1] 35 | body_bbox_list = [ body_bbox_list[i] for i in idx_big2small ] 36 | hand_bbox_list = [hand_bbox_list[i] for i in idx_big2small] 37 | 38 | if single_person and len(body_bbox_list)>0: 39 | body_bbox_list = [body_bbox_list[0], ] 40 | hand_bbox_list = [hand_bbox_list[0], ] 41 | 42 | return body_bbox_list, hand_bbox_list 43 | 44 | 45 | def run_regress( 46 | args, img_original_bgr, 47 | body_bbox_list, hand_bbox_list, bbox_detector, 48 | body_mocap, hand_mocap 49 | ): 50 | cond1 = len(body_bbox_list) > 0 and len(hand_bbox_list) > 0 51 | cond2 = not args.frankmocap_fast_mode 52 | 53 | # use pre-computed bbox or use slow detection mode 54 | if cond1 or cond2: 
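        # full (non-fast) path: reuse the given bboxes when both body and hand
        # boxes are available (cond1); otherwise run the joint body+hand detector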
55 | if not cond1 and cond2: 56 | # run detection only when bbox is not available 57 | body_pose_list, body_bbox_list, hand_bbox_list, _ = \ 58 | bbox_detector.detect_hand_bbox(img_original_bgr.copy()) 59 | else: 60 | print("Use pre-computed bounding boxes") 61 | assert len(body_bbox_list) == len(hand_bbox_list) 62 | 63 | if len(body_bbox_list) < 1: 64 | return list(), list(), list() 65 | 66 | # sort the bbox using bbox size 67 | # only keep on bbox if args.single_person is set 68 | body_bbox_list, hand_bbox_list = __filter_bbox_list( 69 | body_bbox_list, hand_bbox_list, args.single_person) 70 | 71 | # hand & body pose regression 72 | pred_hand_list = hand_mocap.regress( 73 | img_original_bgr, hand_bbox_list, add_margin=True) 74 | pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list) 75 | assert len(hand_bbox_list) == len(pred_hand_list) 76 | assert len(pred_hand_list) == len(pred_body_list) 77 | 78 | else: 79 | _, body_bbox_list = bbox_detector.detect_body_bbox(img_original_bgr.copy()) 80 | 81 | if len(body_bbox_list) < 1: 82 | return list(), list(), list() 83 | 84 | # sort the bbox using bbox size 85 | # only keep on bbox if args.single_person is set 86 | hand_bbox_list = [None, ] * len(body_bbox_list) 87 | body_bbox_list, _ = __filter_bbox_list( 88 | body_bbox_list, hand_bbox_list, args.single_person) 89 | 90 | # body regression first 91 | pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list) 92 | assert len(body_bbox_list) == len(pred_body_list) 93 | 94 | # get hand bbox from body 95 | hand_bbox_list = body_mocap.get_hand_bboxes(pred_body_list, img_original_bgr.shape[:2]) 96 | assert len(pred_body_list) == len(hand_bbox_list) 97 | 98 | # hand regression 99 | pred_hand_list = hand_mocap.regress( 100 | img_original_bgr, hand_bbox_list, add_margin=True) 101 | assert len(hand_bbox_list) == len(pred_hand_list) 102 | 103 | # integration by copy-and-paste 104 | integral_output_list = integration_copy_paste( 105 | pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape) 106 | 107 | return body_bbox_list, hand_bbox_list, integral_output_list 108 | 109 | 110 | def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer): 111 | #Setup input data to handle different types of inputs 112 | input_type, input_data = demo_utils.setup_input(args) 113 | 114 | cur_frame = args.start_frame 115 | video_frame = 0 116 | while True: 117 | # load data 118 | load_bbox = False 119 | 120 | if input_type =='image_dir': 121 | if cur_frame < len(input_data): 122 | image_path = input_data[cur_frame] 123 | img_original_bgr = cv2.imread(image_path) 124 | else: 125 | img_original_bgr = None 126 | 127 | elif input_type == 'bbox_dir': 128 | if cur_frame < len(input_data): 129 | image_path = input_data[cur_frame]['image_path'] 130 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 131 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 132 | img_original_bgr = cv2.imread(image_path) 133 | load_bbox = True 134 | else: 135 | img_original_bgr = None 136 | 137 | elif input_type == 'video': 138 | _, img_original_bgr = input_data.read() 139 | if video_frame < cur_frame: 140 | video_frame += 1 141 | continue 142 | # save the obtained video frames 143 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 144 | if img_original_bgr is not None: 145 | video_frame += 1 146 | if args.save_frame: 147 | gnu.make_subdir(image_path) 148 | cv2.imwrite(image_path, img_original_bgr) 149 | 150 | elif input_type == 'webcam': 151 | _, img_original_bgr 
= input_data.read() 152 | 153 | if video_frame < cur_frame: 154 | video_frame += 1 155 | continue 156 | # save the obtained video frames 157 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 158 | if img_original_bgr is not None: 159 | video_frame += 1 160 | if args.save_frame: 161 | gnu.make_subdir(image_path) 162 | cv2.imwrite(image_path, img_original_bgr) 163 | else: 164 | assert False, "Unknown input_type" 165 | 166 | cur_frame +=1 167 | if img_original_bgr is None or cur_frame > args.end_frame: 168 | break 169 | print("--------------------------------------") 170 | 171 | # bbox detection 172 | if not load_bbox: 173 | body_bbox_list, hand_bbox_list = list(), list() 174 | 175 | # regression (includes integration) 176 | body_bbox_list, hand_bbox_list, pred_output_list = run_regress( 177 | args, img_original_bgr, 178 | body_bbox_list, hand_bbox_list, bbox_detector, 179 | body_mocap, hand_mocap) 180 | 181 | # save the obtained body & hand bbox to json file 182 | if args.save_bbox_output: 183 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 184 | 185 | if len(body_bbox_list) < 1: 186 | print(f"No body deteced: {image_path}") 187 | continue 188 | 189 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 190 | 191 | # visualization 192 | res_img = visualizer.visualize( 193 | img_original_bgr, 194 | pred_mesh_list = pred_mesh_list, 195 | body_bbox_list = body_bbox_list, 196 | hand_bbox_list = hand_bbox_list) 197 | 198 | # show result in the screen 199 | if not args.no_display: 200 | res_img = res_img.astype(np.uint8) 201 | ImShow(res_img) 202 | 203 | # save result image 204 | if args.out_dir is not None: 205 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 206 | 207 | # save predictions to pkl 208 | if args.save_pred_pkl: 209 | demo_type = 'frank' 210 | demo_utils.save_pred_to_pkl( 211 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 212 | 213 | print(f"Processed : {image_path}") 214 | 215 | # save images as a video 216 | if not args.no_video_out and input_type in ['video', 'webcam']: 217 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 218 | 219 | if input_type =='webcam' and input_data is not None: 220 | input_data.release() 221 | cv2.destroyAllWindows() 222 | 223 | def main(): 224 | args = DemoOptions().parse() 225 | args.use_smplx = True 226 | 227 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 228 | assert torch.cuda.is_available(), "Current version only supports GPU" 229 | 230 | hand_bbox_detector = HandBboxDetector('third_view', device) 231 | 232 | #Set Mocap regressor 233 | body_mocap = BodyMocap(args.checkpoint_body_smplx, args.smpl_dir, device = device, use_smplx= True) 234 | hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device = device) 235 | 236 | # Set Visualizer 237 | if args.renderer_type in ['pytorch3d', 'opendr']: 238 | from renderer.screen_free_visualizer import Visualizer 239 | else: 240 | from renderer.visualizer import Visualizer 241 | visualizer = Visualizer(args.renderer_type) 242 | 243 | run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer) 244 | 245 | 246 | if __name__ == '__main__': 247 | main() -------------------------------------------------------------------------------- /handmocap/hand_modules/h3dw_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
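A note on the helper defined just below: extract_hand_output() slices a full SMPL-X forward pass into per-hand vertices and 21 hand joints reordered to the Panoptic convention (wrist first, then thumb to pinky). The following is only a minimal CPU sketch of calling it, assuming the SMPL-X model pickle and the hand-info pickle referenced elsewhere in this repository are available at the paths shown (taken from demo/demo_visualize_prediction.py); adjust them to your local setup.

import torch
import smplx
import mocap_utils.general_utils as gnu
from handmocap.hand_modules.h3dw_model import extract_hand_output

# SMPL-X model and hand-info files as referenced in demo/demo_visualize_prediction.py
smplx_model = smplx.create(
    './extra_data/smpl/SMPLX_NEUTRAL.pkl', model_type='smplx',
    batch_size=1, gender='neutral', num_betas=10, use_pca=False, ext='pkl')
hand_info = gnu.load_pkl('extra_data/hand_module/SMPLX_HAND_INFO.pkl')

# neutral-pose forward pass, then pull out the right hand
output = smplx_model(
    global_orient=torch.zeros(1, 3), body_pose=torch.zeros(1, 63),
    right_hand_pose=torch.zeros(1, 45), betas=torch.zeros(1, 10),
    return_verts=True)
hand = extract_hand_output(
    output, hand_type='right', hand_info=hand_info, use_cuda=False)
print(hand['hand_joints'].shape)  # (1, 21, 3): wrist + 20 finger joints, Panoptic order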
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | # dct is the abbr. of Human Model recovery with Densepose supervision 7 | import numpy as np 8 | import torch 9 | import os 10 | import sys 11 | import shutil 12 | import os.path as osp 13 | from collections import OrderedDict 14 | import itertools 15 | import torch.nn.functional as F 16 | import torch.nn as nn 17 | from torch.nn.parallel import DistributedDataParallel 18 | import pdb 19 | import cv2 20 | from . import resnet 21 | from handmocap.hand_modules.h3dw_networks import H3DWEncoder 22 | import time 23 | import mocap_utils.general_utils as gnu 24 | import smplx 25 | import pdb 26 | 27 | 28 | def extract_hand_output(output, hand_type, hand_info, top_finger_joints_type='ave', use_cuda=True): 29 | assert hand_type in ['left', 'right'] 30 | 31 | if hand_type == 'left': 32 | wrist_idx, hand_start_idx, middle_finger_idx = 20, 25, 28 33 | else: 34 | wrist_idx, hand_start_idx, middle_finger_idx = 21, 40, 43 35 | 36 | vertices = output.vertices 37 | joints = output.joints 38 | vertices_shift = vertices - joints[:, hand_start_idx:hand_start_idx+1, :] 39 | 40 | hand_verts_idx = torch.Tensor(hand_info[f'{hand_type}_hand_verts_idx']).long() 41 | if use_cuda: 42 | hand_verts_idx = hand_verts_idx.cuda() 43 | 44 | hand_verts = vertices[:, hand_verts_idx, :] 45 | hand_verts_shift = hand_verts - joints[:, hand_start_idx:hand_start_idx+1, :] 46 | 47 | # Hand joints 48 | if hand_type == 'left': 49 | hand_idxs = [20] + list(range(25,40)) + list(range(66, 71)) # 20 for left wrist. 20 finger joints 50 | else: 51 | hand_idxs = [21] + list(range(40,55)) + list(range(71, 76)) # 21 for right wrist. 20 finger joints 52 | smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] 53 | hand_joints = joints[:, hand_idxs, :][:, smplx_hand_to_panoptic, :] 54 | hand_joints_shift = hand_joints - joints[:, hand_start_idx:hand_start_idx+1, :] 55 | 56 | output = dict( 57 | wrist_idx = wrist_idx, 58 | hand_start_idx = hand_start_idx, 59 | middle_finger_idx = middle_finger_idx, 60 | vertices_shift = vertices_shift, 61 | hand_vertices = hand_verts, 62 | hand_vertices_shift = hand_verts_shift, 63 | hand_joints = hand_joints, 64 | hand_joints_shift = hand_joints_shift 65 | ) 66 | return output 67 | 68 | 69 | class H3DWModel(object): 70 | @property 71 | def name(self): 72 | return 'H3DWModel' 73 | 74 | def __init__(self, opt): 75 | self.opt = opt 76 | self.Tensor = torch.cuda.FloatTensor 77 | 78 | # set params 79 | self.inputSize = opt.inputSize 80 | self.total_params_dim = opt.total_params_dim 81 | self.cam_params_dim = opt.cam_params_dim 82 | self.pose_params_dim = opt.pose_params_dim 83 | self.shape_params_dim = opt.shape_params_dim 84 | self.top_finger_joints_type = opt.top_finger_joints_type 85 | 86 | assert(self.total_params_dim == 87 | self.cam_params_dim+self.pose_params_dim+self.shape_params_dim) 88 | 89 | if opt.dist: 90 | self.batch_size = opt.batchSize // torch.distributed.get_world_size() 91 | else: 92 | self.batch_size = opt.batchSize 93 | nb = self.batch_size 94 | 95 | # set input image and 2d keypoints 96 | self.input_img = self.Tensor( 97 | nb, opt.input_nc, self.inputSize, self.inputSize) 98 | 99 | # joints 2d 100 | self.keypoints = self.Tensor(nb, opt.num_joints, 2) 101 | self.keypoints_weights = self.Tensor(nb, opt.num_joints) 102 | 103 | # mano pose params 104 | self.gt_pose_params = self.Tensor(nb, opt.pose_params_dim) 105 | self.mano_params_weight = 
self.Tensor(nb, 1) 106 | 107 | # joints 3d 108 | self.joints_3d = self.Tensor(nb, opt.num_joints, 3) 109 | self.joints_3d_weight = self.Tensor(nb, opt.num_joints, 1) 110 | 111 | # load mean params, the mean params are from HMR 112 | self.mean_param_file = osp.join( 113 | opt.model_root, opt.mean_param_file) 114 | self.load_params() 115 | 116 | # set differential SMPL (implemented with pytorch) and smpl_renderer 117 | # smplx_model_path = osp.join(opt.model_root, opt.smplx_model_file) 118 | smplx_model_path = opt.smplx_model_file 119 | self.smplx = smplx.create( 120 | smplx_model_path, 121 | model_type = "smplx", 122 | batch_size = self.batch_size, 123 | gender = 'neutral', 124 | num_betas = 10, 125 | use_pca = False, 126 | ext='pkl').cuda() 127 | 128 | # set encoder and optimizer 129 | self.encoder = H3DWEncoder(opt, self.mean_params).cuda() 130 | if opt.dist: 131 | self.encoder = DistributedDataParallel( 132 | self.encoder, device_ids=[torch.cuda.current_device()]) 133 | 134 | checkpoint_path = opt.checkpoint_path 135 | if not osp.exists(checkpoint_path): 136 | print(f"Error: {checkpoint_path} does not exists") 137 | self.success_load = False 138 | else: 139 | if self.opt.dist: 140 | self.encoder.module.load_state_dict(torch.load( 141 | checkpoint_path, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))) 142 | else: 143 | saved_weights = torch.load(checkpoint_path) 144 | self.encoder.load_state_dict(saved_weights) 145 | self.success_load = True 146 | 147 | 148 | def load_params(self): 149 | # load mean params first 150 | mean_vals = gnu.load_pkl(self.mean_param_file) 151 | mean_params = np.zeros((1, self.total_params_dim)) 152 | 153 | # set camera model first 154 | mean_params[0, 0] = 5.0 155 | 156 | # set pose (might be problematic) 157 | mean_pose = mean_vals['mean_pose'][3:] 158 | # set hand global rotation 159 | mean_pose = np.concatenate( (np.zeros((3,)), mean_pose) ) 160 | mean_pose = mean_pose[None, :] 161 | 162 | # set shape 163 | mean_shape = np.zeros((1, 10)) 164 | mean_params[0, 3:] = np.hstack((mean_pose, mean_shape)) 165 | # concat them together 166 | mean_params = np.repeat(mean_params, self.batch_size, axis=0) 167 | self.mean_params = torch.from_numpy(mean_params).float() 168 | self.mean_params.requires_grad = False 169 | 170 | # define global rotation 171 | self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32).cuda() 172 | # self.global_orient[:, 0] = np.pi 173 | self.global_orient.requires_grad = False 174 | 175 | # load smplx-hand faces 176 | hand_info_file = osp.join(self.opt.model_root, self.opt.smplx_hand_info_file) 177 | 178 | self.hand_info = gnu.load_pkl(hand_info_file) 179 | self.right_hand_faces_holistic = self.hand_info['right_hand_faces_holistic'] 180 | self.right_hand_faces_local = self.hand_info['right_hand_faces_local'] 181 | self.right_hand_verts_idx = np.array(self.hand_info['right_hand_verts_idx'], dtype=np.int32) 182 | 183 | 184 | def set_input_imgonly(self, input): 185 | # image 186 | input_img = input['img'] 187 | self.input_img.resize_(input_img.size()).copy_(input_img) 188 | 189 | 190 | def get_smplx_output(self, pose_params, shape_params=None): 191 | hand_rotation = pose_params[:, :3] 192 | hand_pose = pose_params[:, 3:] 193 | body_pose = torch.zeros((self.batch_size, 63)).float().cuda() 194 | body_pose[:, 60:] = hand_rotation # set right hand rotation 195 | 196 | output = self.smplx( 197 | global_orient = self.global_orient, 198 | body_pose = body_pose, 199 | right_hand_pose = hand_pose, 200 | betas = 
shape_params, 201 | return_verts = True) 202 | 203 | hand_output = extract_hand_output( 204 | output, 205 | hand_type = 'right', 206 | hand_info = self.hand_info, 207 | top_finger_joints_type = self.top_finger_joints_type, 208 | use_cuda=True) 209 | 210 | pred_verts = hand_output['vertices_shift'] 211 | pred_joints_3d = hand_output['hand_joints_shift'] 212 | return pred_verts, pred_joints_3d 213 | 214 | 215 | def forward(self): 216 | # get predicted params first 217 | self.output = self.encoder(self.input_img) 218 | # print(self.output.mean()) 219 | self.final_params = self.output 220 | # self.final_params = self.output + self.mean_params 221 | 222 | # get predicted params for cam, pose, shape 223 | cam_dim = self.cam_params_dim 224 | pose_dim = self.pose_params_dim 225 | shape_dim = self.shape_params_dim 226 | self.pred_cam_params = self.final_params[:, :cam_dim] 227 | self.pred_pose_params = self.final_params[:, cam_dim: ( 228 | cam_dim + pose_dim)] 229 | self.pred_shape_params = self.final_params[:, (cam_dim + pose_dim):] 230 | 231 | # get predicted smpl verts and joints, 232 | self.pred_verts, self.pred_joints_3d = self.get_smplx_output( 233 | self.pred_pose_params, self.pred_shape_params) 234 | 235 | 236 | def test(self): 237 | with torch.no_grad(): 238 | self.forward() 239 | 240 | 241 | def get_pred_result(self): 242 | pred_result = OrderedDict( 243 | cams = self.pred_cam_params.cpu().numpy(), 244 | pred_shape_params = self.pred_shape_params.cpu().numpy(), 245 | pred_pose_params = self.pred_pose_params.cpu().numpy(), 246 | pred_verts = self.pred_verts.cpu().numpy()[:, self.right_hand_verts_idx, :], 247 | pred_joints_3d = self.pred_joints_3d.cpu().numpy(), 248 | ) 249 | return pred_result 250 | 251 | 252 | def eval(self): 253 | self.encoder.eval() -------------------------------------------------------------------------------- /demo/demo_visualize_prediction.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
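Context for the script below: it re-renders meshes from the prediction .pkl files written by the demos when saving of predictions (args.save_pred_pkl) is enabled. The fields it relies on can be inspected directly; a small sketch, where the file name is hypothetical and any .pkl under args.pkl_dir would do:

import mocap_utils.general_utils as gnu

# hypothetical path to one saved prediction file
saved_data = gnu.load_pkl('./mocap_output/mocap/00000_prediction_result.pkl')

# keys read by visualize_prediction() in this script
for key in ['demo_type', 'smpl_type', 'image_path',
            'body_bbox_list', 'hand_bbox_list', 'pred_output_list', 'save_mesh']:
    print(key, type(saved_data[key]))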
2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | import numpy as np 8 | import cv2 9 | import argparse 10 | import json 11 | import pickle 12 | import smplx 13 | from datetime import datetime 14 | 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | import mocap_utils.demo_utils as demo_utils 18 | import mocap_utils.general_utils as gnu 19 | from bodymocap.models import SMPL, SMPLX 20 | from handmocap.hand_modules.h3dw_model import extract_hand_output 21 | from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm 22 | 23 | 24 | def __get_data_type(pkl_files): 25 | for pkl_file in pkl_files: 26 | saved_data = gnu.load_pkl(pkl_file) 27 | return saved_data['demo_type'], saved_data['smpl_type'] 28 | 29 | 30 | def __get_smpl_model(demo_type, smpl_type): 31 | smplx_model_path = './extra_data/smpl/SMPLX_NEUTRAL.pkl' 32 | smpl_model_path = './extra_data/smpl//basicModel_neutral_lbs_10_207_0_v1.0.0.pkl' 33 | 34 | if demo_type == 'hand': 35 | # use original smpl-x 36 | smpl = smplx.create( 37 | smplx_model_path, 38 | model_type = "smplx", 39 | batch_size = 1, 40 | gender = 'neutral', 41 | num_betas = 10, 42 | use_pca = False, 43 | ext='pkl' 44 | ) 45 | else: 46 | if smpl_type == 'smplx': 47 | # use modified smpl-x from body module 48 | smpl = SMPLX( 49 | smplx_model_path, 50 | batch_size=1, 51 | num_betas = 10, 52 | use_pca = False, 53 | create_transl=False) 54 | else: 55 | # use modified smpl from body module 56 | assert smpl_type == 'smpl' 57 | smpl = SMPL( 58 | smpl_model_path, 59 | batch_size=1, 60 | create_transl=False) 61 | return smpl 62 | 63 | 64 | def __calc_hand_mesh(hand_type, pose_params, betas, smplx_model): 65 | hand_rotation = pose_params[:, :3] 66 | hand_pose = pose_params[:, 3:] 67 | body_pose = torch.zeros((1, 63)).float() 68 | 69 | assert hand_type in ['left_hand', 'right_hand'] 70 | if hand_type == 'right_hand': 71 | body_pose[:, 60:] = hand_rotation # set right hand rotation 72 | right_hand_pose = hand_pose 73 | left_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 74 | else: 75 | body_pose[:, 57:60] = hand_rotation # set right hand rotation 76 | left_hand_pose = hand_pose 77 | right_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 78 | 79 | output = smplx_model( 80 | global_orient = torch.zeros((1,3)), 81 | body_pose = body_pose, 82 | betas = betas, 83 | left_hand_pose = left_hand_pose, 84 | right_hand_pose = right_hand_pose, 85 | return_verts = True) 86 | 87 | hand_info_file = "extra_data/hand_module/SMPLX_HAND_INFO.pkl" 88 | hand_info = gnu.load_pkl(hand_info_file) 89 | hand_output = extract_hand_output( 90 | output, 91 | hand_type = hand_type.split("_")[0], 92 | hand_info = hand_info, 93 | top_finger_joints_type = 'ave', 94 | use_cuda = False) 95 | 96 | pred_verts = hand_output['hand_vertices_shift'].detach().numpy() 97 | faces = hand_info[f'{hand_type}_faces_local'] 98 | return pred_verts[0], faces 99 | 100 | 101 | def _calc_body_mesh(smpl_type, smpl_model, body_pose, betas, 102 | left_hand_pose, right_hand_pose): 103 | if smpl_type == 'smpl': 104 | smpl_output = smpl_model( 105 | global_orient = body_pose[:, :3], 106 | body_pose = body_pose[:, 3:], 107 | betas = betas, 108 | ) 109 | else: 110 | smpl_output = smpl_model( 111 | global_orient = body_pose[:, :3], 112 | body_pose = body_pose[:, 3:], 113 | betas = betas, 114 | left_hand_pose = left_hand_pose, 115 | right_hand_pose = right_hand_pose, 116 | ) 117 | 118 | vertices = 
smpl_output.vertices.detach().cpu().numpy()[0] 119 | faces = smpl_model.faces 120 | return vertices, faces 121 | 122 | 123 | def __calc_mesh(demo_type, smpl_type, smpl_model, img_shape, pred_output_list): 124 | for pred_output in pred_output_list: 125 | if pred_output is not None: 126 | # hand 127 | if demo_type == 'hand': 128 | assert 'left_hand' in pred_output and 'right_hand' in pred_output 129 | for hand_type in pred_output: 130 | hand_pred = pred_output[hand_type] 131 | if hand_pred is not None: 132 | pose_params = torch.from_numpy(hand_pred['pred_hand_pose']) 133 | betas = torch.from_numpy(hand_pred['pred_hand_betas']) 134 | pred_verts, hand_faces = __calc_hand_mesh(hand_type, pose_params, betas, smpl_model) 135 | hand_pred['pred_vertices_smpl'] = pred_verts 136 | 137 | cam_scale = hand_pred['pred_camera'][0] 138 | cam_trans = hand_pred['pred_camera'][1:] 139 | vert_bboxcoord = convert_smpl_to_bbox( 140 | pred_verts, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space 141 | 142 | bbox_scale_ratio = hand_pred['bbox_scale_ratio'] 143 | bbox_top_left = hand_pred['bbox_top_left'] 144 | vert_imgcoord = convert_bbox_to_oriIm( 145 | vert_bboxcoord, bbox_scale_ratio, bbox_top_left, 146 | img_shape[1], img_shape[0]) 147 | pred_output[hand_type]['pred_vertices_img'] = vert_imgcoord 148 | # body 149 | else: 150 | pose_params = torch.from_numpy(pred_output['pred_body_pose']) 151 | betas = torch.from_numpy(pred_output['pred_betas']) 152 | if 'pred_right_hand_pose' in pred_output: 153 | pred_right_hand_pose = torch.from_numpy(pred_output['pred_right_hand_pose']) 154 | else: 155 | pred_right_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 156 | if 'pred_left_hand_pose' in pred_output: 157 | pred_left_hand_pose = torch.from_numpy(pred_output['pred_left_hand_pose']) 158 | else: 159 | pred_left_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 160 | pred_verts, faces = _calc_body_mesh( 161 | smpl_type, smpl_model, pose_params, betas, pred_left_hand_pose, pred_right_hand_pose) 162 | 163 | pred_output['pred_vertices_smpl'] = pred_verts 164 | pred_output['faces'] = faces 165 | 166 | cam_scale = pred_output['pred_camera'][0] 167 | cam_trans = pred_output['pred_camera'][1:] 168 | vert_bboxcoord = convert_smpl_to_bbox( 169 | pred_verts, cam_scale, cam_trans, bAppTransFirst=False) # SMPL space -> bbox space 170 | 171 | bbox_scale_ratio = pred_output['bbox_scale_ratio'] 172 | bbox_top_left = pred_output['bbox_top_left'] 173 | vert_imgcoord = convert_bbox_to_oriIm( 174 | vert_bboxcoord, bbox_scale_ratio, bbox_top_left, 175 | img_shape[1], img_shape[0]) 176 | pred_output['pred_vertices_img'] = vert_imgcoord 177 | 178 | 179 | def visualize_prediction(args, demo_type, smpl_type, smpl_model, pkl_files, visualizer): 180 | for pkl_file in pkl_files: 181 | # load data 182 | saved_data = gnu.load_pkl(pkl_file) 183 | 184 | image_path = saved_data['image_path'] 185 | img_original_bgr = cv2.imread(image_path) 186 | if img_original_bgr is None: 187 | print(f"{image_path} does not exists, skip") 188 | 189 | print("--------------------------------------") 190 | 191 | demo_type = saved_data['demo_type'] 192 | assert saved_data['smpl_type'] == smpl_type 193 | 194 | hand_bbox_list = saved_data['hand_bbox_list'] 195 | body_bbox_list = saved_data['body_bbox_list'] 196 | pred_output_list = saved_data['pred_output_list'] 197 | 198 | if not saved_data['save_mesh']: 199 | __calc_mesh( 200 | demo_type, smpl_type, smpl_model, img_original_bgr.shape[:2], pred_output_list) 201 | else: 202 | pass 203 | 204 | 
pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 205 | 206 | # visualization 207 | res_img = visualizer.visualize( 208 | img_original_bgr, 209 | pred_mesh_list = pred_mesh_list, 210 | body_bbox_list = body_bbox_list, 211 | hand_bbox_list = hand_bbox_list) 212 | 213 | # save result image 214 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 215 | 216 | # save predictions to pkl 217 | if args.save_pred_pkl: 218 | args.use_smplx = smpl_type == 'smplx' 219 | demo_utils.save_pred_to_pkl( 220 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 221 | 222 | 223 | def main(): 224 | args = DemoOptions().parse() 225 | 226 | # load pkl files 227 | pkl_files = gnu.get_all_files(args.pkl_dir, ".pkl", "full") 228 | 229 | # get smpl type 230 | demo_type, smpl_type = __get_data_type(pkl_files) 231 | 232 | # get smpl model 233 | smpl_model = __get_smpl_model(demo_type, smpl_type) 234 | 235 | # Set Visualizer 236 | assert args.renderer_type in ['pytorch3d', 'opendr'], \ 237 | f"{args.renderer_type} not implemented yet." 238 | from renderer.screen_free_visualizer import Visualizer 239 | visualizer = Visualizer(args.renderer_type) 240 | 241 | # load smpl model 242 | visualize_prediction(args, demo_type, smpl_type, smpl_model, pkl_files, visualizer) 243 | 244 | 245 | if __name__ == '__main__': 246 | main() -------------------------------------------------------------------------------- /bodymocap/models/smpl.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | 4 | import torch 5 | import numpy as np 6 | import smplx 7 | from smplx import SMPL as _SMPL 8 | from smplx import SMPLX as _SMPLX 9 | # from bodymocap.models.body_models import SMPLX as _SMPLX #Use our custom SMPLX 10 | # from smplx.body_models import ModelOutput 11 | # from bodymocap.models.body_models import ModelOutput 12 | from smplx.lbs import vertices2joints 13 | 14 | from bodymocap import constants 15 | 16 | from collections import namedtuple 17 | ModelOutput = namedtuple('ModelOutput', 18 | ['vertices', 'joints', 'full_pose', 'betas', 19 | 'global_orient', 20 | 'body_pose', 'expression', 21 | 'left_hand_pose', 'right_hand_pose', 22 | 'right_hand_joints', 'left_hand_joints', 23 | 'jaw_pose']) 24 | ModelOutput.__new__.__defaults__ = (None,) * len(ModelOutput._fields) 25 | 26 | 27 | class SMPL(_SMPL): 28 | """ Extension of the official SMPL implementation to support more joints """ 29 | 30 | def __init__(self, *args, **kwargs): 31 | super(SMPL, self).__init__(*args, **kwargs) 32 | joints = [constants.JOINT_MAP[i] for i in constants.JOINT_NAMES] 33 | JOINT_REGRESSOR_TRAIN_EXTRA = 'extra_data/body_module/data_from_spin//J_regressor_extra.npy' 34 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA) 35 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32)) 36 | self.joint_map = torch.tensor(joints, dtype=torch.long) 37 | 38 | def forward(self, *args, **kwargs): 39 | kwargs['get_skin'] = True 40 | smpl_output = super(SMPL, self).forward(*args, **kwargs) 41 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) #Additional 9 joints #Check doc/J_regressor_extra.png 42 | joints = torch.cat([smpl_output.joints, extra_joints], dim=1) #[N, 24 + 21, 3] + [N, 9, 3] 43 | joints = joints[:, self.joint_map, :] 44 | output = ModelOutput(vertices=smpl_output.vertices, 45 | global_orient=smpl_output.global_orient, 46 | 
body_pose=smpl_output.body_pose, 47 | joints=joints, 48 | betas=smpl_output.betas, 49 | full_pose=smpl_output.full_pose) 50 | return output 51 | 52 | 53 | 54 | class SMPLX(_SMPLX): 55 | """ Extension of the official SMPL implementation to support more joints """ 56 | 57 | def __init__(self, *args, **kwargs): 58 | kwargs['ext'] = 'pkl' #We have pkl file 59 | super(SMPLX, self).__init__(*args, **kwargs) 60 | joints = [constants.JOINT_MAP[i] for i in constants.JOINT_NAMES] 61 | JOINT_REGRESSOR_TRAIN_EXTRA_SMPLX = 'extra_data/body_module/J_regressor_extra_smplx.npy' 62 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA_SMPLX) #(9, 10475) 63 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32)) 64 | self.joint_map = torch.tensor(joints, dtype=torch.long) 65 | 66 | def forward(self, *args, **kwargs): 67 | kwargs['get_skin'] = True 68 | 69 | #if pose parameter is for SMPL with 21 joints (ignoring root) 70 | if(kwargs['body_pose'].shape[1]==69): 71 | kwargs['body_pose'] = kwargs['body_pose'][:,:-2*3] #Ignore the last two joints (which are on the palm. Not used) 72 | 73 | if(kwargs['body_pose'].shape[1]==23): 74 | kwargs['body_pose'] = kwargs['body_pose'][:,:-2] #Ignore the last two joints (which are on the palm. Not used) 75 | 76 | smpl_output = super(SMPLX, self).forward(*args, **kwargs) 77 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) 78 | # extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices[:,:6890]) *0 #TODO: implement this correctly 79 | 80 | #SMPL-X Joint order: https://docs.google.com/spreadsheets/d/1_1dLdaX-sbMkCKr_JzJW_RZCpwBwd7rcKkWT_VgAQ_0/edit#gid=0 81 | smplx_to_smpl = list(range(0,22)) + [28,43] + list(range(55,76)) # 28 left middle finger , 43: right middle finger 1 82 | smpl_joints = smpl_output.joints[:,smplx_to_smpl,:] # Convert SMPL-X to SMPL 127 ->45 83 | joints = torch.cat([smpl_joints, extra_joints], dim=1) # [N, 127, 3]->[N, 45, 3] + [N, 9, 3] # SMPL-X has more joints. should convert 45 84 | joints = joints[:, self.joint_map, :] 85 | 86 | # Hand joints 87 | smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] #Wrist Thumb to Pinky 88 | 89 | smplx_lhand = [20] + list(range(25,40)) + list(range(66, 71)) #20 for left wrist. 20 finger joints 90 | lhand_joints = smpl_output.joints[:,smplx_lhand, :] #(N,21,3) 91 | lhand_joints = lhand_joints[:, smplx_hand_to_panoptic, :] #Convert SMPL-X hand order to paonptic hand order 92 | 93 | smplx_rhand = [21] + list(range(40,55)) + list(range(71, 76)) #21 for right wrist. 
20 finger joints 94 | rhand_joints = smpl_output.joints[:, smplx_rhand, :] #(N,21,3) 95 | rhand_joints = rhand_joints[:,smplx_hand_to_panoptic,:] #Convert SMPL-X hand order to paonptic hand order 96 | 97 | output = ModelOutput(vertices=smpl_output.vertices, 98 | global_orient=smpl_output.global_orient, 99 | body_pose=smpl_output.body_pose, 100 | joints=joints, 101 | right_hand_joints=rhand_joints, #N,21,3 102 | left_hand_joints=lhand_joints, #N,21,3 103 | betas=smpl_output.betas, 104 | full_pose=smpl_output.full_pose) 105 | return output 106 | 107 | 108 | """ 109 | 0 pelvis', 110 | 1 left_hip', 111 | 2 right_hip', 112 | 3 spine1', 113 | 4 left_knee', 114 | 5 right_knee', 115 | 6 spine2', 116 | 7 left_ankle', 117 | 8 right_ankle', 118 | 9 spine3', 119 | 10 left_foot', 120 | 11 right_foot', 121 | 12 neck', 122 | 13 left_collar', 123 | 14 right_collar', 124 | 15 head', 125 | 16 left_shoulder', 126 | 17 right_shoulder', 127 | 18 left_elbow', 128 | 19 right_elbow', 129 | 20 left_wrist', 130 | 21 right_wrist', 131 | 22 jaw', 132 | 23 left_eye_smplhf', 133 | 24 right_eye_smplhf', 134 | 25 left_index1', 135 | 26 left_index2', 136 | 27 left_index3', 137 | 28 left_middle1', 138 | 29 left_middle2', 139 | 30 left_middle3', 140 | 31 left_pinky1', 141 | 32 left_pinky2', 142 | 33 left_pinky3', 143 | 34 left_ring1', 144 | 35 left_ring2', 145 | 36 left_ring3', 146 | 37 left_thumb1', 147 | 38 left_thumb2', 148 | 39 left_thumb3', 149 | 40 right_index1', 150 | 41 right_index2', 151 | 42 right_index3', 152 | 43 right_middle1', 153 | 44 right_middle2', 154 | 45 right_middle3', 155 | 46 right_pinky1', 156 | 47 right_pinky2', 157 | 48 right_pinky3', 158 | 49 right_ring1', 159 | 50 right_ring2', 160 | 51 right_ring3', 161 | 52 right_thumb1', 162 | 53 right_thumb2', 163 | 54 right_thumb3', 164 | 55 nose', 165 | 56 right_eye', 166 | 57 left_eye', 167 | 58 right_ear', 168 | 59 left_ear', 169 | 60 left_big_toe', 170 | 61 left_small_toe', 171 | 62 left_heel', 172 | 63 right_big_toe', 173 | 64 right_small_toe', 174 | 65 right_heel', 175 | 66 left_thumb', 176 | 67 left_index', 177 | 68 left_middle', 178 | 69 left_ring', 179 | 70 left_pinky', 180 | 71 right_thumb', 181 | 72 right_index', 182 | 73 right_middle', 183 | 74 right_ring', 184 | 75 right_pinky', 185 | 76 right_eye_brow1', 186 | 77 right_eye_brow2', 187 | 78 right_eye_brow3', 188 | 79 right_eye_brow4', 189 | 80 right_eye_brow5', 190 | 81 left_eye_brow5', 191 | 82 left_eye_brow4', 192 | 83 left_eye_brow3', 193 | 84 left_eye_brow2', 194 | 85 left_eye_brow1', 195 | 86 nose1', 196 | 87 nose2', 197 | 88 nose3', 198 | 89 nose4', 199 | 90 right_nose_2', 200 | 91 right_nose_1', 201 | 92 nose_middle', 202 | 93 left_nose_1', 203 | 94 left_nose_2', 204 | 95 right_eye1', 205 | 96 right_eye2', 206 | 97 right_eye3', 207 | 98 right_eye4', 208 | 99 right_eye5', 209 | 100 right_eye6', 210 | 101 left_eye4', 211 | 102 left_eye3', 212 | 103 left_eye2', 213 | 104 left_eye1', 214 | 105 left_eye6', 215 | 106 left_eye5', 216 | 107 right_mouth_1', 217 | 108 right_mouth_2', 218 | 109 right_mouth_3', 219 | 110 mouth_top', 220 | 111 left_mouth_3', 221 | 112 left_mouth_2', 222 | 113 left_mouth_1', 223 | 114 left_mouth_5', # 59 in OpenPose output 224 | 115 left_mouth_4', # 58 in OpenPose output 225 | 116 mouth_bottom', 226 | 117 right_mouth_4', 227 | 118 right_mouth_5', 228 | 119 right_lip_1', 229 | 120 right_lip_2', 230 | 121 lip_top', 231 | 122 left_lip_2', 232 | 123 left_lip_1', 233 | 124 left_lip_3', 234 | 125 lip_bottom', 235 | 126 right_lip_3', 236 | 127 right_contour_1', 237 | 128 
right_contour_2', 238 | 129 right_contour_3', 239 | 130 right_contour_4', 240 | 131 right_contour_5', 241 | 132 right_contour_6', 242 | 133 right_contour_7', 243 | 134 right_contour_8', 244 | 135 contour_middle', 245 | 136 left_contour_8', 246 | 137 left_contour_7', 247 | 138 left_contour_6', 248 | 139 left_contour_5', 249 | 140 left_contour_4', 250 | 141 left_contour_3', 251 | 142 left_contour_2', 252 | 143 left_contour_1' 253 | """ 254 | 255 | 256 | #SMPL Joints: 257 | """ 258 | 0 pelvis', 259 | 1 left_hip', 260 | 2 right_hip', 261 | 3 spine1', 262 | 4 left_knee', 263 | 5 right_knee', 264 | 6 spine2', 265 | 7 left_ankle', 266 | 8 right_ankle', 267 | 9 spine3', 268 | 10 left_foot', 269 | 11 right_foot', 270 | 12 neck', 271 | 13 left_collar', 272 | 14 right_collar', 273 | 15 head', 274 | 16 left_shoulder', 275 | 17 right_shoulder', 276 | 18 left_elbow', 277 | 19 right_elbow', 278 | 20 left_wrist', 279 | 21 right_wrist', 280 | 22 281 | 23 282 | 24 nose', 283 | 25 right_eye', 284 | 26 left_eye', 285 | 27 right_ear', 286 | 28 left_ear', 287 | 29 left_big_toe', 288 | 30 left_small_toe', 289 | 31 left_heel', 290 | 32 right_big_toe', 291 | 33 right_small_toe', 292 | 34 right_heel', 293 | 35 left_thumb', 294 | 36 left_index', 295 | 37 left_middle', 296 | 38 left_ring', 297 | 39 left_pinky', 298 | 40 right_thumb', 299 | 41 right_index', 300 | 42 right_middle', 301 | 43 right_ring', 302 | 44 right_pinky', 303 | """ --------------------------------------------------------------------------------
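A closing note on bodymocap/models/smpl.py above: the SMPL-X-to-SMPL and hand-joint index lists in SMPLX.forward() are pure bookkeeping and can be sanity-checked without loading any model files. The snippet below simply copies those index lists from the method and verifies their sizes and the Panoptic reordering.

# Sanity check of the index lists used in SMPLX.forward()
smplx_to_smpl = list(range(0, 22)) + [28, 43] + list(range(55, 76))
smplx_lhand = [20] + list(range(25, 40)) + list(range(66, 71))
smplx_rhand = [21] + list(range(40, 55)) + list(range(71, 76))
smplx_hand_to_panoptic = [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18,
                          10, 11, 12, 19, 7, 8, 9, 20]

assert len(smplx_to_smpl) == 45                    # 22 body joints + 2 middle-finger joints + 21 face/feet/finger-tip joints
assert len(smplx_lhand) == len(smplx_rhand) == 21  # wrist + 15 finger joints + 5 finger tips per hand
assert sorted(smplx_hand_to_panoptic) == list(range(21))  # a permutation into Panoptic hand order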