├── handmocap └── hand_modules │ ├── __init__.py │ ├── test_options.py │ ├── h3dw_networks.py │ ├── base_options.py │ ├── resnet.py │ └── h3dw_model.py ├── bodymocap ├── models │ ├── __init__.py │ ├── hmr.py │ └── smpl.py ├── constants.py ├── body_bbox_detector.py ├── utils │ └── geometry.py └── body_mocap_api.py ├── renderer ├── shaders │ ├── simple140.fs │ ├── simple140.vs │ ├── normal140.fs │ ├── normal140.vs │ ├── geo140.vs │ ├── geo140.fs │ ├── colorgeo140.vs │ ├── colorgeo140.fs │ └── framework.py ├── image_utils.py ├── screen_free_visualizer.py ├── meshRenderer.py ├── render_utils.py ├── od_renderer.py ├── p3d_renderer.py └── denseposeRenderer.py ├── docs ├── requirements.txt ├── run_totalmocap.md ├── INSTALL.md ├── joint_order.md └── run_handmocap.md ├── scripts ├── download_sample_data.sh ├── install_pose2d.sh ├── install_frankmocap.sh ├── download_data_hand_module.sh ├── install_hand_detectors.sh └── download_data_body_module.sh ├── .gitignore ├── mocap_utils ├── compose_image.py ├── extract_frame.py ├── timer.py ├── compare_results.py ├── general_utils.py ├── geometry_utils.py └── coordconv.py ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md ├── demo ├── demo_options.py ├── demo_bodymocap.py ├── demo_handmocap.py ├── demo_frankmocap.py └── demo_visualize_prediction.py └── README.md /handmocap/hand_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /bodymocap/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .hmr import hmr 2 | from .smpl import SMPL, SMPLX 3 | -------------------------------------------------------------------------------- /renderer/shaders/simple140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | in vec3 Color; 7 | 8 | void main() 9 | { 10 | FragColor = vec4(Color,1.0); 11 | } 12 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | pip 2 | gdown 3 | opencv-python 4 | PyOpenGL 5 | PyOpenGL_accelerate 6 | pycocotools 7 | pafy 8 | youtube-dl 9 | scipy 10 | pillow>=7.1.0 11 | easydict 12 | cython 13 | cffi 14 | msgpack 15 | pyyaml 16 | tensorboardX 17 | tqdm 18 | jinja2 19 | smplx 20 | sklearn 21 | opendr 22 | chumpy 23 | -------------------------------------------------------------------------------- /renderer/shaders/simple140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Color; 7 | 8 | out vec3 Color; 9 | 10 | uniform mat4 ModelMat; 11 | uniform mat4 PerspMat; 12 | 13 | void main() 14 | { 15 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 16 | Color = a_Color; 17 | } -------------------------------------------------------------------------------- /renderer/shaders/normal140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | in vec3 CamNormal; 7 | 8 | void main() 9 | { 10 | // FragColor = vec4(Color,1.0); 11 | vec3 cam_norm_normalized = normalize(CamNormal); 12 | vec3 rgb = (cam_norm_normalized + 1.0) / 2.0; 13 | FragColor = vec4(rgb, 1.0); 14 | } -------------------------------------------------------------------------------- /renderer/shaders/normal140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | 8 | out vec3 CamNormal; 9 | 10 | uniform mat4 ModelMat; 11 | uniform mat4 PerspMat; 12 | 13 | void main() 14 | { 15 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 16 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 17 | } -------------------------------------------------------------------------------- /scripts/download_sample_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | if [ ! -d "sample_data" ] 6 | then 7 | echo "Downloading sample videos" 8 | wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 9 | else 10 | echo "There exists sample_data already" 11 | fi 12 | echo "Done" 13 | -------------------------------------------------------------------------------- /renderer/shaders/geo140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | 8 | out vec3 CamNormal; 9 | out vec3 CamPos; 10 | 11 | uniform mat4 ModelMat; 12 | uniform mat4 PerspMat; 13 | 14 | void main() 15 | { 16 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 17 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 18 | CamPos = (ModelMat * vec4(a_Position, 1.0)).xyz; 19 | } -------------------------------------------------------------------------------- /renderer/shaders/geo140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | 7 | in vec3 CamNormal; 8 | in vec3 CamPos; 9 | 10 | void main() 11 | { 12 | vec3 light_direction = vec3(0, 0, 1); 13 | vec3 f_normal = normalize(CamNormal.xyz); 14 | vec4 specular_reflection = vec4(0.2) * pow(max(0.0, dot(reflect(-light_direction, f_normal), vec3(0, 0, -1))), 16.f); 15 | FragColor = vec4(dot(f_normal, light_direction)*vec3(1.0, 1.0, 1.0)+specular_reflection.xyz, 1.0); 16 | } -------------------------------------------------------------------------------- /renderer/shaders/colorgeo140.vs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | #version 140 4 | 5 | in vec3 a_Position; 6 | in vec3 a_Normal; 7 | in vec3 a_Color; 8 | 9 | out vec3 CamNormal; 10 | out vec3 CamPos; 11 | out vec3 Color; 12 | 13 | uniform mat4 ModelMat; 14 | uniform mat4 PerspMat; 15 | 16 | void main() 17 | { 18 | gl_Position = PerspMat * ModelMat * vec4(a_Position, 1.0); 19 | CamNormal = (ModelMat * vec4(a_Normal, 0.0)).xyz; 20 | CamPos = (ModelMat * vec4(a_Position, 1.0)).xyz; 21 | 22 | //Color = vec3(1.0, 1.0, 1.0); 23 | Color = a_Color; 24 | } -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.jpg 3 | *.png 4 | smpl/ 5 | tmp/ 6 | pretrained_models/ 7 | .vscode/ 8 | test_result/ 9 | log/ 10 | log.txt 11 | checkpoints/ 12 | evaluate_results/ 13 | *.pyc 14 | *.npy 15 | test_log.txt 16 | *.pkl 17 | pretrained/ 18 | render_result/ 19 | lightweight-human-pose-estimation.pytorch/ 20 | data/* 21 | webvideos/ 22 | samples/ 23 | *.pth 24 | *.obj 25 | data/ 26 | detectors/ 27 | data 28 | samples_all/ 29 | *.zip 30 | extra_data/ 31 | sample_data/ 32 | mocap_output/ 33 | demo.sh 34 | demo_multi.sh 35 | mocap_utils/frame_to_video.py 36 | mocap_utils/frame_to_gif.py 37 | mocap_utils/select_epick_kitchen.py 38 | -------------------------------------------------------------------------------- /renderer/shaders/colorgeo140.fs: -------------------------------------------------------------------------------- 1 | //Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | #version 140 4 | 5 | out vec4 FragColor; 6 | 7 | in vec3 CamNormal; 8 | in vec3 CamPos; 9 | in vec3 Color; 10 | 11 | void main() 12 | { 13 | vec3 light_direction = vec3(0, 0, 1); 14 | vec3 f_normal = normalize(CamNormal.xyz); 15 | vec4 specular_reflection = vec4(0.2) * pow(max(0.0, dot(reflect(-light_direction, f_normal), vec3(0, 0, -1))), 16.f); 16 | // FragColor = vec4(dot(f_normal, light_direction)*vec3(1.0, 1.0, 1.0)+specular_reflection.xyz, 1.0); 17 | FragColor = vec4(dot(f_normal, light_direction)*Color+specular_reflection.xyz, 1.0); 18 | } -------------------------------------------------------------------------------- /scripts/install_pose2d.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | mkdir -p detectors 6 | cd detectors 7 | 8 | git clone git@github.com:jhugestar/lightweight-human-pose-estimation.pytorch.git 9 | if [ ! -d lightweight-human-pose-estimation.pytorch ]; then 10 | git clone https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git 11 | fi 12 | mv lightweight-human-pose-estimation.pytorch body_pose_estimator 13 | 14 | #Download pretrained model 15 | wget https://download.01.org/opencv/openvino_training_extensions/models/human_pose_estimation/checkpoint_iter_370000.pth 16 | mkdir -p ../extra_data/body_module/body_pose_estimator 17 | mv *.pth ../extra_data/body_module/body_pose_estimator -------------------------------------------------------------------------------- /scripts/install_frankmocap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
4 | 5 | echo "" 6 | echo ">> Installing a third-party 2D keypoint detector" 7 | sh scripts/install_pose2d.sh 8 | 9 | echo "" 10 | echo ">> Download extra data for body module" 11 | sh scripts/download_data_body_module.sh 12 | 13 | 14 | echo "" 15 | echo ">> Installing a third-party hand detector" 16 | sh scripts/install_hand_detectors.sh 17 | 18 | 19 | echo "" 20 | echo ">> Download extra data for hand module" 21 | sh scripts/download_data_hand_module.sh 22 | 23 | echo "" 24 | if [ ! -d "sample_data" ] 25 | then 26 | echo "Downloading sample videos" 27 | wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 28 | else 29 | echo "There exists sample_data already" 30 | fi -------------------------------------------------------------------------------- /mocap_utils/compose_image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import cv2 6 | import numpy as np 7 | import general_utils as gnu 8 | 9 | 10 | def main(): 11 | in_dir = "./sample_data/images/single_person" 12 | out_dir = "./sample_data/images/multi_person" 13 | gnu.renew_dir(out_dir) 14 | 15 | all_imgs = gnu.get_all_files(in_dir, (".jpg", ".png", ".jpeg"), "full") 16 | num_img = len(all_imgs) 17 | 18 | for i in range(num_img): 19 | for j in range(num_img): 20 | img1 = cv2.imread(all_imgs[i]) 21 | img2 = cv2.imread(all_imgs[j]) 22 | img2 = cv2.resize(img2, img1.shape[:2][::-1]) 23 | res_img = np.concatenate((img1, img2), axis=1) 24 | res_img_path = osp.join(out_dir, f"{i:02d}_{j:02d}.jpg") 25 | cv2.imwrite(res_img_path, res_img) 26 | 27 | 28 | if __name__ == '__main__': 29 | main() -------------------------------------------------------------------------------- /scripts/download_data_hand_module.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | set -ex 6 | 7 | [ -d extra_data ] || mkdir extra_data 8 | cd extra_data 9 | 10 | [ -d hand_module ] || mkdir hand_module 11 | cd hand_module 12 | 13 | echo "Downloading other data" 14 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/SMPLX_HAND_INFO.pkl 15 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/mean_mano_params.pkl 16 | 17 | echo "Downloading pretrained hand model" 18 | [ -d pretrained_weights ] || mkdir pretrained_weights 19 | cd pretrained_weights 20 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/hand_module/checkpoints_best/pose_shape_best.pth 21 | 22 | #Go to root directory 23 | cd ../../../ 24 | 25 | echo "Downloading sample videos" 26 | wget https://dl.fbaipublicfiles.com/eft/sample_data_frank.tar && tar -xvf sample_data_frank.tar && rm sample_data_frank.tar 27 | echo "Done" -------------------------------------------------------------------------------- /handmocap/hand_modules/test_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | # Part of the code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 4 | 5 | from .base_options import BaseOptions 6 | 7 | class TestOptions(BaseOptions): 8 | def initialize(self): 9 | BaseOptions.initialize(self) 10 | self.parser.add_argument('--phase', type=str, default='test', help='train, val, test, etc') 11 | self.parser.add_argument('--which_epoch', type=str, default='-1', help='which epoch to load? set to latest to use latest cached model') 12 | self.parser.add_argument('--visualize_eval', action='store_true') 13 | self.parser.add_argument('--test_dataset', type=str, choices=['freihand', 'ho3d', 'stb', 'rhd', 'mtc', 'wild', 'demo'], help="which dataset to test on") 14 | self.parser.add_argument("--checkpoint_path", type=str, default=None, help="path of checkpoints used in test") 15 | self.isTrain = False 16 | -------------------------------------------------------------------------------- /scripts/install_hand_detectors.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 4 | 5 | mkdir -p detectors 6 | cd detectors 7 | 8 | pip install gdown 9 | 10 | # Install 100-DOH hand-object detectors 11 | git clone git@github.com:ddshan/hand_object_detector.git 12 | if [ ! -d hand_object_detector ]; then 13 | git clone https://github.com/ddshan/hand_object_detector 14 | fi 15 | # compile 16 | cd hand_object_detector/lib 17 | python setup.py build develop 18 | cd ../../ 19 | 20 | # Install 100-DOH hand-only detectors 21 | git clone git@github.com:ddshan/hand_detector.d2.git 22 | if [ ! -d hand_detector.d2 ]; then 23 | git clone https://github.com/ddshan/hand_detector.d2.git 24 | fi 25 | mv hand_detector.d2 hand_only_detector 26 | 27 | # downloading weights 28 | gdown https://drive.google.com/uc?id=1H2tWsZkS7tDF8q1-jdjx6V9XrK25EDbE 29 | gdown https://drive.google.com/uc?id=1OqgexNM52uxsPG3i8GuodDOJAGFsYkPg 30 | mkdir -p ../extra_data/hand_module/hand_detector 31 | mv *pth ../extra_data/hand_module/hand_detector -------------------------------------------------------------------------------- /mocap_utils/extract_frame.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import subprocess as sp 6 | import general_utils as gnu 7 | 8 | 9 | def extract_frame(video_dir, frame_dir): 10 | for file in os.listdir(video_dir): 11 | if file.endswith((".mov", ".mp4")): 12 | file_path = osp.join(video_dir, file) 13 | file_name = file[:-4] 14 | # if file_name != 'legao_02_01': continue 15 | res_dir = osp.join(frame_dir, file_name) 16 | gnu.build_dir(res_dir) 17 | command = f"ffmpeg -i {file_path} {res_dir}/{file_name}_%05d.png" 18 | command = command.split() 19 | sp.run(command) 20 | 21 | 22 | def main(): 23 | root_dir = "./sample_data/" 24 | 25 | video_dir = osp.join(root_dir, "videos") 26 | frame_dir = osp.join(root_dir, "frames") 27 | gnu.renew_dir(frame_dir) 28 | 29 | extract_frame(video_dir, frame_dir) 30 | 31 | if __name__ == '__main__': 32 | main() -------------------------------------------------------------------------------- /scripts/download_data_body_module.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (c) Facebook, Inc. and its affiliates. 
4 | 5 | set -ex 6 | 7 | mkdir -p extra_data/body_module 8 | cd extra_data/body_module 9 | 10 | echo "Downloading extra data from SPIN" 11 | wget http://visiondata.cis.upenn.edu/spin/data.tar.gz && tar -xvf data.tar.gz && rm data.tar.gz 12 | mv data data_from_spin 13 | 14 | echo "Downloading pretrained model" 15 | mkdir -p pretrained_weights 16 | cd pretrained_weights 17 | wget https://dl.fbaipublicfiles.com/eft/2020_05_31-00_50_43-best-51.749683916568756.pt 18 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/body_module/smplx-03-28-46060-w_spin_mlc3d_46582-2089_2020_03_28-21_56_16.pt 19 | cd .. 20 | 21 | echo "Downloading other data" 22 | wget https://dl.fbaipublicfiles.com/eft/fairmocap_data/body_module/J_regressor_extra_smplx.npy 23 | 24 | 25 | # if [ ! -d "sample_data" ] 26 | # then 27 | # echo "Downloading sample videos" 28 | # wget https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar && tar -xvf sampledata_frank.tar && rm sampledata_frank.tar && mv sampledata sample_data 29 | # else 30 | # echo "There exists sample_data already" 31 | # fi 32 | 33 | echo "Done" 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to pifuhd 2 | We want to make contributing to this project as easy and transparent as 3 | possible. 4 | 5 | ## Pull Requests 6 | We actively welcome your pull requests. 7 | 8 | 1. Fork the repo and create your branch from `master`. 9 | 2. If you've added code that should be tested, add tests. 10 | 3. If you've changed APIs, update the documentation. 11 | 4. Ensure the test suite passes. 12 | 5. Make sure your code lints. 13 | 6. If you haven't already, complete the Contributor License Agreement ("CLA"). 14 | 15 | ## Contributor License Agreement ("CLA") 16 | In order to accept your pull request, we need you to submit a CLA. You only need 17 | to do this once to work on any of Facebook's open source projects. 18 | 19 | Complete your CLA here: 20 | 21 | ## Issues 22 | We use GitHub issues to track public bugs. Please ensure your description is 23 | clear and has sufficient instructions to be able to reproduce the issue. 24 | 25 | Facebook has a [bounty program](https://www.facebook.com/whitehat/) for the safe 26 | disclosure of security bugs. In those cases, please go through the process 27 | outlined on that page and do not file a public issue. 28 | 29 | ## License 30 | By contributing to pifuhd, you agree that your contributions will be licensed 31 | under the LICENSE file in the root directory of this source tree. -------------------------------------------------------------------------------- /docs/run_totalmocap.md: -------------------------------------------------------------------------------- 1 | # Whole Body Motion Capture Demo (Body + Hand) 2 | 3 | Our whole body motion capture is based on our [FrankMocap paper](https://penincillin.github.io/frank_mocap), by intergrating the output of body module and hand module. See our paper for details. 4 | 5 |
8 | 9 | ## Requirements 10 | - You should install both [body module](run_bodymocap.md) and [hand module](run_handmocap.md). 11 | 12 | 13 | ## A Quick Start 14 | - Run the following. The mocap output will be shown on your screen. 15 | ``` 16 | # Using a machine with a monitor to show output on screen 17 | # OpenGL renderer is used by default (--renderer_type opengl) 18 | # The output images are also saved in ./mocap_output 19 | python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 20 | 21 | # Screenless mode (e.g., a remote server) 22 | xvfb-run -a python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 23 | 24 | # Set --renderer_type to use other renderers 25 | python -m demo.demo_frankmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 26 | ``` 27 | 28 | ## Run Demo with A Webcam Input 29 | - Run, 30 | ``` 31 | python -m demo.demo_frankmocap --input_path webcam 32 | 33 | # or use the OpenGL GUI renderer 34 | python -m demo.demo_frankmocap --input_path webcam --renderer_type opengl_gui 35 | ``` 36 | - See below for how to control the OpenGL GUI mode. 37 | 38 | ## Other Details 39 | - Other options are the same as for the [body module](run_bodymocap.md). 40 | 41 | ## License 42 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 43 | See the [LICENSE](LICENSE) file. 44 | -------------------------------------------------------------------------------- /mocap_utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # All rights reserved. 3 | # 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 6 | ############################################################################## 7 | # 8 | # Based on: 9 | # -------------------------------------------------------- 10 | # Fast R-CNN 11 | # Copyright (c) 2015 Microsoft 12 | # Licensed under The MIT License [see LICENSE for details] 13 | # Written by Ross Girshick 14 | # -------------------------------------------------------- 15 | 16 | """Timing related functions.""" 17 | 18 | from __future__ import absolute_import 19 | from __future__ import division 20 | from __future__ import print_function 21 | from __future__ import unicode_literals 22 | 23 | import time 24 | 25 | 26 | class Timer(object): 27 | """A simple timer.""" 28 | 29 | def __init__(self): 30 | self.reset() 31 | 32 | def tic(self): 33 | # using time.time instead of time.clock because time.clock 34 | # does not normalize for multithreading 35 | self.start_time = time.time() 36 | 37 | def toc(self, average=True, bPrint=False, title="Time"): 38 | self.diff = time.time() - self.start_time 39 | self.total_time += self.diff 40 | self.calls += 1 41 | self.average_time = self.total_time / self.calls 42 | if average: 43 | if bPrint: 44 | # print("Avg Time: {}".format(self.average_time)) 45 | print("{}: {:0.2f} sec/frame, FPS {:0.2f}".format(title, self.diff, 1.0/self.diff)) 46 | 47 | return self.average_time 48 | else: 49 | if bPrint: 50 | print("{}: {}, FPS {:0.2f}".format(title, self.diff, 1.0/self.diff)) 51 | return self.diff 52 | 53 | def reset(self): 54 | self.total_time = 0. 55 | self.calls = 0 56 | self.start_time = 0. 57 | self.diff = 0. 58 | self.average_time = 0.
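# Example usage (a minimal sketch; `frame_timer` and `process_frame` are hypothetical names):
#
#   frame_timer = Timer()
#   frame_timer.tic()                              # start timing one frame
#   process_frame()                                # the work being measured
#   frame_timer.toc(bPrint=True, title="Frame")    # prints "Frame: X.XX sec/frame, FPS Y.YY"
#                                                  # with average=True (default), toc() returns the running average time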
59 | -------------------------------------------------------------------------------- /mocap_utils/compare_results.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | This code is used to visually compare the results 5 | """ 6 | import os, sys, shutil 7 | import os.path as osp 8 | import ry_utils 9 | import cv2 10 | import numpy as np 11 | 12 | def check_keywords(subdir, keywords): 13 | if len(keywords) == 0: 14 | return True 15 | else: 16 | for keyword in keywords: 17 | if subdir.find(keyword)>=0: 18 | return True 19 | return False 20 | 21 | def main(): 22 | dir_list = [ 23 | 'samples/output/body/third_view_thresh_0.3_distance_2.0', 24 | 'samples/output/body/third_view_thresh_0.5_distance_1.5', 25 | 'samples/output/body/third_view_thresh_0.7_distance_1.0', 26 | ] 27 | dir1 = dir_list[0] 28 | 29 | keywords = ['cj_dance', 'body_capture'] 30 | 31 | res_dir = "samples/output/body/third_view_compare" 32 | res_dir = osp.join(res_dir, '_&&_'.join(['_'.join(item.split('/')[-1:]) for item in dir_list])) 33 | 34 | for subdir in os.listdir(dir1): 35 | if osp.isdir(osp.join(dir1, subdir)): 36 | if check_keywords(subdir, keywords): 37 | dir_path1 = osp.join(dir1, subdir) 38 | for img_name in ry_utils.get_all_files(dir_path1, ('.jpg','.png'), 'name_only'): 39 | img_list = list() 40 | #print(img_name) 41 | for dir in dir_list: 42 | dir_path = dir_path1.replace(dir1, dir) 43 | img_path = osp.join(dir_path, img_name) 44 | img = cv2.imread(img_path) 45 | img_list.append(img) 46 | if img_path.find(dir1)>=0: 47 | res_img_path = img_path.replace(dir1, res_dir) 48 | #print(img_path, osp.exists(img_path)) 49 | if any([img is None for img in img_list]): 50 | continue 51 | res_img = np.concatenate(img_list, axis=0) 52 | h, w = res_img.shape[:2] 53 | res_img = cv2.resize(res_img, (int(w*0.7), int(h*0.7))) 54 | res_img_path = res_img_path.replace('.png', '.jpg') 55 | ry_utils.make_subdir(res_img_path) 56 | cv2.imwrite(res_img_path, res_img) 57 | print(res_img_path) 58 | 59 | 60 | 61 | if __name__ == '__main__': 62 | main() 63 | -------------------------------------------------------------------------------- /handmocap/hand_modules/h3dw_networks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn import init 6 | import functools 7 | import numpy as np 8 | from . 
import resnet 9 | 10 | def weights_init(m): 11 | classname = m.__class__.__name__ 12 | if classname.find('Conv') != -1: 13 | m.weight.data.normal_(0.0, 0.02) 14 | if hasattr(m.bias, 'data'): 15 | m.bias.data.fill_(0) 16 | elif classname.find('BatchNorm2d') != -1: 17 | m.weight.data.normal_(1.0, 0.02) 18 | m.bias.data.fill_(0) 19 | 20 | 21 | def get_norm_layer(norm_type='instance'): 22 | if norm_type == 'batch': 23 | norm_layer = functools.partial(nn.BatchNorm2d, affine=True) 24 | elif norm_type == 'instance': 25 | norm_layer = functools.partial(nn.InstanceNorm2d, affine=False) 26 | else: 27 | raise NotImplementedError('normalization layer [%s] is not found' % norm_type) 28 | return norm_layer 29 | 30 | 31 | def print_network(net): 32 | num_params = 0 33 | for param in net.parameters(): 34 | num_params += param.numel() 35 | print(net) 36 | print('Total number of parameters: %d' % num_params) 37 | 38 | 39 | def get_model(arch): 40 | if hasattr(resnet, arch): 41 | network = getattr(resnet, arch) 42 | return network(pretrained=True, num_classes=512) 43 | else: 44 | raise ValueError("Invalid Backbone Architecture") 45 | 46 | 47 | class H3DWEncoder(nn.Module): 48 | def __init__(self, opt, mean_params): 49 | super(H3DWEncoder, self).__init__() 50 | self.two_branch = opt.two_branch 51 | self.mean_params = mean_params.clone().cuda() 52 | self.opt = opt 53 | 54 | relu = nn.ReLU(inplace=False) 55 | fc2 = nn.Linear(1024, 1024) 56 | regressor = nn.Linear(1024 + opt.total_params_dim, opt.total_params_dim) 57 | 58 | feat_encoder = [relu, fc2, relu] 59 | regressor = [regressor, ] 60 | self.feat_encoder = nn.Sequential(*feat_encoder) 61 | self.regressor = nn.Sequential(*regressor) 62 | 63 | self.main_encoder = get_model(opt.main_encoder) 64 | 65 | 66 | def forward(self, main_input): 67 | main_feat = self.main_encoder(main_input) 68 | feat = self.feat_encoder(main_feat) 69 | 70 | pred_params = self.mean_params 71 | for i in range(3): 72 | input_feat = torch.cat([feat, pred_params], dim=1) 73 | output = self.regressor(input_feat) 74 | pred_params = pred_params + output 75 | return pred_params 76 | -------------------------------------------------------------------------------- /renderer/image_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import cv2 4 | import numpy as np 5 | 6 | def draw_keypoints(image, kps, color=(0,0,255), radius=5, check_exist=False): 7 | # recover color 8 | if color == 'red': 9 | color = (0, 0, 255) 10 | elif color == 'green': 11 | color = (0, 255, 0) 12 | elif color == 'blue': 13 | color = (255, 0, 0) 14 | else: 15 | assert isinstance(color, tuple) and len(color) == 3 16 | 17 | # draw keypoints 18 | res_img = image.copy() 19 | for i in range(kps.shape[0]): 20 | x, y = kps[i][:2].astype(np.int32) 21 | if check_exist: 22 | score = kps[i][2] 23 | else: 24 | score = 1.0 25 | # print(i, score) 26 | if score > 0.0: 27 | cv2.circle(res_img, (x,y), radius=radius, color=color, thickness=-1) 28 | return res_img.astype(np.uint8) 29 | 30 | 31 | def draw_bbox(image, bbox, color=(0,0,255), thickness=3): 32 | x0, y0 = int(bbox[0]), int(bbox[1]) 33 | x1, y1 = int(bbox[2]), int(bbox[3]) 34 | res_img = cv2.rectangle(image.copy(), (x0,y0), (x1,y1), color=color, thickness=thickness) 35 | return res_img.astype(np.uint8) 36 | 37 | 38 | 39 | def draw_raw_bbox(img, bboxes): 40 | img = img.copy() 41 | for bbox in bboxes: 42 | x0, y0, w, h = bbox 43 | bbox_xyxy = (x0, y0, x0+w, y0+h) 44 | img = draw_bbox(img, bbox_xyxy) 45 | return img 46 | 47 | 48 | def draw_body_bbox(img, body_bbox_list): 49 | img = img.copy() 50 | for body_bbox in body_bbox_list: 51 | if body_bbox is not None: 52 | x0, y0, w, h = body_bbox 53 | img = draw_bbox(img, (x0, y0, x0+w, y0+h)) 54 | return img 55 | 56 | 57 | def draw_arm_pose(img, body_pose_list): 58 | img = img.copy() 59 | for body_pose in body_pose_list: 60 | # left & right arm 61 | img = draw_keypoints( 62 | img, body_pose[6:8, :], radius=10, color=(255, 0, 0)) 63 | img = draw_keypoints( 64 | img, body_pose[3:5, :], radius=10, color=(0, 0, 255)) 65 | return img 66 | 67 | 68 | def draw_hand_bbox(img, hand_bbox_list): 69 | img = img.copy() 70 | for hand_bboxes in hand_bbox_list: 71 | if hand_bboxes is not None: 72 | for key in hand_bboxes: 73 | bbox = hand_bboxes[key] 74 | if bbox is not None: 75 | x0, y0, w, h = bbox 76 | bbox_new = (x0, y0, x0+w, y0+h) 77 | color = (255, 0, 0) if key == 'left_hand' else (0, 255, 0) 78 | img = draw_bbox(img, bbox_new, color=color) 79 | return img 80 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to make participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, sex characteristics, gender identity and expression, 9 | level of experience, education, socio-economic status, nationality, personal 10 | appearance, race, religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies within all project spaces, and it also applies when 49 | an individual is representing the project or its community in public spaces. 50 | Examples of representing a project or community include using an official 51 | project e-mail address, posting via an official social media account, or acting 52 | as an appointed representative at an online or offline event. Representation of 53 | a project may be further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at . All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | 75 | For answers to common questions about this code of conduct, see 76 | https://www.contributor-covenant.org/faq 77 | -------------------------------------------------------------------------------- /renderer/shaders/framework.py: -------------------------------------------------------------------------------- 1 | 2 | # Mario Rosasco, 2016 3 | # adapted from framework.cpp, Copyright (C) 2010-2012 by Jason L. McKesson 4 | # This file is licensed under the MIT License. 
5 | # 6 | # NB: Unlike in the framework.cpp organization, the main loop is contained 7 | # in the tutorial files, not in this framework file. Additionally, a copy of 8 | # this module file must exist in the same directory as the tutorial files 9 | # to be imported properly. 10 | 11 | import os 12 | 13 | from OpenGL.GL import * 14 | 15 | 16 | # Function that creates and compiles shaders according to the given type (a GL enum value) and 17 | # shader program (a file containing a GLSL program). 18 | def loadShader(shaderType, shaderFile): 19 | # check if file exists, get full path name 20 | strFilename = findFileOrThrow(shaderFile) 21 | shaderData = None 22 | with open(strFilename, 'r') as f: 23 | shaderData = f.read() 24 | 25 | shader = glCreateShader(shaderType) 26 | glShaderSource(shader, shaderData) # note that this is a simpler function call than in C 27 | 28 | # This shader compilation is more explicit than the one used in 29 | # framework.cpp, which relies on a glutil wrapper function. 30 | # This is made explicit here mainly to decrease dependence on pyOpenGL 31 | # utilities and wrappers, which docs caution may change in future versions. 32 | glCompileShader(shader) 33 | 34 | status = glGetShaderiv(shader, GL_COMPILE_STATUS) 35 | if status == GL_FALSE: 36 | # Note that getting the error log is much simpler in Python than in C/C++ 37 | # and does not require explicit handling of the string buffer 38 | strInfoLog = glGetShaderInfoLog(shader) 39 | strShaderType = "" 40 | if shaderType is GL_VERTEX_SHADER: 41 | strShaderType = "vertex" 42 | elif shaderType is GL_GEOMETRY_SHADER: 43 | strShaderType = "geometry" 44 | elif shaderType is GL_FRAGMENT_SHADER: 45 | strShaderType = "fragment" 46 | 47 | print("Compilation failure for " + strShaderType + " shader:\n" + str(strInfoLog)) 48 | 49 | return shader 50 | 51 | 52 | # Function that accepts a list of shaders, compiles them, and returns a handle to the compiled program 53 | def createProgram(shaderList): 54 | program = glCreateProgram() 55 | 56 | for shader in shaderList: 57 | glAttachShader(program, shader) 58 | 59 | glLinkProgram(program) 60 | 61 | status = glGetProgramiv(program, GL_LINK_STATUS) 62 | if status == GL_FALSE: 63 | # Note that getting the error log is much simpler in Python than in C/C++ 64 | # and does not require explicit handling of the string buffer 65 | strInfoLog = glGetProgramInfoLog(program) 66 | print("Linker failure: \n" + str(strInfoLog)) 67 | 68 | for shader in shaderList: 69 | glDetachShader(program, shader) 70 | 71 | return program 72 | 73 | 74 | # Helper function to locate and open the target file (passed in as a string). 75 | # Returns the full path to the file as a string. 76 | def findFileOrThrow(strBasename): 77 | # Keep constant names in C-style convention, for readability 78 | # when comparing to C(/C++) code. 
79 | if os.path.isfile(strBasename): 80 | return strBasename 81 | 82 | LOCAL_FILE_DIR = "data" + os.sep 83 | GLOBAL_FILE_DIR = os.path.dirname(os.path.abspath(__file__)) + os.sep + "data" + os.sep 84 | 85 | strFilename = LOCAL_FILE_DIR + strBasename 86 | if os.path.isfile(strFilename): 87 | return strFilename 88 | 89 | strFilename = GLOBAL_FILE_DIR + strBasename 90 | if os.path.isfile(strFilename): 91 | return strFilename 92 | 93 | raise IOError('Could not find target file ' + strBasename) 94 | -------------------------------------------------------------------------------- /bodymocap/constants.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | FOCAL_LENGTH = 5000. 4 | IMG_RES = 224 5 | 6 | # Mean and standard deviation for normalizing input image 7 | IMG_NORM_MEAN = [0.485, 0.456, 0.406] 8 | IMG_NORM_STD = [0.229, 0.224, 0.225] 9 | 10 | """ 11 | We create a superset of joints containing the OpenPose joints together with the ones that each dataset provides. 12 | We keep a superset of 24 joints such that we include all joints from every dataset. 13 | If a dataset doesn't provide annotations for a specific joint, we simply ignore it. 14 | The joints used here are the following: 15 | """ 16 | JOINT_NAMES = [ 17 | 'OP Nose', 'OP Neck', 'OP RShoulder', #0,1,2 18 | 'OP RElbow', 'OP RWrist', 'OP LShoulder', #3,4,5 19 | 'OP LElbow', 'OP LWrist', 'OP MidHip', #6, 7,8 20 | 'OP RHip', 'OP RKnee', 'OP RAnkle', #9,10,11 21 | 'OP LHip', 'OP LKnee', 'OP LAnkle', #12,13,14 22 | 'OP REye', 'OP LEye', 'OP REar', #15,16,17 23 | 'OP LEar', 'OP LBigToe', 'OP LSmallToe', #18,19,20 24 | 'OP LHeel', 'OP RBigToe', 'OP RSmallToe', 'OP RHeel', #21, 22, 23, 24 ##Total 25 joints for openpose 25 | 'Right Ankle', 'Right Knee', 'Right Hip', #0,1,2 26 | 'Left Hip', 'Left Knee', 'Left Ankle', #3, 4, 5 27 | 'Right Wrist', 'Right Elbow', 'Right Shoulder', #6 28 | 'Left Shoulder', 'Left Elbow', 'Left Wrist', #9 29 | 'Neck (LSP)', 'Top of Head (LSP)', #12, 13 30 | 'Pelvis (MPII)', 'Thorax (MPII)', #14, 15 31 | 'Spine (H36M)', 'Jaw (H36M)', #16, 17 32 | 'Head (H36M)', 'Nose', 'Left Eye', #18, 19, 20 33 | 'Right Eye', 'Left Ear', 'Right Ear' #21,22,23 (Total 24 joints) 34 | ] 35 | 36 | # Dict containing the joints in numerical order 37 | JOINT_IDS = {JOINT_NAMES[i]: i for i in range(len(JOINT_NAMES))} 38 | 39 | # Map joints to SMPL joints 40 | JOINT_MAP = { 41 | 'OP Nose': 24, 'OP Neck': 12, 'OP RShoulder': 17, 42 | 'OP RElbow': 19, 'OP RWrist': 21, 'OP LShoulder': 16, 43 | 'OP LElbow': 18, 'OP LWrist': 20, 'OP MidHip': 0, 44 | 'OP RHip': 2, 'OP RKnee': 5, 'OP RAnkle': 8, 45 | 'OP LHip': 1, 'OP LKnee': 4, 'OP LAnkle': 7, 46 | 'OP REye': 25, 'OP LEye': 26, 'OP REar': 27, 47 | 'OP LEar': 28, 'OP LBigToe': 29, 'OP LSmallToe': 30, 48 | 'OP LHeel': 31, 'OP RBigToe': 32, 'OP RSmallToe': 33, 'OP RHeel': 34, 49 | 'Right Ankle': 8, 'Right Knee': 5, 'Right Hip': 45, 50 | 'Left Hip': 46, 'Left Knee': 4, 'Left Ankle': 7, 51 | 'Right Wrist': 21, 'Right Elbow': 19, 'Right Shoulder': 17, 52 | 'Left Shoulder': 16, 'Left Elbow': 18, 'Left Wrist': 20, 53 | 'Neck (LSP)': 47, 'Top of Head (LSP)': 48, 54 | 'Pelvis (MPII)': 49, 'Thorax (MPII)': 50, 55 | 'Spine (H36M)': 51, 'Jaw (H36M)': 52, 56 | 'Head (H36M)': 53, 'Nose': 24, 'Left Eye': 26, 57 | 'Right Eye': 25, 'Left Ear': 28, 'Right Ear': 27 58 | } 59 | 60 | # Joint selectors 61 | # Indices to get the 14 LSP joints from the 17 H36M joints 62 | H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 
13, 8, 10, 0, 7, 9] 63 | H36M_TO_J14 = H36M_TO_J17[:14] 64 | # Indices to get the 14 LSP joints from the ground truth joints 65 | J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17] 66 | J24_TO_J14 = J24_TO_J17[:14] 67 | 68 | # Permutation of SMPL pose parameters when flipping the shape 69 | SMPL_JOINTS_FLIP_PERM = [0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 19, 18, 21, 20, 23, 22] 70 | SMPL_POSE_FLIP_PERM = [] 71 | for i in SMPL_JOINTS_FLIP_PERM: 72 | SMPL_POSE_FLIP_PERM.append(3*i) 73 | SMPL_POSE_FLIP_PERM.append(3*i+1) 74 | SMPL_POSE_FLIP_PERM.append(3*i+2) 75 | # Permutation indices for the 24 ground truth joints 76 | J24_FLIP_PERM = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21, 20, 23, 22] 77 | # Permutation indices for the full set of 49 joints 78 | J49_FLIP_PERM = [0, 1, 5, 6, 7, 2, 3, 4, 8, 12, 13, 14, 9, 10, 11, 16, 15, 18, 17, 22, 23, 24, 19, 20, 21]\ 79 | + [25+i for i in J24_FLIP_PERM] 80 | 81 | 82 | -------------------------------------------------------------------------------- /renderer/screen_free_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | Renders mesh using OpenDr / Pytorch-3D for visualization. 5 | """ 6 | 7 | import sys 8 | import numpy as np 9 | import cv2 10 | import pdb 11 | from .image_utils import draw_raw_bbox, draw_hand_bbox, draw_body_bbox, draw_arm_pose 12 | 13 | # To use screen_free visualizer. Either OpenDR or Pytorch3D should be installed. 14 | g_valid_visualize = False 15 | try: 16 | from .od_renderer import OpendrRenderer 17 | g_valid_visualize = True 18 | except ImportError: 19 | print("Cannot import OpenDR Renderer") 20 | try: 21 | from .p3d_renderer import Pytorch3dRenderer 22 | g_valid_visualize = True 23 | except ImportError: 24 | print("Cannot import Pytorch3D Renderer") 25 | assert g_valid_visualize, "You should import either OpenDR or Pytorch3D" 26 | 27 | class Visualizer(object): 28 | 29 | def __init__(self, renderer_backend): 30 | colors = { 31 | # colorbline/print/copy safe: 32 | 'light_gray': [0.9, 0.9, 0.9], 33 | 'light_purple': [0.8, 0.53, 0.53], 34 | 'light_green': [166/255.0, 178/255.0, 30/255.0], 35 | 'light_blue': [0.65098039, 0.74117647, 0.85882353], 36 | } 37 | 38 | self.input_size = 1920 39 | 40 | # set-up renderer 41 | assert renderer_backend in ['opendr', 'pytorch3d'] 42 | if renderer_backend == 'opendr': 43 | self.renderer = OpendrRenderer( 44 | img_size=self.input_size, 45 | mesh_color=colors['light_purple']) 46 | else: 47 | self.renderer = Pytorch3dRenderer( 48 | img_size=self.input_size, 49 | mesh_color=colors['light_purple']) 50 | 51 | 52 | def __render_pred_verts(self, img_original, pred_mesh_list): 53 | assert max(img_original.shape) <= self.input_size, \ 54 | f"Currently, we donlt support images size larger than:{self.input_size}" 55 | 56 | res_img = img_original.copy() 57 | rend_img = np.ones((self.input_size, self.input_size, 3)) 58 | h, w = img_original.shape[:2] 59 | rend_img[:h, :w, :] = img_original 60 | 61 | for mesh in pred_mesh_list: 62 | verts = mesh['vertices'] 63 | faces = mesh['faces'] 64 | rend_img = self.renderer.render(verts, faces, rend_img) 65 | 66 | res_img = rend_img[:h, :w, :] 67 | return res_img 68 | 69 | 70 | def visualize(self, 71 | input_img, 72 | hand_bbox_list = None, 73 | body_bbox_list = None, 74 | body_pose_list = None, 75 | raw_hand_bboxes = None, 76 | pred_mesh_list = None, 77 | vis_raw_hand_bbox = True, 78 
| vis_body_pose = True, 79 | vis_hand_bbox = True, 80 | ): 81 | # init 82 | res_img = input_img.copy() 83 | 84 | # draw raw hand bboxes 85 | if raw_hand_bboxes is not None and vis_raw_hand_bbox: 86 | res_img = draw_raw_bbox(input_img, raw_hand_bboxes) 87 | # res_img = np.concatenate((res_img, raw_bbox_img), axis=1) 88 | 89 | # draw 2D Pose 90 | if body_pose_list is not None and vis_body_pose: 91 | res_img = draw_arm_pose(res_img, body_pose_list) 92 | 93 | # draw body bbox 94 | if body_bbox_list is not None: 95 | body_bbox_img = draw_body_bbox(input_img, body_bbox_list) 96 | res_img = body_bbox_img 97 | 98 | # draw hand bbox 99 | if hand_bbox_list is not None: 100 | res_img = draw_hand_bbox(res_img, hand_bbox_list) 101 | 102 | # render predicted meshes 103 | if pred_mesh_list is not None: 104 | rend_img = self.__render_pred_verts(input_img, pred_mesh_list) 105 | res_img = np.concatenate((res_img, rend_img), axis=1) 106 | # res_img = rend_img 107 | 108 | return res_img -------------------------------------------------------------------------------- /renderer/meshRenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | from OpenGL.GLUT import * 5 | from OpenGL.GLU import * 6 | from renderer.shaders.framework import * 7 | 8 | from renderer.glRenderer import glRenderer 9 | 10 | # from renderer.render_utils import ComputeNormal 11 | 12 | _glut_window = None 13 | 14 | ''' 15 | #Usage: 16 | render.set_smpl_mesh(v) #v for vertex locations in(6890,3) 17 | render.setBackgroundTexture(rawImg) #Optional BG texture 18 | render.setWindowSize(rawImg.shape[1], rawImg.shape[0]) #Optional: window size 19 | render.show_once() 20 | ''' 21 | 22 | class meshRenderer(glRenderer): 23 | 24 | def __init__(self, width=1600, height=1200, name='GL Renderer', 25 | # program_files=['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'], 26 | # program_files=['renderer/shaders/normal140.fs', 'renderer/shaders/normal140.vs'], 27 | # program_files=['renderer/shaders/geo140.fs', 'renderer/shaders/geo140.vs'], 28 | render_mode ="normal", #color, geo, normal 29 | color_size=1, ms_rate=1): 30 | 31 | self.render_mode = render_mode 32 | self.program_files ={} 33 | self.program_files['color'] = ['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'] 34 | self.program_files['normal'] = ['renderer/shaders/normal140.fs', 'renderer/shaders/normal140.vs'] 35 | self.program_files['geo'] = ['renderer/shaders/colorgeo140.fs', 'renderer/shaders/colorgeo140.vs'] 36 | 37 | glRenderer.__init__(self, width, height, name, self.program_files[render_mode], color_size, ms_rate) 38 | 39 | def setRenderMode(self, render_mode): 40 | """ 41 | Set render mode among ['color', 'normal', 'geo'] 42 | """ 43 | if self.render_mode == render_mode: 44 | return 45 | 46 | self.render_mode = render_mode 47 | self.initShaderProgram(self.program_files[render_mode]) 48 | 49 | 50 | def drawMesh(self): 51 | 52 | if self.vertex_dim is None: 53 | return 54 | # self.draw_init() 55 | 56 | glColor3f(1,1,0) 57 | glUseProgram(self.program) 58 | 59 | mvMat = glGetFloatv(GL_MODELVIEW_MATRIX) 60 | pMat = glGetFloatv(GL_PROJECTION_MATRIX) 61 | # mvpMat = pMat*mvMat 62 | 63 | self.model_view_matrix = mvMat 64 | self.projection_matrix = pMat 65 | 66 | # glUniformMatrix4fv(self.model_mat_unif, 1, GL_FALSE, self.model_view_matrix.transpose()) 67 | # glUniformMatrix4fv(self.persp_mat_unif, 1, GL_FALSE, self.projection_matrix.transpose()) 68 
| glUniformMatrix4fv(self.model_mat_unif, 1, GL_FALSE, self.model_view_matrix) 69 | glUniformMatrix4fv(self.persp_mat_unif, 1, GL_FALSE, self.projection_matrix) 70 | 71 | # Handle vertex buffer 72 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 73 | glEnableVertexAttribArray(0) 74 | glVertexAttribPointer(0, self.vertex_dim, GL_DOUBLE, GL_FALSE, 0, None) 75 | 76 | # # Handle normal buffer 77 | glBindBuffer(GL_ARRAY_BUFFER, self.normal_buffer) 78 | glEnableVertexAttribArray(1) 79 | glVertexAttribPointer(1, 3, GL_DOUBLE, GL_FALSE, 0, None) 80 | 81 | # # Handle color buffer 82 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 83 | glEnableVertexAttribArray(2) 84 | glVertexAttribPointer(2, 3, GL_DOUBLE, GL_FALSE, 0, None) 85 | 86 | 87 | if True:#self.meshindex_data: 88 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, self.index_buffer) #Note "GL_ELEMENT_ARRAY_BUFFER" instead of GL_ARRAY_BUFFER 89 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, self.meshindex_data, GL_STATIC_DRAW) 90 | 91 | # glDrawArrays(GL_TRIANGLES, 0, self.n_vertices) 92 | glDrawElements(GL_TRIANGLES, len(self.meshindex_data), GL_UNSIGNED_INT, None) #For index array (mesh face data) 93 | glDisableVertexAttribArray(0) 94 | glBindBuffer(GL_ARRAY_BUFFER, 0) 95 | 96 | glUseProgram(0) -------------------------------------------------------------------------------- /renderer/render_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | 5 | 6 | # vertices: frames x meshVerNum x 3 7 | # trifaces: facePolygonNum x 3 = 22800 x 3 8 | def ComputeNormal(vertices, trifaces): 9 | 10 | if vertices.shape[0] > 5000: 11 | print('ComputeNormal: Warning: too big to compute {0}'.format(vertices.shape) ) 12 | return 13 | 14 | #compute vertex Normals for all frames 15 | U = vertices[:,trifaces[:,1],:] - vertices[:,trifaces[:,0],:] #frames x faceNum x 3 16 | V = vertices[:,trifaces[:,2],:] - vertices[:,trifaces[:,1],:] #frames x faceNum x 3 17 | originalShape = U.shape #remember: frames x faceNum x 3 18 | 19 | U = np.reshape(U, [-1,3]) 20 | V = np.reshape(V, [-1,3]) 21 | faceNormals = np.cross(U,V) #frames x 13776 x 3 22 | from sklearn.preprocessing import normalize 23 | 24 | if np.isnan(np.max(faceNormals)): 25 | print('ComputeNormal: Warning nan is detected {0}') 26 | return 27 | faceNormals = normalize(faceNormals) 28 | 29 | faceNormals = np.reshape(faceNormals, originalShape) 30 | 31 | if False: #Slow version 32 | vertex_normals = np.zeros(vertices.shape) #(frames x 11510) x 3 33 | for fIdx, vIdx in enumerate(trifaces[:,0]): 34 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 35 | for fIdx, vIdx in enumerate(trifaces[:,1]): 36 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 37 | for fIdx, vIdx in enumerate(trifaces[:,2]): 38 | vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 39 | else: #Faster version 40 | # Computing vertex normals, much faster (and obscure) replacement 41 | index = np.vstack((np.ravel(trifaces), np.repeat(np.arange(len(trifaces)), 3))).T 42 | index_sorted = index[index[:,0].argsort()] 43 | vertex_normals = np.add.reduceat(faceNormals[:,index_sorted[:, 1],:][0], 44 | np.concatenate(([0], np.cumsum(np.unique(index_sorted[:, 0], 45 | return_counts=True)[1])[:-1])))[None, :] 46 | vertex_normals = vertex_normals.astype(np.float64) 47 | 48 | originalShape = vertex_normals.shape 49 | vertex_normals = np.reshape(vertex_normals, [-1,3]) 50 | vertex_normals = normalize(vertex_normals) 51 | vertex_normals = 
np.reshape(vertex_normals,originalShape) 52 | 53 | return vertex_normals 54 | 55 | 56 | 57 | def ComputeNormal_gpu(vertices, trifaces): 58 | import torch 59 | import torch.nn.functional as F 60 | 61 | if vertices.shape[0] > 5000: 62 | print('ComputeNormal: Warning: too big to compute {0}'.format(vertices.shape) ) 63 | return 64 | 65 | #compute vertex Normals for all frames 66 | #trifaces_cuda = torch.from_numpy(trifaces.astype(np.long)).cuda() 67 | vertices_cuda = torch.from_numpy(vertices.astype(np.float32)).cuda() 68 | 69 | U_cuda = vertices_cuda[:,trifaces[:,1],:] - vertices_cuda[:,trifaces[:,0],:] #frames x faceNum x 3 70 | V_cuda = vertices_cuda[:,trifaces[:,2],:] - vertices_cuda[:,trifaces[:,1],:] #frames x faceNum x 3 71 | originalShape = list(U_cuda.size()) #remember: frames x faceNum x 3 72 | 73 | U_cuda = torch.reshape(U_cuda, [-1,3])#.astype(np.float32) 74 | V_cuda = torch.reshape(V_cuda, [-1,3])#.astype(np.float32) 75 | 76 | faceNormals = U_cuda.cross(V_cuda) 77 | faceNormals = F.normalize(faceNormals,dim=1) 78 | 79 | faceNormals = torch.reshape(faceNormals, originalShape) 80 | 81 | # trifaces has duplicated vertex index, so cannot be parallazied 82 | # vertex_normals = torch.zeros(vertices.shape,dtype=torch.float32).cuda() #(frames x 11510) x 3 83 | # for fIdx, vIdx in enumerate(trifaces[:,0]): 84 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 85 | # for fIdx, vIdx in enumerate(trifaces[:,1]): 86 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 87 | # for fIdx, vIdx in enumerate(trifaces[:,2]): 88 | # vertex_normals[:,vIdx,:] += faceNormals[:,fIdx,:] 89 | 90 | # Computing vertex normals, much faster (and obscure) replacement 91 | index = np.vstack((np.ravel(trifaces), np.repeat(np.arange(len(trifaces)), 3))).T 92 | index_sorted = index[index[:,0].argsort()] 93 | vertex_normals = np.add.reduceat(faceNormals[:,index_sorted[:, 1],:][0], 94 | np.concatenate(([0], np.cumsum(np.unique(index_sorted[:, 0], 95 | return_counts=True)[1])[:-1])))[None, :] 96 | vertex_normals = torch.from_numpy(vertex_normals).float().cuda() 97 | 98 | vertex_normals = F.normalize(vertex_normals,dim=2) 99 | vertex_normals = vertex_normals.data.cpu().numpy() #(batch, chunksize, dim) 100 | 101 | return vertex_normals 102 | -------------------------------------------------------------------------------- /demo/demo_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | import argparse 4 | 5 | class DemoOptions(): 6 | 7 | def __init__(self): 8 | parser = argparse.ArgumentParser() 9 | 10 | # parser.add_argument('--checkpoint', required=False, default=default_checkpoint, help='Path to pretrained checkpoint') 11 | default_checkpoint_body_smpl ='./extra_data/body_module/pretrained_weights/2020_05_31-00_50_43-best-51.749683916568756.pt' 12 | parser.add_argument('--checkpoint_body_smpl', required=False, default=default_checkpoint_body_smpl, help='Path to pretrained checkpoint') 13 | default_checkpoint_body_smplx ='./extra_data/body_module/pretrained_weights/smplx-03-28-46060-w_spin_mlc3d_46582-2089_2020_03_28-21_56_16.pt' 14 | parser.add_argument('--checkpoint_body_smplx', required=False, default=default_checkpoint_body_smplx, help='Path to pretrained checkpoint') 15 | default_checkpoint_hand = "./extra_data/hand_module/pretrained_weights/pose_shape_best.pth" 16 | parser.add_argument('--checkpoint_hand', required=False, default=default_checkpoint_hand, help='Path to pretrained checkpoint') 17 | 18 | # input options 19 | parser.add_argument('--input_path', type=str, default=None, help="""Path of video, image, or a folder where image files exists""") 20 | parser.add_argument('--start_frame', type=int, default=0, help='given a sequence of frames, set the starting frame') 21 | parser.add_argument('--end_frame', type=int, default=float('inf'), help='given a sequence of frames, set the last frame') 22 | parser.add_argument('--pkl_dir', type=str, help='Path of storing pkl files that store the predicted results') 23 | parser.add_argument('--openpose_dir', type=str, help='Directory of storing the prediction of openpose prediction') 24 | 25 | # output options 26 | parser.add_argument('--out_dir', type=str, default=None, help='Folder of output images.') 27 | # parser.add_argument('--pklout', action='store_true', help='Export mocap output as pkl file') 28 | parser.add_argument('--save_bbox_output', action='store_true', help='Save the bboxes in json files (bbox_xywh format)') 29 | parser.add_argument('--save_pred_pkl', action='store_true', help='Save the predictions (bboxes, params, meshes in pkl format') 30 | parser.add_argument("--save_mesh", action='store_true', help="Save the predicted vertices and faces") 31 | parser.add_argument("--save_frame", action='store_true', help='Save the extracted frames from video input or webcam') 32 | 33 | # Other options 34 | parser.add_argument('--single_person', action='store_true', help='Reconstruct only one person in the scene with the biggest bbox') 35 | parser.add_argument('--no_display', action='store_true', help='Do not visualize output on the screen') 36 | parser.add_argument('--no_video_out', action='store_true', help='Do not merge rendered frames to video (ffmpeg)') 37 | parser.add_argument('--smpl_dir', type=str, default='./extra_data/smpl/', help='Folder where smpl files are located.') 38 | parser.add_argument('--skip', action='store_true', help='Skip there exist already processed outputs') 39 | parser.add_argument('--video_url', type=str, default=None, help='URL of YouTube video, or image.') 40 | parser.add_argument('--download', '-d', action='store_true', help='Download YouTube video first (in webvideo folder), and process it') 41 | 42 | # Body mocap specific options 43 | parser.add_argument('--use_smplx', action='store_true', help='Use SMPLX model for body mocap') 44 | 45 | # Hand mocap specific options 46 | parser.add_argument('--view_type', type=str, default='third_view', choices=['third_view', 'ego_centric'], 47 
| help = "The view type of input. It could be ego-centric (such as epic kitchen) or third view") 48 | parser.add_argument('--crop_type', type=str, default='no_crop', choices=['hand_crop', 'no_crop'], 49 | help = """ 'hand_crop' means the hand are central cropped in input. (left hand should be flipped to right). 50 | 'no_crop' means hand detection is required to obtain hand bbox""") 51 | 52 | # Whole motion capture (FrankMocap) specific options 53 | parser.add_argument('--frankmocap_fast_mode', action='store_true', help="Use fast hand detection mode for whole body motion capture (frankmocap)") 54 | 55 | # renderer 56 | parser.add_argument("--renderer_type", type=str, default="opengl", 57 | choices=['pytorch3d', 'opendr', 'opengl_gui', 'opengl'], help="type of renderer to use") 58 | 59 | self.parser = parser 60 | 61 | 62 | def parse(self): 63 | self.opt = self.parser.parse_args() 64 | return self.opt 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FrankMocap: A Strong and Easy-to-use Single View 3D Hand+Body Pose Estimator 2 | 3 | FrankMocap pursues an easy-to-use single view 3D motion capture system developed by Facebook AI Research (FAIR). FrankMocap provides state-of-the-art 3D pose estimation outputs for body, hand, and body+hands in a single system. The core objective of FrankMocap is to democratize the 3D human pose estimation technology, enabling anyone (researchers, engineers, developers, artists, and others) can easily obtain 3D motion capture outputs from videos and images. 4 | 5 | Btw, why the name FrankMocap? Our pipeline to integrate body and hand modules reminds us of [Frankenstein's monster](https://en.wikipedia.org/wiki/Frankenstein)! 6 | 7 | ### News: 8 | - [2021/08/18] Our paper has been accepted to ICCV Workshop 2021. 9 | - [2020/10/09] We have improved openGL rendering speed. It's about 40% faster. (e.g., body module: 6fps -> 11fps) 10 | 11 | ## Key Features 12 | - Body Motion Capture: 13 |

  (demo images omitted)
17 | - Hand Motion Capture
  (demo images omitted)
22 | - Egocentric Hand Motion Capture
  (demo images omitted)
27 | - Whole body Motion Capture (body + hands)
  (demo images omitted)
34 | 35 | 36 | ## Installation 37 | - See [INSTALL.md](docs/INSTALL.md) 38 | 39 | ## A Quick Start 40 | - Run body motion capture 41 | ``` 42 | # using a machine with a monitor to show output on screen 43 | python -m demo.demo_bodymocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 44 | 45 | # screenless mode (e.g., a remote server) 46 | xvfb-run -a python -m demo.demo_bodymocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 47 | ``` 48 | 49 | - Run hand motion capture 50 | ``` 51 | # using a machine with a monitor to show outputs on screen 52 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output 53 | 54 | # screenless mode (e.g., a remote server) 55 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output 56 | ``` 57 | 58 | - Run whole body motion capture 59 | ``` 60 | # using a machine with a monitor to show outputs on screen 61 | python -m demo.demo_frankmocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 62 | 63 | # screenless mode (e.g., a remote server) 64 | xvfb-run -a python -m demo.demo_frankmocap --input_path ./sample_data/han_short.mp4 --out_dir ./mocap_output 65 | ``` 66 | - Note: 67 | - Above commands use openGL by default. If it does not work, you may try alternative renderers (pytorch3d or openDR). 68 | - See the readme of each module for details 69 | 70 | 71 | ## Joint Order 72 | - See [joint_order](docs/joint_order.md) 73 | 74 | 75 | ## Body Motion Capture Module 76 | - See [run_bodymocap](docs/run_bodymocap.md) 77 | 78 | ## Hand Motion Capture Module 79 | - See [run_handmocap](docs/run_handmocap.md) 80 | 81 | ## Whole Body Motion Capture Module (Body + Hand) 82 | - See [run_totalmocap](docs/run_totalmocap.md) 83 | 84 | ## License 85 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 86 | See the [LICENSE](LICENSE) file. 87 | 88 | ## References 89 | - FrankMocap is based on the following research outputs: 90 | ``` 91 | @InProceedings{rong2021frankmocap, 92 | title={FrankMocap: A Monocular 3D Whole-Body Pose Estimation System via Regression and Integration}, 93 | author={Rong, Yu and Shiratori, Takaaki and Joo, Hanbyul}, 94 | booktitle={IEEE International Conference on Computer Vision Workshops}, 95 | year={2021} 96 | } 97 | 98 | @article{joo2020eft, 99 | title={Exemplar Fine-Tuning for 3D Human Pose Fitting Towards In-the-Wild 3D Human Pose Estimation}, 100 | author={Joo, Hanbyul and Neverova, Natalia and Vedaldi, Andrea}, 101 | journal={3DV}, 102 | year={2021} 103 | } 104 | ``` 105 | 106 | - FrankMocap leverages many amazing open-sources shared in research community. 
107 | - [SMPL](https://smpl.is.tue.mpg.de/), [SMPLX](https://smpl-x.is.tue.mpg.de/) 108 | - [Detectron2](https://github.com/facebookresearch/detectron2) 109 | - [Pytorch3D](https://pytorch3d.org/) (for rendering) 110 | - [OpenDR](https://github.com/mattloper/opendr/wiki) (for rendering) 111 | - [SPIN](https://github.com/nkolot/SPIN) (for body module) 112 | - [100DOH](https://fouheylab.eecs.umich.edu/~dandans/projects/100DOH/) (for hand detection) 113 | - [lightweight-human-pose-estimation](https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch) (for body detection) 114 | 115 | -------------------------------------------------------------------------------- /docs/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Installing All Modules 4 | 5 | - The entire modules can be installed following the instruction below. 6 | Note that you may want to install body module only which has fewer dependencies. In this case, you may skip some steps. See below the details. 7 | 8 | - The basic installation 9 | ``` 10 | conda create -n venv_frankmocap python=3.7 11 | conda activate venv_frankmocap 12 | 13 | # Install basic dependencies 14 | sudo apt-get install libglu1-mesa libxi-dev libxmu-dev libglu1-mesa-dev freeglut3-dev libosmesa6-dev 15 | 16 | # Install ffmpeg 17 | sudo apt-get install ffmpeg 18 | 19 | # Install cuda 20 | # Choose versions based on your system. For example: 21 | # conda install cudatoolkit=10.1 cudnn=7.6.0 22 | 23 | # Install pytorch and torchvision 24 | conda install -c pytorch pytorch==1.6.0 torchvision cudatoolkit=10.1 25 | 26 | # Install other required libraries 27 | pip install -r docs/requirements.txt 28 | ``` 29 | 30 | - Install [Detectron2](https://github.com/facebookresearch/detectron2) (for hand module) 31 | - This is required for hand motion capture. You can skip this if you need only body module 32 | - If you followed the versions mentioned above (pytorch 1.6.0, CUDA 10.1, on Linux), you may try the following: 33 | ``` 34 | python -m pip install detectron2 -f \ 35 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html 36 | ``` 37 | - If it doesn't work, follow the instruction of [Detectron2](https://github.com/facebookresearch/detectron2/blob/master/INSTALL.md) 38 | 39 | - Install pytorch3d (optional, for pytorch3d renderering) 40 | - We use pytorch3d for an alternative rendering option. We provide other options (opengl by default) and you may skip this process. 41 | - You may try the following (pytorch 1.6.0, on Linux and Mac). 42 | ``` 43 | pip install pytorch3d 44 | ``` 45 | - If it doesn't work, follow the instruction of [Pytorch3D](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md) 46 | 47 | - Install other third-party libraries + download pretrained models and sample data 48 | - Run the following script 49 | ``` 50 | sh scripts/install_frankmocap.sh 51 | ``` 52 | 53 | - Setting SMPL/SMPL-X Models 54 | - We use SMPL and SMPL-X model as 3D pose estimation output. You have to download them from the original website. 55 | - Download SMPL Model (Neutral model: basicModel_neutral_lbs_10_207_0_v1.0.0.pkl): 56 | - Download in the original [website](http://smplify.is.tue.mpg.de/login). You need to register to download the SMPL data. 
57 | - Put the file in: ./extra_data/smpl/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl 58 | - This is only for body module 59 | 60 | - Download SMPLX Model (Neutral model: SMPLX_NEUTRAL.pkl): 61 | - You can use SMPL-X model for body mocap instead of SMPL model. 62 | - Download ```SMPLX_NEUTRAL.pkl``` in the original [SMPL website](https://smpl-x.is.tue.mpg.de/). You need to register to download the SMPLX data. 63 | - Put the ```SMPLX_NEUTRAL.pkl`` file in: ./extra_data/smpl/SMPLX_NEUTRAL.pkl 64 | - This is for hand module and whole body module 65 | 66 | ## Folder hierarchy 67 | - Once you sucessfully installed and downloaded all, you should have the following files in your directory: 68 | ``` 69 | ./extra_data/ 70 | ├── hand_module 71 | │ └── mean_mano_params.pkl 72 | │ └── SMPLX_HAND_INFO.pkl 73 | | └── pretrained_weights 74 | | | └── pose_shape_best.pth 75 | │ └── hand_detector 76 | │ └── faster_rcnn_1_8_132028.pth 77 | │ └── model_0529999.pth 78 | ├── body_module 79 | | └──body_pose_estimator 80 | | └── checkpoint_iter_370000.pth 81 | └── smpl 82 | └── basicModel_neutral_lbs_10_207_0_v1.0.0.pkl 83 | └── SMPLX_NEUTRAL.pkl 84 | 85 | ./detectors/ 86 | ├── body_pose_estimator 87 | ├── hand_object_detector 88 | └── hand_only_detector 89 | ``` 90 | 91 | ## Installing Body Module Only 92 | 93 | - The basic installation 94 | ``` 95 | conda create -n venv_frankmocap python=3.7 96 | conda activate venv_frankmocap 97 | 98 | # Install cuda 99 | # Choose versions based on your system. For example: 100 | # conda install cudatoolkit=10.1 cudnn=7.6.0 101 | 102 | # Install pytorch and torchvision 103 | conda install -c pytorch pytorch==1.6.0 torchvision cudatoolkit=10.1 104 | 105 | # Install other required libraries 106 | pip install -r docs/requirements.txt 107 | ``` 108 | 109 | - Install pytorch3d (optional, for pytorch3d renderering) 110 | - We use pytorch3d for an alternative rendering option. We provide other options (opengl by default) and you may skip this process. 111 | - You may try the following (pytorch 1.6.0, on Linux and Mac). 112 | ``` 113 | pip install pytorch3d 114 | ``` 115 | - If it doesn't work, follow the instruction of [Pytorch3D](https://github.com/facebookresearch/pytorch3d/blob/master/INSTALL.md) 116 | 117 | - Install 2D pose detector and download pretrained models and sample data 118 | - Install [2D keypoint detector](https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch): 119 | ``` 120 | sh scripts/install_pose2d.sh 121 | ``` 122 | - Download pretrained model and other extra data 123 | ``` 124 | sh scripts/download_data_body_module.sh 125 | ``` 126 | - Download sample data 127 | ``` 128 | sh scripts/download_sample_data.sh 129 | ``` 130 | - Setting SMPL/SMPL-X Models 131 | - You only need SMPL model. See above 132 | -------------------------------------------------------------------------------- /mocap_utils/general_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
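# Overview: small I/O and path helpers shared by the demos -- mesh export to .obj,
# directory creation/reset, recursive file listing, and pickle/json/npz wrappers.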
2 | 3 | # file to store some often use functions 4 | import os, sys, shutil 5 | import os.path as osp 6 | import multiprocessing as mp 7 | import numpy as np 8 | import cv2 9 | import pickle 10 | import json 11 | 12 | 13 | def save_mesh_to_obj(obj_path, verts, faces=None): 14 | assert isinstance(verts, np.ndarray) 15 | assert isinstance(faces, np.ndarray) 16 | 17 | with open(obj_path, 'w') as out_f: 18 | # write verts 19 | for v in verts: 20 | out_f.write(f"v {v[0]:.4f} {v[1]:.4f} {v[2]:.4f}\n") 21 | # write faces 22 | if faces is not None: 23 | faces = faces.copy() + 1 24 | for f in faces: 25 | out_f.write(f"f {f[0]} {f[1]} {f[2]}\n") 26 | 27 | 28 | def renew_dir(target_dir): 29 | if osp.exists(target_dir): 30 | shutil.rmtree(target_dir) 31 | os.makedirs(target_dir) 32 | 33 | 34 | def build_dir(target_dir): 35 | if not osp.exists(target_dir): 36 | os.makedirs(target_dir) 37 | 38 | 39 | def get_subdir(in_path): 40 | subdir_path = '/'.join(in_path.split('/')[:-1]) 41 | return subdir_path 42 | 43 | def make_subdir(in_path): 44 | subdir_path = get_subdir(in_path) 45 | build_dir(subdir_path) 46 | 47 | 48 | def update_extension(file_path, new_extension): 49 | assert new_extension[0] == '.' 50 | old_extension = '.' + file_path.split('.')[-1] 51 | new_file_path = file_path.replace(old_extension, new_extension) 52 | return new_file_path 53 | 54 | 55 | def get_all_files(in_dir, extension, path_type='full', keywords=''): 56 | assert path_type in ['full', 'relative', 'name_only'] 57 | assert isinstance(extension, str) or isinstance(extension, tuple) 58 | assert isinstance(keywords, str) 59 | 60 | all_files = list() 61 | for subdir, dirs, files in os.walk(in_dir): 62 | for file in files: 63 | if len(keywords)>0: 64 | if file.find(keywords)<0: 65 | continue 66 | if file.endswith(extension): 67 | if path_type == 'full': 68 | file_path = osp.join(subdir, file) 69 | elif path_type == 'relative': 70 | file_path = osp.join(subdir, file).replace(in_dir, '') 71 | if file_path.startswith('/'): 72 | file_path = file_path[1:] 73 | else: 74 | file_path = file 75 | all_files.append(file_path) 76 | return sorted(all_files) 77 | 78 | 79 | def remove_swp(in_dir): 80 | remove_files = list() 81 | for subdir, dirs, files in os.walk(in_dir): 82 | for file in files: 83 | if file.endswith('.swp'): 84 | full_path = osp.join(subdir,file) 85 | os.remove(full_path) 86 | 87 | 88 | def remove_pyc(in_dir): 89 | remove_files = list() 90 | for subdir, dirs, files in os.walk(in_dir): 91 | for file in files: 92 | if file.endswith('.pyc'): 93 | full_path = osp.join(subdir,file) 94 | os.remove(full_path) 95 | 96 | 97 | def md5sum(file_path): 98 | import hashlib 99 | hash_md5 = hashlib.md5() 100 | with open(file_path, 'rb') as in_f: 101 | hash_md5.update(in_f.read()) 102 | return hash_md5.hexdigest() 103 | 104 | 105 | # save data to pkl 106 | def save_pkl(res_file, data_list, protocol=-1): 107 | assert res_file.endswith(".pkl") 108 | res_file_dir = '/'.join(res_file.split('/')[:-1]) 109 | if len(res_file_dir)>0: 110 | if not osp.exists(res_file_dir): 111 | os.makedirs(res_file_dir) 112 | with open(res_file, 'wb') as out_f: 113 | if protocol==2: 114 | pickle.dump(data_list, out_f, protocol=2) 115 | else: 116 | pickle.dump(data_list, out_f) 117 | 118 | 119 | def load_pkl(pkl_file, res_list=None): 120 | assert pkl_file.endswith(".pkl") 121 | with open(pkl_file, 'rb') as in_f: 122 | try: 123 | data = pickle.load(in_f) 124 | except UnicodeDecodeError: 125 | in_f.seek(0) 126 | data = pickle.load(in_f, encoding='latin1') 127 | return data 128 
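# Illustrative round-trip of the pkl helpers above (the path is a made-up example):
#   save_pkl("./mocap_output/mocap/result.pkl", pred_output_list)
#   pred_output_list = load_pkl("./mocap_output/mocap/result.pkl")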
| 129 | 130 | def load_json(in_file): 131 | assert in_file.endswith(".json") 132 | with open(in_file, 'r') as in_f: 133 | all_data = json.load(in_f) 134 | return all_data 135 | 136 | 137 | def save_json(out_file, data): 138 | assert out_file.endswith(".json") 139 | with open(out_file, "w") as out_f: 140 | json.dump(data, out_f) 141 | 142 | 143 | def load_npz(npz_file): 144 | res_data = dict() 145 | assert npz_file.endswith(".npz") 146 | raw_data = np.load(npz_file, mmap_mode='r') 147 | for key in raw_data.files: 148 | res_data[key] = raw_data[key] 149 | return res_data 150 | 151 | 152 | def update_npz_file(npz_file, new_key, new_data): 153 | # load original data 154 | assert npz_file.endswith(".npz") 155 | raw_data = np.load(npz_file, mmap_mode='r') 156 | all_data = dict() 157 | for key in raw_data.files: 158 | all_data[key] = raw_data[key] 159 | # add new data && save 160 | all_data[new_key] = new_data 161 | np.savez(npz_file, **all_data) 162 | 163 | 164 | def analyze_path(input_path): 165 | # assume input_path is the path of a file not a directory 166 | record = input_path.split('/') 167 | input_dir = '/'.join(record[:-1]) 168 | file_name = record[-1] 169 | assert file_name.find(".")>0 170 | ext = file_name.split('.')[-1] 171 | file_basename = '.'.join(file_name.split('.')[:-1]) 172 | return input_dir, file_name, file_basename, ext -------------------------------------------------------------------------------- /bodymocap/body_bbox_detector.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import os.path as osp 5 | import sys 6 | import numpy as np 7 | import cv2 8 | 9 | import torch 10 | import torchvision.transforms as transforms 11 | # from PIL import Image 12 | 13 | # Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/blob/master/demo.py 14 | 15 | # 2D body pose estimator 16 | pose2d_estimator_path = './detectors/body_pose_estimator' 17 | sys.path.append(pose2d_estimator_path) 18 | from detectors.body_pose_estimator.pose2d_models.with_mobilenet import PoseEstimationWithMobileNet 19 | from detectors.body_pose_estimator.modules.load_state import load_state 20 | from detectors.body_pose_estimator.val import normalize, pad_width 21 | from detectors.body_pose_estimator.modules.pose import Pose, track_poses 22 | from detectors.body_pose_estimator.modules.keypoints import extract_keypoints, group_keypoints 23 | 24 | 25 | class BodyPoseEstimator(object): 26 | """ 27 | Hand Detector for third-view input. 
28 | It combines a body pose estimator (https://github.com/jhugestar/lightweight-human-pose-estimation.pytorch.git) 29 | """ 30 | def __init__(self): 31 | print("Loading Body Pose Estimator") 32 | self.__load_body_estimator() 33 | 34 | 35 | def __load_body_estimator(self): 36 | net = PoseEstimationWithMobileNet() 37 | pose2d_checkpoint = "./extra_data/body_module/body_pose_estimator/checkpoint_iter_370000.pth" 38 | checkpoint = torch.load(pose2d_checkpoint, map_location='cpu') 39 | load_state(net, checkpoint) 40 | net = net.eval() 41 | net = net.cuda() 42 | self.model = net 43 | 44 | 45 | #Code from https://github.com/Daniil-Osokin/lightweight-human-pose-estimation.pytorch/demo.py 46 | def __infer_fast(self, img, input_height_size, stride, upsample_ratio, 47 | cpu=False, pad_value=(0, 0, 0), img_mean=(128, 128, 128), img_scale=1/256): 48 | height, width, _ = img.shape 49 | scale = input_height_size / height 50 | 51 | scaled_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 52 | scaled_img = normalize(scaled_img, img_mean, img_scale) 53 | min_dims = [input_height_size, max(scaled_img.shape[1], input_height_size)] 54 | padded_img, pad = pad_width(scaled_img, stride, pad_value, min_dims) 55 | 56 | tensor_img = torch.from_numpy(padded_img).permute(2, 0, 1).unsqueeze(0).float() 57 | if not cpu: 58 | tensor_img = tensor_img.cuda() 59 | 60 | stages_output = self.model(tensor_img) 61 | 62 | stage2_heatmaps = stages_output[-2] 63 | heatmaps = np.transpose(stage2_heatmaps.squeeze().cpu().data.numpy(), (1, 2, 0)) 64 | heatmaps = cv2.resize(heatmaps, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 65 | 66 | stage2_pafs = stages_output[-1] 67 | pafs = np.transpose(stage2_pafs.squeeze().cpu().data.numpy(), (1, 2, 0)) 68 | pafs = cv2.resize(pafs, (0, 0), fx=upsample_ratio, fy=upsample_ratio, interpolation=cv2.INTER_CUBIC) 69 | 70 | return heatmaps, pafs, scale, pad 71 | 72 | def detect_body_pose(self, img): 73 | """ 74 | Output: 75 | current_bbox: BBOX_XYWH 76 | """ 77 | stride = 8 78 | upsample_ratio = 4 79 | orig_img = img.copy() 80 | 81 | # forward 82 | heatmaps, pafs, scale, pad = self.__infer_fast(img, 83 | input_height_size=256, stride=stride, upsample_ratio=upsample_ratio) 84 | 85 | total_keypoints_num = 0 86 | all_keypoints_by_type = [] 87 | num_keypoints = Pose.num_kpts 88 | for kpt_idx in range(num_keypoints): # 19th for bg 89 | total_keypoints_num += extract_keypoints(heatmaps[:, :, kpt_idx], all_keypoints_by_type, total_keypoints_num) 90 | 91 | pose_entries, all_keypoints = group_keypoints(all_keypoints_by_type, pafs, demo=True) 92 | for kpt_id in range(all_keypoints.shape[0]): 93 | all_keypoints[kpt_id, 0] = (all_keypoints[kpt_id, 0] * stride / upsample_ratio - pad[1]) / scale 94 | all_keypoints[kpt_id, 1] = (all_keypoints[kpt_id, 1] * stride / upsample_ratio - pad[0]) / scale 95 | 96 | ''' 97 | # print(len(pose_entries)) 98 | if len(pose_entries)>1: 99 | pose_entries = pose_entries[:1] 100 | print("We only support one person currently") 101 | # assert len(pose_entries) == 1, "We only support one person currently" 102 | ''' 103 | 104 | current_poses, current_bbox = list(), list() 105 | for n in range(len(pose_entries)): 106 | if len(pose_entries[n]) == 0: 107 | continue 108 | pose_keypoints = np.ones((num_keypoints, 2), dtype=np.int32) * -1 109 | for kpt_id in range(num_keypoints): 110 | if pose_entries[n][kpt_id] != -1.0: # keypoint was found 111 | pose_keypoints[kpt_id, 0] = int(all_keypoints[int(pose_entries[n][kpt_id]), 0]) 112 
| pose_keypoints[kpt_id, 1] = int(all_keypoints[int(pose_entries[n][kpt_id]), 1]) 113 | pose = Pose(pose_keypoints, pose_entries[n][18]) 114 | current_poses.append(pose.keypoints) 115 | current_bbox.append(np.array(pose.bbox)) 116 | 117 | # enlarge the bbox 118 | for i, bbox in enumerate(current_bbox): 119 | x, y, w, h = bbox 120 | margin = 0.05 121 | x_margin = int(w * margin) 122 | y_margin = int(h * margin) 123 | x0 = max(x-x_margin, 0) 124 | y0 = max(y-y_margin, 0) 125 | x1 = min(x+w+x_margin, orig_img.shape[1]) 126 | y1 = min(y+h+y_margin, orig_img.shape[0]) 127 | current_bbox[i] = np.array((x0, y0, x1-x0, y1-y0)).astype(np.int32) 128 | 129 | return current_poses, current_bbox -------------------------------------------------------------------------------- /docs/joint_order.md: -------------------------------------------------------------------------------- 1 | # Joint Order (Position & Rotation) 2 | 3 | ## Attention !!! 4 | The orders of joint position and joint angle are different. The details are listed below. 5 | 6 | ## Hand Joint 7 | ### Joint Position (Hand) 8 | 9 | The joint positions are converted to image space (X,Y coordinates are aligned to image, Z coordinates are rescaled accordingly.) 10 | 11 | To obtain predicted 3D hand joint position, you can use [pred_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/handmocap/hand_mocap_api.py#L222) in hand-only demo or 12 | [pred_lhand_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L186) and [pred_rhand_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L192) in body-plus-hand demo. 13 | 14 | The order of hand joint position is visualized below: 15 | 16 |

  (figure omitted: hand joint position numbering)
19 | 20 | 21 | The order of hand joint (position) is listed below: 22 | ``` 23 | 0 : Wrist 24 | 1 : Thumb_00 25 | 2 : Thumb_01 26 | 3 : Thumb_02 27 | 4 : Thumb_03 28 | 5 : Index_00 29 | 6 : Index_01 30 | 7 : Index_02 31 | 8 : Index_03 32 | 9 : Middle_00 33 | 10 : Middle_01 34 | 11 : Middle_02 35 | 12 : Middle_03 36 | 13 : Ring_00 37 | 14 : Ring_01 38 | 15 : Ring_02 39 | 16 : Ring_03 40 | 17 : Little_00 41 | 18 : Little_01 42 | 19 : Little_02 43 | 20 : Little_03 44 | ``` 45 | 46 | ### Joint Angle (Hand) 47 | To obtain predicted 3D hand joint angles (in [angle-axis format](https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation)), you can use [pred_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/handmocap/hand_mocap_api.py#L197) in hand-only demo or [pred_left_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L234) [pred_right_hand_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L235) in body-plus-hand demo. 48 | 49 | The axis of joint angle is depicted below (right-hand rule): 50 |

  (figure omitted: hand joint rotation axes, right-hand rule)
53 | 54 | 55 | If the dimension of ```hand_pose``` is 45 (15 * 3), then the joint starts from ```Index_00```; otherwise the dimension should be 48 (16 * 3) and the joint starts from wrist (or say, hand global orientation). 56 | 57 | The order of hand joint (angle) is listed below: 58 | ``` 59 | 0 : Wrist 60 | 1 : Index_00 61 | 2 : Index_01 62 | 3 : Index_02 63 | 4 : Middle_00 64 | 5 : Middle_01 65 | 6 : Middle_02 66 | 7 : Little_00 67 | 8 : Little_01 68 | 9 : Little_02 69 | 10 : Ring_00 70 | 11 : Ring_01 71 | 12 : Ring_02 72 | 13 : Thumb_00 73 | 14 : Thumb_01 74 | 15 : Thumb_02 75 | ``` 76 | 77 | 78 | ## Body Joint 79 | ### Joint Position (Body) 80 | 81 | The joint positions are converted to image space (X,Y coordinates are aligned to image, Z coordinates are rescaled accordingly.) 82 | 83 | To obtain predicted 3D body joint position, you can use [pred_joints_img](https://github.com/facebookresearch/frankmocap/blob/44f4f6718a45baf0836c9785f02ea1d74f6f5774/bodymocap/body_mocap_api.py#L112) in body-only demo or 84 | [pred_body_joints_img](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L179) in body-plus-hand demo. 85 | 86 | The order of body joint (position) is listed below: 87 | ``` 88 | 0: OP_Nose 89 | 1: OP_Neck 90 | 2: OP_R_Shoulder 91 | 3: OP_R_Elblow 92 | 4: OP_R_Wrist 93 | 5: OP_L_Shoulder 94 | 6: OP_L_Elbow 95 | 7: OP_L_Wrist 96 | 8: OP_Middle_Hip 97 | 9: OP_R_Hip 98 | 10: OP_R_Knee 99 | 11: OP_R_Ankle 100 | 12: OP_L_Hip 101 | 13: OP_L_Knee 102 | 14: OP_L_Ankle 103 | 15: OP_R_Eye 104 | 16: OP_L_Eye 105 | 17: OP R_Ear 106 | 18: OP_L_Ear 107 | 19: OP_L_Big_Toe 108 | 20: OP_L_Small_Toe 109 | 21: OP_L_Heel 110 | 22: OP_R_Big_Toe 111 | 23: OP_R_Small_Toe 112 | 24: OP_R_Heel 113 | 25: R_Ankle 114 | 26: R_Knee 115 | 27: R_Hip 116 | 28: L_Hip 117 | 29: L_Knee 118 | 30: L_Ankle 119 | 31: R_Wrist 120 | 32: R_Elbow 121 | 33: R_Shoulder 122 | 34: L_Shoulder 123 | 35: L_Elbow 124 | 36: L_Wrist 125 | 37: Neck (LSP) 126 | 38: Top of Head (LSP) 127 | 39: Pelvis (MPII) 128 | 40: Thorax (MPII) 129 | 41: Spine (H36M) 130 | 42: Jaw (H36M) 131 | 43: Head (H36M) 132 | 44: Nose 133 | 45: L_Eye 134 | 46: R_Eye 135 | 47: L_Ear 136 | 48: R_Ear 137 | ``` 138 | 139 | ### Joint Angle (Body) 140 | To obtain predicted 3D body joint angles (in [angle-axis format](https://en.wikipedia.org/wiki/Axis%E2%80%93angle_representation)), you can use [pred_body_pose](https://github.com/facebookresearch/frankmocap/blob/44f4f6718a45baf0836c9785f02ea1d74f6f5774/bodymocap/body_mocap_api.py#L115) in body-only demo or [pred_body_pose](https://github.com/facebookresearch/frankmocap/blob/60584337f81795b1b9fe4f4da5ffe273f6f1266a/integration/copy_and_paste.py#L164) in body-plus-hand demo. 141 | 142 | The dimesion should be 72 (24 * 3). It is worth noting that if SMPL-X is used for body module, then the 22-th and 23-th body joint angles are invalid, we keep it for the consistent format with SMPL. 
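As a quick, minimal sketch of how these angle-axis outputs can be used (illustrative only: `pred_body_pose` and `pred_hand_pose` are assumed to be already-loaded NumPy arrays of the shapes described above, filled here with placeholder zeros), the flat vectors can be reshaped into per-joint form and converted to rotation matrices with the repo's `mocap_utils.geometry_utils.angle_axis_to_rotation_matrix`:

```
import numpy as np
from mocap_utils.geometry_utils import angle_axis_to_rotation_matrix

# Placeholder predictions: 72-dim body pose and 48-dim hand pose in angle-axis format
pred_body_pose = np.zeros(72, dtype=np.float32)
pred_hand_pose = np.zeros(48, dtype=np.float32)

body_aa = pred_body_pose.reshape(24, 3)   # 24 body joints, ordered as listed below
hand_aa = pred_hand_pose.reshape(16, 3)   # wrist (global orientation) + 15 finger joints

body_rotmat = angle_axis_to_rotation_matrix(body_aa)   # (24, 3, 3) rotation matrices
hand_rotmat = angle_axis_to_rotation_matrix(hand_aa)   # (16, 3, 3) rotation matrices
print(body_rotmat.shape, hand_rotmat.shape)
```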
143 | 144 | The order of body joint (angle) is listed below: 145 | ``` 146 | 0: Global 147 | 1: L_Hip 148 | 2: R_Hip 149 | 3: Spine_01 150 | 4: L_Knee 151 | 5: R_Knee 152 | 6: Spine_02 153 | 7: L_Ankle 154 | 8: R_Ankle 155 | 9: Spine_03 156 | 10: L_Toe 157 | 11: R_Toe 158 | 12: Neck 159 | 13: L_Collar 160 | 14: R_Collar 161 | 15: Head 162 | 16: L_Shoulder 163 | 17: R_Shoulder 164 | 18: L_Elbow 165 | 19: R_Elbow 166 | 20: L_Wrist 167 | 21: R_Wrist 168 | 22: L_Palm (Invalid for SMPL-X) 169 | 23: R_Palm (Invalid for SMPL-X) 170 | ``` 171 | 172 | The skeleton of SMPL body is depicted below, for SMPL-X body, the 22-th and 23-th body joint are invalid: 173 |

  (figure omitted: SMPL body skeleton)
176 | 177 | 178 | 179 | ## License 180 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 181 | See the [LICENSE](LICENSE) file. -------------------------------------------------------------------------------- /handmocap/hand_modules/base_options.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Part of the code from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix 4 | 5 | import argparse 6 | import os 7 | import os.path as osp 8 | import torch 9 | 10 | class BaseOptions(): 11 | def __init__(self): 12 | self.parser = argparse.ArgumentParser() 13 | self.initialized = False 14 | 15 | def initialize(self): 16 | self.parser.add_argument('--dist', action='store_true', help='whether to use distributed training') 17 | self.parser.add_argument('--local_rank', type=int, default=0) 18 | self.parser.add_argument('--batchSize', type=int, default=128, help='input batch size') 19 | self.parser.add_argument('--inputSize', type=int, default=224, help='then crop to this size') 20 | self.parser.add_argument('--input_nc', type=int, default=3, help='# of input image channels') 21 | self.parser.add_argument('--gpu_ids', type=str, default='0,1', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU') 22 | self.parser.add_argument('--name', type=str, default='h3dw', help='name of the experiment. It decides where to store samples and models') 23 | self.parser.add_argument('--nThreads', default=2, type=int, help='# threads for loading data') 24 | self.parser.add_argument('--checkpoints_dir', type=str, default='/home/hjoo/dropbox/hand_yu/checkpoints', help='models are saved here') 25 | self.parser.add_argument('--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly') 26 | self.parser.add_argument('--display_winsize', type=int, default=256, help='display window size') 27 | self.parser.add_argument('--display_id', type=int, default=1, help='window id of the web display') 28 | self.parser.add_argument('--display_port', type=int, default=80, help='visdom port of the web display') 29 | 30 | self.parser.add_argument('--data_root', type=str, default='', help='root dir for all the datasets') 31 | self.parser.add_argument('--freihand_anno_path', type=str, default='', help='annotation_path that stores the information of freihand dataset') 32 | self.parser.add_argument('--ho3d_anno_path', type=str, default='', help='annotation_path that stores the information of HO3D dataset') 33 | self.parser.add_argument('--mtc_anno_path', type=str, default='', help='annotation_path that stores the information of MTC (Panoptic 3D) dataset') 34 | self.parser.add_argument('--stb_anno_path', type=str, default='', help='annotation_path that stores the information of STB dataset') 35 | self.parser.add_argument('--rhd_anno_path', type=str, default='', help='annotation_path that stores the information of RHD dataset') 36 | self.parser.add_argument('--frl_anno_path', type=str, default='', help='annotation_path that stores the information of FRL dataset') 37 | self.parser.add_argument('--ganerated_anno_path', type=str, default='', help='annotation_path that stores the information of GANerated dataset') 38 | self.parser.add_argument('--demo_img_dir', type=str, default='', help='image root of demo dataset') 39 | self.parser.add_argument('--wild_img_dir', type=str, default='', help='image root of in-the-wild dataset (in-the-wild means without any annotation, 
only image)') 40 | 41 | self.parser.add_argument('--num_joints', type=int, default=21, help='number of keypoints') 42 | self.parser.add_argument('--total_params_dim', type=int, default=61, help='number of params to be estimated') 43 | self.parser.add_argument('--cam_params_dim', type=int, default=3, help='number of params to be estimated') 44 | self.parser.add_argument('--pose_params_dim', type=int, default=48, help='number of params to be estimated') 45 | self.parser.add_argument('--shape_params_dim', type=int, default=10, help='number of params to be estimated') 46 | 47 | self.parser.add_argument('--model_root', type=str, default='./extra_data', help='root dir for all the pretrained weights and pre-defined models') 48 | self.parser.add_argument('--smplx_model_file', type=str, default='./extra_data/smpl/SMPLX_NEUTRAL.pkl', help='path of pretraind smpl model') 49 | self.parser.add_argument('--smplx_hand_info_file', type=str, default='hand_module/SMPLX_HAND_INFO.pkl', help='path of smpl face') 50 | self.parser.add_argument('--mean_param_file', type=str, default='hand_module/mean_mano_params.pkl', help='path of smpl face') 51 | 52 | self.parser.add_argument('--single_branch', action='store_true', help='use only one branch, this branch could either be IUV or other format such as image') 53 | self.parser.add_argument('--two_branch', action='store_true', help='two branch input, image and another auxiliary branch, the auxiliary branch is IUV in default') 54 | self.parser.add_argument('--aux_as_main', action='store_true', help='use aux as input instead of image') 55 | self.parser.add_argument('--main_encoder', type=str, default='resnet50', help='selects model to use for major input, it is usually image') 56 | self.parser.add_argument('--aux_encoder', type=str, default='resnet18', help='selects model to use for auxiliary input, it could be IUV') 57 | 58 | self.parser.add_argument('--resize_or_crop', type=str, default='resize_and_crop', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]') 59 | self.parser.add_argument('--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation') 60 | self.parser.add_argument('--use_hand_rotation', action='store_true', help='if specified, use ground truth hand rotation in training') 61 | self.parser.add_argument('--top_finger_joints_type', type=str, default='ave', help="use which kind of top finger joints") 62 | self.initialized = True 63 | self.initialized = True 64 | 65 | 66 | def parse(self, args=None): 67 | if not self.initialized: 68 | self.initialize() 69 | 70 | if args is None: 71 | self.opt = self.parser.parse_args() 72 | else: 73 | self.opt = self.parser.parse_args(args) 74 | # self.opt, unknown = self.parser.parse_known_args() 75 | self.opt.isTrain = self.isTrain # train or test 76 | 77 | return self.opt 78 | -------------------------------------------------------------------------------- /bodymocap/models/hmr.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torchvision.models.resnet as resnet 6 | import numpy as np 7 | import math 8 | 9 | import sys 10 | from bodymocap.utils.geometry import rot6d_to_rotmat 11 | 12 | class Bottleneck(nn.Module): 13 | """ Redefinition of Bottleneck residual block 14 | Adapted from the official PyTorch implementation 15 | """ 16 | expansion = 4 17 | 18 | def __init__(self, 
inplanes, planes, stride=1, downsample=None): 19 | super(Bottleneck, self).__init__() 20 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 21 | self.bn1 = nn.BatchNorm2d(planes) 22 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 23 | padding=1, bias=False) 24 | self.bn2 = nn.BatchNorm2d(planes) 25 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 26 | self.bn3 = nn.BatchNorm2d(planes * 4) 27 | self.relu = nn.ReLU(inplace=True) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | out = self.relu(out) 41 | 42 | out = self.conv3(out) 43 | out = self.bn3(out) 44 | 45 | if self.downsample is not None: 46 | residual = self.downsample(x) 47 | 48 | out += residual 49 | out = self.relu(out) 50 | 51 | return out 52 | 53 | class HMR(nn.Module): 54 | """ SMPL Iterative Regressor with ResNet50 backbone 55 | """ 56 | 57 | def __init__(self, block, layers, smpl_mean_params): 58 | self.inplanes = 64 59 | super(HMR, self).__init__() 60 | npose = 24 * 6 61 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 62 | bias=False) 63 | self.bn1 = nn.BatchNorm2d(64) 64 | self.relu = nn.ReLU(inplace=True) 65 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 66 | self.layer1 = self._make_layer(block, 64, layers[0]) 67 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 68 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 69 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 70 | self.avgpool = nn.AvgPool2d(7, stride=1) 71 | self.fc1 = nn.Linear(512 * block.expansion + npose + 13, 1024) 72 | self.drop1 = nn.Dropout() 73 | self.fc2 = nn.Linear(1024, 1024) 74 | self.drop2 = nn.Dropout() 75 | self.decpose = nn.Linear(1024, npose) 76 | self.decshape = nn.Linear(1024, 10) 77 | self.deccam = nn.Linear(1024, 3) 78 | nn.init.xavier_uniform_(self.decpose.weight, gain=0.01) 79 | nn.init.xavier_uniform_(self.decshape.weight, gain=0.01) 80 | nn.init.xavier_uniform_(self.deccam.weight, gain=0.01) 81 | 82 | for m in self.modules(): 83 | if isinstance(m, nn.Conv2d): 84 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 85 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 86 | elif isinstance(m, nn.BatchNorm2d): 87 | m.weight.data.fill_(1) 88 | m.bias.data.zero_() 89 | 90 | mean_params = np.load(smpl_mean_params) 91 | init_pose = torch.from_numpy(mean_params['pose'][:]).unsqueeze(0) 92 | init_shape = torch.from_numpy(mean_params['shape'][:].astype('float32')).unsqueeze(0) 93 | init_cam = torch.from_numpy(mean_params['cam']).unsqueeze(0) 94 | self.register_buffer('init_pose', init_pose) 95 | self.register_buffer('init_shape', init_shape) 96 | self.register_buffer('init_cam', init_cam) 97 | 98 | 99 | def _make_layer(self, block, planes, blocks, stride=1): 100 | downsample = None 101 | if stride != 1 or self.inplanes != planes * block.expansion: 102 | downsample = nn.Sequential( 103 | nn.Conv2d(self.inplanes, planes * block.expansion, 104 | kernel_size=1, stride=stride, bias=False), 105 | nn.BatchNorm2d(planes * block.expansion), 106 | ) 107 | 108 | layers = [] 109 | layers.append(block(self.inplanes, planes, stride, downsample)) 110 | self.inplanes = planes * block.expansion 111 | for i in range(1, blocks): 112 | layers.append(block(self.inplanes, planes)) 113 | 114 | return nn.Sequential(*layers) 115 | 116 | 117 | def forward(self, x, init_pose=None, init_shape=None, init_cam=None, n_iter=3): 118 | 119 | batch_size = x.shape[0] 120 | 121 | if init_pose is None: 122 | init_pose = self.init_pose.expand(batch_size, -1) 123 | if init_shape is None: 124 | init_shape = self.init_shape.expand(batch_size, -1) 125 | if init_cam is None: 126 | init_cam = self.init_cam.expand(batch_size, -1) 127 | 128 | x = self.conv1(x) 129 | x = self.bn1(x) 130 | x = self.relu(x) 131 | x = self.maxpool(x) 132 | 133 | x1 = self.layer1(x) 134 | x2 = self.layer2(x1) 135 | x3 = self.layer3(x2) 136 | x4 = self.layer4(x3) 137 | 138 | xf = self.avgpool(x4) 139 | xf = xf.view(xf.size(0), -1) 140 | 141 | pred_pose = init_pose 142 | pred_shape = init_shape 143 | pred_cam = init_cam 144 | for i in range(n_iter): 145 | xc = torch.cat([xf, pred_pose, pred_shape, pred_cam],1) 146 | xc = self.fc1(xc) 147 | xc = self.drop1(xc) 148 | xc = self.fc2(xc) 149 | xc = self.drop2(xc) 150 | pred_pose = self.decpose(xc) + pred_pose 151 | pred_shape = self.decshape(xc) + pred_shape 152 | pred_cam = self.deccam(xc) + pred_cam 153 | 154 | pred_rotmat = rot6d_to_rotmat(pred_pose).view(batch_size, 24, 3, 3) 155 | 156 | return pred_rotmat, pred_shape, pred_cam 157 | 158 | def hmr(smpl_mean_params, pretrained=True, **kwargs): 159 | """ Constructs an HMR model with ResNet50 backbone. 160 | Args: 161 | pretrained (bool): If True, returns a model pre-trained on ImageNet 162 | """ 163 | model = HMR(Bottleneck, [3, 4, 6, 3], smpl_mean_params, **kwargs) 164 | if pretrained: 165 | resnet_imagenet = resnet.resnet50(pretrained=True) 166 | model.load_state_dict(resnet_imagenet.state_dict(),strict=False) 167 | return model 168 | 169 | -------------------------------------------------------------------------------- /mocap_utils/geometry_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
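# Overview: rotation-representation conversions (angle-axis <-> quaternion <-> rotation matrix,
# plus 6D rotations) and hand-pose flipping helpers; most functions accept either numpy arrays
# or torch tensors (rot6d_to_rotmat is torch-only).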
2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | # sys.path.append("/") 6 | import numpy as np 7 | import torch 8 | from torch.nn import functional as F 9 | import cv2 10 | import numpy.matlib as npm 11 | import mocap_utils.geometry_utils_torch as gut 12 | 13 | 14 | def flip_hand_pose(pose): 15 | pose = pose.copy() 16 | if len(pose.shape) == 1: 17 | pose = pose.reshape(-1, 3) 18 | pose[:, 1] *= -1 19 | pose[:, 2] *= -1 20 | return pose.reshape(-1,) 21 | else: 22 | assert len(pose.shape) == 2 23 | pose[:, 1] *= -1 24 | pose[:, 2] *= -1 25 | return pose 26 | 27 | 28 | def flip_hand_joints_3d(joints_3d): 29 | assert joints_3d.shape[1] == 3 30 | assert len(joints_3d.shape) == 2 31 | rot_mat = np.diag([-1, 1, 1]) 32 | return np.matmul(rot_mat, joints_3d.T).T 33 | 34 | 35 | def __quaternion_to_angle_axis_torch(quat): 36 | quat = quat.clone() 37 | if quat.dim() == 1: 38 | assert quat.size(0) == 4 39 | quat = quat.view(1, 4) 40 | angle_axis = gut.quaternion_to_angle_axis(quat)[0] 41 | elif quat.dim() == 2: 42 | assert quat.size(1) == 4 43 | angle_axis = gut.quaternion_to_angle_axis(quat) 44 | else: 45 | assert quat.dim() == 3 46 | dim0 = quat.size(0) 47 | dim1 = quat.size(1) 48 | assert quat.size(2) == 4 49 | quat = quat.view(dim0*dim1, 4) 50 | angle_axis = gut.quaternion_to_angle_axis(quat) 51 | angle_axis = angle_axis.view(dim0, dim1, 3) 52 | return angle_axis 53 | 54 | 55 | def quaternion_to_angle_axis(quaternion): 56 | quat = quaternion 57 | if isinstance(quat, torch.Tensor): 58 | return __quaternion_to_angle_axis_torch(quaternion) 59 | else: 60 | assert isinstance(quat, np.ndarray) 61 | quat_torch = torch.from_numpy(quat) 62 | angle_axis_torch = __quaternion_to_angle_axis_torch(quat_torch) 63 | return angle_axis_torch.numpy() 64 | 65 | 66 | def __angle_axis_to_quaternion_torch(aa): 67 | aa = aa.clone() 68 | if aa.dim() == 1: 69 | assert aa.size(0) == 3 70 | aa = aa.view(1, 3) 71 | quat = gut.angle_axis_to_quaternion(aa)[0] 72 | elif aa.dim() == 2: 73 | assert aa.size(1) == 3 74 | quat = gut.angle_axis_to_quaternion(aa) 75 | else: 76 | assert aa.dim() == 3 77 | dim0 = aa.size(0) 78 | dim1 = aa.size(1) 79 | assert aa.size(2) == 3 80 | aa = aa.view(dim0*dim1, 3) 81 | quat = gut.angle_axis_to_quaternion(aa) 82 | quat = quat.view(dim0, dim1, 4) 83 | return quat 84 | 85 | 86 | def angle_axis_to_quaternion(angle_axis): 87 | aa = angle_axis 88 | if isinstance(aa, torch.Tensor): 89 | return __angle_axis_to_quaternion_torch(aa) 90 | else: 91 | assert isinstance(aa, np.ndarray) 92 | aa_torch = torch.from_numpy(aa) 93 | quat_torch = __angle_axis_to_quaternion_torch(aa_torch) 94 | return quat_torch.numpy() 95 | 96 | 97 | def __angle_axis_to_rotation_matrix_torch(aa): 98 | aa = aa.clone() 99 | if aa.dim() == 1: 100 | assert aa.size(0) ==3 101 | aa = aa.view(1, 3) 102 | rotmat = gut.angle_axis_to_rotation_matrix(aa)[0][:3, :3] 103 | elif aa.dim() == 2: 104 | assert aa.size(1) == 3 105 | rotmat = gut.angle_axis_to_rotation_matrix(aa)[:, :3, :3] 106 | else: 107 | assert aa.dim() == 3 108 | dim0 = aa.size(0) 109 | dim1 = aa.size(1) 110 | assert aa.size(2) == 3 111 | aa = aa.view(dim0*dim1, 3) 112 | rotmat = gut.angle_axis_to_rotation_matrix(aa) 113 | rotmat = rotmat.view(dim0, dim1, 4, 4) 114 | rotmat = rotmat[:, :, :3, :3] 115 | return rotmat 116 | 117 | 118 | def angle_axis_to_rotation_matrix(angle_axis): 119 | aa = angle_axis 120 | if isinstance(aa, torch.Tensor): 121 | return __angle_axis_to_rotation_matrix_torch(aa) 122 | else: 123 | assert isinstance(aa, np.ndarray) 124 | aa_torch = 
torch.from_numpy(aa) 125 | rotmat_torch = __angle_axis_to_rotation_matrix_torch(aa_torch) 126 | return rotmat_torch.numpy() 127 | 128 | 129 | def __rotation_matrix_to_angle_axis_torch(rotmat): 130 | rotmat = rotmat.clone() 131 | if rotmat.dim() == 2: 132 | assert rotmat.size(0) == 3 133 | assert rotmat.size(1) == 3 134 | rotmat0 = torch.zeros((1, 3, 4)) 135 | rotmat0[0, :, :3] = rotmat 136 | rotmat0[:, 2, 3] = 1.0 137 | aa = gut.rotation_matrix_to_angle_axis(rotmat0)[0] 138 | elif rotmat.dim() == 3: 139 | dim0 = rotmat.size(0) 140 | assert rotmat.size(1) == 3 141 | assert rotmat.size(2) == 3 142 | rotmat0 = torch.zeros((dim0, 3, 4)) 143 | rotmat0[:, :, :3] = rotmat 144 | rotmat0[:, 2, 3] = 1.0 145 | aa = gut.rotation_matrix_to_angle_axis(rotmat0) 146 | else: 147 | assert rotmat.dim() == 4 148 | dim0 = rotmat.size(0) 149 | dim1 = rotmat.size(1) 150 | assert rotmat.size(2) == 3 151 | assert rotmat.size(3) == 3 152 | rotmat0 = torch.zeros((dim0*dim1, 3, 4)) 153 | rotmat0[:, :, :3] = rotmat.view(dim0*dim1, 3, 3) 154 | rotmat0[:, 2, 3] = 1.0 155 | aa = gut.rotation_matrix_to_angle_axis(rotmat0) 156 | aa = aa.view(dim0, dim1, 3) 157 | return aa 158 | 159 | 160 | def rotation_matrix_to_angle_axis(rotmat): 161 | if isinstance(rotmat, torch.Tensor): 162 | return __rotation_matrix_to_angle_axis_torch(rotmat) 163 | else: 164 | assert isinstance(rotmat, np.ndarray) 165 | rotmat_torch = torch.from_numpy(rotmat) 166 | aa_torch = __rotation_matrix_to_angle_axis_torch(rotmat_torch) 167 | return aa_torch.numpy() 168 | 169 | 170 | def rot6d_to_rotmat(x): 171 | """Convert 6D rotation representation to 3x3 rotation matrix. 172 | Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019 173 | Input: 174 | (B,6) Batch of 6-D rotation representations 175 | Output: 176 | (B,3,3) Batch of corresponding rotation matrices 177 | """ 178 | assert isinstance(x, torch.Tensor), "Current version only supports torch.tensor" 179 | 180 | x = x.view(-1,3,2) 181 | a1 = x[:, :, 0] 182 | a2 = x[:, :, 1] 183 | b1 = F.normalize(a1) 184 | b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1) 185 | b3 = torch.cross(b1, b2) 186 | return torch.stack((b1, b2, b3), dim=-1) 187 | 188 | 189 | def angle_axis_to_rot6d(aa): 190 | assert aa.dim() == 2 191 | assert aa.size(1) == 3 192 | bs = aa.size(0) 193 | 194 | rotmat = angle_axis_to_rotation_matrix(aa) 195 | rot6d = rotmat[:, :3, :2] 196 | return rot6d -------------------------------------------------------------------------------- /renderer/od_renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | """ 4 | Renders mesh using OpenDr / Pytorch-3d for visualization. 5 | Part of code is modified from https://github.com/akanazawa/hmr 6 | """ 7 | 8 | import sys 9 | import numpy as np 10 | import cv2 11 | import pdb 12 | from PIL import Image, ImageDraw 13 | from opendr.camera import ProjectPoints 14 | from opendr.renderer import ColoredRenderer 15 | from opendr.lighting import LambertianPointLight 16 | 17 | 18 | class OpendrRenderer(object): 19 | def __init__(self, 20 | img_size=224, 21 | mesh_color=np.array([0.5, 0.5, 0.5]),): 22 | 23 | self.w = img_size 24 | self.h = img_size 25 | self.color = mesh_color 26 | self.img_size = img_size 27 | self.flength = 500. 
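        # Default focal length; used in __call__ as cam = [flength, w/2, h/2] when no camera is given.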
28 | 29 | 30 | def render(self, verts, faces, bg_img): 31 | verts = verts.copy() 32 | faces = faces.copy() 33 | 34 | input_size = 500 35 | 36 | f = 10 37 | 38 | verts[:, 0] = (verts[:, 0] - input_size) / input_size 39 | verts[:, 1] = (verts[:, 1] - input_size) / input_size 40 | 41 | verts[:, 2] /= (5 * 112) 42 | verts[:, 2] += f 43 | 44 | cam_for_render = np.array([f, 1, 1]) * input_size 45 | 46 | rend_img = self.__call__( 47 | img=bg_img, cam=cam_for_render, 48 | verts=verts, faces=faces, color=self.color) 49 | 50 | return rend_img 51 | 52 | 53 | def __call__(self, 54 | verts, 55 | faces, 56 | cam=None, 57 | img=None, 58 | do_alpha=False, 59 | far=None, 60 | near=None, 61 | color = np.array([0, 0, 255]), 62 | img_size=None): 63 | """ 64 | cam is 3D [f, px, py] 65 | """ 66 | if img is not None: 67 | h, w = img.shape[:2] 68 | elif img_size is not None: 69 | h = img_size[0] 70 | w = img_size[1] 71 | else: 72 | h = self.h 73 | w = self.w 74 | 75 | if cam is None: 76 | cam = [self.flength, w / 2., h / 2.] 77 | 78 | use_cam = ProjectPoints( 79 | f=cam[0] * np.ones(2), 80 | rt=np.zeros(3), 81 | t=np.zeros(3), 82 | k=np.zeros(5), 83 | c=cam[1:3]) 84 | 85 | if near is None: 86 | near = np.maximum(np.min(verts[:, 2]) - 25, 0.1) 87 | if far is None: 88 | far = np.maximum(np.max(verts[:, 2]) + 25, 25) 89 | 90 | return_value = render_model( 91 | verts, 92 | faces, 93 | w, 94 | h, 95 | use_cam, 96 | do_alpha=do_alpha, 97 | img=img, 98 | far=far, 99 | near=near, 100 | color=color) 101 | 102 | imtmp = return_value 103 | image = (imtmp * 255).astype('uint8') 104 | return image 105 | 106 | 107 | 108 | def _create_renderer(w=640, 109 | h=480, 110 | rt=np.zeros(3), 111 | t=np.zeros(3), 112 | f=None, 113 | c=None, 114 | k=None, 115 | near=.5, 116 | far=10.): 117 | 118 | f = np.array([w, w]) / 2. if f is None else f 119 | c = np.array([w, h]) / 2. 
if c is None else c 120 | k = np.zeros(5) if k is None else k 121 | 122 | rn = ColoredRenderer() 123 | 124 | rn.camera = ProjectPoints(rt=rt, t=t, f=f, c=c, k=k) 125 | rn.frustum = {'near': near, 'far': far, 'height': h, 'width': w} 126 | return rn 127 | 128 | 129 | def _rotateY(points, angle): 130 | """Rotate the points by a specified angle.""" 131 | ry = np.array([[np.cos(angle), 0., np.sin(angle)], [0., 1., 0.], 132 | [-np.sin(angle), 0., np.cos(angle)]]) 133 | return np.dot(points, ry) 134 | 135 | 136 | def simple_renderer(rn, 137 | verts, 138 | faces, 139 | yrot=np.radians(70), 140 | color=np.array([0, 0, 255]) 141 | ): 142 | 143 | # Rendered model color 144 | rn.set(v=verts, f=faces, vc=color, bgcolor=np.ones(3)) 145 | albedo = rn.vc 146 | 147 | # Construct Back Light (on back right corner) 148 | rn.vc = LambertianPointLight( 149 | f=rn.f, 150 | v=rn.v, 151 | num_verts=len(rn.v), 152 | light_pos=_rotateY(np.array([-200, -100, -100]), yrot), 153 | vc=albedo, 154 | light_color=np.array([1, 1, 1])) 155 | 156 | # Construct Left Light 157 | rn.vc += LambertianPointLight( 158 | f=rn.f, 159 | v=rn.v, 160 | num_verts=len(rn.v), 161 | # light_pos=_rotateY(np.array([800, 10, 300]), yrot), 162 | light_pos=_rotateY(np.array([800, 10, 300]), yrot), 163 | vc=albedo, 164 | light_color=np.array([1, 1, 1])) 165 | 166 | # Construct Right Light 167 | rn.vc += LambertianPointLight( 168 | f=rn.f, 169 | v=rn.v, 170 | num_verts=len(rn.v), 171 | light_pos=_rotateY(np.array([-500, 500, 1000]), yrot), 172 | # light_pos=_rotateY(np.array([-500, 500, 1000]), yrot), 173 | vc=albedo, 174 | light_color=np.array([.7, .7, .7])) 175 | 176 | return rn.r 177 | 178 | 179 | def get_alpha(imtmp, bgval=1.): 180 | h, w = imtmp.shape[:2] 181 | alpha = (~np.all(imtmp == bgval, axis=2)).astype(imtmp.dtype) 182 | 183 | b_channel, g_channel, r_channel = cv2.split(imtmp) 184 | 185 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha.astype( 186 | imtmp.dtype))) 187 | return im_RGBA 188 | 189 | 190 | def append_alpha(imtmp): 191 | alpha = np.ones_like(imtmp[:, :, 0]).astype(imtmp.dtype) 192 | if np.issubdtype(imtmp.dtype, np.uint8): 193 | alpha = alpha * 255 194 | b_channel, g_channel, r_channel = cv2.split(imtmp) 195 | im_RGBA = cv2.merge((b_channel, g_channel, r_channel, alpha)) 196 | return im_RGBA 197 | 198 | 199 | def render_model(verts, 200 | faces, 201 | w, 202 | h, 203 | cam, 204 | near=0.5, 205 | far=25, 206 | img=None, 207 | do_alpha=False, 208 | color=None): 209 | rn = _create_renderer( 210 | w=w, h=h, near=near, far=far, rt=cam.rt, t=cam.t, f=cam.f, c=cam.c) 211 | 212 | # Uses img as background, otherwise white background. 213 | if img is not None: 214 | rn.background_image = img / 255. if img.max() > 1.1 else img 215 | 216 | imtmp = simple_renderer(rn, verts, faces, color=color) 217 | 218 | # If white bg, make transparent. 219 | if img is None and do_alpha: 220 | imtmp = get_alpha(imtmp) 221 | elif img is not None and do_alpha: 222 | imtmp = append_alpha(imtmp) 223 | 224 | return imtmp -------------------------------------------------------------------------------- /demo/demo_bodymocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
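# Overview: body-only demo -- detects body bounding boxes per frame, regresses SMPL/SMPL-X
# parameters with BodyMocap, and renders/saves the results for image, video, or webcam input.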
2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | from torchvision.transforms import Normalize 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | import json 12 | import pickle 13 | from datetime import datetime 14 | 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | from bodymocap.body_bbox_detector import BodyPoseEstimator 18 | import mocap_utils.demo_utils as demo_utils 19 | import mocap_utils.general_utils as gnu 20 | from mocap_utils.timer import Timer 21 | 22 | import renderer.image_utils as imu 23 | from renderer.viewer2D import ImShow 24 | 25 | def run_body_mocap(args, body_bbox_detector, body_mocap, visualizer): 26 | #Setup input data to handle different types of inputs 27 | input_type, input_data = demo_utils.setup_input(args) 28 | 29 | cur_frame = args.start_frame 30 | video_frame = 0 31 | timer = Timer() 32 | while True: 33 | timer.tic() 34 | # load data 35 | load_bbox = False 36 | 37 | if input_type =='image_dir': 38 | if cur_frame < len(input_data): 39 | image_path = input_data[cur_frame] 40 | img_original_bgr = cv2.imread(image_path) 41 | else: 42 | img_original_bgr = None 43 | 44 | elif input_type == 'bbox_dir': 45 | if cur_frame < len(input_data): 46 | print("Use pre-computed bounding boxes") 47 | image_path = input_data[cur_frame]['image_path'] 48 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 49 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 50 | img_original_bgr = cv2.imread(image_path) 51 | load_bbox = True 52 | else: 53 | img_original_bgr = None 54 | 55 | elif input_type == 'video': 56 | _, img_original_bgr = input_data.read() 57 | if video_frame < cur_frame: 58 | video_frame += 1 59 | continue 60 | # save the obtained video frames 61 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 62 | if img_original_bgr is not None: 63 | video_frame += 1 64 | if args.save_frame: 65 | gnu.make_subdir(image_path) 66 | cv2.imwrite(image_path, img_original_bgr) 67 | 68 | elif input_type == 'webcam': 69 | _, img_original_bgr = input_data.read() 70 | 71 | if video_frame < cur_frame: 72 | video_frame += 1 73 | continue 74 | # save the obtained video frames 75 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 76 | if img_original_bgr is not None: 77 | video_frame += 1 78 | if args.save_frame: 79 | gnu.make_subdir(image_path) 80 | cv2.imwrite(image_path, img_original_bgr) 81 | else: 82 | assert False, "Unknown input_type" 83 | 84 | cur_frame +=1 85 | if img_original_bgr is None or cur_frame > args.end_frame: 86 | break 87 | print("--------------------------------------") 88 | 89 | if load_bbox: 90 | body_pose_list = None 91 | else: 92 | body_pose_list, body_bbox_list = body_bbox_detector.detect_body_pose( 93 | img_original_bgr) 94 | hand_bbox_list = [None, ] * len(body_bbox_list) 95 | 96 | # save the obtained body & hand bbox to json file 97 | if args.save_bbox_output: 98 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 99 | 100 | if len(body_bbox_list) < 1: 101 | print(f"No body deteced: {image_path}") 102 | continue 103 | 104 | #Sort the bbox using bbox size 105 | # (to make the order as consistent as possible without tracking) 106 | bbox_size = [ (x[2] * x[3]) for x in body_bbox_list] 107 | idx_big2small = np.argsort(bbox_size)[::-1] 108 | body_bbox_list = [ body_bbox_list[i] for i in idx_big2small ] 109 | if args.single_person and len(body_bbox_list)>0: 110 | body_bbox_list = 
[body_bbox_list[0], ] 111 | 112 | # Body Pose Regression 113 | pred_output_list = body_mocap.regress(img_original_bgr, body_bbox_list) 114 | assert len(body_bbox_list) == len(pred_output_list) 115 | 116 | # extract mesh for rendering (vertices in image space and faces) from pred_output_list 117 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 118 | 119 | # visualization 120 | res_img = visualizer.visualize( 121 | img_original_bgr, 122 | pred_mesh_list = pred_mesh_list, 123 | body_bbox_list = body_bbox_list) 124 | 125 | # show result in the screen 126 | if not args.no_display: 127 | res_img = res_img.astype(np.uint8) 128 | ImShow(res_img) 129 | 130 | # save result image 131 | if args.out_dir is not None: 132 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 133 | 134 | # save predictions to pkl 135 | if args.save_pred_pkl: 136 | demo_type = 'body' 137 | demo_utils.save_pred_to_pkl( 138 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 139 | 140 | timer.toc(bPrint=True,title="Time") 141 | print(f"Processed : {image_path}") 142 | 143 | #save images as a video 144 | if not args.no_video_out and input_type in ['video', 'webcam']: 145 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 146 | 147 | if input_type =='webcam' and input_data is not None: 148 | input_data.release() 149 | cv2.destroyAllWindows() 150 | 151 | 152 | def main(): 153 | args = DemoOptions().parse() 154 | 155 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 156 | assert torch.cuda.is_available(), "Current version only supports GPU" 157 | 158 | # Set bbox detector 159 | body_bbox_detector = BodyPoseEstimator() 160 | 161 | # Set mocap regressor 162 | use_smplx = args.use_smplx 163 | checkpoint_path = args.checkpoint_body_smplx if use_smplx else args.checkpoint_body_smpl 164 | print("use_smplx", use_smplx) 165 | body_mocap = BodyMocap(checkpoint_path, args.smpl_dir, device, use_smplx) 166 | 167 | # Set Visualizer 168 | if args.renderer_type in ['pytorch3d', 'opendr']: 169 | from renderer.screen_free_visualizer import Visualizer 170 | else: 171 | from renderer.visualizer import Visualizer 172 | visualizer = Visualizer(args.renderer_type) 173 | 174 | run_body_mocap(args, body_bbox_detector, body_mocap, visualizer) 175 | 176 | 177 | if __name__ == '__main__': 178 | main() -------------------------------------------------------------------------------- /renderer/p3d_renderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Part of code is modified from https://github.com/facebookresearch/pytorch3d 4 | 5 | import cv2 6 | import os 7 | import sys 8 | import torch 9 | import numpy as np 10 | 11 | from pytorch3d.structures import Meshes 12 | from pytorch3d.renderer.mesh import Textures 13 | from pytorch3d.renderer import ( 14 | PerspectiveCameras, 15 | FoVOrthographicCameras, 16 | PointLights, 17 | RasterizationSettings, 18 | MeshRenderer, 19 | BlendParams, 20 | MeshRasterizer, 21 | SoftPhongShader, 22 | ) 23 | 24 | class Pytorch3dRenderer(object): 25 | 26 | def __init__(self, img_size, mesh_color): 27 | self.device = torch.device("cuda:0") 28 | # self.render_size = 1920 29 | 30 | self.img_size = img_size 31 | 32 | # mesh color 33 | mesh_color = np.array(mesh_color)[::-1] 34 | self.mesh_color = torch.from_numpy( 35 | mesh_color.copy()).view(1, 1, 3).float().to(self.device) 36 | 37 | # renderer for large objects, such as whole body. 
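        # Three renderers at different output resolutions are created below; render() picks
        # one of them based on the projected bounding-box size of the mesh.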
38 | self.render_size_large = 700 39 | lights = PointLights( 40 | ambient_color = [[1.0, 1.0, 1.0],], 41 | diffuse_color = [[1.0, 1.0, 1.0],], 42 | device=self.device, location=[[1.0, 1.0, -30]]) 43 | self.renderer_large = self.__get_renderer(self.render_size_large, lights) 44 | 45 | # renderer for small objects, such as whole body. 46 | self.render_size_medium = 400 47 | lights = PointLights( 48 | ambient_color = [[0.5, 0.5, 0.5],], 49 | diffuse_color = [[0.5, 0.5, 0.5],], 50 | device=self.device, location=[[1.0, 1.0, -30]]) 51 | self.renderer_medium = self.__get_renderer(self.render_size_medium, lights) 52 | 53 | 54 | # renderer for small objects, such as whole body. 55 | self.render_size_small = 200 56 | lights = PointLights( 57 | ambient_color = [[0.5, 0.5, 0.5],], 58 | diffuse_color = [[0.5, 0.5, 0.5],], 59 | device=self.device, location=[[1.0, 1.0, -30]]) 60 | self.renderer_small = self.__get_renderer(self.render_size_small, lights) 61 | 62 | 63 | 64 | def __get_renderer(self, render_size, lights): 65 | 66 | cameras = FoVOrthographicCameras( 67 | device = self.device, 68 | znear=0.1, 69 | zfar=10.0, 70 | max_y=1.0, 71 | min_y=-1.0, 72 | max_x=1.0, 73 | min_x=-1.0, 74 | scale_xyz=((1.0, 1.0, 1.0),), # (1, 3) 75 | ) 76 | 77 | raster_settings = RasterizationSettings( 78 | image_size = render_size, 79 | blur_radius = 0, 80 | faces_per_pixel = 1, 81 | ) 82 | blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color = (0,0,0)) 83 | 84 | renderer = MeshRenderer( 85 | rasterizer=MeshRasterizer( 86 | cameras=cameras, 87 | raster_settings=raster_settings 88 | ), 89 | shader=SoftPhongShader( 90 | device=self.device, 91 | cameras=cameras, 92 | lights=lights, 93 | blend_params=blend_params 94 | ) 95 | ) 96 | 97 | return renderer 98 | 99 | 100 | def render(self, verts, faces, bg_img): 101 | verts = verts.copy() 102 | faces = faces.copy() 103 | 104 | # bbox for verts 105 | x0 = int(np.min(verts[:, 0])) 106 | x1 = int(np.max(verts[:, 0])) 107 | y0 = int(np.min(verts[:, 1])) 108 | y1 = int(np.max(verts[:, 1])) 109 | width = x1 - x0 110 | height = y1 - y0 111 | 112 | bbox_size = max(height, width) 113 | if bbox_size <= self.render_size_small: 114 | # print("Using small size renderer") 115 | render_size = self.render_size_small 116 | renderer = self.renderer_small 117 | else: 118 | if bbox_size <= self.render_size_medium: 119 | # print("Using medium size renderer") 120 | render_size = self.render_size_medium 121 | renderer = self.renderer_medium 122 | else: 123 | # print("Using large size renderer") 124 | render_size = self.render_size_large 125 | renderer = self.renderer_large 126 | 127 | # padding the tight bbox 128 | margin = int(max(width, height) * 0.1) 129 | x0 = max(0, x0-margin) 130 | y0 = max(0, y0-margin) 131 | x1 = min(self.img_size, x1+margin) 132 | y1 = min(self.img_size, y1+margin) 133 | 134 | # move verts to be in the bbox 135 | verts[:, 0] -= x0 136 | verts[:, 1] -= y0 137 | 138 | # normalize verts to (-1, 1) 139 | bbox_size = max(y1-y0, x1-x0) 140 | half_size = bbox_size / 2 141 | verts[:, 0] = (verts[:, 0] - half_size) / half_size 142 | verts[:, 1] = (verts[:, 1] - half_size) / half_size 143 | 144 | # the coords of pytorch-3d is (1, 1) for upper-left and (-1, -1) for lower-right 145 | # so need to multiple minus for vertices 146 | verts[:, :2] *= -1 147 | 148 | # shift verts along the z-axis 149 | verts[:, 2] /= 112 150 | verts[:, 2] += 5 151 | 152 | verts_tensor = torch.from_numpy(verts).float().unsqueeze(0).cuda() 153 | faces_tensor = 
torch.from_numpy(faces.copy()).long().unsqueeze(0).cuda() 154 | 155 | # set color 156 | mesh_color = self.mesh_color.repeat(1, verts.shape[0], 1) 157 | textures = Textures(verts_rgb = mesh_color) 158 | 159 | # rendering 160 | mesh = Meshes(verts=verts_tensor, faces=faces_tensor, textures=textures) 161 | 162 | # blending rendered mesh with background image 163 | rend_img = renderer(mesh) 164 | rend_img = rend_img[0].cpu().numpy() 165 | 166 | 167 | scale_ratio = render_size / bbox_size 168 | img_size_new = int(self.img_size * scale_ratio) 169 | bg_img_new = cv2.resize(bg_img, (img_size_new, img_size_new)) 170 | 171 | x0 = max(int(x0 * scale_ratio), 0) 172 | y0 = max(int(y0 * scale_ratio), 0) 173 | x1 = min(int(x1 * scale_ratio), img_size_new) 174 | y1 = min(int(y1 * scale_ratio), img_size_new) 175 | 176 | h0 = min(y1-y0, render_size) 177 | w0 = min(x1-x0, render_size) 178 | 179 | y1 = y0 + h0 180 | x1 = x0 + w0 181 | 182 | rend_img_new = np.zeros((img_size_new, img_size_new, 4)) 183 | rend_img_new[y0:y1, x0:x1, :] = rend_img[:h0, :w0, :] 184 | rend_img = rend_img_new 185 | 186 | alpha = rend_img[:, :, 3:4] 187 | alpha[alpha>0] = 1.0 188 | 189 | 190 | rend_img = rend_img[:, :, :3] 191 | maxColor = rend_img.max() 192 | rend_img *= 255 /maxColor #Make sure <1.0 193 | rend_img = rend_img[:, :, ::-1] 194 | 195 | res_img = alpha * rend_img + (1.0 - alpha) * bg_img_new 196 | 197 | res_img = cv2.resize(res_img, (self.img_size, self.img_size)) 198 | 199 | return res_img -------------------------------------------------------------------------------- /demo/demo_handmocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os, sys, shutil 4 | import os.path as osp 5 | import numpy as np 6 | import cv2 7 | import json 8 | import torch 9 | from torchvision.transforms import Normalize 10 | 11 | from demo.demo_options import DemoOptions 12 | import mocap_utils.general_utils as gnu 13 | import mocap_utils.demo_utils as demo_utils 14 | 15 | from handmocap.hand_mocap_api import HandMocap 16 | from handmocap.hand_bbox_detector import HandBboxDetector 17 | 18 | import renderer.image_utils as imu 19 | from renderer.viewer2D import ImShow 20 | import time 21 | 22 | 23 | def run_hand_mocap(args, bbox_detector, hand_mocap, visualizer): 24 | #Set up input data (images or webcam) 25 | input_type, input_data = demo_utils.setup_input(args) 26 | 27 | assert args.out_dir is not None, "Please specify output dir to store the results" 28 | cur_frame = args.start_frame 29 | video_frame = 0 30 | 31 | while True: 32 | # load data 33 | load_bbox = False 34 | 35 | if input_type =='image_dir': 36 | if cur_frame < len(input_data): 37 | image_path = input_data[cur_frame] 38 | img_original_bgr = cv2.imread(image_path) 39 | else: 40 | img_original_bgr = None 41 | 42 | elif input_type == 'bbox_dir': 43 | if cur_frame < len(input_data): 44 | print("Use pre-computed bounding boxes") 45 | image_path = input_data[cur_frame]['image_path'] 46 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 47 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 48 | img_original_bgr = cv2.imread(image_path) 49 | load_bbox = True 50 | else: 51 | img_original_bgr = None 52 | 53 | elif input_type == 'video': 54 | _, img_original_bgr = input_data.read() 55 | if video_frame < cur_frame: 56 | video_frame += 1 57 | continue 58 | # save the obtained video frames 59 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 60 | if 
img_original_bgr is not None: 61 | video_frame += 1 62 | if args.save_frame: 63 | gnu.make_subdir(image_path) 64 | cv2.imwrite(image_path, img_original_bgr) 65 | 66 | elif input_type == 'webcam': 67 | _, img_original_bgr = input_data.read() 68 | 69 | if video_frame < cur_frame: 70 | video_frame += 1 71 | continue 72 | # save the obtained video frames 73 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 74 | if img_original_bgr is not None: 75 | video_frame += 1 76 | if args.save_frame: 77 | gnu.make_subdir(image_path) 78 | cv2.imwrite(image_path, img_original_bgr) 79 | else: 80 | assert False, "Unknown input_type" 81 | 82 | cur_frame +=1 83 | if img_original_bgr is None or cur_frame > args.end_frame: 84 | break 85 | print("--------------------------------------") 86 | 87 | # bbox detection 88 | if load_bbox: 89 | body_pose_list = None 90 | raw_hand_bboxes = None 91 | elif args.crop_type == 'hand_crop': 92 | # hand already cropped, thererore, no need for detection 93 | img_h, img_w = img_original_bgr.shape[:2] 94 | body_pose_list = None 95 | raw_hand_bboxes = None 96 | hand_bbox_list = [ dict(right_hand = np.array([0, 0, img_w, img_h])) ] 97 | else: 98 | # Input images has other body part or hand not cropped. 99 | # Use hand detection model & body detector for hand detection 100 | assert args.crop_type == 'no_crop' 101 | detect_output = bbox_detector.detect_hand_bbox(img_original_bgr.copy()) 102 | body_pose_list, body_bbox_list, hand_bbox_list, raw_hand_bboxes = detect_output 103 | 104 | # save the obtained body & hand bbox to json file 105 | if args.save_bbox_output: 106 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 107 | 108 | if len(hand_bbox_list) < 1: 109 | print(f"No hand deteced: {image_path}") 110 | continue 111 | 112 | # Hand Pose Regression 113 | pred_output_list = hand_mocap.regress( 114 | img_original_bgr, hand_bbox_list, add_margin=True) 115 | assert len(hand_bbox_list) == len(body_bbox_list) 116 | assert len(body_bbox_list) == len(pred_output_list) 117 | 118 | # extract mesh for rendering (vertices in image space and faces) from pred_output_list 119 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 120 | 121 | # visualize 122 | res_img = visualizer.visualize( 123 | img_original_bgr, 124 | pred_mesh_list = pred_mesh_list, 125 | hand_bbox_list = hand_bbox_list) 126 | 127 | # show result in the screen 128 | if not args.no_display: 129 | res_img = res_img.astype(np.uint8) 130 | ImShow(res_img) 131 | 132 | # save the image (we can make an option here) 133 | if args.out_dir is not None: 134 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 135 | 136 | # save predictions to pkl 137 | if args.save_pred_pkl: 138 | demo_type = 'hand' 139 | demo_utils.save_pred_to_pkl( 140 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 141 | 142 | print(f"Processed : {image_path}") 143 | 144 | #save images as a video 145 | if not args.no_video_out and input_type in ['video', 'webcam']: 146 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 147 | 148 | # When everything done, release the capture 149 | if input_type =='webcam' and input_data is not None: 150 | input_data.release() 151 | cv2.destroyAllWindows() 152 | 153 | 154 | def main(): 155 | args = DemoOptions().parse() 156 | args.use_smplx = True 157 | 158 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 159 | assert torch.cuda.is_available(), "Current version only supports GPU" 
160 | 161 | #Set Bbox detector 162 | bbox_detector = HandBboxDetector(args.view_type, device) 163 | 164 | # Set Mocap regressor 165 | hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device = device) 166 | 167 | # Set Visualizer 168 | if args.renderer_type in ['pytorch3d', 'opendr']: 169 | from renderer.screen_free_visualizer import Visualizer 170 | else: 171 | from renderer.visualizer import Visualizer 172 | visualizer = Visualizer(args.renderer_type) 173 | 174 | # run 175 | run_hand_mocap(args, bbox_detector, hand_mocap, visualizer) 176 | 177 | 178 | if __name__ == '__main__': 179 | main() 180 | -------------------------------------------------------------------------------- /renderer/denseposeRenderer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import numpy as np 4 | from OpenGL.GLUT import * 5 | from OpenGL.GLU import * 6 | from renderer.shaders.framework import * 7 | 8 | from renderer.glRenderer import glRenderer 9 | 10 | _glut_window = None 11 | 12 | ''' 13 | #Usage: 14 | render.set_smpl_mesh(v) #v for vertex locations in(6890,3) 15 | render.setBackgroundTexture(rawImg) #Optional BG texture 16 | render.setWindowSize(rawImg.shape[1], rawImg.shape[0]) #Optional: window size 17 | render.show_once() 18 | ''' 19 | 20 | class denseposeRenderer(glRenderer): 21 | 22 | def __init__(self, width=1600, height=1200, name='GL Renderer', 23 | program_files=['renderer/shaders/simple140.fs', 'renderer/shaders/simple140.vs'], color_size=1, ms_rate=1): 24 | glRenderer.__init__(self, width, height, name, program_files, color_size, ms_rate) 25 | 26 | self.densepose_info = self.loadDensepose_info() 27 | 28 | #Densepose Specific 29 | self.dp_faces = self.densepose_info['All_Faces']-1 #0~7828 30 | self.dp_vertexIndices = self.densepose_info['All_vertices']-1 #(1,7829) #Vertex orders used in denpose info. 
There are repeated vetices 31 | 32 | #DP color information 33 | dp_color_seg = self.densepose_info['All_FaceIndices'] #(13774,1) 34 | dp_color_seg = np.repeat(dp_color_seg,3,axis=1) /100.0#24.0 #(13774,3) 35 | self.dp_color_seg = np.repeat( dp_color_seg.flatten()[:,None],3,axis=1) #(41332,3) 36 | 37 | dp_color_U = self.densepose_info['All_U_norm'] #(7289,1) 38 | dp_color_U = np.repeat(dp_color_U,3,axis=1) #(13774,3) 39 | self.dp_color_U = dp_color_U[self.dp_faces.reshape([-1])] #(41332,3) 40 | 41 | dp_color_V = self.densepose_info['All_V_norm'] #(7829,3) 42 | dp_color_V = np.repeat(dp_color_V,3,axis=1) #(13774,3) 43 | self.dp_color_V = dp_color_V[self.dp_faces.reshape([-1])] #(41332,3) 44 | 45 | #make sure you have: /yourpath/renderer/densepose_uv_data/UV_Processed.mat 46 | def loadDensepose_info(self, dp_data_path= 'extra_data/densepose_uv_data/UV_Processed.mat'): 47 | 48 | #Load densepose data 49 | import scipy.io as sio 50 | densepose_info = None 51 | densepose_info = sio.loadmat(dp_data_path) #All_FaceIndices (13774), All_Faces(13774), All_U(7829), All_U_norm(7829), All_V(7829), All_V_norm (7829), All_vertices (7829) 52 | assert densepose_info is not None 53 | # All_FaceIndices - part labels for each face 54 | # All_Faces - vertex indices for each face 55 | # All_vertices - SMPL vertex IDs for all vertices (note that one SMPL vertex can be shared across parts and thus appear in faces with different part labels) 56 | # All_U - U coordinates for all vertices 57 | # All_V - V coordinates for all vertices 58 | # All_U_norm - normalization factor for U coordinates to map them to [0, 1] interval 59 | # All_V_norm - normalization factor for V coordinates to map them to [0, 1] interval 60 | # vertexColor = densepose_info['All_U_norm']*255 61 | # vertexColor = np.zeros((v.shape[1], 3)) 62 | # vertexColor[:,0] = densepose_info['All_U_norm'][:v.shape[1]].flatten() #(6890,3) 63 | # vertexColor[:,1] = densepose_info['All_V_norm'][:v.shape[1]].flatten() #(6890,3) 64 | 65 | # # faces = smplWrapper.f 66 | # v =v[0] #(6890,3) 67 | # dp_vertex = v[densepose_info['All_vertices']-1] #(1,7829,3) #Considering repeatation 68 | # faces =densepose_info['All_Faces']-1 #0~7828 69 | # # vertexColor = densepose_info['All_FaceIndices'] #(13774,1) 70 | # # vertexColor = np.repeat(vertexColor,3,axis=1) /24.0 #(13774,3) 71 | 72 | # # vertexColor = densepose_info['All_U_norm'] #(13774,1) 73 | # vertexColor = densepose_info['All_V_norm'] #(13774,1) 74 | # vertexColor = np.repeat(vertexColor,3,axis=1) 75 | 76 | # # vertexColor[vertexColor!=2]*=0 77 | # vertexColor[vertexColor==2]=24 78 | return densepose_info 79 | 80 | 81 | #vertice: (6890,3) 82 | #colormode: ['seg', 'u', 'v'] 83 | def set_mesh(self, vertices, _): 84 | 85 | if vertices.dtype != np.dtype('float64'): 86 | vertices = vertices.astype(np.float64) #Should be DOUBLE 87 | 88 | #Change the vertex and 89 | dp_vertex = vertices[self.dp_vertexIndices][0] #(7829,3) #Considering repeatation 90 | 91 | # if colormode=='seg': #segment 92 | # self.color_data = self.dp_color_seg 93 | # elif colormode=='v': 94 | # self.color_data[:,1] = self.dp_color_V 95 | # elif colormode=='u': 96 | # self.color_data = self.dp_color_U #(41322,3) 97 | # else: 98 | # assert False 99 | 100 | self.color_data = self.dp_color_U #(41322,3) 101 | self.color_data[:,1] = self.dp_color_V[:,1] 102 | self.color_data[:,2] = self.dp_color_seg[:,2] 103 | 104 | self.vertex_data = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 105 | self.vertex_dim = self.vertex_data.shape[1] 106 | self.n_vertices = 
self.vertex_data.shape[0] 107 | 108 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 109 | glBufferData(GL_ARRAY_BUFFER, self.vertex_data, GL_STATIC_DRAW) 110 | 111 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 112 | glBufferData(GL_ARRAY_BUFFER, self.color_data, GL_STATIC_DRAW) 113 | 114 | glBindBuffer(GL_ARRAY_BUFFER, 0) 115 | 116 | 117 | def add_mesh(self, vertices, _, color=None): 118 | """ 119 | Concatenate the new mesh data to self.vertex_data (as if a single giant mesh) 120 | 121 | Args: 122 | input_vertices (np.ndarray): (verNum, 3). 123 | input_faces (np.ndarray): (faceNum, 3). 124 | """ 125 | 126 | if vertices.dtype != np.dtype('float64'): 127 | vertices = vertices.astype(np.float64) #Should be DOUBLE 128 | 129 | dp_vertex = vertices[self.dp_vertexIndices][0] #(7829,3) #Considering repeatation 130 | 131 | color_data = self.dp_color_U #(41322,3) 132 | color_data[:,1] = self.dp_color_V[:,1] 133 | color_data[:,2] = self.dp_color_seg[:,2] 134 | 135 | if self.vertex_data is None: 136 | self.vertex_data = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 137 | self.color_data = color_data 138 | 139 | else: #Add the data 140 | input_vertices = dp_vertex[self.dp_faces.reshape([-1])] #(41322,3) 141 | self.vertex_data = np.concatenate( (self.vertex_data, input_vertices), axis=0) #(6870,3) 142 | self.color_data = np.concatenate( (self.color_data, color_data), axis=0) #(6870,3) 143 | 144 | self.vertex_dim = self.vertex_data.shape[1] 145 | self.n_vertices = self.vertex_data.shape[0] 146 | 147 | glBindBuffer(GL_ARRAY_BUFFER, self.vertex_buffer) 148 | glBufferData(GL_ARRAY_BUFFER, self.vertex_data, GL_STATIC_DRAW) 149 | 150 | glBindBuffer(GL_ARRAY_BUFFER, self.color_buffer) 151 | glBufferData(GL_ARRAY_BUFFER, self.color_data, GL_STATIC_DRAW) 152 | 153 | 154 | glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, self.index_buffer) 155 | glBufferData(GL_ELEMENT_ARRAY_BUFFER, self.meshindex_data, GL_STATIC_DRAW) 156 | 157 | glBindBuffer(GL_ARRAY_BUFFER, 0) 158 | -------------------------------------------------------------------------------- /handmocap/hand_modules/resnet.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import torch.nn as nn 7 | import math 8 | import torch.utils.model_zoo as model_zoo 9 | 10 | 11 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 12 | 'resnet152'] 13 | 14 | 15 | model_urls = { 16 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 17 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 18 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 19 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 20 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 21 | } 22 | 23 | 24 | def conv3x3(in_planes, out_planes, stride=1): 25 | """3x3 convolution with padding""" 26 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 27 | padding=1, bias=False) 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm2d(planes) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm2d(planes) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm2d(planes) 69 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm2d(planes) 72 | self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) 73 | self.bn3 = nn.BatchNorm2d(planes * self.expansion) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | out += residual 96 | out = self.relu(out) 97 | 98 | return out 99 | 100 | 101 | class ResNet(nn.Module): 102 | 103 | def __init__(self, block, layers, num_classes=1000): 104 | self.inplanes = 64 105 | super(ResNet, self).__init__() 106 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 107 | bias=False) 108 | self.bn1 = nn.BatchNorm2d(64) 109 | self.relu = nn.ReLU(inplace=True) 110 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 111 | self.layer1 = self._make_layer(block, 64, layers[0]) 112 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 113 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 114 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 115 | self.avgpool = nn.AvgPool2d(7, stride=1) 
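# Unlike the torchvision classification ResNet, the head below is a single
# 1024-d fully-connected feature layer (fc1); forward() returns this feature
# vector (after ReLU) instead of class logits.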
116 | self.fc1 = nn.Linear(512 * block.expansion, 1024) 117 | 118 | for m in self.modules(): 119 | if isinstance(m, nn.Conv2d): 120 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 121 | elif isinstance(m, nn.BatchNorm2d): 122 | nn.init.constant_(m.weight, 1) 123 | nn.init.constant_(m.bias, 0) 124 | 125 | def _make_layer(self, block, planes, blocks, stride=1): 126 | downsample = None 127 | if stride != 1 or self.inplanes != planes * block.expansion: 128 | downsample = nn.Sequential( 129 | nn.Conv2d(self.inplanes, planes * block.expansion, 130 | kernel_size=1, stride=stride, bias=False), 131 | nn.BatchNorm2d(planes * block.expansion), 132 | ) 133 | 134 | layers = [] 135 | layers.append(block(self.inplanes, planes, stride, downsample)) 136 | self.inplanes = planes * block.expansion 137 | for i in range(1, blocks): 138 | layers.append(block(self.inplanes, planes)) 139 | 140 | return nn.Sequential(*layers) 141 | 142 | def forward(self, x): 143 | x = self.conv1(x) 144 | x = self.bn1(x) 145 | x = self.relu(x) 146 | x = self.maxpool(x) 147 | 148 | x = self.layer1(x) 149 | x = self.layer2(x) 150 | x = self.layer3(x) 151 | x = self.layer4(x) 152 | 153 | x = self.avgpool(x) 154 | x = x.view(x.size(0), -1) 155 | x = self.relu(x) 156 | 157 | x = self.fc1(x) 158 | x = self.relu(x) 159 | 160 | return x 161 | 162 | 163 | def resnet18(pretrained=False, **kwargs): 164 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 165 | if pretrained: 166 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet18']) 167 | model.load_state_dict(pretrained_state_dict, strict=False) 168 | return model 169 | 170 | 171 | def resnet34(pretrained=False, **kwargs): 172 | """Constructs a ResNet-34 model. 173 | 174 | Args: 175 | pretrained (bool): If True, returns a model pre-trained on ImageNet 176 | """ 177 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 178 | if pretrained: 179 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet34']) 180 | model.load_state_dict(pretrained_state_dict, strict=False) 181 | return model 182 | 183 | 184 | def resnet50(pretrained=False, **kwargs): 185 | """Constructs a ResNet-50 model. 186 | 187 | Args: 188 | pretrained (bool): If True, returns a model pre-trained on ImageNet 189 | """ 190 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 191 | if pretrained: 192 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet50']) 193 | model.load_state_dict(pretrained_state_dict, strict=False) 194 | return model 195 | 196 | 197 | def resnet101(pretrained=False, **kwargs): 198 | """Constructs a ResNet-101 model. 199 | 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 204 | if pretrained: 205 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet101']) 206 | model.load_state_dict(pretrained_state_dict, strict=False) 207 | return model 208 | 209 | 210 | def resnet152(pretrained=False, **kwargs): 211 | """Constructs a ResNet-152 model. 
212 | 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 217 | if pretrained: 218 | pretrained_state_dict = model_zoo.load_url(model_urls['resnet152']) 219 | model.load_state_dict(pretrained_state_dict, strict=False) 220 | return model 221 | -------------------------------------------------------------------------------- /mocap_utils/coordconv.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | # Original code from SPIN: https://github.com/nkolot/SPIN 4 | 5 | """ 6 | This file contains functions that are used to perform data augmentation. 7 | """ 8 | import sys 9 | import torch 10 | import numpy as np 11 | import scipy.misc 12 | import cv2 13 | from torchvision.transforms import Normalize 14 | 15 | # For converting coordinate between SMPL 3D coord <-> 2D bbox <-> original 2D image 16 | # data3D: (N,3), where N is number of 3D points in "smpl"'s 3D coordinate (vertex or skeleton) 17 | 18 | def convert_smpl_to_bbox(data3D, scale, trans, bAppTransFirst=False): 19 | data3D = data3D.copy() 20 | resnet_input_size_half = 224 *0.5 21 | if bAppTransFirst: # Hand model 22 | data3D[:,0:2] += trans 23 | data3D *= scale # apply scaling 24 | else: 25 | data3D *= scale # apply scaling 26 | data3D[:,0:2] += trans 27 | 28 | data3D*= resnet_input_size_half # 112 is originated from hrm's input size (224,24) 29 | # data3D[:,:2]*= resnet_input_size_half # 112 is originated from hrm's input size (224,24) 30 | return data3D 31 | 32 | 33 | def convert_bbox_to_oriIm(data3D, boxScale_o2n, bboxTopLeft, imgSizeW, imgSizeH): 34 | data3D = data3D.copy() 35 | resnet_input_size_half = 224 *0.5 36 | imgSize = np.array([imgSizeW,imgSizeH]) 37 | 38 | data3D /= boxScale_o2n 39 | 40 | if not isinstance(bboxTopLeft, np.ndarray): 41 | assert isinstance(bboxTopLeft, tuple) 42 | assert len(bboxTopLeft) == 2 43 | bboxTopLeft = np.array(bboxTopLeft) 44 | 45 | data3D[:,:2] += (bboxTopLeft + resnet_input_size_half/boxScale_o2n) 46 | 47 | return data3D 48 | 49 | 50 | def convert_smpl_to_bbox_perspective(data3D, scale_ori, trans_ori, focalLeng, scaleFactor=1.0): 51 | data3D = data3D.copy() 52 | resnet_input_size_half = 224 *0.5 53 | 54 | scale = scale_ori* resnet_input_size_half 55 | trans = trans_ori *resnet_input_size_half 56 | 57 | if False: #Weak perspective 58 | data3D *= scale #apply scaling 59 | data3D[:,0:2] += trans 60 | else: 61 | # delta = (trans - imageShape*0.5)/scale 62 | # Current projection already consider camera center during the rendering. 
63 | # Thus no need to consider principle axis 64 | delta = (trans )/scale 65 | data3D[:,0:2] +=delta 66 | 67 | newZ = focalLeng/scale 68 | deltaZ = newZ - np.mean(data3D[:,2]) 69 | data3D[:,2] +=deltaZ 70 | # data3D[:,2] +=16.471718554146534 #debug 71 | 72 | if False: #Scaling to be a certain dist from camera 73 | texture_plan_depth = 500 74 | ratio = texture_plan_depth /np.mean(data3D[:,2]) 75 | data3D *=ratio 76 | else: 77 | data3D *=scaleFactor 78 | 79 | return data3D 80 | 81 | 82 | """ Extract bbox information """ 83 | def bbox_from_openpose(openpose_file, rescale=1.2, detection_thresh=0.2): 84 | """Get center and scale for bounding box from openpose detections.""" 85 | with open(openpose_file, 'r') as f: 86 | data = json.load(f) 87 | if 'people' not in data or len(data['people'])==0: 88 | return None, None 89 | # keypoints = json.load(f)['people'][0]['pose_keypoints_2d'] 90 | keypoints = data['people'][0]['pose_keypoints_2d'] 91 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 92 | valid = keypoints[:,-1] > detection_thresh 93 | 94 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 95 | 96 | # min_pt = np.min(valid_keypoints, axis=0) 97 | # max_pt = np.max(valid_keypoints, axis=0) 98 | # bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 99 | 100 | center = valid_keypoints.mean(axis=0) 101 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 102 | # adjust bounding box tightness 103 | scale = bbox_size / 200.0 104 | scale *= rescale 105 | return center, scale#, bbox 106 | 107 | 108 | # keypoints: (Nx3) 109 | def bbox_from_keypoint2d(keypoints, rescale=1.2, detection_thresh=0.2): 110 | """ 111 | output: 112 | center: bbox center 113 | scale: scale_n2o: 224x224 -> original bbox size (max length if not a square bbox) 114 | """ 115 | # """Get center and scale for bounding box from openpose detections.""" 116 | 117 | if len(keypoints.shape)==2 and keypoints.shape[1]==2: #(X,2) 118 | valid_keypoints = keypoints 119 | else: 120 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 121 | valid = keypoints[:,-1] > detection_thresh 122 | 123 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 124 | 125 | # min_pt = np.min(valid_keypoints, axis=0) 126 | # max_pt = np.max(valid_keypoints, axis=0) 127 | # bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 128 | 129 | center = valid_keypoints.mean(axis=0) 130 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 131 | 132 | 133 | # adjust bounding box tightness 134 | scale = bbox_size / 200.0 135 | scale *= rescale 136 | return center, scale#, bbox 137 | 138 | 139 | def bbox_from_keypoints(keypoints, rescale=1.2, detection_thresh=0.2, imageHeight= None): 140 | """Get center and scale for bounding box from openpose detections.""" 141 | 142 | keypoints = np.reshape(np.array(keypoints), (-1,3)) 143 | valid = keypoints[:,-1] > detection_thresh 144 | 145 | valid_keypoints = keypoints[valid][:,:-1] #(25,2) 146 | 147 | if len(valid_keypoints)<2: 148 | return None, None, None 149 | 150 | 151 | if False: #Should have all limbs and nose 152 | if np.sum(valid[ [ 2,3,4, 5,6,7, 9,10, 12,13,1,0] ]) <12: 153 | return None, None, None 154 | 155 | min_pt = np.min(valid_keypoints, axis=0) 156 | max_pt = np.max(valid_keypoints, axis=0) 157 | 158 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 159 | 160 | if imageHeight is not None: 161 | 162 | if valid[10]==False and valid[13] == False: # No knees ub ioeb 163 | max_pt[1] = 
min(max_pt[1] + (max_pt[1]- min_pt[1]), imageHeight ) 164 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 165 | valid_keypoints = np.vstack( (valid_keypoints, np.array(max_pt)) ) 166 | 167 | 168 | elif valid[11]==False and valid[14] == False: #No foot 169 | max_pt[1] = min(max_pt[1] + (max_pt[1]- min_pt[1])*0.2, imageHeight ) 170 | bbox= [ min_pt[0], min_pt[1], max_pt[0] - min_pt[0], max_pt[1] - min_pt[1]] 171 | 172 | valid_keypoints = np.vstack( (valid_keypoints, np.array(max_pt)) ) 173 | 174 | 175 | center = valid_keypoints.mean(axis=0) 176 | bbox_size = (valid_keypoints.max(axis=0) - valid_keypoints.min(axis=0)).max() 177 | # adjust bounding box tightness 178 | scale = bbox_size / 200.0 179 | scale *= rescale 180 | return center, scale, bbox 181 | 182 | 183 | def bbox_from_bbr(bbox_XYWH, rescale=1.2, detection_thresh=0.2, imageHeight= None): 184 | #bbr: (minX, minY, width, height) 185 | """Get center and scale for bounding box from openpose detections.""" 186 | 187 | center = bbox_XYWH[:2] + 0.5 * bbox_XYWH[2:] 188 | bbox_size = max(bbox_XYWH[2:]) 189 | # adjust bounding box tightness 190 | scale = bbox_size / 200.0 191 | scale *= rescale 192 | return center, scale#, bbox_XYWH 193 | 194 | 195 | def bbox_from_json(bbox_file): 196 | """Get center and scale of bounding box from bounding box annotations. 197 | The expected format is [top_left(x), top_left(y), width, height]. 198 | """ 199 | with open(bbox_file, 'r') as f: 200 | bbox = np.array(json.load(f)['bbox']).astype(np.float32) 201 | ul_corner = bbox[:2] 202 | center = ul_corner + 0.5 * bbox[2:] 203 | width = max(bbox[2], bbox[3]) 204 | scale = width / 200.0 205 | # make sure the bounding box is rectangular 206 | return center, scale -------------------------------------------------------------------------------- /bodymocap/utils/geometry.py: -------------------------------------------------------------------------------- 1 | 2 | # Original code from SPIN: https://github.com/nkolot/SPIN 3 | 4 | import torch 5 | from torch.nn import functional as F 6 | import numpy as np 7 | 8 | import torchgeometry 9 | 10 | """ 11 | Useful geometric operations, e.g. Perspective projection and a differentiable Rodrigues formula 12 | Parts of the code are taken from https://github.com/MandyMo/pytorch_HMR 13 | """ 14 | def batch_rodrigues(theta): 15 | """Convert axis-angle representation to rotation matrix. 16 | Args: 17 | theta: size = [B, 3] 18 | Returns: 19 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] 20 | """ 21 | l1norm = torch.norm(theta + 1e-8, p = 2, dim = 1) 22 | angle = torch.unsqueeze(l1norm, -1) 23 | normalized = torch.div(theta, angle) 24 | angle = angle * 0.5 25 | v_cos = torch.cos(angle) 26 | v_sin = torch.sin(angle) 27 | quat = torch.cat([v_cos, v_sin * normalized], dim = 1) 28 | return quat_to_rotmat(quat) 29 | 30 | def quat_to_rotmat(quat): 31 | """Convert quaternion coefficients to rotation matrix. 
32 | Args: 33 | quat: size = [B, 4] 4 <===>(w, x, y, z) 34 | Returns: 35 | Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] 36 | """ 37 | norm_quat = quat 38 | norm_quat = norm_quat/norm_quat.norm(p=2, dim=1, keepdim=True) 39 | w, x, y, z = norm_quat[:,0], norm_quat[:,1], norm_quat[:,2], norm_quat[:,3] 40 | 41 | B = quat.size(0) 42 | 43 | w2, x2, y2, z2 = w.pow(2), x.pow(2), y.pow(2), z.pow(2) 44 | wx, wy, wz = w*x, w*y, w*z 45 | xy, xz, yz = x*y, x*z, y*z 46 | 47 | rotMat = torch.stack([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz, 48 | 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx, 49 | 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2], dim=1).view(B, 3, 3) 50 | return rotMat 51 | 52 | 53 | 54 | 55 | 56 | 57 | def cross_product(u, v): 58 | batch = u.shape[0] 59 | i = u[:, 1] * v[:, 2] - u[:, 2] * v[:, 1] 60 | j = u[:, 2] * v[:, 0] - u[:, 0] * v[:, 2] 61 | k = u[:, 0] * v[:, 1] - u[:, 1] * v[:, 0] 62 | 63 | out = torch.cat((i.view(batch, 1), j.view(batch, 1), k.view(batch, 1)), 1) 64 | 65 | return out 66 | 67 | def normalize_vector(v): 68 | batch = v.shape[0] 69 | v_mag = torch.sqrt(v.pow(2).sum(1)) # batch 70 | v_mag = torch.max(v_mag, v.new([1e-8])) 71 | v_mag = v_mag.view(batch, 1).expand(batch, v.shape[1]) 72 | v = v/v_mag 73 | return v 74 | 75 | #Code from 76 | def rot6d_to_rotmat(x): 77 | """Convert 6D rotation representation to 3x3 rotation matrix. 78 | Based on Zhou et al., "On the Continuity of Rotation Representations in Neural Networks", CVPR 2019 79 | Input: 80 | (B,6) Batch of 6-D rotation representations 81 | Output: 82 | (B,3,3) Batch of corresponding rotation matrices 83 | """ 84 | x = x.view(-1,3,2) 85 | a1 = x[:, :, 0] 86 | a2 = x[:, :, 1] 87 | b1 = F.normalize(a1) 88 | b2 = F.normalize(a2 - torch.einsum('bi,bi->b', b1, a2).unsqueeze(-1) * b1) 89 | b3 = torch.cross(b1, b2) 90 | return torch.stack((b1, b2, b3), dim=-1) 91 | 92 | 93 | def perspective_projection(points, rotation, translation, 94 | focal_length, camera_center): 95 | """ 96 | This function computes the perspective projection of a set of points. 97 | Input: 98 | points (bs, N, 3): 3D points 99 | rotation (bs, 3, 3): Camera rotation 100 | translation (bs, 3): Camera translation 101 | focal_length (bs,) or scalar: Focal length 102 | camera_center (bs, 2): Camera center 103 | """ 104 | batch_size = points.shape[0] 105 | K = torch.zeros([batch_size, 3, 3], device=points.device) 106 | K[:,0,0] = focal_length 107 | K[:,1,1] = focal_length 108 | K[:,2,2] = 1. 109 | K[:,:-1, -1] = camera_center 110 | 111 | # Transform points 112 | points = torch.einsum('bij,bkj->bki', rotation, points) 113 | points = points + translation.unsqueeze(1) 114 | 115 | # Apply perspective distortion 116 | projected_points = points / points[:,:,-1].unsqueeze(-1) 117 | 118 | # Apply camera intrinsics 119 | projected_points = torch.einsum('bij,bkj->bki', K, projected_points) 120 | 121 | return projected_points[:, :, :-1] 122 | 123 | 124 | def estimate_translation_np(S, joints_2d, joints_conf, focal_length=5000, img_size=224): 125 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d. 
126 | Input: 127 | S: (25, 3) 3D joint locations 128 | joints: (25, 3) 2D joint locations and confidence 129 | Returns: 130 | (3,) camera translation vector 131 | """ 132 | 133 | num_joints = S.shape[0] 134 | # focal length 135 | f = np.array([focal_length,focal_length]) 136 | # optical center 137 | center = np.array([img_size/2., img_size/2.]) 138 | 139 | # transformations 140 | Z = np.reshape(np.tile(S[:,2],(2,1)).T,-1) 141 | XY = np.reshape(S[:,0:2],-1) 142 | O = np.tile(center,num_joints) 143 | F = np.tile(f,num_joints) 144 | weight2 = np.reshape(np.tile(np.sqrt(joints_conf),(2,1)).T,-1) 145 | 146 | # least squares 147 | Q = np.array([F*np.tile(np.array([1,0]),num_joints), F*np.tile(np.array([0,1]),num_joints), O-np.reshape(joints_2d,-1)]).T 148 | c = (np.reshape(joints_2d,-1)-O)*Z - F*XY 149 | 150 | # weighted least squares 151 | W = np.diagflat(weight2) 152 | Q = np.dot(W,Q) 153 | c = np.dot(W,c) 154 | 155 | # square matrix 156 | A = np.dot(Q.T,Q) 157 | b = np.dot(Q.T,c) 158 | 159 | # solution 160 | trans = np.linalg.solve(A, b) 161 | 162 | return trans 163 | 164 | 165 | def estimate_translation(S, joints_2d, focal_length=5000., img_size=224.): 166 | """Find camera translation that brings 3D joints S closest to 2D the corresponding joints_2d. 167 | Input: 168 | S: (B, 49, 3) 3D joint locations 169 | joints: (B, 49, 3) 2D joint locations and confidence 170 | Returns: 171 | (B, 3) camera translation vectors 172 | """ 173 | 174 | device = S.device 175 | # Use only joints 25:49 (GT joints) 176 | S = S[:, 25:, :].cpu().numpy() 177 | joints_2d = joints_2d[:, 25:, :].cpu().numpy() 178 | joints_conf = joints_2d[:, :, -1] 179 | joints_2d = joints_2d[:, :, :-1] 180 | trans = np.zeros((S.shape[0], 3), dtype=np.float32) 181 | # Find the translation for each example in the batch 182 | for i in range(S.shape[0]): 183 | S_i = S[i] 184 | joints_i = joints_2d[i] 185 | conf_i = joints_conf[i] 186 | trans[i] = estimate_translation_np(S_i, joints_i, conf_i, focal_length=focal_length, img_size=img_size) 187 | return torch.from_numpy(trans).to(device) 188 | 189 | 190 | 191 | 192 | def weakProjection_gpu(skel3D, scale, trans2D ): 193 | # if len(skel3D.shape)==1: 194 | # skel3D = np.reshape(skel3D, (-1,3)) 195 | 196 | skel3D = skel3D.view((skel3D.shape[0],-1,3)) 197 | trans2D = trans2D.view((trans2D.shape[0],1,2)) 198 | scale = scale.view((scale.shape[0],1,1)) 199 | skel3D_proj = scale* skel3D[:,:,:2] + trans2D 200 | 201 | return skel3D_proj#skel3D_proj.view((skel3D.shape[0],-1)) #(N, 19*2) o 202 | 203 | 204 | 205 | #(57) (1) (2) 206 | def weakProjection(skel3D, scale, trans2D ): 207 | 208 | skel3D_proj = scale* skel3D[:,:2] + trans2D 209 | 210 | return skel3D_proj#skel3D_proj.view((skel3D.shape[0],-1)) #(N, 19*2) o 211 | 212 | 213 | 214 | def rotmat_to_angleaxis(init_pred_rotmat): 215 | """ 216 | init_pred_rotmat: torch.tensor with (24,3,3) dimension 217 | """ 218 | device = init_pred_rotmat.device 219 | ones = torch.tensor([0,0,1], dtype=torch.float32,).view(1, 3, 1).expand(init_pred_rotmat.shape[1], -1, -1).to(device) 220 | 221 | pred_rotmat_hom = torch.cat([ init_pred_rotmat.view(-1, 3, 3),ones ], dim=-1) #24,3,4 222 | pred_aa = torchgeometry.rotation_matrix_to_angle_axis(pred_rotmat_hom).contiguous().view(1, -1) #[1,72] 223 | # tgm.rotation_matrix_to_angle_axis returns NaN for 0 rotation, so manually hack it 224 | pred_aa[torch.isnan(pred_aa)] = 0.0 #[1,72] 225 | pred_aa = pred_aa.view(1,24,3) 226 | 227 | return pred_aa 228 | 229 | 
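The geometry helpers above operate on batched tensors with the shapes given in their docstrings. The following is a minimal usage sketch, not part of the repository: the import path is assumed from the directory layout, and the tensors are dummy placeholders.
```
import torch
from bodymocap.utils.geometry import rot6d_to_rotmat, weakProjection_gpu

batch_size = 2
rot6d = torch.randn(batch_size * 24, 6)      # one 6D rotation per SMPL joint
rotmat = rot6d_to_rotmat(rot6d)              # -> (batch_size * 24, 3, 3)

joints3d = torch.randn(batch_size, 49, 3)    # placeholder 3D joints
cam_scale = torch.ones(batch_size, 1)        # weak-perspective scale
cam_trans = torch.zeros(batch_size, 2)       # weak-perspective translation (tx, ty)
joints2d = weakProjection_gpu(joints3d, cam_scale, cam_trans)   # (batch_size, 49, 2)
```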
-------------------------------------------------------------------------------- /docs/run_handmocap.md: -------------------------------------------------------------------------------- 1 | # Hand Motion Capture Demo 2 | 3 | Our hand module provides 3D hand motion capture output. We use the [HMR](https://akanazawa.github.io/hmr/) model, trained with several public hand pose datasets, which achieves SOTA performance among single-image based methods. See our [FrankMocap paper](https://penincillin.github.io/frank_mocap) for details. 4 |

6 | 7 |

8 | 9 | 10 | ## A Quick Start 11 | - Run the following commands. The mocap output will be shown on your screen. 12 | ``` 13 | # Using a machine with a monitor to show output on screen 14 | # OpenGL renderer is used by default (--renderer_type opengl) 15 | # The output images are also saved in ./mocap_output 16 | python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 17 | 18 | # Screenless mode (e.g., a remote server) 19 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output 20 | 21 | # Set --renderer_type to use other renderers 22 | python -m demo.demo_handmocap --input_path ./sample_data/single_totalbody.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 23 | ``` 24 | 25 | ## Run Demo with a Webcam Input 26 | - Run: 27 | ``` 28 | python -m demo.demo_handmocap --input_path webcam 29 | 30 | # or using the opengl gui renderer 31 | python -m demo.demo_handmocap --input_path webcam --renderer_type opengl_gui 32 | ``` 33 | - See below for the keys that control the view in opengl gui mode 34 | 35 | ## Run Demo for Egocentric Videos 36 | - For 3D hand pose estimation in egocentric views, use `--view_type ego_centric` 37 | ``` 38 | # With a screen 39 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --view_type ego_centric 40 | 41 | # Screenless mode (e.g., a remote server) 42 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --view_type ego_centric 43 | ``` 44 | - We use a different hand detector adjusted for egocentric views, but the 3D hand pose regressor is the same. By default, the hand module assumes ```third_view``` (see the wiring sketch below). 45 |
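- For reference, `--view_type` only changes how the hand bounding-box detector is constructed; the 3D hand regressor is shared between both view types. A condensed sketch of that wiring, taken from `demo/demo_handmocap.py`:
```
bbox_detector = HandBboxDetector(args.view_type, device)   # 'third_view' or 'ego_centric'
hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device=device)
```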

46 | 47 |

48 | 49 | ## Other Renderer Options 50 | - While opengl is faster, it requires a screen connected to your machine. You may try screenless rendering or the other rendering options described below. 51 | - Screenless Opengl Rendering 52 | - If you do not have a screen attached to your machine (e.g., remote servers), use the [xvfb-run](http://manpages.ubuntu.com/manpages/trusty/man1/xvfb-run.1.html) tool 53 | ``` 54 | # The output images are also saved in ./mocap_output 55 | xvfb-run -a python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type opengl 56 | ``` 57 | - [Pytorch3D](https://pytorch3d.org/) 58 | - We use pytorch3d only for rendering purposes. 59 | - Run the following command to use the pytorch3d renderer 60 | ``` 61 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type pytorch3d 62 | ``` 63 | - [OpenDR](https://github.com/mattloper/opendr/wiki) 64 | - Alternatively, run the following command to use the opendr renderer 65 | ``` 66 | python -m demo.demo_handmocap --input_path ./sample_data/han_hand_short.mp4 --out_dir ./mocap_output --renderer_type opendr 67 | ``` 68 | 69 | ## Keys for OpenGL GUI Mode 70 | - In OpenGL GUI visualization mode, you can use the mouse and keyboard to change the viewpoint. 71 | - This mode requires a screen connected to your machine. 72 | - Keys in the OpenGL 3D window 73 | - mouse-Left: view rotation 74 | - mouse-Right: view zoom changes 75 | - shift + mouse-Left: view pan 76 | - C: toggle for image view/3D free view 77 | - w: toggle wireframe/solid mesh 78 | - j: toggle skeleton visualization 79 | - R: automatically rotate views 80 | - f: toggle floor drawing 81 | - q: exit program 82 | 83 | 84 | ## Run Demo with Precomputed Bboxes 85 | - You can use precomputed bboxes without running any detectors. Save the bboxes for each image in json format. Each json should contain the input image path. 86 | - Assuming your bboxes are `/your/bbox_dir/XXX.json`: 87 | ``` 88 | python -m demo.demo_handmocap --input_path /your/bbox_dir --out_dir ./mocap_output 89 | ``` 90 | - Bbox format (json): 91 | ``` 92 | {"image_path": "xxx.jpg", "hand_bbox_list":[{"left_hand":[x,y,w,h], "right_hand":[x,y,w,h]}], "body_bbox_list":[[x,y,w,h]]} 93 | ``` 94 | - Note that the bbox format is [minX,minY,width,height] 95 | - For example: 96 | ``` 97 | {"image_path": "./sample_data/images/cj_dance_01_03_1_00075.png", "body_bbox_list": [[149, 380, 242, 565]], "hand_bbox_list": [{"left_hand": [288.9151611328125, 376.70184326171875, 39.796295166015625, 51.72357177734375], "right_hand": [234.97779846191406, 363.4115295410156, 50.28489685058594, 57.89691162109375]}]} 98 | ``` 99 | ## Options 100 | ### Input options 101 | - `--input_path webcam`: Run demo with a webcam input 102 | - `--input_path /your/path/video.mp4`: Run demo for a video file (mp4, avi, mov) 103 | - `--input_path /your/dirPath`: Run demo for a folder that contains image sequences 104 | - `--input_path /your/bboxDirPath`: Run demo for a folder that contains bbox json files. See [bbox format](https://github.com/facebookresearch/eft/blob/master/docs/README_dataformat.md#bbox-format-json) 105 | 106 | - `--view_type`: The view type of input. 
It could be ```third_view``` or```ego_centric``` 107 | 108 | 109 | ### Output options 110 | - `--out_dir ./outputdirname`: Save the output images into files 111 | - `--save_pred_pkl`: Save the pose reconstruction data (SMPL parameters and vertices) into pkl files (requires `--out_dir ./outputdirname`) 112 | - `--save_bbox_output`: Save the bbox data in json files (bbox_xywh format) (requires `--out_dir ./outputdirname`) 113 | - `--no_display`: Do not visualize output on the screen 114 | 115 | ### Other options 116 | - `--use_smplx`: Use SMPLX model for body pose estimation (instead of SMPL). This uses a different pre-trainined weights and may have different performance. 117 | - `--start_frame 100 --end_frame 200`: Specify start and end frames (e.g., 100th frame and 200th frame in this example) 118 | - `--single_person`: To enforce single person mocap (to avoid outlier bboxes). This mode chooses the biggest bbox. 119 | 120 | ## License 121 | - [CC-BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode). 122 | See the [LICENSE](LICENSE) file. 123 | 124 | 176 | -------------------------------------------------------------------------------- /bodymocap/body_mocap_api.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import cv2 4 | import sys 5 | import torch 6 | import numpy as np 7 | import pickle 8 | from torchvision.transforms import Normalize 9 | 10 | from bodymocap.models import hmr, SMPL, SMPLX 11 | from bodymocap import constants 12 | from bodymocap.utils.imutils import crop, crop_bboxInfo, process_image_bbox, process_image_keypoints, bbox_from_keypoints 13 | from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm 14 | import mocap_utils.geometry_utils as gu 15 | 16 | 17 | class BodyMocap(object): 18 | def __init__(self, regressor_checkpoint, smpl_dir, device=torch.device('cuda'), use_smplx=False): 19 | 20 | self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 21 | 22 | # Load parametric model (SMPLX or SMPL) 23 | if use_smplx: 24 | smplModelPath = smpl_dir + '/SMPLX_NEUTRAL.pkl' 25 | self.smpl = SMPLX(smpl_dir, 26 | batch_size=1, 27 | num_betas = 10, 28 | use_pca = False, 29 | create_transl=False).to(self.device) 30 | self.use_smplx = True 31 | else: 32 | smplModelPath = smpl_dir + '/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl' 33 | self.smpl = SMPL(smplModelPath, batch_size=1, create_transl=False).to(self.device) 34 | self.use_smplx = False 35 | 36 | #Load pre-trained neural network 37 | SMPL_MEAN_PARAMS = './extra_data/body_module/data_from_spin/smpl_mean_params.npz' 38 | self.model_regressor = hmr(SMPL_MEAN_PARAMS).to(self.device) 39 | checkpoint = torch.load(regressor_checkpoint) 40 | self.model_regressor.load_state_dict(checkpoint['model'], strict=False) 41 | self.model_regressor.eval() 42 | 43 | 44 | def regress(self, img_original, body_bbox_list): 45 | """ 46 | args: 47 | img_original: original raw image (BGR order by using cv2.imread) 48 | body_bbox: bounding box around the target: (minX, minY, width, height) 49 | outputs: 50 | pred_vertices_img: 51 | pred_joints_vis_img: 52 | pred_rotmat 53 | pred_betas 54 | pred_camera 55 | bbox: [bbr[0], bbr[1],bbr[0]+bbr[2], bbr[1]+bbr[3]]) 56 | bboxTopLeft: bbox top left (redundant) 57 | boxScale_o2n: bbox scaling factor (redundant) 58 | """ 59 | pred_output_list = list() 60 | 61 | for body_bbox in body_bbox_list: 62 | img, norm_img, boxScale_o2n, bboxTopLeft, bbox = 
process_image_bbox( 63 | img_original, body_bbox, input_res=constants.IMG_RES) 64 | bboxTopLeft = np.array(bboxTopLeft) 65 | 66 | # bboxTopLeft = bbox['bboxXYWH'][:2] 67 | if img is None: 68 | pred_output_list.append(None) 69 | continue 70 | 71 | with torch.no_grad(): 72 | # model forward 73 | pred_rotmat, pred_betas, pred_camera = self.model_regressor(norm_img.to(self.device)) 74 | 75 | #Convert rot_mat to aa since hands are always in aa 76 | # pred_aa = rotmat3x3_to_angle_axis(pred_rotmat) 77 | pred_aa = gu.rotation_matrix_to_angle_axis(pred_rotmat).cuda() 78 | pred_aa = pred_aa.reshape(pred_aa.shape[0], 72) 79 | smpl_output = self.smpl( 80 | betas=pred_betas, 81 | body_pose=pred_aa[:,3:], 82 | global_orient=pred_aa[:,:3], 83 | pose2rot=True) 84 | pred_vertices = smpl_output.vertices 85 | pred_joints_3d = smpl_output.joints 86 | 87 | pred_vertices = pred_vertices[0].cpu().numpy() 88 | 89 | pred_camera = pred_camera.cpu().numpy().ravel() 90 | camScale = pred_camera[0] # *1.15 91 | camTrans = pred_camera[1:] 92 | 93 | pred_output = dict() 94 | # Convert mesh to original image space (X,Y are aligned to image) 95 | # 1. SMPL -> 2D bbox 96 | # 2. 2D bbox -> original 2D image 97 | pred_vertices_bbox = convert_smpl_to_bbox(pred_vertices, camScale, camTrans) 98 | pred_vertices_img = convert_bbox_to_oriIm( 99 | pred_vertices_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 100 | 101 | # Convert joint to original image space (X,Y are aligned to image) 102 | pred_joints_3d = pred_joints_3d[0].cpu().numpy() # (1,49,3) 103 | pred_joints_vis = pred_joints_3d[:,:3] # (49,3) 104 | pred_joints_vis_bbox = convert_smpl_to_bbox(pred_joints_vis, camScale, camTrans) 105 | pred_joints_vis_img = convert_bbox_to_oriIm( 106 | pred_joints_vis_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 107 | 108 | # Output 109 | pred_output['img_cropped'] = img[:, :, ::-1] 110 | pred_output['pred_vertices_smpl'] = smpl_output.vertices[0].cpu().numpy() # SMPL vertex in original smpl space 111 | pred_output['pred_vertices_img'] = pred_vertices_img # SMPL vertex in image space 112 | pred_output['pred_joints_img'] = pred_joints_vis_img # SMPL joints in image space 113 | 114 | pred_aa_tensor = gu.rotation_matrix_to_angle_axis(pred_rotmat.detach().cpu()[0]) 115 | pred_output['pred_body_pose'] = pred_aa_tensor.cpu().numpy().reshape(1, 72) # (1, 72) 116 | 117 | pred_output['pred_rotmat'] = pred_rotmat.detach().cpu().numpy() # (1, 24, 3, 3) 118 | pred_output['pred_betas'] = pred_betas.detach().cpu().numpy() # (1, 10) 119 | 120 | pred_output['pred_camera'] = pred_camera 121 | pred_output['bbox_top_left'] = bboxTopLeft 122 | pred_output['bbox_scale_ratio'] = boxScale_o2n 123 | pred_output['faces'] = self.smpl.faces 124 | 125 | if self.use_smplx: 126 | img_center = np.array((img_original.shape[1], img_original.shape[0]) ) * 0.5 127 | # right hand 128 | pred_joints = smpl_output.right_hand_joints[0].cpu().numpy() 129 | pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) 130 | pred_joints_img = convert_bbox_to_oriIm( 131 | pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], img_original.shape[0]) 132 | pred_output['right_hand_joints_img_coord'] = pred_joints_img 133 | # left hand 134 | pred_joints = smpl_output.left_hand_joints[0].cpu().numpy() 135 | pred_joints_bbox = convert_smpl_to_bbox(pred_joints, camScale, camTrans) 136 | pred_joints_img = convert_bbox_to_oriIm( 137 | pred_joints_bbox, boxScale_o2n, bboxTopLeft, img_original.shape[1], 
img_original.shape[0]) 138 | pred_output['left_hand_joints_img_coord'] = pred_joints_img 139 | 140 | pred_output_list.append(pred_output) 141 | 142 | return pred_output_list 143 | 144 | 145 | def get_hand_bboxes(self, pred_body_list, img_shape): 146 | """ 147 | args: 148 | pred_body_list: output of body regresion 149 | img_shape: img_height, img_width 150 | outputs: 151 | hand_bbox_list: 152 | """ 153 | hand_bbox_list = list() 154 | for pred_body in pred_body_list: 155 | hand_bbox = dict( 156 | left_hand = None, 157 | right_hand = None 158 | ) 159 | if pred_body is None: 160 | hand_bbox_list.append(hand_bbox) 161 | else: 162 | for hand_type in hand_bbox: 163 | key = f'{hand_type}_joints_img_coord' 164 | pred_joints_vis_img = pred_body[key] 165 | 166 | if pred_joints_vis_img is not None: 167 | # get initial bbox 168 | x0, x1 = np.min(pred_joints_vis_img[:, 0]), np.max(pred_joints_vis_img[:, 0]) 169 | y0, y1 = np.min(pred_joints_vis_img[:, 1]), np.max(pred_joints_vis_img[:, 1]) 170 | width, height = x1-x0, y1-y0 171 | # extend the obtained bbox 172 | margin = int(max(height, width) * 0.2) 173 | img_height, img_width = img_shape 174 | x0 = max(x0 - margin, 0) 175 | y0 = max(y0 - margin, 0) 176 | x1 = min(x1 + margin, img_width) 177 | y1 = min(y1 + margin, img_height) 178 | # result bbox in (x0, y0, w, h) format 179 | hand_bbox[hand_type] = np.array([x0, y0, x1-x0, y1-y0]) # in (x, y, w, h ) format 180 | 181 | hand_bbox_list.append(hand_bbox) 182 | 183 | return hand_bbox_list 184 | -------------------------------------------------------------------------------- /demo/demo_frankmocap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | from torchvision.transforms import Normalize 8 | import numpy as np 9 | import cv2 10 | import argparse 11 | import json 12 | import pickle 13 | 14 | ############# input parameters ############# 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | from handmocap.hand_mocap_api import HandMocap 18 | import mocap_utils.demo_utils as demo_utils 19 | import mocap_utils.general_utils as gnu 20 | from mocap_utils.timer import Timer 21 | from datetime import datetime 22 | 23 | from bodymocap.body_bbox_detector import BodyPoseEstimator 24 | from handmocap.hand_bbox_detector import HandBboxDetector 25 | from integration.copy_and_paste import integration_copy_paste 26 | 27 | import renderer.image_utils as imu 28 | from renderer.viewer2D import ImShow 29 | 30 | 31 | def __filter_bbox_list(body_bbox_list, hand_bbox_list, single_person): 32 | # (to make the order as consistent as possible without tracking) 33 | bbox_size = [ (x[2] * x[3]) for x in body_bbox_list] 34 | idx_big2small = np.argsort(bbox_size)[::-1] 35 | body_bbox_list = [ body_bbox_list[i] for i in idx_big2small ] 36 | hand_bbox_list = [hand_bbox_list[i] for i in idx_big2small] 37 | 38 | if single_person and len(body_bbox_list)>0: 39 | body_bbox_list = [body_bbox_list[0], ] 40 | hand_bbox_list = [hand_bbox_list[0], ] 41 | 42 | return body_bbox_list, hand_bbox_list 43 | 44 | 45 | def run_regress( 46 | args, img_original_bgr, 47 | body_bbox_list, hand_bbox_list, bbox_detector, 48 | body_mocap, hand_mocap 49 | ): 50 | cond1 = len(body_bbox_list) > 0 and len(hand_bbox_list) > 0 51 | cond2 = not args.frankmocap_fast_mode 52 | 53 | # use pre-computed bbox or use slow detection mode 54 | if cond1 or cond2: 
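        # full (non-fast) path: reuse the given bboxes when both body and hand
        # boxes are available (cond1); otherwise run the joint body+hand detector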
55 | if not cond1 and cond2: 56 | # run detection only when bbox is not available 57 | body_pose_list, body_bbox_list, hand_bbox_list, _ = \ 58 | bbox_detector.detect_hand_bbox(img_original_bgr.copy()) 59 | else: 60 | print("Use pre-computed bounding boxes") 61 | assert len(body_bbox_list) == len(hand_bbox_list) 62 | 63 | if len(body_bbox_list) < 1: 64 | return list(), list(), list() 65 | 66 | # sort the bbox using bbox size 67 | # only keep on bbox if args.single_person is set 68 | body_bbox_list, hand_bbox_list = __filter_bbox_list( 69 | body_bbox_list, hand_bbox_list, args.single_person) 70 | 71 | # hand & body pose regression 72 | pred_hand_list = hand_mocap.regress( 73 | img_original_bgr, hand_bbox_list, add_margin=True) 74 | pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list) 75 | assert len(hand_bbox_list) == len(pred_hand_list) 76 | assert len(pred_hand_list) == len(pred_body_list) 77 | 78 | else: 79 | _, body_bbox_list = bbox_detector.detect_body_bbox(img_original_bgr.copy()) 80 | 81 | if len(body_bbox_list) < 1: 82 | return list(), list(), list() 83 | 84 | # sort the bbox using bbox size 85 | # only keep on bbox if args.single_person is set 86 | hand_bbox_list = [None, ] * len(body_bbox_list) 87 | body_bbox_list, _ = __filter_bbox_list( 88 | body_bbox_list, hand_bbox_list, args.single_person) 89 | 90 | # body regression first 91 | pred_body_list = body_mocap.regress(img_original_bgr, body_bbox_list) 92 | assert len(body_bbox_list) == len(pred_body_list) 93 | 94 | # get hand bbox from body 95 | hand_bbox_list = body_mocap.get_hand_bboxes(pred_body_list, img_original_bgr.shape[:2]) 96 | assert len(pred_body_list) == len(hand_bbox_list) 97 | 98 | # hand regression 99 | pred_hand_list = hand_mocap.regress( 100 | img_original_bgr, hand_bbox_list, add_margin=True) 101 | assert len(hand_bbox_list) == len(pred_hand_list) 102 | 103 | # integration by copy-and-paste 104 | integral_output_list = integration_copy_paste( 105 | pred_body_list, pred_hand_list, body_mocap.smpl, img_original_bgr.shape) 106 | 107 | return body_bbox_list, hand_bbox_list, integral_output_list 108 | 109 | 110 | def run_frank_mocap(args, bbox_detector, body_mocap, hand_mocap, visualizer): 111 | #Setup input data to handle different types of inputs 112 | input_type, input_data = demo_utils.setup_input(args) 113 | 114 | cur_frame = args.start_frame 115 | video_frame = 0 116 | while True: 117 | # load data 118 | load_bbox = False 119 | 120 | if input_type =='image_dir': 121 | if cur_frame < len(input_data): 122 | image_path = input_data[cur_frame] 123 | img_original_bgr = cv2.imread(image_path) 124 | else: 125 | img_original_bgr = None 126 | 127 | elif input_type == 'bbox_dir': 128 | if cur_frame < len(input_data): 129 | image_path = input_data[cur_frame]['image_path'] 130 | hand_bbox_list = input_data[cur_frame]['hand_bbox_list'] 131 | body_bbox_list = input_data[cur_frame]['body_bbox_list'] 132 | img_original_bgr = cv2.imread(image_path) 133 | load_bbox = True 134 | else: 135 | img_original_bgr = None 136 | 137 | elif input_type == 'video': 138 | _, img_original_bgr = input_data.read() 139 | if video_frame < cur_frame: 140 | video_frame += 1 141 | continue 142 | # save the obtained video frames 143 | image_path = osp.join(args.out_dir, "frames", f"{cur_frame:05d}.jpg") 144 | if img_original_bgr is not None: 145 | video_frame += 1 146 | if args.save_frame: 147 | gnu.make_subdir(image_path) 148 | cv2.imwrite(image_path, img_original_bgr) 149 | 150 | elif input_type == 'webcam': 151 | _, img_original_bgr 
= input_data.read() 152 | 153 | if video_frame < cur_frame: 154 | video_frame += 1 155 | continue 156 | # save the obtained video frames 157 | image_path = osp.join(args.out_dir, "frames", f"scene_{cur_frame:05d}.jpg") 158 | if img_original_bgr is not None: 159 | video_frame += 1 160 | if args.save_frame: 161 | gnu.make_subdir(image_path) 162 | cv2.imwrite(image_path, img_original_bgr) 163 | else: 164 | assert False, "Unknown input_type" 165 | 166 | cur_frame +=1 167 | if img_original_bgr is None or cur_frame > args.end_frame: 168 | break 169 | print("--------------------------------------") 170 | 171 | # bbox detection 172 | if not load_bbox: 173 | body_bbox_list, hand_bbox_list = list(), list() 174 | 175 | # regression (includes integration) 176 | body_bbox_list, hand_bbox_list, pred_output_list = run_regress( 177 | args, img_original_bgr, 178 | body_bbox_list, hand_bbox_list, bbox_detector, 179 | body_mocap, hand_mocap) 180 | 181 | # save the obtained body & hand bbox to json file 182 | if args.save_bbox_output: 183 | demo_utils.save_info_to_json(args, image_path, body_bbox_list, hand_bbox_list) 184 | 185 | if len(body_bbox_list) < 1: 186 | print(f"No body deteced: {image_path}") 187 | continue 188 | 189 | pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 190 | 191 | # visualization 192 | res_img = visualizer.visualize( 193 | img_original_bgr, 194 | pred_mesh_list = pred_mesh_list, 195 | body_bbox_list = body_bbox_list, 196 | hand_bbox_list = hand_bbox_list) 197 | 198 | # show result in the screen 199 | if not args.no_display: 200 | res_img = res_img.astype(np.uint8) 201 | ImShow(res_img) 202 | 203 | # save result image 204 | if args.out_dir is not None: 205 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 206 | 207 | # save predictions to pkl 208 | if args.save_pred_pkl: 209 | demo_type = 'frank' 210 | demo_utils.save_pred_to_pkl( 211 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 212 | 213 | print(f"Processed : {image_path}") 214 | 215 | # save images as a video 216 | if not args.no_video_out and input_type in ['video', 'webcam']: 217 | demo_utils.gen_video_out(args.out_dir, args.seq_name) 218 | 219 | if input_type =='webcam' and input_data is not None: 220 | input_data.release() 221 | cv2.destroyAllWindows() 222 | 223 | def main(): 224 | args = DemoOptions().parse() 225 | args.use_smplx = True 226 | 227 | device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') 228 | assert torch.cuda.is_available(), "Current version only supports GPU" 229 | 230 | hand_bbox_detector = HandBboxDetector('third_view', device) 231 | 232 | #Set Mocap regressor 233 | body_mocap = BodyMocap(args.checkpoint_body_smplx, args.smpl_dir, device = device, use_smplx= True) 234 | hand_mocap = HandMocap(args.checkpoint_hand, args.smpl_dir, device = device) 235 | 236 | # Set Visualizer 237 | if args.renderer_type in ['pytorch3d', 'opendr']: 238 | from renderer.screen_free_visualizer import Visualizer 239 | else: 240 | from renderer.visualizer import Visualizer 241 | visualizer = Visualizer(args.renderer_type) 242 | 243 | run_frank_mocap(args, hand_bbox_detector, body_mocap, hand_mocap, visualizer) 244 | 245 | 246 | if __name__ == '__main__': 247 | main() -------------------------------------------------------------------------------- /handmocap/hand_modules/h3dw_model.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
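A note on the helper defined just below: extract_hand_output() slices a full SMPL-X forward pass into per-hand vertices and 21 hand joints reordered to the Panoptic convention (wrist first, then thumb to pinky). The following is only a minimal CPU sketch of calling it, assuming the SMPL-X model pickle and the hand-info pickle referenced elsewhere in this repository are available at the paths shown (taken from demo/demo_visualize_prediction.py); adjust them to your local setup.

import torch
import smplx
import mocap_utils.general_utils as gnu
from handmocap.hand_modules.h3dw_model import extract_hand_output

# SMPL-X model and hand-info files as referenced in demo/demo_visualize_prediction.py
smplx_model = smplx.create(
    './extra_data/smpl/SMPLX_NEUTRAL.pkl', model_type='smplx',
    batch_size=1, gender='neutral', num_betas=10, use_pca=False, ext='pkl')
hand_info = gnu.load_pkl('extra_data/hand_module/SMPLX_HAND_INFO.pkl')

# neutral-pose forward pass, then pull out the right hand
output = smplx_model(
    global_orient=torch.zeros(1, 3), body_pose=torch.zeros(1, 63),
    right_hand_pose=torch.zeros(1, 45), betas=torch.zeros(1, 10),
    return_verts=True)
hand = extract_hand_output(
    output, hand_type='right', hand_info=hand_info, use_cuda=False)
print(hand['hand_joints'].shape)  # (1, 21, 3): wrist + 20 finger joints, Panoptic order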
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | # dct is the abbr. of Human Model recovery with Densepose supervision 7 | import numpy as np 8 | import torch 9 | import os 10 | import sys 11 | import shutil 12 | import os.path as osp 13 | from collections import OrderedDict 14 | import itertools 15 | import torch.nn.functional as F 16 | import torch.nn as nn 17 | from torch.nn.parallel import DistributedDataParallel 18 | import pdb 19 | import cv2 20 | from . import resnet 21 | from handmocap.hand_modules.h3dw_networks import H3DWEncoder 22 | import time 23 | import mocap_utils.general_utils as gnu 24 | import smplx 25 | import pdb 26 | 27 | 28 | def extract_hand_output(output, hand_type, hand_info, top_finger_joints_type='ave', use_cuda=True): 29 | assert hand_type in ['left', 'right'] 30 | 31 | if hand_type == 'left': 32 | wrist_idx, hand_start_idx, middle_finger_idx = 20, 25, 28 33 | else: 34 | wrist_idx, hand_start_idx, middle_finger_idx = 21, 40, 43 35 | 36 | vertices = output.vertices 37 | joints = output.joints 38 | vertices_shift = vertices - joints[:, hand_start_idx:hand_start_idx+1, :] 39 | 40 | hand_verts_idx = torch.Tensor(hand_info[f'{hand_type}_hand_verts_idx']).long() 41 | if use_cuda: 42 | hand_verts_idx = hand_verts_idx.cuda() 43 | 44 | hand_verts = vertices[:, hand_verts_idx, :] 45 | hand_verts_shift = hand_verts - joints[:, hand_start_idx:hand_start_idx+1, :] 46 | 47 | # Hand joints 48 | if hand_type == 'left': 49 | hand_idxs = [20] + list(range(25,40)) + list(range(66, 71)) # 20 for left wrist. 20 finger joints 50 | else: 51 | hand_idxs = [21] + list(range(40,55)) + list(range(71, 76)) # 21 for right wrist. 20 finger joints 52 | smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] 53 | hand_joints = joints[:, hand_idxs, :][:, smplx_hand_to_panoptic, :] 54 | hand_joints_shift = hand_joints - joints[:, hand_start_idx:hand_start_idx+1, :] 55 | 56 | output = dict( 57 | wrist_idx = wrist_idx, 58 | hand_start_idx = hand_start_idx, 59 | middle_finger_idx = middle_finger_idx, 60 | vertices_shift = vertices_shift, 61 | hand_vertices = hand_verts, 62 | hand_vertices_shift = hand_verts_shift, 63 | hand_joints = hand_joints, 64 | hand_joints_shift = hand_joints_shift 65 | ) 66 | return output 67 | 68 | 69 | class H3DWModel(object): 70 | @property 71 | def name(self): 72 | return 'H3DWModel' 73 | 74 | def __init__(self, opt): 75 | self.opt = opt 76 | self.Tensor = torch.cuda.FloatTensor 77 | 78 | # set params 79 | self.inputSize = opt.inputSize 80 | self.total_params_dim = opt.total_params_dim 81 | self.cam_params_dim = opt.cam_params_dim 82 | self.pose_params_dim = opt.pose_params_dim 83 | self.shape_params_dim = opt.shape_params_dim 84 | self.top_finger_joints_type = opt.top_finger_joints_type 85 | 86 | assert(self.total_params_dim == 87 | self.cam_params_dim+self.pose_params_dim+self.shape_params_dim) 88 | 89 | if opt.dist: 90 | self.batch_size = opt.batchSize // torch.distributed.get_world_size() 91 | else: 92 | self.batch_size = opt.batchSize 93 | nb = self.batch_size 94 | 95 | # set input image and 2d keypoints 96 | self.input_img = self.Tensor( 97 | nb, opt.input_nc, self.inputSize, self.inputSize) 98 | 99 | # joints 2d 100 | self.keypoints = self.Tensor(nb, opt.num_joints, 2) 101 | self.keypoints_weights = self.Tensor(nb, opt.num_joints) 102 | 103 | # mano pose params 104 | self.gt_pose_params = self.Tensor(nb, opt.pose_params_dim) 105 | self.mano_params_weight = 
self.Tensor(nb, 1) 106 | 107 | # joints 3d 108 | self.joints_3d = self.Tensor(nb, opt.num_joints, 3) 109 | self.joints_3d_weight = self.Tensor(nb, opt.num_joints, 1) 110 | 111 | # load mean params, the mean params are from HMR 112 | self.mean_param_file = osp.join( 113 | opt.model_root, opt.mean_param_file) 114 | self.load_params() 115 | 116 | # set differential SMPL (implemented with pytorch) and smpl_renderer 117 | # smplx_model_path = osp.join(opt.model_root, opt.smplx_model_file) 118 | smplx_model_path = opt.smplx_model_file 119 | self.smplx = smplx.create( 120 | smplx_model_path, 121 | model_type = "smplx", 122 | batch_size = self.batch_size, 123 | gender = 'neutral', 124 | num_betas = 10, 125 | use_pca = False, 126 | ext='pkl').cuda() 127 | 128 | # set encoder and optimizer 129 | self.encoder = H3DWEncoder(opt, self.mean_params).cuda() 130 | if opt.dist: 131 | self.encoder = DistributedDataParallel( 132 | self.encoder, device_ids=[torch.cuda.current_device()]) 133 | 134 | checkpoint_path = opt.checkpoint_path 135 | if not osp.exists(checkpoint_path): 136 | print(f"Error: {checkpoint_path} does not exists") 137 | self.success_load = False 138 | else: 139 | if self.opt.dist: 140 | self.encoder.module.load_state_dict(torch.load( 141 | checkpoint_path, map_location=lambda storage, loc: storage.cuda(torch.cuda.current_device()))) 142 | else: 143 | saved_weights = torch.load(checkpoint_path) 144 | self.encoder.load_state_dict(saved_weights) 145 | self.success_load = True 146 | 147 | 148 | def load_params(self): 149 | # load mean params first 150 | mean_vals = gnu.load_pkl(self.mean_param_file) 151 | mean_params = np.zeros((1, self.total_params_dim)) 152 | 153 | # set camera model first 154 | mean_params[0, 0] = 5.0 155 | 156 | # set pose (might be problematic) 157 | mean_pose = mean_vals['mean_pose'][3:] 158 | # set hand global rotation 159 | mean_pose = np.concatenate( (np.zeros((3,)), mean_pose) ) 160 | mean_pose = mean_pose[None, :] 161 | 162 | # set shape 163 | mean_shape = np.zeros((1, 10)) 164 | mean_params[0, 3:] = np.hstack((mean_pose, mean_shape)) 165 | # concat them together 166 | mean_params = np.repeat(mean_params, self.batch_size, axis=0) 167 | self.mean_params = torch.from_numpy(mean_params).float() 168 | self.mean_params.requires_grad = False 169 | 170 | # define global rotation 171 | self.global_orient = torch.zeros((self.batch_size, 3), dtype=torch.float32).cuda() 172 | # self.global_orient[:, 0] = np.pi 173 | self.global_orient.requires_grad = False 174 | 175 | # load smplx-hand faces 176 | hand_info_file = osp.join(self.opt.model_root, self.opt.smplx_hand_info_file) 177 | 178 | self.hand_info = gnu.load_pkl(hand_info_file) 179 | self.right_hand_faces_holistic = self.hand_info['right_hand_faces_holistic'] 180 | self.right_hand_faces_local = self.hand_info['right_hand_faces_local'] 181 | self.right_hand_verts_idx = np.array(self.hand_info['right_hand_verts_idx'], dtype=np.int32) 182 | 183 | 184 | def set_input_imgonly(self, input): 185 | # image 186 | input_img = input['img'] 187 | self.input_img.resize_(input_img.size()).copy_(input_img) 188 | 189 | 190 | def get_smplx_output(self, pose_params, shape_params=None): 191 | hand_rotation = pose_params[:, :3] 192 | hand_pose = pose_params[:, 3:] 193 | body_pose = torch.zeros((self.batch_size, 63)).float().cuda() 194 | body_pose[:, 60:] = hand_rotation # set right hand rotation 195 | 196 | output = self.smplx( 197 | global_orient = self.global_orient, 198 | body_pose = body_pose, 199 | right_hand_pose = hand_pose, 200 | betas = 
shape_params, 201 | return_verts = True) 202 | 203 | hand_output = extract_hand_output( 204 | output, 205 | hand_type = 'right', 206 | hand_info = self.hand_info, 207 | top_finger_joints_type = self.top_finger_joints_type, 208 | use_cuda=True) 209 | 210 | pred_verts = hand_output['vertices_shift'] 211 | pred_joints_3d = hand_output['hand_joints_shift'] 212 | return pred_verts, pred_joints_3d 213 | 214 | 215 | def forward(self): 216 | # get predicted params first 217 | self.output = self.encoder(self.input_img) 218 | # print(self.output.mean()) 219 | self.final_params = self.output 220 | # self.final_params = self.output + self.mean_params 221 | 222 | # get predicted params for cam, pose, shape 223 | cam_dim = self.cam_params_dim 224 | pose_dim = self.pose_params_dim 225 | shape_dim = self.shape_params_dim 226 | self.pred_cam_params = self.final_params[:, :cam_dim] 227 | self.pred_pose_params = self.final_params[:, cam_dim: ( 228 | cam_dim + pose_dim)] 229 | self.pred_shape_params = self.final_params[:, (cam_dim + pose_dim):] 230 | 231 | # get predicted smpl verts and joints, 232 | self.pred_verts, self.pred_joints_3d = self.get_smplx_output( 233 | self.pred_pose_params, self.pred_shape_params) 234 | 235 | 236 | def test(self): 237 | with torch.no_grad(): 238 | self.forward() 239 | 240 | 241 | def get_pred_result(self): 242 | pred_result = OrderedDict( 243 | cams = self.pred_cam_params.cpu().numpy(), 244 | pred_shape_params = self.pred_shape_params.cpu().numpy(), 245 | pred_pose_params = self.pred_pose_params.cpu().numpy(), 246 | pred_verts = self.pred_verts.cpu().numpy()[:, self.right_hand_verts_idx, :], 247 | pred_joints_3d = self.pred_joints_3d.cpu().numpy(), 248 | ) 249 | return pred_result 250 | 251 | 252 | def eval(self): 253 | self.encoder.eval() -------------------------------------------------------------------------------- /demo/demo_visualize_prediction.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
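Context for the script below: it re-renders meshes from the prediction .pkl files written by the demos when saving of predictions (args.save_pred_pkl) is enabled. The fields it relies on can be inspected directly; a small sketch, where the file name is hypothetical and any .pkl under args.pkl_dir would do:

import mocap_utils.general_utils as gnu

# hypothetical path to one saved prediction file
saved_data = gnu.load_pkl('./mocap_output/mocap/00000_prediction_result.pkl')

# keys read by visualize_prediction() in this script
for key in ['demo_type', 'smpl_type', 'image_path',
            'body_bbox_list', 'hand_bbox_list', 'pred_output_list', 'save_mesh']:
    print(key, type(saved_data[key]))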
2 | 3 | import os 4 | import sys 5 | import os.path as osp 6 | import torch 7 | import numpy as np 8 | import cv2 9 | import argparse 10 | import json 11 | import pickle 12 | import smplx 13 | from datetime import datetime 14 | 15 | from demo.demo_options import DemoOptions 16 | from bodymocap.body_mocap_api import BodyMocap 17 | import mocap_utils.demo_utils as demo_utils 18 | import mocap_utils.general_utils as gnu 19 | from bodymocap.models import SMPL, SMPLX 20 | from handmocap.hand_modules.h3dw_model import extract_hand_output 21 | from mocap_utils.coordconv import convert_smpl_to_bbox, convert_bbox_to_oriIm 22 | 23 | 24 | def __get_data_type(pkl_files): 25 | for pkl_file in pkl_files: 26 | saved_data = gnu.load_pkl(pkl_file) 27 | return saved_data['demo_type'], saved_data['smpl_type'] 28 | 29 | 30 | def __get_smpl_model(demo_type, smpl_type): 31 | smplx_model_path = './extra_data/smpl/SMPLX_NEUTRAL.pkl' 32 | smpl_model_path = './extra_data/smpl//basicModel_neutral_lbs_10_207_0_v1.0.0.pkl' 33 | 34 | if demo_type == 'hand': 35 | # use original smpl-x 36 | smpl = smplx.create( 37 | smplx_model_path, 38 | model_type = "smplx", 39 | batch_size = 1, 40 | gender = 'neutral', 41 | num_betas = 10, 42 | use_pca = False, 43 | ext='pkl' 44 | ) 45 | else: 46 | if smpl_type == 'smplx': 47 | # use modified smpl-x from body module 48 | smpl = SMPLX( 49 | smplx_model_path, 50 | batch_size=1, 51 | num_betas = 10, 52 | use_pca = False, 53 | create_transl=False) 54 | else: 55 | # use modified smpl from body module 56 | assert smpl_type == 'smpl' 57 | smpl = SMPL( 58 | smpl_model_path, 59 | batch_size=1, 60 | create_transl=False) 61 | return smpl 62 | 63 | 64 | def __calc_hand_mesh(hand_type, pose_params, betas, smplx_model): 65 | hand_rotation = pose_params[:, :3] 66 | hand_pose = pose_params[:, 3:] 67 | body_pose = torch.zeros((1, 63)).float() 68 | 69 | assert hand_type in ['left_hand', 'right_hand'] 70 | if hand_type == 'right_hand': 71 | body_pose[:, 60:] = hand_rotation # set right hand rotation 72 | right_hand_pose = hand_pose 73 | left_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 74 | else: 75 | body_pose[:, 57:60] = hand_rotation # set right hand rotation 76 | left_hand_pose = hand_pose 77 | right_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 78 | 79 | output = smplx_model( 80 | global_orient = torch.zeros((1,3)), 81 | body_pose = body_pose, 82 | betas = betas, 83 | left_hand_pose = left_hand_pose, 84 | right_hand_pose = right_hand_pose, 85 | return_verts = True) 86 | 87 | hand_info_file = "extra_data/hand_module/SMPLX_HAND_INFO.pkl" 88 | hand_info = gnu.load_pkl(hand_info_file) 89 | hand_output = extract_hand_output( 90 | output, 91 | hand_type = hand_type.split("_")[0], 92 | hand_info = hand_info, 93 | top_finger_joints_type = 'ave', 94 | use_cuda = False) 95 | 96 | pred_verts = hand_output['hand_vertices_shift'].detach().numpy() 97 | faces = hand_info[f'{hand_type}_faces_local'] 98 | return pred_verts[0], faces 99 | 100 | 101 | def _calc_body_mesh(smpl_type, smpl_model, body_pose, betas, 102 | left_hand_pose, right_hand_pose): 103 | if smpl_type == 'smpl': 104 | smpl_output = smpl_model( 105 | global_orient = body_pose[:, :3], 106 | body_pose = body_pose[:, 3:], 107 | betas = betas, 108 | ) 109 | else: 110 | smpl_output = smpl_model( 111 | global_orient = body_pose[:, :3], 112 | body_pose = body_pose[:, 3:], 113 | betas = betas, 114 | left_hand_pose = left_hand_pose, 115 | right_hand_pose = right_hand_pose, 116 | ) 117 | 118 | vertices = 
smpl_output.vertices.detach().cpu().numpy()[0] 119 | faces = smpl_model.faces 120 | return vertices, faces 121 | 122 | 123 | def __calc_mesh(demo_type, smpl_type, smpl_model, img_shape, pred_output_list): 124 | for pred_output in pred_output_list: 125 | if pred_output is not None: 126 | # hand 127 | if demo_type == 'hand': 128 | assert 'left_hand' in pred_output and 'right_hand' in pred_output 129 | for hand_type in pred_output: 130 | hand_pred = pred_output[hand_type] 131 | if hand_pred is not None: 132 | pose_params = torch.from_numpy(hand_pred['pred_hand_pose']) 133 | betas = torch.from_numpy(hand_pred['pred_hand_betas']) 134 | pred_verts, hand_faces = __calc_hand_mesh(hand_type, pose_params, betas, smpl_model) 135 | hand_pred['pred_vertices_smpl'] = pred_verts 136 | 137 | cam_scale = hand_pred['pred_camera'][0] 138 | cam_trans = hand_pred['pred_camera'][1:] 139 | vert_bboxcoord = convert_smpl_to_bbox( 140 | pred_verts, cam_scale, cam_trans, bAppTransFirst=True) # SMPL space -> bbox space 141 | 142 | bbox_scale_ratio = hand_pred['bbox_scale_ratio'] 143 | bbox_top_left = hand_pred['bbox_top_left'] 144 | vert_imgcoord = convert_bbox_to_oriIm( 145 | vert_bboxcoord, bbox_scale_ratio, bbox_top_left, 146 | img_shape[1], img_shape[0]) 147 | pred_output[hand_type]['pred_vertices_img'] = vert_imgcoord 148 | # body 149 | else: 150 | pose_params = torch.from_numpy(pred_output['pred_body_pose']) 151 | betas = torch.from_numpy(pred_output['pred_betas']) 152 | if 'pred_right_hand_pose' in pred_output: 153 | pred_right_hand_pose = torch.from_numpy(pred_output['pred_right_hand_pose']) 154 | else: 155 | pred_right_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 156 | if 'pred_left_hand_pose' in pred_output: 157 | pred_left_hand_pose = torch.from_numpy(pred_output['pred_left_hand_pose']) 158 | else: 159 | pred_left_hand_pose = torch.zeros((1, 45), dtype=torch.float32) 160 | pred_verts, faces = _calc_body_mesh( 161 | smpl_type, smpl_model, pose_params, betas, pred_left_hand_pose, pred_right_hand_pose) 162 | 163 | pred_output['pred_vertices_smpl'] = pred_verts 164 | pred_output['faces'] = faces 165 | 166 | cam_scale = pred_output['pred_camera'][0] 167 | cam_trans = pred_output['pred_camera'][1:] 168 | vert_bboxcoord = convert_smpl_to_bbox( 169 | pred_verts, cam_scale, cam_trans, bAppTransFirst=False) # SMPL space -> bbox space 170 | 171 | bbox_scale_ratio = pred_output['bbox_scale_ratio'] 172 | bbox_top_left = pred_output['bbox_top_left'] 173 | vert_imgcoord = convert_bbox_to_oriIm( 174 | vert_bboxcoord, bbox_scale_ratio, bbox_top_left, 175 | img_shape[1], img_shape[0]) 176 | pred_output['pred_vertices_img'] = vert_imgcoord 177 | 178 | 179 | def visualize_prediction(args, demo_type, smpl_type, smpl_model, pkl_files, visualizer): 180 | for pkl_file in pkl_files: 181 | # load data 182 | saved_data = gnu.load_pkl(pkl_file) 183 | 184 | image_path = saved_data['image_path'] 185 | img_original_bgr = cv2.imread(image_path) 186 | if img_original_bgr is None: 187 | print(f"{image_path} does not exists, skip") 188 | 189 | print("--------------------------------------") 190 | 191 | demo_type = saved_data['demo_type'] 192 | assert saved_data['smpl_type'] == smpl_type 193 | 194 | hand_bbox_list = saved_data['hand_bbox_list'] 195 | body_bbox_list = saved_data['body_bbox_list'] 196 | pred_output_list = saved_data['pred_output_list'] 197 | 198 | if not saved_data['save_mesh']: 199 | __calc_mesh( 200 | demo_type, smpl_type, smpl_model, img_original_bgr.shape[:2], pred_output_list) 201 | else: 202 | pass 203 | 204 | 
pred_mesh_list = demo_utils.extract_mesh_from_output(pred_output_list) 205 | 206 | # visualization 207 | res_img = visualizer.visualize( 208 | img_original_bgr, 209 | pred_mesh_list = pred_mesh_list, 210 | body_bbox_list = body_bbox_list, 211 | hand_bbox_list = hand_bbox_list) 212 | 213 | # save result image 214 | demo_utils.save_res_img(args.out_dir, image_path, res_img) 215 | 216 | # save predictions to pkl 217 | if args.save_pred_pkl: 218 | args.use_smplx = smpl_type == 'smplx' 219 | demo_utils.save_pred_to_pkl( 220 | args, demo_type, image_path, body_bbox_list, hand_bbox_list, pred_output_list) 221 | 222 | 223 | def main(): 224 | args = DemoOptions().parse() 225 | 226 | # load pkl files 227 | pkl_files = gnu.get_all_files(args.pkl_dir, ".pkl", "full") 228 | 229 | # get smpl type 230 | demo_type, smpl_type = __get_data_type(pkl_files) 231 | 232 | # get smpl model 233 | smpl_model = __get_smpl_model(demo_type, smpl_type) 234 | 235 | # Set Visualizer 236 | assert args.renderer_type in ['pytorch3d', 'opendr'], \ 237 | f"{args.renderer_type} not implemented yet." 238 | from renderer.screen_free_visualizer import Visualizer 239 | visualizer = Visualizer(args.renderer_type) 240 | 241 | # load smpl model 242 | visualize_prediction(args, demo_type, smpl_type, smpl_model, pkl_files, visualizer) 243 | 244 | 245 | if __name__ == '__main__': 246 | main() -------------------------------------------------------------------------------- /bodymocap/models/smpl.py: -------------------------------------------------------------------------------- 1 | # Original code from SPIN: https://github.com/nkolot/SPIN 2 | 3 | 4 | import torch 5 | import numpy as np 6 | import smplx 7 | from smplx import SMPL as _SMPL 8 | from smplx import SMPLX as _SMPLX 9 | # from bodymocap.models.body_models import SMPLX as _SMPLX #Use our custom SMPLX 10 | # from smplx.body_models import ModelOutput 11 | # from bodymocap.models.body_models import ModelOutput 12 | from smplx.lbs import vertices2joints 13 | 14 | from bodymocap import constants 15 | 16 | from collections import namedtuple 17 | ModelOutput = namedtuple('ModelOutput', 18 | ['vertices', 'joints', 'full_pose', 'betas', 19 | 'global_orient', 20 | 'body_pose', 'expression', 21 | 'left_hand_pose', 'right_hand_pose', 22 | 'right_hand_joints', 'left_hand_joints', 23 | 'jaw_pose']) 24 | ModelOutput.__new__.__defaults__ = (None,) * len(ModelOutput._fields) 25 | 26 | 27 | class SMPL(_SMPL): 28 | """ Extension of the official SMPL implementation to support more joints """ 29 | 30 | def __init__(self, *args, **kwargs): 31 | super(SMPL, self).__init__(*args, **kwargs) 32 | joints = [constants.JOINT_MAP[i] for i in constants.JOINT_NAMES] 33 | JOINT_REGRESSOR_TRAIN_EXTRA = 'extra_data/body_module/data_from_spin//J_regressor_extra.npy' 34 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA) 35 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32)) 36 | self.joint_map = torch.tensor(joints, dtype=torch.long) 37 | 38 | def forward(self, *args, **kwargs): 39 | kwargs['get_skin'] = True 40 | smpl_output = super(SMPL, self).forward(*args, **kwargs) 41 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) #Additional 9 joints #Check doc/J_regressor_extra.png 42 | joints = torch.cat([smpl_output.joints, extra_joints], dim=1) #[N, 24 + 21, 3] + [N, 9, 3] 43 | joints = joints[:, self.joint_map, :] 44 | output = ModelOutput(vertices=smpl_output.vertices, 45 | global_orient=smpl_output.global_orient, 46 | 
body_pose=smpl_output.body_pose, 47 | joints=joints, 48 | betas=smpl_output.betas, 49 | full_pose=smpl_output.full_pose) 50 | return output 51 | 52 | 53 | 54 | class SMPLX(_SMPLX): 55 | """ Extension of the official SMPL implementation to support more joints """ 56 | 57 | def __init__(self, *args, **kwargs): 58 | kwargs['ext'] = 'pkl' #We have pkl file 59 | super(SMPLX, self).__init__(*args, **kwargs) 60 | joints = [constants.JOINT_MAP[i] for i in constants.JOINT_NAMES] 61 | JOINT_REGRESSOR_TRAIN_EXTRA_SMPLX = 'extra_data/body_module/J_regressor_extra_smplx.npy' 62 | J_regressor_extra = np.load(JOINT_REGRESSOR_TRAIN_EXTRA_SMPLX) #(9, 10475) 63 | self.register_buffer('J_regressor_extra', torch.tensor(J_regressor_extra, dtype=torch.float32)) 64 | self.joint_map = torch.tensor(joints, dtype=torch.long) 65 | 66 | def forward(self, *args, **kwargs): 67 | kwargs['get_skin'] = True 68 | 69 | #if pose parameter is for SMPL with 21 joints (ignoring root) 70 | if(kwargs['body_pose'].shape[1]==69): 71 | kwargs['body_pose'] = kwargs['body_pose'][:,:-2*3] #Ignore the last two joints (which are on the palm. Not used) 72 | 73 | if(kwargs['body_pose'].shape[1]==23): 74 | kwargs['body_pose'] = kwargs['body_pose'][:,:-2] #Ignore the last two joints (which are on the palm. Not used) 75 | 76 | smpl_output = super(SMPLX, self).forward(*args, **kwargs) 77 | extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices) 78 | # extra_joints = vertices2joints(self.J_regressor_extra, smpl_output.vertices[:,:6890]) *0 #TODO: implement this correctly 79 | 80 | #SMPL-X Joint order: https://docs.google.com/spreadsheets/d/1_1dLdaX-sbMkCKr_JzJW_RZCpwBwd7rcKkWT_VgAQ_0/edit#gid=0 81 | smplx_to_smpl = list(range(0,22)) + [28,43] + list(range(55,76)) # 28 left middle finger , 43: right middle finger 1 82 | smpl_joints = smpl_output.joints[:,smplx_to_smpl,:] # Convert SMPL-X to SMPL 127 ->45 83 | joints = torch.cat([smpl_joints, extra_joints], dim=1) # [N, 127, 3]->[N, 45, 3] + [N, 9, 3] # SMPL-X has more joints. should convert 45 84 | joints = joints[:, self.joint_map, :] 85 | 86 | # Hand joints 87 | smplx_hand_to_panoptic = [0, 13,14,15,16, 1,2,3,17, 4,5,6,18, 10,11,12,19, 7,8,9,20] #Wrist Thumb to Pinky 88 | 89 | smplx_lhand = [20] + list(range(25,40)) + list(range(66, 71)) #20 for left wrist. 20 finger joints 90 | lhand_joints = smpl_output.joints[:,smplx_lhand, :] #(N,21,3) 91 | lhand_joints = lhand_joints[:, smplx_hand_to_panoptic, :] #Convert SMPL-X hand order to paonptic hand order 92 | 93 | smplx_rhand = [21] + list(range(40,55)) + list(range(71, 76)) #21 for right wrist. 
20 finger joints 94 | rhand_joints = smpl_output.joints[:, smplx_rhand, :] #(N,21,3) 95 | rhand_joints = rhand_joints[:,smplx_hand_to_panoptic,:] #Convert SMPL-X hand order to paonptic hand order 96 | 97 | output = ModelOutput(vertices=smpl_output.vertices, 98 | global_orient=smpl_output.global_orient, 99 | body_pose=smpl_output.body_pose, 100 | joints=joints, 101 | right_hand_joints=rhand_joints, #N,21,3 102 | left_hand_joints=lhand_joints, #N,21,3 103 | betas=smpl_output.betas, 104 | full_pose=smpl_output.full_pose) 105 | return output 106 | 107 | 108 | """ 109 | 0 pelvis', 110 | 1 left_hip', 111 | 2 right_hip', 112 | 3 spine1', 113 | 4 left_knee', 114 | 5 right_knee', 115 | 6 spine2', 116 | 7 left_ankle', 117 | 8 right_ankle', 118 | 9 spine3', 119 | 10 left_foot', 120 | 11 right_foot', 121 | 12 neck', 122 | 13 left_collar', 123 | 14 right_collar', 124 | 15 head', 125 | 16 left_shoulder', 126 | 17 right_shoulder', 127 | 18 left_elbow', 128 | 19 right_elbow', 129 | 20 left_wrist', 130 | 21 right_wrist', 131 | 22 jaw', 132 | 23 left_eye_smplhf', 133 | 24 right_eye_smplhf', 134 | 25 left_index1', 135 | 26 left_index2', 136 | 27 left_index3', 137 | 28 left_middle1', 138 | 29 left_middle2', 139 | 30 left_middle3', 140 | 31 left_pinky1', 141 | 32 left_pinky2', 142 | 33 left_pinky3', 143 | 34 left_ring1', 144 | 35 left_ring2', 145 | 36 left_ring3', 146 | 37 left_thumb1', 147 | 38 left_thumb2', 148 | 39 left_thumb3', 149 | 40 right_index1', 150 | 41 right_index2', 151 | 42 right_index3', 152 | 43 right_middle1', 153 | 44 right_middle2', 154 | 45 right_middle3', 155 | 46 right_pinky1', 156 | 47 right_pinky2', 157 | 48 right_pinky3', 158 | 49 right_ring1', 159 | 50 right_ring2', 160 | 51 right_ring3', 161 | 52 right_thumb1', 162 | 53 right_thumb2', 163 | 54 right_thumb3', 164 | 55 nose', 165 | 56 right_eye', 166 | 57 left_eye', 167 | 58 right_ear', 168 | 59 left_ear', 169 | 60 left_big_toe', 170 | 61 left_small_toe', 171 | 62 left_heel', 172 | 63 right_big_toe', 173 | 64 right_small_toe', 174 | 65 right_heel', 175 | 66 left_thumb', 176 | 67 left_index', 177 | 68 left_middle', 178 | 69 left_ring', 179 | 70 left_pinky', 180 | 71 right_thumb', 181 | 72 right_index', 182 | 73 right_middle', 183 | 74 right_ring', 184 | 75 right_pinky', 185 | 76 right_eye_brow1', 186 | 77 right_eye_brow2', 187 | 78 right_eye_brow3', 188 | 79 right_eye_brow4', 189 | 80 right_eye_brow5', 190 | 81 left_eye_brow5', 191 | 82 left_eye_brow4', 192 | 83 left_eye_brow3', 193 | 84 left_eye_brow2', 194 | 85 left_eye_brow1', 195 | 86 nose1', 196 | 87 nose2', 197 | 88 nose3', 198 | 89 nose4', 199 | 90 right_nose_2', 200 | 91 right_nose_1', 201 | 92 nose_middle', 202 | 93 left_nose_1', 203 | 94 left_nose_2', 204 | 95 right_eye1', 205 | 96 right_eye2', 206 | 97 right_eye3', 207 | 98 right_eye4', 208 | 99 right_eye5', 209 | 100 right_eye6', 210 | 101 left_eye4', 211 | 102 left_eye3', 212 | 103 left_eye2', 213 | 104 left_eye1', 214 | 105 left_eye6', 215 | 106 left_eye5', 216 | 107 right_mouth_1', 217 | 108 right_mouth_2', 218 | 109 right_mouth_3', 219 | 110 mouth_top', 220 | 111 left_mouth_3', 221 | 112 left_mouth_2', 222 | 113 left_mouth_1', 223 | 114 left_mouth_5', # 59 in OpenPose output 224 | 115 left_mouth_4', # 58 in OpenPose output 225 | 116 mouth_bottom', 226 | 117 right_mouth_4', 227 | 118 right_mouth_5', 228 | 119 right_lip_1', 229 | 120 right_lip_2', 230 | 121 lip_top', 231 | 122 left_lip_2', 232 | 123 left_lip_1', 233 | 124 left_lip_3', 234 | 125 lip_bottom', 235 | 126 right_lip_3', 236 | 127 right_contour_1', 237 | 128 
right_contour_2', 238 | 129 right_contour_3', 239 | 130 right_contour_4', 240 | 131 right_contour_5', 241 | 132 right_contour_6', 242 | 133 right_contour_7', 243 | 134 right_contour_8', 244 | 135 contour_middle', 245 | 136 left_contour_8', 246 | 137 left_contour_7', 247 | 138 left_contour_6', 248 | 139 left_contour_5', 249 | 140 left_contour_4', 250 | 141 left_contour_3', 251 | 142 left_contour_2', 252 | 143 left_contour_1' 253 | """ 254 | 255 | 256 | #SMPL Joints: 257 | """ 258 | 0 pelvis', 259 | 1 left_hip', 260 | 2 right_hip', 261 | 3 spine1', 262 | 4 left_knee', 263 | 5 right_knee', 264 | 6 spine2', 265 | 7 left_ankle', 266 | 8 right_ankle', 267 | 9 spine3', 268 | 10 left_foot', 269 | 11 right_foot', 270 | 12 neck', 271 | 13 left_collar', 272 | 14 right_collar', 273 | 15 head', 274 | 16 left_shoulder', 275 | 17 right_shoulder', 276 | 18 left_elbow', 277 | 19 right_elbow', 278 | 20 left_wrist', 279 | 21 right_wrist', 280 | 22 281 | 23 282 | 24 nose', 283 | 25 right_eye', 284 | 26 left_eye', 285 | 27 right_ear', 286 | 28 left_ear', 287 | 29 left_big_toe', 288 | 30 left_small_toe', 289 | 31 left_heel', 290 | 32 right_big_toe', 291 | 33 right_small_toe', 292 | 34 right_heel', 293 | 35 left_thumb', 294 | 36 left_index', 295 | 37 left_middle', 296 | 38 left_ring', 297 | 39 left_pinky', 298 | 40 right_thumb', 299 | 41 right_index', 300 | 42 right_middle', 301 | 43 right_ring', 302 | 44 right_pinky', 303 | """ --------------------------------------------------------------------------------
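A closing note on bodymocap/models/smpl.py above: the SMPL-X-to-SMPL and hand-joint index lists in SMPLX.forward() are pure bookkeeping and can be sanity-checked without loading any model files. The snippet below simply copies those index lists from the method and verifies their sizes and the Panoptic reordering.

# Sanity check of the index lists used in SMPLX.forward()
smplx_to_smpl = list(range(0, 22)) + [28, 43] + list(range(55, 76))
smplx_lhand = [20] + list(range(25, 40)) + list(range(66, 71))
smplx_rhand = [21] + list(range(40, 55)) + list(range(71, 76))
smplx_hand_to_panoptic = [0, 13, 14, 15, 16, 1, 2, 3, 17, 4, 5, 6, 18,
                          10, 11, 12, 19, 7, 8, 9, 20]

assert len(smplx_to_smpl) == 45                    # 22 body joints + 2 middle-finger joints + 21 face/feet/finger-tip joints
assert len(smplx_lhand) == len(smplx_rhand) == 21  # wrist + 15 finger joints + 5 finger tips per hand
assert sorted(smplx_hand_to_panoptic) == list(range(21))  # a permutation into Panoptic hand order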