├── .gitignore ├── CODE_OF_CONDUCT.md ├── DATASETS.md ├── LICENSE ├── README.md ├── SECURITY.md ├── SUPPORT.md ├── docs ├── _config.yml ├── css │ └── styles.css ├── img │ ├── Microsoft-logo-only.svg │ ├── Microsoft-logo.svg │ ├── Microsoft-logo_rgb_c-gray.png │ ├── Microsoft-logo_rgb_c-wht.png │ ├── Microsoft-logo_rgb_wht.png │ ├── body-data.jpg │ ├── face-data.jpg │ ├── favicon.ico │ ├── hand-data.jpg │ ├── sa-logo-black.png │ └── sa-logo-white.png ├── index.html └── vid │ ├── body_res.mp4 │ ├── face_res.mp4 │ ├── hand_res.mp4 │ ├── in-the-wild.mp4 │ └── studio.mp4 ├── download_data.py ├── img ├── body_data.jpg ├── body_ldmks.png ├── face_data.jpg ├── face_ldmks.png ├── hand_data.jpg └── hand_ldmks.png ├── requirements.txt ├── smpl_numpy.py └── visualize_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ruff_cache/ 2 | __pycache__/ 3 | smplh/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /DATASETS.md: -------------------------------------------------------------------------------- 1 | # SynthMoCap Datasets 2 | 3 | The following datasets are released for **non-commercial** use; refer to [LICENSE](LICENSE) for more details. 4 | 5 | In all datasets, samples are indexed by identity and frame - there are approximately 20,000 identities in each dataset 6 | with 5 frames each. Indices are formatted with leading zeros, for example `img_0000123_004.jpg` for identity 123, frame 4. 7 | 8 | Some pose data is sourced from the [AMASS](https://amass.is.tue.mpg.de/) and [MANO](https://mano.is.tue.mpg.de/) 9 | datasets and is not directly redistributed by us. This data will be downloaded and spliced into the full dataset as part 10 | of the `download_data.py` script. You therefore need valid logins to https://amass.is.tue.mpg.de/ and 11 | https://mano.is.tue.mpg.de/, which you will be prompted for when running the script. 12 | 13 | Once downloaded, you can use `python visualize_data.py [path_to_dataset]` to visualize the data 14 | including some ground-truth annotations. 15 | 16 | First set up your environment by running `pip install -r requirements.txt` using Python 3.10 and install 17 | `wget` on your system if it isn't already installed. Our server requires TLSv1.2, which some old versions of wget do not support. 18 | We have successfully tested version 1.21.4 of [this build](https://eternallybored.org/misc/wget/) on Windows. 19 | 20 | ## SynthBody 21 | 22 | ![body_data](img/body_data.jpg) 23 | 24 | ### Download Instructions 25 | 26 | The following command will download the dataset to `YOUR_DATA_DIRECTORY/synth_body/`: 27 | 28 | ```bash 29 | python download_data.py --dataset body --output-dir YOUR_DATA_DIRECTORY/ 30 | ``` 31 | 32 | If you want just a single identity (500KB) you can add the `--single-id` flag, or for a single chunk (380MB) add `--single-chunk`.
33 | The total size of the dataset is approximately 10GB. 34 | 35 | ### Contents 36 | 37 | #### Image Data 38 | 39 | | Data Type | File Name | 40 | |-|-| 41 | |RGB image| `img_XXXXXXX_XXX.jpg`| 42 | | Grayscale beard segmentation| `segm_beard_XXXXXXX_XXX.png`| 43 | | Grayscale eyebrows segmentation| `segm_eyebrows_XXXXXXX_XXX.png`| 44 | | Grayscale eyelashes segmentation| `segm_eyelashes_XXXXXXX_XXX.png`| 45 | | Grayscale facewear segmentation| `segm_facewear_XXXXXXX_XXX.png`| 46 | | Grayscale glasses segmentation| `segm_glasses_XXXXXXX_XXX.png`| 47 | | Grayscale head hair segmentation| `segm_hair_XXXXXXX_XXX.png`| 48 | | Grayscale headwear segmentation| `segm_headwear_XXXXXXX_XXX.png`| 49 | | Integer body parts segmentation| `segm_parts_XXXXXXX_XXX.png`| 50 | 51 | #### Segmentation parts indices 52 | 53 | |Class|Index| 54 | |-|-| 55 | |BACKGROUND|0| 56 | |FACE|1| 57 | |LEFT_UPPER_TORSO|2| 58 | |LEFT_LOWER_TORSO|3| 59 | |RIGHT_UPPER_TORSO|4| 60 | |RIGHT_LOWER_TORSO|5| 61 | |LEFT_UPPER_LEG|6| 62 | |LEFT_LOWER_LEG|7| 63 | |LEFT_FOOT|8| 64 | |RIGHT_UPPER_LEG|9| 65 | |RIGHT_LOWER_LEG|10| 66 | |RIGHT_FOOT|11| 67 | |LEFT_UPPER_ARM|12| 68 | |LEFT_LOWER_ARM|13| 69 | |LEFT_HAND|14| 70 | |RIGHT_UPPER_ARM|15| 71 | |RIGHT_LOWER_ARM|16| 72 | |RIGHT_HAND|17| 73 | 74 | #### Metadata 75 | 76 | ```json 77 | { 78 | "camera": { 79 | "world_to_camera": [ "4x4 array of camera extrinsics" ], 80 | "camera_to_image": [ "3x3 array of camera intrinsics" ], 81 | "resolution": [ 82 | 512, 83 | 512 84 | ] 85 | }, 86 | "pose": [ "52x3 array of SMPL-H pose parameters" ], 87 | "translation": [ "3 element array for SMPL-H translation" ], 88 | "body_identity": [ "16 element array of neutral SMPL-H shape parameters" ], 89 | "landmarks": { 90 | "3D_world": [ "52x3 array of 3D landmarks in world-space corresponding to SMPL-H joints" ], 91 | "3D_cam": [ "52x3 array of 3D landmarks in camera-space corresponding to SMPL-H joints" ], 92 | "2D": [ "52x2 array of 2D landmarks in image-space corresponding to SMPL-H joints" ] 93 | } 94 | } 95 | ``` 96 | 97 | #### Landmarks 98 | 99 | ![body landmark definition](img/body_ldmks.png) 100 | 101 | #### Notes 102 | 103 | The dataset includes some images with secondary "distractor" people in the background; the ground-truth data does 104 | not include annotations for these people, only the primary person. These images help with robustness to occlusions and 105 | cases where people are close together in real-world scenarios. 106 | 107 | As detailed in the paper, clothing is modeled using displacement maps. Segmentation ground-truth includes the effect 108 | of these displacements, but landmarks are not displaced and instead lie directly on the surface of the body mesh. 109 | 110 | ## SynthFace 111 | 112 | ![face_data](img/face_data.jpg) 113 | 114 | ### Download Instructions 115 | 116 | The following command will download the dataset to `YOUR_DATA_DIRECTORY/synth_face/`: 117 | 118 | ```bash 119 | python download_data.py --dataset face --output-dir YOUR_DATA_DIRECTORY/ 120 | ``` 121 | 122 | If you want just a single identity (500KB) you can add the `--single-id` flag, or for a single chunk (500MB) add `--single-chunk`. 123 | The total size of the dataset is approximately 11GB.
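Once the data is downloaded, a quick way to sanity-check a sample is to load an image together with its metadata and draw the 2D landmarks (the metadata format is described under Contents below). The following is a minimal sketch, not part of the dataset tooling; it assumes the metadata JSON files sit next to the images and share the same `_XXXXXXX_XXX` identity/frame suffix - adjust if your download is laid out differently, and see `visualize_data.py` for the full visualization.

```python
# Minimal sketch: draw the 70 ground-truth 2D landmarks on one SynthFace image.
# Assumes metadata JSONs live alongside the images and share the images'
# "_XXXXXXX_XXX" identity/frame suffix; adjust paths to your download.
import json
from pathlib import Path

import cv2
import numpy as np

data_dir = Path("YOUR_DATA_DIRECTORY/synth_face")     # placeholder path
meta_path = sorted(data_dir.glob("*.json"))[0]        # first metadata file found
idx_frame = "_".join(meta_path.stem.split("_")[-2:])  # e.g. "0000123_004"

with open(meta_path) as f:
    meta = json.load(f)

img = cv2.imread(str(data_dir / f"img_{idx_frame}.jpg"))
ldmks = np.asarray(meta["landmarks"]["2D"])  # 70x2 landmarks in image space

for x, y in ldmks.astype(int):
    cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1)

cv2.imwrite("face_landmarks_preview.jpg", img)
```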
124 | 125 | ### Contents 126 | 127 | #### Image Data 128 | 129 | | Data Type | File Name | 130 | |-|-| 131 | |RGB image|`img_XXXXXXX_XXX.jpg`| 132 | |Grayscale beard segmentation|`segm_beard_XXXXXXX_XXX.png`| 133 | |Grayscale clothing segmentation|`segm_clothing_XXXXXXX_XXX.png`| 134 | |Grayscale eyebrows segmentation|`segm_eyebrows_XXXXXXX_XXX.png`| 135 | |Grayscale eyelashes segmentation|`segm_eyelashes_XXXXXXX_XXX.png`| 136 | |Grayscale facewear segmentation|`segm_facewear_XXXXXXX_XXX.png`| 137 | |Grayscale glasses segmentation|`segm_glasses_XXXXXXX_XXX.png`| 138 | |Grayscale head hair segmentation|`segm_hair_XXXXXXX_XXX.png`| 139 | |Grayscale headwear segmentation|`segm_headwear_XXXXXXX_XXX.png`| 140 | |Integer face parts segmentation|`segm_parts_XXXXXXX_XXX.png`| 141 | 142 | #### Segmentation parts indices 143 | 144 | |Class|Index| 145 | |-|-| 146 | |BACKGROUND|0| 147 | |SKIN|1| 148 | |NOSE|2| 149 | |RIGHT_EYE|3| 150 | |LEFT_EYE|4| 151 | |RIGHT_BROW|5| 152 | |LEFT_BROW|6| 153 | |RIGHT_EAR|7| 154 | |LEFT_EAR|8| 155 | |MOUTH_INTERIOR|9| 156 | |TOP_LIP|10| 157 | |BOTTOM_LIP|11| 158 | |NECK|12| 159 | 160 | #### Metadata 161 | 162 | ```json 163 | { 164 | "camera": { 165 | "world_to_camera": [ "4x4 array of camera extrinsics" ], 166 | "camera_to_image": [ "3x3 array of camera intrinsics" ], 167 | "resolution": [ 168 | 512, 169 | 512 170 | ] 171 | }, 172 | "head_pose": [ "3x3 rotation matrix of the head" ], 173 | "left_eye_pose": [ "3x3 rotation matrix of the left eye" ], 174 | "right_eye_pose": [ "3x3 rotation matrix of the right eye" ], 175 | "landmarks": { 176 | "2D": [ "70x2 array of landmarks in image space" ] 177 | } 178 | } 179 | ``` 180 | 181 | #### Landmarks 182 | 183 | ![face landmark definition](img/face_ldmks.png) 184 | 185 | #### Notes 186 | 187 | The dataset includes some images with secondary "distractor" faces in the background; the ground-truth data does 188 | not include annotations for these faces, only the primary face. These images help with robustness to occlusions and 189 | cases where faces are close together in real-world scenarios. 190 | 191 | ## SynthHand 192 | 193 | ![hand_data](img/hand_data.jpg) 194 | 195 | ### Download Instructions 196 | 197 | The following command will download the dataset to `YOUR_DATA_DIRECTORY/synth_hand/`: 198 | 199 | ```bash 200 | python download_data.py --dataset hand --output-dir YOUR_DATA_DIRECTORY/ 201 | ``` 202 | 203 | If you want just a single identity (250KB) you can add the `--single-id` flag, or for a single chunk (300MB) add `--single-chunk`. 204 | The total size of the dataset is approximately 7GB.
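The hand metadata (described under Contents below) stores camera intrinsics and extrinsics alongside world-space, camera-space and image-space landmarks. The sketch below is purely illustrative and not part of the dataset tooling: it checks that applying the extrinsics to the world-space landmarks recovers the camera-space landmarks, and that a standard pinhole projection through the intrinsics reproduces the stored 2D landmarks. The pinhole convention and the metadata file naming are assumptions - if the reported errors are large, check the camera convention against `visualize_data.py`.

```python
# Minimal sketch: consistency check between 3D and 2D ground truth in SynthHand,
# assuming a standard pinhole camera model. Paths/file naming are assumptions.
import json
from pathlib import Path

import numpy as np

meta_path = sorted(Path("YOUR_DATA_DIRECTORY/synth_hand").glob("*.json"))[0]
with open(meta_path) as f:
    meta = json.load(f)

world_to_cam = np.asarray(meta["camera"]["world_to_camera"])  # 4x4 extrinsics
cam_to_img = np.asarray(meta["camera"]["camera_to_image"])    # 3x3 intrinsics

ldmks_world = np.asarray(meta["landmarks"]["3D_world"])  # 21x3, world space
ldmks_cam = np.asarray(meta["landmarks"]["3D_cam"])      # 21x3, camera space
ldmks_2d = np.asarray(meta["landmarks"]["2D"])           # 21x2, image space

# world -> camera: apply the 4x4 extrinsics to homogeneous points
homog = np.hstack([ldmks_world, np.ones((len(ldmks_world), 1))])
cam_check = (world_to_cam @ homog.T).T[:, :3]

# camera -> image: pinhole projection followed by perspective divide
proj = (cam_to_img @ ldmks_cam.T).T
proj_2d = proj[:, :2] / proj[:, 2:]

print("world->camera max abs error:", np.abs(cam_check - ldmks_cam).max())
print("projection max abs error (px):", np.abs(proj_2d - ldmks_2d).max())
```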
205 | 206 | ### Contents 207 | 208 | #### Image Data 209 | 210 | | Data Type | File Name | 211 | |-|-| 212 | |RGB image|`img_XXXXXXX_XXX.jpg`| 213 | 214 | #### Metadata 215 | 216 | ```json 217 | { 218 | "camera": { 219 | "world_to_camera": [ "4x4 array of camera extrinsics" ], 220 | "camera_to_image": [ "3x3 array of camera intrinsics" ], 221 | "resolution": [ 222 | 512, 223 | 512 224 | ] 225 | }, 226 | "pose": [ " 52x3 array of SMPL-H pose parameters" ], 227 | "translation": [ "3 element array for SMPL-H translation" ], 228 | "body_identity": [ "16 element array of neutral SMPL-H shape parameters" ], 229 | "landmarks": { 230 | "3D_world": [ "21x3 array of 3D landmarks in world-space - first 15 elements are MANO joints, last 5 are finger tips" ], 231 | "3D_cam": [ "21x3 array of 3D landmarks in camera-space - first 15 elements are MANO joints, last 5 are finger tips" ], 232 | "2D": [ "21x2 array of 2D landmarks in image-space - first 15 elements are MANO joints, last 5 are finger tips" ] 233 | } 234 | } 235 | ``` 236 | 237 | #### Landmarks 238 | 239 | ![hand landmark definition](img/hand_ldmks.png) 240 | 241 | #### Notes 242 | 243 | Our parametric body model uses a 300 component SMPL-H shape basis and adds the MANO shape basis to the hands, as well as 244 | incorporating skin displacement maps. The reposed SMPL-H meshes therefore do not exactly match the rendered images, this 245 | difference is only significant for some hand images. 246 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Research Use of Data Agreement v1.0 2 | 3 | This is the Research Use of Data Agreement, Version 1.0 (the “R-UDA”). Capitalized terms are defined in Section 5. Data Provider and you agree as follows: 4 | 5 | 1. Provision of the Data 6 | 1.1. You may use, modify, and distribute the Data made available to you by the Data Provider under this R-UDA for Research Use if you follow the R-UDA’s terms. 7 | 1.2. Data Provider will not sue you or any Downstream Recipient for any claim arising out of the use, modification, or distribution of the Data provided you meet the terms of the R-UDA. 8 | 1.3. This R-UDA does not restrict your use, modification, or distribution of any portions of the Data that are in the public domain or that may be used, modified, or distributed under any other legal exception or limitation. 9 | 10 | 2. Restrictions 11 | 2.1. You agree that you will use the Data solely for Computational Use for non-commercial research. This restriction means that you may engage in non-commercial research activities (including non-commercial research undertaken by or funded via a commercial entity), but you may not use the Data or any Results in any commercial offering, including as part of a product or service (or to improve any product or service) you use or provide to others. 12 | 2.2. You may not receive money or other consideration in exchange for use or redistribution of Data. 13 | 14 | 3. Redistribution of Data 15 | 3.1. You may redistribute the Data, so long as: 16 | 3.1.1. You include with any Data you redistribute all credit or attribution information that you received with the Data, and your terms require any Downstream Recipient to do the same; and 17 | 3.1.2. You bind each recipient to whom you redistribute the Data to the terms of the R-UDA. 18 | 19 | 4. No Warranty, Limitation of Liability 20 | 4.1. 
Data Provider does not represent or warrant that it has any rights whatsoever in the Data. 21 | 4.2. THE DATA IS PROVIDED ON AN “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. 22 | 4.3. NEITHER DATA PROVIDER NOR ANY UPSTREAM DATA PROVIDER SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE DATA OR RESULTS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 23 | 24 | 5. Definitions 25 | 5.1. “Computational Use” means activities necessary to enable the use of Data (alone or along with other material) for analysis by a computer. 26 | 5.2. “Data” means the material you receive under the R-UDA in modified or unmodified form, but not including Results. 27 | 5.3. “Data Provider” means the source from which you receive the Data and with whom you enter into the R-UDA. 28 | 5.4. “Downstream Recipient” means any person or persons who receives the Data directly or indirectly from you in accordance with the R-UDA. 29 | 5.5. “Result” means anything that you develop or improve from your use of Data that does not include more than a de minimis portion of the Data on which the use is based. Results may include de minimis portions of the Data necessary to report on or explain use that has been conducted with the Data, such as figures in scientific papers, but do not include more. Artificial intelligence models trained on Data (and which do not include more than a de minimis portion of Data) are Results. 30 | 5.6. “Upstream Data Providers” means the source or sources from which the Data Provider directly or indirectly received, under the terms of the R-UDA, material that is included in the Data. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SynthMoCap Datasets 2 | 3 | This repo accompanies the paper [Look Ma, no markers: holistic performance capture without the hassle](https://aka.ms/SynthMoCap) which appeared in ACM Transaction on Graphics and was presented at SIGGRAPH Asia 2024. 4 | The repo includes download instructions for the synthetic datasets used in the paper. For detailed information about the dataset contents and download instructions see [DATASETS.md](DATASETS.md). 
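The body and hand annotations are expressed in terms of the SMPL-H model, and `smpl_numpy.py` in this repo provides a small NumPy implementation of it. As a rough sketch of how the pieces fit together (the metadata filename below is hypothetical, it assumes you have run `download_data.py` so the AMASS/MANO pose data has been spliced into the metadata, and that the neutral SMPL-H model has been fetched to `smplh/neutral/model.npz` as `visualize_data.py` does):

```python
# Minimal sketch: pose the SMPL-H model from one SynthBody metadata file.
# The metadata path below is hypothetical - point it at any downloaded sample.
import json

import numpy as np

from smpl_numpy import SMPL

body_model = SMPL("smplh/neutral/model.npz")  # neutral SMPL-H model (see visualize_data.py)

with open("YOUR_DATA_DIRECTORY/synth_body/metadata_0000000_000.json") as f:
    meta = json.load(f)

# body_identity holds 16 shape parameters; pad with zeros if the model exposes more.
beta = np.zeros(body_model.shape_dim)
beta[: len(meta["body_identity"])] = meta["body_identity"]
body_model.beta = beta

body_model.theta = np.asarray(meta["pose"])                # 52x3 axis-angle joint rotations
body_model.translation = np.asarray(meta["translation"])   # root translation

print(body_model.vertices.shape, body_model.joint_positions.shape)
```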
5 | 6 | ## SynthBody Dataset 7 | 8 | ![body_data](docs/img/body-data.jpg) 9 | 10 | The SynthBody dataset includes the following: 11 | 12 | - RGB body images 13 | - 2D and 3D skeletal landmarks 14 | - Segmentation masks 15 | - SMPL-H pose parameters 16 | - SMPL-H shape parameters 17 | - Camera parameters 18 | 19 | ## SynthFace Dataset 20 | 21 | ![face_data](docs/img/face-data.jpg) 22 | 23 | The SynthFace dataset includes the following: 24 | 25 | - RGB face images 26 | - Sparse 2D landmarks 27 | - Segmentation masks 28 | - Camera parameters 29 | - Head pose 30 | - Eye gaze 31 | 32 | ## SynthHand Dataset 33 | 34 | ![hand_data](docs/img/hand-data.jpg) 35 | 36 | The SynthHand dataset includes the following: 37 | 38 | - RGB hand images (left hand only) 39 | - 2D and 3D skeletal landmarks 40 | - SMPL-H pose parameters 41 | - SMPL-H shape parameters 42 | - Camera parameters 43 | 44 | ## Citation 45 | 46 | If you use any of the datasets in your research, please cite the following [paper](https://aka.ms/SynthMoCap): 47 | 48 | ```bibtex 49 | @article{hewitt2024look, 50 | title={Look Ma, no markers: holistic performance capture without the hassle}, 51 | author={Hewitt, Charlie and Saleh, Fatemeh and Aliakbarian, Sadegh and Petikam, Lohit and Rezaeifar, Shideh and Florentin, Louis and Hosenie, Zafiirah and Cashman, Thomas J and Valentin, Julien and Cosker, Darren and Baltru\v{s}aitis, Tadas}, 52 | journal={ACM Transactions on Graphics (TOG)}, 53 | volume={43}, 54 | number={6}, 55 | year={2024}, 56 | publisher={ACM New York, NY, USA}, 57 | articleno={235}, 58 | numpages={12}, 59 | } 60 | ``` 61 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet) and [Xamarin](https://github.com/xamarin). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/security.md/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/security.md/msrc/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/security.md/msrc/pgp). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 
22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/security.md/msrc/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/security.md/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). CSS will work with/help you to determine next steps. 7 | - **Not sure?** Fill out an intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 
26 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /docs/css/styles.css: -------------------------------------------------------------------------------- 1 | @import "https://cdn.jsdelivr.net/npm/bulma@0.9.3/css/bulma.min.css"; 2 | @import "https://cdn.jsdelivr.net/npm/@creativebulma/bulma-tooltip@1.2.0/dist/bulma-tooltip.min.css"; 3 | @media screen and (min-width: 1024px) { 4 | .container { 5 | max-width: 860px; 6 | } 7 | } 8 | 9 | .authors>span { 10 | padding: 0 0.5rem; 11 | display: inline-block; 12 | } 13 | 14 | @media only screen and (max-width: 480px) { 15 | a.button.is-rounded.is-link.is-light:not(:last-child) { 16 | margin-bottom: 0.75em; 17 | } 18 | } 19 | 20 | .footer-links a { 21 | color: inherit !important; 22 | padding: 0 1em; 23 | } 24 | 25 | .column { 26 | text-align: left; 27 | } 28 | -------------------------------------------------------------------------------- /docs/img/Microsoft-logo-only.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /docs/img/Microsoft-logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 12 | 13 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /docs/img/Microsoft-logo_rgb_c-gray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/Microsoft-logo_rgb_c-gray.png -------------------------------------------------------------------------------- /docs/img/Microsoft-logo_rgb_c-wht.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/Microsoft-logo_rgb_c-wht.png -------------------------------------------------------------------------------- /docs/img/Microsoft-logo_rgb_wht.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/Microsoft-logo_rgb_wht.png -------------------------------------------------------------------------------- /docs/img/body-data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/body-data.jpg -------------------------------------------------------------------------------- /docs/img/face-data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/face-data.jpg -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/favicon.ico 
-------------------------------------------------------------------------------- /docs/img/hand-data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/hand-data.jpg -------------------------------------------------------------------------------- /docs/img/sa-logo-black.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/sa-logo-black.png -------------------------------------------------------------------------------- /docs/img/sa-logo-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/img/sa-logo-white.png -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Look Ma, no markers 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 45 |
46 |
47 |

Look Ma, no markers

48 |

Holistic performance capture without the hassle

49 |

ACM Transactions on Graphics

50 |

SIGGRAPH Asia 2024

51 |

52 | Charlie Hewitt 53 | Fatemeh Saleh 54 | Sadegh Aliakbarian 55 | Lohit Petikam 56 | Shideh Rezaeifar 57 | Louis Florentin 58 | Zafiirah Hosenie 59 | Thomas J Cashman 60 | Julien Valentin 61 | Darren Cosker 62 | Tadas Baltrušaitis 63 |

64 |
65 |
66 | 67 | 68 | Paper 69 | 70 | 71 | 72 | arXiv 73 | 74 | 75 | 76 | Video 77 | 78 | 79 | 80 | Datasets 81 | 82 |
83 |
84 |
85 |
86 |
87 | 88 |
89 |
90 |
91 |
92 |
93 |

94 | Abstract 95 |

96 |
97 | We tackle the problem of highly-accurate, holistic performance capture for the face, body and hands 98 | simultaneously. 99 | Motion-capture technologies used in film and game production typically focus only on face, body or hand 100 | capture independently, involve complex and expensive hardware and a high degree of manual intervention 101 | from skilled operators. 102 | While machine-learning-based approaches exist to overcome these problems, they usually only support a 103 | single camera, often operate on a single part of the body, do not produce precise world-space results, 104 | and rarely generalize outside specific contexts. 105 | In this work, we introduce the first technique for marker-free, high-quality reconstruction of the 106 | complete human body, including eyes and tongue, without requiring any calibration, manual intervention 107 | or custom hardware. 108 | Our approach produces stable world-space results from arbitrary camera rigs as well as supporting varied 109 | capture environments and clothing. 110 | We achieve this through a hybrid approach that leverages machine learning models trained exclusively on 111 | synthetic data and powerful parametric models of human shape and motion. 112 | We evaluate our method on a number of body, face and hand reconstruction benchmarks and demonstrate 113 | state-of-the-art results that generalize on diverse datasets. 114 |
115 |
116 |
117 |
118 |
119 |

120 | Holistic Performance Capture 121 |

122 |
123 |

Our approach combines machine-learning models for dense-landmark and parameter prediction with 124 | model-fitting to provide a robust, accurate and adaptable system. Our method supports registration 125 | of the face, body and hands; in isolation, and together in a single take. 126 |

127 |
128 |
129 |
130 | 133 |
134 |

135 | Our parametric model captures body and hand pose, body and face shape, and facial 136 | expression. 137 |

138 |
139 |
140 |
141 | 144 |
145 |

146 | We can also track tongue articulation and eye gaze. 147 |

148 |
149 |
150 |
151 | 154 |
155 |

156 | Our method achieves state-of-the-art results on a number of 3D reconstruction benchmarks. 157 |

158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |

166 | No Hassle 167 |

168 |
169 |

Motion capture shoots typically require specialist hardware, skilled experts and a lot of time to 170 | get right. This can make them expensive and challenging to manage in a tight production schedule. 171 | Our method aims to eliminate this inconvenience by providing a marker-less, calibration-free 172 | solution that can be used with off-the-shelf hardware. This allows for quick and easy capture of 173 | high-quality motion data in a variety of environments. 174 |

175 |
176 |
177 | 180 |

181 | Using just two uncalibrated mobile-phone cameras we can achieve high quality results in 182 | world-space. 183 |

184 |
185 |
186 | 189 |

190 | Our method even works with a single, moving camera in an unconstrained environment with 191 | arbitrary clothing. 192 |

193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |

201 | Synthetic Datasets 202 |

203 |
204 |

Our method is trained exclusively on synthetic data, generated using a conventional computer 205 | graphics pipeline. The three datasets used in the paper are available to download here. 207 |

208 |
209 |
210 | 211 |

212 | SynthBody can be used for tasks such as skeletal tracking and body pose prediction. 213 |

214 |
215 |
216 | 217 |

218 | SynthFace can be used for tasks such as facial landmark and head pose prediction or 219 | face parsing. 220 |

221 |
222 |
223 | 224 |

225 | SynthHand can be used for tasks such as hand pose prediction or landmark regression. 226 |

227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |

235 | BibTeX 236 |

237 |
238 | @article{hewitt2024look,
239 |     title={Look Ma, no markers: holistic performance capture without the hassle},
240 |     author={Hewitt, Charlie and Saleh, Fatemeh and Aliakbarian, Sadegh and Petikam, Lohit and Rezaeifar, Shideh and Florentin, Louis and Hosenie, Zafiirah and Cashman, Thomas J and Valentin, Julien and Cosker, Darren and Baltru\v{s}aitis, Tadas},
241 |     journal={ACM Transactions on Graphics (TOG)},
242 |     volume={43},
243 |     number={6},
244 |     year={2024},
245 |     publisher={ACM New York, NY, USA},
246 |     articleno={235},
247 |     numpages={12},
248 | }
249 |
250 |
251 | 267 | 268 | 269 | 295 | 296 | 297 | -------------------------------------------------------------------------------- /docs/vid/body_res.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/vid/body_res.mp4 -------------------------------------------------------------------------------- /docs/vid/face_res.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/vid/face_res.mp4 -------------------------------------------------------------------------------- /docs/vid/hand_res.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/vid/hand_res.mp4 -------------------------------------------------------------------------------- /docs/vid/in-the-wild.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/vid/in-the-wild.mp4 -------------------------------------------------------------------------------- /docs/vid/studio.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/docs/vid/studio.mp4 -------------------------------------------------------------------------------- /download_data.py: -------------------------------------------------------------------------------- 1 | """Helper script to download the SynthMoCap datasets. 2 | 3 | This python file is licensed under the MIT license (see below). 4 | The datasets are licensed under the Research Use of Data Agreement v1.0 (see LICENSE.md). 5 | 6 | Copyright (c) 2024 Microsoft Corporation 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 9 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 10 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 11 | to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 17 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | """ 21 | 22 | import argparse 23 | import json 24 | import subprocess 25 | import sys 26 | import tarfile 27 | from getpass import getpass 28 | from multiprocessing import Pool 29 | from pathlib import Path 30 | from typing import Optional 31 | from zipfile import ZipFile 32 | 33 | import numpy as np 34 | from tqdm import tqdm 35 | 36 | MANO_N_J = 15 37 | SMPL_H_N_J = 22 38 | LEFT_HAND = SMPL_H_N_J 39 | RIGHT_HAND = SMPL_H_N_J + MANO_N_J 40 | 41 | MANO_FILENAME = "manoposesv10" 42 | MOSH_FILENAME = "MoSh" 43 | POSELIM_FILENAME = "PosePrior" 44 | 45 | MANO_LEFT_DATA = None 46 | MANO_RIGHT_DATA = None 47 | 48 | N_PARTS = 20 49 | 50 | 51 | def _download_mpii_file(username: str, password: str, domain: str, file: str, out_path: Path) -> None: 52 | out_path.parent.mkdir(exist_ok=True, parents=True) 53 | url = f"https://download.is.tue.mpg.de/download.php?domain={domain}&resume=1&sfile={file}" 54 | try: 55 | subprocess.check_call( 56 | [ 57 | "wget", 58 | "--post-data", 59 | f"username={username}&password={password}", 60 | url, 61 | "-O", 62 | out_path.as_posix(), 63 | "--no-check-certificate", 64 | "--continue", 65 | ] 66 | ) 67 | except FileNotFoundError as exc: 68 | raise RuntimeError("wget not found, please install it") from exc 69 | except subprocess.CalledProcessError as exc: 70 | if out_path.exists(): 71 | out_path.unlink() 72 | raise RuntimeError("Download failed, check your login details") from exc 73 | 74 | 75 | def get_mano(out_dir: Path) -> None: 76 | """Download MANO data.""" 77 | print("Downloading MANO...") 78 | username = input("Username for https://mano.is.tue.mpg.de/: ") 79 | password = getpass("Password for https://mano.is.tue.mpg.de/: ") 80 | _download_mpii_file( 81 | username, 82 | password, 83 | "mano", 84 | f"{MANO_FILENAME}.zip", 85 | out_dir / f"{MANO_FILENAME}.zip", 86 | ) 87 | 88 | 89 | def get_amass(out_dir: Path) -> None: 90 | """Download AMASS data.""" 91 | print("Downloading AMASS...") 92 | username = input("Username for https://amass.is.tue.mpg.de/: ") 93 | password = getpass("Password for https://amass.is.tue.mpg.de/: ") 94 | _download_mpii_file( 95 | username, 96 | password, 97 | "amass", 98 | f"amass_per_dataset/smplh/gender_specific/mosh_results/{MOSH_FILENAME}.tar.bz2", 99 | out_dir / f"{MOSH_FILENAME}.tar.bz2", 100 | ) 101 | _download_mpii_file( 102 | username, 103 | password, 104 | "amass", 105 | f"amass_per_dataset/smplh/gender_specific/mosh_results/{POSELIM_FILENAME}.tar.bz2", 106 | out_dir / f"{POSELIM_FILENAME}.tar.bz2", 107 | ) 108 | 109 | 110 | def extract(data_path: Path, out_path: Optional[Path] = None) -> None: 111 | """Extract the data from the given path.""" 112 | print(f"Extracting {data_path.name}...") 113 | if data_path.suffix == ".zip": 114 | out_path = out_path or data_path.parent / data_path.stem 115 | with ZipFile(data_path) as f: 116 | f.extractall(out_path) 117 | elif data_path.suffix == ".bz2": 118 | out_path = out_path or data_path.parent / data_path.name.replace(".tar.bz2", "") 119 | with tarfile.open(data_path, "r:bz2") as f: 120 | f.extractall(out_path) 121 | else: 122 | raise ValueError(f"Unknown file type {data_path.suffix}") 123 | 124 | 125 | def _mano_data(data_dir: Path) -> tuple[np.ndarray, np.ndarray]: 126 | """Load the MANO data.""" 127 | global MANO_LEFT_DATA, MANO_RIGHT_DATA 128 | if MANO_LEFT_DATA is None: 129 | MANO_LEFT_DATA = np.load( 130 | data_dir / f"{MANO_FILENAME}/mano_poses_v1_0/handsOnly_REGISTRATIONS_r_lm___POSES___L.npy" 131 | ) 132 | if MANO_RIGHT_DATA is None: 133 | MANO_RIGHT_DATA = np.load( 134 | 
data_dir / f"{MANO_FILENAME}/mano_poses_v1_0/handsOnly_REGISTRATIONS_r_lm___POSES___R.npy" 135 | ) 136 | return MANO_LEFT_DATA, MANO_RIGHT_DATA 137 | 138 | 139 | def _process_meta(args: tuple[Path, Path]) -> None: 140 | metadata_fn, data_dir = args 141 | mano_left, mano_right = _mano_data(data_dir) 142 | with open(metadata_fn, "r") as f: 143 | metadata = json.load(f) 144 | if isinstance(metadata["pose"][1], str): 145 | # body pose comes from AMASS 146 | seq_name: str = metadata["pose"][1] 147 | frame = int(seq_name.split("_")[-2]) 148 | assert int(seq_name.split("_")[-1]) == 0 149 | seq_path = Path("/".join(seq_name.split("/")[1:])).with_suffix(".npz").as_posix() 150 | if seq_name.startswith("MoSh_MPI_MoSh"): 151 | # fix paths to match downloaded data 152 | seq_path = seq_path.replace("Data/moshpp_fits_SMPL", "MPI_mosh") 153 | seq_path = seq_path.replace(".npz", "_poses.npz") 154 | if not (data_dir / MOSH_FILENAME / seq_path).exists(): 155 | # there is a sequence incorrectly named with _poses_poses 156 | seq_path = seq_path.replace(".npz", "_poses.npz") 157 | seq_data = np.load(data_dir / MOSH_FILENAME / seq_path) 158 | elif seq_name.startswith("MoSh_MPI_PoseLimits"): 159 | # fix paths to match downloaded data 160 | seq_path = seq_path.replace("Data/moshpp_fits_SMPL", "MPI_Limits") 161 | seq_path = seq_path.replace(".npz", "_poses.npz") 162 | seq_data = np.load(data_dir / POSELIM_FILENAME / seq_path) 163 | else: 164 | raise RuntimeError(f"Unknown sequence name {seq_name}") 165 | # we resampled to ~30 fps so have to adjust the frame number 166 | frame_step = int(np.floor(seq_data["mocap_framerate"] / 30)) 167 | seq = seq_data["poses"][::frame_step] 168 | # exclude root joint 169 | metadata["pose"][1:SMPL_H_N_J] = seq[frame].reshape((-1, 3))[1:SMPL_H_N_J].tolist() 170 | if isinstance(metadata["pose"][LEFT_HAND], str): 171 | # left hand comes from MANO 172 | idx = int(metadata["pose"][LEFT_HAND].split("_")[1]) 173 | metadata["pose"][LEFT_HAND:RIGHT_HAND] = mano_left[idx].reshape((MANO_N_J, 3)).tolist() 174 | if isinstance(metadata["pose"][RIGHT_HAND], str): 175 | # right hand comes from MANO 176 | idx = int(metadata["pose"][RIGHT_HAND].split("_")[1]) 177 | metadata["pose"][RIGHT_HAND:] = mano_right[idx].reshape((MANO_N_J, 3)).tolist() 178 | with open(metadata_fn, "w") as f: 179 | json.dump(metadata, f, indent=4) 180 | 181 | 182 | def download_synthmocap_data(data_dir: Path, dataset: str, zip_dir: Path, single_id: bool, single_chunck: bool) -> None: 183 | """Download one of the SynthMoCap datasets.""" 184 | data_dir.mkdir(exist_ok=True, parents=True) 185 | zip_dir.mkdir(exist_ok=True, parents=True) 186 | parts = ( 187 | [f"{dataset}_sample.zip"] 188 | if single_id 189 | else [f"{dataset}_{i:02d}.zip" for i in range(1, 2 if single_chunck else N_PARTS + 1)] 190 | ) 191 | for part in parts: 192 | out_path = zip_dir / part 193 | print(f"Downloading {part}...") 194 | url = f"https://facesyntheticspubwedata.z6.web.core.windows.net/sga-2024-synthmocap/{part}" 195 | try: 196 | subprocess.check_call( 197 | [ 198 | "wget", 199 | url, 200 | "-O", 201 | str(out_path), 202 | "--no-check-certificate", 203 | "--continue", 204 | "--secure-protocol=TLSv1_2", 205 | ] 206 | ) 207 | except FileNotFoundError as exc: 208 | raise RuntimeError("wget not found, please install it") from exc 209 | except subprocess.CalledProcessError: 210 | print("Download failed") 211 | if out_path.exists(): 212 | out_path.unlink() 213 | sys.exit(1) 214 | extract(out_path, data_dir / dataset) 215 | out_path.unlink() 216 | 217 | 218 | 
def process_metadata(data_dir: Path, dataset_name: str) -> None: 219 | """Process the metadata to include the correct pose data.""" 220 | metadata_files = list((data_dir / dataset_name).glob("*.json")) 221 | with Pool() as p: 222 | list( 223 | tqdm( 224 | p.imap( 225 | _process_meta, 226 | [(metadata_fn, data_dir) for metadata_fn in metadata_files], 227 | ), 228 | total=len(metadata_files), 229 | desc="Processing metadata", 230 | ) 231 | ) 232 | 233 | 234 | def main() -> None: 235 | """Download and process the dataset.""" 236 | parser = argparse.ArgumentParser(description="Download SynthMoCap datasets") 237 | parser.add_argument("--output-dir", type=Path, help="Output directory", required=True) 238 | parser.add_argument( 239 | "--dataset", 240 | type=str, 241 | help="Dataset to download", 242 | choices=["face", "body", "hand"], 243 | required=True, 244 | ) 245 | parser.add_argument( 246 | "--single-id", 247 | action="store_true", 248 | help="Only download one subject from the dataset", 249 | ) 250 | parser.add_argument( 251 | "--single-chunk", 252 | action="store_true", 253 | help="Only download one chunk from the dataset", 254 | ) 255 | args = parser.parse_args() 256 | assert not (args.single_id and args.single_chunk), "Cannot specify both single-id and single-chunk" 257 | dataset_name = f"synth_{args.dataset}" 258 | data_dir = Path(args.output_dir) 259 | if args.dataset != "face": 260 | # download data from MPII sources 261 | if not (data_dir / MOSH_FILENAME).exists() or not (data_dir / POSELIM_FILENAME).exists(): 262 | get_amass(data_dir) 263 | if not (data_dir / MANO_FILENAME).exists(): 264 | get_mano(data_dir) 265 | # extract the data 266 | for path in list(data_dir.glob("*.zip")) + list(data_dir.glob("*.bz2")): 267 | extract(path) 268 | path.unlink() 269 | # download the SynthMoCap dataset 270 | zip_dir = data_dir / f"{dataset_name}_zip" 271 | download_synthmocap_data(data_dir, dataset_name, zip_dir, args.single_id, args.single_chunk) 272 | zip_dir.rmdir() 273 | if args.dataset != "face": 274 | # process the metadata 275 | process_metadata(data_dir, dataset_name) 276 | 277 | 278 | if __name__ == "__main__": 279 | main() 280 | -------------------------------------------------------------------------------- /img/body_data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/body_data.jpg -------------------------------------------------------------------------------- /img/body_ldmks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/body_ldmks.png -------------------------------------------------------------------------------- /img/face_data.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/face_data.jpg -------------------------------------------------------------------------------- /img/face_ldmks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/face_ldmks.png -------------------------------------------------------------------------------- /img/hand_data.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/hand_data.jpg -------------------------------------------------------------------------------- /img/hand_ldmks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/SynthMoCap/251295e29abf574f5e6b2b49460f4acd47735ee6/img/hand_ldmks.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.7.1 2 | numpy==1.26.4 3 | opencv-python==4.10.0.84 4 | pyrender==0.1.45 5 | tqdm==4.66.3 6 | transformations==2022.9.26 7 | trimesh==3.22.1 -------------------------------------------------------------------------------- /smpl_numpy.py: -------------------------------------------------------------------------------- 1 | """Numpy implementation of the SMPL body model. 2 | 3 | See https://smpl.is.tue.mpg.de/ for information about the model. 4 | 5 | This python file is licensed under the MIT license (see below). 6 | The datasets are licensed under the Research Use of Data Agreement v1.0 (see LICENSE.md). 7 | 8 | Copyright (c) 2024 Microsoft Corporation 9 | 10 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 11 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 12 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 13 | to permit persons to whom the Software is furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in all 16 | copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 19 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 20 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | 24 | from pathlib import Path 25 | 26 | import numpy as np 27 | 28 | 29 | def axis_angle_to_rotation_matrix(axis_angle: np.ndarray) -> np.ndarray: 30 | """Turns an axis-angle rotation into a 3x3 rotation matrix. 31 | 32 | See https://en.wikipedia.org/wiki/Rotation_matrix#Conversion_from_and_to_axis%E2%80%93angle. 33 | """ 34 | assert isinstance(axis_angle, np.ndarray) 35 | 36 | angle = np.linalg.norm(axis_angle) 37 | if angle < np.finfo(np.float32).tiny: 38 | return np.identity(3) 39 | 40 | axis = axis_angle / angle 41 | u_x, u_y, u_z = axis 42 | R = np.cos(angle) * np.identity(3) 43 | R += np.sin(angle) * np.array([0, -u_z, u_y, u_z, 0, -u_x, -u_y, u_x, 0]).reshape(3, 3) 44 | R += +(1.0 - np.cos(angle)) * (axis * axis[:, None]) 45 | 46 | return R 47 | 48 | 49 | class SMPL: 50 | """A NumPy implementation of SMPL.""" 51 | 52 | def __init__(self, model_path: str | Path): 53 | """A NumPy implementation of SMPL. 54 | 55 | Arguments: 56 | model_path: A path to a SMPL model file (.npz). 57 | """ 58 | model_path = Path(model_path) 59 | assert model_path.is_file(), f"{model_path} does not exist." 60 | assert model_path.suffix == ".npz", "Expecting a pickle file." 
61 | 62 | params = np.load(model_path) 63 | 64 | self._vertex_template = params["v_template"] 65 | self._vertex_shape_basis = params["shapedirs"] 66 | self._vertex_pose_basis = params["posedirs"] 67 | 68 | self._joint_parent_idxs = params["kintree_table"][0] 69 | self._joint_regressor = params["J_regressor"] 70 | 71 | self._skinning_weights = params["weights"] 72 | 73 | self._triangles = params["f"] 74 | 75 | self._n_vertices = len(self._vertex_template) 76 | self._n_joints = len(self._joint_regressor) 77 | 78 | # Used to calculate pose-dependent blendshapes coefficients 79 | self._identity_cube = np.identity(3)[np.newaxis, ...].repeat(self._n_joints - 1, axis=0) 80 | 81 | # The vertex and joint positions in the bind-pose 82 | self._vertices_bind_pose = self._vertex_template.copy() 83 | self._joints_bind_pose = self._joint_regressor.dot(self._vertices_bind_pose) 84 | 85 | self._shape_dim = self._vertex_shape_basis.shape[-1] 86 | self._theta_shape = (self._n_joints, 3) 87 | 88 | self.beta = np.zeros(self._shape_dim, dtype=float) 89 | self.theta = np.zeros(self._theta_shape, dtype=float) 90 | self._translation = np.zeros(3, dtype=float) 91 | 92 | # Joint transforms in local and world space 93 | self._j_transforms_local = np.identity(4)[np.newaxis, ...].repeat(self._n_joints, axis=0) 94 | self._j_transforms_global = self._j_transforms_local.copy() 95 | 96 | # The vertices of the posed model 97 | self._vertices = self._vertices_bind_pose.copy() 98 | 99 | # The normals of the posed model, to be calculated 100 | self._normals = np.zeros_like(self._vertices) 101 | 102 | @property 103 | def beta(self) -> np.ndarray: 104 | """SMPL's linear shape basis parameters.""" 105 | return self._beta 106 | 107 | @beta.setter 108 | def beta(self, value: np.ndarray) -> None: 109 | value = np.asarray(value) 110 | assert value.shape == (self._shape_dim,), f"Expecting beta to have shape ({self._shape_dim},)." 111 | self._beta = value 112 | self._update_shape() 113 | 114 | @property 115 | def joint_parent_indices(self) -> np.ndarray: 116 | """For each joint, the index of its parent in the skeleton hierarchy.""" 117 | return self._readonly_view(self._joint_parent_idxs) 118 | 119 | @property 120 | def theta(self) -> np.ndarray: 121 | """SMPL's pose parameters - per-joint rotations in axis-angle representation.""" 122 | return self._theta 123 | 124 | @theta.setter 125 | def theta(self, value: np.ndarray) -> None: 126 | value = np.asarray(value) 127 | assert value.shape == self._theta_shape, f"Expecting theta to have shape ({self._theta_shape},)." 128 | self._theta = value 129 | 130 | @property 131 | def translation(self) -> np.ndarray: 132 | """The 3D translation of the SMPL model.""" 133 | return self._translation 134 | 135 | @translation.setter 136 | def translation(self, value: np.ndarray) -> None: 137 | value = np.asarray(value) 138 | assert value.shape == (3,), "Translation should be 3D." 
139 | self._translation = value 140 | 141 | @staticmethod 142 | def _readonly_view(array: np.ndarray) -> np.ndarray: 143 | view = array.view() 144 | view.setflags(write=False) 145 | return view 146 | 147 | @property 148 | def joint_transforms(self) -> np.ndarray: 149 | """The global transforms of each joint in SMPL (read-only).""" 150 | self._update_joint_transforms() 151 | return self._readonly_view(self._j_transforms_global) 152 | 153 | @property 154 | def vertices(self) -> np.ndarray: 155 | """The posed vertex positions of the SMPL model (read-only).""" 156 | self._update_joint_transforms() 157 | self._update_vertices() 158 | return self._readonly_view(self._vertices) 159 | 160 | @property 161 | def normals(self) -> np.ndarray: 162 | """Normals to the surface of the posed SMPL model (read-only).""" 163 | self._update_joint_transforms() 164 | self._update_vertices() 165 | self._update_normals() 166 | return self._readonly_view(self._normals) 167 | 168 | @property 169 | def joint_positions(self) -> np.ndarray: 170 | """The posed joint positions of the SMPL model (read-only).""" 171 | self._update_joint_transforms() 172 | return self._readonly_view(self._j_transforms_global[:, :3, 3]) 173 | 174 | @property 175 | def shape_dim(self) -> int: 176 | """The number of elements in SMPL's shape vector (read-only).""" 177 | return self._shape_dim 178 | 179 | @property 180 | def n_vertices(self) -> int: 181 | """The number of vertices in SMPL (read-only).""" 182 | return self._n_vertices 183 | 184 | @property 185 | def n_joints(self) -> int: 186 | """The number of joints in SMPL (read-only).""" 187 | return self._n_joints 188 | 189 | @property 190 | def triangles(self) -> np.ndarray: 191 | """The vertex triangle indices (read-only).""" 192 | return self._triangles 193 | 194 | def _update_shape(self) -> None: 195 | self._vertices_bind_pose = self._vertex_template + self._vertex_shape_basis.dot(self._beta) 196 | self._joints_bind_pose = self._joint_regressor.dot(self._vertices_bind_pose) 197 | 198 | def _update_joint_transforms(self) -> None: 199 | # Initialize joint-local pose transforms to the identity 200 | self._j_transforms_local = np.identity(4)[np.newaxis, ...].repeat(self._n_joints, axis=0) 201 | 202 | # Set the root joint translation 203 | self._j_transforms_local[0, :3, 3] = self._translation + self._joints_bind_pose[0] 204 | 205 | # Set the translational offset between each joint and its parent, excluding the root 206 | p_offsets = self._joints_bind_pose[1:] - self._joints_bind_pose[self._joint_parent_idxs[1:]] 207 | self._j_transforms_local[1:, :3, 3] = p_offsets 208 | 209 | # Set local rotations of each joint 210 | for j_idx in range(self._n_joints): 211 | self._j_transforms_local[j_idx, :3, :3] = axis_angle_to_rotation_matrix(self._theta[j_idx]) 212 | 213 | # Calculate transforms of each joint in global space 214 | self._j_transforms_global = np.zeros_like(self._j_transforms_local) 215 | self._j_transforms_global[0] = self._j_transforms_local[0] 216 | for j_idx in range(1, self._n_joints): 217 | parent_idx = self._joint_parent_idxs[j_idx] 218 | self._j_transforms_global[j_idx] = self._j_transforms_global[parent_idx] @ self._j_transforms_local[j_idx] 219 | 220 | def _update_vertices(self) -> None: 221 | # Apply the SMPL vertex pose basis 222 | pose_basis_coeffs = (self._j_transforms_local[1:, :3, :3] - self._identity_cube).ravel() 223 | vertices = self._vertices_bind_pose + self._vertex_pose_basis.dot(pose_basis_coeffs) 224 | 225 | # Skinning transforms are relative to the bind pose. 
226 | # This is the equivalent to pre-applying the inverse bind pose transform of each joint. 227 | skinning_transforms = self._j_transforms_global.copy() 228 | deltas = np.einsum("nij,nj->ni", self._j_transforms_global[:, :3, :3], self._joints_bind_pose) 229 | skinning_transforms[:, :3, 3] -= deltas 230 | 231 | # Get weighted per-vertex skinning transforms 232 | skinning_transforms = np.einsum("nj,jkl->nkl", self._skinning_weights, skinning_transforms) 233 | 234 | # Homogenize vertices 235 | vertices = np.hstack([vertices, np.ones((self._n_vertices, 1))]) 236 | 237 | # Apply skinning transforms to vertices 238 | vertices = np.matmul(skinning_transforms, vertices[..., np.newaxis]) 239 | 240 | # Dehomogenize, and remove additional dimension 241 | self._vertices = vertices[:, :3, 0] 242 | 243 | def _update_normals(self) -> None: 244 | # Calculate per-face normals for the mesh 245 | vs_ts = self._vertices[self._triangles] 246 | per_face_normals = np.cross(vs_ts[::, 1] - vs_ts[::, 0], vs_ts[::, 2] - vs_ts[::, 0]) 247 | 248 | # For each triangle, add that triangle's normal to each vertex in the triangle 249 | normals = np.zeros_like(self._vertices) 250 | np.add.at(normals, self._triangles.ravel(), np.repeat(per_face_normals, 3, axis=0)) 251 | 252 | # Normalize normals 253 | normals /= np.linalg.norm(normals, axis=1).reshape(-1, 1) 254 | 255 | self._normals = normals 256 | -------------------------------------------------------------------------------- /visualize_data.py: -------------------------------------------------------------------------------- 1 | """Helper script to visualize the SynthMoCap datasets. 2 | 3 | This python file is licensed under the MIT license (see below). 4 | The datasets are licensed under the Research Use of Data Agreement v1.0 (see LICENSE.md). 5 | 6 | Copyright (c) 2024 Microsoft Corporation 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 9 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 10 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 11 | to permit persons to whom the Software is furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 17 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
20 | """ 21 | 22 | import argparse 23 | import json 24 | import lzma 25 | import subprocess 26 | from getpass import getpass 27 | from pathlib import Path 28 | from tarfile import TarFile 29 | 30 | import cv2 31 | import matplotlib.pyplot as plt 32 | import numpy as np 33 | import pyrender 34 | import trimesh 35 | from transformations import rotation_matrix 36 | 37 | from smpl_numpy import SMPL 38 | 39 | SEMSEG_LUT = (plt.get_cmap("tab20")(np.arange(255 + 1)) * 255).astype(np.uint8)[..., :3][..., ::-1] 40 | LDMK_CONN = { 41 | "face": [ 42 | [0, 1], 43 | [1, 2], 44 | [2, 3], 45 | [3, 4], 46 | [4, 5], 47 | [5, 6], 48 | [6, 7], 49 | [7, 8], 50 | [8, 9], 51 | [9, 10], 52 | [10, 11], 53 | [11, 12], 54 | [12, 13], 55 | [13, 14], 56 | [14, 15], 57 | [15, 16], 58 | [17, 18], 59 | [18, 19], 60 | [19, 20], 61 | [20, 21], 62 | [22, 23], 63 | [23, 24], 64 | [24, 25], 65 | [25, 26], 66 | [27, 28], 67 | [28, 29], 68 | [29, 30], 69 | [31, 32], 70 | [32, 33], 71 | [33, 34], 72 | [34, 35], 73 | [36, 37], 74 | [37, 38], 75 | [38, 39], 76 | [39, 40], 77 | [40, 41], 78 | [41, 36], 79 | [42, 43], 80 | [43, 44], 81 | [44, 45], 82 | [45, 46], 83 | [46, 47], 84 | [47, 42], 85 | [48, 49], 86 | [49, 50], 87 | [50, 51], 88 | [51, 52], 89 | [52, 53], 90 | [53, 54], 91 | [54, 55], 92 | [55, 56], 93 | [56, 57], 94 | [57, 58], 95 | [58, 59], 96 | [59, 48], 97 | [60, 61], 98 | [61, 62], 99 | [62, 63], 100 | [63, 64], 101 | [64, 65], 102 | [65, 66], 103 | [66, 67], 104 | [67, 60], 105 | ], 106 | "body": [ 107 | [1, 0], 108 | [2, 0], 109 | [3, 0], 110 | [4, 1], 111 | [5, 2], 112 | [6, 3], 113 | [7, 4], 114 | [8, 5], 115 | [9, 6], 116 | [10, 7], 117 | [11, 8], 118 | [12, 9], 119 | [13, 9], 120 | [14, 9], 121 | [15, 12], 122 | [16, 13], 123 | [17, 14], 124 | [18, 16], 125 | [19, 17], 126 | [20, 18], 127 | [21, 19], 128 | [22, 20], 129 | [23, 22], 130 | [24, 23], 131 | [25, 20], 132 | [26, 25], 133 | [27, 26], 134 | [28, 20], 135 | [29, 28], 136 | [30, 29], 137 | [31, 20], 138 | [32, 31], 139 | [33, 32], 140 | [34, 20], 141 | [35, 34], 142 | [36, 35], 143 | [37, 21], 144 | [38, 37], 145 | [39, 38], 146 | [40, 21], 147 | [41, 40], 148 | [42, 41], 149 | [43, 21], 150 | [44, 43], 151 | [45, 44], 152 | [46, 21], 153 | [47, 46], 154 | [48, 47], 155 | [49, 21], 156 | [50, 49], 157 | [51, 50], 158 | ], 159 | "hand": [ 160 | [1, 0], 161 | [2, 1], 162 | [3, 2], 163 | [4, 0], 164 | [5, 4], 165 | [6, 5], 166 | [7, 0], 167 | [8, 7], 168 | [9, 8], 169 | [10, 0], 170 | [11, 10], 171 | [12, 11], 172 | [13, 0], 173 | [14, 13], 174 | [15, 14], 175 | [16, 3], 176 | [17, 6], 177 | [18, 9], 178 | [19, 12], 179 | [20, 15], 180 | ], 181 | } 182 | SMPLH_MODEL = None 183 | 184 | 185 | def draw_transformed_3d_axes( 186 | image: np.ndarray, 187 | transform: np.ndarray, 188 | loc: np.ndarray, 189 | scale: float, 190 | projection_matrix: np.ndarray, 191 | ) -> None: 192 | """Draw a transformed set of coordinate axes, in color.""" 193 | trsf_4x4 = np.eye(4) 194 | trsf_4x4[:3, :3] = transform 195 | axes_edges = np.array([[0, 1], [0, 2], [0, 3]]) 196 | axes_verts = np.vstack([np.zeros((1, 3)), np.eye(3)]) * 3.0 197 | axes_verts = np.hstack([axes_verts, np.ones((len(axes_verts), 1))]) 198 | axes_verts = np.array([0, 0, 10]) + axes_verts.dot(trsf_4x4.T)[:, :-1] 199 | projected = axes_verts.dot(projection_matrix.T) 200 | projected = projected[:, :2] / projected[:, 2:] 201 | 202 | center = np.array([image.shape[0] // 2, image.shape[1] // 2]) 203 | projected = ((projected - center) * scale + loc).astype(int) 204 | 205 | ldmk_connection_pairs = 
projected[axes_edges].astype(int) 206 | for p_0, p_1 in ldmk_connection_pairs: 207 | cv2.line(image, tuple(p_0 + 1), tuple(p_1 + 1), (0, 0, 0), 2, cv2.LINE_AA) 208 | 209 | colors = np.fliplr(np.eye(3) * 255) 210 | for i, (p_0, p_1) in enumerate(ldmk_connection_pairs): 211 | cv2.line(image, tuple(p_0), tuple(p_1), colors[i], 2, cv2.LINE_AA) 212 | 213 | 214 | def draw_landmarks( 215 | img: np.ndarray, 216 | ldmks_2d: np.ndarray, 217 | connectivity: list[list[int]], 218 | thickness: int = 1, 219 | color: tuple[int, int, int] = (255, 255, 255), 220 | ) -> None: 221 | """Drawing dots on an image.""" 222 | if img.dtype != np.uint8: 223 | raise ValueError("Image must be uint8") 224 | if np.any(np.isnan(ldmks_2d)): 225 | raise ValueError("NaNs in landmarks") 226 | 227 | img_size = (img.shape[1], img.shape[0]) 228 | 229 | ldmk_connection_pairs = ldmks_2d[np.asarray(connectivity).astype(int)].astype(int) 230 | for p_0, p_1 in ldmk_connection_pairs: 231 | cv2.line(img, tuple(p_0 + 1), tuple(p_1 + 1), (0, 0, 0), thickness, cv2.LINE_AA) 232 | for i, (p_0, p_1) in enumerate(ldmk_connection_pairs): 233 | cv2.line( 234 | img, 235 | tuple(p_0), 236 | tuple(p_1), 237 | (int(color[0]), int(color[1]), int(color[2])), 238 | thickness, 239 | cv2.LINE_AA, 240 | ) 241 | 242 | for ldmk in ldmks_2d.astype(int): 243 | if np.all(ldmk > 0) and np.all(ldmk < img_size): 244 | cv2.circle(img, tuple(ldmk + 1), thickness + 1, (0, 0, 0), -1, cv2.LINE_AA) 245 | cv2.circle( 246 | img, 247 | tuple(ldmk), 248 | thickness + 1, 249 | (int(color[0]), int(color[1]), int(color[2])), 250 | -1, 251 | cv2.LINE_AA, 252 | ) 253 | 254 | 255 | def _download_smplh() -> None: 256 | print("Downloading SMPL-H...") 257 | username = input("Username for https://mano.is.tue.mpg.de/: ") 258 | password = getpass("Password for https://mano.is.tue.mpg.de/: ") 259 | out_path = Path(__file__).parent / "smplh" / "smplh.tar.xz" 260 | out_path.parent.mkdir(exist_ok=True, parents=True) 261 | url = "https://download.is.tue.mpg.de/download.php?domain=mano&resume=1&sfile=smplh.tar.xz" 262 | try: 263 | subprocess.check_call( 264 | [ 265 | "wget", 266 | "--post-data", 267 | f"username={username}&password={password}", 268 | url, 269 | "-O", 270 | out_path.as_posix(), 271 | "--no-check-certificate", 272 | "--continue", 273 | ] 274 | ) 275 | except FileNotFoundError as exc: 276 | raise RuntimeError("wget not found, please install it") from exc 277 | except subprocess.CalledProcessError as exc: 278 | if out_path.exists(): 279 | out_path.unlink() 280 | raise RuntimeError("Download failed, check your login details") from exc 281 | with lzma.open(out_path) as fd: 282 | with TarFile(fileobj=fd) as f: 283 | f.extractall(out_path.parent) 284 | out_path.unlink() 285 | 286 | 287 | def _get_smplh() -> SMPL: 288 | global SMPLH_MODEL 289 | 290 | if SMPLH_MODEL is None: 291 | model_path = Path(__file__).parent / "smplh" / "neutral" / "model.npz" 292 | if not model_path.exists(): 293 | _download_smplh() 294 | 295 | SMPLH_MODEL = SMPL(model_path) 296 | 297 | return SMPLH_MODEL 298 | 299 | 300 | def _render_mesh( 301 | vertices: np.ndarray, 302 | triangles: np.ndarray, 303 | world_to_cam: np.ndarray, 304 | cam_to_img: np.ndarray, 305 | resolution: tuple[int, int], 306 | ) -> np.ndarray: 307 | renderer = pyrender.OffscreenRenderer(resolution[0], resolution[1]) 308 | 309 | camera_pr = pyrender.IntrinsicsCamera( 310 | cx=cam_to_img[0, 2], 311 | cy=cam_to_img[1, 2], 312 | fx=cam_to_img[0, 0], 313 | fy=cam_to_img[1, 1], 314 | zfar=5000.0, 315 | name="cam", 316 | ) 317 | scene = 
pyrender.Scene(ambient_light=[100, 100, 100], bg_color=[0, 0, 0, 0])
318 | 
319 |     # OpenCV to OpenGL convention
320 |     world_to_cam_gl = np.linalg.inv(world_to_cam).dot(rotation_matrix(np.pi, [1, 0, 0]))
321 |     camera_node = pyrender.Node(camera=camera_pr, matrix=world_to_cam_gl)
322 |     scene.add_node(camera_node)
323 | 
324 |     key_light = pyrender.DirectionalLight(color=np.ones(3), intensity=4.0)
325 |     R1 = rotation_matrix(np.radians(25), [0, 1, 0])
326 |     R2 = rotation_matrix(np.radians(-30), [1, 0, 0])
327 |     key_pose = world_to_cam_gl.dot(R1.dot(R2))
328 |     scene.add(key_light, pose=key_pose)
329 | 
330 |     back_light = pyrender.DirectionalLight(color=np.ones(3), intensity=1.0)
331 |     R1 = rotation_matrix(np.radians(-150), [0, 1, 0])
332 |     back_pose = world_to_cam_gl.dot(R1)
333 |     scene.add(back_light, pose=back_pose)
334 | 
335 |     mesh_trimesh = trimesh.Trimesh(vertices, triangles, process=False)
336 |     colors = np.repeat([[255, 61, 13]], len(vertices), axis=0)
337 |     mesh_trimesh.visual.vertex_colors = colors
338 |     mesh_pyrender = pyrender.Mesh.from_trimesh(mesh_trimesh, smooth=True)
339 |     mesh_pyrender.primitives[0].material.roughnessFactor = 0.6
340 |     mesh_pyrender.primitives[0].material.alphaMode = "OPAQUE"
341 |     scene.add(mesh_pyrender)
342 | 
343 |     rendered_img, _ = renderer.render(scene, flags=pyrender.RenderFlags.RGBA | pyrender.RenderFlags.ALL_SOLID)
344 |     renderer.delete()
345 | 
346 |     return rendered_img.astype(float) / 255
347 | 
348 | 
349 | def draw_mesh(
350 |     image: np.ndarray,
351 |     identity: np.ndarray,
352 |     pose: np.ndarray,
353 |     translation: np.ndarray,
354 |     world_to_cam: np.ndarray,
355 |     cam_to_img: np.ndarray,
356 | ) -> np.ndarray:
357 |     """Render a mesh from identity, pose, and translation parameters, alpha-blended over the input image."""
358 |     smplh = _get_smplh()
359 |     smplh.beta = identity[: smplh.shape_dim]
360 |     smplh.theta = pose
361 |     smplh.translation = translation
362 |     render = _render_mesh(smplh.vertices, smplh.triangles, world_to_cam, cam_to_img, image.shape[:2][::-1])
363 |     # Alpha-blend the rendered mesh over the input image at 75% opacity
364 |     return (
365 |         ((image.astype(np.float64) / 255) * (1 - 0.75 * render[..., -1:]) + render[..., :3] * 0.75 * render[..., -1:])
366 |         * 255
367 |     ).astype(np.uint8)
368 | 
369 | 
370 | def main() -> None:
371 |     parser = argparse.ArgumentParser()
372 |     parser.add_argument("data_dir", type=Path)
373 |     parser.add_argument("--n-ids", type=int, default=25)
374 |     parser.add_argument("--n-frames", type=int, default=2)
375 |     args = parser.parse_args()
376 | 
377 |     dataset_type = args.data_dir.stem.split("_")[-1]
378 | 
379 |     for sidx in range(args.n_ids):
380 |         for fidx in range(args.n_frames):
381 |             meta_file = args.data_dir / f"metadata_{sidx:07d}_{fidx:03d}.json"
382 |             img_file = args.data_dir / f"img_{sidx:07d}_{fidx:03d}.jpg"
383 | 
384 |             if not meta_file.exists() or not img_file.exists():
385 |                 continue
386 | 
387 |             with open(meta_file, "r") as f:
388 |                 metadata = json.load(f)
389 | 
390 |             frame = cv2.imread(str(img_file))
391 |             vis_imgs = [frame]
392 |             ldmks_2d = np.asarray(metadata["landmarks"]["2D"])
393 |             ldmk_vis = frame.copy()
394 |             draw_landmarks(
395 |                 ldmk_vis,
396 |                 ldmks_2d,
397 |                 LDMK_CONN[dataset_type],
398 |             )
399 |             vis_imgs.append(ldmk_vis)
400 | 
401 |             if dataset_type != "hand":
402 |                 seg_file = args.data_dir / f"segm_parts_{sidx:07d}_{fidx:03d}.png"
403 |                 seg_parts = cv2.imread(str(seg_file), cv2.IMREAD_GRAYSCALE)
404 |                 parts = SEMSEG_LUT[seg_parts]
405 |                 for idx, mask_name in enumerate(
406 |                     [
407 |                         "hair",
408 |                         "beard",
409 |                         "eyebrows",
410 |                         "eyelashes",
411 |                         "glasses",
412 |                         "headwear",
413 |                         "facewear",
414 | "clothing", 415 | ] 416 | ): 417 | mask_path = args.data_dir / f"segm_{mask_name}_{sidx:07d}_{fidx:03d}.png" 418 | if not mask_path.exists(): 419 | continue 420 | mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE) > 100 421 | parts[mask] = SEMSEG_LUT[idx + (13 if dataset_type == "face" else 17)] 422 | vis_imgs.append(parts) 423 | 424 | if dataset_type in {"hand", "body"}: 425 | vis_imgs.append( 426 | draw_mesh( 427 | frame.copy(), 428 | np.asarray(metadata["body_identity"]), 429 | np.asarray(metadata["pose"]), 430 | np.asarray(metadata["translation"]), 431 | np.asarray(metadata["camera"]["world_to_camera"]), 432 | np.asarray(metadata["camera"]["camera_to_image"]), 433 | ) 434 | ) 435 | else: 436 | pose_vis = frame.copy() 437 | cam_to_img = np.asarray(metadata["camera"]["camera_to_image"]) 438 | head_loc = np.mean(metadata["landmarks"]["2D"], axis=0) 439 | left_eye_loc = metadata["landmarks"]["2D"][-2] 440 | right_eye_loc = metadata["landmarks"]["2D"][-1] 441 | draw_transformed_3d_axes( 442 | pose_vis, 443 | np.asarray(metadata["head_pose"]), 444 | head_loc, 445 | 0.5, 446 | cam_to_img, 447 | ) 448 | draw_transformed_3d_axes( 449 | pose_vis, 450 | np.asarray(metadata["left_eye_pose"]), 451 | left_eye_loc, 452 | 0.1, 453 | cam_to_img, 454 | ) 455 | draw_transformed_3d_axes( 456 | pose_vis, 457 | np.asarray(metadata["right_eye_pose"]), 458 | right_eye_loc, 459 | 0.1, 460 | cam_to_img, 461 | ) 462 | vis_imgs.append(pose_vis) 463 | 464 | cv2.imshow(args.data_dir.stem, np.hstack(vis_imgs)) 465 | k = cv2.waitKey(0) 466 | if k in {ord("q"), 27}: 467 | cv2.destroyAllWindows() 468 | exit() 469 | 470 | 471 | if __name__ == "__main__": 472 | main() 473 | --------------------------------------------------------------------------------
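A minimal usage sketch for the `SMPL` class in `smpl_numpy.py`, assuming the neutral SMPL-H model has already been downloaded and extracted to `smplh/neutral/model.npz` (as `visualize_data.py` does on first run); the pose and translation values below are illustrative only:

```python
from pathlib import Path

import numpy as np

from smpl_numpy import SMPL

# Assumes the neutral SMPL-H model was extracted to this path (see _download_smplh in visualize_data.py).
model = SMPL(Path("smplh") / "neutral" / "model.npz")

# Mean body shape, identity pose (one axis-angle rotation per joint), and an arbitrary translation.
model.beta = np.zeros(model.shape_dim)
theta = np.zeros((model.n_joints, 3))
theta[0] = [0.0, np.pi, 0.0]  # illustrative: rotate the root joint 180 degrees about Y
model.theta = theta
model.translation = np.array([0.0, 0.0, 3.0])

# Read-only posed geometry, exposed by the properties defined in smpl_numpy.py.
print(model.vertices.shape)         # (n_vertices, 3)
print(model.joint_positions.shape)  # (n_joints, 3)
print(model.triangles.shape)        # (n_triangles, 3)
```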